cdmdexer 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 60ef91c0ea6d70b0242465b27945c265175312254e540b40a2ba15ffbb609413
4
- data.tar.gz: dce7221c4a962bad08f35ef39c2bf4bd0288650d97b2b1c3d1d6751226bc7a14
3
+ metadata.gz: ea1e0a2b54ce8d063a2d84d18987d662ef583abf044142356a46d5bcd85790f5
4
+ data.tar.gz: a9012a9ae4fcee9bdd37314a0acd5c19238c60691ff60a95668329c89e5c650b
5
5
  SHA512:
6
- metadata.gz: fd92890578c5e3c9642766b605b381219f789854918c9895ef3d8a895979dc5bde6a3e2522fc21b03893d304a4660914c45d080cb67066b10a998d3d15897d1a
7
- data.tar.gz: 05d038f8b6149dbfe7500dced962f853c7f3a2ed49821d253d02e71baf0af82dfb4ac9cfc526de4dc4e5cca5552712c8b955ec9a532c437ed9d0570852a4cc70
6
+ metadata.gz: bf5450e86279e1e3a16fc8aa862f69ac865047bb4a9dc1a067a547152437c9d849acc2cf78f1107b664d4b930c8140913bad5cb0d21a35ada2e720465427a187
7
+ data.tar.gz: 5ae4ef6681742ec8bd883a388d571c08057e58f23044b1c09278b305159add9aec4ddde656024c813e0afb7c4427bab06d81286874c4c7fe104a2a937134d40e
@@ -1,5 +1,6 @@
1
- # coding: utf-8
2
- lib = File.expand_path('../lib', __FILE__)
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
3
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
5
  require 'cdmdexer/version'
5
6
 
@@ -9,7 +10,7 @@ Gem::Specification.new do |spec|
9
10
  spec.authors = ['chadfennell']
10
11
  spec.email = ['fenne035@umn.edu']
11
12
 
12
- spec.summary = %q{Load CONTENTdm data into a Solr Index. CDMDEXER expects to run inside a Rails application.}
13
+ spec.summary = 'Load CONTENTdm data into a Solr Index. CDMDEXER expects to run inside a Rails application.'
13
14
  spec.license = 'MIT'
14
15
 
15
16
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
@@ -17,11 +18,11 @@ Gem::Specification.new do |spec|
17
18
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
18
19
  spec.require_paths = ['lib']
19
20
 
20
- spec.add_dependency 'hash_at_path', '~> 0.1'
21
21
  spec.add_dependency 'contentdm_api', '~> 0.5.0'
22
+ spec.add_dependency 'hash_at_path', '~> 0.1.6'
23
+ spec.add_dependency 'rsolr', '~> 2.0'
22
24
  spec.add_dependency 'sidekiq', '>= 3.5'
23
25
  spec.add_dependency 'titleize', '~> 1.4'
24
- spec.add_dependency 'rsolr', '~> 2.0'
25
26
  # CDMDEXER expects to run in a rails app, but just to avoid adding
26
27
  # another external dependency for XML processing, we rely on activesupport's
27
28
  # Has.to_jsonl feature for testing and to allow this gem to function
@@ -29,7 +30,7 @@ Gem::Specification.new do |spec|
29
30
  spec.add_dependency 'rails', '>= 5.2'
30
31
 
31
32
  spec.add_development_dependency 'bundler', '~> 1.12'
32
- spec.add_development_dependency 'rake', '~> 12.0'
33
33
  spec.add_development_dependency 'minitest', '~> 5.0'
34
+ spec.add_development_dependency 'rake', '~> 12.0'
34
35
  spec.add_development_dependency 'yard', '~> 0.9.0'
35
36
  end
@@ -38,11 +38,20 @@ module CDMDEXER
38
38
  @resumption_token = config.fetch('resumption_token', nil)
39
39
  @batch_size = config.fetch('batch_size', 5).to_i
40
40
  @is_recursive = config.fetch('is_recursive', true)
41
+ after_date = config.fetch('after_date', false)
41
42
 
42
43
  @oai_request = oai_request_klass.new(
43
44
  endpoint_url: oai_endpoint,
44
45
  resumption_token: resumption_token,
45
- set_spec: config.fetch('set_spec', nil)
46
+ set_spec: config.fetch('set_spec', nil),
47
+ # Optionally only select records that have been updated after a
48
+ # certain date. You may need to manually update a parent record
49
+ # after updating a child in order to signify to the indexer that
50
+ # some record in the parent's children has been updated. This indexer
51
+ # expects to only see parent records in the OAI responses.
52
+ # The default here is to skip indexing based on date.
53
+ # Rails example for getting a date: `after_date: 2.weeks.ago`
54
+ after_date: after_date
46
55
  )
47
56
 
48
57
  run_batch!
@@ -1,5 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'hash_at_path'
2
4
  require 'json'
5
+ require 'time'
3
6
 
4
7
  module CDMDEXER
5
8
  # Light wrapper around OAI requests
@@ -10,16 +13,19 @@ module CDMDEXER
10
13
  attr_reader :endpoint_url,
11
14
  :resumption_token,
12
15
  :client,
13
- :set_spec
16
+ :set_spec,
17
+ :after_date
14
18
 
15
19
  def initialize(endpoint_url: '',
16
20
  resumption_token: nil,
17
21
  set_spec: nil,
18
- client: Net::HTTP)
22
+ client: Net::HTTP,
23
+ after_date: false)
19
24
  @endpoint_url = endpoint_url
20
25
  @resumption_token = resumption_token
21
26
  @client = client
22
27
  @set_spec = set_spec ? "&set=#{set_spec}" : ''
28
+ @after_date = after_date
23
29
  end
24
30
 
25
31
  def records
@@ -44,12 +50,23 @@ module CDMDEXER
44
50
  end
45
51
 
46
52
  def deletable_ids
47
- records.select { |record| record['status'] == 'deleted' }
48
- .map { |record| record[:id] }
53
+ records.select do |record|
54
+ if record['status'] == 'deleted'
55
+ after_date ? Time.parse(record['datestamp']) >= after_date : true
56
+ end
57
+ end.map { |record| record[:id] }
49
58
  end
50
59
 
51
60
  def updatables
52
- records.reject { |record| record['status'] == 'deleted' }
61
+ records.reject do |record|
62
+ if record['status'] == 'deleted'
63
+ true
64
+ elsif after_date && Time.parse(record['datestamp']) < after_date
65
+ true
66
+ else
67
+ false
68
+ end
69
+ end
53
70
  end
54
71
 
55
72
  private
@@ -1,3 +1,3 @@
1
1
  module CDMDEXER
2
- VERSION = "0.20.0"
2
+ VERSION = "0.21.0"
3
3
  end
metadata CHANGED
@@ -1,85 +1,85 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdmdexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.20.0
4
+ version: 0.21.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - chadfennell
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-07-12 00:00:00.000000000 Z
11
+ date: 2020-10-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: hash_at_path
14
+ name: contentdm_api
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0.1'
19
+ version: 0.5.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0.1'
26
+ version: 0.5.0
27
27
  - !ruby/object:Gem::Dependency
28
- name: contentdm_api
28
+ name: hash_at_path
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 0.5.0
33
+ version: 0.1.6
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.5.0
40
+ version: 0.1.6
41
41
  - !ruby/object:Gem::Dependency
42
- name: sidekiq
42
+ name: rsolr
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '3.5'
47
+ version: '2.0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ">="
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '3.5'
54
+ version: '2.0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: titleize
56
+ name: sidekiq
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - "~>"
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
- version: '1.4'
61
+ version: '3.5'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - "~>"
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
- version: '1.4'
68
+ version: '3.5'
69
69
  - !ruby/object:Gem::Dependency
70
- name: rsolr
70
+ name: titleize
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '2.0'
75
+ version: '1.4'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '2.0'
82
+ version: '1.4'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: rails
85
85
  requirement: !ruby/object:Gem::Requirement
@@ -109,33 +109,33 @@ dependencies:
109
109
  - !ruby/object:Gem::Version
110
110
  version: '1.12'
111
111
  - !ruby/object:Gem::Dependency
112
- name: rake
112
+ name: minitest
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
115
  - - "~>"
116
116
  - !ruby/object:Gem::Version
117
- version: '12.0'
117
+ version: '5.0'
118
118
  type: :development
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
- version: '12.0'
124
+ version: '5.0'
125
125
  - !ruby/object:Gem::Dependency
126
- name: minitest
126
+ name: rake
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
129
  - - "~>"
130
130
  - !ruby/object:Gem::Version
131
- version: '5.0'
131
+ version: '12.0'
132
132
  type: :development
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
- version: '5.0'
138
+ version: '12.0'
139
139
  - !ruby/object:Gem::Dependency
140
140
  name: yard
141
141
  requirement: !ruby/object:Gem::Requirement
@@ -217,7 +217,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
217
217
  - !ruby/object:Gem::Version
218
218
  version: '0'
219
219
  requirements: []
220
- rubygems_version: 3.0.3
220
+ rubygems_version: 3.0.6
221
221
  signing_key:
222
222
  specification_version: 4
223
223
  summary: Load CONTENTdm data into a Solr Index. CDMDEXER expects to run inside a Rails