cdmdexer 0.20.0 → 0.21.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 60ef91c0ea6d70b0242465b27945c265175312254e540b40a2ba15ffbb609413
4
- data.tar.gz: dce7221c4a962bad08f35ef39c2bf4bd0288650d97b2b1c3d1d6751226bc7a14
3
+ metadata.gz: ea1e0a2b54ce8d063a2d84d18987d662ef583abf044142356a46d5bcd85790f5
4
+ data.tar.gz: a9012a9ae4fcee9bdd37314a0acd5c19238c60691ff60a95668329c89e5c650b
5
5
  SHA512:
6
- metadata.gz: fd92890578c5e3c9642766b605b381219f789854918c9895ef3d8a895979dc5bde6a3e2522fc21b03893d304a4660914c45d080cb67066b10a998d3d15897d1a
7
- data.tar.gz: 05d038f8b6149dbfe7500dced962f853c7f3a2ed49821d253d02e71baf0af82dfb4ac9cfc526de4dc4e5cca5552712c8b955ec9a532c437ed9d0570852a4cc70
6
+ metadata.gz: bf5450e86279e1e3a16fc8aa862f69ac865047bb4a9dc1a067a547152437c9d849acc2cf78f1107b664d4b930c8140913bad5cb0d21a35ada2e720465427a187
7
+ data.tar.gz: 5ae4ef6681742ec8bd883a388d571c08057e58f23044b1c09278b305159add9aec4ddde656024c813e0afb7c4427bab06d81286874c4c7fe104a2a937134d40e
@@ -1,5 +1,6 @@
1
- # coding: utf-8
2
- lib = File.expand_path('../lib', __FILE__)
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
3
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
5
  require 'cdmdexer/version'
5
6
 
@@ -9,7 +10,7 @@ Gem::Specification.new do |spec|
9
10
  spec.authors = ['chadfennell']
10
11
  spec.email = ['fenne035@umn.edu']
11
12
 
12
- spec.summary = %q{Load CONTENTdm data into a Solr Index. CDMDEXER expects to run inside a Rails application.}
13
+ spec.summary = 'Load CONTENTdm data into a Solr Index. CDMDEXER expects to run inside a Rails application.'
13
14
  spec.license = 'MIT'
14
15
 
15
16
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
@@ -17,11 +18,11 @@ Gem::Specification.new do |spec|
17
18
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
18
19
  spec.require_paths = ['lib']
19
20
 
20
- spec.add_dependency 'hash_at_path', '~> 0.1'
21
21
  spec.add_dependency 'contentdm_api', '~> 0.5.0'
22
+ spec.add_dependency 'hash_at_path', '~> 0.1.6'
23
+ spec.add_dependency 'rsolr', '~> 2.0'
22
24
  spec.add_dependency 'sidekiq', '>= 3.5'
23
25
  spec.add_dependency 'titleize', '~> 1.4'
24
- spec.add_dependency 'rsolr', '~> 2.0'
25
26
  # CDMDEXER expects to run in a rails app, but just to avoid adding
26
27
  # another external dependency for XML processing, we rely on activesupport's
27
28
  # Has.to_jsonl feature for testing and to allow this gem to function
@@ -29,7 +30,7 @@ Gem::Specification.new do |spec|
29
30
  spec.add_dependency 'rails', '>= 5.2'
30
31
 
31
32
  spec.add_development_dependency 'bundler', '~> 1.12'
32
- spec.add_development_dependency 'rake', '~> 12.0'
33
33
  spec.add_development_dependency 'minitest', '~> 5.0'
34
+ spec.add_development_dependency 'rake', '~> 12.0'
34
35
  spec.add_development_dependency 'yard', '~> 0.9.0'
35
36
  end
@@ -38,11 +38,20 @@ module CDMDEXER
38
38
  @resumption_token = config.fetch('resumption_token', nil)
39
39
  @batch_size = config.fetch('batch_size', 5).to_i
40
40
  @is_recursive = config.fetch('is_recursive', true)
41
+ after_date = config.fetch('after_date', false)
41
42
 
42
43
  @oai_request = oai_request_klass.new(
43
44
  endpoint_url: oai_endpoint,
44
45
  resumption_token: resumption_token,
45
- set_spec: config.fetch('set_spec', nil)
46
+ set_spec: config.fetch('set_spec', nil),
47
+ # Optionally only select records that have been updated after a
48
+ # certain date. You may need to manually update a parent record
49
+ # after updating a child in order to signify to the indexer that
50
+ # some record in the parent's children has been updated. This indexer
51
+ # expects to only see parent records in the OAI responses.
52
+ # By default (after_date: false) no date filtering is applied.
53
+ # Rails example for getting a date: `after_date: 2.weeks.ago`
54
+ after_date: after_date
46
55
  )
47
56
 
48
57
  run_batch!
@@ -1,5 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'hash_at_path'
2
4
  require 'json'
5
+ require 'time'
3
6
 
4
7
  module CDMDEXER
5
8
  # Light wrapper around OAI requests
@@ -10,16 +13,19 @@ module CDMDEXER
10
13
  attr_reader :endpoint_url,
11
14
  :resumption_token,
12
15
  :client,
13
- :set_spec
16
+ :set_spec,
17
+ :after_date
14
18
 
15
19
  def initialize(endpoint_url: '',
16
20
  resumption_token: nil,
17
21
  set_spec: nil,
18
- client: Net::HTTP)
22
+ client: Net::HTTP,
23
+ after_date: false)
19
24
  @endpoint_url = endpoint_url
20
25
  @resumption_token = resumption_token
21
26
  @client = client
22
27
  @set_spec = set_spec ? "&set=#{set_spec}" : ''
28
+ @after_date = after_date
23
29
  end
24
30
 
25
31
  def records
@@ -44,12 +50,23 @@ module CDMDEXER
44
50
  end
45
51
 
46
52
  def deletable_ids
47
- records.select { |record| record['status'] == 'deleted' }
48
- .map { |record| record[:id] }
53
+ records.select do |record|
54
+ if record['status'] == 'deleted'
55
+ after_date ? Time.parse(record['datestamp']) >= after_date : true
56
+ end
57
+ end.map { |record| record[:id] }
49
58
  end
50
59
 
51
60
  def updatables
52
- records.reject { |record| record['status'] == 'deleted' }
61
+ records.reject do |record|
62
+ if record['status'] == 'deleted'
63
+ true
64
+ elsif after_date && Time.parse(record['datestamp']) < after_date
65
+ true
66
+ else
67
+ false
68
+ end
69
+ end
53
70
  end
54
71
 
55
72
  private
@@ -1,3 +1,3 @@
1
1
  module CDMDEXER
2
- VERSION = "0.20.0"
2
+ VERSION = "0.21.0"
3
3
  end
metadata CHANGED
@@ -1,85 +1,85 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdmdexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.20.0
4
+ version: 0.21.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - chadfennell
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-07-12 00:00:00.000000000 Z
11
+ date: 2020-10-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: hash_at_path
14
+ name: contentdm_api
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0.1'
19
+ version: 0.5.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0.1'
26
+ version: 0.5.0
27
27
  - !ruby/object:Gem::Dependency
28
- name: contentdm_api
28
+ name: hash_at_path
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 0.5.0
33
+ version: 0.1.6
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.5.0
40
+ version: 0.1.6
41
41
  - !ruby/object:Gem::Dependency
42
- name: sidekiq
42
+ name: rsolr
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '3.5'
47
+ version: '2.0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ">="
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '3.5'
54
+ version: '2.0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: titleize
56
+ name: sidekiq
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - "~>"
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
- version: '1.4'
61
+ version: '3.5'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - "~>"
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
- version: '1.4'
68
+ version: '3.5'
69
69
  - !ruby/object:Gem::Dependency
70
- name: rsolr
70
+ name: titleize
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '2.0'
75
+ version: '1.4'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '2.0'
82
+ version: '1.4'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: rails
85
85
  requirement: !ruby/object:Gem::Requirement
@@ -109,33 +109,33 @@ dependencies:
109
109
  - !ruby/object:Gem::Version
110
110
  version: '1.12'
111
111
  - !ruby/object:Gem::Dependency
112
- name: rake
112
+ name: minitest
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
115
  - - "~>"
116
116
  - !ruby/object:Gem::Version
117
- version: '12.0'
117
+ version: '5.0'
118
118
  type: :development
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
- version: '12.0'
124
+ version: '5.0'
125
125
  - !ruby/object:Gem::Dependency
126
- name: minitest
126
+ name: rake
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
129
  - - "~>"
130
130
  - !ruby/object:Gem::Version
131
- version: '5.0'
131
+ version: '12.0'
132
132
  type: :development
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
- version: '5.0'
138
+ version: '12.0'
139
139
  - !ruby/object:Gem::Dependency
140
140
  name: yard
141
141
  requirement: !ruby/object:Gem::Requirement
@@ -217,7 +217,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
217
217
  - !ruby/object:Gem::Version
218
218
  version: '0'
219
219
  requirements: []
220
- rubygems_version: 3.0.3
220
+ rubygems_version: 3.0.6
221
221
  signing_key:
222
222
  specification_version: 4
223
223
  summary: Load CONTENTdm data into a Solr Index. CDMDEXER expects to run inside a Rails