cdmbl 0.14.0 → 0.18.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 417415def1b0f28a66e6640201a3c53becf43737
4
- data.tar.gz: 36d22cc2acaab58453bb908877af79f2fe93ddba
2
+ SHA256:
3
+ metadata.gz: 98471f931e9b4535c9f1019cfe954cfba9eb7e9742fdb4f2eda70fe914d4fd3e
4
+ data.tar.gz: 32524fe0f8ae2d4ea5b45954d0b368a46153b9f7b3298cb064a5202cd75049ee
5
5
  SHA512:
6
- metadata.gz: 6a4a95dbdf25f2cfd26d2d037b900d07850bf4be2c50112319bcfd220cbddcf7e0325b62ff45267b9cddb0fc135291d946509e8bfe5a78486c5231e1665f7354
7
- data.tar.gz: bc2795036b04aed45ac8be18829450001c08091b2df7c7af089b7faf1779d422ffb14e0610d5a9c3306b981bcbedcb584ef3b89b70a8fcdf94a81ff6dcd55752
6
+ metadata.gz: 6205cbb83cc23192f3157341e4980b49b4bd6cb89a615ddec3ae13d829f26738d293e6cb174e56741c6fe1406c501568896fdf3ca148be55743b0318452c75ff
7
+ data.tar.gz: c9b9f37bc27e3a7d21fb30035310dae9109e5f0d73d8bf8a45f7b0f175251766e06185de27a03763a50eb8d034777437f910b73e2446e388c98f7371c3a04d8d
data/cdmbl.gemspec CHANGED
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
18
18
  spec.require_paths = ['lib']
19
19
 
20
20
  spec.add_dependency 'hash_at_path', '~> 0.1'
21
- spec.add_dependency 'contentdm_api', '~> 0.3.11'
21
+ spec.add_dependency 'contentdm_api', '~> 0.5.0'
22
22
  spec.add_dependency 'sidekiq', '>= 3.5'
23
23
  spec.add_dependency 'titleize', '~> 1.4'
24
24
  spec.add_dependency 'rsolr', '~> 2.0'
@@ -32,6 +32,4 @@ Gem::Specification.new do |spec|
32
32
  spec.add_development_dependency 'rake', '~> 12.0'
33
33
  spec.add_development_dependency 'minitest', '~> 5.0'
34
34
  spec.add_development_dependency 'yard', '~> 0.9.0'
35
- spec.add_development_dependency 'webmock', '~> 1.24', '>= 1.24.0'
36
- spec.add_development_dependency 'vcr', '~> 3.0', '>= 3.0.1'
37
35
  end
@@ -0,0 +1,90 @@
1
+ require 'sidekiq'
2
+ module CDMBL
3
+ class CdmRequestWorker
4
+ include Sidekiq::Worker
5
+ attr_reader :collection,
6
+ :id,
7
+ :set_lookup,
8
+ :field_mappings
9
+
10
+ attr_writer :cdm_api_klass,
11
+ :oai_request_klass,
12
+ :oai_set_lookup_klass,
13
+ :cdm_notification_klass,
14
+ :load_worker_klass,
15
+ :transformer_klass
16
+
17
+ def perform(collection, id)
18
+
19
+ @identifiers = identifiers
20
+ @solr_config = solr_config
21
+ @cdm_endpoint = cdm_endpoint
22
+ @oai_endpoint = oai_endpoint
23
+ @field_mappings = field_mappings
24
+ @extract_compounds = extract_compounds
25
+ transform_and_load!
26
+ end
27
+
28
+ def oai_set_lookup_klass
29
+ @oai_set_lookup_klass ||= OAISetLookup
30
+ end
31
+
32
+ def oai_request_klass
33
+ @oai_request_klass ||= OaiRequest
34
+ end
35
+
36
+ def cdm_api_klass
37
+ @cdm_api_klass ||= CONTENTdmAPI::Item
38
+ end
39
+
40
+ def cdm_notification_klass
41
+ @cdm_notification_klass ||= CdmNotification
42
+ end
43
+
44
+ def transformer_klass
45
+ @transformer_klass ||= Transformer
46
+ end
47
+
48
+ def load_worker_klass
49
+ @load_worker_klass ||= LoadWorker
50
+ end
51
+
52
+ private
53
+
54
+ def transform_and_load!
55
+ load_worker_klass.perform_async(transformed_records, [], solr_config)
56
+ end
57
+
58
+ def transformed_records
59
+ @transformation ||=
60
+ transformer_klass.new(cdm_records: records,
61
+ oai_sets: set_lookup,
62
+ field_mappings: field_mappings,
63
+ extract_compounds: extract_compounds).records
64
+ end
65
+
66
+ def set_lookup
67
+ oai_set_lookup_klass.new(oai_sets: sets).keyed
68
+ end
69
+
70
+ def records
71
+ identifiers.map do |identifier|
72
+ cdm_request(*identifier)
73
+ end
74
+ end
75
+
76
+ # e.g. local_identifiers.map { |identifier| extractor.cdm_request(*identifier) }
77
+ def cdm_request(collection, id)
78
+ cdm_notification_klass.call!(collection, id, cdm_endpoint)
79
+ cdm_api_klass.new(base_url: cdm_endpoint,
80
+ collection: collection,
81
+ with_compounds: false,
82
+ id: id)
83
+ end
84
+
85
+ def sets
86
+ @oai_request ||=
87
+ oai_request_klass.new(base_uri: oai_endpoint).sets
88
+ end
89
+ end
90
+ end
@@ -16,7 +16,8 @@ module CDMBL
16
16
  :set_spec,
17
17
  :max_compounds,
18
18
  :batch_size,
19
- :is_recursive
19
+ :is_recursive,
20
+ :from
20
21
 
21
22
  attr_writer :compound_filter_klass,
22
23
  :extractor_klass,
@@ -40,6 +41,7 @@ module CDMBL
40
41
  @max_compounds = config.fetch('max_compounds', 10)
41
42
  @batch_size = config.fetch('batch_size', 5).to_i
42
43
  @is_recursive = config.fetch('is_recursive', true)
44
+ @from = config.fetch('from', nil)
43
45
  extract_batch!
44
46
  next_batch!
45
47
  end
@@ -114,12 +116,14 @@ module CDMBL
114
116
  end
115
117
 
116
118
  def transform!(ids)
117
- transform_worker_klass.perform_async(ids,
118
- solr_config,
119
- cdm_endpoint,
120
- oai_endpoint,
121
- field_mappings,
122
- extract_compounds)
119
+ transform_worker_klass.perform_async(
120
+ ids,
121
+ solr_config,
122
+ cdm_endpoint,
123
+ oai_endpoint,
124
+ field_mappings,
125
+ extract_compounds
126
+ )
123
127
  end
124
128
 
125
129
  def delete_deletables!
@@ -127,17 +131,20 @@ module CDMBL
127
131
  end
128
132
 
129
133
  def compound_filter
130
- @compound_filter ||=
131
- compound_filter_klass.new(record_ids: extraction.local_identifiers,
132
- cdm_endpoint: cdm_endpoint,
133
- max_compounds: max_compounds)
134
+ @compound_filter ||= compound_filter_klass.new(
135
+ record_ids: extraction.local_identifiers,
136
+ cdm_endpoint: cdm_endpoint,
137
+ max_compounds: max_compounds
138
+ )
134
139
  end
135
140
 
136
141
  def extraction
137
- @extraction ||=
138
- extractor_klass.new(oai_endpoint: oai_endpoint,
139
- resumption_token: resumption_token,
140
- set_spec: set_spec)
142
+ @extraction ||= extractor_klass.new(
143
+ oai_endpoint: oai_endpoint,
144
+ resumption_token: resumption_token,
145
+ set_spec: set_spec,
146
+ from: from
147
+ )
141
148
  end
142
149
  end
143
150
  end
@@ -16,6 +16,7 @@ module CDMBL
16
16
  def initialize(oai_endpoint: '',
17
17
  resumption_token: nil,
18
18
  set_spec: nil,
19
+ from: nil,
19
20
  oai_request_klass: OaiRequest,
20
21
  oai_filter_klass: OAIFilter,
21
22
  oai_set_lookup_klass: OAISetLookup)
@@ -24,7 +25,8 @@ module CDMBL
24
25
  @oai_set_lookup_klass = oai_set_lookup_klass
25
26
  @oai_request = oai_requester(oai_endpoint,
26
27
  resumption_token,
27
- set_spec)
28
+ set_spec,
29
+ from)
28
30
  end
29
31
 
30
32
  def deletable_ids
@@ -49,11 +51,16 @@ module CDMBL
49
51
 
50
52
  private
51
53
 
52
- def oai_requester(oai_endpoint, resumption_token, set_spec)
53
- @oai_requester ||=
54
- oai_request_klass.new(base_uri: oai_endpoint,
55
- resumption_token: resumption_token,
56
- set: set_spec)
54
+ def oai_requester(oai_endpoint, resumption_token, set_spec, from)
55
+ @oai_requester ||= begin
56
+ args = {
57
+ base_uri: oai_endpoint,
58
+ resumption_token: resumption_token,
59
+ set: set_spec,
60
+ }
61
+ args[:from] = from if from
62
+ oai_request_klass.new(args)
63
+ end
57
64
  end
58
65
 
59
66
  # Get the local collection and id from an OAI namespaced identifier
@@ -72,7 +72,7 @@ module CDMBL
72
72
  class Titlieze
73
73
  def self.format(value)
74
74
  if value.respond_to?(:map)
75
- value.map {|value| value.titleize }
75
+ value.map(&:titleize)
76
76
  else
77
77
  value.titleize
78
78
  end
@@ -172,4 +172,4 @@ module CDMBL
172
172
  end
173
173
  end
174
174
 
175
- end
175
+ end
@@ -3,6 +3,7 @@ module CDMBL
3
3
  # Load Records into a solr index
4
4
  class LoadWorker
5
5
  include Sidekiq::Worker
6
+ sidekiq_options queue: 'critical'
6
7
  attr_reader :solr_config, :records, :deletables
7
8
  attr_writer :loader_klass, :solr_klass
8
9
  def perform(records = [], deletables = [], solr_config = {})
@@ -1,10 +1,12 @@
1
+
1
2
  require 'json'
3
+ require 'http'
2
4
  module CDMBL
3
5
  class OaiClient
4
- attr_reader :base_url, :http_client
5
- def initialize(base_url: '', http_client: Net::HTTP)
6
+ attr_reader :base_url, :client
7
+ def initialize(base_url: '', client: HTTP)
6
8
  @base_url = base_url
7
- @http_client = http_client
9
+ @client = client
8
10
  end
9
11
 
10
12
  def request(query)
@@ -14,7 +16,7 @@ module CDMBL
14
16
  private
15
17
 
16
18
  def get(url)
17
- http_client.get_response(URI(url)).body
19
+ client.get(url).to_s
18
20
  end
19
21
 
20
22
  def hashify(xml)
@@ -1,48 +1,51 @@
1
1
  require 'json'
2
2
  module CDMBL
3
- class OaiRequest
4
- attr_reader :base_uri,
5
- :resumption_token,
6
- :client,
7
- :set,
8
- :identifier
9
- def initialize(base_uri: '',
10
- resumption_token: nil,
11
- set: nil,
12
- identifier: '',
13
- client: Net::HTTP)
14
- @base_uri = base_uri
15
- @resumption_token = resumption_token
16
- @client = client
17
- @set = (set) ? "&set=#{set}" : ''
18
- @identifier = identifier
19
- end
3
+ class OaiRequest
4
+ attr_reader :base_uri,
5
+ :resumption_token,
6
+ :client,
7
+ :set,
8
+ :identifier,
9
+ :from
10
+ def initialize(base_uri: '',
11
+ resumption_token: nil,
12
+ set: nil,
13
+ identifier: '',
14
+ from: nil,
15
+ client: Net::HTTP)
16
+ @base_uri = base_uri
17
+ @resumption_token = resumption_token
18
+ @client = client
19
+ @set = (set) ? "&set=#{set}" : ''
20
+ @from = from ? "&from=#{from}" : ''
21
+ @identifier = identifier
22
+ end
20
23
 
21
- def identifiers
22
- @ids ||= (resumption_token) ? request(batch_uri) : request(first_batch_uri)
23
- end
24
+ def identifiers
25
+ @ids ||= (resumption_token) ? request(batch_uri) : request(first_batch_uri)
26
+ end
24
27
 
25
- def sets
26
- @sets ||= request(sets_uri)
27
- end
28
+ def sets
29
+ @sets ||= request(sets_uri)
30
+ end
28
31
 
29
- private
32
+ private
30
33
 
31
- def first_batch_uri
32
- "#{base_uri}?verb=ListIdentifiers&metadataPrefix=oai_dc#{set}"
33
- end
34
+ def first_batch_uri
35
+ "#{base_uri}?verb=ListIdentifiers&metadataPrefix=oai_dc#{set}#{from}"
36
+ end
34
37
 
35
- def batch_uri
36
- "#{base_uri}?verb=ListIdentifiers&resumptionToken=#{resumption_token}"
37
- end
38
+ def batch_uri
39
+ "#{base_uri}?verb=ListIdentifiers&resumptionToken=#{resumption_token}"
40
+ end
38
41
 
39
- def sets_uri
40
- "#{base_uri}?verb=ListSets"
41
- end
42
+ def sets_uri
43
+ "#{base_uri}?verb=ListSets"
44
+ end
42
45
 
43
- def request(location)
44
- CDMBL::OaiNotification.call!(location)
45
- Hash.from_xml(client.get_response(URI(location)).body)
46
- end
46
+ def request(location)
47
+ CDMBL::OaiNotification.call!(location)
48
+ Hash.from_xml(client.get_response(URI(location)).body)
47
49
  end
48
- end
50
+ end
51
+ end
@@ -10,11 +10,12 @@ module CDMBL
10
10
  :extract_compounds
11
11
 
12
12
  attr_writer :cdm_api_klass,
13
- :oai_request_klass,
14
- :oai_set_lookup_klass,
15
- :cdm_notification_klass,
16
- :load_worker_klass,
17
- :transformer_klass
13
+ :oai_request_klass,
14
+ :oai_set_lookup_klass,
15
+ :cdm_notification_klass,
16
+ :load_worker_klass,
17
+ :transformer_klass,
18
+ :cache_klass
18
19
 
19
20
  def perform(identifiers,
20
21
  solr_config,
@@ -56,6 +57,10 @@ module CDMBL
56
57
  @load_worker_klass ||= LoadWorker
57
58
  end
58
59
 
60
+ def cache_klass
61
+ @cache_klass ||= Rails
62
+ end
63
+
59
64
  private
60
65
 
61
66
  def transform_and_load!
@@ -90,7 +95,9 @@ module CDMBL
90
95
 
91
96
  def sets
92
97
  @oai_request ||=
93
- oai_request_klass.new(base_uri: oai_endpoint).sets
98
+ cache_klass.cache.fetch("cdmbl_set_specs", expires_in: 10.minutes) do
99
+ oai_request_klass.new(base_uri: oai_endpoint).sets
100
+ end
94
101
  end
95
102
  end
96
103
  end
data/lib/cdmbl/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module CDMBL
2
- VERSION = "0.14.0"
3
- end
2
+ VERSION = "0.18.0"
3
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdmbl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.0
4
+ version: 0.18.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - chadfennell
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-01-07 00:00:00.000000000 Z
11
+ date: 2021-03-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: hash_at_path
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 0.3.11
33
+ version: 0.5.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.3.11
40
+ version: 0.5.0
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: sidekiq
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -150,47 +150,7 @@ dependencies:
150
150
  - - "~>"
151
151
  - !ruby/object:Gem::Version
152
152
  version: 0.9.0
153
- - !ruby/object:Gem::Dependency
154
- name: webmock
155
- requirement: !ruby/object:Gem::Requirement
156
- requirements:
157
- - - "~>"
158
- - !ruby/object:Gem::Version
159
- version: '1.24'
160
- - - ">="
161
- - !ruby/object:Gem::Version
162
- version: 1.24.0
163
- type: :development
164
- prerelease: false
165
- version_requirements: !ruby/object:Gem::Requirement
166
- requirements:
167
- - - "~>"
168
- - !ruby/object:Gem::Version
169
- version: '1.24'
170
- - - ">="
171
- - !ruby/object:Gem::Version
172
- version: 1.24.0
173
- - !ruby/object:Gem::Dependency
174
- name: vcr
175
- requirement: !ruby/object:Gem::Requirement
176
- requirements:
177
- - - "~>"
178
- - !ruby/object:Gem::Version
179
- version: '3.0'
180
- - - ">="
181
- - !ruby/object:Gem::Version
182
- version: 3.0.1
183
- type: :development
184
- prerelease: false
185
- version_requirements: !ruby/object:Gem::Requirement
186
- requirements:
187
- - - "~>"
188
- - !ruby/object:Gem::Version
189
- version: '3.0'
190
- - - ">="
191
- - !ruby/object:Gem::Version
192
- version: 3.0.1
193
- description:
153
+ description:
194
154
  email:
195
155
  - fenne035@umn.edu
196
156
  executables: []
@@ -211,6 +171,7 @@ files:
211
171
  - lib/cdmbl.rb
212
172
  - lib/cdmbl/batch_deleter.rb
213
173
  - lib/cdmbl/batch_deleter_worker.rb
174
+ - lib/cdmbl/cdm_request_worker.rb
214
175
  - lib/cdmbl/compound_filter.rb
215
176
  - lib/cdmbl/compound_lookup.rb
216
177
  - lib/cdmbl/default_cdm_notification.rb
@@ -245,11 +206,11 @@ files:
245
206
  - lib/cdmbl/transformer.rb
246
207
  - lib/cdmbl/version.rb
247
208
  - travis.yml
248
- homepage:
209
+ homepage:
249
210
  licenses:
250
211
  - MIT
251
212
  metadata: {}
252
- post_install_message:
213
+ post_install_message:
253
214
  rdoc_options: []
254
215
  require_paths:
255
216
  - lib
@@ -264,9 +225,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
264
225
  - !ruby/object:Gem::Version
265
226
  version: '0'
266
227
  requirements: []
267
- rubyforge_project:
268
- rubygems_version: 2.6.13
269
- signing_key:
228
+ rubygems_version: 3.0.8
229
+ signing_key:
270
230
  specification_version: 4
271
231
  summary: Load CONTENTdm data into a Solr Index. CDMBL expects to run inside a Rails
272
232
  application.