cdmbl 0.14.0 → 0.18.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 417415def1b0f28a66e6640201a3c53becf43737
4
- data.tar.gz: 36d22cc2acaab58453bb908877af79f2fe93ddba
2
+ SHA256:
3
+ metadata.gz: 98471f931e9b4535c9f1019cfe954cfba9eb7e9742fdb4f2eda70fe914d4fd3e
4
+ data.tar.gz: 32524fe0f8ae2d4ea5b45954d0b368a46153b9f7b3298cb064a5202cd75049ee
5
5
  SHA512:
6
- metadata.gz: 6a4a95dbdf25f2cfd26d2d037b900d07850bf4be2c50112319bcfd220cbddcf7e0325b62ff45267b9cddb0fc135291d946509e8bfe5a78486c5231e1665f7354
7
- data.tar.gz: bc2795036b04aed45ac8be18829450001c08091b2df7c7af089b7faf1779d422ffb14e0610d5a9c3306b981bcbedcb584ef3b89b70a8fcdf94a81ff6dcd55752
6
+ metadata.gz: 6205cbb83cc23192f3157341e4980b49b4bd6cb89a615ddec3ae13d829f26738d293e6cb174e56741c6fe1406c501568896fdf3ca148be55743b0318452c75ff
7
+ data.tar.gz: c9b9f37bc27e3a7d21fb30035310dae9109e5f0d73d8bf8a45f7b0f175251766e06185de27a03763a50eb8d034777437f910b73e2446e388c98f7371c3a04d8d
data/cdmbl.gemspec CHANGED
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
18
18
  spec.require_paths = ['lib']
19
19
 
20
20
  spec.add_dependency 'hash_at_path', '~> 0.1'
21
- spec.add_dependency 'contentdm_api', '~> 0.3.11'
21
+ spec.add_dependency 'contentdm_api', '~> 0.5.0'
22
22
  spec.add_dependency 'sidekiq', '>= 3.5'
23
23
  spec.add_dependency 'titleize', '~> 1.4'
24
24
  spec.add_dependency 'rsolr', '~> 2.0'
@@ -32,6 +32,4 @@ Gem::Specification.new do |spec|
32
32
  spec.add_development_dependency 'rake', '~> 12.0'
33
33
  spec.add_development_dependency 'minitest', '~> 5.0'
34
34
  spec.add_development_dependency 'yard', '~> 0.9.0'
35
- spec.add_development_dependency 'webmock', '~> 1.24', '>= 1.24.0'
36
- spec.add_development_dependency 'vcr', '~> 3.0', '>= 3.0.1'
37
35
  end
@@ -0,0 +1,90 @@
1
+ require 'sidekiq'
2
+ module CDMBL
3
+ class CdmRequestWorker
4
+ include Sidekiq::Worker
5
+ attr_reader :collection,
6
+ :id,
7
+ :set_lookup,
8
+ :field_mappings
9
+
10
+ attr_writer :cdm_api_klass,
11
+ :oai_request_klass,
12
+ :oai_set_lookup_klass,
13
+ :cdm_notification_klass,
14
+ :load_worker_klass,
15
+ :transformer_klass
16
+
17
+ def perform(collection, id)
18
+
19
+ @identifiers = identifiers
20
+ @solr_config = solr_config
21
+ @cdm_endpoint = cdm_endpoint
22
+ @oai_endpoint = oai_endpoint
23
+ @field_mappings = field_mappings
24
+ @extract_compounds = extract_compounds
25
+ transform_and_load!
26
+ end
27
+
28
+ def oai_set_lookup_klass
29
+ @oai_set_lookup_klass ||= OAISetLookup
30
+ end
31
+
32
+ def oai_request_klass
33
+ @oai_request_klass ||= OaiRequest
34
+ end
35
+
36
+ def cdm_api_klass
37
+ @cdm_api_klass ||= CONTENTdmAPI::Item
38
+ end
39
+
40
+ def cdm_notification_klass
41
+ @cdm_notification_klass ||= CdmNotification
42
+ end
43
+
44
+ def transformer_klass
45
+ @transformer_klass ||= Transformer
46
+ end
47
+
48
+ def load_worker_klass
49
+ @load_worker_klass ||= LoadWorker
50
+ end
51
+
52
+ private
53
+
54
+ def transform_and_load!
55
+ load_worker_klass.perform_async(transformed_records, [], solr_config)
56
+ end
57
+
58
+ def transformed_records
59
+ @transformation ||=
60
+ transformer_klass.new(cdm_records: records,
61
+ oai_sets: set_lookup,
62
+ field_mappings: field_mappings,
63
+ extract_compounds: extract_compounds).records
64
+ end
65
+
66
+ def set_lookup
67
+ oai_set_lookup_klass.new(oai_sets: sets).keyed
68
+ end
69
+
70
+ def records
71
+ identifiers.map do |identifier|
72
+ cdm_request(*identifier)
73
+ end
74
+ end
75
+
76
+ # e.g. local_identifiers.map { |identifier| extractor.cdm_request(*identifier) }
77
+ def cdm_request(collection, id)
78
+ cdm_notification_klass.call!(collection, id, cdm_endpoint)
79
+ cdm_api_klass.new(base_url: cdm_endpoint,
80
+ collection: collection,
81
+ with_compounds: false,
82
+ id: id)
83
+ end
84
+
85
+ def sets
86
+ @oai_request ||=
87
+ oai_request_klass.new(base_uri: oai_endpoint).sets
88
+ end
89
+ end
90
+ end
@@ -16,7 +16,8 @@ module CDMBL
16
16
  :set_spec,
17
17
  :max_compounds,
18
18
  :batch_size,
19
- :is_recursive
19
+ :is_recursive,
20
+ :from
20
21
 
21
22
  attr_writer :compound_filter_klass,
22
23
  :extractor_klass,
@@ -40,6 +41,7 @@ module CDMBL
40
41
  @max_compounds = config.fetch('max_compounds', 10)
41
42
  @batch_size = config.fetch('batch_size', 5).to_i
42
43
  @is_recursive = config.fetch('is_recursive', true)
44
+ @from = config.fetch('from', nil)
43
45
  extract_batch!
44
46
  next_batch!
45
47
  end
@@ -114,12 +116,14 @@ module CDMBL
114
116
  end
115
117
 
116
118
  def transform!(ids)
117
- transform_worker_klass.perform_async(ids,
118
- solr_config,
119
- cdm_endpoint,
120
- oai_endpoint,
121
- field_mappings,
122
- extract_compounds)
119
+ transform_worker_klass.perform_async(
120
+ ids,
121
+ solr_config,
122
+ cdm_endpoint,
123
+ oai_endpoint,
124
+ field_mappings,
125
+ extract_compounds
126
+ )
123
127
  end
124
128
 
125
129
  def delete_deletables!
@@ -127,17 +131,20 @@ module CDMBL
127
131
  end
128
132
 
129
133
  def compound_filter
130
- @compound_filter ||=
131
- compound_filter_klass.new(record_ids: extraction.local_identifiers,
132
- cdm_endpoint: cdm_endpoint,
133
- max_compounds: max_compounds)
134
+ @compound_filter ||= compound_filter_klass.new(
135
+ record_ids: extraction.local_identifiers,
136
+ cdm_endpoint: cdm_endpoint,
137
+ max_compounds: max_compounds
138
+ )
134
139
  end
135
140
 
136
141
  def extraction
137
- @extraction ||=
138
- extractor_klass.new(oai_endpoint: oai_endpoint,
139
- resumption_token: resumption_token,
140
- set_spec: set_spec)
142
+ @extraction ||= extractor_klass.new(
143
+ oai_endpoint: oai_endpoint,
144
+ resumption_token: resumption_token,
145
+ set_spec: set_spec,
146
+ from: from
147
+ )
141
148
  end
142
149
  end
143
150
  end
@@ -16,6 +16,7 @@ module CDMBL
16
16
  def initialize(oai_endpoint: '',
17
17
  resumption_token: nil,
18
18
  set_spec: nil,
19
+ from: nil,
19
20
  oai_request_klass: OaiRequest,
20
21
  oai_filter_klass: OAIFilter,
21
22
  oai_set_lookup_klass: OAISetLookup)
@@ -24,7 +25,8 @@ module CDMBL
24
25
  @oai_set_lookup_klass = oai_set_lookup_klass
25
26
  @oai_request = oai_requester(oai_endpoint,
26
27
  resumption_token,
27
- set_spec)
28
+ set_spec,
29
+ from)
28
30
  end
29
31
 
30
32
  def deletable_ids
@@ -49,11 +51,16 @@ module CDMBL
49
51
 
50
52
  private
51
53
 
52
- def oai_requester(oai_endpoint, resumption_token, set_spec)
53
- @oai_requester ||=
54
- oai_request_klass.new(base_uri: oai_endpoint,
55
- resumption_token: resumption_token,
56
- set: set_spec)
54
+ def oai_requester(oai_endpoint, resumption_token, set_spec, from)
55
+ @oai_requester ||= begin
56
+ args = {
57
+ base_uri: oai_endpoint,
58
+ resumption_token: resumption_token,
59
+ set: set_spec,
60
+ }
61
+ args[:from] = from if from
62
+ oai_request_klass.new(args)
63
+ end
57
64
  end
58
65
 
59
66
  # Get the local collection and id from an OAI namespaced identifier
@@ -72,7 +72,7 @@ module CDMBL
72
72
  class Titlieze
73
73
  def self.format(value)
74
74
  if value.respond_to?(:map)
75
- value.map {|value| value.titleize }
75
+ value.map(&:titleize)
76
76
  else
77
77
  value.titleize
78
78
  end
@@ -172,4 +172,4 @@ module CDMBL
172
172
  end
173
173
  end
174
174
 
175
- end
175
+ end
@@ -3,6 +3,7 @@ module CDMBL
3
3
  # Load Records into a solr index
4
4
  class LoadWorker
5
5
  include Sidekiq::Worker
6
+ sidekiq_options queue: 'critical'
6
7
  attr_reader :solr_config, :records, :deletables
7
8
  attr_writer :loader_klass, :solr_klass
8
9
  def perform(records = [], deletables = [], solr_config = {})
@@ -1,10 +1,12 @@
1
+
1
2
  require 'json'
3
+ require 'http'
2
4
  module CDMBL
3
5
  class OaiClient
4
- attr_reader :base_url, :http_client
5
- def initialize(base_url: '', http_client: Net::HTTP)
6
+ attr_reader :base_url, :client
7
+ def initialize(base_url: '', client: HTTP)
6
8
  @base_url = base_url
7
- @http_client = http_client
9
+ @client = client
8
10
  end
9
11
 
10
12
  def request(query)
@@ -14,7 +16,7 @@ module CDMBL
14
16
  private
15
17
 
16
18
  def get(url)
17
- http_client.get_response(URI(url)).body
19
+ client.get(url).to_s
18
20
  end
19
21
 
20
22
  def hashify(xml)
@@ -1,48 +1,51 @@
1
1
  require 'json'
2
2
  module CDMBL
3
- class OaiRequest
4
- attr_reader :base_uri,
5
- :resumption_token,
6
- :client,
7
- :set,
8
- :identifier
9
- def initialize(base_uri: '',
10
- resumption_token: nil,
11
- set: nil,
12
- identifier: '',
13
- client: Net::HTTP)
14
- @base_uri = base_uri
15
- @resumption_token = resumption_token
16
- @client = client
17
- @set = (set) ? "&set=#{set}" : ''
18
- @identifier = identifier
19
- end
3
+ class OaiRequest
4
+ attr_reader :base_uri,
5
+ :resumption_token,
6
+ :client,
7
+ :set,
8
+ :identifier,
9
+ :from
10
+ def initialize(base_uri: '',
11
+ resumption_token: nil,
12
+ set: nil,
13
+ identifier: '',
14
+ from: nil,
15
+ client: Net::HTTP)
16
+ @base_uri = base_uri
17
+ @resumption_token = resumption_token
18
+ @client = client
19
+ @set = (set) ? "&set=#{set}" : ''
20
+ @from = from ? "&from=#{from}" : ''
21
+ @identifier = identifier
22
+ end
20
23
 
21
- def identifiers
22
- @ids ||= (resumption_token) ? request(batch_uri) : request(first_batch_uri)
23
- end
24
+ def identifiers
25
+ @ids ||= (resumption_token) ? request(batch_uri) : request(first_batch_uri)
26
+ end
24
27
 
25
- def sets
26
- @sets ||= request(sets_uri)
27
- end
28
+ def sets
29
+ @sets ||= request(sets_uri)
30
+ end
28
31
 
29
- private
32
+ private
30
33
 
31
- def first_batch_uri
32
- "#{base_uri}?verb=ListIdentifiers&metadataPrefix=oai_dc#{set}"
33
- end
34
+ def first_batch_uri
35
+ "#{base_uri}?verb=ListIdentifiers&metadataPrefix=oai_dc#{set}#{from}"
36
+ end
34
37
 
35
- def batch_uri
36
- "#{base_uri}?verb=ListIdentifiers&resumptionToken=#{resumption_token}"
37
- end
38
+ def batch_uri
39
+ "#{base_uri}?verb=ListIdentifiers&resumptionToken=#{resumption_token}"
40
+ end
38
41
 
39
- def sets_uri
40
- "#{base_uri}?verb=ListSets"
41
- end
42
+ def sets_uri
43
+ "#{base_uri}?verb=ListSets"
44
+ end
42
45
 
43
- def request(location)
44
- CDMBL::OaiNotification.call!(location)
45
- Hash.from_xml(client.get_response(URI(location)).body)
46
- end
46
+ def request(location)
47
+ CDMBL::OaiNotification.call!(location)
48
+ Hash.from_xml(client.get_response(URI(location)).body)
47
49
  end
48
- end
50
+ end
51
+ end
@@ -10,11 +10,12 @@ module CDMBL
10
10
  :extract_compounds
11
11
 
12
12
  attr_writer :cdm_api_klass,
13
- :oai_request_klass,
14
- :oai_set_lookup_klass,
15
- :cdm_notification_klass,
16
- :load_worker_klass,
17
- :transformer_klass
13
+ :oai_request_klass,
14
+ :oai_set_lookup_klass,
15
+ :cdm_notification_klass,
16
+ :load_worker_klass,
17
+ :transformer_klass,
18
+ :cache_klass
18
19
 
19
20
  def perform(identifiers,
20
21
  solr_config,
@@ -56,6 +57,10 @@ module CDMBL
56
57
  @load_worker_klass ||= LoadWorker
57
58
  end
58
59
 
60
+ def cache_klass
61
+ @cache_klass ||= Rails
62
+ end
63
+
59
64
  private
60
65
 
61
66
  def transform_and_load!
@@ -90,7 +95,9 @@ module CDMBL
90
95
 
91
96
  def sets
92
97
  @oai_request ||=
93
- oai_request_klass.new(base_uri: oai_endpoint).sets
98
+ cache_klass.cache.fetch("cdmbl_set_specs", expires_in: 10.minutes) do
99
+ oai_request_klass.new(base_uri: oai_endpoint).sets
100
+ end
94
101
  end
95
102
  end
96
103
  end
data/lib/cdmbl/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module CDMBL
2
- VERSION = "0.14.0"
3
- end
2
+ VERSION = "0.18.0"
3
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdmbl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.0
4
+ version: 0.18.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - chadfennell
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-01-07 00:00:00.000000000 Z
11
+ date: 2021-03-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: hash_at_path
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 0.3.11
33
+ version: 0.5.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.3.11
40
+ version: 0.5.0
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: sidekiq
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -150,47 +150,7 @@ dependencies:
150
150
  - - "~>"
151
151
  - !ruby/object:Gem::Version
152
152
  version: 0.9.0
153
- - !ruby/object:Gem::Dependency
154
- name: webmock
155
- requirement: !ruby/object:Gem::Requirement
156
- requirements:
157
- - - "~>"
158
- - !ruby/object:Gem::Version
159
- version: '1.24'
160
- - - ">="
161
- - !ruby/object:Gem::Version
162
- version: 1.24.0
163
- type: :development
164
- prerelease: false
165
- version_requirements: !ruby/object:Gem::Requirement
166
- requirements:
167
- - - "~>"
168
- - !ruby/object:Gem::Version
169
- version: '1.24'
170
- - - ">="
171
- - !ruby/object:Gem::Version
172
- version: 1.24.0
173
- - !ruby/object:Gem::Dependency
174
- name: vcr
175
- requirement: !ruby/object:Gem::Requirement
176
- requirements:
177
- - - "~>"
178
- - !ruby/object:Gem::Version
179
- version: '3.0'
180
- - - ">="
181
- - !ruby/object:Gem::Version
182
- version: 3.0.1
183
- type: :development
184
- prerelease: false
185
- version_requirements: !ruby/object:Gem::Requirement
186
- requirements:
187
- - - "~>"
188
- - !ruby/object:Gem::Version
189
- version: '3.0'
190
- - - ">="
191
- - !ruby/object:Gem::Version
192
- version: 3.0.1
193
- description:
153
+ description:
194
154
  email:
195
155
  - fenne035@umn.edu
196
156
  executables: []
@@ -211,6 +171,7 @@ files:
211
171
  - lib/cdmbl.rb
212
172
  - lib/cdmbl/batch_deleter.rb
213
173
  - lib/cdmbl/batch_deleter_worker.rb
174
+ - lib/cdmbl/cdm_request_worker.rb
214
175
  - lib/cdmbl/compound_filter.rb
215
176
  - lib/cdmbl/compound_lookup.rb
216
177
  - lib/cdmbl/default_cdm_notification.rb
@@ -245,11 +206,11 @@ files:
245
206
  - lib/cdmbl/transformer.rb
246
207
  - lib/cdmbl/version.rb
247
208
  - travis.yml
248
- homepage:
209
+ homepage:
249
210
  licenses:
250
211
  - MIT
251
212
  metadata: {}
252
- post_install_message:
213
+ post_install_message:
253
214
  rdoc_options: []
254
215
  require_paths:
255
216
  - lib
@@ -264,9 +225,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
264
225
  - !ruby/object:Gem::Version
265
226
  version: '0'
266
227
  requirements: []
267
- rubyforge_project:
268
- rubygems_version: 2.6.13
269
- signing_key:
228
+ rubygems_version: 3.0.8
229
+ signing_key:
270
230
  specification_version: 4
271
231
  summary: Load CONTENTdm data into a Solr Index. CDMBL expects to run inside a Rails
272
232
  application.