cdmbl 0.14.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/cdmbl.gemspec +1 -3
- data/lib/cdmbl/cdm_request_worker.rb +90 -0
- data/lib/cdmbl/etl_worker.rb +22 -15
- data/lib/cdmbl/extractor.rb +13 -6
- data/lib/cdmbl/formatters.rb +2 -2
- data/lib/cdmbl/load_worker.rb +1 -0
- data/lib/cdmbl/oai_client.rb +6 -4
- data/lib/cdmbl/oai_request.rb +41 -38
- data/lib/cdmbl/transform_worker.rb +13 -6
- data/lib/cdmbl/version.rb +2 -2
- metadata +11 -51
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 98471f931e9b4535c9f1019cfe954cfba9eb7e9742fdb4f2eda70fe914d4fd3e
|
4
|
+
data.tar.gz: 32524fe0f8ae2d4ea5b45954d0b368a46153b9f7b3298cb064a5202cd75049ee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6205cbb83cc23192f3157341e4980b49b4bd6cb89a615ddec3ae13d829f26738d293e6cb174e56741c6fe1406c501568896fdf3ca148be55743b0318452c75ff
|
7
|
+
data.tar.gz: c9b9f37bc27e3a7d21fb30035310dae9109e5f0d73d8bf8a45f7b0f175251766e06185de27a03763a50eb8d034777437f910b73e2446e388c98f7371c3a04d8d
|
data/cdmbl.gemspec
CHANGED
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.require_paths = ['lib']
|
19
19
|
|
20
20
|
spec.add_dependency 'hash_at_path', '~> 0.1'
|
21
|
-
spec.add_dependency 'contentdm_api', '~> 0.
|
21
|
+
spec.add_dependency 'contentdm_api', '~> 0.5.0'
|
22
22
|
spec.add_dependency 'sidekiq', '>= 3.5'
|
23
23
|
spec.add_dependency 'titleize', '~> 1.4'
|
24
24
|
spec.add_dependency 'rsolr', '~> 2.0'
|
@@ -32,6 +32,4 @@ Gem::Specification.new do |spec|
|
|
32
32
|
spec.add_development_dependency 'rake', '~> 12.0'
|
33
33
|
spec.add_development_dependency 'minitest', '~> 5.0'
|
34
34
|
spec.add_development_dependency 'yard', '~> 0.9.0'
|
35
|
-
spec.add_development_dependency 'webmock', '~> 1.24', '>= 1.24.0'
|
36
|
-
spec.add_development_dependency 'vcr', '~> 3.0', '>= 3.0.1'
|
37
35
|
end
|
data/lib/cdmbl/cdm_request_worker.rb
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'sidekiq'
|
2
|
+
module CDMBL
|
3
|
+
class CdmRequestWorker
|
4
|
+
include Sidekiq::Worker
|
5
|
+
attr_reader :collection,
|
6
|
+
:id,
|
7
|
+
:set_lookup,
|
8
|
+
:field_mappings
|
9
|
+
|
10
|
+
attr_writer :cdm_api_klass,
|
11
|
+
:oai_request_klass,
|
12
|
+
:oai_set_lookup_klass,
|
13
|
+
:cdm_notification_klass,
|
14
|
+
:load_worker_klass,
|
15
|
+
:transformer_klass
|
16
|
+
|
17
|
+
def perform(collection, id)
|
18
|
+
|
19
|
+
@identifiers = identifiers
|
20
|
+
@solr_config = solr_config
|
21
|
+
@cdm_endpoint = cdm_endpoint
|
22
|
+
@oai_endpoint = oai_endpoint
|
23
|
+
@field_mappings = field_mappings
|
24
|
+
@extract_compounds = extract_compounds
|
25
|
+
transform_and_load!
|
26
|
+
end
|
27
|
+
|
28
|
+
def oai_set_lookup_klass
|
29
|
+
@oai_set_lookup_klass ||= OAISetLookup
|
30
|
+
end
|
31
|
+
|
32
|
+
def oai_request_klass
|
33
|
+
@oai_request_klass ||= OaiRequest
|
34
|
+
end
|
35
|
+
|
36
|
+
def cdm_api_klass
|
37
|
+
@cdm_api_klass ||= CONTENTdmAPI::Item
|
38
|
+
end
|
39
|
+
|
40
|
+
def cdm_notification_klass
|
41
|
+
@cdm_notification_klass ||= CdmNotification
|
42
|
+
end
|
43
|
+
|
44
|
+
def transformer_klass
|
45
|
+
@transformer_klass ||= Transformer
|
46
|
+
end
|
47
|
+
|
48
|
+
def load_worker_klass
|
49
|
+
@load_worker_klass ||= LoadWorker
|
50
|
+
end
|
51
|
+
|
52
|
+
private
|
53
|
+
|
54
|
+
def transform_and_load!
|
55
|
+
load_worker_klass.perform_async(transformed_records, [], solr_config)
|
56
|
+
end
|
57
|
+
|
58
|
+
def transformed_records
|
59
|
+
@transformation ||=
|
60
|
+
transformer_klass.new(cdm_records: records,
|
61
|
+
oai_sets: set_lookup,
|
62
|
+
field_mappings: field_mappings,
|
63
|
+
extract_compounds: extract_compounds).records
|
64
|
+
end
|
65
|
+
|
66
|
+
def set_lookup
|
67
|
+
oai_set_lookup_klass.new(oai_sets: sets).keyed
|
68
|
+
end
|
69
|
+
|
70
|
+
def records
|
71
|
+
identifiers.map do |identifier|
|
72
|
+
cdm_request(*identifier)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
# e.g. local_identifiers.map { |identifier| extractor.cdm_request(*identifier) }
|
77
|
+
def cdm_request(collection, id)
|
78
|
+
cdm_notification_klass.call!(collection, id, cdm_endpoint)
|
79
|
+
cdm_api_klass.new(base_url: cdm_endpoint,
|
80
|
+
collection: collection,
|
81
|
+
with_compounds: false,
|
82
|
+
id: id)
|
83
|
+
end
|
84
|
+
|
85
|
+
def sets
|
86
|
+
@oai_request ||=
|
87
|
+
oai_request_klass.new(base_uri: oai_endpoint).sets
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
data/lib/cdmbl/etl_worker.rb
CHANGED
@@ -16,7 +16,8 @@ module CDMBL
|
|
16
16
|
:set_spec,
|
17
17
|
:max_compounds,
|
18
18
|
:batch_size,
|
19
|
-
:is_recursive
|
19
|
+
:is_recursive,
|
20
|
+
:from
|
20
21
|
|
21
22
|
attr_writer :compound_filter_klass,
|
22
23
|
:extractor_klass,
|
@@ -40,6 +41,7 @@ module CDMBL
|
|
40
41
|
@max_compounds = config.fetch('max_compounds', 10)
|
41
42
|
@batch_size = config.fetch('batch_size', 5).to_i
|
42
43
|
@is_recursive = config.fetch('is_recursive', true)
|
44
|
+
@from = config.fetch('from', nil)
|
43
45
|
extract_batch!
|
44
46
|
next_batch!
|
45
47
|
end
|
@@ -114,12 +116,14 @@ module CDMBL
|
|
114
116
|
end
|
115
117
|
|
116
118
|
def transform!(ids)
|
117
|
-
transform_worker_klass.perform_async(
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
119
|
+
transform_worker_klass.perform_async(
|
120
|
+
ids,
|
121
|
+
solr_config,
|
122
|
+
cdm_endpoint,
|
123
|
+
oai_endpoint,
|
124
|
+
field_mappings,
|
125
|
+
extract_compounds
|
126
|
+
)
|
123
127
|
end
|
124
128
|
|
125
129
|
def delete_deletables!
|
@@ -127,17 +131,20 @@ module CDMBL
|
|
127
131
|
end
|
128
132
|
|
129
133
|
def compound_filter
|
130
|
-
@compound_filter ||=
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
+
@compound_filter ||= compound_filter_klass.new(
|
135
|
+
record_ids: extraction.local_identifiers,
|
136
|
+
cdm_endpoint: cdm_endpoint,
|
137
|
+
max_compounds: max_compounds
|
138
|
+
)
|
134
139
|
end
|
135
140
|
|
136
141
|
def extraction
|
137
|
-
@extraction ||=
|
138
|
-
|
139
|
-
|
140
|
-
|
142
|
+
@extraction ||= extractor_klass.new(
|
143
|
+
oai_endpoint: oai_endpoint,
|
144
|
+
resumption_token: resumption_token,
|
145
|
+
set_spec: set_spec,
|
146
|
+
from: from
|
147
|
+
)
|
141
148
|
end
|
142
149
|
end
|
143
150
|
end
|
data/lib/cdmbl/extractor.rb
CHANGED
@@ -16,6 +16,7 @@ module CDMBL
|
|
16
16
|
def initialize(oai_endpoint: '',
|
17
17
|
resumption_token: nil,
|
18
18
|
set_spec: nil,
|
19
|
+
from: nil,
|
19
20
|
oai_request_klass: OaiRequest,
|
20
21
|
oai_filter_klass: OAIFilter,
|
21
22
|
oai_set_lookup_klass: OAISetLookup)
|
@@ -24,7 +25,8 @@ module CDMBL
|
|
24
25
|
@oai_set_lookup_klass = oai_set_lookup_klass
|
25
26
|
@oai_request = oai_requester(oai_endpoint,
|
26
27
|
resumption_token,
|
27
|
-
set_spec
|
28
|
+
set_spec,
|
29
|
+
from)
|
28
30
|
end
|
29
31
|
|
30
32
|
def deletable_ids
|
@@ -49,11 +51,16 @@ module CDMBL
|
|
49
51
|
|
50
52
|
private
|
51
53
|
|
52
|
-
def oai_requester(oai_endpoint, resumption_token, set_spec)
|
53
|
-
@oai_requester ||=
|
54
|
-
|
55
|
-
|
56
|
-
|
54
|
+
def oai_requester(oai_endpoint, resumption_token, set_spec, from)
|
55
|
+
@oai_requester ||= begin
|
56
|
+
args = {
|
57
|
+
base_uri: oai_endpoint,
|
58
|
+
resumption_token: resumption_token,
|
59
|
+
set: set_spec,
|
60
|
+
}
|
61
|
+
args[:from] = from if from
|
62
|
+
oai_request_klass.new(args)
|
63
|
+
end
|
57
64
|
end
|
58
65
|
|
59
66
|
# Get the local collection and id from an OAI namespaced identifier
|
data/lib/cdmbl/formatters.rb
CHANGED
@@ -72,7 +72,7 @@ module CDMBL
|
|
72
72
|
class Titlieze
|
73
73
|
def self.format(value)
|
74
74
|
if value.respond_to?(:map)
|
75
|
-
value.map
|
75
|
+
value.map(&:titleize)
|
76
76
|
else
|
77
77
|
value.titleize
|
78
78
|
end
|
@@ -172,4 +172,4 @@ module CDMBL
|
|
172
172
|
end
|
173
173
|
end
|
174
174
|
|
175
|
-
end
|
175
|
+
end
|
data/lib/cdmbl/load_worker.rb
CHANGED
@@ -3,6 +3,7 @@ module CDMBL
|
|
3
3
|
# Load Records into a solr index
|
4
4
|
class LoadWorker
|
5
5
|
include Sidekiq::Worker
|
6
|
+
sidekiq_options queue: 'critical'
|
6
7
|
attr_reader :solr_config, :records, :deletables
|
7
8
|
attr_writer :loader_klass, :solr_klass
|
8
9
|
def perform(records = [], deletables = [], solr_config = {})
|
data/lib/cdmbl/oai_client.rb
CHANGED
@@ -1,10 +1,12 @@
|
|
1
|
+
|
1
2
|
require 'json'
|
3
|
+
require 'http'
|
2
4
|
module CDMBL
|
3
5
|
class OaiClient
|
4
|
-
attr_reader :base_url, :
|
5
|
-
def initialize(base_url: '',
|
6
|
+
attr_reader :base_url, :client
|
7
|
+
def initialize(base_url: '', client: HTTP)
|
6
8
|
@base_url = base_url
|
7
|
-
@
|
9
|
+
@client = client
|
8
10
|
end
|
9
11
|
|
10
12
|
def request(query)
|
@@ -14,7 +16,7 @@ module CDMBL
|
|
14
16
|
private
|
15
17
|
|
16
18
|
def get(url)
|
17
|
-
|
19
|
+
client.get(url).to_s
|
18
20
|
end
|
19
21
|
|
20
22
|
def hashify(xml)
|
data/lib/cdmbl/oai_request.rb
CHANGED
@@ -1,48 +1,51 @@
|
|
1
1
|
require 'json'
|
2
2
|
module CDMBL
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
3
|
+
class OaiRequest
|
4
|
+
attr_reader :base_uri,
|
5
|
+
:resumption_token,
|
6
|
+
:client,
|
7
|
+
:set,
|
8
|
+
:identifier,
|
9
|
+
:from
|
10
|
+
def initialize(base_uri: '',
|
11
|
+
resumption_token: nil,
|
12
|
+
set: nil,
|
13
|
+
identifier: '',
|
14
|
+
from: nil,
|
15
|
+
client: Net::HTTP)
|
16
|
+
@base_uri = base_uri
|
17
|
+
@resumption_token = resumption_token
|
18
|
+
@client = client
|
19
|
+
@set = (set) ? "&set=#{set}" : ''
|
20
|
+
@from = from ? "&from=#{from}" : ''
|
21
|
+
@identifier = identifier
|
22
|
+
end
|
20
23
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
+
def identifiers
|
25
|
+
@ids ||= (resumption_token) ? request(batch_uri) : request(first_batch_uri)
|
26
|
+
end
|
24
27
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
+
def sets
|
29
|
+
@sets ||= request(sets_uri)
|
30
|
+
end
|
28
31
|
|
29
|
-
|
32
|
+
private
|
30
33
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
+
def first_batch_uri
|
35
|
+
"#{base_uri}?verb=ListIdentifiers&metadataPrefix=oai_dc#{set}#{from}"
|
36
|
+
end
|
34
37
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
+
def batch_uri
|
39
|
+
"#{base_uri}?verb=ListIdentifiers&resumptionToken=#{resumption_token}"
|
40
|
+
end
|
38
41
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
+
def sets_uri
|
43
|
+
"#{base_uri}?verb=ListSets"
|
44
|
+
end
|
42
45
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
end
|
46
|
+
def request(location)
|
47
|
+
CDMBL::OaiNotification.call!(location)
|
48
|
+
Hash.from_xml(client.get_response(URI(location)).body)
|
47
49
|
end
|
48
|
-
end
|
50
|
+
end
|
51
|
+
end
|
data/lib/cdmbl/transform_worker.rb
CHANGED
@@ -10,11 +10,12 @@ module CDMBL
|
|
10
10
|
:extract_compounds
|
11
11
|
|
12
12
|
attr_writer :cdm_api_klass,
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
13
|
+
:oai_request_klass,
|
14
|
+
:oai_set_lookup_klass,
|
15
|
+
:cdm_notification_klass,
|
16
|
+
:load_worker_klass,
|
17
|
+
:transformer_klass,
|
18
|
+
:cache_klass
|
18
19
|
|
19
20
|
def perform(identifiers,
|
20
21
|
solr_config,
|
@@ -56,6 +57,10 @@ module CDMBL
|
|
56
57
|
@load_worker_klass ||= LoadWorker
|
57
58
|
end
|
58
59
|
|
60
|
+
def cache_klass
|
61
|
+
@cache_klass ||= Rails
|
62
|
+
end
|
63
|
+
|
59
64
|
private
|
60
65
|
|
61
66
|
def transform_and_load!
|
@@ -90,7 +95,9 @@ module CDMBL
|
|
90
95
|
|
91
96
|
def sets
|
92
97
|
@oai_request ||=
|
93
|
-
|
98
|
+
cache_klass.cache.fetch("cdmbl_set_specs", expires_in: 10.minutes) do
|
99
|
+
oai_request_klass.new(base_uri: oai_endpoint).sets
|
100
|
+
end
|
94
101
|
end
|
95
102
|
end
|
96
103
|
end
|
data/lib/cdmbl/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module CDMBL
|
2
|
-
VERSION = "0.14.0"
|
3
|
-
end
|
2
|
+
VERSION = "0.18.0"
|
3
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdmbl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.14.0
|
4
|
+
version: 0.18.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chadfennell
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: hash_at_path
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.
|
33
|
+
version: 0.5.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.
|
40
|
+
version: 0.5.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: sidekiq
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -150,47 +150,7 @@ dependencies:
|
|
150
150
|
- - "~>"
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: 0.9.0
|
153
|
-
|
154
|
-
name: webmock
|
155
|
-
requirement: !ruby/object:Gem::Requirement
|
156
|
-
requirements:
|
157
|
-
- - "~>"
|
158
|
-
- !ruby/object:Gem::Version
|
159
|
-
version: '1.24'
|
160
|
-
- - ">="
|
161
|
-
- !ruby/object:Gem::Version
|
162
|
-
version: 1.24.0
|
163
|
-
type: :development
|
164
|
-
prerelease: false
|
165
|
-
version_requirements: !ruby/object:Gem::Requirement
|
166
|
-
requirements:
|
167
|
-
- - "~>"
|
168
|
-
- !ruby/object:Gem::Version
|
169
|
-
version: '1.24'
|
170
|
-
- - ">="
|
171
|
-
- !ruby/object:Gem::Version
|
172
|
-
version: 1.24.0
|
173
|
-
- !ruby/object:Gem::Dependency
|
174
|
-
name: vcr
|
175
|
-
requirement: !ruby/object:Gem::Requirement
|
176
|
-
requirements:
|
177
|
-
- - "~>"
|
178
|
-
- !ruby/object:Gem::Version
|
179
|
-
version: '3.0'
|
180
|
-
- - ">="
|
181
|
-
- !ruby/object:Gem::Version
|
182
|
-
version: 3.0.1
|
183
|
-
type: :development
|
184
|
-
prerelease: false
|
185
|
-
version_requirements: !ruby/object:Gem::Requirement
|
186
|
-
requirements:
|
187
|
-
- - "~>"
|
188
|
-
- !ruby/object:Gem::Version
|
189
|
-
version: '3.0'
|
190
|
-
- - ">="
|
191
|
-
- !ruby/object:Gem::Version
|
192
|
-
version: 3.0.1
|
193
|
-
description:
|
153
|
+
description:
|
194
154
|
email:
|
195
155
|
- fenne035@umn.edu
|
196
156
|
executables: []
|
@@ -211,6 +171,7 @@ files:
|
|
211
171
|
- lib/cdmbl.rb
|
212
172
|
- lib/cdmbl/batch_deleter.rb
|
213
173
|
- lib/cdmbl/batch_deleter_worker.rb
|
174
|
+
- lib/cdmbl/cdm_request_worker.rb
|
214
175
|
- lib/cdmbl/compound_filter.rb
|
215
176
|
- lib/cdmbl/compound_lookup.rb
|
216
177
|
- lib/cdmbl/default_cdm_notification.rb
|
@@ -245,11 +206,11 @@ files:
|
|
245
206
|
- lib/cdmbl/transformer.rb
|
246
207
|
- lib/cdmbl/version.rb
|
247
208
|
- travis.yml
|
248
|
-
homepage:
|
209
|
+
homepage:
|
249
210
|
licenses:
|
250
211
|
- MIT
|
251
212
|
metadata: {}
|
252
|
-
post_install_message:
|
213
|
+
post_install_message:
|
253
214
|
rdoc_options: []
|
254
215
|
require_paths:
|
255
216
|
- lib
|
@@ -264,9 +225,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
264
225
|
- !ruby/object:Gem::Version
|
265
226
|
version: '0'
|
266
227
|
requirements: []
|
267
|
-
|
268
|
-
|
269
|
-
signing_key:
|
228
|
+
rubygems_version: 3.0.8
|
229
|
+
signing_key:
|
270
230
|
specification_version: 4
|
271
231
|
summary: Load CONTENTdm data into a Solr Index. CDMBL expects to run inside a Rails
|
272
232
|
application.
|