cdmbl 0.14.0 → 0.18.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/cdmbl.gemspec +1 -3
- data/lib/cdmbl/cdm_request_worker.rb +90 -0
- data/lib/cdmbl/etl_worker.rb +22 -15
- data/lib/cdmbl/extractor.rb +13 -6
- data/lib/cdmbl/formatters.rb +2 -2
- data/lib/cdmbl/load_worker.rb +1 -0
- data/lib/cdmbl/oai_client.rb +6 -4
- data/lib/cdmbl/oai_request.rb +41 -38
- data/lib/cdmbl/transform_worker.rb +13 -6
- data/lib/cdmbl/version.rb +2 -2
- metadata +11 -51
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 98471f931e9b4535c9f1019cfe954cfba9eb7e9742fdb4f2eda70fe914d4fd3e
|
4
|
+
data.tar.gz: 32524fe0f8ae2d4ea5b45954d0b368a46153b9f7b3298cb064a5202cd75049ee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6205cbb83cc23192f3157341e4980b49b4bd6cb89a615ddec3ae13d829f26738d293e6cb174e56741c6fe1406c501568896fdf3ca148be55743b0318452c75ff
|
7
|
+
data.tar.gz: c9b9f37bc27e3a7d21fb30035310dae9109e5f0d73d8bf8a45f7b0f175251766e06185de27a03763a50eb8d034777437f910b73e2446e388c98f7371c3a04d8d
|
data/cdmbl.gemspec
CHANGED
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.require_paths = ['lib']
|
19
19
|
|
20
20
|
spec.add_dependency 'hash_at_path', '~> 0.1'
|
21
|
-
spec.add_dependency 'contentdm_api', '~> 0.
|
21
|
+
spec.add_dependency 'contentdm_api', '~> 0.5.0'
|
22
22
|
spec.add_dependency 'sidekiq', '>= 3.5'
|
23
23
|
spec.add_dependency 'titleize', '~> 1.4'
|
24
24
|
spec.add_dependency 'rsolr', '~> 2.0'
|
@@ -32,6 +32,4 @@ Gem::Specification.new do |spec|
|
|
32
32
|
spec.add_development_dependency 'rake', '~> 12.0'
|
33
33
|
spec.add_development_dependency 'minitest', '~> 5.0'
|
34
34
|
spec.add_development_dependency 'yard', '~> 0.9.0'
|
35
|
-
spec.add_development_dependency 'webmock', '~> 1.24', '>= 1.24.0'
|
36
|
-
spec.add_development_dependency 'vcr', '~> 3.0', '>= 3.0.1'
|
37
35
|
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'sidekiq'
|
2
|
+
module CDMBL
|
3
|
+
class CdmRequestWorker
|
4
|
+
include Sidekiq::Worker
|
5
|
+
attr_reader :collection,
|
6
|
+
:id,
|
7
|
+
:set_lookup,
|
8
|
+
:field_mappings
|
9
|
+
|
10
|
+
attr_writer :cdm_api_klass,
|
11
|
+
:oai_request_klass,
|
12
|
+
:oai_set_lookup_klass,
|
13
|
+
:cdm_notification_klass,
|
14
|
+
:load_worker_klass,
|
15
|
+
:transformer_klass
|
16
|
+
|
17
|
+
def perform(collection, id)
|
18
|
+
|
19
|
+
@identifiers = identifiers
|
20
|
+
@solr_config = solr_config
|
21
|
+
@cdm_endpoint = cdm_endpoint
|
22
|
+
@oai_endpoint = oai_endpoint
|
23
|
+
@field_mappings = field_mappings
|
24
|
+
@extract_compounds = extract_compounds
|
25
|
+
transform_and_load!
|
26
|
+
end
|
27
|
+
|
28
|
+
def oai_set_lookup_klass
|
29
|
+
@oai_set_lookup_klass ||= OAISetLookup
|
30
|
+
end
|
31
|
+
|
32
|
+
def oai_request_klass
|
33
|
+
@oai_request_klass ||= OaiRequest
|
34
|
+
end
|
35
|
+
|
36
|
+
def cdm_api_klass
|
37
|
+
@cdm_api_klass ||= CONTENTdmAPI::Item
|
38
|
+
end
|
39
|
+
|
40
|
+
def cdm_notification_klass
|
41
|
+
@cdm_notification_klass ||= CdmNotification
|
42
|
+
end
|
43
|
+
|
44
|
+
def transformer_klass
|
45
|
+
@transformer_klass ||= Transformer
|
46
|
+
end
|
47
|
+
|
48
|
+
def load_worker_klass
|
49
|
+
@load_worker_klass ||= LoadWorker
|
50
|
+
end
|
51
|
+
|
52
|
+
private
|
53
|
+
|
54
|
+
def transform_and_load!
|
55
|
+
load_worker_klass.perform_async(transformed_records, [], solr_config)
|
56
|
+
end
|
57
|
+
|
58
|
+
def transformed_records
|
59
|
+
@transformation ||=
|
60
|
+
transformer_klass.new(cdm_records: records,
|
61
|
+
oai_sets: set_lookup,
|
62
|
+
field_mappings: field_mappings,
|
63
|
+
extract_compounds: extract_compounds).records
|
64
|
+
end
|
65
|
+
|
66
|
+
def set_lookup
|
67
|
+
oai_set_lookup_klass.new(oai_sets: sets).keyed
|
68
|
+
end
|
69
|
+
|
70
|
+
def records
|
71
|
+
identifiers.map do |identifier|
|
72
|
+
cdm_request(*identifier)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
# e.g. local_identifiers.map { |identifier| extractor.cdm_request(*identifier) }
|
77
|
+
def cdm_request(collection, id)
|
78
|
+
cdm_notification_klass.call!(collection, id, cdm_endpoint)
|
79
|
+
cdm_api_klass.new(base_url: cdm_endpoint,
|
80
|
+
collection: collection,
|
81
|
+
with_compounds: false,
|
82
|
+
id: id)
|
83
|
+
end
|
84
|
+
|
85
|
+
def sets
|
86
|
+
@oai_request ||=
|
87
|
+
oai_request_klass.new(base_uri: oai_endpoint).sets
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
data/lib/cdmbl/etl_worker.rb
CHANGED
@@ -16,7 +16,8 @@ module CDMBL
|
|
16
16
|
:set_spec,
|
17
17
|
:max_compounds,
|
18
18
|
:batch_size,
|
19
|
-
:is_recursive
|
19
|
+
:is_recursive,
|
20
|
+
:from
|
20
21
|
|
21
22
|
attr_writer :compound_filter_klass,
|
22
23
|
:extractor_klass,
|
@@ -40,6 +41,7 @@ module CDMBL
|
|
40
41
|
@max_compounds = config.fetch('max_compounds', 10)
|
41
42
|
@batch_size = config.fetch('batch_size', 5).to_i
|
42
43
|
@is_recursive = config.fetch('is_recursive', true)
|
44
|
+
@from = config.fetch('from', nil)
|
43
45
|
extract_batch!
|
44
46
|
next_batch!
|
45
47
|
end
|
@@ -114,12 +116,14 @@ module CDMBL
|
|
114
116
|
end
|
115
117
|
|
116
118
|
def transform!(ids)
|
117
|
-
transform_worker_klass.perform_async(
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
119
|
+
transform_worker_klass.perform_async(
|
120
|
+
ids,
|
121
|
+
solr_config,
|
122
|
+
cdm_endpoint,
|
123
|
+
oai_endpoint,
|
124
|
+
field_mappings,
|
125
|
+
extract_compounds
|
126
|
+
)
|
123
127
|
end
|
124
128
|
|
125
129
|
def delete_deletables!
|
@@ -127,17 +131,20 @@ module CDMBL
|
|
127
131
|
end
|
128
132
|
|
129
133
|
def compound_filter
|
130
|
-
@compound_filter ||=
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
+
@compound_filter ||= compound_filter_klass.new(
|
135
|
+
record_ids: extraction.local_identifiers,
|
136
|
+
cdm_endpoint: cdm_endpoint,
|
137
|
+
max_compounds: max_compounds
|
138
|
+
)
|
134
139
|
end
|
135
140
|
|
136
141
|
def extraction
|
137
|
-
@extraction ||=
|
138
|
-
|
139
|
-
|
140
|
-
|
142
|
+
@extraction ||= extractor_klass.new(
|
143
|
+
oai_endpoint: oai_endpoint,
|
144
|
+
resumption_token: resumption_token,
|
145
|
+
set_spec: set_spec,
|
146
|
+
from: from
|
147
|
+
)
|
141
148
|
end
|
142
149
|
end
|
143
150
|
end
|
data/lib/cdmbl/extractor.rb
CHANGED
@@ -16,6 +16,7 @@ module CDMBL
|
|
16
16
|
def initialize(oai_endpoint: '',
|
17
17
|
resumption_token: nil,
|
18
18
|
set_spec: nil,
|
19
|
+
from: nil,
|
19
20
|
oai_request_klass: OaiRequest,
|
20
21
|
oai_filter_klass: OAIFilter,
|
21
22
|
oai_set_lookup_klass: OAISetLookup)
|
@@ -24,7 +25,8 @@ module CDMBL
|
|
24
25
|
@oai_set_lookup_klass = oai_set_lookup_klass
|
25
26
|
@oai_request = oai_requester(oai_endpoint,
|
26
27
|
resumption_token,
|
27
|
-
set_spec
|
28
|
+
set_spec,
|
29
|
+
from)
|
28
30
|
end
|
29
31
|
|
30
32
|
def deletable_ids
|
@@ -49,11 +51,16 @@ module CDMBL
|
|
49
51
|
|
50
52
|
private
|
51
53
|
|
52
|
-
def oai_requester(oai_endpoint, resumption_token, set_spec)
|
53
|
-
@oai_requester ||=
|
54
|
-
|
55
|
-
|
56
|
-
|
54
|
+
def oai_requester(oai_endpoint, resumption_token, set_spec, from)
|
55
|
+
@oai_requester ||= begin
|
56
|
+
args = {
|
57
|
+
base_uri: oai_endpoint,
|
58
|
+
resumption_token: resumption_token,
|
59
|
+
set: set_spec,
|
60
|
+
}
|
61
|
+
args[:from] = from if from
|
62
|
+
oai_request_klass.new(args)
|
63
|
+
end
|
57
64
|
end
|
58
65
|
|
59
66
|
# Get the local collection and id from an OAI namespaced identifier
|
data/lib/cdmbl/formatters.rb
CHANGED
@@ -72,7 +72,7 @@ module CDMBL
|
|
72
72
|
class Titlieze
|
73
73
|
def self.format(value)
|
74
74
|
if value.respond_to?(:map)
|
75
|
-
value.map
|
75
|
+
value.map(&:titleize)
|
76
76
|
else
|
77
77
|
value.titleize
|
78
78
|
end
|
@@ -172,4 +172,4 @@ module CDMBL
|
|
172
172
|
end
|
173
173
|
end
|
174
174
|
|
175
|
-
end
|
175
|
+
end
|
data/lib/cdmbl/load_worker.rb
CHANGED
@@ -3,6 +3,7 @@ module CDMBL
|
|
3
3
|
# Load Records into a solr index
|
4
4
|
class LoadWorker
|
5
5
|
include Sidekiq::Worker
|
6
|
+
sidekiq_options queue: 'critical'
|
6
7
|
attr_reader :solr_config, :records, :deletables
|
7
8
|
attr_writer :loader_klass, :solr_klass
|
8
9
|
def perform(records = [], deletables = [], solr_config = {})
|
data/lib/cdmbl/oai_client.rb
CHANGED
@@ -1,10 +1,12 @@
|
|
1
|
+
|
1
2
|
require 'json'
|
3
|
+
require 'http'
|
2
4
|
module CDMBL
|
3
5
|
class OaiClient
|
4
|
-
attr_reader :base_url, :
|
5
|
-
def initialize(base_url: '',
|
6
|
+
attr_reader :base_url, :client
|
7
|
+
def initialize(base_url: '', client: HTTP)
|
6
8
|
@base_url = base_url
|
7
|
-
@
|
9
|
+
@client = client
|
8
10
|
end
|
9
11
|
|
10
12
|
def request(query)
|
@@ -14,7 +16,7 @@ module CDMBL
|
|
14
16
|
private
|
15
17
|
|
16
18
|
def get(url)
|
17
|
-
|
19
|
+
client.get(url).to_s
|
18
20
|
end
|
19
21
|
|
20
22
|
def hashify(xml)
|
data/lib/cdmbl/oai_request.rb
CHANGED
@@ -1,48 +1,51 @@
|
|
1
1
|
require 'json'
|
2
2
|
module CDMBL
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
3
|
+
class OaiRequest
|
4
|
+
attr_reader :base_uri,
|
5
|
+
:resumption_token,
|
6
|
+
:client,
|
7
|
+
:set,
|
8
|
+
:identifier,
|
9
|
+
:from
|
10
|
+
def initialize(base_uri: '',
|
11
|
+
resumption_token: nil,
|
12
|
+
set: nil,
|
13
|
+
identifier: '',
|
14
|
+
from: nil,
|
15
|
+
client: Net::HTTP)
|
16
|
+
@base_uri = base_uri
|
17
|
+
@resumption_token = resumption_token
|
18
|
+
@client = client
|
19
|
+
@set = (set) ? "&set=#{set}" : ''
|
20
|
+
@from = from ? "&from=#{from}" : ''
|
21
|
+
@identifier = identifier
|
22
|
+
end
|
20
23
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
+
def identifiers
|
25
|
+
@ids ||= (resumption_token) ? request(batch_uri) : request(first_batch_uri)
|
26
|
+
end
|
24
27
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
+
def sets
|
29
|
+
@sets ||= request(sets_uri)
|
30
|
+
end
|
28
31
|
|
29
|
-
|
32
|
+
private
|
30
33
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
+
def first_batch_uri
|
35
|
+
"#{base_uri}?verb=ListIdentifiers&metadataPrefix=oai_dc#{set}#{from}"
|
36
|
+
end
|
34
37
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
+
def batch_uri
|
39
|
+
"#{base_uri}?verb=ListIdentifiers&resumptionToken=#{resumption_token}"
|
40
|
+
end
|
38
41
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
+
def sets_uri
|
43
|
+
"#{base_uri}?verb=ListSets"
|
44
|
+
end
|
42
45
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
end
|
46
|
+
def request(location)
|
47
|
+
CDMBL::OaiNotification.call!(location)
|
48
|
+
Hash.from_xml(client.get_response(URI(location)).body)
|
47
49
|
end
|
48
|
-
end
|
50
|
+
end
|
51
|
+
end
|
@@ -10,11 +10,12 @@ module CDMBL
|
|
10
10
|
:extract_compounds
|
11
11
|
|
12
12
|
attr_writer :cdm_api_klass,
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
13
|
+
:oai_request_klass,
|
14
|
+
:oai_set_lookup_klass,
|
15
|
+
:cdm_notification_klass,
|
16
|
+
:load_worker_klass,
|
17
|
+
:transformer_klass,
|
18
|
+
:cache_klass
|
18
19
|
|
19
20
|
def perform(identifiers,
|
20
21
|
solr_config,
|
@@ -56,6 +57,10 @@ module CDMBL
|
|
56
57
|
@load_worker_klass ||= LoadWorker
|
57
58
|
end
|
58
59
|
|
60
|
+
def cache_klass
|
61
|
+
@cache_klass ||= Rails
|
62
|
+
end
|
63
|
+
|
59
64
|
private
|
60
65
|
|
61
66
|
def transform_and_load!
|
@@ -90,7 +95,9 @@ module CDMBL
|
|
90
95
|
|
91
96
|
def sets
|
92
97
|
@oai_request ||=
|
93
|
-
|
98
|
+
cache_klass.cache.fetch("cdmbl_set_specs", expires_in: 10.minutes) do
|
99
|
+
oai_request_klass.new(base_uri: oai_endpoint).sets
|
100
|
+
end
|
94
101
|
end
|
95
102
|
end
|
96
103
|
end
|
data/lib/cdmbl/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module CDMBL
|
2
|
-
VERSION = "0.14.0"
|
3
|
-
end
|
2
|
+
VERSION = "0.18.0"
|
3
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdmbl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.14.0
|
4
|
+
version: 0.18.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chadfennell
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: hash_at_path
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.
|
33
|
+
version: 0.5.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.
|
40
|
+
version: 0.5.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: sidekiq
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -150,47 +150,7 @@ dependencies:
|
|
150
150
|
- - "~>"
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: 0.9.0
|
153
|
-
|
154
|
-
name: webmock
|
155
|
-
requirement: !ruby/object:Gem::Requirement
|
156
|
-
requirements:
|
157
|
-
- - "~>"
|
158
|
-
- !ruby/object:Gem::Version
|
159
|
-
version: '1.24'
|
160
|
-
- - ">="
|
161
|
-
- !ruby/object:Gem::Version
|
162
|
-
version: 1.24.0
|
163
|
-
type: :development
|
164
|
-
prerelease: false
|
165
|
-
version_requirements: !ruby/object:Gem::Requirement
|
166
|
-
requirements:
|
167
|
-
- - "~>"
|
168
|
-
- !ruby/object:Gem::Version
|
169
|
-
version: '1.24'
|
170
|
-
- - ">="
|
171
|
-
- !ruby/object:Gem::Version
|
172
|
-
version: 1.24.0
|
173
|
-
- !ruby/object:Gem::Dependency
|
174
|
-
name: vcr
|
175
|
-
requirement: !ruby/object:Gem::Requirement
|
176
|
-
requirements:
|
177
|
-
- - "~>"
|
178
|
-
- !ruby/object:Gem::Version
|
179
|
-
version: '3.0'
|
180
|
-
- - ">="
|
181
|
-
- !ruby/object:Gem::Version
|
182
|
-
version: 3.0.1
|
183
|
-
type: :development
|
184
|
-
prerelease: false
|
185
|
-
version_requirements: !ruby/object:Gem::Requirement
|
186
|
-
requirements:
|
187
|
-
- - "~>"
|
188
|
-
- !ruby/object:Gem::Version
|
189
|
-
version: '3.0'
|
190
|
-
- - ">="
|
191
|
-
- !ruby/object:Gem::Version
|
192
|
-
version: 3.0.1
|
193
|
-
description:
|
153
|
+
description:
|
194
154
|
email:
|
195
155
|
- fenne035@umn.edu
|
196
156
|
executables: []
|
@@ -211,6 +171,7 @@ files:
|
|
211
171
|
- lib/cdmbl.rb
|
212
172
|
- lib/cdmbl/batch_deleter.rb
|
213
173
|
- lib/cdmbl/batch_deleter_worker.rb
|
174
|
+
- lib/cdmbl/cdm_request_worker.rb
|
214
175
|
- lib/cdmbl/compound_filter.rb
|
215
176
|
- lib/cdmbl/compound_lookup.rb
|
216
177
|
- lib/cdmbl/default_cdm_notification.rb
|
@@ -245,11 +206,11 @@ files:
|
|
245
206
|
- lib/cdmbl/transformer.rb
|
246
207
|
- lib/cdmbl/version.rb
|
247
208
|
- travis.yml
|
248
|
-
homepage:
|
209
|
+
homepage:
|
249
210
|
licenses:
|
250
211
|
- MIT
|
251
212
|
metadata: {}
|
252
|
-
post_install_message:
|
213
|
+
post_install_message:
|
253
214
|
rdoc_options: []
|
254
215
|
require_paths:
|
255
216
|
- lib
|
@@ -264,9 +225,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
264
225
|
- !ruby/object:Gem::Version
|
265
226
|
version: '0'
|
266
227
|
requirements: []
|
267
|
-
|
268
|
-
|
269
|
-
signing_key:
|
228
|
+
rubygems_version: 3.0.8
|
229
|
+
signing_key:
|
270
230
|
specification_version: 4
|
271
231
|
summary: Load CONTENTdm data into a Solr Index. CDMBL expects to run inside a Rails
|
272
232
|
application.
|