cdmbl 0.17.1 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cdmbl/etl_worker.rb +22 -15
- data/lib/cdmbl/extractor.rb +13 -6
- data/lib/cdmbl/formatters.rb +2 -2
- data/lib/cdmbl/oai_request.rb +41 -38
- data/lib/cdmbl/version.rb +2 -2
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 98471f931e9b4535c9f1019cfe954cfba9eb7e9742fdb4f2eda70fe914d4fd3e
|
4
|
+
data.tar.gz: 32524fe0f8ae2d4ea5b45954d0b368a46153b9f7b3298cb064a5202cd75049ee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6205cbb83cc23192f3157341e4980b49b4bd6cb89a615ddec3ae13d829f26738d293e6cb174e56741c6fe1406c501568896fdf3ca148be55743b0318452c75ff
|
7
|
+
data.tar.gz: c9b9f37bc27e3a7d21fb30035310dae9109e5f0d73d8bf8a45f7b0f175251766e06185de27a03763a50eb8d034777437f910b73e2446e388c98f7371c3a04d8d
|
data/lib/cdmbl/etl_worker.rb
CHANGED
@@ -16,7 +16,8 @@ module CDMBL
|
|
16
16
|
:set_spec,
|
17
17
|
:max_compounds,
|
18
18
|
:batch_size,
|
19
|
-
:is_recursive
|
19
|
+
:is_recursive,
|
20
|
+
:from
|
20
21
|
|
21
22
|
attr_writer :compound_filter_klass,
|
22
23
|
:extractor_klass,
|
@@ -40,6 +41,7 @@ module CDMBL
|
|
40
41
|
@max_compounds = config.fetch('max_compounds', 10)
|
41
42
|
@batch_size = config.fetch('batch_size', 5).to_i
|
42
43
|
@is_recursive = config.fetch('is_recursive', true)
|
44
|
+
@from = config.fetch('from', nil)
|
43
45
|
extract_batch!
|
44
46
|
next_batch!
|
45
47
|
end
|
@@ -114,12 +116,14 @@ module CDMBL
|
|
114
116
|
end
|
115
117
|
|
116
118
|
def transform!(ids)
|
117
|
-
transform_worker_klass.perform_async(
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
119
|
+
transform_worker_klass.perform_async(
|
120
|
+
ids,
|
121
|
+
solr_config,
|
122
|
+
cdm_endpoint,
|
123
|
+
oai_endpoint,
|
124
|
+
field_mappings,
|
125
|
+
extract_compounds
|
126
|
+
)
|
123
127
|
end
|
124
128
|
|
125
129
|
def delete_deletables!
|
@@ -127,17 +131,20 @@ module CDMBL
|
|
127
131
|
end
|
128
132
|
|
129
133
|
def compound_filter
|
130
|
-
@compound_filter ||=
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
+
@compound_filter ||= compound_filter_klass.new(
|
135
|
+
record_ids: extraction.local_identifiers,
|
136
|
+
cdm_endpoint: cdm_endpoint,
|
137
|
+
max_compounds: max_compounds
|
138
|
+
)
|
134
139
|
end
|
135
140
|
|
136
141
|
def extraction
|
137
|
-
@extraction ||=
|
138
|
-
|
139
|
-
|
140
|
-
|
142
|
+
@extraction ||= extractor_klass.new(
|
143
|
+
oai_endpoint: oai_endpoint,
|
144
|
+
resumption_token: resumption_token,
|
145
|
+
set_spec: set_spec,
|
146
|
+
from: from
|
147
|
+
)
|
141
148
|
end
|
142
149
|
end
|
143
150
|
end
|
data/lib/cdmbl/extractor.rb
CHANGED
@@ -16,6 +16,7 @@ module CDMBL
|
|
16
16
|
def initialize(oai_endpoint: '',
|
17
17
|
resumption_token: nil,
|
18
18
|
set_spec: nil,
|
19
|
+
from: nil,
|
19
20
|
oai_request_klass: OaiRequest,
|
20
21
|
oai_filter_klass: OAIFilter,
|
21
22
|
oai_set_lookup_klass: OAISetLookup)
|
@@ -24,7 +25,8 @@ module CDMBL
|
|
24
25
|
@oai_set_lookup_klass = oai_set_lookup_klass
|
25
26
|
@oai_request = oai_requester(oai_endpoint,
|
26
27
|
resumption_token,
|
27
|
-
set_spec
|
28
|
+
set_spec,
|
29
|
+
from)
|
28
30
|
end
|
29
31
|
|
30
32
|
def deletable_ids
|
@@ -49,11 +51,16 @@ module CDMBL
|
|
49
51
|
|
50
52
|
private
|
51
53
|
|
52
|
-
def oai_requester(oai_endpoint, resumption_token, set_spec)
|
53
|
-
@oai_requester ||=
|
54
|
-
|
55
|
-
|
56
|
-
|
54
|
+
def oai_requester(oai_endpoint, resumption_token, set_spec, from)
|
55
|
+
@oai_requester ||= begin
|
56
|
+
args = {
|
57
|
+
base_uri: oai_endpoint,
|
58
|
+
resumption_token: resumption_token,
|
59
|
+
set: set_spec,
|
60
|
+
}
|
61
|
+
args[:from] = from if from
|
62
|
+
oai_request_klass.new(args)
|
63
|
+
end
|
57
64
|
end
|
58
65
|
|
59
66
|
# Get the local collection and id from an OAI namespaced identifier
|
data/lib/cdmbl/formatters.rb
CHANGED
@@ -72,7 +72,7 @@ module CDMBL
|
|
72
72
|
class Titlieze
|
73
73
|
def self.format(value)
|
74
74
|
if value.respond_to?(:map)
|
75
|
-
value.map
|
75
|
+
value.map(&:titleize)
|
76
76
|
else
|
77
77
|
value.titleize
|
78
78
|
end
|
@@ -172,4 +172,4 @@ module CDMBL
|
|
172
172
|
end
|
173
173
|
end
|
174
174
|
|
175
|
-
end
|
175
|
+
end
|
data/lib/cdmbl/oai_request.rb
CHANGED
@@ -1,48 +1,51 @@
|
|
1
1
|
require 'json'
|
2
2
|
module CDMBL
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
3
|
+
class OaiRequest
|
4
|
+
attr_reader :base_uri,
|
5
|
+
:resumption_token,
|
6
|
+
:client,
|
7
|
+
:set,
|
8
|
+
:identifier,
|
9
|
+
:from
|
10
|
+
def initialize(base_uri: '',
|
11
|
+
resumption_token: nil,
|
12
|
+
set: nil,
|
13
|
+
identifier: '',
|
14
|
+
from: nil,
|
15
|
+
client: Net::HTTP)
|
16
|
+
@base_uri = base_uri
|
17
|
+
@resumption_token = resumption_token
|
18
|
+
@client = client
|
19
|
+
@set = (set) ? "&set=#{set}" : ''
|
20
|
+
@from = from ? "&from=#{from}" : ''
|
21
|
+
@identifier = identifier
|
22
|
+
end
|
20
23
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
+
def identifiers
|
25
|
+
@ids ||= (resumption_token) ? request(batch_uri) : request(first_batch_uri)
|
26
|
+
end
|
24
27
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
+
def sets
|
29
|
+
@sets ||= request(sets_uri)
|
30
|
+
end
|
28
31
|
|
29
|
-
|
32
|
+
private
|
30
33
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
+
def first_batch_uri
|
35
|
+
"#{base_uri}?verb=ListIdentifiers&metadataPrefix=oai_dc#{set}#{from}"
|
36
|
+
end
|
34
37
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
+
def batch_uri
|
39
|
+
"#{base_uri}?verb=ListIdentifiers&resumptionToken=#{resumption_token}"
|
40
|
+
end
|
38
41
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
+
def sets_uri
|
43
|
+
"#{base_uri}?verb=ListSets"
|
44
|
+
end
|
42
45
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
end
|
46
|
+
def request(location)
|
47
|
+
CDMBL::OaiNotification.call!(location)
|
48
|
+
Hash.from_xml(client.get_response(URI(location)).body)
|
47
49
|
end
|
48
|
-
end
|
50
|
+
end
|
51
|
+
end
|
data/lib/cdmbl/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module CDMBL
|
2
|
-
VERSION = "0.17.1"
|
3
|
-
end
|
2
|
+
VERSION = "0.18.0"
|
3
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdmbl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.17.1
|
4
|
+
version: 0.18.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chadfennell
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: hash_at_path
|
@@ -150,7 +150,7 @@ dependencies:
|
|
150
150
|
- - "~>"
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: 0.9.0
|
153
|
-
description:
|
153
|
+
description:
|
154
154
|
email:
|
155
155
|
- fenne035@umn.edu
|
156
156
|
executables: []
|
@@ -206,11 +206,11 @@ files:
|
|
206
206
|
- lib/cdmbl/transformer.rb
|
207
207
|
- lib/cdmbl/version.rb
|
208
208
|
- travis.yml
|
209
|
-
homepage:
|
209
|
+
homepage:
|
210
210
|
licenses:
|
211
211
|
- MIT
|
212
212
|
metadata: {}
|
213
|
-
post_install_message:
|
213
|
+
post_install_message:
|
214
214
|
rdoc_options: []
|
215
215
|
require_paths:
|
216
216
|
- lib
|
@@ -225,8 +225,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
225
225
|
- !ruby/object:Gem::Version
|
226
226
|
version: '0'
|
227
227
|
requirements: []
|
228
|
-
rubygems_version: 3.0.
|
229
|
-
signing_key:
|
228
|
+
rubygems_version: 3.0.8
|
229
|
+
signing_key:
|
230
230
|
specification_version: 4
|
231
231
|
summary: Load CONTENTdm data into a Solr Index. CDMBL expects to run inside a Rails
|
232
232
|
application.
|