cdmbl 0.17.1 → 0.18.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/cdmbl/etl_worker.rb +22 -15
- data/lib/cdmbl/extractor.rb +13 -6
- data/lib/cdmbl/formatters.rb +2 -2
- data/lib/cdmbl/oai_request.rb +41 -38
- data/lib/cdmbl/version.rb +2 -2
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 98471f931e9b4535c9f1019cfe954cfba9eb7e9742fdb4f2eda70fe914d4fd3e
|
4
|
+
data.tar.gz: 32524fe0f8ae2d4ea5b45954d0b368a46153b9f7b3298cb064a5202cd75049ee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6205cbb83cc23192f3157341e4980b49b4bd6cb89a615ddec3ae13d829f26738d293e6cb174e56741c6fe1406c501568896fdf3ca148be55743b0318452c75ff
|
7
|
+
data.tar.gz: c9b9f37bc27e3a7d21fb30035310dae9109e5f0d73d8bf8a45f7b0f175251766e06185de27a03763a50eb8d034777437f910b73e2446e388c98f7371c3a04d8d
|
data/lib/cdmbl/etl_worker.rb
CHANGED
@@ -16,7 +16,8 @@ module CDMBL
|
|
16
16
|
:set_spec,
|
17
17
|
:max_compounds,
|
18
18
|
:batch_size,
|
19
|
-
:is_recursive
|
19
|
+
:is_recursive,
|
20
|
+
:from
|
20
21
|
|
21
22
|
attr_writer :compound_filter_klass,
|
22
23
|
:extractor_klass,
|
@@ -40,6 +41,7 @@ module CDMBL
|
|
40
41
|
@max_compounds = config.fetch('max_compounds', 10)
|
41
42
|
@batch_size = config.fetch('batch_size', 5).to_i
|
42
43
|
@is_recursive = config.fetch('is_recursive', true)
|
44
|
+
@from = config.fetch('from', nil)
|
43
45
|
extract_batch!
|
44
46
|
next_batch!
|
45
47
|
end
|
@@ -114,12 +116,14 @@ module CDMBL
|
|
114
116
|
end
|
115
117
|
|
116
118
|
def transform!(ids)
|
117
|
-
transform_worker_klass.perform_async(
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
119
|
+
transform_worker_klass.perform_async(
|
120
|
+
ids,
|
121
|
+
solr_config,
|
122
|
+
cdm_endpoint,
|
123
|
+
oai_endpoint,
|
124
|
+
field_mappings,
|
125
|
+
extract_compounds
|
126
|
+
)
|
123
127
|
end
|
124
128
|
|
125
129
|
def delete_deletables!
|
@@ -127,17 +131,20 @@ module CDMBL
|
|
127
131
|
end
|
128
132
|
|
129
133
|
def compound_filter
|
130
|
-
@compound_filter ||=
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
+
@compound_filter ||= compound_filter_klass.new(
|
135
|
+
record_ids: extraction.local_identifiers,
|
136
|
+
cdm_endpoint: cdm_endpoint,
|
137
|
+
max_compounds: max_compounds
|
138
|
+
)
|
134
139
|
end
|
135
140
|
|
136
141
|
def extraction
|
137
|
-
@extraction ||=
|
138
|
-
|
139
|
-
|
140
|
-
|
142
|
+
@extraction ||= extractor_klass.new(
|
143
|
+
oai_endpoint: oai_endpoint,
|
144
|
+
resumption_token: resumption_token,
|
145
|
+
set_spec: set_spec,
|
146
|
+
from: from
|
147
|
+
)
|
141
148
|
end
|
142
149
|
end
|
143
150
|
end
|
data/lib/cdmbl/extractor.rb
CHANGED
@@ -16,6 +16,7 @@ module CDMBL
|
|
16
16
|
def initialize(oai_endpoint: '',
|
17
17
|
resumption_token: nil,
|
18
18
|
set_spec: nil,
|
19
|
+
from: nil,
|
19
20
|
oai_request_klass: OaiRequest,
|
20
21
|
oai_filter_klass: OAIFilter,
|
21
22
|
oai_set_lookup_klass: OAISetLookup)
|
@@ -24,7 +25,8 @@ module CDMBL
|
|
24
25
|
@oai_set_lookup_klass = oai_set_lookup_klass
|
25
26
|
@oai_request = oai_requester(oai_endpoint,
|
26
27
|
resumption_token,
|
27
|
-
set_spec
|
28
|
+
set_spec,
|
29
|
+
from)
|
28
30
|
end
|
29
31
|
|
30
32
|
def deletable_ids
|
@@ -49,11 +51,16 @@ module CDMBL
|
|
49
51
|
|
50
52
|
private
|
51
53
|
|
52
|
-
def oai_requester(oai_endpoint, resumption_token, set_spec)
|
53
|
-
@oai_requester ||=
|
54
|
-
|
55
|
-
|
56
|
-
|
54
|
+
def oai_requester(oai_endpoint, resumption_token, set_spec, from)
|
55
|
+
@oai_requester ||= begin
|
56
|
+
args = {
|
57
|
+
base_uri: oai_endpoint,
|
58
|
+
resumption_token: resumption_token,
|
59
|
+
set: set_spec,
|
60
|
+
}
|
61
|
+
args[:from] = from if from
|
62
|
+
oai_request_klass.new(args)
|
63
|
+
end
|
57
64
|
end
|
58
65
|
|
59
66
|
# Get the local collection and id from an OAI namespaced identifier
|
data/lib/cdmbl/formatters.rb
CHANGED
@@ -72,7 +72,7 @@ module CDMBL
|
|
72
72
|
class Titlieze
|
73
73
|
def self.format(value)
|
74
74
|
if value.respond_to?(:map)
|
75
|
-
value.map
|
75
|
+
value.map(&:titleize)
|
76
76
|
else
|
77
77
|
value.titleize
|
78
78
|
end
|
@@ -172,4 +172,4 @@ module CDMBL
|
|
172
172
|
end
|
173
173
|
end
|
174
174
|
|
175
|
-
end
|
175
|
+
end
|
data/lib/cdmbl/oai_request.rb
CHANGED
@@ -1,48 +1,51 @@
|
|
1
1
|
require 'json'
|
2
2
|
module CDMBL
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
3
|
+
class OaiRequest
|
4
|
+
attr_reader :base_uri,
|
5
|
+
:resumption_token,
|
6
|
+
:client,
|
7
|
+
:set,
|
8
|
+
:identifier,
|
9
|
+
:from
|
10
|
+
def initialize(base_uri: '',
|
11
|
+
resumption_token: nil,
|
12
|
+
set: nil,
|
13
|
+
identifier: '',
|
14
|
+
from: nil,
|
15
|
+
client: Net::HTTP)
|
16
|
+
@base_uri = base_uri
|
17
|
+
@resumption_token = resumption_token
|
18
|
+
@client = client
|
19
|
+
@set = (set) ? "&set=#{set}" : ''
|
20
|
+
@from = from ? "&from=#{from}" : ''
|
21
|
+
@identifier = identifier
|
22
|
+
end
|
20
23
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
+
def identifiers
|
25
|
+
@ids ||= (resumption_token) ? request(batch_uri) : request(first_batch_uri)
|
26
|
+
end
|
24
27
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
+
def sets
|
29
|
+
@sets ||= request(sets_uri)
|
30
|
+
end
|
28
31
|
|
29
|
-
|
32
|
+
private
|
30
33
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
+
def first_batch_uri
|
35
|
+
"#{base_uri}?verb=ListIdentifiers&metadataPrefix=oai_dc#{set}#{from}"
|
36
|
+
end
|
34
37
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
+
def batch_uri
|
39
|
+
"#{base_uri}?verb=ListIdentifiers&resumptionToken=#{resumption_token}"
|
40
|
+
end
|
38
41
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
+
def sets_uri
|
43
|
+
"#{base_uri}?verb=ListSets"
|
44
|
+
end
|
42
45
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
end
|
46
|
+
def request(location)
|
47
|
+
CDMBL::OaiNotification.call!(location)
|
48
|
+
Hash.from_xml(client.get_response(URI(location)).body)
|
47
49
|
end
|
48
|
-
end
|
50
|
+
end
|
51
|
+
end
|
data/lib/cdmbl/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module CDMBL
|
2
|
-
VERSION = "0.17.1"
|
3
|
-
end
|
2
|
+
VERSION = "0.18.0"
|
3
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdmbl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.17.1
|
4
|
+
version: 0.18.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chadfennell
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: hash_at_path
|
@@ -150,7 +150,7 @@ dependencies:
|
|
150
150
|
- - "~>"
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: 0.9.0
|
153
|
-
description:
|
153
|
+
description:
|
154
154
|
email:
|
155
155
|
- fenne035@umn.edu
|
156
156
|
executables: []
|
@@ -206,11 +206,11 @@ files:
|
|
206
206
|
- lib/cdmbl/transformer.rb
|
207
207
|
- lib/cdmbl/version.rb
|
208
208
|
- travis.yml
|
209
|
-
homepage:
|
209
|
+
homepage:
|
210
210
|
licenses:
|
211
211
|
- MIT
|
212
212
|
metadata: {}
|
213
|
-
post_install_message:
|
213
|
+
post_install_message:
|
214
214
|
rdoc_options: []
|
215
215
|
require_paths:
|
216
216
|
- lib
|
@@ -225,8 +225,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
225
225
|
- !ruby/object:Gem::Version
|
226
226
|
version: '0'
|
227
227
|
requirements: []
|
228
|
-
rubygems_version: 3.0.
|
229
|
-
signing_key:
|
228
|
+
rubygems_version: 3.0.8
|
229
|
+
signing_key:
|
230
230
|
specification_version: 4
|
231
231
|
summary: Load CONTENTdm data into a Solr Index. CDMBL expects to run inside a Rails
|
232
232
|
application.
|