cdmbl 0.2.12 → 0.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cdmbl/default_oai_notification.rb +2 -2
- data/lib/cdmbl/etl_run.rb +10 -18
- data/lib/cdmbl/etl_worker.rb +61 -10
- data/lib/cdmbl/extractor.rb +6 -9
- data/lib/cdmbl/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a45fd0906d4aabd9d22383376d7bd0d241906121
|
4
|
+
data.tar.gz: 2ea5a558a0d703b6c146b421426e828713852392
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a158929c2288f0aa589e275cd841504d9c3143d53df2ce062d746f683d3ad3d1bd21636122011a3e893196f9564cafcc3f4b43a36db6874d352a234cfda62d17
|
7
|
+
data.tar.gz: d5a2daec503800ac3df7bdb9e65b7cd931942f28c69721f285e985bad3dd2ec78d6932202abc409a74bf1c2c72cb969afffbb543c275a8b1de62e28f798156c9
|
data/lib/cdmbl/etl_run.rb
CHANGED
@@ -34,29 +34,21 @@ module CDMBL
|
|
34
34
|
@solr_client = solr_client
|
35
35
|
end
|
36
36
|
|
37
|
-
def
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
def next_resumption_token
|
42
|
-
extraction.next_resumption_token
|
43
|
-
end
|
44
|
-
|
45
|
-
def persister
|
46
|
-
loader.new(records: transformation.records,
|
47
|
-
deletable_ids: extraction.deletable_ids,
|
48
|
-
solr_client: solr_client)
|
37
|
+
def extract
|
38
|
+
@extraction ||= extractor.new(oai_request: oai_request,
|
39
|
+
cdm_endpoint: cdm_endpoint)
|
49
40
|
end
|
50
41
|
|
51
|
-
def
|
52
|
-
@transformation ||= transformer.new(cdm_records:
|
53
|
-
oai_sets:
|
42
|
+
def transform(sets, records)
|
43
|
+
@transformation ||= transformer.new(cdm_records: records,
|
44
|
+
oai_sets: sets,
|
54
45
|
field_mappings: field_mappings)
|
55
46
|
end
|
56
47
|
|
57
|
-
def
|
58
|
-
|
59
|
-
|
48
|
+
def load!(deletables, records)
|
49
|
+
loader.new(records: records,
|
50
|
+
deletable_ids: deletables,
|
51
|
+
solr_client: solr_client).load!
|
60
52
|
end
|
61
53
|
|
62
54
|
def oai_request
|
data/lib/cdmbl/etl_worker.rb
CHANGED
@@ -1,20 +1,71 @@
|
|
1
1
|
require 'sidekiq'
|
2
2
|
module CDMBL
|
3
3
|
class ETLWorker
|
4
|
-
attr_reader :solr_config,
|
4
|
+
attr_reader :solr_config,
|
5
|
+
:etl_config,
|
6
|
+
:is_recursive,
|
7
|
+
:identifiers,
|
8
|
+
:deletables,
|
9
|
+
:sets
|
10
|
+
|
5
11
|
include Sidekiq::Worker
|
6
|
-
def perform(solr_config,
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
12
|
+
def perform(solr_config,
|
13
|
+
etl_config,
|
14
|
+
is_recursive = true,
|
15
|
+
identifiers = [],
|
16
|
+
deletables = [],
|
17
|
+
sets = [])
|
18
|
+
|
19
|
+
@etl_config = etl_config.symbolize_keys
|
20
|
+
@solr_config = solr_config.symbolize_keys
|
21
|
+
@is_recursive = is_recursive
|
22
|
+
@identifiers = identifiers
|
23
|
+
@deletables = deletables
|
24
|
+
@sets = sets
|
25
|
+
|
26
|
+
if !identifiers.empty?
|
27
|
+
load!
|
13
28
|
else
|
14
|
-
|
29
|
+
ingest_batches!
|
30
|
+
if extraction.next_resumption_token && is_recursive
|
31
|
+
ETLWorker.perform_async(solr_config, next_etl_config)
|
32
|
+
else
|
33
|
+
CDMBL::CompletedCallback.call!(solr_client)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def ingest_batches!
|
41
|
+
extraction.local_identifiers.each_slice(10) do |ids|
|
42
|
+
ETLWorker.perform_async(solr_config,
|
43
|
+
etl_config,
|
44
|
+
is_recursive,
|
45
|
+
ids,
|
46
|
+
extraction.deletable_ids,
|
47
|
+
extraction.set_lookup)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def load!
|
52
|
+
etl_run.load!(deletables, transformation.records)
|
53
|
+
end
|
54
|
+
|
55
|
+
def transformation
|
56
|
+
etl_run.transform(sets, records)
|
57
|
+
end
|
58
|
+
|
59
|
+
def records
|
60
|
+
identifiers.map do |identifier|
|
61
|
+
extraction.cdm_request(*identifier)
|
15
62
|
end
|
16
63
|
end
|
17
64
|
|
65
|
+
def extraction
|
66
|
+
@extraction ||= etl_run.extract
|
67
|
+
end
|
68
|
+
|
18
69
|
def etl_run
|
19
70
|
ETLRun.new(etl_config.merge(solr_client: solr_client))
|
20
71
|
end
|
@@ -24,7 +75,7 @@ module CDMBL
|
|
24
75
|
end
|
25
76
|
|
26
77
|
def next_etl_config
|
27
|
-
etl_config.merge(resumption_token:
|
78
|
+
etl_config.merge(resumption_token: extraction.next_resumption_token)
|
28
79
|
end
|
29
80
|
end
|
30
81
|
end
|
data/lib/cdmbl/extractor.rb
CHANGED
@@ -32,10 +32,6 @@ module CDMBL
|
|
32
32
|
oai_set_lookup.new(oai_sets: to_hash(sets)).keyed
|
33
33
|
end
|
34
34
|
|
35
|
-
def records
|
36
|
-
local_identifiers.map { |identifier| cdm_request(*identifier) }
|
37
|
-
end
|
38
|
-
|
39
35
|
def ids
|
40
36
|
(specific_ids) ? specific_ids : local_identifiers
|
41
37
|
end
|
@@ -52,6 +48,12 @@ module CDMBL
|
|
52
48
|
oai_identifiers.at_path('OAI_PMH/ListIdentifiers/resumptionToken')
|
53
49
|
end
|
54
50
|
|
51
|
+
# e.g. local_identifiers.map { |identifier| extractor.cdm_request(*identifier) }
|
52
|
+
def cdm_request(collection, id)
|
53
|
+
CDMBL::CdmNotification.call!(collection, id, cdm_endpoint)
|
54
|
+
cdm_item.new(base_url: cdm_endpoint, collection: collection, id: id).metadata
|
55
|
+
end
|
56
|
+
|
55
57
|
private
|
56
58
|
|
57
59
|
def oai_ids
|
@@ -72,11 +74,6 @@ module CDMBL
|
|
72
74
|
to_hash(identifiers)
|
73
75
|
end
|
74
76
|
|
75
|
-
def cdm_request(collection, id)
|
76
|
-
CDMBL::CdmNotification.call!(collection, id, cdm_endpoint)
|
77
|
-
cdm_item.new(base_url: cdm_endpoint, collection: collection, id: id).metadata
|
78
|
-
end
|
79
|
-
|
80
77
|
def to_hash(xml)
|
81
78
|
Hash.from_xml(xml)
|
82
79
|
end
|
data/lib/cdmbl/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdmbl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.12
|
4
|
+
version: 0.2.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chadfennell
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-12-
|
11
|
+
date: 2016-12-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: hash_at_path
|