cdmbl 0.2.12 → 0.2.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/cdmbl/default_oai_notification.rb +2 -2
- data/lib/cdmbl/etl_run.rb +10 -18
- data/lib/cdmbl/etl_worker.rb +61 -10
- data/lib/cdmbl/extractor.rb +6 -9
- data/lib/cdmbl/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a45fd0906d4aabd9d22383376d7bd0d241906121
|
4
|
+
data.tar.gz: 2ea5a558a0d703b6c146b421426e828713852392
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a158929c2288f0aa589e275cd841504d9c3143d53df2ce062d746f683d3ad3d1bd21636122011a3e893196f9564cafcc3f4b43a36db6874d352a234cfda62d17
|
7
|
+
data.tar.gz: d5a2daec503800ac3df7bdb9e65b7cd931942f28c69721f285e985bad3dd2ec78d6932202abc409a74bf1c2c72cb969afffbb543c275a8b1de62e28f798156c9
|
data/lib/cdmbl/etl_run.rb
CHANGED
@@ -34,29 +34,21 @@ module CDMBL
|
|
34
34
|
@solr_client = solr_client
|
35
35
|
end
|
36
36
|
|
37
|
-
def
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
def next_resumption_token
|
42
|
-
extraction.next_resumption_token
|
43
|
-
end
|
44
|
-
|
45
|
-
def persister
|
46
|
-
loader.new(records: transformation.records,
|
47
|
-
deletable_ids: extraction.deletable_ids,
|
48
|
-
solr_client: solr_client)
|
37
|
+
def extract
|
38
|
+
@extraction ||= extractor.new(oai_request: oai_request,
|
39
|
+
cdm_endpoint: cdm_endpoint)
|
49
40
|
end
|
50
41
|
|
51
|
-
def
|
52
|
-
@transformation ||= transformer.new(cdm_records:
|
53
|
-
oai_sets:
|
42
|
+
def transform(sets, records)
|
43
|
+
@transformation ||= transformer.new(cdm_records: records,
|
44
|
+
oai_sets: sets,
|
54
45
|
field_mappings: field_mappings)
|
55
46
|
end
|
56
47
|
|
57
|
-
def
|
58
|
-
|
59
|
-
|
48
|
+
def load!(deletables, records)
|
49
|
+
loader.new(records: records,
|
50
|
+
deletable_ids: deletables,
|
51
|
+
solr_client: solr_client).load!
|
60
52
|
end
|
61
53
|
|
62
54
|
def oai_request
|
data/lib/cdmbl/etl_worker.rb
CHANGED
@@ -1,20 +1,71 @@
|
|
1
1
|
require 'sidekiq'
|
2
2
|
module CDMBL
|
3
3
|
class ETLWorker
|
4
|
-
attr_reader :solr_config,
|
4
|
+
attr_reader :solr_config,
|
5
|
+
:etl_config,
|
6
|
+
:is_recursive,
|
7
|
+
:identifiers,
|
8
|
+
:deletables,
|
9
|
+
:sets
|
10
|
+
|
5
11
|
include Sidekiq::Worker
|
6
|
-
def perform(solr_config,
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
12
|
+
def perform(solr_config,
|
13
|
+
etl_config,
|
14
|
+
is_recursive = true,
|
15
|
+
identifiers = [],
|
16
|
+
deletables = [],
|
17
|
+
sets = [])
|
18
|
+
|
19
|
+
@etl_config = etl_config.symbolize_keys
|
20
|
+
@solr_config = solr_config.symbolize_keys
|
21
|
+
@is_recursive = is_recursive
|
22
|
+
@identifiers = identifiers
|
23
|
+
@deletables = deletables
|
24
|
+
@sets = sets
|
25
|
+
|
26
|
+
if !identifiers.empty?
|
27
|
+
load!
|
13
28
|
else
|
14
|
-
|
29
|
+
ingest_batches!
|
30
|
+
if extraction.next_resumption_token && is_recursive
|
31
|
+
ETLWorker.perform_async(solr_config, next_etl_config)
|
32
|
+
else
|
33
|
+
CDMBL::CompletedCallback.call!(solr_client)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def ingest_batches!
|
41
|
+
extraction.local_identifiers.each_slice(10) do |ids|
|
42
|
+
ETLWorker.perform_async(solr_config,
|
43
|
+
etl_config,
|
44
|
+
is_recursive,
|
45
|
+
ids,
|
46
|
+
extraction.deletable_ids,
|
47
|
+
extraction.set_lookup)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def load!
|
52
|
+
etl_run.load!(deletables, transformation.records)
|
53
|
+
end
|
54
|
+
|
55
|
+
def transformation
|
56
|
+
etl_run.transform(sets, records)
|
57
|
+
end
|
58
|
+
|
59
|
+
def records
|
60
|
+
identifiers.map do |identifier|
|
61
|
+
extraction.cdm_request(*identifier)
|
15
62
|
end
|
16
63
|
end
|
17
64
|
|
65
|
+
def extraction
|
66
|
+
@extraction ||= etl_run.extract
|
67
|
+
end
|
68
|
+
|
18
69
|
def etl_run
|
19
70
|
ETLRun.new(etl_config.merge(solr_client: solr_client))
|
20
71
|
end
|
@@ -24,7 +75,7 @@ module CDMBL
|
|
24
75
|
end
|
25
76
|
|
26
77
|
def next_etl_config
|
27
|
-
etl_config.merge(resumption_token:
|
78
|
+
etl_config.merge(resumption_token: extraction.next_resumption_token)
|
28
79
|
end
|
29
80
|
end
|
30
81
|
end
|
data/lib/cdmbl/extractor.rb
CHANGED
@@ -32,10 +32,6 @@ module CDMBL
|
|
32
32
|
oai_set_lookup.new(oai_sets: to_hash(sets)).keyed
|
33
33
|
end
|
34
34
|
|
35
|
-
def records
|
36
|
-
local_identifiers.map { |identifier| cdm_request(*identifier) }
|
37
|
-
end
|
38
|
-
|
39
35
|
def ids
|
40
36
|
(specific_ids) ? specific_ids : local_identifiers
|
41
37
|
end
|
@@ -52,6 +48,12 @@ module CDMBL
|
|
52
48
|
oai_identifiers.at_path('OAI_PMH/ListIdentifiers/resumptionToken')
|
53
49
|
end
|
54
50
|
|
51
|
+
# e.g. local_identifiers.map { |identifier| extractor.cdm_request(*identifier) }
|
52
|
+
def cdm_request(collection, id)
|
53
|
+
CDMBL::CdmNotification.call!(collection, id, cdm_endpoint)
|
54
|
+
cdm_item.new(base_url: cdm_endpoint, collection: collection, id: id).metadata
|
55
|
+
end
|
56
|
+
|
55
57
|
private
|
56
58
|
|
57
59
|
def oai_ids
|
@@ -72,11 +74,6 @@ module CDMBL
|
|
72
74
|
to_hash(identifiers)
|
73
75
|
end
|
74
76
|
|
75
|
-
def cdm_request(collection, id)
|
76
|
-
CDMBL::CdmNotification.call!(collection, id, cdm_endpoint)
|
77
|
-
cdm_item.new(base_url: cdm_endpoint, collection: collection, id: id).metadata
|
78
|
-
end
|
79
|
-
|
80
77
|
def to_hash(xml)
|
81
78
|
Hash.from_xml(xml)
|
82
79
|
end
|
data/lib/cdmbl/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdmbl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.12
|
4
|
+
version: 0.2.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chadfennell
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-12-
|
11
|
+
date: 2016-12-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: hash_at_path
|