cdmbl 0.2.12 → 0.2.13

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 205106670da9d0eb64b39e5079b3c885f0bc4e11
4
- data.tar.gz: 45310207e4bbd256823f5ad35fb94378ee70e93c
3
+ metadata.gz: a45fd0906d4aabd9d22383376d7bd0d241906121
4
+ data.tar.gz: 2ea5a558a0d703b6c146b421426e828713852392
5
5
  SHA512:
6
- metadata.gz: 0d674a82a9c58a9891d90037f2c3614c93b6a8db14a2eb5f0a154ac2632823d694f9091995a1441416220f5edaae9d1c64ab892bb74e9994c5b86a2d3e928843
7
- data.tar.gz: 9e4bdbe926771af0b9cac3185c6386fd2e8a4a8a7a8328d6f149c749ea7941e44a7db96bfbecebb48d1587edb5186c7829d80ba51d70b5410585aab5176cc0ae
6
+ metadata.gz: a158929c2288f0aa589e275cd841504d9c3143d53df2ce062d746f683d3ad3d1bd21636122011a3e893196f9564cafcc3f4b43a36db6874d352a234cfda62d17
7
+ data.tar.gz: d5a2daec503800ac3df7bdb9e65b7cd931942f28c69721f285e985bad3dd2ec78d6932202abc409a74bf1c2c72cb969afffbb543c275a8b1de62e28f798156c9
@@ -1,8 +1,8 @@
1
1
  module CDMBL
2
2
  # An example callback
3
3
  class DefaultOaiNotification
4
- def self.call!(solr_client)
5
- puts "An OAI callback task"
4
+ def self.call!(location)
5
+ puts "An OAI callback task for #{location}"
6
6
  end
7
7
  end
8
8
  end
data/lib/cdmbl/etl_run.rb CHANGED
@@ -34,29 +34,21 @@ module CDMBL
34
34
  @solr_client = solr_client
35
35
  end
36
36
 
37
- def load!(resumption_token: false)
38
- persister.load!
39
- end
40
-
41
- def next_resumption_token
42
- extraction.next_resumption_token
43
- end
44
-
45
- def persister
46
- loader.new(records: transformation.records,
47
- deletable_ids: extraction.deletable_ids,
48
- solr_client: solr_client)
37
+ def extract
38
+ @extraction ||= extractor.new(oai_request: oai_request,
39
+ cdm_endpoint: cdm_endpoint)
49
40
  end
50
41
 
51
- def transformation
52
- @transformation ||= transformer.new(cdm_records: extraction.records,
53
- oai_sets: extraction.set_lookup,
42
+ def transform(sets, records)
43
+ @transformation ||= transformer.new(cdm_records: records,
44
+ oai_sets: sets,
54
45
  field_mappings: field_mappings)
55
46
  end
56
47
 
57
- def extraction
58
- @extraction ||= extractor.new(oai_request: oai_request,
59
- cdm_endpoint: cdm_endpoint)
48
+ def load!(deletables, records)
49
+ loader.new(records: records,
50
+ deletable_ids: deletables,
51
+ solr_client: solr_client).load!
60
52
  end
61
53
 
62
54
  def oai_request
@@ -1,20 +1,71 @@
1
1
  require 'sidekiq'
2
2
  module CDMBL
3
3
  class ETLWorker
4
- attr_reader :solr_config, :etl_config
4
+ attr_reader :solr_config,
5
+ :etl_config,
6
+ :is_recursive,
7
+ :identifiers,
8
+ :deletables,
9
+ :sets
10
+
5
11
  include Sidekiq::Worker
6
- def perform(solr_config, etl_config, recursive = true)
7
- @etl_config = etl_config.symbolize_keys
8
- @solr_config = solr_config.symbolize_keys
9
- puts "Ingesting resumptionToken batch: #{etl_config['resumption_token']}"
10
- etl_run.load!
11
- if etl_run.next_resumption_token && recursive
12
- ETLWorker.perform_async(solr_config, next_etl_config)
12
+ def perform(solr_config,
13
+ etl_config,
14
+ is_recursive = true,
15
+ identifiers = [],
16
+ deletables = [],
17
+ sets = [])
18
+
19
+ @etl_config = etl_config.symbolize_keys
20
+ @solr_config = solr_config.symbolize_keys
21
+ @is_recursive = is_recursive
22
+ @identifiers = identifiers
23
+ @deletables = deletables
24
+ @sets = sets
25
+
26
+ if !identifiers.empty?
27
+ load!
13
28
  else
14
- CDMBL::CompletedCallback.call!(solr_client)
29
+ ingest_batches!
30
+ if extraction.next_resumption_token && is_recursive
31
+ ETLWorker.perform_async(solr_config, next_etl_config)
32
+ else
33
+ CDMBL::CompletedCallback.call!(solr_client)
34
+ end
35
+ end
36
+ end
37
+
38
+ private
39
+
40
+ def ingest_batches!
41
+ extraction.local_identifiers.each_slice(10) do |ids|
42
+ ETLWorker.perform_async(solr_config,
43
+ etl_config,
44
+ is_recursive,
45
+ ids,
46
+ extraction.deletable_ids,
47
+ extraction.set_lookup)
48
+ end
49
+ end
50
+
51
+ def load!
52
+ etl_run.load!(deletables, transformation.records)
53
+ end
54
+
55
+ def transformation
56
+ etl_run.transform(sets, records)
57
+ end
58
+
59
+ def records
60
+ identifiers.map do |identifier|
61
+ extraction.cdm_request(*identifier)
15
62
  end
16
63
  end
17
64
 
65
+ def extraction
66
+ @extraction ||= etl_run.extract
67
+ end
68
+
18
69
  def etl_run
19
70
  ETLRun.new(etl_config.merge(solr_client: solr_client))
20
71
  end
@@ -24,7 +75,7 @@ module CDMBL
24
75
  end
25
76
 
26
77
  def next_etl_config
27
- etl_config.merge(resumption_token: etl_run.next_resumption_token)
78
+ etl_config.merge(resumption_token: extraction.next_resumption_token)
28
79
  end
29
80
  end
30
81
  end
@@ -32,10 +32,6 @@ module CDMBL
32
32
  oai_set_lookup.new(oai_sets: to_hash(sets)).keyed
33
33
  end
34
34
 
35
- def records
36
- local_identifiers.map { |identifier| cdm_request(*identifier) }
37
- end
38
-
39
35
  def ids
40
36
  (specific_ids) ? specific_ids : local_identifiers
41
37
  end
@@ -52,6 +48,12 @@ module CDMBL
52
48
  oai_identifiers.at_path('OAI_PMH/ListIdentifiers/resumptionToken')
53
49
  end
54
50
 
51
+ # e.g. local_identifiers.map { |identifier| extractor.cdm_request(*identifier) }
52
+ def cdm_request(collection, id)
53
+ CDMBL::CdmNotification.call!(collection, id, cdm_endpoint)
54
+ cdm_item.new(base_url: cdm_endpoint, collection: collection, id: id).metadata
55
+ end
56
+
55
57
  private
56
58
 
57
59
  def oai_ids
@@ -72,11 +74,6 @@ module CDMBL
72
74
  to_hash(identifiers)
73
75
  end
74
76
 
75
- def cdm_request(collection, id)
76
- CDMBL::CdmNotification.call!(collection, id, cdm_endpoint)
77
- cdm_item.new(base_url: cdm_endpoint, collection: collection, id: id).metadata
78
- end
79
-
80
77
  def to_hash(xml)
81
78
  Hash.from_xml(xml)
82
79
  end
data/lib/cdmbl/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module CDMBL
2
- VERSION = "0.2.12"
2
+ VERSION = "0.2.13"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdmbl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.12
4
+ version: 0.2.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - chadfennell
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-12-02 00:00:00.000000000 Z
11
+ date: 2016-12-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: hash_at_path