cdmbl 0.2.12 → 0.2.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 205106670da9d0eb64b39e5079b3c885f0bc4e11
4
- data.tar.gz: 45310207e4bbd256823f5ad35fb94378ee70e93c
3
+ metadata.gz: a45fd0906d4aabd9d22383376d7bd0d241906121
4
+ data.tar.gz: 2ea5a558a0d703b6c146b421426e828713852392
5
5
  SHA512:
6
- metadata.gz: 0d674a82a9c58a9891d90037f2c3614c93b6a8db14a2eb5f0a154ac2632823d694f9091995a1441416220f5edaae9d1c64ab892bb74e9994c5b86a2d3e928843
7
- data.tar.gz: 9e4bdbe926771af0b9cac3185c6386fd2e8a4a8a7a8328d6f149c749ea7941e44a7db96bfbecebb48d1587edb5186c7829d80ba51d70b5410585aab5176cc0ae
6
+ metadata.gz: a158929c2288f0aa589e275cd841504d9c3143d53df2ce062d746f683d3ad3d1bd21636122011a3e893196f9564cafcc3f4b43a36db6874d352a234cfda62d17
7
+ data.tar.gz: d5a2daec503800ac3df7bdb9e65b7cd931942f28c69721f285e985bad3dd2ec78d6932202abc409a74bf1c2c72cb969afffbb543c275a8b1de62e28f798156c9
@@ -1,8 +1,8 @@
1
1
  module CDMBL
2
2
  # An example callback
3
3
  class DefaultOaiNotification
4
- def self.call!(solr_client)
5
- puts "An OAI callback task"
4
+ def self.call!(location)
5
+ puts "An OAI callback task for #{location}"
6
6
  end
7
7
  end
8
8
  end
data/lib/cdmbl/etl_run.rb CHANGED
@@ -34,29 +34,21 @@ module CDMBL
34
34
  @solr_client = solr_client
35
35
  end
36
36
 
37
- def load!(resumption_token: false)
38
- persister.load!
39
- end
40
-
41
- def next_resumption_token
42
- extraction.next_resumption_token
43
- end
44
-
45
- def persister
46
- loader.new(records: transformation.records,
47
- deletable_ids: extraction.deletable_ids,
48
- solr_client: solr_client)
37
+ def extract
38
+ @extraction ||= extractor.new(oai_request: oai_request,
39
+ cdm_endpoint: cdm_endpoint)
49
40
  end
50
41
 
51
- def transformation
52
- @transformation ||= transformer.new(cdm_records: extraction.records,
53
- oai_sets: extraction.set_lookup,
42
+ def transform(sets, records)
43
+ @transformation ||= transformer.new(cdm_records: records,
44
+ oai_sets: sets,
54
45
  field_mappings: field_mappings)
55
46
  end
56
47
 
57
- def extraction
58
- @extraction ||= extractor.new(oai_request: oai_request,
59
- cdm_endpoint: cdm_endpoint)
48
+ def load!(deletables, records)
49
+ loader.new(records: records,
50
+ deletable_ids: deletables,
51
+ solr_client: solr_client).load!
60
52
  end
61
53
 
62
54
  def oai_request
@@ -1,20 +1,71 @@
1
1
  require 'sidekiq'
2
2
  module CDMBL
3
3
  class ETLWorker
4
- attr_reader :solr_config, :etl_config
4
+ attr_reader :solr_config,
5
+ :etl_config,
6
+ :is_recursive,
7
+ :identifiers,
8
+ :deletables,
9
+ :sets
10
+
5
11
  include Sidekiq::Worker
6
- def perform(solr_config, etl_config, recursive = true)
7
- @etl_config = etl_config.symbolize_keys
8
- @solr_config = solr_config.symbolize_keys
9
- puts "Ingesting resumptionToken batch: #{etl_config['resumption_token']}"
10
- etl_run.load!
11
- if etl_run.next_resumption_token && recursive
12
- ETLWorker.perform_async(solr_config, next_etl_config)
12
+ def perform(solr_config,
13
+ etl_config,
14
+ is_recursive = true,
15
+ identifiers = [],
16
+ deletables = [],
17
+ sets = [])
18
+
19
+ @etl_config = etl_config.symbolize_keys
20
+ @solr_config = solr_config.symbolize_keys
21
+ @is_recursive = is_recursive
22
+ @identifiers = identifiers
23
+ @deletables = deletables
24
+ @sets = sets
25
+
26
+ if !identifiers.empty?
27
+ load!
13
28
  else
14
- CDMBL::CompletedCallback.call!(solr_client)
29
+ ingest_batches!
30
+ if extraction.next_resumption_token && is_recursive
31
+ ETLWorker.perform_async(solr_config, next_etl_config)
32
+ else
33
+ CDMBL::CompletedCallback.call!(solr_client)
34
+ end
35
+ end
36
+ end
37
+
38
+ private
39
+
40
+ def ingest_batches!
41
+ extraction.local_identifiers.each_slice(10) do |ids|
42
+ ETLWorker.perform_async(solr_config,
43
+ etl_config,
44
+ is_recursive,
45
+ ids,
46
+ extraction.deletable_ids,
47
+ extraction.set_lookup)
48
+ end
49
+ end
50
+
51
+ def load!
52
+ etl_run.load!(deletables, transformation.records)
53
+ end
54
+
55
+ def transformation
56
+ etl_run.transform(sets, records)
57
+ end
58
+
59
+ def records
60
+ identifiers.map do |identifier|
61
+ extraction.cdm_request(*identifier)
15
62
  end
16
63
  end
17
64
 
65
+ def extraction
66
+ @extraction ||= etl_run.extract
67
+ end
68
+
18
69
  def etl_run
19
70
  ETLRun.new(etl_config.merge(solr_client: solr_client))
20
71
  end
@@ -24,7 +75,7 @@ module CDMBL
24
75
  end
25
76
 
26
77
  def next_etl_config
27
- etl_config.merge(resumption_token: etl_run.next_resumption_token)
78
+ etl_config.merge(resumption_token: extraction.next_resumption_token)
28
79
  end
29
80
  end
30
81
  end
@@ -32,10 +32,6 @@ module CDMBL
32
32
  oai_set_lookup.new(oai_sets: to_hash(sets)).keyed
33
33
  end
34
34
 
35
- def records
36
- local_identifiers.map { |identifier| cdm_request(*identifier) }
37
- end
38
-
39
35
  def ids
40
36
  (specific_ids) ? specific_ids : local_identifiers
41
37
  end
@@ -52,6 +48,12 @@ module CDMBL
52
48
  oai_identifiers.at_path('OAI_PMH/ListIdentifiers/resumptionToken')
53
49
  end
54
50
 
51
+ # e.g. local_identifiers.map { |identifier| extractor.cdm_request(*identifier) }
52
+ def cdm_request(collection, id)
53
+ CDMBL::CdmNotification.call!(collection, id, cdm_endpoint)
54
+ cdm_item.new(base_url: cdm_endpoint, collection: collection, id: id).metadata
55
+ end
56
+
55
57
  private
56
58
 
57
59
  def oai_ids
@@ -72,11 +74,6 @@ module CDMBL
72
74
  to_hash(identifiers)
73
75
  end
74
76
 
75
- def cdm_request(collection, id)
76
- CDMBL::CdmNotification.call!(collection, id, cdm_endpoint)
77
- cdm_item.new(base_url: cdm_endpoint, collection: collection, id: id).metadata
78
- end
79
-
80
77
  def to_hash(xml)
81
78
  Hash.from_xml(xml)
82
79
  end
data/lib/cdmbl/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module CDMBL
2
- VERSION = "0.2.12"
2
+ VERSION = "0.2.13"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdmbl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.12
4
+ version: 0.2.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - chadfennell
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-12-02 00:00:00.000000000 Z
11
+ date: 2016-12-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: hash_at_path