cdmbl 0.2.25 → 0.2.26

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a38f93dada47610d74fd9724fc057a86ab46d9db
4
- data.tar.gz: a410ba1672d49c893de4e862047eae7bbd7316df
3
+ metadata.gz: e7d48b4a55adff68d3096b6f4e04f20b26cbcb61
4
+ data.tar.gz: 7c670f721ef8617a51994becbca83e7ca9900f30
5
5
  SHA512:
6
- metadata.gz: e2bde7dc1b0879186db2149092c967db2d3f4ff5cead71acb71dd8f0cb6bdad394ff2eee4d676e4d998f320ef2486b285ecfa093647cb54395c42db79b820923
7
- data.tar.gz: 63df148bd2914fefba9972dc0b220493a8bb3398eed410ca6bfeece20b15c1a205acc89d9b22e98a53855e027ea8f20b908f215f83ac3ebdc929282871bce624
6
+ metadata.gz: 8802661c746e0c8ea8d162b00777c26be2f9928d8dc0ec36b4691dbf8635c51e5af80bf5c3950c9330592236e3812d9dadfc85637338ef5bd792f534e8b00e9e
7
+ data.tar.gz: f58528feef5a8d52ef4d76f9a98127414e037b7e38e89dcc8ed233a8e901b28ee7a0a47b54d0a51f3c2fb5ccfc2ad7bc59047ab65a322119f038be651915ae39
data/lib/cdmbl/etl_run.rb CHANGED
@@ -6,6 +6,7 @@ module CDMBL
6
6
  :resumption_token,
7
7
  :field_mappings,
8
8
  :minimum_date,
9
+ :set_spec,
9
10
  :oai_requester,
10
11
  :extractor,
11
12
  :transformer,
@@ -16,6 +17,7 @@ module CDMBL
16
17
  resumption_token: false,
17
18
  field_mappings: false,
18
19
  minimum_date: nil,
20
+ set_spec: false,
19
21
  oai_requester: OaiRequest,
20
22
  extractor: Extractor,
21
23
  transformer: Transformer,
@@ -28,6 +30,7 @@ module CDMBL
28
30
  @field_mappings = field_mappings
29
31
  @oai_requester = oai_requester
30
32
  @minimum_date = minimum_date
33
+ @set_spec = set_spec
31
34
  @extractor = extractor
32
35
  @transformer = transformer
33
36
  @loader = loader
@@ -54,7 +57,8 @@ module CDMBL
54
57
  def oai_request
55
58
  @oai_request ||= oai_requester.new(base_uri: oai_endpoint,
56
59
  resumption_token: resumption_token,
57
- from: minimum_date)
60
+ from: minimum_date,
61
+ set: set_spec)
58
62
  end
59
63
  end
60
64
 
@@ -1,30 +1,28 @@
1
1
  require 'sidekiq'
2
2
  module CDMBL
3
3
  class ETLWorker
4
+ include Sidekiq::Worker
5
+
4
6
  attr_reader :solr_config,
5
7
  :etl_config,
6
- :batch_size,
7
8
  :is_recursive,
8
- :identifiers,
9
+ :identifier,
9
10
  :deletables
10
11
 
11
- include Sidekiq::Worker
12
12
 
13
13
  def perform(solr_config,
14
14
  etl_config,
15
- batch_size = 10,
16
15
  is_recursive = true,
17
- identifiers = [],
16
+ identifier = false,
18
17
  deletables = [])
19
-
18
+
20
19
  @etl_config = etl_config.symbolize_keys
21
20
  @solr_config = solr_config.symbolize_keys
22
21
  @is_recursive = is_recursive
23
- @identifiers = identifiers
22
+ @identifier = identifier
24
23
  @deletables = deletables
25
- @batch_size = batch_size
26
-
27
- if !identifiers.empty?
24
+
25
+ if identifier
28
26
  load!
29
27
  else
30
28
  ingest_batches!
@@ -40,13 +38,12 @@ module CDMBL
40
38
 
41
39
  def ingest_batches!
42
40
  sent_deleted = false
43
- extraction.local_identifiers.each_slice(batch_size) do |ids|
41
+ extraction.local_identifiers.each do |id|
44
42
  delete_ids = (sent_deleted == false) ? extraction.deletable_ids : []
45
43
  ETLWorker.perform_async(solr_config,
46
44
  etl_config,
47
- batch_size,
48
45
  is_recursive,
49
- ids,
46
+ id,
50
47
  delete_ids)
51
48
  sent_deleted = true
52
49
  end
@@ -58,13 +55,11 @@ module CDMBL
58
55
  end
59
56
 
60
57
  def transformation
61
- @transformation ||= etl_run.transform(extraction.set_lookup, records)
58
+ @transformation ||= etl_run.transform(extraction.set_lookup, [record])
62
59
  end
63
60
 
64
- def records
65
- identifiers.map do |identifier|
66
- extraction.cdm_request(*identifier)
67
- end
61
+ def record
62
+ extraction.cdm_request(*identifier)
68
63
  end
69
64
 
70
65
  def extraction
@@ -32,10 +32,6 @@ module CDMBL
32
32
  oai_set_lookup.new(oai_sets: to_hash(sets)).keyed
33
33
  end
34
34
 
35
- def ids
36
- (specific_ids) ? specific_ids : local_identifiers
37
- end
38
-
39
35
  def deletable_ids
40
36
  oai_ids.deletable_ids
41
37
  end
@@ -1,14 +1,16 @@
1
1
  module CDMBL
2
2
  class OaiRequest
3
- attr_reader :base_uri, :resumption_token, :client, :from
3
+ attr_reader :base_uri, :resumption_token, :client, :from, :set
4
4
  def initialize(base_uri: '',
5
5
  resumption_token: false,
6
6
  from: false,
7
+ set: false,
7
8
  client: Net::HTTP)
8
9
  @base_uri = base_uri
9
10
  @resumption_token = resumption_token
10
11
  @client = client
11
12
  @from = (from) ? "&from=#{from}" : ''
13
+ @set = (set) ? "&set=#{set}" : ''
12
14
  end
13
15
 
14
16
  def identifiers
@@ -22,7 +24,7 @@ module CDMBL
22
24
  private
23
25
 
24
26
  def first_batch_uri
25
- "#{base_uri}?verb=ListIdentifiers&metadataPrefix=oai_dc#{from}"
27
+ "#{base_uri}?verb=ListIdentifiers&metadataPrefix=oai_dc#{from}#{set}"
26
28
  end
27
29
 
28
30
  def batch_uri
@@ -4,10 +4,14 @@ namespace :cdmbl do
4
4
  desc 'Launch a background job to index metadata from CONTENTdm to Solr.'
5
5
  task :ingest, [:solr_url, :oai_endpoint, :cdm_endpoint, :minimum_date] do |t, args|
6
6
  solr_config = { url: args[solr_url] }
7
- etl_config = { oai_endpoint: args[:oai_endpoint], cdm_endpoint: args[:cdm_endpoint], minimum_date: args[:minimum_date] }
8
- etl_config = (args[:resumption_token]) ? etl_cofig.merge(args[:resumption_token]) : etl_config
9
- batch_size = (args[:batch_size]) ? args[:batch_size] : 10
10
- CDMBL::ETLWorker.perform_async(solr_config, etl_config, batch_size)
7
+ etl_config = {
8
+ oai_endpoint: args[:oai_endpoint],
9
+ cdm_endpoint: args[:cdm_endpoint],
10
+ minimum_date: args[:minimum_date]
11
+ set_spec: args[:set_spec]
12
+ }
13
+ etl_config = (args[:resumption_token]) ? etl_cofig.merge(args[:resumption_token]) : etl_config
14
+ CDMBL::ETLWorker.perform_async(solr_config, etl_config)
11
15
  end
12
16
  end
13
17
 
@@ -36,7 +36,7 @@ module CDMBL
36
36
  end
37
37
  end
38
38
 
39
- #TODO: Make a
39
+ #TODO: Make a
40
40
  def with_location(dest_record, record)
41
41
  if record['latitu'] && record['longit'] && record['latitu'] != '' && record['longit'] != ''
42
42
  dest_record.merge({'location_llsi': "#{record['latitu']}, #{record['longit']}"})
data/lib/cdmbl/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module CDMBL
2
- VERSION = "0.2.25"
2
+ VERSION = "0.2.26"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdmbl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.25
4
+ version: 0.2.26
5
5
  platform: ruby
6
6
  authors:
7
7
  - chadfennell