cdmbl 0.2.25 → 0.2.26

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a38f93dada47610d74fd9724fc057a86ab46d9db
4
- data.tar.gz: a410ba1672d49c893de4e862047eae7bbd7316df
3
+ metadata.gz: e7d48b4a55adff68d3096b6f4e04f20b26cbcb61
4
+ data.tar.gz: 7c670f721ef8617a51994becbca83e7ca9900f30
5
5
  SHA512:
6
- metadata.gz: e2bde7dc1b0879186db2149092c967db2d3f4ff5cead71acb71dd8f0cb6bdad394ff2eee4d676e4d998f320ef2486b285ecfa093647cb54395c42db79b820923
7
- data.tar.gz: 63df148bd2914fefba9972dc0b220493a8bb3398eed410ca6bfeece20b15c1a205acc89d9b22e98a53855e027ea8f20b908f215f83ac3ebdc929282871bce624
6
+ metadata.gz: 8802661c746e0c8ea8d162b00777c26be2f9928d8dc0ec36b4691dbf8635c51e5af80bf5c3950c9330592236e3812d9dadfc85637338ef5bd792f534e8b00e9e
7
+ data.tar.gz: f58528feef5a8d52ef4d76f9a98127414e037b7e38e89dcc8ed233a8e901b28ee7a0a47b54d0a51f3c2fb5ccfc2ad7bc59047ab65a322119f038be651915ae39
data/lib/cdmbl/etl_run.rb CHANGED
@@ -6,6 +6,7 @@ module CDMBL
6
6
  :resumption_token,
7
7
  :field_mappings,
8
8
  :minimum_date,
9
+ :set_spec,
9
10
  :oai_requester,
10
11
  :extractor,
11
12
  :transformer,
@@ -16,6 +17,7 @@ module CDMBL
16
17
  resumption_token: false,
17
18
  field_mappings: false,
18
19
  minimum_date: nil,
20
+ set_spec: false,
19
21
  oai_requester: OaiRequest,
20
22
  extractor: Extractor,
21
23
  transformer: Transformer,
@@ -28,6 +30,7 @@ module CDMBL
28
30
  @field_mappings = field_mappings
29
31
  @oai_requester = oai_requester
30
32
  @minimum_date = minimum_date
33
+ @set_spec = set_spec
31
34
  @extractor = extractor
32
35
  @transformer = transformer
33
36
  @loader = loader
@@ -54,7 +57,8 @@ module CDMBL
54
57
  def oai_request
55
58
  @oai_request ||= oai_requester.new(base_uri: oai_endpoint,
56
59
  resumption_token: resumption_token,
57
- from: minimum_date)
60
+ from: minimum_date,
61
+ set: set_spec)
58
62
  end
59
63
  end
60
64
 
@@ -1,30 +1,28 @@
1
1
  require 'sidekiq'
2
2
  module CDMBL
3
3
  class ETLWorker
4
+ include Sidekiq::Worker
5
+
4
6
  attr_reader :solr_config,
5
7
  :etl_config,
6
- :batch_size,
7
8
  :is_recursive,
8
- :identifiers,
9
+ :identifier,
9
10
  :deletables
10
11
 
11
- include Sidekiq::Worker
12
12
 
13
13
  def perform(solr_config,
14
14
  etl_config,
15
- batch_size = 10,
16
15
  is_recursive = true,
17
- identifiers = [],
16
+ identifier = false,
18
17
  deletables = [])
19
-
18
+
20
19
  @etl_config = etl_config.symbolize_keys
21
20
  @solr_config = solr_config.symbolize_keys
22
21
  @is_recursive = is_recursive
23
- @identifiers = identifiers
22
+ @identifier = identifier
24
23
  @deletables = deletables
25
- @batch_size = batch_size
26
-
27
- if !identifiers.empty?
24
+
25
+ if identifier
28
26
  load!
29
27
  else
30
28
  ingest_batches!
@@ -40,13 +38,12 @@ module CDMBL
40
38
 
41
39
  def ingest_batches!
42
40
  sent_deleted = false
43
- extraction.local_identifiers.each_slice(batch_size) do |ids|
41
+ extraction.local_identifiers.each do |id|
44
42
  delete_ids = (sent_deleted == false) ? extraction.deletable_ids : []
45
43
  ETLWorker.perform_async(solr_config,
46
44
  etl_config,
47
- batch_size,
48
45
  is_recursive,
49
- ids,
46
+ id,
50
47
  delete_ids)
51
48
  sent_deleted = true
52
49
  end
@@ -58,13 +55,11 @@ module CDMBL
58
55
  end
59
56
 
60
57
  def transformation
61
- @transformation ||= etl_run.transform(extraction.set_lookup, records)
58
+ @transformation ||= etl_run.transform(extraction.set_lookup, [record])
62
59
  end
63
60
 
64
- def records
65
- identifiers.map do |identifier|
66
- extraction.cdm_request(*identifier)
67
- end
61
+ def record
62
+ extraction.cdm_request(*identifier)
68
63
  end
69
64
 
70
65
  def extraction
@@ -32,10 +32,6 @@ module CDMBL
32
32
  oai_set_lookup.new(oai_sets: to_hash(sets)).keyed
33
33
  end
34
34
 
35
- def ids
36
- (specific_ids) ? specific_ids : local_identifiers
37
- end
38
-
39
35
  def deletable_ids
40
36
  oai_ids.deletable_ids
41
37
  end
@@ -1,14 +1,16 @@
1
1
  module CDMBL
2
2
  class OaiRequest
3
- attr_reader :base_uri, :resumption_token, :client, :from
3
+ attr_reader :base_uri, :resumption_token, :client, :from, :set
4
4
  def initialize(base_uri: '',
5
5
  resumption_token: false,
6
6
  from: false,
7
+ set: false,
7
8
  client: Net::HTTP)
8
9
  @base_uri = base_uri
9
10
  @resumption_token = resumption_token
10
11
  @client = client
11
12
  @from = (from) ? "&from=#{from}" : ''
13
+ @set = (set) ? "&set=#{set}" : ''
12
14
  end
13
15
 
14
16
  def identifiers
@@ -22,7 +24,7 @@ module CDMBL
22
24
  private
23
25
 
24
26
  def first_batch_uri
25
- "#{base_uri}?verb=ListIdentifiers&metadataPrefix=oai_dc#{from}"
27
+ "#{base_uri}?verb=ListIdentifiers&metadataPrefix=oai_dc#{from}#{set}"
26
28
  end
27
29
 
28
30
  def batch_uri
@@ -4,10 +4,14 @@ namespace :cdmbl do
4
4
  desc 'Launch a background job to index metadata from CONTENTdm to Solr.'
5
5
  task :ingest, [:solr_url, :oai_endpoint, :cdm_endpoint, :minimum_date] do |t, args|
6
6
  solr_config = { url: args[solr_url] }
7
- etl_config = { oai_endpoint: args[:oai_endpoint], cdm_endpoint: args[:cdm_endpoint], minimum_date: args[:minimum_date] }
8
- etl_config = (args[:resumption_token]) ? etl_cofig.merge(args[:resumption_token]) : etl_config
9
- batch_size = (args[:batch_size]) ? args[:batch_size] : 10
10
- CDMBL::ETLWorker.perform_async(solr_config, etl_config, batch_size)
7
+ etl_config = {
8
+ oai_endpoint: args[:oai_endpoint],
9
+ cdm_endpoint: args[:cdm_endpoint],
10
+ minimum_date: args[:minimum_date]
11
+ set_spec: args[:set_spec]
12
+ }
13
+ etl_config = (args[:resumption_token]) ? etl_cofig.merge(args[:resumption_token]) : etl_config
14
+ CDMBL::ETLWorker.perform_async(solr_config, etl_config)
11
15
  end
12
16
  end
13
17
 
@@ -36,7 +36,7 @@ module CDMBL
36
36
  end
37
37
  end
38
38
 
39
- #TODO: Make a
39
+ #TODO: Make a
40
40
  def with_location(dest_record, record)
41
41
  if record['latitu'] && record['longit'] && record['latitu'] != '' && record['longit'] != ''
42
42
  dest_record.merge({'location_llsi': "#{record['latitu']}, #{record['longit']}"})
data/lib/cdmbl/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module CDMBL
2
- VERSION = "0.2.25"
2
+ VERSION = "0.2.26"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdmbl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.25
4
+ version: 0.2.26
5
5
  platform: ruby
6
6
  authors:
7
7
  - chadfennell