cdmbl 0.2.27 → 0.2.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fbacbfdb0861491fb0f6fc15a120716a46c280a7
4
- data.tar.gz: 0eda27fe1e6f94db141ab7484c926f8ab387ea41
3
+ metadata.gz: 10286777e3b6a73bff82b645d9793478bef136d0
4
+ data.tar.gz: 0342f14ccb141499d3b9a72f4a1a3e7c2215652d
5
5
  SHA512:
6
- metadata.gz: de81249496cc904c2077f18454de06e429dfb1d80ea05775c794e452070c22ea3794054e3cb12cd327f111b935df08d89545d24761fa8cbd5f10f961181cef54
7
- data.tar.gz: ff87f9363619ffcf91219415ceacf49630bdc12ac5bcdc67fd8a3232f1586862bee90b854c94d30731b9b1b516f4936d761a413e7ac70f944baa585a1691d491
6
+ metadata.gz: 05227bcaab3a297715c4076345109770a6c701aae1923186c9043d391b9bbf252ce9e5ad7536e85e6220e8a3e1cd1a602f325d5c65e2aa1641ae1cfb458a0f1c
7
+ data.tar.gz: dc004423977fbf3917f03af1e07a5092142a23c3be50c91913e2bad3f2bec322acace8c13e931a3b552b02a715b87b6b45a9798d18064b348e01d347a45fc3da
data/cdmbl.gemspec CHANGED
@@ -29,7 +29,7 @@ Gem::Specification.new do |spec|
29
29
  spec.add_dependency 'activesupport', '~> 4.2'
30
30
 
31
31
  spec.add_development_dependency 'bundler', '~> 1.12'
32
- spec.add_development_dependency 'rake', '~> 10.0'
32
+ spec.add_development_dependency 'rake', '~> 12.0'
33
33
  spec.add_development_dependency 'minitest', '~> 5.0'
34
34
  spec.add_development_dependency 'yard', '~> 0.9.0'
35
35
  spec.add_development_dependency 'webmock', '~> 1.24', '>= 1.24.0'
@@ -5,24 +5,27 @@ module CDMBL
5
5
 
6
6
  attr_reader :solr_config,
7
7
  :etl_config,
8
+ :batch_size,
8
9
  :is_recursive,
9
- :identifier,
10
+ :identifiers,
10
11
  :deletables
11
12
 
12
13
 
13
14
  def perform(solr_config,
14
15
  etl_config,
16
+ batch_size = 5,
15
17
  is_recursive = true,
16
- identifier = false,
18
+ identifiers = [],
17
19
  deletables = [])
18
20
 
19
21
  @etl_config = etl_config.symbolize_keys
20
22
  @solr_config = solr_config.symbolize_keys
23
+ @batch_size = batch_size.to_i
21
24
  @is_recursive = is_recursive
22
- @identifier = identifier
25
+ @identifiers = identifiers
23
26
  @deletables = deletables
24
27
 
25
- if identifier
28
+ if !identifiers.empty?
26
29
  load!
27
30
  else
28
31
  ingest_batches!
@@ -38,12 +41,13 @@ module CDMBL
38
41
 
39
42
  def ingest_batches!
40
43
  sent_deleted = false
41
- extraction.local_identifiers.each do |id|
44
+ extraction.local_identifiers.each_slice(batch_size) do |ids|
42
45
  delete_ids = (sent_deleted == false) ? extraction.deletable_ids : []
43
46
  ETLWorker.perform_async(solr_config,
44
47
  etl_config,
48
+ batch_size,
45
49
  is_recursive,
46
- id,
50
+ ids,
47
51
  delete_ids)
48
52
  sent_deleted = true
49
53
  end
@@ -55,11 +59,13 @@ module CDMBL
55
59
  end
56
60
 
57
61
  def transformation
58
- @transformation ||= etl_run.transform(extraction.set_lookup, [record])
62
+ @transformation ||= etl_run.transform(extraction.set_lookup, records)
59
63
  end
60
64
 
61
- def record
62
- extraction.cdm_request(*identifier)
65
+ def records
66
+ identifiers.map do |identifier|
67
+ extraction.cdm_request(*identifier)
68
+ end
63
69
  end
64
70
 
65
71
  def extraction
@@ -2,7 +2,7 @@ require 'cdmbl'
2
2
 
3
3
  namespace :cdmbl do
4
4
  desc 'Launch a background job to index metadata from CONTENTdm to Solr.'
5
- task :ingest, [:solr_url, :oai_endpoint, :cdm_endpoint, :minimum_date, :set_spec] do |t, args|
5
+ task :ingest, [:solr_url, :oai_endpoint, :cdm_endpoint, :minimum_date, :batch_size, :set_spec] do |t, args|
6
6
  solr_config = { url: args[solr_url] }
7
7
  etl_config = {
8
8
  oai_endpoint: args[:oai_endpoint],
@@ -11,7 +11,8 @@ namespace :cdmbl do
11
11
  set_spec: args[:set_spec]
12
12
  }
13
13
  etl_config = (args[:resumption_token]) ? etl_cofig.merge(args[:resumption_token]) : etl_config
14
- CDMBL::ETLWorker.perform_async(solr_config, etl_config)
14
+ batch_size = (args[:batch_size]) ? args[:batch_size] : 10
15
+ CDMBL::ETLWorker.perform_async(solr_config, etl_config, batch_size, true)
15
16
  end
16
17
  end
17
18
 
data/lib/cdmbl/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module CDMBL
2
- VERSION = "0.2.27"
2
+ VERSION = "0.2.28"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdmbl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.27
4
+ version: 0.2.28
5
5
  platform: ruby
6
6
  authors:
7
7
  - chadfennell
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-03-02 00:00:00.000000000 Z
11
+ date: 2017-03-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: hash_at_path
@@ -114,14 +114,14 @@ dependencies:
114
114
  requirements:
115
115
  - - "~>"
116
116
  - !ruby/object:Gem::Version
117
- version: '10.0'
117
+ version: '12.0'
118
118
  type: :development
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
- version: '10.0'
124
+ version: '12.0'
125
125
  - !ruby/object:Gem::Dependency
126
126
  name: minitest
127
127
  requirement: !ruby/object:Gem::Requirement