cdmbl 0.10.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3b42c6c8e77e084d0ef7859aa3e46c347f83d658
4
- data.tar.gz: a2a93fba6b9a2e25aebbebaa75b21bbfab980a0a
3
+ metadata.gz: cb68955dc77e2f7a1ceb938a49d7fa4aa471cd50
4
+ data.tar.gz: 66525455ae0dca29cbac2c175d43811bf781143e
5
5
  SHA512:
6
- metadata.gz: b820e473a137ef4179af8dd9e1bbeae4e35ea83fcc2dace62cbdb0c1df0eccc31b0821531bfcb1896f88075637f3b9ca8bf8e9cf62437bd9550d160e7251741c
7
- data.tar.gz: 8e0a7cf8762801a5e6cd9b59d63c74d4a38794db339f75fa3b9c9ce24947eda56e63ee0b97a04ee17ed1f62929aed365cb03f5be487a93aa5e1b0e09530f4e12
6
+ metadata.gz: 18c172df0c8265395e1316c73b229673a03a53d54449e819b002a1a710b0238487b8906084fb232596093a6a42cfba9e5797f58e15a1cb6dedb04482c55e953d
7
+ data.tar.gz: 97e3dc6c0c05f68ab04bb0d686623e092665ef022bf597b1a778df9105b85cd68f350887e068beb76534d234201c57f3664f9dad1fa7a284b14ea372302cadc5
@@ -0,0 +1,18 @@
1
module CDMBL
  # Enqueues one ETL worker job per setSpec.
  #
  # Every job receives the shared +etl_config+ hash with a +:set_spec+ entry
  # merged in for the set it is responsible for.
  class ETLBySetSpecs
    attr_reader :set_specs, :etl_config, :etl_worker_klass

    # @param set_specs [Array, Object, nil] setSpec identifiers to process. A
    #   single value is treated as a one-element list and nil as an empty list.
    # @param etl_config [Hash] base configuration passed to every worker. The
    #   symbol default is only a placeholder: it does not respond to +merge+,
    #   so +run!+ raises NoMethodError when no real config is supplied.
    # @param etl_worker_klass [#perform_async] receiver that is sent
    #   +perform_async+ once per setSpec.
    def initialize(set_specs: [:missing_setspec],
                   etl_config: :missing_etl_config,
                   etl_worker_klass: ETLWorker)
      # Array() leaves arrays untouched while letting callers hand in a lone
      # setSpec (or nil) without tripping over +map+ in run!.
      @set_specs        = Array(set_specs)
      @etl_config       = etl_config
      @etl_worker_klass = etl_worker_klass
    end

    # Sends +perform_async+ to the worker class once per setSpec.
    #
    # @return [Array] the values returned by each +perform_async+ call, in
    #   the same order as +set_specs+
    def run!
      set_specs.map do |set_spec|
        etl_worker_klass.perform_async(etl_config.merge(set_spec: set_spec))
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
module CDMBL
  # Pass-through filter behaviour: every set is reported as valid.
  module DefaultFilterSetCallback
    # @param set [Hash] a set entry (ignored)
    # @return [true]
    def valid?(set: {})
      true
    end
  end

  # Instantiable form of the pass-through filter. FilteredSetSpecs uses an
  # instance of this class when no callback is supplied; a module cannot be
  # instantiated, so the behaviour is mixed into a class here.
  class DefaultSetFilterCallback
    include DefaultFilterSetCallback
  end

  # Requests the list of sets from an OAI endpoint and returns the setSpec
  # of every set accepted by the callback.
  class FilteredSetSpecs
    attr_reader :oai_base_url,
                :oai_client,
                :callback

    # @param oai_base_url [Object] passed to the client as +base_url:+
    # @param oai_client [Class] instantiated with +base_url:+ and then sent
    #   +request('verb=ListSets')+
    # @param callback [#valid?] object answering +valid?(set:)+; sets for
    #   which it returns a truthy value are kept
    def initialize(oai_base_url: :missing_oai_base_url,
                   oai_client: OaiClient,
                   callback: DefaultSetFilterCallback.new)
      @oai_base_url = oai_base_url
      @oai_client   = oai_client
      @callback     = callback
    end

    # @return [Array] the 'setSpec' value of each set accepted by the callback
    def set_specs
      filtered.map { |set| set['setSpec'] }
    end

    private

    # Sets for which the callback answers truthy.
    def filtered
      sets.select { |set| callback.valid?(set: set) }
    end

    # The set entries found under OAI_PMH -> ListSets -> set in the response.
    def sets
      @sets ||= wrap_sets(list_sets['OAI_PMH']['ListSets']['set'])
    end

    # Always hands back an Array of set entries.
    # NOTE(review): presumably the client's XML-to-hash conversion yields a
    # bare Hash when only one set exists - confirm against OaiClient. Without
    # wrapping, iterating a Hash would yield key/value pairs instead of sets.
    def wrap_sets(found)
      found.is_a?(Hash) ? [found] : Array(found)
    end

    # Memoized response of the ListSets request.
    def list_sets
      @list_sets ||= oai_client.new(base_url: oai_base_url).request('verb=ListSets')
    end
  end
end
@@ -0,0 +1,17 @@
1
module CDMBL
  # Filter callback that accepts or rejects a set by matching its 'setSpec'
  # value against a pattern.
  class SetSpecFilterCallback
    attr_reader :pattern, :inclusive

    # @param pattern [Regexp, String] pattern tested against the setSpec.
    #   Strings are compiled to a Regexp: calling +match?+ on a raw String
    #   would treat the setSpec as the pattern and invert the comparison.
    # @param inclusive [Boolean] when truthy, matching sets are valid; when
    #   falsy, non-matching sets are valid
    def initialize(pattern: /.*/, inclusive: true)
      @pattern   = pattern.is_a?(String) ? Regexp.new(pattern) : pattern
      @inclusive = inclusive
    end

    # @param set [Hash] set entry carrying a 'setSpec' key
    # @return [Boolean] whether the set passes the filter
    def valid?(set: {})
      inclusive ? matches?(set) : !matches?(set)
    end

    # @param set [Hash] set entry carrying a 'setSpec' key
    # @return [Boolean] whether the set's setSpec matches the pattern
    def matches?(set)
      pattern.match?(set['setSpec'])
    end
  end
end
@@ -20,6 +20,49 @@ namespace :cdmbl do
20
20
  )
21
21
  end
22
22
 
23
desc 'Launch an indexing worker for each collection with an optional regex
pattern to match setSpec. Patterns can be inclusive or exclusive.'
task :by_collections, [
  :solr_url,
  :oai_endpoint,
  :cdm_endpoint,
  :set_spec_pattern,
  :inclusive,
  :batch_size,
  :max_compounds
] do |_t, args|
  # Required args (fetch without a default raises when one is missing)
  oai_endpoint = args.fetch(:oai_endpoint)
  solr_url     = args.fetch(:solr_url)
  cdm_endpoint = args.fetch(:cdm_endpoint)
  # Optional args
  pattern       = args.fetch(:set_spec_pattern, false)
  # Task arguments given on the command line are Strings, so the literal
  # text "false" has to be translated; any other value means inclusive.
  inclusive     = args.fetch(:inclusive, true).to_s.strip.downcase != 'false'
  # 10 is the default that was effectively passed to the workers before.
  batch_size    = args.fetch(:batch_size, 10)
  max_compounds = args.fetch(:max_compounds, 10)

  # Define your own callback if you want to use other set related fields
  # Use the SetSpecFilterCallback as an example of how to build your own filter
  #
  # An explicit callback is always supplied so the task does not depend on
  # FilteredSetSpecs' default callback; without a pattern the filter's
  # defaults (match everything, inclusive) are used.
  filter =
    if pattern
      CDMBL::SetSpecFilterCallback.new(pattern: Regexp.new(pattern),
                                       inclusive: inclusive)
    else
      CDMBL::SetSpecFilterCallback.new
    end

  set_specs = CDMBL::FilteredSetSpecs.new(oai_base_url: oai_endpoint,
                                          callback: filter).set_specs

  puts "Indexing Sets: '#{set_specs.join(', ')}'"

  etl_config = {
    solr_config: { url: solr_url },
    oai_endpoint: oai_endpoint,
    cdm_endpoint: cdm_endpoint,
    batch_size: batch_size,
    max_compounds: max_compounds
  }

  CDMBL::ETLBySetSpecs.new(set_specs: set_specs, etl_config: etl_config).run!
end
65
+
23
66
  desc 'Launch a background job to index a single record.'
24
67
  task :record, [
25
68
  :collection,
data/lib/cdmbl/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module CDMBL
2
- VERSION = "0.10.1"
3
- end
2
+ VERSION = "0.11.0"
3
+ end
data/lib/cdmbl.rb CHANGED
@@ -25,4 +25,7 @@ require 'cdmbl/batch_deleter_worker'
25
25
  require 'cdmbl/compound_lookup'
26
26
  require 'cdmbl/compound_filter'
27
27
  require 'cdmbl/load_worker'
28
- require 'cdmbl/transform_worker'
28
+ require 'cdmbl/transform_worker'
29
+ require 'cdmbl/filtered_set_specs'
30
+ require 'cdmbl/etl_by_set_specs'
31
+ require 'cdmbl/set_spec_filter_callback'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdmbl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.1
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - chadfennell
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-03-26 00:00:00.000000000 Z
11
+ date: 2018-04-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: hash_at_path
@@ -218,12 +218,14 @@ files:
218
218
  - lib/cdmbl/default_loader_notification.rb
219
219
  - lib/cdmbl/default_oai_notification.rb
220
220
  - lib/cdmbl/default_solr.rb
221
+ - lib/cdmbl/etl_by_set_specs.rb
221
222
  - lib/cdmbl/etl_run.rb
222
223
  - lib/cdmbl/etl_worker.rb
223
224
  - lib/cdmbl/extract_worker.rb
224
225
  - lib/cdmbl/extractor.rb
225
226
  - lib/cdmbl/field_formatter.rb
226
227
  - lib/cdmbl/field_transformer.rb
228
+ - lib/cdmbl/filtered_set_specs.rb
227
229
  - lib/cdmbl/formatters.rb
228
230
  - lib/cdmbl/hooks.rb
229
231
  - lib/cdmbl/load_worker.rb
@@ -236,6 +238,7 @@ files:
236
238
  - lib/cdmbl/oai_set_lookup.rb
237
239
  - lib/cdmbl/rake_task.rb
238
240
  - lib/cdmbl/record_transformer.rb
241
+ - lib/cdmbl/set_spec_filter_callback.rb
239
242
  - lib/cdmbl/tasks/delete.rake
240
243
  - lib/cdmbl/tasks/etl.rake
241
244
  - lib/cdmbl/transform_worker.rb