cdmbl 0.10.1 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3b42c6c8e77e084d0ef7859aa3e46c347f83d658
4
- data.tar.gz: a2a93fba6b9a2e25aebbebaa75b21bbfab980a0a
3
+ metadata.gz: cb68955dc77e2f7a1ceb938a49d7fa4aa471cd50
4
+ data.tar.gz: 66525455ae0dca29cbac2c175d43811bf781143e
5
5
  SHA512:
6
- metadata.gz: b820e473a137ef4179af8dd9e1bbeae4e35ea83fcc2dace62cbdb0c1df0eccc31b0821531bfcb1896f88075637f3b9ca8bf8e9cf62437bd9550d160e7251741c
7
- data.tar.gz: 8e0a7cf8762801a5e6cd9b59d63c74d4a38794db339f75fa3b9c9ce24947eda56e63ee0b97a04ee17ed1f62929aed365cb03f5be487a93aa5e1b0e09530f4e12
6
+ metadata.gz: 18c172df0c8265395e1316c73b229673a03a53d54449e819b002a1a710b0238487b8906084fb232596093a6a42cfba9e5797f58e15a1cb6dedb04482c55e953d
7
+ data.tar.gz: 97e3dc6c0c05f68ab04bb0d686623e092665ef022bf597b1a778df9105b85cd68f350887e068beb76534d234201c57f3664f9dad1fa7a284b14ea372302cadc5
@@ -0,0 +1,18 @@
1
module CDMBL
  # Enqueues one ETL worker job per setSpec, each job receiving the shared
  # ETL configuration with that setSpec merged in under :set_spec.
  class ETLBySetSpecs
    attr_reader :set_specs, :etl_config, :etl_worker_klass

    # set_specs:        collection of setSpec identifiers to launch jobs for
    # etl_config:       hash-like config shared by every job (must respond
    #                   to #merge)
    # etl_worker_klass: class responding to .perform_async
    def initialize(set_specs: [:missing_setspec],
                   etl_config: :missing_etl_config,
                   etl_worker_klass: ETLWorker)
      @etl_worker_klass = etl_worker_klass
      @etl_config       = etl_config
      @set_specs        = set_specs
    end

    # Launches a job for every setSpec and returns the collected results of
    # each perform_async call, in setSpec order.
    def run!
      set_specs.map { |spec| enqueue(spec) }
    end

    private

    # Hands the worker a copy of the config scoped to a single setSpec.
    def enqueue(spec)
      job_config = etl_config.merge(set_spec: spec)
      etl_worker_klass.perform_async(job_config)
    end
  end
end
@@ -0,0 +1,41 @@
1
module CDMBL
  # Accept-everything filter behaviour: every set is considered valid.
  # Kept as a module so code that already mixes it in keeps working.
  module DefaultFilterSetCallback
    def valid?(set: {})
      true
    end
  end

  # Instantiable default callback used by FilteredSetSpecs when the caller
  # does not supply one. The original default referenced this constant but
  # only the (non-instantiable) DefaultFilterSetCallback module existed, so
  # building a FilteredSetSpecs without a callback raised NameError.
  class DefaultSetFilterCallback
    include DefaultFilterSetCallback
  end

  # Requests the OAI ListSets response and exposes the setSpec of every set
  # that the callback accepts.
  class FilteredSetSpecs
    attr_reader :oai_base_url,
                :oai_client,
                :callback

    # oai_base_url: passed to the OAI client as base_url
    # oai_client:   class instantiated with base_url: and asked to
    #               #request('verb=ListSets')
    # callback:     object responding to #valid?(set:); sets for which it
    #               returns a truthy value are kept
    def initialize(oai_base_url: :missing_oai_base_url,
                   oai_client: OaiClient,
                   callback: DefaultSetFilterCallback.new)
      @oai_base_url = oai_base_url
      @oai_client   = oai_client
      @callback     = callback
    end

    # Returns the 'setSpec' value of each set accepted by the callback.
    def set_specs
      filtered.map { |set| set['setSpec'] }
    end

    private

    # Sets for which the callback answers valid?.
    def filtered
      sets.select { |set| callback.valid?(set: set) }
    end

    # Memoized list of sets read from the ListSets response.
    # NOTE(review): assumes the parsed response nests an enumerable of
    # hash-like sets under OAI_PMH/ListSets/set - confirm against OaiClient.
    def sets
      @sets ||= list_sets['OAI_PMH']['ListSets']['set']
    end

    # Memoized ListSets response, so the client is only asked once.
    def list_sets
      @list_sets ||= oai_client.new(base_url: oai_base_url).request('verb=ListSets')
    end
  end
end
@@ -0,0 +1,17 @@
1
module CDMBL
  # Set filter that accepts or rejects a set depending on whether its
  # 'setSpec' value matches a regular expression.
  class SetSpecFilterCallback
    attr_reader :pattern, :inclusive

    # pattern:   regular expression tested against the set's 'setSpec'
    # inclusive: when truthy, matching sets are valid; otherwise
    #            non-matching sets are valid
    def initialize(pattern: /.*/, inclusive: true)
      @pattern, @inclusive = pattern, inclusive
    end

    # True when the set should be kept under the configured mode.
    def valid?(set: {})
      matched = matches?(set)
      return matched if inclusive

      !matched
    end

    # True when the set's 'setSpec' matches the pattern.
    def matches?(set)
      spec = set['setSpec']
      pattern.match?(spec)
    end
  end
end
@@ -20,6 +20,49 @@ namespace :cdmbl do
20
20
  )
21
21
  end
22
22
 
23
desc 'Launch an indexing worker for each collection with an optional regex
pattern to match setSpec. Patterns can be inclusive or exclusive.'
task :by_collections, [
  :solr_url,
  :oai_endpoint,
  :cdm_endpoint,
  :set_spec_pattern,
  :inclusive,
  :batch_size,
  :max_compounds
] do |_t, args|
  # Required args
  oai_endpoint = args.fetch(:oai_endpoint)
  solr_url     = args.fetch(:solr_url)
  cdm_endpoint = args.fetch(:cdm_endpoint)
  # Optional args
  pattern       = args.fetch(:set_spec_pattern, false)
  batch_size    = args.fetch(:batch_size, 10)
  # Declared as a trailing task argument so it can actually be supplied;
  # previously it was fetched but could never be anything but the default.
  max_compounds = args.fetch(:max_compounds, 10)
  # Task arguments given on the command line are strings, so "false" would
  # be truthy if used directly. Only explicit negatives switch the filter
  # to exclusive mode; anything else (including the default) is inclusive.
  inclusive = !%w[false f no n 0].include?(
    args.fetch(:inclusive, true).to_s.strip.downcase
  )

  # Define your own callback if you want to use other set related fields
  # Use the SetSpecFilterCallback as an example of how to build your own filter
  set_specs =
    if pattern
      filter = CDMBL::SetSpecFilterCallback.new(pattern: Regexp.new(pattern),
                                                inclusive: inclusive)
      CDMBL::FilteredSetSpecs.new(oai_base_url: oai_endpoint,
                                  callback: filter).set_specs
    else
      CDMBL::FilteredSetSpecs.new(oai_base_url: oai_endpoint).set_specs
    end

  puts "Indexing Sets: '#{set_specs.join(', ')}'"

  etl_config = {
    solr_config: { url: solr_url },
    oai_endpoint: oai_endpoint,
    cdm_endpoint: cdm_endpoint,
    batch_size: batch_size,
    max_compounds: max_compounds
  }

  CDMBL::ETLBySetSpecs.new(set_specs: set_specs, etl_config: etl_config).run!
end
65
+
23
66
  desc 'Launch a background job to index a single record.'
24
67
  task :record, [
25
68
  :collection,
data/lib/cdmbl/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module CDMBL
2
- VERSION = "0.10.1"
3
- end
2
+ VERSION = "0.11.0"
3
+ end
data/lib/cdmbl.rb CHANGED
@@ -25,4 +25,7 @@ require 'cdmbl/batch_deleter_worker'
25
25
  require 'cdmbl/compound_lookup'
26
26
  require 'cdmbl/compound_filter'
27
27
  require 'cdmbl/load_worker'
28
- require 'cdmbl/transform_worker'
28
+ require 'cdmbl/transform_worker'
29
+ require 'cdmbl/filtered_set_specs'
30
+ require 'cdmbl/etl_by_set_specs'
31
+ require 'cdmbl/set_spec_filter_callback'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdmbl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.1
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - chadfennell
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-03-26 00:00:00.000000000 Z
11
+ date: 2018-04-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: hash_at_path
@@ -218,12 +218,14 @@ files:
218
218
  - lib/cdmbl/default_loader_notification.rb
219
219
  - lib/cdmbl/default_oai_notification.rb
220
220
  - lib/cdmbl/default_solr.rb
221
+ - lib/cdmbl/etl_by_set_specs.rb
221
222
  - lib/cdmbl/etl_run.rb
222
223
  - lib/cdmbl/etl_worker.rb
223
224
  - lib/cdmbl/extract_worker.rb
224
225
  - lib/cdmbl/extractor.rb
225
226
  - lib/cdmbl/field_formatter.rb
226
227
  - lib/cdmbl/field_transformer.rb
228
+ - lib/cdmbl/filtered_set_specs.rb
227
229
  - lib/cdmbl/formatters.rb
228
230
  - lib/cdmbl/hooks.rb
229
231
  - lib/cdmbl/load_worker.rb
@@ -236,6 +238,7 @@ files:
236
238
  - lib/cdmbl/oai_set_lookup.rb
237
239
  - lib/cdmbl/rake_task.rb
238
240
  - lib/cdmbl/record_transformer.rb
241
+ - lib/cdmbl/set_spec_filter_callback.rb
239
242
  - lib/cdmbl/tasks/delete.rake
240
243
  - lib/cdmbl/tasks/etl.rake
241
244
  - lib/cdmbl/transform_worker.rb