cdmbl 0.10.1 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cdmbl/etl_by_set_specs.rb +18 -0
- data/lib/cdmbl/filtered_set_specs.rb +41 -0
- data/lib/cdmbl/set_spec_filter_callback.rb +17 -0
- data/lib/cdmbl/tasks/etl.rake +43 -0
- data/lib/cdmbl/version.rb +2 -2
- data/lib/cdmbl.rb +4 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cb68955dc77e2f7a1ceb938a49d7fa4aa471cd50
|
4
|
+
data.tar.gz: 66525455ae0dca29cbac2c175d43811bf781143e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 18c172df0c8265395e1316c73b229673a03a53d54449e819b002a1a710b0238487b8906084fb232596093a6a42cfba9e5797f58e15a1cb6dedb04482c55e953d
|
7
|
+
data.tar.gz: 97e3dc6c0c05f68ab04bb0d686623e092665ef022bf597b1a778df9105b85cd68f350887e068beb76534d234201c57f3664f9dad1fa7a284b14ea372302cadc5
|
data/lib/cdmbl/etl_by_set_specs.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
module CDMBL
|
2
|
+
class ETLBySetSpecs
|
3
|
+
attr_reader :set_specs, :etl_config, :etl_worker_klass
|
4
|
+
def initialize(set_specs: [:missing_setspec],
|
5
|
+
etl_config: :missing_etl_config,
|
6
|
+
etl_worker_klass: ETLWorker)
|
7
|
+
@set_specs = set_specs
|
8
|
+
@etl_config = etl_config
|
9
|
+
@etl_worker_klass = etl_worker_klass
|
10
|
+
end
|
11
|
+
|
12
|
+
def run!
|
13
|
+
set_specs.map do |set_spec|
|
14
|
+
etl_worker_klass.perform_async(etl_config.merge(set_spec: set_spec))
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/cdmbl/filtered_set_specs.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
module CDMBL
|
2
|
+
module DefaultFilterSetCallback
|
3
|
+
def valid?(set: {})
|
4
|
+
true
|
5
|
+
end
|
6
|
+
end
|
7
|
+
|
8
|
+
class FilteredSetSpecs
|
9
|
+
attr_reader :oai_base_url,
|
10
|
+
:oai_client,
|
11
|
+
:callback
|
12
|
+
|
13
|
+
def initialize(oai_base_url: :missing_oai_base_url,
|
14
|
+
oai_client: OaiClient,
|
15
|
+
callback: DefaultSetFilterCallback.new)
|
16
|
+
@oai_base_url = oai_base_url
|
17
|
+
@oai_client = oai_client
|
18
|
+
@callback = callback
|
19
|
+
end
|
20
|
+
|
21
|
+
def set_specs
|
22
|
+
filtered.map {|set| set['setSpec']}
|
23
|
+
end
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
def filtered
|
28
|
+
sets.select do |set|
|
29
|
+
callback.valid?(set: set)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def sets
|
34
|
+
@sets ||= list_sets['OAI_PMH']['ListSets']['set']
|
35
|
+
end
|
36
|
+
|
37
|
+
def list_sets
|
38
|
+
@list_sets ||= oai_client.new(base_url: oai_base_url).request('verb=ListSets')
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
data/lib/cdmbl/set_spec_filter_callback.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
module CDMBL
|
2
|
+
class SetSpecFilterCallback
|
3
|
+
attr_reader :pattern, :inclusive
|
4
|
+
def initialize(pattern: /.*/, inclusive: true)
|
5
|
+
@pattern = pattern
|
6
|
+
@inclusive = inclusive
|
7
|
+
end
|
8
|
+
|
9
|
+
def valid?(set: {})
|
10
|
+
(inclusive) ? matches?(set) : !matches?(set)
|
11
|
+
end
|
12
|
+
|
13
|
+
def matches?(set)
|
14
|
+
pattern.match?(set['setSpec'])
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/lib/cdmbl/tasks/etl.rake
CHANGED
@@ -20,6 +20,49 @@ namespace :cdmbl do
|
|
20
20
|
)
|
21
21
|
end
|
22
22
|
|
23
|
+
desc 'Launch an indexing worker for each collection with an optional regex
|
24
|
+
pattern to match setSpec. Patterns can be inclusive or exclusive.'
|
25
|
+
task :by_collections, [
|
26
|
+
:solr_url,
|
27
|
+
:oai_endpoint,
|
28
|
+
:cdm_endpoint,
|
29
|
+
:set_spec_pattern,
|
30
|
+
:inclusive,
|
31
|
+
:batch_size
|
32
|
+
] do |t, args|
|
33
|
+
# Required args
|
34
|
+
oai_endpoint = args.fetch(:oai_endpoint)
|
35
|
+
solr_url = args.fetch(:solr_url)
|
36
|
+
cdm_endpoint = args.fetch(:cdm_endpoint)
|
37
|
+
# Optional args
|
38
|
+
pattern = args.fetch(:set_spec_pattern, false)
|
39
|
+
inclusive = args.fetch(:inclusive, true)
|
40
|
+
batch_size = args.fetch(:batch_size, 5)
|
41
|
+
|
42
|
+
# Define your own callback if you want to use other set related fields
|
43
|
+
# Use the SetSpecFilterCallback as an example of how to build your own filter
|
44
|
+
set_specs =
|
45
|
+
if pattern
|
46
|
+
filter = CDMBL::SetSpecFilterCallback.new(pattern: Regexp.new(pattern))
|
47
|
+
CDMBL::FilteredSetSpecs.new(oai_base_url: oai_endpoint,
|
48
|
+
callback: filter).set_specs
|
49
|
+
else
|
50
|
+
CDMBL::FilteredSetSpecs.new(oai_base_url: oai_endpoint).set_specs
|
51
|
+
end
|
52
|
+
|
53
|
+
puts "Indexing Sets: '#{set_specs.join(', ')}'"
|
54
|
+
|
55
|
+
etl_config = {
|
56
|
+
solr_config: { url: args.fetch(:solr_url) },
|
57
|
+
oai_endpoint: args.fetch(:oai_endpoint),
|
58
|
+
cdm_endpoint: args.fetch(:cdm_endpoint),
|
59
|
+
batch_size: args.fetch(:batch_size, 10),
|
60
|
+
max_compounds: args.fetch(:max_compounds, 10)
|
61
|
+
}
|
62
|
+
|
63
|
+
CDMBL::ETLBySetSpecs.new(set_specs: set_specs, etl_config: etl_config).run!
|
64
|
+
end
|
65
|
+
|
23
66
|
desc 'Launch a background job to index a single record.'
|
24
67
|
task :record, [
|
25
68
|
:collection,
|
data/lib/cdmbl/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module CDMBL
|
2
|
-
VERSION = "0.10.1"
|
3
|
-
end
|
2
|
+
VERSION = "0.11.0"
|
3
|
+
end
|
data/lib/cdmbl.rb
CHANGED
@@ -25,4 +25,7 @@ require 'cdmbl/batch_deleter_worker'
|
|
25
25
|
require 'cdmbl/compound_lookup'
|
26
26
|
require 'cdmbl/compound_filter'
|
27
27
|
require 'cdmbl/load_worker'
|
28
|
-
require 'cdmbl/transform_worker'
|
28
|
+
require 'cdmbl/transform_worker'
|
29
|
+
require 'cdmbl/filtered_set_specs'
|
30
|
+
require 'cdmbl/etl_by_set_specs'
|
31
|
+
require 'cdmbl/set_spec_filter_callback'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdmbl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.1
|
4
|
+
version: 0.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chadfennell
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-04-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: hash_at_path
|
@@ -218,12 +218,14 @@ files:
|
|
218
218
|
- lib/cdmbl/default_loader_notification.rb
|
219
219
|
- lib/cdmbl/default_oai_notification.rb
|
220
220
|
- lib/cdmbl/default_solr.rb
|
221
|
+
- lib/cdmbl/etl_by_set_specs.rb
|
221
222
|
- lib/cdmbl/etl_run.rb
|
222
223
|
- lib/cdmbl/etl_worker.rb
|
223
224
|
- lib/cdmbl/extract_worker.rb
|
224
225
|
- lib/cdmbl/extractor.rb
|
225
226
|
- lib/cdmbl/field_formatter.rb
|
226
227
|
- lib/cdmbl/field_transformer.rb
|
228
|
+
- lib/cdmbl/filtered_set_specs.rb
|
227
229
|
- lib/cdmbl/formatters.rb
|
228
230
|
- lib/cdmbl/hooks.rb
|
229
231
|
- lib/cdmbl/load_worker.rb
|
@@ -236,6 +238,7 @@ files:
|
|
236
238
|
- lib/cdmbl/oai_set_lookup.rb
|
237
239
|
- lib/cdmbl/rake_task.rb
|
238
240
|
- lib/cdmbl/record_transformer.rb
|
241
|
+
- lib/cdmbl/set_spec_filter_callback.rb
|
239
242
|
- lib/cdmbl/tasks/delete.rake
|
240
243
|
- lib/cdmbl/tasks/etl.rake
|
241
244
|
- lib/cdmbl/transform_worker.rb
|