cdmbl 0.10.1 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/cdmbl/etl_by_set_specs.rb +18 -0
- data/lib/cdmbl/filtered_set_specs.rb +41 -0
- data/lib/cdmbl/set_spec_filter_callback.rb +17 -0
- data/lib/cdmbl/tasks/etl.rake +43 -0
- data/lib/cdmbl/version.rb +2 -2
- data/lib/cdmbl.rb +4 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cb68955dc77e2f7a1ceb938a49d7fa4aa471cd50
|
4
|
+
data.tar.gz: 66525455ae0dca29cbac2c175d43811bf781143e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 18c172df0c8265395e1316c73b229673a03a53d54449e819b002a1a710b0238487b8906084fb232596093a6a42cfba9e5797f58e15a1cb6dedb04482c55e953d
|
7
|
+
data.tar.gz: 97e3dc6c0c05f68ab04bb0d686623e092665ef022bf597b1a778df9105b85cd68f350887e068beb76534d234201c57f3664f9dad1fa7a284b14ea372302cadc5
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module CDMBL
  # Fans an ETL run out over a list of OAI setSpecs by enqueueing one worker
  # job per setSpec.
  class ETLBySetSpecs
    attr_reader :set_specs, :etl_config, :etl_worker_klass

    # @param set_specs [Array] setSpec identifiers; one job is enqueued for each
    # @param etl_config [#merge] base job configuration shared by every job
    # @param etl_worker_klass [#perform_async] class used to enqueue each job
    def initialize(set_specs: [:missing_setspec],
                   etl_config: :missing_etl_config,
                   etl_worker_klass: ETLWorker)
      @etl_worker_klass = etl_worker_klass
      @etl_config = etl_config
      @set_specs = set_specs
    end

    # Enqueues a job for every setSpec, in the order given.
    #
    # @return [Array] the value returned by +perform_async+ for each setSpec
    def run!
      set_specs.map { |spec| enqueue(spec) }
    end

    private

    # Builds the per-set configuration (base config plus +:set_spec+) and
    # hands it to the worker class.
    def enqueue(spec)
      job_config = etl_config.merge(set_spec: spec)
      etl_worker_klass.perform_async(job_config)
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module CDMBL
  # Pass-through filter behaviour: every set is considered valid.
  # Kept as a mixin so existing code that includes it keeps working.
  module DefaultFilterSetCallback
    def valid?(set: {})
      true
    end
  end

  # Instantiable default callback used by FilteredSetSpecs when the caller
  # supplies none. FilteredSetSpecs has always referenced this constant as its
  # default, but only the (differently named, non-instantiable) module above
  # was defined, so building a FilteredSetSpecs without a callback raised
  # NameError.
  class DefaultSetFilterCallback
    include DefaultFilterSetCallback
  end

  # Requests the set list from an OAI endpoint and returns the setSpec of
  # every set accepted by the filter callback.
  class FilteredSetSpecs
    attr_reader :oai_base_url,
                :oai_client,
                :callback

    # @param oai_base_url [String] base URL handed to the OAI client
    # @param oai_client [Class] instantiated with +base_url:+; the instance
    #   must respond to +request(query)+
    # @param callback [#valid?] called as +valid?(set: set)+ for each set;
    #   sets for which it returns a truthy value are kept
    def initialize(oai_base_url: :missing_oai_base_url,
                   oai_client: OaiClient,
                   callback: DefaultSetFilterCallback.new)
      @oai_base_url = oai_base_url
      @oai_client = oai_client
      @callback = callback
    end

    # @return [Array] the 'setSpec' value of each set that passed the filter
    def set_specs
      filtered.map { |set| set['setSpec'] }
    end

    private

    # Sets accepted by the callback.
    def filtered
      sets.select { |set| callback.valid?(set: set) }
    end

    # Set entries read from the ListSets response (memoized).
    def sets
      @sets ||= list_sets['OAI_PMH']['ListSets']['set']
    end

    # Raw ListSets response; memoized so the endpoint is queried only once
    # per instance.
    def list_sets
      @list_sets ||= oai_client.new(base_url: oai_base_url).request('verb=ListSets')
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module CDMBL
  # Filter callback that accepts or rejects a set by testing its 'setSpec'
  # value against a pattern.
  class SetSpecFilterCallback
    attr_reader :pattern, :inclusive

    # @param pattern [#match?] pattern tested against the set's 'setSpec'
    # @param inclusive [Boolean] when truthy, matching sets are valid;
    #   otherwise non-matching sets are valid
    def initialize(pattern: /.*/, inclusive: true)
      @inclusive = inclusive
      @pattern = pattern
    end

    # @param set [Hash] set entry carrying a 'setSpec' key
    # @return [Boolean] whether the set passes the filter
    def valid?(set: {})
      matched = matches?(set)
      return matched if inclusive

      !matched
    end

    # @param set [Hash] set entry carrying a 'setSpec' key
    # @return [Boolean] whether the set's 'setSpec' matches the pattern
    def matches?(set)
      spec = set['setSpec']
      pattern.match?(spec)
    end
  end
end
|
data/lib/cdmbl/tasks/etl.rake
CHANGED
@@ -20,6 +20,49 @@ namespace :cdmbl do
|
|
20
20
|
)
|
21
21
|
end
|
22
22
|
|
23
|
+
desc 'Launch an indexing worker for each collection with an optional regex
pattern to match setSpec. Patterns can be inclusive or exclusive.'
task :by_collections, [
  :solr_url,
  :oai_endpoint,
  :cdm_endpoint,
  :set_spec_pattern,
  :inclusive,
  :batch_size,
  :max_compounds
] do |_task, args|
  # Required args (fetch raises KeyError when one is missing)
  solr_url     = args.fetch(:solr_url)
  oai_endpoint = args.fetch(:oai_endpoint)
  cdm_endpoint = args.fetch(:cdm_endpoint)

  # Optional args
  pattern = args.fetch(:set_spec_pattern, false)
  # Rake passes task arguments as strings, so the literal text "false"
  # (any case) has to be recognised explicitly; anything else means inclusive.
  inclusive = args.fetch(:inclusive, true).to_s.strip.downcase != 'false'

  # Define your own callback if you want to use other set related fields
  # Use the SetSpecFilterCallback as an example of how to build your own filter
  filter_options = { oai_base_url: oai_endpoint }
  if pattern
    # Forward `inclusive` so that exclusive patterns actually take effect.
    filter_options[:callback] =
      CDMBL::SetSpecFilterCallback.new(pattern: Regexp.new(pattern),
                                       inclusive: inclusive)
  end
  set_specs = CDMBL::FilteredSetSpecs.new(**filter_options).set_specs

  puts "Indexing Sets: '#{set_specs.join(', ')}'"

  etl_config = {
    solr_config: { url: solr_url },
    oai_endpoint: oai_endpoint,
    cdm_endpoint: cdm_endpoint,
    batch_size: args.fetch(:batch_size, 10),
    # max_compounds is declared as a trailing task argument so it can be
    # supplied; it falls back to 10 when omitted.
    max_compounds: args.fetch(:max_compounds, 10)
  }

  CDMBL::ETLBySetSpecs.new(set_specs: set_specs, etl_config: etl_config).run!
end
|
65
|
+
|
23
66
|
desc 'Launch a background job to index a single record.'
|
24
67
|
task :record, [
|
25
68
|
:collection,
|
data/lib/cdmbl/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module CDMBL
|
2
|
-
VERSION = "0.10.1"
|
3
|
-
end
|
2
|
+
VERSION = "0.11.0"
|
3
|
+
end
|
data/lib/cdmbl.rb
CHANGED
@@ -25,4 +25,7 @@ require 'cdmbl/batch_deleter_worker'
|
|
25
25
|
require 'cdmbl/compound_lookup'
|
26
26
|
require 'cdmbl/compound_filter'
|
27
27
|
require 'cdmbl/load_worker'
|
28
|
-
require 'cdmbl/transform_worker'
|
28
|
+
require 'cdmbl/transform_worker'
|
29
|
+
require 'cdmbl/filtered_set_specs'
|
30
|
+
require 'cdmbl/etl_by_set_specs'
|
31
|
+
require 'cdmbl/set_spec_filter_callback'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdmbl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.1
|
4
|
+
version: 0.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chadfennell
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-04-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: hash_at_path
|
@@ -218,12 +218,14 @@ files:
|
|
218
218
|
- lib/cdmbl/default_loader_notification.rb
|
219
219
|
- lib/cdmbl/default_oai_notification.rb
|
220
220
|
- lib/cdmbl/default_solr.rb
|
221
|
+
- lib/cdmbl/etl_by_set_specs.rb
|
221
222
|
- lib/cdmbl/etl_run.rb
|
222
223
|
- lib/cdmbl/etl_worker.rb
|
223
224
|
- lib/cdmbl/extract_worker.rb
|
224
225
|
- lib/cdmbl/extractor.rb
|
225
226
|
- lib/cdmbl/field_formatter.rb
|
226
227
|
- lib/cdmbl/field_transformer.rb
|
228
|
+
- lib/cdmbl/filtered_set_specs.rb
|
227
229
|
- lib/cdmbl/formatters.rb
|
228
230
|
- lib/cdmbl/hooks.rb
|
229
231
|
- lib/cdmbl/load_worker.rb
|
@@ -236,6 +238,7 @@ files:
|
|
236
238
|
- lib/cdmbl/oai_set_lookup.rb
|
237
239
|
- lib/cdmbl/rake_task.rb
|
238
240
|
- lib/cdmbl/record_transformer.rb
|
241
|
+
- lib/cdmbl/set_spec_filter_callback.rb
|
239
242
|
- lib/cdmbl/tasks/delete.rake
|
240
243
|
- lib/cdmbl/tasks/etl.rake
|
241
244
|
- lib/cdmbl/transform_worker.rb
|