cdmbl 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cdmbl.rb +0 -1
- data/lib/cdmbl/oai_filter.rb +1 -1
- data/lib/cdmbl/tasks/etl.rake +18 -0
- data/lib/cdmbl/version.rb +1 -1
- metadata +2 -3
- data/lib/cdmbl/etl_run.rb +0 -65
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 417415def1b0f28a66e6640201a3c53becf43737
|
|
4
|
+
data.tar.gz: 36d22cc2acaab58453bb908877af79f2fe93ddba
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6a4a95dbdf25f2cfd26d2d037b900d07850bf4be2c50112319bcfd220cbddcf7e0325b62ff45267b9cddb0fc135291d946509e8bfe5a78486c5231e1665f7354
|
|
7
|
+
data.tar.gz: bc2795036b04aed45ac8be18829450001c08091b2df7c7af089b7faf1779d422ffb14e0610d5a9c3306b981bcbedcb584ef3b89b70a8fcdf94a81ff6dcd55752
|
data/lib/cdmbl.rb
CHANGED
|
@@ -9,7 +9,6 @@ require 'cdmbl/record_transformer'
|
|
|
9
9
|
require 'cdmbl/transformer'
|
|
10
10
|
require 'cdmbl/default_solr'
|
|
11
11
|
require 'cdmbl/loader'
|
|
12
|
-
require 'cdmbl/etl_run'
|
|
13
12
|
require 'cdmbl/etl_worker'
|
|
14
13
|
require 'cdmbl/default_completed_callback'
|
|
15
14
|
require 'cdmbl/default_cdm_notification'
|
data/lib/cdmbl/oai_filter.rb
CHANGED
data/lib/cdmbl/tasks/etl.rake
CHANGED
|
@@ -1,6 +1,24 @@
|
|
|
1
1
|
require 'cdmbl'
|
|
2
2
|
|
|
3
3
|
namespace :cdmbl do
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
desc 'Ingest a Collection Syncronously'
|
|
8
|
+
task :collection_sync do
|
|
9
|
+
# config = etl.config
|
|
10
|
+
# raise etl.config.keys.inspect
|
|
11
|
+
CDMBL::ETLWorker.new.perform(
|
|
12
|
+
'solr_config' => {:url=>"http://solr:8983/solr/mdl-1"},
|
|
13
|
+
'oai_endpoint' => 'http://cdm16022.contentdm.oclc.org/oai/oai.php',
|
|
14
|
+
'cdm_endpoint' => 'https://server16022.contentdm.oclc.org/dmwebservices/index.php',
|
|
15
|
+
'set_spec' => 'mpls',
|
|
16
|
+
'batch_size' => 10,
|
|
17
|
+
'max_compounds' => 10
|
|
18
|
+
)
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
|
|
4
22
|
desc 'Launch a background job to index metadata from CONTENTdm to Solr.'
|
|
5
23
|
task :batch, [
|
|
6
24
|
:solr_url,
|
data/lib/cdmbl/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: cdmbl
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.13.0
|
|
4
|
+
version: 0.14.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- chadfennell
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2019-01-07 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: hash_at_path
|
|
@@ -219,7 +219,6 @@ files:
|
|
|
219
219
|
- lib/cdmbl/default_oai_notification.rb
|
|
220
220
|
- lib/cdmbl/default_solr.rb
|
|
221
221
|
- lib/cdmbl/etl_by_set_specs.rb
|
|
222
|
-
- lib/cdmbl/etl_run.rb
|
|
223
222
|
- lib/cdmbl/etl_worker.rb
|
|
224
223
|
- lib/cdmbl/extract_worker.rb
|
|
225
224
|
- lib/cdmbl/extractor.rb
|
data/lib/cdmbl/etl_run.rb
DELETED
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
module CDMBL
|
|
2
|
-
# TODO: extract params into an ETL Profile and delegate
|
|
3
|
-
class ETLRun
|
|
4
|
-
attr_reader :oai_endpoint,
|
|
5
|
-
:cdm_endpoint,
|
|
6
|
-
:resumption_token,
|
|
7
|
-
:field_mappings,
|
|
8
|
-
:minimum_date,
|
|
9
|
-
:set_spec,
|
|
10
|
-
:oai_requester,
|
|
11
|
-
:extractor,
|
|
12
|
-
:transformer,
|
|
13
|
-
:loader,
|
|
14
|
-
:solr_client
|
|
15
|
-
def initialize(oai_endpoint: '',
|
|
16
|
-
cdm_endpoint: '',
|
|
17
|
-
resumption_token: false,
|
|
18
|
-
field_mappings: false,
|
|
19
|
-
minimum_date: nil,
|
|
20
|
-
set_spec: false,
|
|
21
|
-
oai_requester: OaiRequest,
|
|
22
|
-
extractor: Extractor,
|
|
23
|
-
transformer: Transformer,
|
|
24
|
-
loader: Loader,
|
|
25
|
-
solr_client: SolrClient.new)
|
|
26
|
-
|
|
27
|
-
@oai_endpoint = oai_endpoint
|
|
28
|
-
@cdm_endpoint = cdm_endpoint
|
|
29
|
-
@resumption_token = resumption_token
|
|
30
|
-
@field_mappings = field_mappings
|
|
31
|
-
@oai_requester = oai_requester
|
|
32
|
-
@minimum_date = minimum_date
|
|
33
|
-
@set_spec = set_spec
|
|
34
|
-
@extractor = extractor
|
|
35
|
-
@transformer = transformer
|
|
36
|
-
@loader = loader
|
|
37
|
-
@solr_client = solr_client
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
def extract
|
|
41
|
-
@extraction ||= extractor.new(oai_request: oai_request,
|
|
42
|
-
cdm_endpoint: cdm_endpoint)
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
def transform(sets, records)
|
|
46
|
-
@transformation ||= transformer.new(cdm_records: records,
|
|
47
|
-
oai_sets: sets,
|
|
48
|
-
field_mappings: field_mappings)
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
def load!(deletables, records)
|
|
52
|
-
loader.new(records: records,
|
|
53
|
-
deletable_ids: deletables,
|
|
54
|
-
solr_client: solr_client).load!
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
def oai_request
|
|
58
|
-
@oai_request ||= oai_requester.new(base_uri: oai_endpoint,
|
|
59
|
-
resumption_token: resumption_token,
|
|
60
|
-
from: minimum_date,
|
|
61
|
-
set: set_spec)
|
|
62
|
-
end
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
end
|