cdmbl 0.2.25 → 0.2.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cdmbl/etl_run.rb +5 -1
- data/lib/cdmbl/etl_worker.rb +13 -18
- data/lib/cdmbl/extractor.rb +0 -4
- data/lib/cdmbl/oai_request.rb +4 -2
- data/lib/cdmbl/tasks/etl.rake +8 -4
- data/lib/cdmbl/transformer.rb +1 -1
- data/lib/cdmbl/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e7d48b4a55adff68d3096b6f4e04f20b26cbcb61
|
|
4
|
+
data.tar.gz: 7c670f721ef8617a51994becbca83e7ca9900f30
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8802661c746e0c8ea8d162b00777c26be2f9928d8dc0ec36b4691dbf8635c51e5af80bf5c3950c9330592236e3812d9dadfc85637338ef5bd792f534e8b00e9e
|
|
7
|
+
data.tar.gz: f58528feef5a8d52ef4d76f9a98127414e037b7e38e89dcc8ed233a8e901b28ee7a0a47b54d0a51f3c2fb5ccfc2ad7bc59047ab65a322119f038be651915ae39
|
data/lib/cdmbl/etl_run.rb
CHANGED
|
@@ -6,6 +6,7 @@ module CDMBL
|
|
|
6
6
|
:resumption_token,
|
|
7
7
|
:field_mappings,
|
|
8
8
|
:minimum_date,
|
|
9
|
+
:set_spec,
|
|
9
10
|
:oai_requester,
|
|
10
11
|
:extractor,
|
|
11
12
|
:transformer,
|
|
@@ -16,6 +17,7 @@ module CDMBL
|
|
|
16
17
|
resumption_token: false,
|
|
17
18
|
field_mappings: false,
|
|
18
19
|
minimum_date: nil,
|
|
20
|
+
set_spec: false,
|
|
19
21
|
oai_requester: OaiRequest,
|
|
20
22
|
extractor: Extractor,
|
|
21
23
|
transformer: Transformer,
|
|
@@ -28,6 +30,7 @@ module CDMBL
|
|
|
28
30
|
@field_mappings = field_mappings
|
|
29
31
|
@oai_requester = oai_requester
|
|
30
32
|
@minimum_date = minimum_date
|
|
33
|
+
@set_spec = set_spec
|
|
31
34
|
@extractor = extractor
|
|
32
35
|
@transformer = transformer
|
|
33
36
|
@loader = loader
|
|
@@ -54,7 +57,8 @@ module CDMBL
|
|
|
54
57
|
def oai_request
|
|
55
58
|
@oai_request ||= oai_requester.new(base_uri: oai_endpoint,
|
|
56
59
|
resumption_token: resumption_token,
|
|
57
|
-
from: minimum_date
|
|
60
|
+
from: minimum_date,
|
|
61
|
+
set: set_spec)
|
|
58
62
|
end
|
|
59
63
|
end
|
|
60
64
|
|
data/lib/cdmbl/etl_worker.rb
CHANGED
|
@@ -1,30 +1,28 @@
|
|
|
1
1
|
require 'sidekiq'
|
|
2
2
|
module CDMBL
|
|
3
3
|
class ETLWorker
|
|
4
|
+
include Sidekiq::Worker
|
|
5
|
+
|
|
4
6
|
attr_reader :solr_config,
|
|
5
7
|
:etl_config,
|
|
6
|
-
:batch_size,
|
|
7
8
|
:is_recursive,
|
|
8
|
-
:identifiers,
|
|
9
|
+
:identifier,
|
|
9
10
|
:deletables
|
|
10
11
|
|
|
11
|
-
include Sidekiq::Worker
|
|
12
12
|
|
|
13
13
|
def perform(solr_config,
|
|
14
14
|
etl_config,
|
|
15
|
-
batch_size = 10,
|
|
16
15
|
is_recursive = true,
|
|
17
|
-
|
|
16
|
+
identifier = false,
|
|
18
17
|
deletables = [])
|
|
19
|
-
|
|
18
|
+
|
|
20
19
|
@etl_config = etl_config.symbolize_keys
|
|
21
20
|
@solr_config = solr_config.symbolize_keys
|
|
22
21
|
@is_recursive = is_recursive
|
|
23
|
-
@identifiers = identifiers
|
|
22
|
+
@identifier = identifier
|
|
24
23
|
@deletables = deletables
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
if !identifiers.empty?
|
|
24
|
+
|
|
25
|
+
if identifier
|
|
28
26
|
load!
|
|
29
27
|
else
|
|
30
28
|
ingest_batches!
|
|
@@ -40,13 +38,12 @@ module CDMBL
|
|
|
40
38
|
|
|
41
39
|
def ingest_batches!
|
|
42
40
|
sent_deleted = false
|
|
43
|
-
extraction.local_identifiers.
|
|
41
|
+
extraction.local_identifiers.each do |id|
|
|
44
42
|
delete_ids = (sent_deleted == false) ? extraction.deletable_ids : []
|
|
45
43
|
ETLWorker.perform_async(solr_config,
|
|
46
44
|
etl_config,
|
|
47
|
-
batch_size,
|
|
48
45
|
is_recursive,
|
|
49
|
-
|
|
46
|
+
id,
|
|
50
47
|
delete_ids)
|
|
51
48
|
sent_deleted = true
|
|
52
49
|
end
|
|
@@ -58,13 +55,11 @@ module CDMBL
|
|
|
58
55
|
end
|
|
59
56
|
|
|
60
57
|
def transformation
|
|
61
|
-
@transformation ||= etl_run.transform(extraction.set_lookup,
|
|
58
|
+
@transformation ||= etl_run.transform(extraction.set_lookup, [record])
|
|
62
59
|
end
|
|
63
60
|
|
|
64
|
-
def
|
|
65
|
-
|
|
66
|
-
extraction.cdm_request(*identifier)
|
|
67
|
-
end
|
|
61
|
+
def record
|
|
62
|
+
extraction.cdm_request(*identifier)
|
|
68
63
|
end
|
|
69
64
|
|
|
70
65
|
def extraction
|
data/lib/cdmbl/extractor.rb
CHANGED
data/lib/cdmbl/oai_request.rb
CHANGED
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
module CDMBL
|
|
2
2
|
class OaiRequest
|
|
3
|
-
attr_reader :base_uri, :resumption_token, :client, :from
|
|
3
|
+
attr_reader :base_uri, :resumption_token, :client, :from, :set
|
|
4
4
|
def initialize(base_uri: '',
|
|
5
5
|
resumption_token: false,
|
|
6
6
|
from: false,
|
|
7
|
+
set: false,
|
|
7
8
|
client: Net::HTTP)
|
|
8
9
|
@base_uri = base_uri
|
|
9
10
|
@resumption_token = resumption_token
|
|
10
11
|
@client = client
|
|
11
12
|
@from = (from) ? "&from=#{from}" : ''
|
|
13
|
+
@set = (set) ? "&set=#{set}" : ''
|
|
12
14
|
end
|
|
13
15
|
|
|
14
16
|
def identifiers
|
|
@@ -22,7 +24,7 @@ module CDMBL
|
|
|
22
24
|
private
|
|
23
25
|
|
|
24
26
|
def first_batch_uri
|
|
25
|
-
"#{base_uri}?verb=ListIdentifiers&metadataPrefix=oai_dc#{from}"
|
|
27
|
+
"#{base_uri}?verb=ListIdentifiers&metadataPrefix=oai_dc#{from}#{set}"
|
|
26
28
|
end
|
|
27
29
|
|
|
28
30
|
def batch_uri
|
data/lib/cdmbl/tasks/etl.rake
CHANGED
|
@@ -4,10 +4,14 @@ namespace :cdmbl do
|
|
|
4
4
|
desc 'Launch a background job to index metadata from CONTENTdm to Solr.'
|
|
5
5
|
task :ingest, [:solr_url, :oai_endpoint, :cdm_endpoint, :minimum_date] do |t, args|
|
|
6
6
|
solr_config = { url: args[solr_url] }
|
|
7
|
-
etl_config = {
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
7
|
+
etl_config = {
|
|
8
|
+
oai_endpoint: args[:oai_endpoint],
|
|
9
|
+
cdm_endpoint: args[:cdm_endpoint],
|
|
10
|
+
minimum_date: args[:minimum_date]
|
|
11
|
+
set_spec: args[:set_spec]
|
|
12
|
+
}
|
|
13
|
+
etl_config = (args[:resumption_token]) ? etl_cofig.merge(args[:resumption_token]) : etl_config
|
|
14
|
+
CDMBL::ETLWorker.perform_async(solr_config, etl_config)
|
|
11
15
|
end
|
|
12
16
|
end
|
|
13
17
|
|
data/lib/cdmbl/transformer.rb
CHANGED
|
@@ -36,7 +36,7 @@ module CDMBL
|
|
|
36
36
|
end
|
|
37
37
|
end
|
|
38
38
|
|
|
39
|
-
#TODO: Make a
|
|
39
|
+
#TODO: Make a
|
|
40
40
|
def with_location(dest_record, record)
|
|
41
41
|
if record['latitu'] && record['longit'] && record['latitu'] != '' && record['longit'] != ''
|
|
42
42
|
dest_record.merge({'location_llsi': "#{record['latitu']}, #{record['longit']}"})
|
data/lib/cdmbl/version.rb
CHANGED