cdmbl 0.2.25 → 0.2.26
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/cdmbl/etl_run.rb +5 -1
- data/lib/cdmbl/etl_worker.rb +13 -18
- data/lib/cdmbl/extractor.rb +0 -4
- data/lib/cdmbl/oai_request.rb +4 -2
- data/lib/cdmbl/tasks/etl.rake +8 -4
- data/lib/cdmbl/transformer.rb +1 -1
- data/lib/cdmbl/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e7d48b4a55adff68d3096b6f4e04f20b26cbcb61
|
4
|
+
data.tar.gz: 7c670f721ef8617a51994becbca83e7ca9900f30
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8802661c746e0c8ea8d162b00777c26be2f9928d8dc0ec36b4691dbf8635c51e5af80bf5c3950c9330592236e3812d9dadfc85637338ef5bd792f534e8b00e9e
|
7
|
+
data.tar.gz: f58528feef5a8d52ef4d76f9a98127414e037b7e38e89dcc8ed233a8e901b28ee7a0a47b54d0a51f3c2fb5ccfc2ad7bc59047ab65a322119f038be651915ae39
|
data/lib/cdmbl/etl_run.rb
CHANGED
@@ -6,6 +6,7 @@ module CDMBL
|
|
6
6
|
:resumption_token,
|
7
7
|
:field_mappings,
|
8
8
|
:minimum_date,
|
9
|
+
:set_spec,
|
9
10
|
:oai_requester,
|
10
11
|
:extractor,
|
11
12
|
:transformer,
|
@@ -16,6 +17,7 @@ module CDMBL
|
|
16
17
|
resumption_token: false,
|
17
18
|
field_mappings: false,
|
18
19
|
minimum_date: nil,
|
20
|
+
set_spec: false,
|
19
21
|
oai_requester: OaiRequest,
|
20
22
|
extractor: Extractor,
|
21
23
|
transformer: Transformer,
|
@@ -28,6 +30,7 @@ module CDMBL
|
|
28
30
|
@field_mappings = field_mappings
|
29
31
|
@oai_requester = oai_requester
|
30
32
|
@minimum_date = minimum_date
|
33
|
+
@set_spec = set_spec
|
31
34
|
@extractor = extractor
|
32
35
|
@transformer = transformer
|
33
36
|
@loader = loader
|
@@ -54,7 +57,8 @@ module CDMBL
|
|
54
57
|
def oai_request
|
55
58
|
@oai_request ||= oai_requester.new(base_uri: oai_endpoint,
|
56
59
|
resumption_token: resumption_token,
|
57
|
-
from: minimum_date
|
60
|
+
from: minimum_date,
|
61
|
+
set: set_spec)
|
58
62
|
end
|
59
63
|
end
|
60
64
|
|
data/lib/cdmbl/etl_worker.rb
CHANGED
@@ -1,30 +1,28 @@
|
|
1
1
|
require 'sidekiq'
|
2
2
|
module CDMBL
|
3
3
|
class ETLWorker
|
4
|
+
include Sidekiq::Worker
|
5
|
+
|
4
6
|
attr_reader :solr_config,
|
5
7
|
:etl_config,
|
6
|
-
:batch_size,
|
7
8
|
:is_recursive,
|
8
|
-
:identifiers,
|
9
|
+
:identifier,
|
9
10
|
:deletables
|
10
11
|
|
11
|
-
include Sidekiq::Worker
|
12
12
|
|
13
13
|
def perform(solr_config,
|
14
14
|
etl_config,
|
15
|
-
batch_size = 10,
|
16
15
|
is_recursive = true,
|
17
|
-
|
16
|
+
identifier = false,
|
18
17
|
deletables = [])
|
19
|
-
|
18
|
+
|
20
19
|
@etl_config = etl_config.symbolize_keys
|
21
20
|
@solr_config = solr_config.symbolize_keys
|
22
21
|
@is_recursive = is_recursive
|
23
|
-
@identifiers = identifiers
|
22
|
+
@identifier = identifier
|
24
23
|
@deletables = deletables
|
25
|
-
|
26
|
-
|
27
|
-
if !identifiers.empty?
|
24
|
+
|
25
|
+
if identifier
|
28
26
|
load!
|
29
27
|
else
|
30
28
|
ingest_batches!
|
@@ -40,13 +38,12 @@ module CDMBL
|
|
40
38
|
|
41
39
|
def ingest_batches!
|
42
40
|
sent_deleted = false
|
43
|
-
extraction.local_identifiers.
|
41
|
+
extraction.local_identifiers.each do |id|
|
44
42
|
delete_ids = (sent_deleted == false) ? extraction.deletable_ids : []
|
45
43
|
ETLWorker.perform_async(solr_config,
|
46
44
|
etl_config,
|
47
|
-
batch_size,
|
48
45
|
is_recursive,
|
49
|
-
|
46
|
+
id,
|
50
47
|
delete_ids)
|
51
48
|
sent_deleted = true
|
52
49
|
end
|
@@ -58,13 +55,11 @@ module CDMBL
|
|
58
55
|
end
|
59
56
|
|
60
57
|
def transformation
|
61
|
-
@transformation ||= etl_run.transform(extraction.set_lookup,
|
58
|
+
@transformation ||= etl_run.transform(extraction.set_lookup, [record])
|
62
59
|
end
|
63
60
|
|
64
|
-
def
|
65
|
-
|
66
|
-
extraction.cdm_request(*identifier)
|
67
|
-
end
|
61
|
+
def record
|
62
|
+
extraction.cdm_request(*identifier)
|
68
63
|
end
|
69
64
|
|
70
65
|
def extraction
|
data/lib/cdmbl/extractor.rb
CHANGED
data/lib/cdmbl/oai_request.rb
CHANGED
@@ -1,14 +1,16 @@
|
|
1
1
|
module CDMBL
|
2
2
|
class OaiRequest
|
3
|
-
attr_reader :base_uri, :resumption_token, :client, :from
|
3
|
+
attr_reader :base_uri, :resumption_token, :client, :from, :set
|
4
4
|
def initialize(base_uri: '',
|
5
5
|
resumption_token: false,
|
6
6
|
from: false,
|
7
|
+
set: false,
|
7
8
|
client: Net::HTTP)
|
8
9
|
@base_uri = base_uri
|
9
10
|
@resumption_token = resumption_token
|
10
11
|
@client = client
|
11
12
|
@from = (from) ? "&from=#{from}" : ''
|
13
|
+
@set = (set) ? "&set=#{set}" : ''
|
12
14
|
end
|
13
15
|
|
14
16
|
def identifiers
|
@@ -22,7 +24,7 @@ module CDMBL
|
|
22
24
|
private
|
23
25
|
|
24
26
|
def first_batch_uri
|
25
|
-
"#{base_uri}?verb=ListIdentifiers&metadataPrefix=oai_dc#{from}"
|
27
|
+
"#{base_uri}?verb=ListIdentifiers&metadataPrefix=oai_dc#{from}#{set}"
|
26
28
|
end
|
27
29
|
|
28
30
|
def batch_uri
|
data/lib/cdmbl/tasks/etl.rake
CHANGED
@@ -4,10 +4,14 @@ namespace :cdmbl do
|
|
4
4
|
desc 'Launch a background job to index metadata from CONTENTdm to Solr.'
|
5
5
|
task :ingest, [:solr_url, :oai_endpoint, :cdm_endpoint, :minimum_date] do |t, args|
|
6
6
|
solr_config = { url: args[solr_url] }
|
7
|
-
etl_config = {
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
etl_config = {
|
8
|
+
oai_endpoint: args[:oai_endpoint],
|
9
|
+
cdm_endpoint: args[:cdm_endpoint],
|
10
|
+
minimum_date: args[:minimum_date]
|
11
|
+
set_spec: args[:set_spec]
|
12
|
+
}
|
13
|
+
etl_config = (args[:resumption_token]) ? etl_cofig.merge(args[:resumption_token]) : etl_config
|
14
|
+
CDMBL::ETLWorker.perform_async(solr_config, etl_config)
|
11
15
|
end
|
12
16
|
end
|
13
17
|
|
data/lib/cdmbl/transformer.rb
CHANGED
@@ -36,7 +36,7 @@ module CDMBL
|
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
39
|
-
#TODO: Make a
|
39
|
+
#TODO: Make a
|
40
40
|
def with_location(dest_record, record)
|
41
41
|
if record['latitu'] && record['longit'] && record['latitu'] != '' && record['longit'] != ''
|
42
42
|
dest_record.merge({'location_llsi': "#{record['latitu']}, #{record['longit']}"})
|
data/lib/cdmbl/version.rb
CHANGED