cdmbl 0.12.1 → 0.12.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/cdmbl/compound_filter.rb +5 -5
- data/lib/cdmbl/etl_worker.rb +18 -15
- data/lib/cdmbl/transform_worker.rb +13 -10
- data/lib/cdmbl/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 457638c1ed09f0b1aa3b7e01aec28680fdf88fe5
|
4
|
+
data.tar.gz: 93589d8ef2aa08c9e8188a34e1f7578f427e83fb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 06a93e9dfc37262dc43d485c29c8d0735b554fc854281f76f7b24e0825486dae8e6992eb299f9a381632147e703387f2c679a877cf40027ea8c2851a1c33f8dc
|
7
|
+
data.tar.gz: 39459eac0735491054b8237fd8560716084ce0e26b2b65d1c4351a13cad32bb0477ecb557ac5b0c7cc683572c4782e2b8bbb51e266a14712d662ac4568a106f0
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module CDMBL
|
2
|
-
|
3
|
-
|
4
|
-
|
2
|
+
# Takes a list of record id/collection data, uses CompoundLookup to
|
3
|
+
# identify records with large numbers of compounds and sorts them
|
4
|
+
# into a large and a small heap
|
5
5
|
class CompoundFilter
|
6
6
|
attr_reader :record_ids,
|
7
7
|
:max_compounds,
|
@@ -28,7 +28,7 @@ module CDMBL
|
|
28
28
|
end
|
29
29
|
|
30
30
|
def records
|
31
|
-
|
31
|
+
@records ||= record_ids.map do |identifier|
|
32
32
|
{
|
33
33
|
large: count(*identifier) >= max_compounds,
|
34
34
|
id: identifier
|
@@ -42,4 +42,4 @@ module CDMBL
|
|
42
42
|
id: id).count
|
43
43
|
end
|
44
44
|
end
|
45
|
-
end
|
45
|
+
end
|
data/lib/cdmbl/etl_worker.rb
CHANGED
@@ -11,6 +11,7 @@ module CDMBL
|
|
11
11
|
:cdm_endpoint,
|
12
12
|
:oai_endpoint,
|
13
13
|
:field_mappings,
|
14
|
+
:extract_compounds,
|
14
15
|
:resumption_token,
|
15
16
|
:set_spec,
|
16
17
|
:max_compounds,
|
@@ -28,16 +29,17 @@ module CDMBL
|
|
28
29
|
# Sidekiq stores params in JSON, so we can't inject dependencies. This
|
29
30
|
# results in the long set of arguments that follows. Otherwise, we'd
|
30
31
|
# simply inject the OAI request and extractor objects
|
31
|
-
@config
|
32
|
-
@solr_config
|
33
|
-
@cdm_endpoint
|
34
|
-
@oai_endpoint
|
35
|
-
@field_mappings
|
36
|
-
@
|
37
|
-
@
|
38
|
-
@
|
39
|
-
@
|
40
|
-
@
|
32
|
+
@config = config
|
33
|
+
@solr_config = config.fetch('solr_config').symbolize_keys
|
34
|
+
@cdm_endpoint = config.fetch('cdm_endpoint')
|
35
|
+
@oai_endpoint = config.fetch('oai_endpoint')
|
36
|
+
@field_mappings = config.fetch('field_mappings', false)
|
37
|
+
@extract_compounds = config.fetch('extract_compounds', false)
|
38
|
+
@resumption_token = config.fetch('resumption_token', nil)
|
39
|
+
@set_spec = config.fetch('set_spec', nil)
|
40
|
+
@max_compounds = config.fetch('max_compounds', 10)
|
41
|
+
@batch_size = config.fetch('batch_size', 5).to_i
|
42
|
+
@is_recursive = config.fetch('is_recursive', true)
|
41
43
|
extract_batch!
|
42
44
|
next_batch!
|
43
45
|
end
|
@@ -112,11 +114,12 @@ module CDMBL
|
|
112
114
|
end
|
113
115
|
|
114
116
|
def transform!(ids)
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
117
|
+
transform_worker_klass.perform_async(ids,
|
118
|
+
solr_config,
|
119
|
+
cdm_endpoint,
|
120
|
+
oai_endpoint,
|
121
|
+
field_mappings,
|
122
|
+
extract_compounds)
|
120
123
|
end
|
121
124
|
|
122
125
|
def delete_deletables!
|
@@ -6,7 +6,8 @@ module CDMBL
|
|
6
6
|
:solr_config,
|
7
7
|
:cdm_endpoint,
|
8
8
|
:oai_endpoint,
|
9
|
-
:field_mappings
|
9
|
+
:field_mappings,
|
10
|
+
:extract_compounds
|
10
11
|
|
11
12
|
attr_writer :cdm_api_klass,
|
12
13
|
:oai_request_klass,
|
@@ -19,14 +20,15 @@ module CDMBL
|
|
19
20
|
solr_config,
|
20
21
|
cdm_endpoint,
|
21
22
|
oai_endpoint,
|
22
|
-
field_mappings
|
23
|
-
|
24
|
-
|
25
|
-
@
|
26
|
-
@
|
27
|
-
@
|
28
|
-
@
|
29
|
-
|
23
|
+
field_mappings,
|
24
|
+
extract_compounds)
|
25
|
+
|
26
|
+
@identifiers = identifiers
|
27
|
+
@solr_config = solr_config
|
28
|
+
@cdm_endpoint = cdm_endpoint
|
29
|
+
@oai_endpoint = oai_endpoint
|
30
|
+
@field_mappings = field_mappings
|
31
|
+
@extract_compounds = extract_compounds
|
30
32
|
transform_and_load!
|
31
33
|
end
|
32
34
|
|
@@ -64,7 +66,8 @@ module CDMBL
|
|
64
66
|
@transformation ||=
|
65
67
|
transformer_klass.new(cdm_records: records,
|
66
68
|
oai_sets: set_lookup,
|
67
|
-
field_mappings: field_mappings
|
69
|
+
field_mappings: field_mappings,
|
70
|
+
extract_compounds: extract_compounds).records
|
68
71
|
end
|
69
72
|
|
70
73
|
def set_lookup
|
data/lib/cdmbl/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdmbl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.12.1
|
4
|
+
version: 0.12.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chadfennell
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-05-
|
11
|
+
date: 2018-05-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: hash_at_path
|