cdmbl 0.12.1 → 0.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cdmbl/compound_filter.rb +5 -5
- data/lib/cdmbl/etl_worker.rb +18 -15
- data/lib/cdmbl/transform_worker.rb +13 -10
- data/lib/cdmbl/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 457638c1ed09f0b1aa3b7e01aec28680fdf88fe5
|
4
|
+
data.tar.gz: 93589d8ef2aa08c9e8188a34e1f7578f427e83fb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 06a93e9dfc37262dc43d485c29c8d0735b554fc854281f76f7b24e0825486dae8e6992eb299f9a381632147e703387f2c679a877cf40027ea8c2851a1c33f8dc
|
7
|
+
data.tar.gz: 39459eac0735491054b8237fd8560716084ce0e26b2b65d1c4351a13cad32bb0477ecb557ac5b0c7cc683572c4782e2b8bbb51e266a14712d662ac4568a106f0
|
data/lib/cdmbl/compound_filter.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module CDMBL
|
2
|
-
|
3
|
-
|
4
|
-
|
2
|
+
# Takes a list of record id/collection data, uses CompoundLookup to
|
3
|
+
# identify records with large numbers of compounds and sorts them
|
4
|
+
# into a large and a small heap
|
5
5
|
class CompoundFilter
|
6
6
|
attr_reader :record_ids,
|
7
7
|
:max_compounds,
|
@@ -28,7 +28,7 @@ module CDMBL
|
|
28
28
|
end
|
29
29
|
|
30
30
|
def records
|
31
|
-
|
31
|
+
@records ||= record_ids.map do |identifier|
|
32
32
|
{
|
33
33
|
large: count(*identifier) >= max_compounds,
|
34
34
|
id: identifier
|
@@ -42,4 +42,4 @@ module CDMBL
|
|
42
42
|
id: id).count
|
43
43
|
end
|
44
44
|
end
|
45
|
-
end
|
45
|
+
end
|
data/lib/cdmbl/etl_worker.rb
CHANGED
@@ -11,6 +11,7 @@ module CDMBL
|
|
11
11
|
:cdm_endpoint,
|
12
12
|
:oai_endpoint,
|
13
13
|
:field_mappings,
|
14
|
+
:extract_compounds,
|
14
15
|
:resumption_token,
|
15
16
|
:set_spec,
|
16
17
|
:max_compounds,
|
@@ -28,16 +29,17 @@ module CDMBL
|
|
28
29
|
# Sidekiq stores params in JSON, so we can't inject dependencies. This
|
29
30
|
# results in the long set of arguments that follows. Otherwise, we'd
|
30
31
|
# simply inject the OAI request and extractor objects
|
31
|
-
@config
|
32
|
-
@solr_config
|
33
|
-
@cdm_endpoint
|
34
|
-
@oai_endpoint
|
35
|
-
@field_mappings
|
36
|
-
@
|
37
|
-
@
|
38
|
-
@
|
39
|
-
@
|
40
|
-
@
|
32
|
+
@config = config
|
33
|
+
@solr_config = config.fetch('solr_config').symbolize_keys
|
34
|
+
@cdm_endpoint = config.fetch('cdm_endpoint')
|
35
|
+
@oai_endpoint = config.fetch('oai_endpoint')
|
36
|
+
@field_mappings = config.fetch('field_mappings', false)
|
37
|
+
@extract_compounds = config.fetch('extract_compounds', false)
|
38
|
+
@resumption_token = config.fetch('resumption_token', nil)
|
39
|
+
@set_spec = config.fetch('set_spec', nil)
|
40
|
+
@max_compounds = config.fetch('max_compounds', 10)
|
41
|
+
@batch_size = config.fetch('batch_size', 5).to_i
|
42
|
+
@is_recursive = config.fetch('is_recursive', true)
|
41
43
|
extract_batch!
|
42
44
|
next_batch!
|
43
45
|
end
|
@@ -112,11 +114,12 @@ module CDMBL
|
|
112
114
|
end
|
113
115
|
|
114
116
|
def transform!(ids)
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
117
|
+
transform_worker_klass.perform_async(ids,
|
118
|
+
solr_config,
|
119
|
+
cdm_endpoint,
|
120
|
+
oai_endpoint,
|
121
|
+
field_mappings,
|
122
|
+
extract_compounds)
|
120
123
|
end
|
121
124
|
|
122
125
|
def delete_deletables!
|
data/lib/cdmbl/transform_worker.rb
CHANGED
@@ -6,7 +6,8 @@ module CDMBL
|
|
6
6
|
:solr_config,
|
7
7
|
:cdm_endpoint,
|
8
8
|
:oai_endpoint,
|
9
|
-
:field_mappings
|
9
|
+
:field_mappings,
|
10
|
+
:extract_compounds
|
10
11
|
|
11
12
|
attr_writer :cdm_api_klass,
|
12
13
|
:oai_request_klass,
|
@@ -19,14 +20,15 @@ module CDMBL
|
|
19
20
|
solr_config,
|
20
21
|
cdm_endpoint,
|
21
22
|
oai_endpoint,
|
22
|
-
field_mappings
|
23
|
-
|
24
|
-
|
25
|
-
@
|
26
|
-
@
|
27
|
-
@
|
28
|
-
@
|
29
|
-
|
23
|
+
field_mappings,
|
24
|
+
extract_compounds)
|
25
|
+
|
26
|
+
@identifiers = identifiers
|
27
|
+
@solr_config = solr_config
|
28
|
+
@cdm_endpoint = cdm_endpoint
|
29
|
+
@oai_endpoint = oai_endpoint
|
30
|
+
@field_mappings = field_mappings
|
31
|
+
@extract_compounds = extract_compounds
|
30
32
|
transform_and_load!
|
31
33
|
end
|
32
34
|
|
@@ -64,7 +66,8 @@ module CDMBL
|
|
64
66
|
@transformation ||=
|
65
67
|
transformer_klass.new(cdm_records: records,
|
66
68
|
oai_sets: set_lookup,
|
67
|
-
field_mappings: field_mappings
|
69
|
+
field_mappings: field_mappings,
|
70
|
+
extract_compounds: extract_compounds).records
|
68
71
|
end
|
69
72
|
|
70
73
|
def set_lookup
|
data/lib/cdmbl/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdmbl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.12.
|
4
|
+
version: 0.12.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chadfennell
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-05-
|
11
|
+
date: 2018-05-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: hash_at_path
|