cdmdexer 0.19.0 → 0.20.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +37 -5
- data/lib/cdmdexer.rb +2 -0
- data/lib/cdmdexer/default_cdm_error.rb +8 -0
- data/lib/cdmdexer/field_transformer.rb +2 -3
- data/lib/cdmdexer/hooks.rb +2 -0
- data/lib/cdmdexer/record_transformer.rb +11 -2
- data/lib/cdmdexer/transformation_error_message.rb +23 -0
- data/lib/cdmdexer/version.rb +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 60ef91c0ea6d70b0242465b27945c265175312254e540b40a2ba15ffbb609413
|
4
|
+
data.tar.gz: dce7221c4a962bad08f35ef39c2bf4bd0288650d97b2b1c3d1d6751226bc7a14
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fd92890578c5e3c9642766b605b381219f789854918c9895ef3d8a895979dc5bde6a3e2522fc21b03893d304a4660914c45d080cb67066b10a998d3d15897d1a
|
7
|
+
data.tar.gz: 05d038f8b6149dbfe7500dced962f853c7f3a2ed49821d253d02e71baf0af82dfb4ac9cfc526de4dc4e5cca5552712c8b955ec9a532c437ed9d0570852a4cc70
|
data/README.md
CHANGED
@@ -116,19 +116,51 @@ You might also want to simply override some of the default mappings or add your
|
|
116
116
|
```ruby
|
117
117
|
mappings = CDMDEXER::Transformer.default_mappings.merge(your_custom_field_mappings)
|
118
118
|
```
|
119
|
-
## A Custom Post-indexing Callback
|
120
119
|
|
121
|
-
|
120
|
+
### Callbacks
|
121
|
+
|
122
|
+
CDMDEXER comes with a set of lifecycle hooks that are called at various points during the ETL process. Downstream applications can use these hooks to perform logging or notification tasks. To do so, create a Rails initializer at `config/initializers/cdmdexer.rb` and define the hook classes you need there.
|
123
|
+
|
124
|
+
**IMPORTANT NOTE:** Errors (except for HTTP timeouts and other connection errors) are **not raised**; they are instead sent to the `CdmError` notification hook below. This prevents Sidekiq from piling up with errors that will never resolve via retries, while still allowing you to capture each error and be notified of error events.
|
125
|
+
|
126
|
+
E.g.:
|
122
127
|
|
123
128
|
```ruby
|
124
129
|
module CDMDEXER
|
125
|
-
class
|
126
|
-
def self.call!
|
127
|
-
|
130
|
+
class CompletedCallback
|
131
|
+
def self.call!(config)
|
132
|
+
# e.g. commit records - ::SolrClient.new.commit
|
133
|
+
Rails.logger.info "Processing last batch for: #{config['set_spec']}"
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
class OaiNotification
|
138
|
+
def self.call!(location)
|
139
|
+
Rails.logger.info "CDMDEXER: Requesting: #{location}"
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
class CdmNotification
|
144
|
+
def self.call!(collection, id, endpoint)
|
145
|
+
Rails.logger.info "CDMDEXER: Requesting: #{collection}:#{id}"
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
class LoaderNotification
|
150
|
+
def self.call!(ingestables, deletables)
|
151
|
+
Rails.logger.info "CDMDEXER: Loading #{ingestables.length} records and deleting #{deletables.length}"
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
class CdmError
|
156
|
+
def self.call!(error)
|
157
|
+
Rails.logger.info "CDMDEXER: #{error}"
|
158
|
+
# e.g. push error to a slack channel or send an email alert
|
128
159
|
end
|
129
160
|
end
|
130
161
|
end
|
131
162
|
```
|
163
|
+
|
132
164
|
## Development
|
133
165
|
|
134
166
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/lib/cdmdexer.rb
CHANGED
@@ -4,12 +4,11 @@ module CDMDEXER
|
|
4
4
|
class FieldTransformer
|
5
5
|
extend Forwardable
|
6
6
|
def_delegators :@field_mapping, :origin_path, :dest_path, :formatters
|
7
|
-
attr_reader :field_value, :field_mapping, :formatter_klass
|
7
|
+
attr_reader :field_value, :field_mapping, :formatter_klass
|
8
8
|
def initialize(field_mapping: FieldMapping.new,
|
9
9
|
record: {},
|
10
10
|
formatter_klass: FieldFormatter)
|
11
11
|
@field_mapping = field_mapping
|
12
|
-
@record_id = record.fetch('id', 'MISSING_RECORD_ID')
|
13
12
|
@field_value = compact(record.at_path(origin_path))
|
14
13
|
@formatter_klass = formatter_klass
|
15
14
|
end
|
@@ -36,7 +35,7 @@ module CDMDEXER
|
|
36
35
|
def transform_field
|
37
36
|
formatter_klass.new(value: field_value, formatters: formatters).format!
|
38
37
|
rescue StandardError => e
|
39
|
-
raise "
|
38
|
+
raise "Mapping: #{field_mapping.config} Error:#{e.message}"
|
40
39
|
end
|
41
40
|
end
|
42
41
|
end
|
data/lib/cdmdexer/hooks.rb
CHANGED
@@ -10,6 +10,8 @@ module CDMDEXER
|
|
10
10
|
hook(pattern: name.to_s, default: DefaultLoaderNotification)
|
11
11
|
elsif name.to_s == 'CdmNotification'
|
12
12
|
hook(pattern: name.to_s, default: DefaultCdmNotification)
|
13
|
+
elsif name.to_s == 'CdmError'
|
14
|
+
hook(pattern: name.to_s, default: DefaultCdmError)
|
13
15
|
end
|
14
16
|
end
|
15
17
|
|
@@ -1,22 +1,31 @@
|
|
1
1
|
module CDMDEXER
|
2
|
+
# "Record Transformation Error: #{message}"
|
2
3
|
class RecordTransformer
|
3
|
-
attr_reader :record, :field_mappings, :field_transformer
|
4
|
+
attr_reader :record, :field_mappings, :field_transformer, :error_klass
|
4
5
|
def initialize(record: {},
|
5
6
|
field_mappings: [],
|
6
|
-
field_transformer: FieldTransformer
|
7
|
+
field_transformer: FieldTransformer,
|
8
|
+
error_klass: TransformationErrorMessage)
|
7
9
|
@record = record
|
8
10
|
@field_mappings = field_mappings
|
9
11
|
@field_transformer = field_transformer
|
12
|
+
@error_klass = error_klass
|
10
13
|
end
|
11
14
|
|
12
15
|
def transform!
|
13
16
|
field_mappings.inject({}) do |dest_record, field_mapping|
|
14
17
|
dest_record.merge(transform_field(record, field_mapping))
|
15
18
|
end
|
19
|
+
rescue StandardError => error
|
20
|
+
error_klass.new(message: message(error)).notify
|
16
21
|
end
|
17
22
|
|
18
23
|
private
|
19
24
|
|
25
|
+
def message(error)
|
26
|
+
"Record Transformation Error (Record #{record['id']}): #{error}"
|
27
|
+
end
|
28
|
+
|
20
29
|
def transform_field(record, field_mapping)
|
21
30
|
field_transformer.new(field_mapping: field_mapping,
|
22
31
|
record: record).reduce
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module CDMDEXER
|
2
|
+
# Raise only HTTP connection errors (e.g. timeouts); all other errors are not re-raised
|
3
|
+
# Notify downstream in case users want to log the non-timeout errors
|
4
|
+
class TransformationErrorMessage
|
5
|
+
attr_reader :message, :notification_klass
|
6
|
+
def initialize(message: :MISSING_ERROR_MESSAGE,
|
7
|
+
notification_klass: CDMDEXER::CdmError)
|
8
|
+
@notification_klass = notification_klass
|
9
|
+
@message = message
|
10
|
+
end
|
11
|
+
|
12
|
+
def notify
|
13
|
+
notification_klass.call! message
|
14
|
+
raise message if http_error?
|
15
|
+
end
|
16
|
+
|
17
|
+
private
|
18
|
+
|
19
|
+
def http_error?
|
20
|
+
!(message =~ /ConnectionError/).nil?
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
data/lib/cdmdexer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdmdexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.19.0
|
4
|
+
version: 0.20.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chadfennell
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-07-
|
11
|
+
date: 2019-07-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: hash_at_path
|
@@ -170,6 +170,7 @@ files:
|
|
170
170
|
- cdmdexer.gemspec
|
171
171
|
- lib/cdmdexer.rb
|
172
172
|
- lib/cdmdexer/cdm_item.rb
|
173
|
+
- lib/cdmdexer/default_cdm_error.rb
|
173
174
|
- lib/cdmdexer/default_cdm_notification.rb
|
174
175
|
- lib/cdmdexer/default_completed_callback.rb
|
175
176
|
- lib/cdmdexer/default_loader_notification.rb
|
@@ -193,6 +194,7 @@ files:
|
|
193
194
|
- lib/cdmdexer/tasks/delete.rake
|
194
195
|
- lib/cdmdexer/tasks/etl.rake
|
195
196
|
- lib/cdmdexer/transform_worker.rb
|
197
|
+
- lib/cdmdexer/transformation_error_message.rb
|
196
198
|
- lib/cdmdexer/transformer.rb
|
197
199
|
- lib/cdmdexer/version.rb
|
198
200
|
- travis.yml
|
@@ -215,8 +217,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
215
217
|
- !ruby/object:Gem::Version
|
216
218
|
version: '0'
|
217
219
|
requirements: []
|
218
|
-
|
219
|
-
rubygems_version: 2.7.8
|
220
|
+
rubygems_version: 3.0.3
|
220
221
|
signing_key:
|
221
222
|
specification_version: 4
|
222
223
|
summary: Load CONTENTdm data into a Solr Index. CDMDEXER expects to run inside a Rails
|