ncs_mdes_warehouse 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +16 -0
- data/generated_models/ncs_navigator/warehouse/models/two_point_zero/env_equipment_prob_log.rb +1 -1
- data/generated_models/ncs_navigator/warehouse/models/two_point_zero/incident.rb +6 -6
- data/generated_models/ncs_navigator/warehouse/models/two_point_zero/participant_rvis.rb +1 -1
- data/generated_models/ncs_navigator/warehouse/models/two_point_zero/sample_shipping.rb +1 -1
- data/lib/ncs_navigator/warehouse.rb +4 -0
- data/lib/ncs_navigator/warehouse/cli.rb +31 -1
- data/lib/ncs_navigator/warehouse/configuration.rb +49 -9
- data/lib/ncs_navigator/warehouse/database_initializer.rb +62 -4
- data/lib/ncs_navigator/warehouse/models.rb +3 -0
- data/lib/ncs_navigator/warehouse/postgresql.rb +7 -0
- data/lib/ncs_navigator/warehouse/postgresql/pgpass.rb +79 -0
- data/lib/ncs_navigator/warehouse/table_modeler/mdes_ext.rb +9 -0
- data/lib/ncs_navigator/warehouse/table_modeler/model_template.rb.erb +1 -1
- data/lib/ncs_navigator/warehouse/transform_load.rb +55 -0
- data/lib/ncs_navigator/warehouse/transform_status.rb +63 -0
- data/lib/ncs_navigator/warehouse/transformers.rb +0 -1
- data/lib/ncs_navigator/warehouse/transformers/database.rb +91 -85
- data/lib/ncs_navigator/warehouse/transformers/enum_transformer.rb +26 -8
- data/lib/ncs_navigator/warehouse/transformers/vdr_xml.rb +1 -1
- data/lib/ncs_navigator/warehouse/transformers/vdr_xml/reader.rb +11 -4
- data/lib/ncs_navigator/warehouse/version.rb +1 -1
- data/spec/bcdatabase/test_sqlite.yml +4 -0
- data/spec/ncs_navigator/warehouse/configuration_spec.rb +42 -0
- data/spec/ncs_navigator/warehouse/postgresql/pgpass_spec.rb +187 -0
- data/spec/ncs_navigator/warehouse/table_modeler_spec.rb +15 -1
- data/spec/ncs_navigator/warehouse/transform_load_spec.rb +152 -0
- data/spec/ncs_navigator/warehouse/transformers/database_spec.rb +24 -28
- data/spec/ncs_navigator/warehouse/transformers/enum_transformer_spec.rb +16 -10
- data/spec/ncs_navigator/warehouse/transformers/vdr_xml/made_up_vdr_xml.xml +4 -4
- data/spec/ncs_navigator/warehouse/transformers/vdr_xml/reader_spec.rb +8 -3
- data/spec/spec_helper.rb +1 -1
- metadata +44 -37
- data/lib/ncs_navigator/warehouse/transformers/transform_status.rb +0 -23
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'ncs_navigator/warehouse'
|
2
|
+
|
3
|
+
require 'forwardable'
|
4
|
+
|
5
|
+
module NcsNavigator::Warehouse
|
6
|
+
class TransformLoad
|
7
|
+
extend Forwardable
|
8
|
+
|
9
|
+
attr_reader :configuration
|
10
|
+
attr_reader :statuses
|
11
|
+
|
12
|
+
def_delegators :@configuration, :log, :shell
|
13
|
+
|
14
|
+
def initialize(configuration)
|
15
|
+
@configuration = configuration
|
16
|
+
end
|
17
|
+
|
18
|
+
def run
|
19
|
+
position = 0
|
20
|
+
@statuses = configuration.transformers.collect do |transformer|
|
21
|
+
::DataMapper.repository(:mdes_warehouse_working) do
|
22
|
+
build_status_for(transformer, position).tap do |status|
|
23
|
+
begin
|
24
|
+
transformer.transform(status)
|
25
|
+
rescue => e
|
26
|
+
shell.say_line("\nTransform failed. (See log for more detail.)")
|
27
|
+
status.add_error("Transform failed. #{e.class}: #{e}.")
|
28
|
+
end
|
29
|
+
status.end_time = Time.now
|
30
|
+
unless status.save
|
31
|
+
shell.say_line("Could not save status for transformer #{status.name}")
|
32
|
+
log.warn("Could not save status for transformer #{status.name}")
|
33
|
+
end
|
34
|
+
position += 1
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
if statuses.detect { |s| !s.transform_errors.empty? }
|
40
|
+
false
|
41
|
+
else
|
42
|
+
true
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def build_status_for(transformer, position)
|
47
|
+
TransformStatus.new(
|
48
|
+
:name => transformer.respond_to?(:name) ? transformer.name : transformer.class.name,
|
49
|
+
:start_time => Time.now,
|
50
|
+
:position => position
|
51
|
+
)
|
52
|
+
end
|
53
|
+
private :build_status_for
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'ncs_navigator/warehouse'
|
2
|
+
|
3
|
+
require 'data_mapper'
|
4
|
+
|
5
|
+
module NcsNavigator::Warehouse
|
6
|
+
##
|
7
|
+
# Tracks and stores the progress of a particular transform.
|
8
|
+
class TransformStatus
|
9
|
+
include ::DataMapper::Resource
|
10
|
+
|
11
|
+
##
|
12
|
+
# DataMapper 1.2 attempts to query for associations, even if the
|
13
|
+
# record has never been saved. This fails if the database
|
14
|
+
# connection is not set up (as in most of this library's
|
15
|
+
# tests). This method creates a new instance which works around
|
16
|
+
# this problem, at the cost of the instances not being accurately
|
17
|
+
# persistable.
|
18
|
+
def self.memory_only(name)
|
19
|
+
TransformStatus.new(:name => name).tap do |s|
|
20
|
+
def s.transform_errors
|
21
|
+
@transform_errors ||= []
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
storage_names[:default] = storage_names[:mdes_warehouse_working] =
|
27
|
+
storage_names[:mdes_warehouse_reporting] = 'wh_transform_status'
|
28
|
+
|
29
|
+
property :id, Serial
|
30
|
+
property :name, String, :required => true, :length => 255
|
31
|
+
property :start_time, DateTime
|
32
|
+
property :end_time, DateTime
|
33
|
+
property :record_count, Integer, :default => 0
|
34
|
+
property :position, Integer
|
35
|
+
|
36
|
+
has n, :transform_errors, 'NcsNavigator::Warehouse::TransformError'
|
37
|
+
|
38
|
+
def add_error(message)
|
39
|
+
self.transform_errors << TransformError.new(:message => message)
|
40
|
+
end
|
41
|
+
|
42
|
+
def unsuccessful_record(record, message)
|
43
|
+
self.transform_errors <<
|
44
|
+
TransformError.new(:model_class => record.class.name, :message => message)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
class TransformError
|
49
|
+
include ::DataMapper::Resource
|
50
|
+
|
51
|
+
storage_names[:default] = storage_names[:mdes_warehouse_working] =
|
52
|
+
storage_names[:mdes_warehouse_reporting] = 'wh_transform_error'
|
53
|
+
|
54
|
+
property :id, Serial
|
55
|
+
property :message, Text, :required => true
|
56
|
+
property :model_class, String, :length => 255
|
57
|
+
|
58
|
+
belongs_to :transform_status, TransformStatus, :required => true
|
59
|
+
end
|
60
|
+
|
61
|
+
TransformError.finalize
|
62
|
+
TransformStatus.finalize
|
63
|
+
end
|
@@ -13,7 +13,6 @@ module NcsNavigator::Warehouse
|
|
13
13
|
module Transformers
|
14
14
|
autoload :Database, 'ncs_navigator/warehouse/transformers/database'
|
15
15
|
autoload :EnumTransformer, 'ncs_navigator/warehouse/transformers/enum_transformer'
|
16
|
-
autoload :TransformStatus, 'ncs_navigator/warehouse/transformers/transform_status'
|
17
16
|
autoload :VdrXml, 'ncs_navigator/warehouse/transformers/vdr_xml'
|
18
17
|
end
|
19
18
|
end
|
@@ -125,7 +125,7 @@ module NcsNavigator::Warehouse::Transformers
|
|
125
125
|
log.debug("Executing query for producer #{rp.name}:\n#{rp.query}")
|
126
126
|
repository.adapter.select(rp.query).each do |row|
|
127
127
|
row_count += 1
|
128
|
-
[*rp.row_processor.call(row)].each do |result|
|
128
|
+
[*rp.row_processor.call(row)].compact.each do |result|
|
129
129
|
yield result
|
130
130
|
result_count += 1
|
131
131
|
shell.back_up_and_say(24, "(%-6d in / %-6d out)" % [row_count, result_count])
|
@@ -198,7 +198,7 @@ module NcsNavigator::Warehouse::Transformers
|
|
198
198
|
if columns.empty?
|
199
199
|
@ignored_columns ||= []
|
200
200
|
else
|
201
|
-
@ignored_columns = columns.collect(&:
|
201
|
+
@ignored_columns = columns.collect(&:to_s)
|
202
202
|
end
|
203
203
|
end
|
204
204
|
|
@@ -242,116 +242,130 @@ module NcsNavigator::Warehouse::Transformers
|
|
242
242
|
end
|
243
243
|
|
244
244
|
##
|
245
|
-
#
|
246
|
-
#
|
247
|
-
#
|
248
|
-
#
|
249
|
-
# at most one model property value. In the order that they are
|
250
|
-
# applied, the heuristics are:
|
245
|
+
# Define a translation from the results of a query into exactly
|
246
|
+
# one warehouse record per result row. This method, while more
|
247
|
+
# restrictive than {#produce_records}, allows for rapidly
|
248
|
+
# mapping data which is already a close match for MDES records.
|
251
249
|
#
|
250
|
+
# The mapping uses a series of heuristics to map from the
|
251
|
+
# columns in each query result row to at most one MDES variable
|
252
|
+
# from the specified model.
|
253
|
+
#
|
254
|
+
# * If the column appears as a key in the `:column_map` hash,
|
255
|
+
# use the associated value as the target variable name.
|
252
256
|
# * If there's a `:prefix` option, the column is named {X},
|
253
|
-
# and there's a
|
254
|
-
#
|
255
|
-
# * If the column is named {X} and there's a
|
256
|
-
# {X}, use that
|
257
|
-
# * If the column is named {X}_code and there's a
|
258
|
-
# named {X}, use that
|
259
|
-
# * If the column is named {X}_code and there's a
|
260
|
-
# named {X}_id, use that
|
257
|
+
# and there's a variable named {prefix}{X}, use that
|
258
|
+
# variable.
|
259
|
+
# * If the column is named {X} and there's a variable named
|
260
|
+
# {X}, use that variable.
|
261
|
+
# * If the column is named {X}_code and there's a variable
|
262
|
+
# named {X}, use that variable.
|
263
|
+
# * If the column is named {X}_code and there's a variable
|
264
|
+
# named {X}_id, use that variable.
|
261
265
|
# * If the column is named {X}_other and there's a variable
|
262
|
-
# named {X}_oth, use that
|
263
|
-
#
|
264
|
-
# Separately, any property value in the instance may be
|
265
|
-
# explicitly set via a hash passed as the `:explicit`
|
266
|
-
# option. Property values in `:explicit` take precedence over
|
267
|
-
# the heuristically-determined values.
|
266
|
+
# named {X}_oth, use that variable.
|
268
267
|
#
|
269
|
-
# @param [
|
270
|
-
#
|
271
|
-
#
|
272
|
-
#
|
273
|
-
#
|
274
|
-
# @param [Hash] options
|
275
|
-
#
|
268
|
+
# @param [Symbol] name the name of this producer; if you don't
|
269
|
+
# specify a `:query`, the default is to return every row from
|
270
|
+
# the application table with this name.
|
271
|
+
# @param [Class] model the warehouse model to which results of
|
272
|
+
# the query will be mapped.
|
273
|
+
# @param [Hash] options
|
274
|
+
# @option options :query [String] the query to execute for this
|
275
|
+
# producer. If not specified, the query is `"SELECT * FROM #{name}"`.
|
276
276
|
#
|
277
277
|
# @option options :prefix [String] a prefix to use when looking
|
278
278
|
# for matching property values. (See above.)
|
279
279
|
# @option options :column_map [Hash<Symbol, Symbol>] explicit
|
280
|
-
# mapping from column name to
|
281
|
-
# mapping is consulted before the heuristics are applied
|
282
|
-
# before `:property_values` is used.
|
283
|
-
# @option options :property_values [Hash<Symbol, Object>]
|
284
|
-
# explicit values to use. Keys are model property names and
|
285
|
-
# values are the desired values. Any values in this hash trump
|
286
|
-
# the heuristically-determined values.
|
280
|
+
# mapping from column name to MDES variable name. This
|
281
|
+
# mapping is consulted before the heuristics are applied.
|
287
282
|
# @option options :on_unused [:ignore,:fail] what to do when
|
288
283
|
# there are columns in the row which are not used.
|
289
|
-
# @option options :ignored_columns [Array<String,Symbol>]
|
290
|
-
# columns to consider "used" even if the heuristic
|
291
|
-
# match them to anything.
|
284
|
+
# @option options :ignored_columns [Array<[String,Symbol]>]
|
285
|
+
# columns to consider "used" even if the heuristic or the
|
286
|
+
# column map don't match them to anything.
|
292
287
|
#
|
293
|
-
# @return [
|
294
|
-
def
|
295
|
-
|
296
|
-
|
288
|
+
# @return [void]
|
289
|
+
def produce_one_for_one(name, model, options={})
|
290
|
+
options[:on_unused] ||= on_unused_columns
|
291
|
+
options[:column_map] =
|
292
|
+
(options[:column_map] || {}).inject({}) { |h, (k, v)| h[k.to_s] = v.to_s; h }
|
293
|
+
options[:ignored_columns] =
|
294
|
+
(options[:ignored_columns] || []).collect(&:to_s) + ignored_columns
|
295
|
+
|
296
|
+
record_producers << OneForOneProducer.new(name, options.delete(:query), model, options)
|
297
|
+
end
|
298
|
+
end
|
297
299
|
|
298
|
-
|
299
|
-
|
300
|
+
##
|
301
|
+
# The class encapsulating one call to {DSL#produce_records}
|
302
|
+
class RecordProducer < Struct.new(:name, :query, :row_processor)
|
303
|
+
def query
|
304
|
+
super || "SELECT * FROM #{name}"
|
305
|
+
end
|
306
|
+
end
|
300
307
|
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
308
|
+
##
|
309
|
+
# The class encapsulating one call to {DSL#produce_one_for_one}
|
310
|
+
class OneForOneProducer < RecordProducer
|
311
|
+
attr_reader :model, :options
|
312
|
+
|
313
|
+
def initialize(name, query, model, options)
|
314
|
+
super(name, query, self)
|
315
|
+
@model = model
|
316
|
+
@options = options
|
305
317
|
end
|
306
318
|
|
307
319
|
##
|
308
|
-
#
|
309
|
-
#
|
310
|
-
|
311
|
-
|
312
|
-
|
320
|
+
# Produces a single instance of {#model} using the values in the
|
321
|
+
# row as mapped by {#column_map}.
|
322
|
+
def convert_row(row)
|
323
|
+
col_map = column_map(row.members)
|
324
|
+
unused = row.members.collect(&:to_s) - col_map.keys - options[:ignored_columns]
|
313
325
|
|
314
|
-
|
315
|
-
|
326
|
+
if options[:on_unused] == :fail && !unused.empty?
|
327
|
+
raise UnusedColumnsForModelError.new(unused)
|
328
|
+
end
|
329
|
+
model.new(
|
330
|
+
col_map.inject({}) { |pv, (col_name, var_name)| pv[var_name] = row[col_name]; pv }
|
331
|
+
)
|
332
|
+
end
|
333
|
+
alias :call :convert_row
|
316
334
|
|
335
|
+
##
|
336
|
+
# @param [Array<String>] column_names
|
337
|
+
# @return [Hash<String, String>] a mapping from the given
|
338
|
+
# column names to MDES variable names for the configured
|
339
|
+
# model. This mapping reflects both the configured explicit
|
340
|
+
# mapping and the heuristic.
|
341
|
+
def column_map(column_names)
|
317
342
|
available_props = model.properties.collect { |p| p.name.to_s }
|
318
|
-
available_props -=
|
343
|
+
available_props -= options[:column_map].values
|
319
344
|
|
320
|
-
|
321
|
-
|
345
|
+
column_names.inject(options[:column_map].dup) do |map, column|
|
346
|
+
column = column.to_s
|
322
347
|
prop =
|
323
|
-
|
324
|
-
column_map[column.to_s]
|
325
|
-
else
|
348
|
+
unless options[:column_map][column]
|
326
349
|
[
|
327
350
|
[//, ''],
|
328
351
|
[/_code$/, ''],
|
329
352
|
[/_code$/, '_id'],
|
330
353
|
[/_other$/, '_oth'],
|
331
|
-
].collect do |pattern,
|
354
|
+
].collect do |pattern, substitution|
|
332
355
|
if column =~ pattern
|
333
356
|
prefixed_property_name(available_props,
|
334
|
-
column.
|
357
|
+
column.sub(pattern, substitution), options[:prefix])
|
335
358
|
end
|
336
359
|
end.compact.first
|
337
360
|
end
|
338
361
|
if prop
|
339
362
|
available_props.delete(prop)
|
340
|
-
|
341
|
-
else
|
342
|
-
unused << column.to_sym
|
363
|
+
map[column] = prop
|
343
364
|
end
|
365
|
+
map
|
344
366
|
end
|
345
|
-
[pv, unused]
|
346
367
|
end
|
347
|
-
private :create_property_values
|
348
368
|
|
349
|
-
##
|
350
|
-
# Determines if the model has a property with the given name,
|
351
|
-
# with or without the prefix.
|
352
|
-
#
|
353
|
-
# @return [String,nil] the name of an existing property on the
|
354
|
-
# model, either with or without the prefix; or nil.
|
355
369
|
def prefixed_property_name(available_props, name, prefix)
|
356
370
|
if prefix && available_props.include?(prop = "#{prefix}#{name}")
|
357
371
|
prop
|
@@ -362,14 +376,6 @@ module NcsNavigator::Warehouse::Transformers
|
|
362
376
|
private :prefixed_property_name
|
363
377
|
end
|
364
378
|
|
365
|
-
##
|
366
|
-
# @private
|
367
|
-
class RecordProducer < Struct.new(:name, :query, :row_processor)
|
368
|
-
def query
|
369
|
-
super || "SELECT * FROM #{name}"
|
370
|
-
end
|
371
|
-
end
|
372
|
-
|
373
379
|
##
|
374
380
|
# Methods to assist with using classes that mix in {Database}.
|
375
381
|
module Factory
|
@@ -378,7 +384,7 @@ module NcsNavigator::Warehouse::Transformers
|
|
378
384
|
# enumerable.
|
379
385
|
# @see EnumTransformer
|
380
386
|
def create_transformer(configuration, options={})
|
381
|
-
EnumTransformer.new(new(configuration, options))
|
387
|
+
EnumTransformer.new(configuration, new(configuration, options))
|
382
388
|
end
|
383
389
|
end
|
384
390
|
|
@@ -388,7 +394,7 @@ module NcsNavigator::Warehouse::Transformers
|
|
388
394
|
def initialize(unused)
|
389
395
|
super(
|
390
396
|
"#{unused.size} unused column#{'s' unless unused.size == 1} when building model. " +
|
391
|
-
"Use :
|
397
|
+
"Use :ignored_columns => %w(#{unused.join(' ')}) or :on_unused => :ignore to suppress this error.")
|
392
398
|
@unused = unused
|
393
399
|
end
|
394
400
|
end
|
@@ -1,5 +1,7 @@
|
|
1
1
|
require 'ncs_navigator/warehouse'
|
2
2
|
|
3
|
+
require 'forwardable'
|
4
|
+
|
3
5
|
module NcsNavigator::Warehouse::Transformers
|
4
6
|
##
|
5
7
|
# A transformer that accepts a series of model instances in the form
|
@@ -7,16 +9,26 @@ module NcsNavigator::Warehouse::Transformers
|
|
7
9
|
# array, or it might be a custom class that streams through
|
8
10
|
# thousands of instances without having them all in memory at once.
|
9
11
|
class EnumTransformer
|
12
|
+
extend Forwardable
|
13
|
+
|
10
14
|
##
|
11
15
|
# @return [Enumerable] the enumeration that will be transformed.
|
12
16
|
attr_reader :enum
|
13
17
|
|
18
|
+
def_delegators :@configuration, :log, :shell
|
19
|
+
|
14
20
|
##
|
21
|
+
# @param [Configuration] configuration
|
15
22
|
# @param [Enumerable] enum
|
16
|
-
def initialize(enum)
|
23
|
+
def initialize(configuration, enum)
|
24
|
+
@configuration = configuration
|
17
25
|
@enum = enum
|
18
26
|
end
|
19
27
|
|
28
|
+
def name
|
29
|
+
"EnumTransformer for #{enum.class}"
|
30
|
+
end
|
31
|
+
|
20
32
|
##
|
21
33
|
# Takes each in-memory record provided by the configured
|
22
34
|
# `Enumerable`, validates it, and saves it if it is valid.
|
@@ -26,14 +38,17 @@ module NcsNavigator::Warehouse::Transformers
|
|
26
38
|
def transform(status)
|
27
39
|
enum.each do |record|
|
28
40
|
if record.valid?
|
41
|
+
log.debug("Saving valid record #{record_ident record}.")
|
29
42
|
begin
|
30
43
|
unless record.save
|
31
|
-
|
32
|
-
|
44
|
+
msg = "Could not save. #{record_ident(record)}."
|
45
|
+
log.error msg
|
46
|
+
status.unsuccessful_record(record, msg)
|
33
47
|
end
|
34
48
|
rescue => e
|
35
|
-
|
36
|
-
|
49
|
+
msg = "Error on save. #{e.class}: #{e}. #{record_ident(record)}."
|
50
|
+
log.error msg
|
51
|
+
status.unsuccessful_record(record, msg)
|
37
52
|
end
|
38
53
|
else
|
39
54
|
messages = record.errors.keys.collect { |prop|
|
@@ -42,9 +57,11 @@ module NcsNavigator::Warehouse::Transformers
|
|
42
57
|
"#{e} (#{prop}=#{v.inspect})."
|
43
58
|
}
|
44
59
|
}.flatten
|
45
|
-
|
46
|
-
|
60
|
+
msg = "Invalid record. #{messages.join(' ')} #{record_ident(record)}."
|
61
|
+
log.error msg
|
62
|
+
status.unsuccessful_record(record, msg)
|
47
63
|
end
|
64
|
+
status.record_count += 1
|
48
65
|
end
|
49
66
|
end
|
50
67
|
|
@@ -52,7 +69,8 @@ module NcsNavigator::Warehouse::Transformers
|
|
52
69
|
|
53
70
|
def record_ident(rec)
|
54
71
|
# No composite keys in the MDES
|
55
|
-
'%s %s=%s' % [
|
72
|
+
'%s %s=%s' % [
|
73
|
+
rec.class.name.demodulize, rec.class.key.first.name, rec.key.try(:first).inspect]
|
56
74
|
end
|
57
75
|
end
|
58
76
|
end
|