ncs_mdes_warehouse 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +16 -0
- data/generated_models/ncs_navigator/warehouse/models/two_point_zero/env_equipment_prob_log.rb +1 -1
- data/generated_models/ncs_navigator/warehouse/models/two_point_zero/incident.rb +6 -6
- data/generated_models/ncs_navigator/warehouse/models/two_point_zero/participant_rvis.rb +1 -1
- data/generated_models/ncs_navigator/warehouse/models/two_point_zero/sample_shipping.rb +1 -1
- data/lib/ncs_navigator/warehouse.rb +4 -0
- data/lib/ncs_navigator/warehouse/cli.rb +31 -1
- data/lib/ncs_navigator/warehouse/configuration.rb +49 -9
- data/lib/ncs_navigator/warehouse/database_initializer.rb +62 -4
- data/lib/ncs_navigator/warehouse/models.rb +3 -0
- data/lib/ncs_navigator/warehouse/postgresql.rb +7 -0
- data/lib/ncs_navigator/warehouse/postgresql/pgpass.rb +79 -0
- data/lib/ncs_navigator/warehouse/table_modeler/mdes_ext.rb +9 -0
- data/lib/ncs_navigator/warehouse/table_modeler/model_template.rb.erb +1 -1
- data/lib/ncs_navigator/warehouse/transform_load.rb +55 -0
- data/lib/ncs_navigator/warehouse/transform_status.rb +63 -0
- data/lib/ncs_navigator/warehouse/transformers.rb +0 -1
- data/lib/ncs_navigator/warehouse/transformers/database.rb +91 -85
- data/lib/ncs_navigator/warehouse/transformers/enum_transformer.rb +26 -8
- data/lib/ncs_navigator/warehouse/transformers/vdr_xml.rb +1 -1
- data/lib/ncs_navigator/warehouse/transformers/vdr_xml/reader.rb +11 -4
- data/lib/ncs_navigator/warehouse/version.rb +1 -1
- data/spec/bcdatabase/test_sqlite.yml +4 -0
- data/spec/ncs_navigator/warehouse/configuration_spec.rb +42 -0
- data/spec/ncs_navigator/warehouse/postgresql/pgpass_spec.rb +187 -0
- data/spec/ncs_navigator/warehouse/table_modeler_spec.rb +15 -1
- data/spec/ncs_navigator/warehouse/transform_load_spec.rb +152 -0
- data/spec/ncs_navigator/warehouse/transformers/database_spec.rb +24 -28
- data/spec/ncs_navigator/warehouse/transformers/enum_transformer_spec.rb +16 -10
- data/spec/ncs_navigator/warehouse/transformers/vdr_xml/made_up_vdr_xml.xml +4 -4
- data/spec/ncs_navigator/warehouse/transformers/vdr_xml/reader_spec.rb +8 -3
- data/spec/spec_helper.rb +1 -1
- metadata +44 -37
- data/lib/ncs_navigator/warehouse/transformers/transform_status.rb +0 -23
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'ncs_navigator/warehouse'
|
2
|
+
|
3
|
+
require 'forwardable'
|
4
|
+
|
5
|
+
module NcsNavigator::Warehouse
|
6
|
+
##
# Runs every transformer listed in the configuration, in order, and
# records one {TransformStatus} per transformer.
class TransformLoad
  extend Forwardable

  ##
  # @return [Object] the configuration supplied to {#initialize}; it
  #   must respond to `transformers`, `log` and `shell`.
  attr_reader :configuration

  ##
  # @return [Array<TransformStatus>, nil] one status per transformer,
  #   in execution order; `nil` until {#run} has been called.
  attr_reader :statuses

  def_delegators :@configuration, :log, :shell

  ##
  # @param configuration the source of the transformers, the logger
  #   and the shell used for user-facing messages.
  def initialize(configuration)
    @configuration = configuration
  end

  ##
  # Executes each configured transformer inside the
  # `:mdes_warehouse_working` repository. An exception raised by a
  # transformer is reported on the shell, written to the log and
  # recorded on that transformer's status; the remaining transformers
  # still run.
  #
  # @return [Boolean] true if no status has any transform errors.
  def run
    @statuses = configuration.transformers.each_with_index.collect do |transformer, position|
      ::DataMapper.repository(:mdes_warehouse_working) do
        build_status_for(transformer, position).tap do |status|
          begin
            transformer.transform(status)
          rescue => e
            shell.say_line("\nTransform failed. (See log for more detail.)")
            msg = "Transform failed. #{e.class}: #{e}."
            # The shell message points the user at the log, so the
            # failure (with its backtrace) has to actually be there.
            log.error(([msg] + Array(e.backtrace)).join("\n"))
            status.add_error(msg)
          end
          status.end_time = Time.now
          unless status.save
            shell.say_line("Could not save status for transformer #{status.name}")
            log.warn("Could not save status for transformer #{status.name}")
          end
        end
      end
    end

    statuses.all? { |s| s.transform_errors.empty? }
  end

  ##
  # Creates the unsaved status record for one transformer.
  #
  # @param transformer the transformer about to run; its `name` is
  #   used when it responds to `name`, otherwise its class name.
  # @param [Integer] position zero-based index of the transformer in
  #   the configured list.
  # @return [TransformStatus]
  def build_status_for(transformer, position)
    TransformStatus.new(
      :name => transformer.respond_to?(:name) ? transformer.name : transformer.class.name,
      :start_time => Time.now,
      :position => position
    )
  end
  private :build_status_for
end
|
55
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'ncs_navigator/warehouse'
|
2
|
+
|
3
|
+
require 'data_mapper'
|
4
|
+
|
5
|
+
module NcsNavigator::Warehouse
|
6
|
+
##
|
7
|
+
# Tracks and stores the progress of a particular transform.
|
8
|
+
class TransformStatus
  include ::DataMapper::Resource

  ##
  # Builds a status that is safe to use without a database connection.
  #
  # DataMapper 1.2 queries for associations even on records that have
  # never been saved, which fails when no connection is configured
  # (the situation in most of this library's tests). The returned
  # instance answers `transform_errors` with a plain array instead,
  # at the cost of not being accurately persistable.
  #
  # @param [String] name the transformer name to record.
  # @return [TransformStatus]
  def self.memory_only(name)
    status = TransformStatus.new(:name => name)
    class << status
      def transform_errors
        @transform_errors ||= []
      end
    end
    status
  end

  # Same table name in every repository this model is used from.
  [:mdes_warehouse_reporting, :mdes_warehouse_working, :default].each do |repo|
    storage_names[repo] = 'wh_transform_status'
  end

  property :id, Serial
  property :name, String, :required => true, :length => 255
  property :start_time, DateTime
  property :end_time, DateTime
  property :record_count, Integer, :default => 0
  property :position, Integer

  has n, :transform_errors, 'NcsNavigator::Warehouse::TransformError'

  ##
  # Records an error that is not tied to a particular record.
  #
  # @param [String] message
  def add_error(message)
    error = TransformError.new(:message => message)
    transform_errors << error
  end

  ##
  # Records an error for one record that could not be processed; the
  # record's class name is stored alongside the message.
  #
  # @param record the record that failed.
  # @param [String] message
  def unsuccessful_record(record, message)
    error = TransformError.new(:model_class => record.class.name, :message => message)
    transform_errors << error
  end
end
|
47
|
+
|
48
|
+
##
# A single error message attached to a {TransformStatus}, optionally
# naming the model class of the record it concerns.
class TransformError
  include ::DataMapper::Resource

  # Same table name in every repository this model is used from.
  [:mdes_warehouse_reporting, :mdes_warehouse_working, :default].each do |repo|
    storage_names[repo] = 'wh_transform_error'
  end

  property :id, Serial
  property :message, Text, :required => true
  property :model_class, String, :length => 255

  belongs_to :transform_status, TransformStatus, :required => true
end
|
60
|
+
|
61
|
+
TransformError.finalize
|
62
|
+
TransformStatus.finalize
|
63
|
+
end
|
@@ -13,7 +13,6 @@ module NcsNavigator::Warehouse
|
|
13
13
|
module Transformers
|
14
14
|
autoload :Database, 'ncs_navigator/warehouse/transformers/database'
|
15
15
|
autoload :EnumTransformer, 'ncs_navigator/warehouse/transformers/enum_transformer'
|
16
|
-
autoload :TransformStatus, 'ncs_navigator/warehouse/transformers/transform_status'
|
17
16
|
autoload :VdrXml, 'ncs_navigator/warehouse/transformers/vdr_xml'
|
18
17
|
end
|
19
18
|
end
|
@@ -125,7 +125,7 @@ module NcsNavigator::Warehouse::Transformers
|
|
125
125
|
log.debug("Executing query for producer #{rp.name}:\n#{rp.query}")
|
126
126
|
repository.adapter.select(rp.query).each do |row|
|
127
127
|
row_count += 1
|
128
|
-
[*rp.row_processor.call(row)].each do |result|
|
128
|
+
[*rp.row_processor.call(row)].compact.each do |result|
|
129
129
|
yield result
|
130
130
|
result_count += 1
|
131
131
|
shell.back_up_and_say(24, "(%-6d in / %-6d out)" % [row_count, result_count])
|
@@ -198,7 +198,7 @@ module NcsNavigator::Warehouse::Transformers
|
|
198
198
|
if columns.empty?
|
199
199
|
@ignored_columns ||= []
|
200
200
|
else
|
201
|
-
@ignored_columns = columns.collect(&:
|
201
|
+
@ignored_columns = columns.collect(&:to_s)
|
202
202
|
end
|
203
203
|
end
|
204
204
|
|
@@ -242,116 +242,130 @@ module NcsNavigator::Warehouse::Transformers
|
|
242
242
|
end
|
243
243
|
|
244
244
|
##
|
245
|
-
#
|
246
|
-
#
|
247
|
-
#
|
248
|
-
#
|
249
|
-
# at most one model property value. In the order that they are
|
250
|
-
# applied, the heuristics are:
|
245
|
+
# Define a translation from the results of a query into exactly
|
246
|
+
# one warehouse record per result row. This method, while more
|
247
|
+
# restrictive than {#produce_records}, allows for rapidly
|
248
|
+
# mapping data which is already a close match for MDES records.
|
251
249
|
#
|
250
|
+
# The mapping uses a series of heuristics to map from the
|
251
|
+
# columns in each query result row to at most one MDES variable
|
252
|
+
# from the specified model.
|
253
|
+
#
|
254
|
+
# * If the column appears as a key in the `:column_map` hash,
|
255
|
+
# use the associated value as the target variable name.
|
252
256
|
# * If there's a `:prefix` option, the column is named {X},
|
253
|
-
# and there's a
|
254
|
-
#
|
255
|
-
# * If the column is named {X} and there's a
|
256
|
-
# {X}, use that
|
257
|
-
# * If the column is named {X}_code and there's a
|
258
|
-
# named {X}, use that
|
259
|
-
# * If the column is named {X}_code and there's a
|
260
|
-
# named {X}_id, use that
|
257
|
+
# and there's a variable named {prefix}{X}, use that
|
258
|
+
# variable.
|
259
|
+
# * If the column is named {X} and there's a variable named
|
260
|
+
# {X}, use that variable.
|
261
|
+
# * If the column is named {X}_code and there's a variable
|
262
|
+
# named {X}, use that variable.
|
263
|
+
# * If the column is named {X}_code and there's a variable
|
264
|
+
# named {X}_id, use that variable.
|
261
265
|
# * If the column is named {X}_other and there's a property
|
262
|
-
# named {X}_oth, use that
|
263
|
-
#
|
264
|
-
# Separately, any property value in the instance may be
|
265
|
-
# explicitly set via a hash passed as the `:explicit`
|
266
|
-
# option. Property values in `:explicit` take precedence over
|
267
|
-
# the heuristically-determined values.
|
266
|
+
# named {X}_oth, use that variable.
|
268
267
|
#
|
269
|
-
# @param [
|
270
|
-
#
|
271
|
-
#
|
272
|
-
#
|
273
|
-
#
|
274
|
-
# @param [Hash] options
|
275
|
-
#
|
268
|
+
# @param [Symbol] name the name of this producer; if you don't
|
269
|
+
# specify a `:query`, the default is to return every row from
|
270
|
+
# the application table with this name.
|
271
|
+
# @param [Class] model the warehouse model to which results of
|
272
|
+
# the query will be mapped.
|
273
|
+
# @param [Hash] options
|
274
|
+
# @option options :query [String] the query to execute for this
|
275
|
+
# producer. If not specified, the query is `"SELECT * FROM #{name}"`.
|
276
276
|
#
|
277
277
|
# @option options :prefix [String] a prefix to use when looking
|
278
278
|
# for matching property values. (See above.)
|
279
279
|
# @option options :column_map [Hash<Symbol, Symbol>] explicit
|
280
|
-
# mapping from column name to
|
281
|
-
# mapping is consulted before the heuristics are applied
|
282
|
-
# before `:property_values` is used.
|
283
|
-
# @option options :property_values [Hash<Symbol, Object>]
|
284
|
-
# explicit values to use. Keys are model property names and
|
285
|
-
# values are the desired values. Any values in this hash trump
|
286
|
-
# the heuristically-determined values.
|
280
|
+
# mapping from column name to MDES variable name. This
|
281
|
+
# mapping is consulted before the heuristics are applied.
|
287
282
|
# @option options :on_unused [:ignore,:fail] what to do when
|
288
283
|
# there are columns in the row which are not used.
|
289
|
-
# @option options :ignored_columns [Array<String,Symbol>]
|
290
|
-
# columns to consider "used" even if the heuristic
|
291
|
-
# match them to anything.
|
284
|
+
# @option options :ignored_columns [Array<[String,Symbol]>]
|
285
|
+
# columns to consider "used" even if the heuristic or the
|
286
|
+
# column map don't match them to anything.
|
292
287
|
#
|
293
|
-
# @return [
|
294
|
-
def
|
295
|
-
|
296
|
-
|
288
|
+
# @return [void]
|
289
|
+
def produce_one_for_one(name, model, options={})
  # Work on a copy: the caller's hash must stay untouched so that it
  # can be reused for several producers. Modifying it in place would
  # drop its :query and rewrite its :column_map / :ignored_columns.
  opts = options.dup
  query = opts.delete(:query)

  opts[:on_unused] ||= on_unused_columns
  # Keys and values are normalized to strings for the producer.
  opts[:column_map] =
    (opts[:column_map] || {}).inject({}) { |h, (k, v)| h[k.to_s] = v.to_s; h }
  # Per-producer ignored columns are combined with the transformer-wide ones.
  opts[:ignored_columns] =
    (opts[:ignored_columns] || []).collect(&:to_s) + ignored_columns

  record_producers << OneForOneProducer.new(name, query, model, opts)
end
|
298
|
+
end
|
297
299
|
|
298
|
-
|
299
|
-
|
300
|
+
##
|
301
|
+
# The class encapsulating one call to {DSL#produce_records}
|
302
|
+
class RecordProducer < Struct.new(:name, :query, :row_processor)
  # @return [String] the explicitly configured query, or a query that
  #   selects every row from the table called `name` when none was given.
  def query
    configured = super
    configured ? configured : "SELECT * FROM #{name}"
  end
end
|
300
307
|
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
308
|
+
##
|
309
|
+
# The class encapsulating one call to {DSL#produce_one_for_one}
|
310
|
+
class OneForOneProducer < RecordProducer
|
311
|
+
attr_reader :model, :options
|
312
|
+
|
313
|
+
def initialize(name, query, model, options)
|
314
|
+
super(name, query, self)
|
315
|
+
@model = model
|
316
|
+
@options = options
|
305
317
|
end
|
306
318
|
|
307
319
|
##
|
308
|
-
#
|
309
|
-
#
|
310
|
-
|
311
|
-
|
312
|
-
|
320
|
+
# Produces a single instance of {#model} using the values in the
|
321
|
+
# row as mapped by {#column_map}.
|
322
|
+
def convert_row(row)
  mapping = column_map(row.members)
  # Columns that are neither mapped to a variable nor explicitly ignored.
  leftovers = row.members.collect { |member| member.to_s } - mapping.keys - options[:ignored_columns]

  if !leftovers.empty? && options[:on_unused] == :fail
    raise UnusedColumnsForModelError.new(leftovers)
  end

  # Key the row's values by target variable name and hand them to the model.
  values = {}
  mapping.each { |col_name, var_name| values[var_name] = row[col_name] }
  model.new(values)
end
# Lets the producer itself serve as its row processor.
alias :call :convert_row
|
316
334
|
|
335
|
+
##
|
336
|
+
# @param [Array<String>] column_names
|
337
|
+
# @return [Hash<String, String>] a mapping from the given
|
338
|
+
# column names to MDES variable names for the configured
|
339
|
+
# model. This mapping reflects both the configured explicit
|
340
|
+
# mapping and the heuristic.
|
341
|
+
def column_map(column_names)
|
317
342
|
available_props = model.properties.collect { |p| p.name.to_s }
|
318
|
-
available_props -=
|
343
|
+
available_props -= options[:column_map].values
|
319
344
|
|
320
|
-
|
321
|
-
|
345
|
+
column_names.inject(options[:column_map].dup) do |map, column|
|
346
|
+
column = column.to_s
|
322
347
|
prop =
|
323
|
-
|
324
|
-
column_map[column.to_s]
|
325
|
-
else
|
348
|
+
unless options[:column_map][column]
|
326
349
|
[
|
327
350
|
[//, ''],
|
328
351
|
[/_code$/, ''],
|
329
352
|
[/_code$/, '_id'],
|
330
353
|
[/_other$/, '_oth'],
|
331
|
-
].collect do |pattern,
|
354
|
+
].collect do |pattern, substitution|
|
332
355
|
if column =~ pattern
|
333
356
|
prefixed_property_name(available_props,
|
334
|
-
column.
|
357
|
+
column.sub(pattern, substitution), options[:prefix])
|
335
358
|
end
|
336
359
|
end.compact.first
|
337
360
|
end
|
338
361
|
if prop
|
339
362
|
available_props.delete(prop)
|
340
|
-
|
341
|
-
else
|
342
|
-
unused << column.to_sym
|
363
|
+
map[column] = prop
|
343
364
|
end
|
365
|
+
map
|
344
366
|
end
|
345
|
-
[pv, unused]
|
346
367
|
end
|
347
|
-
private :create_property_values
|
348
368
|
|
349
|
-
##
|
350
|
-
# Determines if the model has a property with the given name,
|
351
|
-
# with or without the prefix.
|
352
|
-
#
|
353
|
-
# @return [String,nil] the name of an existing property on the
|
354
|
-
# model, either with or without the prefix; or nil.
|
355
369
|
def prefixed_property_name(available_props, name, prefix)
|
356
370
|
if prefix && available_props.include?(prop = "#{prefix}#{name}")
|
357
371
|
prop
|
@@ -362,14 +376,6 @@ module NcsNavigator::Warehouse::Transformers
|
|
362
376
|
private :prefixed_property_name
|
363
377
|
end
|
364
378
|
|
365
|
-
##
|
366
|
-
# @private
|
367
|
-
class RecordProducer < Struct.new(:name, :query, :row_processor)
|
368
|
-
def query
|
369
|
-
super || "SELECT * FROM #{name}"
|
370
|
-
end
|
371
|
-
end
|
372
|
-
|
373
379
|
##
|
374
380
|
# Methods to assist with using classes that mix in {Database}.
|
375
381
|
module Factory
|
@@ -378,7 +384,7 @@ module NcsNavigator::Warehouse::Transformers
|
|
378
384
|
# enumerable.
|
379
385
|
# @see EnumTransformer
|
380
386
|
def create_transformer(configuration, options={})
|
381
|
-
EnumTransformer.new(new(configuration, options))
|
387
|
+
EnumTransformer.new(configuration, new(configuration, options))
|
382
388
|
end
|
383
389
|
end
|
384
390
|
|
@@ -388,7 +394,7 @@ module NcsNavigator::Warehouse::Transformers
|
|
388
394
|
def initialize(unused)
|
389
395
|
super(
|
390
396
|
"#{unused.size} unused column#{'s' unless unused.size == 1} when building model. " +
|
391
|
-
"Use :
|
397
|
+
"Use :ignored_columns => %w(#{unused.join(' ')}) or :on_unused => :ignore to suppress this error.")
|
392
398
|
@unused = unused
|
393
399
|
end
|
394
400
|
end
|
@@ -1,5 +1,7 @@
|
|
1
1
|
require 'ncs_navigator/warehouse'
|
2
2
|
|
3
|
+
require 'forwardable'
|
4
|
+
|
3
5
|
module NcsNavigator::Warehouse::Transformers
|
4
6
|
##
|
5
7
|
# A transformer that accepts a series of model instances in the form
|
@@ -7,16 +9,26 @@ module NcsNavigator::Warehouse::Transformers
|
|
7
9
|
# array, or it might be a custom class that streams through
|
8
10
|
# thousands of instances without having them all in memory at once.
|
9
11
|
class EnumTransformer
|
12
|
+
extend Forwardable
|
13
|
+
|
10
14
|
##
|
11
15
|
# @return [Enumerable] the enumeration that will be transformed.
|
12
16
|
attr_reader :enum
|
13
17
|
|
18
|
+
def_delegators :@configuration, :log, :shell
|
19
|
+
|
14
20
|
##
|
21
|
+
# @param [Configuration] configuration
|
15
22
|
# @param [Enumerable] enum
|
16
|
-
def initialize(enum)
|
23
|
+
def initialize(configuration, enum)
|
24
|
+
@configuration = configuration
|
17
25
|
@enum = enum
|
18
26
|
end
|
19
27
|
|
28
|
+
def name
|
29
|
+
"EnumTransformer for #{enum.class}"
|
30
|
+
end
|
31
|
+
|
20
32
|
##
|
21
33
|
# Takes each in-memory record provided by the configured
|
22
34
|
# `Enumerable`, validates it, and saves it if it is valid.
|
@@ -26,14 +38,17 @@ module NcsNavigator::Warehouse::Transformers
|
|
26
38
|
def transform(status)
|
27
39
|
enum.each do |record|
|
28
40
|
if record.valid?
|
41
|
+
log.debug("Saving valid record #{record_ident record}.")
|
29
42
|
begin
|
30
43
|
unless record.save
|
31
|
-
|
32
|
-
|
44
|
+
msg = "Could not save. #{record_ident(record)}."
|
45
|
+
log.error msg
|
46
|
+
status.unsuccessful_record(record, msg)
|
33
47
|
end
|
34
48
|
rescue => e
|
35
|
-
|
36
|
-
|
49
|
+
msg = "Error on save. #{e.class}: #{e}. #{record_ident(record)}."
|
50
|
+
log.error msg
|
51
|
+
status.unsuccessful_record(record, msg)
|
37
52
|
end
|
38
53
|
else
|
39
54
|
messages = record.errors.keys.collect { |prop|
|
@@ -42,9 +57,11 @@ module NcsNavigator::Warehouse::Transformers
|
|
42
57
|
"#{e} (#{prop}=#{v.inspect})."
|
43
58
|
}
|
44
59
|
}.flatten
|
45
|
-
|
46
|
-
|
60
|
+
msg = "Invalid record. #{messages.join(' ')} #{record_ident(record)}."
|
61
|
+
log.error msg
|
62
|
+
status.unsuccessful_record(record, msg)
|
47
63
|
end
|
64
|
+
status.record_count += 1
|
48
65
|
end
|
49
66
|
end
|
50
67
|
|
@@ -52,7 +69,8 @@ module NcsNavigator::Warehouse::Transformers
|
|
52
69
|
|
53
70
|
def record_ident(rec)
|
54
71
|
# No composite keys in the MDES
|
55
|
-
'%s %s=%s' % [
|
72
|
+
'%s %s=%s' % [
|
73
|
+
rec.class.name.demodulize, rec.class.key.first.name, rec.key.try(:first).inspect]
|
56
74
|
end
|
57
75
|
end
|
58
76
|
end
|