ncs_mdes_warehouse 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. data/CHANGELOG.md +16 -0
  2. data/generated_models/ncs_navigator/warehouse/models/two_point_zero/env_equipment_prob_log.rb +1 -1
  3. data/generated_models/ncs_navigator/warehouse/models/two_point_zero/incident.rb +6 -6
  4. data/generated_models/ncs_navigator/warehouse/models/two_point_zero/participant_rvis.rb +1 -1
  5. data/generated_models/ncs_navigator/warehouse/models/two_point_zero/sample_shipping.rb +1 -1
  6. data/lib/ncs_navigator/warehouse.rb +4 -0
  7. data/lib/ncs_navigator/warehouse/cli.rb +31 -1
  8. data/lib/ncs_navigator/warehouse/configuration.rb +49 -9
  9. data/lib/ncs_navigator/warehouse/database_initializer.rb +62 -4
  10. data/lib/ncs_navigator/warehouse/models.rb +3 -0
  11. data/lib/ncs_navigator/warehouse/postgresql.rb +7 -0
  12. data/lib/ncs_navigator/warehouse/postgresql/pgpass.rb +79 -0
  13. data/lib/ncs_navigator/warehouse/table_modeler/mdes_ext.rb +9 -0
  14. data/lib/ncs_navigator/warehouse/table_modeler/model_template.rb.erb +1 -1
  15. data/lib/ncs_navigator/warehouse/transform_load.rb +55 -0
  16. data/lib/ncs_navigator/warehouse/transform_status.rb +63 -0
  17. data/lib/ncs_navigator/warehouse/transformers.rb +0 -1
  18. data/lib/ncs_navigator/warehouse/transformers/database.rb +91 -85
  19. data/lib/ncs_navigator/warehouse/transformers/enum_transformer.rb +26 -8
  20. data/lib/ncs_navigator/warehouse/transformers/vdr_xml.rb +1 -1
  21. data/lib/ncs_navigator/warehouse/transformers/vdr_xml/reader.rb +11 -4
  22. data/lib/ncs_navigator/warehouse/version.rb +1 -1
  23. data/spec/bcdatabase/test_sqlite.yml +4 -0
  24. data/spec/ncs_navigator/warehouse/configuration_spec.rb +42 -0
  25. data/spec/ncs_navigator/warehouse/postgresql/pgpass_spec.rb +187 -0
  26. data/spec/ncs_navigator/warehouse/table_modeler_spec.rb +15 -1
  27. data/spec/ncs_navigator/warehouse/transform_load_spec.rb +152 -0
  28. data/spec/ncs_navigator/warehouse/transformers/database_spec.rb +24 -28
  29. data/spec/ncs_navigator/warehouse/transformers/enum_transformer_spec.rb +16 -10
  30. data/spec/ncs_navigator/warehouse/transformers/vdr_xml/made_up_vdr_xml.xml +4 -4
  31. data/spec/ncs_navigator/warehouse/transformers/vdr_xml/reader_spec.rb +8 -3
  32. data/spec/spec_helper.rb +1 -1
  33. metadata +44 -37
  34. data/lib/ncs_navigator/warehouse/transformers/transform_status.rb +0 -23
@@ -0,0 +1,55 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ require 'forwardable'
4
+
5
+ module NcsNavigator::Warehouse
6
##
# Runs every transformer registered on a configuration, in order, and
# records one {TransformStatus} per transformer.
class TransformLoad
  extend Forwardable

  ##
  # @return [Object] the configuration given to {#initialize}. It must
  #   respond to `transformers`, `log` and `shell`.
  attr_reader :configuration

  ##
  # @return [Array, nil] the statuses built by the most recent {#run},
  #   in transformer order; nil until {#run} has been called.
  attr_reader :statuses

  def_delegators :@configuration, :log, :shell

  ##
  # @param [Object] configuration the source of the transformers, the
  #   logger and the shell used by {#run}.
  def initialize(configuration)
    @configuration = configuration
  end

  ##
  # Executes each configured transformer inside the
  # `:mdes_warehouse_working` repository scope and stores the resulting
  # statuses in {#statuses}.
  #
  # An exception raised by a transformer does not abort the run: it is
  # reported on the shell, written to the log, and attached to that
  # transformer's status as an error.
  #
  # @return [Boolean] true if no status has any transform errors,
  #   false otherwise.
  def run
    @statuses = configuration.transformers.each_with_index.collect do |transformer, position|
      # NOTE(review): relies on DataMapper.repository returning the
      # block's value, exactly as the previous implementation did.
      ::DataMapper.repository(:mdes_warehouse_working) do
        run_transformer(transformer, position)
      end
    end

    statuses.all? { |status| status.transform_errors.empty? }
  end

  ##
  # Runs a single transformer and returns its finished status.
  #
  # @param [#transform] transformer
  # @param [Integer] position zero-based index of the transformer.
  # @return [TransformStatus]
  def run_transformer(transformer, position)
    status = build_status_for(transformer, position)

    begin
      transformer.transform(status)
    rescue => e
      message = "Transform failed. #{e.class}: #{e}."
      shell.say_line("\nTransform failed. (See log for more detail.)")
      # The shell message points the user at the log, so the failure
      # (with its backtrace, when there is one) has to be written there.
      log.error("#{message}\n#{(e.backtrace || []).join("\n")}")
      status.add_error(message)
    end

    status.end_time = Time.now
    unless status.save
      save_failure = "Could not save status for transformer #{status.name}"
      shell.say_line(save_failure)
      log.warn(save_failure)
    end

    status
  end
  private :run_transformer

  ##
  # Builds the unsaved status for one transformer. The name is the
  # transformer's own `name` when it has one, its class name otherwise.
  #
  # @param [Object] transformer
  # @param [Integer] position
  # @return [TransformStatus]
  def build_status_for(transformer, position)
    TransformStatus.new(
      :name => transformer.respond_to?(:name) ? transformer.name : transformer.class.name,
      :start_time => Time.now,
      :position => position
    )
  end
  private :build_status_for
end
55
+ end
@@ -0,0 +1,63 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ require 'data_mapper'
4
+
5
+ module NcsNavigator::Warehouse
6
##
# Records the progress and outcome of one transform: its name, start
# and end times, record count, position, and any errors raised.
class TransformStatus
  include ::DataMapper::Resource

  # The same table backs this model in every repository.
  table_name = 'wh_transform_status'
  [:default, :mdes_warehouse_working, :mdes_warehouse_reporting].each do |repository_name|
    storage_names[repository_name] = table_name
  end

  property :id,           Serial
  property :name,         String, :required => true, :length => 255
  property :start_time,   DateTime
  property :end_time,     DateTime
  property :record_count, Integer, :default => 0
  property :position,     Integer

  has n, :transform_errors, 'NcsNavigator::Warehouse::TransformError'

  ##
  # Builds an instance whose `transform_errors` is a plain in-memory
  # array instead of the DataMapper association.
  #
  # DataMapper 1.2 attempts to query for associations even if the
  # record has never been saved, which fails when no database
  # connection is set up (as in most of this library's tests). The
  # trade-off is that the returned instance is not accurately
  # persistable.
  #
  # @param [String] name the status name.
  # @return [TransformStatus]
  def self.memory_only(name)
    status = TransformStatus.new(:name => name)
    # Override the association reader on this one object only.
    class << status
      def transform_errors
        @transform_errors ||= []
      end
    end
    status
  end

  ##
  # Attaches an error that is not tied to a particular record.
  #
  # @param [String] message
  def add_error(message)
    error = TransformError.new(:message => message)
    transform_errors << error
  end

  ##
  # Attaches an error for a specific record, remembering the record's
  # class name alongside the message.
  #
  # @param [Object] record the record that could not be processed.
  # @param [String] message
  def unsuccessful_record(record, message)
    error = TransformError.new(:model_class => record.class.name, :message => message)
    transform_errors << error
  end
end
47
+
48
##
# One error message recorded against a {TransformStatus}, optionally
# tagged with the name of the model class it relates to.
class TransformError
  include ::DataMapper::Resource

  # The same table backs this model in every repository.
  table_name = 'wh_transform_error'
  [:default, :mdes_warehouse_working, :mdes_warehouse_reporting].each do |repository_name|
    storage_names[repository_name] = table_name
  end

  property :id,          Serial
  property :message,     Text, :required => true
  property :model_class, String, :length => 255

  belongs_to :transform_status, TransformStatus, :required => true
end
60
+
61
+ TransformError.finalize
62
+ TransformStatus.finalize
63
+ end
@@ -13,7 +13,6 @@ module NcsNavigator::Warehouse
13
13
  module Transformers
14
14
  autoload :Database, 'ncs_navigator/warehouse/transformers/database'
15
15
  autoload :EnumTransformer, 'ncs_navigator/warehouse/transformers/enum_transformer'
16
- autoload :TransformStatus, 'ncs_navigator/warehouse/transformers/transform_status'
17
16
  autoload :VdrXml, 'ncs_navigator/warehouse/transformers/vdr_xml'
18
17
  end
19
18
  end
@@ -125,7 +125,7 @@ module NcsNavigator::Warehouse::Transformers
125
125
  log.debug("Executing query for producer #{rp.name}:\n#{rp.query}")
126
126
  repository.adapter.select(rp.query).each do |row|
127
127
  row_count += 1
128
- [*rp.row_processor.call(row)].each do |result|
128
+ [*rp.row_processor.call(row)].compact.each do |result|
129
129
  yield result
130
130
  result_count += 1
131
131
  shell.back_up_and_say(24, "(%-6d in / %-6d out)" % [row_count, result_count])
@@ -198,7 +198,7 @@ module NcsNavigator::Warehouse::Transformers
198
198
  if columns.empty?
199
199
  @ignored_columns ||= []
200
200
  else
201
- @ignored_columns = columns.collect(&:to_sym)
201
+ @ignored_columns = columns.collect(&:to_s)
202
202
  end
203
203
  end
204
204
 
@@ -242,116 +242,130 @@ module NcsNavigator::Warehouse::Transformers
242
242
  end
243
243
 
244
244
  ##
245
- # Performs automatic conversion from a row struct to a
246
- # an instance of a particular warehouse model. This conversion
247
- # uses several heuristics to apply values from the row to the
248
- # model instance. Each column in the row will be converted into
249
- # at most one model property value. In the order that they are
250
- # applied, the heuristics are:
245
+ # Define a translation from the results of a query into exactly
246
+ # one warehouse record per result row. This method, while more
247
+ # restrictive than {#produce_records}, allows for rapidly
248
+ # mapping data which is already a close match for MDES records.
251
249
  #
250
+ # The mapping uses a series of heuristics to map from the
251
+ # columns in each query result row to at most one MDES variable
252
+ # from the specified model.
253
+ #
254
+ # * If the column appears as a key in the `:column_map` hash,
255
+ # use the associated value as the target variable name.
252
256
  # * If there's a `:prefix` option, the column is named {X},
253
- # and there's a property named {prefix}{X}, use that
254
- # property.
255
- # * If the column is named {X} and there's a property named
256
- # {X}, use that property.
257
- # * If the column is named {X}_code and there's a property
258
- # named {X}, use that property.
259
- # * If the column is named {X}_code and there's a property
260
- # named {X}_id, use that property.
257
+ # and there's a variable named {prefix}{X}, use that
258
+ # variable.
259
+ # * If the column is named {X} and there's a variable named
260
+ # {X}, use that variable.
261
+ # * If the column is named {X}_code and there's a variable
262
+ # named {X}, use that variable.
263
+ # * If the column is named {X}_code and there's a variable
264
+ # named {X}_id, use that variable.
261
265
  # * If the column is named {X}_other and there's a variable
262
- # named {X}_oth, use that property.
263
- #
264
- # Separately, any property value in the instance may be
265
- # explicitly set via a hash passed as the `:explicit`
266
- # option. Property values in `:explicit` take precedence over
267
- # the heuristically-determined values.
266
+ # named {X}_oth, use that variable.
268
267
  #
269
- # @param [Class] model the warehouse model class (e.g.,
270
- # `NcsNavigator::Warehouse::Models::TwoPointZero::Person`)
271
- # @param [Object] row a DataMapper row struct that is the source
272
- # of the data for the instance. (This is the kind of object
273
- # that is yielded to {#produce_records} blocks.)
274
- # @param [Hash] options Options controlling the created
275
- # instance.
268
+ # @param [Symbol] name the name of this producer; if you don't
269
+ # specify a `:query`, the default is to return every row from
270
+ # the application table with this name.
271
+ # @param [Class] model the warehouse model to which results of
272
+ # the query will be mapped.
273
+ # @param [Hash] options
274
+ # @option options :query [String] the query to execute for this
275
+ # producer. If not specified, the query is `"SELECT * FROM #{name}"`.
276
276
  #
277
277
  # @option options :prefix [String] a prefix to use when looking
278
278
  # for matching property values. (See above.)
279
279
  # @option options :column_map [Hash<Symbol, Symbol>] explicit
280
- # mapping from column name to model property name. This
281
- # mapping is consulted before the heuristics are applied and
282
- # before `:property_values` is used.
283
- # @option options :property_values [Hash<Symbol, Object>]
284
- # explicit values to use. Keys are model property names and
285
- # values are the desired values. Any values in this hash trump
286
- # the heuristically-determined values.
280
+ # mapping from column name to MDES variable name. This
281
+ # mapping is consulted before the heuristics are applied.
287
282
  # @option options :on_unused [:ignore,:fail] what to do when
288
283
  # there are columns in the row which are not used.
289
- # @option options :ignored_columns [Array<String,Symbol>]
290
- # columns to consider "used" even if the heuristic doesn't
291
- # match them to anything.
284
+ # @option options :ignored_columns [Array<[String,Symbol]>]
285
+ # columns to consider "used" even if the heuristic or the
286
+ # column map don't match them to anything.
292
287
  #
293
- # @return [Object] an instance of `model`.
294
- def model_row(model, row, options={})
295
- unused_behavior = options[:on_unused] || on_unused_columns
296
- pv, unused = create_property_values(model, row, options)
288
+ # @return [void]
289
+ def produce_one_for_one(name, model, options={})
290
+ options[:on_unused] ||= on_unused_columns
291
+ options[:column_map] =
292
+ (options[:column_map] || {}).inject({}) { |h, (k, v)| h[k.to_s] = v.to_s; h }
293
+ options[:ignored_columns] =
294
+ (options[:ignored_columns] || []).collect(&:to_s) + ignored_columns
295
+
296
+ record_producers << OneForOneProducer.new(name, options.delete(:query), model, options)
297
+ end
298
+ end
297
299
 
298
- unused -= ignored_columns
299
- unused -= options[:ignored_or_used].collect(&:to_sym) if options[:ignored_or_used]
300
+ ##
301
+ # The class encapsulating one call to {DSL#produce_records}
302
+ class RecordProducer < Struct.new(:name, :query, :row_processor)
303
+ def query
304
+ super || "SELECT * FROM #{name}"
305
+ end
306
+ end
300
307
 
301
- if unused_behavior == :fail && !unused.empty?
302
- raise UnusedColumnsForModelError.new(unused)
303
- end
304
- model.new(pv)
308
+ ##
309
+ # The class encapsulating one call to {DSL#produce_one_for_one}
310
+ class OneForOneProducer < RecordProducer
311
+ attr_reader :model, :options
312
+
313
+ def initialize(name, query, model, options)
314
+ super(name, query, self)
315
+ @model = model
316
+ @options = options
305
317
  end
306
318
 
307
319
  ##
308
- # Returns a two-member array. The first member is the matched
309
- # property values. The second is the columns from the row which
310
- # were not matched to anything.
311
- def create_property_values(model, row, options)
312
- column_map = (options[:column_map] || {}).inject({}) { |h, (k, v)| h[k.to_s] = v.to_s; h }
320
+ # Produces a single instance of {#model} using the values in the
321
+ # row as mapped by {#column_map}.
322
+ def convert_row(row)
323
+ col_map = column_map(row.members)
324
+ unused = row.members.collect(&:to_s) - col_map.keys - options[:ignored_columns]
313
325
 
314
- pv = (options[:property_values] || {}).inject({}) { |h, (k, v)| h[k.to_s] = v; h }
315
- column_map.values.each { |prop| pv.delete(prop) }
326
+ if options[:on_unused] == :fail && !unused.empty?
327
+ raise UnusedColumnsForModelError.new(unused)
328
+ end
329
+ model.new(
330
+ col_map.inject({}) { |pv, (col_name, var_name)| pv[var_name] = row[col_name]; pv }
331
+ )
332
+ end
333
+ alias :call :convert_row
316
334
 
335
+ ##
336
+ # @param [Array<String>] column_names
337
+ # @return [Hash<String, String>] a mapping from the given
338
+ # column names to MDES variable names for the configured
339
+ # model. This mapping reflects both the configured explicit
340
+ # mapping and the heuristic.
341
+ def column_map(column_names)
317
342
  available_props = model.properties.collect { |p| p.name.to_s }
318
- available_props -= pv.keys
343
+ available_props -= options[:column_map].values
319
344
 
320
- unused = []
321
- row.members.each do |column|
345
+ column_names.inject(options[:column_map].dup) do |map, column|
346
+ column = column.to_s
322
347
  prop =
323
- if column_map[column.to_s]
324
- column_map[column.to_s]
325
- else
348
+ unless options[:column_map][column]
326
349
  [
327
350
  [//, ''],
328
351
  [/_code$/, ''],
329
352
  [/_code$/, '_id'],
330
353
  [/_other$/, '_oth'],
331
- ].collect do |pattern, substution|
354
+ ].collect do |pattern, substitution|
332
355
  if column =~ pattern
333
356
  prefixed_property_name(available_props,
334
- column.to_s.sub(pattern, substution), options[:prefix])
357
+ column.sub(pattern, substitution), options[:prefix])
335
358
  end
336
359
  end.compact.first
337
360
  end
338
361
  if prop
339
362
  available_props.delete(prop)
340
- pv[prop] = row[column]
341
- else
342
- unused << column.to_sym
363
+ map[column] = prop
343
364
  end
365
+ map
344
366
  end
345
- [pv, unused]
346
367
  end
347
- private :create_property_values
348
368
 
349
- ##
350
- # Determines if the model has a property with the given name,
351
- # with or without the prefix.
352
- #
353
- # @return [String,nil] the name of an existing property on the
354
- # model, either with or without the prefix; or nil.
355
369
  def prefixed_property_name(available_props, name, prefix)
356
370
  if prefix && available_props.include?(prop = "#{prefix}#{name}")
357
371
  prop
@@ -362,14 +376,6 @@ module NcsNavigator::Warehouse::Transformers
362
376
  private :prefixed_property_name
363
377
  end
364
378
 
365
- ##
366
- # @private
367
- class RecordProducer < Struct.new(:name, :query, :row_processor)
368
- def query
369
- super || "SELECT * FROM #{name}"
370
- end
371
- end
372
-
373
379
  ##
374
380
  # Methods to assist with using classes that mix in {Database}.
375
381
  module Factory
@@ -378,7 +384,7 @@ module NcsNavigator::Warehouse::Transformers
378
384
  # enumerable.
379
385
  # @see EnumTransformer
380
386
  def create_transformer(configuration, options={})
381
- EnumTransformer.new(new(configuration, options))
387
+ EnumTransformer.new(configuration, new(configuration, options))
382
388
  end
383
389
  end
384
390
 
@@ -388,7 +394,7 @@ module NcsNavigator::Warehouse::Transformers
388
394
  def initialize(unused)
389
395
  super(
390
396
  "#{unused.size} unused column#{'s' unless unused.size == 1} when building model. " +
391
- "Use :ignored_or_used => %w(#{unused.join(' ')}) or :unused => :ignore to suppress this error.")
397
+ "Use :ignored_columns => %w(#{unused.join(' ')}) or :on_unused => :ignore to suppress this error.")
392
398
  @unused = unused
393
399
  end
394
400
  end
@@ -1,5 +1,7 @@
1
1
  require 'ncs_navigator/warehouse'
2
2
 
3
+ require 'forwardable'
4
+
3
5
  module NcsNavigator::Warehouse::Transformers
4
6
  ##
5
7
  # A transformer that accepts a series of model instances in the form
@@ -7,16 +9,26 @@ module NcsNavigator::Warehouse::Transformers
7
9
  # array, or it might be a custom class that streams through
8
10
  # thousands of instances without having them all in memory at once.
9
11
  class EnumTransformer
12
+ extend Forwardable
13
+
10
14
  ##
11
15
  # @return [Enumerable] the enumeration that will be transformed.
12
16
  attr_reader :enum
13
17
 
18
+ def_delegators :@configuration, :log, :shell
19
+
14
20
  ##
21
+ # @param [Configuration] configuration
15
22
  # @param [Enumerable] enum
16
- def initialize(enum)
23
+ def initialize(configuration, enum)
24
+ @configuration = configuration
17
25
  @enum = enum
18
26
  end
19
27
 
28
+ def name
29
+ "EnumTransformer for #{enum.class}"
30
+ end
31
+
20
32
  ##
21
33
  # Takes each in-memory record provided by the configured
22
34
  # `Enumerable`, validates it, and saves it if it is valid.
@@ -26,14 +38,17 @@ module NcsNavigator::Warehouse::Transformers
26
38
  def transform(status)
27
39
  enum.each do |record|
28
40
  if record.valid?
41
+ log.debug("Saving valid record #{record_ident record}.")
29
42
  begin
30
43
  unless record.save
31
- status.unsuccessful_record(
32
- record, "Could not save. #{record_ident(record)}.")
44
+ msg = "Could not save. #{record_ident(record)}."
45
+ log.error msg
46
+ status.unsuccessful_record(record, msg)
33
47
  end
34
48
  rescue => e
35
- status.unsuccessful_record(
36
- record, "Error on save. #{e.class}: #{e}. #{record_ident(record)}.")
49
+ msg = "Error on save. #{e.class}: #{e}. #{record_ident(record)}."
50
+ log.error msg
51
+ status.unsuccessful_record(record, msg)
37
52
  end
38
53
  else
39
54
  messages = record.errors.keys.collect { |prop|
@@ -42,9 +57,11 @@ module NcsNavigator::Warehouse::Transformers
42
57
  "#{e} (#{prop}=#{v.inspect})."
43
58
  }
44
59
  }.flatten
45
- status.unsuccessful_record(
46
- record, "Invalid record. #{messages.join(' ')} #{record_ident(record)}.")
60
+ msg = "Invalid record. #{messages.join(' ')} #{record_ident(record)}."
61
+ log.error msg
62
+ status.unsuccessful_record(record, msg)
47
63
  end
64
+ status.record_count += 1
48
65
  end
49
66
  end
50
67
 
@@ -52,7 +69,8 @@ module NcsNavigator::Warehouse::Transformers
52
69
 
53
70
  def record_ident(rec)
54
71
  # No composite keys in the MDES
55
- '%s %s=%s' % [rec.class.name.demodulize, rec.class.key.first.name, rec.key.first.inspect]
72
+ '%s %s=%s' % [
73
+ rec.class.name.demodulize, rec.class.key.first.name, rec.key.try(:first).inspect]
56
74
  end
57
75
  end
58
76
  end