ncs_mdes_warehouse 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. data/CHANGELOG.md +16 -0
  2. data/generated_models/ncs_navigator/warehouse/models/two_point_zero/env_equipment_prob_log.rb +1 -1
  3. data/generated_models/ncs_navigator/warehouse/models/two_point_zero/incident.rb +6 -6
  4. data/generated_models/ncs_navigator/warehouse/models/two_point_zero/participant_rvis.rb +1 -1
  5. data/generated_models/ncs_navigator/warehouse/models/two_point_zero/sample_shipping.rb +1 -1
  6. data/lib/ncs_navigator/warehouse.rb +4 -0
  7. data/lib/ncs_navigator/warehouse/cli.rb +31 -1
  8. data/lib/ncs_navigator/warehouse/configuration.rb +49 -9
  9. data/lib/ncs_navigator/warehouse/database_initializer.rb +62 -4
  10. data/lib/ncs_navigator/warehouse/models.rb +3 -0
  11. data/lib/ncs_navigator/warehouse/postgresql.rb +7 -0
  12. data/lib/ncs_navigator/warehouse/postgresql/pgpass.rb +79 -0
  13. data/lib/ncs_navigator/warehouse/table_modeler/mdes_ext.rb +9 -0
  14. data/lib/ncs_navigator/warehouse/table_modeler/model_template.rb.erb +1 -1
  15. data/lib/ncs_navigator/warehouse/transform_load.rb +55 -0
  16. data/lib/ncs_navigator/warehouse/transform_status.rb +63 -0
  17. data/lib/ncs_navigator/warehouse/transformers.rb +0 -1
  18. data/lib/ncs_navigator/warehouse/transformers/database.rb +91 -85
  19. data/lib/ncs_navigator/warehouse/transformers/enum_transformer.rb +26 -8
  20. data/lib/ncs_navigator/warehouse/transformers/vdr_xml.rb +1 -1
  21. data/lib/ncs_navigator/warehouse/transformers/vdr_xml/reader.rb +11 -4
  22. data/lib/ncs_navigator/warehouse/version.rb +1 -1
  23. data/spec/bcdatabase/test_sqlite.yml +4 -0
  24. data/spec/ncs_navigator/warehouse/configuration_spec.rb +42 -0
  25. data/spec/ncs_navigator/warehouse/postgresql/pgpass_spec.rb +187 -0
  26. data/spec/ncs_navigator/warehouse/table_modeler_spec.rb +15 -1
  27. data/spec/ncs_navigator/warehouse/transform_load_spec.rb +152 -0
  28. data/spec/ncs_navigator/warehouse/transformers/database_spec.rb +24 -28
  29. data/spec/ncs_navigator/warehouse/transformers/enum_transformer_spec.rb +16 -10
  30. data/spec/ncs_navigator/warehouse/transformers/vdr_xml/made_up_vdr_xml.xml +4 -4
  31. data/spec/ncs_navigator/warehouse/transformers/vdr_xml/reader_spec.rb +8 -3
  32. data/spec/spec_helper.rb +1 -1
  33. metadata +44 -37
  34. data/lib/ncs_navigator/warehouse/transformers/transform_status.rb +0 -23
@@ -0,0 +1,55 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ require 'forwardable'
4
+
5
+ module NcsNavigator::Warehouse
6
+ class TransformLoad
7
+ extend Forwardable
8
+
9
+ attr_reader :configuration
10
+ attr_reader :statuses
11
+
12
+ def_delegators :@configuration, :log, :shell
13
+
14
+ def initialize(configuration)
15
+ @configuration = configuration
16
+ end
17
+
18
+ def run
19
+ position = 0
20
+ @statuses = configuration.transformers.collect do |transformer|
21
+ ::DataMapper.repository(:mdes_warehouse_working) do
22
+ build_status_for(transformer, position).tap do |status|
23
+ begin
24
+ transformer.transform(status)
25
+ rescue => e
26
+ shell.say_line("\nTransform failed. (See log for more detail.)")
27
+ status.add_error("Transform failed. #{e.class}: #{e}.")
28
+ end
29
+ status.end_time = Time.now
30
+ unless status.save
31
+ shell.say_line("Could not save status for transformer #{status.name}")
32
+ log.warn("Could not save status for transformer #{status.name}")
33
+ end
34
+ position += 1
35
+ end
36
+ end
37
+ end
38
+
39
+ if statuses.detect { |s| !s.transform_errors.empty? }
40
+ false
41
+ else
42
+ true
43
+ end
44
+ end
45
+
46
+ def build_status_for(transformer, position)
47
+ TransformStatus.new(
48
+ :name => transformer.respond_to?(:name) ? transformer.name : transformer.class.name,
49
+ :start_time => Time.now,
50
+ :position => position
51
+ )
52
+ end
53
+ private :build_status_for
54
+ end
55
+ end
@@ -0,0 +1,63 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ require 'data_mapper'
4
+
5
+ module NcsNavigator::Warehouse
6
+ ##
7
+ # Tracks and stores the progress of a particular transform.
8
+ class TransformStatus
9
+ include ::DataMapper::Resource
10
+
11
+ ##
12
+ # DataMapper 1.2 attempts to query for associations, even if the
13
+ # record has never been saved. This fails if the database
14
+ # connection is not set up (as in most of this library's
15
+ # tests). This method creates a new instance which works around
16
+ # this problem, at the cost of the instances not being accurately
17
+ # persistable.
18
+ def self.memory_only(name)
19
+ TransformStatus.new(:name => name).tap do |s|
20
+ def s.transform_errors
21
+ @transform_errors ||= []
22
+ end
23
+ end
24
+ end
25
+
26
+ storage_names[:default] = storage_names[:mdes_warehouse_working] =
27
+ storage_names[:mdes_warehouse_reporting] = 'wh_transform_status'
28
+
29
+ property :id, Serial
30
+ property :name, String, :required => true, :length => 255
31
+ property :start_time, DateTime
32
+ property :end_time, DateTime
33
+ property :record_count, Integer, :default => 0
34
+ property :position, Integer
35
+
36
+ has n, :transform_errors, 'NcsNavigator::Warehouse::TransformError'
37
+
38
+ def add_error(message)
39
+ self.transform_errors << TransformError.new(:message => message)
40
+ end
41
+
42
+ def unsuccessful_record(record, message)
43
+ self.transform_errors <<
44
+ TransformError.new(:model_class => record.class.name, :message => message)
45
+ end
46
+ end
47
+
48
+ class TransformError
49
+ include ::DataMapper::Resource
50
+
51
+ storage_names[:default] = storage_names[:mdes_warehouse_working] =
52
+ storage_names[:mdes_warehouse_reporting] = 'wh_transform_error'
53
+
54
+ property :id, Serial
55
+ property :message, Text, :required => true
56
+ property :model_class, String, :length => 255
57
+
58
+ belongs_to :transform_status, TransformStatus, :required => true
59
+ end
60
+
61
+ TransformError.finalize
62
+ TransformStatus.finalize
63
+ end
@@ -13,7 +13,6 @@ module NcsNavigator::Warehouse
13
13
  module Transformers
14
14
  autoload :Database, 'ncs_navigator/warehouse/transformers/database'
15
15
  autoload :EnumTransformer, 'ncs_navigator/warehouse/transformers/enum_transformer'
16
- autoload :TransformStatus, 'ncs_navigator/warehouse/transformers/transform_status'
17
16
  autoload :VdrXml, 'ncs_navigator/warehouse/transformers/vdr_xml'
18
17
  end
19
18
  end
@@ -125,7 +125,7 @@ module NcsNavigator::Warehouse::Transformers
125
125
  log.debug("Executing query for producer #{rp.name}:\n#{rp.query}")
126
126
  repository.adapter.select(rp.query).each do |row|
127
127
  row_count += 1
128
- [*rp.row_processor.call(row)].each do |result|
128
+ [*rp.row_processor.call(row)].compact.each do |result|
129
129
  yield result
130
130
  result_count += 1
131
131
  shell.back_up_and_say(24, "(%-6d in / %-6d out)" % [row_count, result_count])
@@ -198,7 +198,7 @@ module NcsNavigator::Warehouse::Transformers
198
198
  if columns.empty?
199
199
  @ignored_columns ||= []
200
200
  else
201
- @ignored_columns = columns.collect(&:to_sym)
201
+ @ignored_columns = columns.collect(&:to_s)
202
202
  end
203
203
  end
204
204
 
@@ -242,116 +242,130 @@ module NcsNavigator::Warehouse::Transformers
242
242
  end
243
243
 
244
244
  ##
245
- # Performs automatic conversion from a row struct to a
246
- # an instance of a particular warehouse model. This conversion
247
- # uses several heuristics to apply values from the row to the
248
- # model instance. Each column in the row will be converted into
249
- # at most one model property value. In the order that they are
250
- # applied, the heuristics are:
245
+ # Define a translation from the results of a query into exactly
246
+ # one warehouse record per result row. This method, while more
247
+ # restrictive than {#produce_records}, allows for rapidly
248
+ # mapping data which is already a close match for MDES records.
251
249
  #
250
+ # The mapping uses a series of heuristics to map from the
251
+ # columns in each query result row to at most one MDES variable
252
+ # from the specified model.
253
+ #
254
+ # * If the column appears as a key in the `:column_map` hash,
255
+ # use the associated value as the target variable name.
252
256
  # * If there's a `:prefix` option, the column is named {X},
253
- # and there's a property named {prefix}{X}, use that
254
- # property.
255
- # * If the column is named {X} and there's a property named
256
- # {X}, use that property.
257
- # * If the column is named {X}_code and there's a property
258
- # named {X}, use that property.
259
- # * If the column is named {X}_code and there's a property
260
- # named {X}_id, use that property.
257
+ # and there's a variable named {prefix}{X}, use that
258
+ # variable.
259
+ # * If the column is named {X} and there's a variable named
260
+ # {X}, use that variable.
261
+ # * If the column is named {X}_code and there's a variable
262
+ # named {X}, use that variable.
263
+ # * If the column is named {X}_code and there's a variable
264
+ # named {X}_id, use that variable.
261
265
  # * If the column is named {X}_other and there's a property
262
- # named {X}_oth, use that property.
263
- #
264
- # Separately, any property value in the instance may be
265
- # explicitly set via a hash passed as the `:explicit`
266
- # option. Property values in `:explicit` take precedence over
267
- # the heuristically-determined values.
266
+ # named {X}_oth, use that variable.
268
267
  #
269
- # @param [Class] model the warehouse model class (e.g.,
270
- # `NcsNavigator::Warehouse::Models::TwoPointZero::Person`)
271
- # @param [Object] row a DataMapper row struct that is the source
272
- # of the data for the instance. (This is the kind of object
273
- # that is yielded to {#produce_records} blocks.)
274
- # @param [Hash] options Options controlling the created
275
- # instance.
268
+ # @param [Symbol] name the name of this producer; if you don't
269
+ # specify a `:query`, the default is to return every row from
270
+ # the application table with this name.
271
+ # @param [Class] model the warehouse model to which results of
272
+ # the query will be mapped.
273
+ # @param [Hash] options
274
+ # @option options :query [String] the query to execute for this
275
+ # producer. If not specified, the query is `"SELECT * FROM #{name}"`.
276
276
  #
277
277
  # @option options :prefix [String] a prefix to use when looking
278
278
  # for matching property values. (See above.)
279
279
  # @option options :column_map [Hash<Symbol, Symbol>] explicit
280
- # mapping from column name to model property name. This
281
- # mapping is consulted before the heuristics are applied and
282
- # before `:property_values` is used.
283
- # @option options :property_values [Hash<Symbol, Object>]
284
- # explicit values to use. Keys are model property names and
285
- # values are the desired values. Any values in this hash trump
286
- # the heuristically-determined values.
280
+ # mapping from column name to MDES variable name. This
281
+ # mapping is consulted before the heuristics are applied.
287
282
  # @option options :on_unused [:ignore,:fail] what to do when
288
283
  # there are columns in the row which are not used.
289
- # @option options :ignored_columns [Array<String,Symbol>]
290
- # columns to consider "used" even if the heuristic doesn't
291
- # match them to anything.
284
+ # @option options :ignored_columns [Array<[String,Symbol]>]
285
+ # columns to consider "used" even if the heuristic or the
286
+ # column map don't match them to anything.
292
287
  #
293
- # @return [Object] an instance of `model`.
294
- def model_row(model, row, options={})
295
- unused_behavior = options[:on_unused] || on_unused_columns
296
- pv, unused = create_property_values(model, row, options)
288
+ # @return [void]
289
+ def produce_one_for_one(name, model, options={})
290
+ options[:on_unused] ||= on_unused_columns
291
+ options[:column_map] =
292
+ (options[:column_map] || {}).inject({}) { |h, (k, v)| h[k.to_s] = v.to_s; h }
293
+ options[:ignored_columns] =
294
+ (options[:ignored_columns] || []).collect(&:to_s) + ignored_columns
295
+
296
+ record_producers << OneForOneProducer.new(name, options.delete(:query), model, options)
297
+ end
298
+ end
297
299
 
298
- unused -= ignored_columns
299
- unused -= options[:ignored_or_used].collect(&:to_sym) if options[:ignored_or_used]
300
+ ##
301
+ # The class encapsulating one call to {DSL#produce_records}
302
+ class RecordProducer < Struct.new(:name, :query, :row_processor)
303
+ def query
304
+ super || "SELECT * FROM #{name}"
305
+ end
306
+ end
300
307
 
301
- if unused_behavior == :fail && !unused.empty?
302
- raise UnusedColumnsForModelError.new(unused)
303
- end
304
- model.new(pv)
308
+ ##
309
+ # The class encapsulating one call to {DSL#produce_one_for_one}
310
+ class OneForOneProducer < RecordProducer
311
+ attr_reader :model, :options
312
+
313
+ def initialize(name, query, model, options)
314
+ super(name, query, self)
315
+ @model = model
316
+ @options = options
305
317
  end
306
318
 
307
319
  ##
308
- # Returns a two-member array. The first member is the matched
309
- # property values. The second is the columns from the row which
310
- # were not matched to anything.
311
- def create_property_values(model, row, options)
312
- column_map = (options[:column_map] || {}).inject({}) { |h, (k, v)| h[k.to_s] = v.to_s; h }
320
+ # Produces a single instance of {#model} using the values in the
321
+ # row as mapped by {#column_map}.
322
+ def convert_row(row)
323
+ col_map = column_map(row.members)
324
+ unused = row.members.collect(&:to_s) - col_map.keys - options[:ignored_columns]
313
325
 
314
- pv = (options[:property_values] || {}).inject({}) { |h, (k, v)| h[k.to_s] = v; h }
315
- column_map.values.each { |prop| pv.delete(prop) }
326
+ if options[:on_unused] == :fail && !unused.empty?
327
+ raise UnusedColumnsForModelError.new(unused)
328
+ end
329
+ model.new(
330
+ col_map.inject({}) { |pv, (col_name, var_name)| pv[var_name] = row[col_name]; pv }
331
+ )
332
+ end
333
+ alias :call :convert_row
316
334
 
335
+ ##
336
+ # @param [Array<String>] column_names
337
+ # @return [Hash<String, String>] a mapping from the given
338
+ # column names to MDES variable names for the configured
339
+ # model. This mapping reflects both the configured explicit
340
+ # mapping and the heuristic.
341
+ def column_map(column_names)
317
342
  available_props = model.properties.collect { |p| p.name.to_s }
318
- available_props -= pv.keys
343
+ available_props -= options[:column_map].values
319
344
 
320
- unused = []
321
- row.members.each do |column|
345
+ column_names.inject(options[:column_map].dup) do |map, column|
346
+ column = column.to_s
322
347
  prop =
323
- if column_map[column.to_s]
324
- column_map[column.to_s]
325
- else
348
+ unless options[:column_map][column]
326
349
  [
327
350
  [//, ''],
328
351
  [/_code$/, ''],
329
352
  [/_code$/, '_id'],
330
353
  [/_other$/, '_oth'],
331
- ].collect do |pattern, substution|
354
+ ].collect do |pattern, substitution|
332
355
  if column =~ pattern
333
356
  prefixed_property_name(available_props,
334
- column.to_s.sub(pattern, substution), options[:prefix])
357
+ column.sub(pattern, substitution), options[:prefix])
335
358
  end
336
359
  end.compact.first
337
360
  end
338
361
  if prop
339
362
  available_props.delete(prop)
340
- pv[prop] = row[column]
341
- else
342
- unused << column.to_sym
363
+ map[column] = prop
343
364
  end
365
+ map
344
366
  end
345
- [pv, unused]
346
367
  end
347
- private :create_property_values
348
368
 
349
- ##
350
- # Determines if the model has a property with the given name,
351
- # with or without the prefix.
352
- #
353
- # @return [String,nil] the name of an existing property on the
354
- # model, either with or without the prefix; or nil.
355
369
  def prefixed_property_name(available_props, name, prefix)
356
370
  if prefix && available_props.include?(prop = "#{prefix}#{name}")
357
371
  prop
@@ -362,14 +376,6 @@ module NcsNavigator::Warehouse::Transformers
362
376
  private :prefixed_property_name
363
377
  end
364
378
 
365
- ##
366
- # @private
367
- class RecordProducer < Struct.new(:name, :query, :row_processor)
368
- def query
369
- super || "SELECT * FROM #{name}"
370
- end
371
- end
372
-
373
379
  ##
374
380
  # Methods to assist with using classes that mix in {Database}.
375
381
  module Factory
@@ -378,7 +384,7 @@ module NcsNavigator::Warehouse::Transformers
378
384
  # enumerable.
379
385
  # @see EnumTransformer
380
386
  def create_transformer(configuration, options={})
381
- EnumTransformer.new(new(configuration, options))
387
+ EnumTransformer.new(configuration, new(configuration, options))
382
388
  end
383
389
  end
384
390
 
@@ -388,7 +394,7 @@ module NcsNavigator::Warehouse::Transformers
388
394
  def initialize(unused)
389
395
  super(
390
396
  "#{unused.size} unused column#{'s' unless unused.size == 1} when building model. " +
391
- "Use :ignored_or_used => %w(#{unused.join(' ')}) or :unused => :ignore to suppress this error.")
397
+ "Use :ignored_columns => %w(#{unused.join(' ')}) or :on_unused => :ignore to suppress this error.")
392
398
  @unused = unused
393
399
  end
394
400
  end
@@ -1,5 +1,7 @@
1
1
  require 'ncs_navigator/warehouse'
2
2
 
3
+ require 'forwardable'
4
+
3
5
  module NcsNavigator::Warehouse::Transformers
4
6
  ##
5
7
  # A transformer that accepts a series of model instances in the form
@@ -7,16 +9,26 @@ module NcsNavigator::Warehouse::Transformers
7
9
  # array, or it might be a custom class that streams through
8
10
  # thousands of instances without having them all in memory at once.
9
11
  class EnumTransformer
12
+ extend Forwardable
13
+
10
14
  ##
11
15
  # @return [Enumerable] the enumeration that will be transformed.
12
16
  attr_reader :enum
13
17
 
18
+ def_delegators :@configuration, :log, :shell
19
+
14
20
  ##
21
+ # @param [Configuration] configuration
15
22
  # @param [Enumerable] enum
16
- def initialize(enum)
23
+ def initialize(configuration, enum)
24
+ @configuration = configuration
17
25
  @enum = enum
18
26
  end
19
27
 
28
+ def name
29
+ "EnumTransformer for #{enum.class}"
30
+ end
31
+
20
32
  ##
21
33
  # Takes each in-memory record provided by the configured
22
34
  # `Enumerable`, validates it, and saves it if it is valid.
@@ -26,14 +38,17 @@ module NcsNavigator::Warehouse::Transformers
26
38
  def transform(status)
27
39
  enum.each do |record|
28
40
  if record.valid?
41
+ log.debug("Saving valid record #{record_ident record}.")
29
42
  begin
30
43
  unless record.save
31
- status.unsuccessful_record(
32
- record, "Could not save. #{record_ident(record)}.")
44
+ msg = "Could not save. #{record_ident(record)}."
45
+ log.error msg
46
+ status.unsuccessful_record(record, msg)
33
47
  end
34
48
  rescue => e
35
- status.unsuccessful_record(
36
- record, "Error on save. #{e.class}: #{e}. #{record_ident(record)}.")
49
+ msg = "Error on save. #{e.class}: #{e}. #{record_ident(record)}."
50
+ log.error msg
51
+ status.unsuccessful_record(record, msg)
37
52
  end
38
53
  else
39
54
  messages = record.errors.keys.collect { |prop|
@@ -42,9 +57,11 @@ module NcsNavigator::Warehouse::Transformers
42
57
  "#{e} (#{prop}=#{v.inspect})."
43
58
  }
44
59
  }.flatten
45
- status.unsuccessful_record(
46
- record, "Invalid record. #{messages.join(' ')} #{record_ident(record)}.")
60
+ msg = "Invalid record. #{messages.join(' ')} #{record_ident(record)}."
61
+ log.error msg
62
+ status.unsuccessful_record(record, msg)
47
63
  end
64
+ status.record_count += 1
48
65
  end
49
66
  end
50
67
 
@@ -52,7 +69,8 @@ module NcsNavigator::Warehouse::Transformers
52
69
 
53
70
  def record_ident(rec)
54
71
  # No composite keys in the MDES
55
- '%s %s=%s' % [rec.class.name.demodulize, rec.class.key.first.name, rec.key.first.inspect]
72
+ '%s %s=%s' % [
73
+ rec.class.name.demodulize, rec.class.key.first.name, rec.key.try(:first).inspect]
56
74
  end
57
75
  end
58
76
  end