ncs_mdes_warehouse 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. data/CHANGELOG.md +27 -0
  2. data/Rakefile +2 -2
  3. data/lib/ncs_navigator/warehouse.rb +2 -0
  4. data/lib/ncs_navigator/warehouse/configuration.rb +50 -0
  5. data/lib/ncs_navigator/warehouse/hooks.rb +21 -0
  6. data/lib/ncs_navigator/warehouse/hooks/etl_status_email.rb +88 -0
  7. data/lib/ncs_navigator/warehouse/mailer_templates/etl_status_email/failure_message.text.erb +8 -0
  8. data/lib/ncs_navigator/warehouse/mailer_templates/etl_status_email/success_message.text.erb +7 -0
  9. data/lib/ncs_navigator/warehouse/stringify_trace.rb +31 -0
  10. data/lib/ncs_navigator/warehouse/transform_load.rb +23 -3
  11. data/lib/ncs_navigator/warehouse/transform_status.rb +11 -2
  12. data/lib/ncs_navigator/warehouse/transformers/database.rb +13 -1
  13. data/lib/ncs_navigator/warehouse/transformers/enum_transformer.rb +65 -20
  14. data/lib/ncs_navigator/warehouse/transformers/subprocess_transformer.rb +4 -1
  15. data/lib/ncs_navigator/warehouse/transformers/vdr_xml/reader.rb +8 -0
  16. data/lib/ncs_navigator/warehouse/version.rb +1 -1
  17. data/ncs_mdes_warehouse.gemspec +1 -0
  18. data/spec/navigator.ini +3 -0
  19. data/spec/ncs_navigator/warehouse/configuration_spec.rb +79 -2
  20. data/spec/ncs_navigator/warehouse/data_mapper_spec.rb +1 -1
  21. data/spec/ncs_navigator/warehouse/database_initializer_spec.rb +1 -1
  22. data/spec/ncs_navigator/warehouse/hooks/etl_status_email_spec.rb +126 -0
  23. data/spec/ncs_navigator/warehouse/models/mdes_model_spec.rb +1 -1
  24. data/spec/ncs_navigator/warehouse/postgresql/pgpass_spec.rb +1 -1
  25. data/spec/ncs_navigator/warehouse/stringify_trace_spec.rb +58 -0
  26. data/spec/ncs_navigator/warehouse/table_modeler_spec.rb +1 -1
  27. data/spec/ncs_navigator/warehouse/transform_load_spec.rb +119 -5
  28. data/spec/ncs_navigator/warehouse/transform_status_spec.rb +32 -1
  29. data/spec/ncs_navigator/warehouse/transformers/database_spec.rb +11 -1
  30. data/spec/ncs_navigator/warehouse/transformers/enum_transformer_spec.rb +75 -1
  31. data/spec/ncs_navigator/warehouse/transformers/sampling_units_spec.rb +1 -1
  32. data/spec/ncs_navigator/warehouse/transformers/subprocess_transformer_spec.rb +7 -1
  33. data/spec/ncs_navigator/warehouse/transformers/vdr_xml/reader_spec.rb +14 -3
  34. data/spec/ncs_navigator/warehouse/transformers/vdr_xml_spec.rb +1 -1
  35. data/spec/ncs_navigator/warehouse/xml_emitter_spec.rb +1 -1
  36. data/spec/ncs_navigator/warehouse_spec.rb +1 -1
  37. data/spec/spec_helper.rb +3 -1
  38. metadata +307 -154
data/CHANGELOG.md CHANGED
@@ -1,6 +1,33 @@
1
1
  NCS Navigator MDES Warehouse History
2
2
  ====================================
3
3
 
4
+ 0.5.0
5
+ -----
6
+
7
+ - Add "post-ETL hooks" to ETL process: objects with callbacks which
8
+ are executed when the ETL completes. (#1725)
9
+
10
+ - Add post-ETL hook for sending e-mail when the ETL process completes,
11
+ indicating success or failure. (#1601)
12
+
13
+ - Include the input filename in the name of transformers based on
14
+ `VdrXml::Reader`. (#1927)
15
+
16
+ - Exclude parent bundler environment when executing subprocess in
17
+ SubprocessTransformer. (#2012)
18
+
19
+ - Strip leading and trailing whitespace from values in one-to-one
20
+ transformer. (#2028)
21
+
22
+ - Catch all exceptions during enumeration in EnumTransformer. (#2070)
23
+
24
+ - Log caught exceptions during ETL. Previously they were only reported
25
+ to the shell and stored in the transform status table. (#2070)
26
+
27
+ - An enumerator may communicate recoverable errors to EnumTransformer
28
+ by yielding one or more TransformErrors as part of its
29
+ enumeration. (#2073)
30
+
4
31
  0.4.1
5
32
  -----
6
33
 
data/Rakefile CHANGED
@@ -14,12 +14,12 @@ task :spec => 'spec:all'
14
14
 
15
15
  namespace :spec do
16
16
  RSpec::Core::RakeTask.new(:fast) do |t|
17
- t.pattern = "spec/**/*_spec.rb"
17
+ t.pattern = ENV['SPEC_PATTERN'] || "spec/**/*_spec.rb"
18
18
  t.rspec_opts = %q(--tag ~slow)
19
19
  end
20
20
 
21
21
  RSpec::Core::RakeTask.new(:all) do |t|
22
- t.pattern = "spec/**/*_spec.rb"
22
+ t.pattern = ENV['SPEC_PATTERN'] || "spec/**/*_spec.rb"
23
23
  end
24
24
  end
25
25
 
@@ -11,8 +11,10 @@ module NcsNavigator
11
11
  autoload :Configuration, 'ncs_navigator/warehouse/configuration'
12
12
  autoload :DataMapper, 'ncs_navigator/warehouse/data_mapper'
13
13
  autoload :DatabaseInitializer, 'ncs_navigator/warehouse/database_initializer'
14
+ autoload :Hooks, 'ncs_navigator/warehouse/hooks'
14
15
  autoload :Models, 'ncs_navigator/warehouse/models'
15
16
  autoload :PostgreSQL, 'ncs_navigator/warehouse/postgresql'
17
+ autoload :StringifyTrace, 'ncs_navigator/warehouse/stringify_trace'
16
18
  autoload :TableModeler, 'ncs_navigator/warehouse/table_modeler'
17
19
  autoload :Transformers, 'ncs_navigator/warehouse/transformers'
18
20
  autoload :TransformError, 'ncs_navigator/warehouse/transform_status'
@@ -82,6 +82,36 @@ module NcsNavigator::Warehouse
82
82
  end
83
83
  end
84
84
 
85
+ ####
86
+ #### Hooks
87
+ ####
88
+
89
+ ##
90
+ # @return [Array<#etl_succeeded,#etl_failed>] the configured
91
+ # post-ETL hooks.
92
+ def post_etl_hooks
93
+ @post_etl_hooks ||= []
94
+ end
95
+
96
+ ##
97
+ # Adds a post-ETL hook to the list for this warehouse instance.
98
+ #
99
+ # @return [void]
100
+ # @param [#etl_succeeded,#etl_failed] candidate the hook
101
+ def add_post_etl_hook(candidate)
102
+ expected_methods = [:etl_succeeded, :etl_failed]
103
+ implemented_methods = expected_methods.select { |m| candidate.respond_to?(m) }
104
+ if implemented_methods.empty?
105
+ msg = "#{candidate.inspect} does not have an #{expected_methods.join(' or ')} method."
106
+ if candidate.respond_to?(:new)
107
+ msg += " Perhaps you meant #{candidate}.new?"
108
+ end
109
+ raise Error, msg
110
+ else
111
+ post_etl_hooks << candidate
112
+ end
113
+ end
114
+
85
115
  ####
86
116
  #### MDES version
87
117
  ####
@@ -157,6 +187,26 @@ module NcsNavigator::Warehouse
157
187
  @navigator = NcsNavigator::Configuration.new(ini_file)
158
188
  end
159
189
 
190
+ ###
191
+ ### E-mail
192
+ ###
193
+
194
+ ##
195
+ # Configures `ActionMailer` with the options implied by the suite
196
+ # configuration.
197
+ #
198
+ # @return [void]
199
+ def set_up_action_mailer
200
+ return if @action_mailer_set_up
201
+ require 'action_mailer'
202
+ ActionMailer::Base.delivery_method = :smtp
203
+ ActionMailer::Base.smtp_settings = navigator.action_mailer_smtp_settings
204
+ ActionMailer::Base.view_paths = [
205
+ File.expand_path('../mailer_templates', __FILE__)
206
+ ]
207
+ @action_mailer_set_up = true
208
+ end
209
+
160
210
  ####
161
211
  #### Bcdatabase
162
212
  ####
@@ -0,0 +1,21 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ module NcsNavigator::Warehouse
4
+ ##
5
+ # The namespace for post-ETL hook implementations which are provided
6
+ # with the warehouse.
7
+ #
8
+ # A post-ETL hook is an object which responds to either
9
+ # `etl_succeeded` or `etl_failed` (or both). Each method takes a
10
+ # single hash argument which, when the method is called, will
11
+ # contain the following keys:
12
+ #
13
+ # * `:transform_statuses` the list of {TransformStatus}es describing
14
+ # the ETL process that was just completed
15
+ # * `:configuration` a reference to the warehouse {Configuration}.
16
+ #
17
+ # @see Configuration#add_post_etl_hook
18
+ module Hooks
19
+ autoload :EtlStatusEmail, 'ncs_navigator/warehouse/hooks/etl_status_email'
20
+ end
21
+ end
@@ -0,0 +1,88 @@
1
+ require 'ncs_navigator/warehouse'
2
+ require 'action_mailer'
3
+ require 'time'
4
+
5
+ module NcsNavigator::Warehouse::Hooks
6
+ ##
7
+ # A post-ETL hook which sends a message to configured e-mail
8
+ # accounts after each ETL run. The message indicates whether the ETL
9
+ # succeeded or failed and some summary statistics, but no further
10
+ # detail.
11
+ class EtlStatusEmail
12
+ ##
13
+ # @param [Hash<Symbol, Object>] options
14
+ # @option options [Array<String>] :to the e-mail addresses to
15
+ # whom the notifications will be sent.
16
+ def initialize(options={})
17
+ @to = options[:to] or 'Need at least one recipient'
18
+ end
19
+
20
+ ##
21
+ # @param [Hash<Symbol, Object>] args the arguments received from
22
+ # the ETL process.
23
+ # @return [void]
24
+ def etl_succeeded(args)
25
+ args[:configuration].set_up_action_mailer
26
+
27
+ Mailer.success_message(@to, args[:transform_statuses]).deliver
28
+ end
29
+
30
+ ##
31
+ # @param [Hash<Symbol, Object>] args the arguments received from
32
+ # the ETL process.
33
+ # @return [void]
34
+ def etl_failed(args)
35
+ args[:configuration].set_up_action_mailer
36
+
37
+ Mailer.failure_message(@to, args[:transform_statuses]).deliver
38
+ end
39
+
40
+ ##
41
+ # @private
42
+ class Mailer < ::ActionMailer::Base
43
+ self.mailer_name = 'etl_status_email'
44
+
45
+ def success_message(to, transform_statuses)
46
+ analyze_statuses(transform_statuses)
47
+
48
+ mail(
49
+ # TODO: make configurable
50
+ :from => 'mdes-warehouse',
51
+ :to => to,
52
+ :subject => '[NCS Navigator] Warehouse load successful'
53
+ )
54
+ end
55
+
56
+ def failure_message(to, transform_statuses)
57
+ analyze_statuses(transform_statuses)
58
+
59
+ mail(
60
+ # TODO: make configurable
61
+ :from => 'mdes-warehouse',
62
+ :to => to,
63
+ :subject => '[NCS Navigator] Warehouse load failed'
64
+ )
65
+ end
66
+
67
+ private
68
+
69
+ def analyze_statuses(transform_statuses)
70
+ start_time_dt = transform_statuses.first.start_time
71
+ end_time_dt = transform_statuses.last.end_time
72
+
73
+ @start_time = start_time_dt.to_s
74
+
75
+ @transform_duration = duration_string((end_time_dt - start_time_dt) * 24 * 3600)
76
+ @transform_count = transform_statuses.size
77
+ @record_count = transform_statuses.inject(0) { |sum, s| sum + s.record_count }
78
+
79
+ @success_count = transform_statuses.select { |s| s.transform_errors.empty? }.size
80
+ @failure_count = transform_statuses.size - @success_count
81
+ end
82
+
83
+ def duration_string(seconds)
84
+ [ seconds / 3600, (seconds % 3600) / 60, seconds % 60 ].collect { |t| '%02d' % t }.join(':')
85
+ end
86
+ end
87
+ end
88
+ end
@@ -0,0 +1,8 @@
1
+ An MDES Warehouse transform and load process ran, starting at <%= @start_time %>.
2
+
3
+ <%= @success_count %> transformation<%= 's' unless @success_count == 1 %> executed successfully.
4
+ <%= @failure_count %> transformation<%= 's' unless @failure_count == 1 %> failed.
5
+
6
+ Executing <%= @transform_count %> transform<%= 's' if @transform_count != 1 %>
7
+ took <%= @transform_duration %> and
8
+ produced <%= @record_count %> total record<%= 's' if @record_count != 1 %>.
@@ -0,0 +1,7 @@
1
+ An MDES Warehouse transform and load process ran, starting at <%= @start_time %>.
2
+
3
+ All transformations executed successfully.
4
+
5
+ Executing <%= @transform_count %> transform<%= 's' if @transform_count != 1 %>
6
+ took <%= @transform_duration %> and
7
+ produced <%= @record_count %> total record<%= 's' if @record_count != 1 %>.
@@ -0,0 +1,31 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ module NcsNavigator::Warehouse
4
+ ##
5
+ # @api private
6
+ module StringifyTrace
7
+ ##
8
+ # Utility to generate a nicely formatted string from a ruby
9
+ # exception trace.
10
+ #
11
+ # @param [Array<String>] backtrace
12
+ # @return [String]
13
+ def stringify_trace(backtrace)
14
+ # an array of arrays containing [filename, line, msg]
15
+ trace_lines = backtrace.collect { |l| l.scan(/^(.*?)\:(\d*)\:?(.*)$/).first || ['', '', l] }
16
+
17
+ lengths = trace_lines.inject([0, 0, 0]) { |lens, components|
18
+ 0.upto(2) { |i| lens[i] = [lens[i], components[i].length].max }
19
+ lens
20
+ }
21
+
22
+ formats = ["%#{lengths[0]}s", "%#{lengths[1]}s", "%s"]
23
+ trace_lines.collect { |components|
24
+ formats.zip(components).collect { |format, component|
25
+ (format % component)
26
+ }.select { |s| s =~ /\S/ }.join(':')
27
+ }.join("\n")
28
+ end
29
+ module_function :stringify_trace
30
+ end
31
+ end
@@ -5,6 +5,7 @@ require 'forwardable'
5
5
  module NcsNavigator::Warehouse
6
6
  class TransformLoad
7
7
  extend Forwardable
8
+ include StringifyTrace
8
9
 
9
10
  attr_reader :configuration
10
11
  attr_reader :statuses
@@ -34,12 +35,16 @@ module NcsNavigator::Warehouse
34
35
  transformer.transform(status)
35
36
  rescue => e
36
37
  shell.say_line("\nTransform failed. (See log for more detail.)")
37
- status.add_error("Transform failed. #{e.class}: #{e}.")
38
+ msg = "Transform failed. #{e.class}: #{e}\n#{stringify_trace(e.backtrace)}"
39
+ log.error(msg)
40
+ status.add_error(msg)
38
41
  end
39
42
  end
40
43
  rescue DataObjects::IntegrityError => e
41
- shell.say_line("\nTransform failed with data integrity error. (See log for more detail.)")
42
- log.error("Transform failed with data integrity error: #{e}.")
44
+ shell.say_line(
45
+ "\nTransform failed with data integrity error. (See log for more detail.)")
46
+ log.error(
47
+ "Transform failed with data integrity error: #{e}.\n#{stringify_trace(e.backtrace)}")
43
48
  status.add_error("Transform failed with data integrity error: #{e}.")
44
49
  end
45
50
  status.end_time = Time.now
@@ -53,8 +58,10 @@ module NcsNavigator::Warehouse
53
58
  end
54
59
 
55
60
  if statuses.detect { |s| !s.transform_errors.empty? }
61
+ dispatch_post_etl_hooks(:etl_failed)
56
62
  false
57
63
  else
64
+ dispatch_post_etl_hooks(:etl_succeeded)
58
65
  true
59
66
  end
60
67
  end
@@ -67,5 +74,18 @@ module NcsNavigator::Warehouse
67
74
  )
68
75
  end
69
76
  private :build_status_for
77
+
78
+ def dispatch_post_etl_hooks(method)
79
+ configuration.post_etl_hooks.each do |hook|
80
+ begin
81
+ args = { :transform_statuses => statuses, :configuration => configuration }
82
+ hook.send(method, args) if hook.respond_to?(method)
83
+ rescue => e
84
+ log.error(
85
+ "Error invoking #{method.inspect} on #{hook.inspect}: #{e.class} #{e}.\n#{stringify_trace(e.backtrace)}")
86
+ end
87
+ end
88
+ end
89
+ private :dispatch_post_etl_hooks
70
90
  end
71
91
  end
@@ -14,8 +14,8 @@ module NcsNavigator::Warehouse
14
14
  # tests). This method creates a new instance which works around
15
15
  # this problem, at the cost of the instances not being accurately
16
16
  # persistable.
17
- def self.memory_only(name)
18
- TransformStatus.new(:name => name).tap do |s|
17
+ def self.memory_only(name, attrs={})
18
+ TransformStatus.new(attrs.merge(:name => name)).tap do |s|
19
19
  def s.transform_errors
20
20
  @transform_errors ||= []
21
21
  end
@@ -59,6 +59,15 @@ module NcsNavigator::Warehouse
59
59
  property :record_id, String, :length => 255
60
60
 
61
61
  belongs_to :transform_status, TransformStatus, :required => true
62
+
63
+ def self.for_exception(exception, context_message=nil)
64
+ TransformError.new(:message => [
65
+ context_message,
66
+ "#{exception.class}: #{exception}",
67
+ StringifyTrace.stringify_trace(exception.backtrace)
68
+ ].compact.join("\n")
69
+ )
70
+ end
62
71
  end
63
72
 
64
73
  TransformError.finalize
@@ -327,11 +327,23 @@ module NcsNavigator::Warehouse::Transformers
327
327
  raise UnusedColumnsForModelError.new(unused)
328
328
  end
329
329
  model.new(
330
- col_map.inject({}) { |pv, (col_name, var_name)| pv[var_name] = row[col_name]; pv }
330
+ col_map.inject({}) { |pv, (col_name, var_name)|
331
+ pv[var_name] = clean_value(row[col_name]);
332
+ pv
333
+ }
331
334
  )
332
335
  end
333
336
  alias :call :convert_row
334
337
 
338
+ def clean_value(v)
339
+ if v.respond_to?(:strip)
340
+ v.strip
341
+ else
342
+ v
343
+ end
344
+ end
345
+ private :clean_value
346
+
335
347
  ##
336
348
  # @param [Array<String>] column_names
337
349
  # @return [Hash<String, String>] a mapping from the given
@@ -4,12 +4,28 @@ require 'forwardable'
4
4
 
5
5
  module NcsNavigator::Warehouse::Transformers
6
6
  ##
7
- # A transformer that accepts a series of model instances in the form
8
- # of a ruby Enumerable. An enumerable might be as simple as an
9
- # array, or it might be a custom class that streams through
10
- # thousands of instances without having them all in memory at once.
7
+ # A transformer that accepts a series of model instances and
8
+ # {TransformError}s in the form of a ruby Enumerable. An enumerable
9
+ # might be as simple as an array, or it might be a custom class that
10
+ # streams through thousands of instances without having them all in
11
+ # memory at once.
12
+ #
13
+ # Each value yielded by the enumerable may be either an instance of
14
+ # an MDES model or a {TransformError}. If it is a model instance, it
15
+ # will have global values (e.g., PSU ID) applied as necessary,
16
+ # validated, and saved.
17
+ #
18
+ # On the other hand, if it is a `TransformError`, the error will be
19
+ # associated with the status for the transform run. The benefit of
20
+ # the enumeration yielding a `TransformError` instead of throwing an
21
+ # exception is that the enumeration may continue after the error is
22
+ # reported. If the error is unrecoverable, the enum should throw an
23
+ # exception instead of returning a
24
+ # `TransformError`. `EnumTransformer` will handle recording the
25
+ # error appropriately in that case.
11
26
  class EnumTransformer
12
27
  extend Forwardable
28
+ include NcsNavigator::Warehouse::StringifyTrace
13
29
 
14
30
  ##
15
31
  # @return [Enumerable] the enumeration that will be transformed.
@@ -26,7 +42,12 @@ module NcsNavigator::Warehouse::Transformers
26
42
  end
27
43
 
28
44
  def name
29
- "EnumTransformer for #{enum.class}"
45
+ enum_name = if enum.respond_to?(:name)
46
+ enum.name
47
+ else
48
+ enum.class
49
+ end
50
+ "EnumTransformer for #{enum_name}"
30
51
  end
31
52
 
32
53
  ##
@@ -36,31 +57,55 @@ module NcsNavigator::Warehouse::Transformers
36
57
  # @param [TransformStatus] status
37
58
  # @return [void]
38
59
  def transform(status)
60
+ begin
61
+ do_transform(status)
62
+ rescue Exception => e
63
+ err = NcsNavigator::Warehouse::TransformError.for_exception(e, 'Enumeration failed.')
64
+ log.error err.message
65
+ status.transform_errors << err
66
+ end
67
+ end
68
+
69
+ private
70
+
71
+ def do_transform(status)
39
72
  enum.each do |record|
40
- apply_global_values_if_necessary(record)
41
- if record.valid?
42
- log.debug("Saving valid record #{record_ident record}.")
43
- begin
44
- unless record.save
45
- msg = "Could not save valid record #{record.inspect}. #{record_messages(record).join(' ')}"
46
- log.error msg
47
- status.unsuccessful_record(record, msg)
48
- end
49
- rescue => e
50
- msg = "Error on save. #{e.class}: #{e}."
73
+ case record
74
+ when NcsNavigator::Warehouse::TransformError
75
+ receive_transform_error(record, status)
76
+ else
77
+ save_model_instance(record, status)
78
+ end
79
+ end
80
+ end
81
+
82
+ def save_model_instance(record, status)
83
+ apply_global_values_if_necessary(record)
84
+ if record.valid?
85
+ log.debug("Saving valid record #{record_ident record}.")
86
+ begin
87
+ unless record.save
88
+ msg = "Could not save valid record #{record.inspect}. #{record_messages(record).join(' ')}"
51
89
  log.error msg
52
90
  status.unsuccessful_record(record, msg)
53
91
  end
54
- else
55
- msg = "Invalid record. #{record_messages(record).join(' ')}"
92
+ rescue => e
93
+ msg = "Error on save. #{e.class}: #{e}."
56
94
  log.error msg
57
95
  status.unsuccessful_record(record, msg)
58
96
  end
59
- status.record_count += 1
97
+ else
98
+ msg = "Invalid record. #{record_messages(record).join(' ')}"
99
+ log.error msg
100
+ status.unsuccessful_record(record, msg)
60
101
  end
102
+ status.record_count += 1
61
103
  end
62
104
 
63
- private
105
+ def receive_transform_error(error, status)
106
+ error.id = nil
107
+ status.transform_errors << error
108
+ end
64
109
 
65
110
  def record_ident(rec)
66
111
  # No composite keys in the MDES