ncs_mdes_warehouse 0.4.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/CHANGELOG.md +27 -0
  2. data/Rakefile +2 -2
  3. data/lib/ncs_navigator/warehouse.rb +2 -0
  4. data/lib/ncs_navigator/warehouse/configuration.rb +50 -0
  5. data/lib/ncs_navigator/warehouse/hooks.rb +21 -0
  6. data/lib/ncs_navigator/warehouse/hooks/etl_status_email.rb +88 -0
  7. data/lib/ncs_navigator/warehouse/mailer_templates/etl_status_email/failure_message.text.erb +8 -0
  8. data/lib/ncs_navigator/warehouse/mailer_templates/etl_status_email/success_message.text.erb +7 -0
  9. data/lib/ncs_navigator/warehouse/stringify_trace.rb +31 -0
  10. data/lib/ncs_navigator/warehouse/transform_load.rb +23 -3
  11. data/lib/ncs_navigator/warehouse/transform_status.rb +11 -2
  12. data/lib/ncs_navigator/warehouse/transformers/database.rb +13 -1
  13. data/lib/ncs_navigator/warehouse/transformers/enum_transformer.rb +65 -20
  14. data/lib/ncs_navigator/warehouse/transformers/subprocess_transformer.rb +4 -1
  15. data/lib/ncs_navigator/warehouse/transformers/vdr_xml/reader.rb +8 -0
  16. data/lib/ncs_navigator/warehouse/version.rb +1 -1
  17. data/ncs_mdes_warehouse.gemspec +1 -0
  18. data/spec/navigator.ini +3 -0
  19. data/spec/ncs_navigator/warehouse/configuration_spec.rb +79 -2
  20. data/spec/ncs_navigator/warehouse/data_mapper_spec.rb +1 -1
  21. data/spec/ncs_navigator/warehouse/database_initializer_spec.rb +1 -1
  22. data/spec/ncs_navigator/warehouse/hooks/etl_status_email_spec.rb +126 -0
  23. data/spec/ncs_navigator/warehouse/models/mdes_model_spec.rb +1 -1
  24. data/spec/ncs_navigator/warehouse/postgresql/pgpass_spec.rb +1 -1
  25. data/spec/ncs_navigator/warehouse/stringify_trace_spec.rb +58 -0
  26. data/spec/ncs_navigator/warehouse/table_modeler_spec.rb +1 -1
  27. data/spec/ncs_navigator/warehouse/transform_load_spec.rb +119 -5
  28. data/spec/ncs_navigator/warehouse/transform_status_spec.rb +32 -1
  29. data/spec/ncs_navigator/warehouse/transformers/database_spec.rb +11 -1
  30. data/spec/ncs_navigator/warehouse/transformers/enum_transformer_spec.rb +75 -1
  31. data/spec/ncs_navigator/warehouse/transformers/sampling_units_spec.rb +1 -1
  32. data/spec/ncs_navigator/warehouse/transformers/subprocess_transformer_spec.rb +7 -1
  33. data/spec/ncs_navigator/warehouse/transformers/vdr_xml/reader_spec.rb +14 -3
  34. data/spec/ncs_navigator/warehouse/transformers/vdr_xml_spec.rb +1 -1
  35. data/spec/ncs_navigator/warehouse/xml_emitter_spec.rb +1 -1
  36. data/spec/ncs_navigator/warehouse_spec.rb +1 -1
  37. data/spec/spec_helper.rb +3 -1
  38. metadata +307 -154
data/CHANGELOG.md CHANGED
@@ -1,6 +1,33 @@
1
1
  NCS Navigator MDES Warehouse History
2
2
  ====================================
3
3
 
4
+ 0.5.0
5
+ -----
6
+
7
+ - Add "post-ETL hooks" to ETL process: objects with callbacks which
8
+ are executed when the ETL completes. (#1725)
9
+
10
+ - Add post-ETL hook for sending e-mail when the ETL process completes,
11
+ indicating success or failure. (#1601)
12
+
13
+ - Include the input filename in the name of transformers based on
14
+ `VdrXml::Reader`. (#1927)
15
+
16
+ - Exclude parent bundler environment when executing subprocess in
17
+ SubprocessTransformer. (#2012)
18
+
19
+ - Strip leading and trailing whitespace from values in one-to-one
20
+ transformer. (#2028)
21
+
22
+ - Catch all exceptions during enumeration in EnumTransformer. (#2070)
23
+
24
+ - Log caught exceptions during ETL. Previously they were only reported
25
+ to the shell and stored in the transform status table. (#2070)
26
+
27
+ - An enumerator may communicate recoverable errors to EnumTransformer
28
+ by yielding one or more TransformErrors as part of its
29
+ enumeration. (#2073)
30
+
4
31
  0.4.1
5
32
  -----
6
33
 
data/Rakefile CHANGED
@@ -14,12 +14,12 @@ task :spec => 'spec:all'
14
14
 
15
15
  namespace :spec do
16
16
  RSpec::Core::RakeTask.new(:fast) do |t|
17
- t.pattern = "spec/**/*_spec.rb"
17
+ t.pattern = ENV['SPEC_PATTERN'] || "spec/**/*_spec.rb"
18
18
  t.rspec_opts = %q(--tag ~slow)
19
19
  end
20
20
 
21
21
  RSpec::Core::RakeTask.new(:all) do |t|
22
- t.pattern = "spec/**/*_spec.rb"
22
+ t.pattern = ENV['SPEC_PATTERN'] || "spec/**/*_spec.rb"
23
23
  end
24
24
  end
25
25
 
@@ -11,8 +11,10 @@ module NcsNavigator
11
11
  autoload :Configuration, 'ncs_navigator/warehouse/configuration'
12
12
  autoload :DataMapper, 'ncs_navigator/warehouse/data_mapper'
13
13
  autoload :DatabaseInitializer, 'ncs_navigator/warehouse/database_initializer'
14
+ autoload :Hooks, 'ncs_navigator/warehouse/hooks'
14
15
  autoload :Models, 'ncs_navigator/warehouse/models'
15
16
  autoload :PostgreSQL, 'ncs_navigator/warehouse/postgresql'
17
+ autoload :StringifyTrace, 'ncs_navigator/warehouse/stringify_trace'
16
18
  autoload :TableModeler, 'ncs_navigator/warehouse/table_modeler'
17
19
  autoload :Transformers, 'ncs_navigator/warehouse/transformers'
18
20
  autoload :TransformError, 'ncs_navigator/warehouse/transform_status'
@@ -82,6 +82,36 @@ module NcsNavigator::Warehouse
82
82
  end
83
83
  end
84
84
 
85
+ ####
86
+ #### Hooks
87
+ ####
88
+
89
+ ##
90
+ # @return [Array<#etl_succeeded,#etl_failed>] the configured
91
+ # post-ETL hooks.
92
+ def post_etl_hooks
93
+ @post_etl_hooks ||= []
94
+ end
95
+
96
+ ##
97
+ # Adds a post-ETL hook to the list for this warehouse instance.
98
+ #
99
+ # @return [void]
100
+ # @param [#etl_succeeded,#etl_failed] candidate the hook
101
+ def add_post_etl_hook(candidate)
102
+ expected_methods = [:etl_succeeded, :etl_failed]
103
+ implemented_methods = expected_methods.select { |m| candidate.respond_to?(m) }
104
+ if implemented_methods.empty?
105
+ msg = "#{candidate.inspect} does not have an #{expected_methods.join(' or ')} method."
106
+ if candidate.respond_to?(:new)
107
+ msg += " Perhaps you meant #{candidate}.new?"
108
+ end
109
+ raise Error, msg
110
+ else
111
+ post_etl_hooks << candidate
112
+ end
113
+ end
114
+
85
115
  ####
86
116
  #### MDES version
87
117
  ####
@@ -157,6 +187,26 @@ module NcsNavigator::Warehouse
157
187
  @navigator = NcsNavigator::Configuration.new(ini_file)
158
188
  end
159
189
 
190
+ ###
191
+ ### E-mail
192
+ ###
193
+
194
+ ##
195
+ # Configures `ActionMailer` with the options implied by the suite
196
+ # configuration.
197
+ #
198
+ # @return [void]
199
+ def set_up_action_mailer
200
+ return if @action_mailer_set_up
201
+ require 'action_mailer'
202
+ ActionMailer::Base.delivery_method = :smtp
203
+ ActionMailer::Base.smtp_settings = navigator.action_mailer_smtp_settings
204
+ ActionMailer::Base.view_paths = [
205
+ File.expand_path('../mailer_templates', __FILE__)
206
+ ]
207
+ @action_mailer_set_up = true
208
+ end
209
+
160
210
  ####
161
211
  #### Bcdatabase
162
212
  ####
@@ -0,0 +1,21 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ module NcsNavigator::Warehouse
4
+ ##
5
+ # The namespace for post-ETL hook implementations which are provided
6
+ # with the warehouse.
7
+ #
8
+ # A post-ETL hook is an object which responds to either
9
+ # `etl_succeeded` or `etl_failed` (or both). Each method takes a
10
+ # single hash argument which, when the method is called, will
11
+ # contain the following keys:
12
+ #
13
+ # * `:transform_statuses` the list of {TransformStatus}es describing
14
+ # the ETL process that was just completed
15
+ # * `:configuration` a reference to the warehouse {Configuration}.
16
+ #
17
+ # @see Configuration#add_post_etl_hook
18
+ module Hooks
19
+ autoload :EtlStatusEmail, 'ncs_navigator/warehouse/hooks/etl_status_email'
20
+ end
21
+ end
@@ -0,0 +1,88 @@
1
+ require 'ncs_navigator/warehouse'
2
+ require 'action_mailer'
3
+ require 'time'
4
+
5
+ module NcsNavigator::Warehouse::Hooks
6
+ ##
7
+ # A post-ETL hook which sends a message to configured e-mail
8
+ # accounts after each ETL run. The message indicates whether the ETL
9
+ # succeeded or failed and some summary statistics, but no further
10
+ # detail.
11
+ class EtlStatusEmail
12
+ ##
13
+ # @param [Hash<Symbol, Object>] options
14
+ # @option options [Array<String>] :to the e-mail addresses to
15
+ # whom the notifications will be sent.
16
+ def initialize(options={})
17
+ @to = options[:to] or 'Need at least one recipient'
18
+ end
19
+
20
+ ##
21
+ # @param [Hash<Symbol, Object>] args the arguments received from
22
+ # the ETL process.
23
+ # @return [void]
24
+ def etl_succeeded(args)
25
+ args[:configuration].set_up_action_mailer
26
+
27
+ Mailer.success_message(@to, args[:transform_statuses]).deliver
28
+ end
29
+
30
+ ##
31
+ # @param [Hash<Symbol, Object>] args the arguments received from
32
+ # the ETL process.
33
+ # @return [void]
34
+ def etl_failed(args)
35
+ args[:configuration].set_up_action_mailer
36
+
37
+ Mailer.failure_message(@to, args[:transform_statuses]).deliver
38
+ end
39
+
40
+ ##
41
+ # @private
42
+ class Mailer < ::ActionMailer::Base
43
+ self.mailer_name = 'etl_status_email'
44
+
45
+ def success_message(to, transform_statuses)
46
+ analyze_statuses(transform_statuses)
47
+
48
+ mail(
49
+ # TODO: make configurable
50
+ :from => 'mdes-warehouse',
51
+ :to => to,
52
+ :subject => '[NCS Navigator] Warehouse load successful'
53
+ )
54
+ end
55
+
56
+ def failure_message(to, transform_statuses)
57
+ analyze_statuses(transform_statuses)
58
+
59
+ mail(
60
+ # TODO: make configurable
61
+ :from => 'mdes-warehouse',
62
+ :to => to,
63
+ :subject => '[NCS Navigator] Warehouse load failed'
64
+ )
65
+ end
66
+
67
+ private
68
+
69
+ def analyze_statuses(transform_statuses)
70
+ start_time_dt = transform_statuses.first.start_time
71
+ end_time_dt = transform_statuses.last.end_time
72
+
73
+ @start_time = start_time_dt.to_s
74
+
75
+ @transform_duration = duration_string((end_time_dt - start_time_dt) * 24 * 3600)
76
+ @transform_count = transform_statuses.size
77
+ @record_count = transform_statuses.inject(0) { |sum, s| sum + s.record_count }
78
+
79
+ @success_count = transform_statuses.select { |s| s.transform_errors.empty? }.size
80
+ @failure_count = transform_statuses.size - @success_count
81
+ end
82
+
83
+ def duration_string(seconds)
84
+ [ seconds / 3600, (seconds % 3600) / 60, seconds % 60 ].collect { |t| '%02d' % t }.join(':')
85
+ end
86
+ end
87
+ end
88
+ end
@@ -0,0 +1,8 @@
1
+ An MDES Warehouse transform and load process ran, starting at <%= @start_time %>.
2
+
3
+ <%= @success_count %> transformation<%= 's' unless @success_count == 1 %> executed successfully.
4
+ <%= @failure_count %> transformation<%= 's' unless @failure_count == 1 %> failed.
5
+
6
+ Executing <%= @transform_count %> transform<%= 's' if @transform_count != 1 %>
7
+ took <%= @transform_duration %> and
8
+ produced <%= @record_count %> total record<%= 's' if @record_count != 1 %>.
@@ -0,0 +1,7 @@
1
+ An MDES Warehouse transform and load process ran, starting at <%= @start_time %>.
2
+
3
+ All transformations executed successfully.
4
+
5
+ Executing <%= @transform_count %> transform<%= 's' if @transform_count != 1 %>
6
+ took <%= @transform_duration %> and
7
+ produced <%= @record_count %> total record<%= 's' if @record_count != 1 %>.
@@ -0,0 +1,31 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ module NcsNavigator::Warehouse
4
+ ##
5
+ # @api private
6
+ module StringifyTrace
7
+ ##
8
+ # Utility to generate a nicely formatted string from a ruby
9
+ # exception trace.
10
+ #
11
+ # @param [Array<String>] backtrace
12
+ # @return [String]
13
+ def stringify_trace(backtrace)
14
+ # an array of arrays containing [filename, line, msg]
15
+ trace_lines = backtrace.collect { |l| l.scan(/^(.*?)\:(\d*)\:?(.*)$/).first || ['', '', l] }
16
+
17
+ lengths = trace_lines.inject([0, 0, 0]) { |lens, components|
18
+ 0.upto(2) { |i| lens[i] = [lens[i], components[i].length].max }
19
+ lens
20
+ }
21
+
22
+ formats = ["%#{lengths[0]}s", "%#{lengths[1]}s", "%s"]
23
+ trace_lines.collect { |components|
24
+ formats.zip(components).collect { |format, component|
25
+ (format % component)
26
+ }.select { |s| s =~ /\S/ }.join(':')
27
+ }.join("\n")
28
+ end
29
+ module_function :stringify_trace
30
+ end
31
+ end
@@ -5,6 +5,7 @@ require 'forwardable'
5
5
  module NcsNavigator::Warehouse
6
6
  class TransformLoad
7
7
  extend Forwardable
8
+ include StringifyTrace
8
9
 
9
10
  attr_reader :configuration
10
11
  attr_reader :statuses
@@ -34,12 +35,16 @@ module NcsNavigator::Warehouse
34
35
  transformer.transform(status)
35
36
  rescue => e
36
37
  shell.say_line("\nTransform failed. (See log for more detail.)")
37
- status.add_error("Transform failed. #{e.class}: #{e}.")
38
+ msg = "Transform failed. #{e.class}: #{e}\n#{stringify_trace(e.backtrace)}"
39
+ log.error(msg)
40
+ status.add_error(msg)
38
41
  end
39
42
  end
40
43
  rescue DataObjects::IntegrityError => e
41
- shell.say_line("\nTransform failed with data integrity error. (See log for more detail.)")
42
- log.error("Transform failed with data integrity error: #{e}.")
44
+ shell.say_line(
45
+ "\nTransform failed with data integrity error. (See log for more detail.)")
46
+ log.error(
47
+ "Transform failed with data integrity error: #{e}.\n#{stringify_trace(e.backtrace)}")
43
48
  status.add_error("Transform failed with data integrity error: #{e}.")
44
49
  end
45
50
  status.end_time = Time.now
@@ -53,8 +58,10 @@ module NcsNavigator::Warehouse
53
58
  end
54
59
 
55
60
  if statuses.detect { |s| !s.transform_errors.empty? }
61
+ dispatch_post_etl_hooks(:etl_failed)
56
62
  false
57
63
  else
64
+ dispatch_post_etl_hooks(:etl_succeeded)
58
65
  true
59
66
  end
60
67
  end
@@ -67,5 +74,18 @@ module NcsNavigator::Warehouse
67
74
  )
68
75
  end
69
76
  private :build_status_for
77
+
78
+ def dispatch_post_etl_hooks(method)
79
+ configuration.post_etl_hooks.each do |hook|
80
+ begin
81
+ args = { :transform_statuses => statuses, :configuration => configuration }
82
+ hook.send(method, args) if hook.respond_to?(method)
83
+ rescue => e
84
+ log.error(
85
+ "Error invoking #{method.inspect} on #{hook.inspect}: #{e.class} #{e}.\n#{stringify_trace(e.backtrace)}")
86
+ end
87
+ end
88
+ end
89
+ private :dispatch_post_etl_hooks
70
90
  end
71
91
  end
@@ -14,8 +14,8 @@ module NcsNavigator::Warehouse
14
14
  # tests). This method creates a new instance which works around
15
15
  # this problem, at the cost of the instances not being accurately
16
16
  # persistable.
17
- def self.memory_only(name)
18
- TransformStatus.new(:name => name).tap do |s|
17
+ def self.memory_only(name, attrs={})
18
+ TransformStatus.new(attrs.merge(:name => name)).tap do |s|
19
19
  def s.transform_errors
20
20
  @transform_errors ||= []
21
21
  end
@@ -59,6 +59,15 @@ module NcsNavigator::Warehouse
59
59
  property :record_id, String, :length => 255
60
60
 
61
61
  belongs_to :transform_status, TransformStatus, :required => true
62
+
63
+ def self.for_exception(exception, context_message=nil)
64
+ TransformError.new(:message => [
65
+ context_message,
66
+ "#{exception.class}: #{exception}",
67
+ StringifyTrace.stringify_trace(exception.backtrace)
68
+ ].compact.join("\n")
69
+ )
70
+ end
62
71
  end
63
72
 
64
73
  TransformError.finalize
@@ -327,11 +327,23 @@ module NcsNavigator::Warehouse::Transformers
327
327
  raise UnusedColumnsForModelError.new(unused)
328
328
  end
329
329
  model.new(
330
- col_map.inject({}) { |pv, (col_name, var_name)| pv[var_name] = row[col_name]; pv }
330
+ col_map.inject({}) { |pv, (col_name, var_name)|
331
+ pv[var_name] = clean_value(row[col_name]);
332
+ pv
333
+ }
331
334
  )
332
335
  end
333
336
  alias :call :convert_row
334
337
 
338
+ def clean_value(v)
339
+ if v.respond_to?(:strip)
340
+ v.strip
341
+ else
342
+ v
343
+ end
344
+ end
345
+ private :clean_value
346
+
335
347
  ##
336
348
  # @param [Array<String>] column_names
337
349
  # @return [Hash<String, String>] a mapping from the given
@@ -4,12 +4,28 @@ require 'forwardable'
4
4
 
5
5
  module NcsNavigator::Warehouse::Transformers
6
6
  ##
7
- # A transformer that accepts a series of model instances in the form
8
- # of a ruby Enumerable. An enumerable might be as simple as an
9
- # array, or it might be a custom class that streams through
10
- # thousands of instances without having them all in memory at once.
7
+ # A transformer that accepts a series of model instances and
8
+ # {TransformError}s in the form of a ruby Enumerable. An enumerable
9
+ # might be as simple as an array, or it might be a custom class that
10
+ # streams through thousands of instances without having them all in
11
+ # memory at once.
12
+ #
13
+ # Each value yielded by the enumerable may be either an instance of
14
+ # an MDES model or a {TransformError}. If it is a model instance, it
15
+ # will have global values (e.g., PSU ID) applied as necessary,
16
+ # validated, and saved.
17
+ #
18
+ # On the other hand, if it is a `TransformError`, the error will be
19
+ # associated with the status for the transform run. The benefit of
20
+ # the enumeration yielding a `TransformError` instead of throwing an
21
+ # exception is that the enumeration may continue after the error is
22
+ # reported. If the error is unrecoverable, the enum should throw an
23
+ # exception instead of returning a
24
+ # `TransformError`. `EnumTransformer` will handle recording the
25
+ # error appropriately in that case.
11
26
  class EnumTransformer
12
27
  extend Forwardable
28
+ include NcsNavigator::Warehouse::StringifyTrace
13
29
 
14
30
  ##
15
31
  # @return [Enumerable] the enumeration that will be transformed.
@@ -26,7 +42,12 @@ module NcsNavigator::Warehouse::Transformers
26
42
  end
27
43
 
28
44
  def name
29
- "EnumTransformer for #{enum.class}"
45
+ enum_name = if enum.respond_to?(:name)
46
+ enum.name
47
+ else
48
+ enum.class
49
+ end
50
+ "EnumTransformer for #{enum_name}"
30
51
  end
31
52
 
32
53
  ##
@@ -36,31 +57,55 @@ module NcsNavigator::Warehouse::Transformers
36
57
  # @param [TransformStatus] status
37
58
  # @return [void]
38
59
  def transform(status)
60
+ begin
61
+ do_transform(status)
62
+ rescue Exception => e
63
+ err = NcsNavigator::Warehouse::TransformError.for_exception(e, 'Enumeration failed.')
64
+ log.error err.message
65
+ status.transform_errors << err
66
+ end
67
+ end
68
+
69
+ private
70
+
71
+ def do_transform(status)
39
72
  enum.each do |record|
40
- apply_global_values_if_necessary(record)
41
- if record.valid?
42
- log.debug("Saving valid record #{record_ident record}.")
43
- begin
44
- unless record.save
45
- msg = "Could not save valid record #{record.inspect}. #{record_messages(record).join(' ')}"
46
- log.error msg
47
- status.unsuccessful_record(record, msg)
48
- end
49
- rescue => e
50
- msg = "Error on save. #{e.class}: #{e}."
73
+ case record
74
+ when NcsNavigator::Warehouse::TransformError
75
+ receive_transform_error(record, status)
76
+ else
77
+ save_model_instance(record, status)
78
+ end
79
+ end
80
+ end
81
+
82
+ def save_model_instance(record, status)
83
+ apply_global_values_if_necessary(record)
84
+ if record.valid?
85
+ log.debug("Saving valid record #{record_ident record}.")
86
+ begin
87
+ unless record.save
88
+ msg = "Could not save valid record #{record.inspect}. #{record_messages(record).join(' ')}"
51
89
  log.error msg
52
90
  status.unsuccessful_record(record, msg)
53
91
  end
54
- else
55
- msg = "Invalid record. #{record_messages(record).join(' ')}"
92
+ rescue => e
93
+ msg = "Error on save. #{e.class}: #{e}."
56
94
  log.error msg
57
95
  status.unsuccessful_record(record, msg)
58
96
  end
59
- status.record_count += 1
97
+ else
98
+ msg = "Invalid record. #{record_messages(record).join(' ')}"
99
+ log.error msg
100
+ status.unsuccessful_record(record, msg)
60
101
  end
102
+ status.record_count += 1
61
103
  end
62
104
 
63
- private
105
+ def receive_transform_error(error, status)
106
+ error.id = nil
107
+ status.transform_errors << error
108
+ end
64
109
 
65
110
  def record_ident(rec)
66
111
  # No composite keys in the MDES