ncs_mdes_warehouse 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34)
  1. data/CHANGELOG.md +16 -0
  2. data/generated_models/ncs_navigator/warehouse/models/two_point_zero/env_equipment_prob_log.rb +1 -1
  3. data/generated_models/ncs_navigator/warehouse/models/two_point_zero/incident.rb +6 -6
  4. data/generated_models/ncs_navigator/warehouse/models/two_point_zero/participant_rvis.rb +1 -1
  5. data/generated_models/ncs_navigator/warehouse/models/two_point_zero/sample_shipping.rb +1 -1
  6. data/lib/ncs_navigator/warehouse.rb +4 -0
  7. data/lib/ncs_navigator/warehouse/cli.rb +31 -1
  8. data/lib/ncs_navigator/warehouse/configuration.rb +49 -9
  9. data/lib/ncs_navigator/warehouse/database_initializer.rb +62 -4
  10. data/lib/ncs_navigator/warehouse/models.rb +3 -0
  11. data/lib/ncs_navigator/warehouse/postgresql.rb +7 -0
  12. data/lib/ncs_navigator/warehouse/postgresql/pgpass.rb +79 -0
  13. data/lib/ncs_navigator/warehouse/table_modeler/mdes_ext.rb +9 -0
  14. data/lib/ncs_navigator/warehouse/table_modeler/model_template.rb.erb +1 -1
  15. data/lib/ncs_navigator/warehouse/transform_load.rb +55 -0
  16. data/lib/ncs_navigator/warehouse/transform_status.rb +63 -0
  17. data/lib/ncs_navigator/warehouse/transformers.rb +0 -1
  18. data/lib/ncs_navigator/warehouse/transformers/database.rb +91 -85
  19. data/lib/ncs_navigator/warehouse/transformers/enum_transformer.rb +26 -8
  20. data/lib/ncs_navigator/warehouse/transformers/vdr_xml.rb +1 -1
  21. data/lib/ncs_navigator/warehouse/transformers/vdr_xml/reader.rb +11 -4
  22. data/lib/ncs_navigator/warehouse/version.rb +1 -1
  23. data/spec/bcdatabase/test_sqlite.yml +4 -0
  24. data/spec/ncs_navigator/warehouse/configuration_spec.rb +42 -0
  25. data/spec/ncs_navigator/warehouse/postgresql/pgpass_spec.rb +187 -0
  26. data/spec/ncs_navigator/warehouse/table_modeler_spec.rb +15 -1
  27. data/spec/ncs_navigator/warehouse/transform_load_spec.rb +152 -0
  28. data/spec/ncs_navigator/warehouse/transformers/database_spec.rb +24 -28
  29. data/spec/ncs_navigator/warehouse/transformers/enum_transformer_spec.rb +16 -10
  30. data/spec/ncs_navigator/warehouse/transformers/vdr_xml/made_up_vdr_xml.xml +4 -4
  31. data/spec/ncs_navigator/warehouse/transformers/vdr_xml/reader_spec.rb +8 -3
  32. data/spec/spec_helper.rb +1 -1
  33. metadata +44 -37
  34. data/lib/ncs_navigator/warehouse/transformers/transform_status.rb +0 -23
data/CHANGELOG.md CHANGED
@@ -1,6 +1,22 @@
1
1
  NCS Navigator MDES Warehouse History
2
2
  ====================================
3
3
 
4
+ 0.1.0
5
+ -----
6
+
7
+ - Implement actual ETL runner. It's accessible via the `etl`
8
+ subcommand of the `mdes-wh` executable.
9
+
10
+ - Correct generated models so that association reference names and
11
+ foreign key column names do not collide. This was previously
12
+ possible when an MDES foreign key was not suffixed with "_id".
13
+
14
+ - Replace `model_row` helper in `Transformers::Database` with its own
15
+ top-level production method, `produce_one_for_one`. The options and
16
+ behavior are mostly the same as `model_row`, but this refactoring
17
+ allows the results of the column mapping heuristic to be exposed to
18
+ assist in writing DRYer importers.
19
+
4
20
  0.0.2
5
21
  -----
6
22
 
@@ -40,7 +40,7 @@ module NcsNavigator::Warehouse::Models::TwoPointZero
40
40
  property :equip_action,
41
41
  NcsNavigator::Warehouse::DataMapper::NcsString,
42
42
  { :required => true, :pii => :possible, :length => 1..2, :set => ["1", "2", "3", "-7", "-4"] }
43
- belongs_to :staff_id_reviewer,
43
+ belongs_to :staff_id_reviewer_record,
44
44
  'NcsNavigator::Warehouse::Models::TwoPointZero::Staff',
45
45
  :child_key => [ :staff_id_reviewer ], :required => false
46
46
 
@@ -34,25 +34,25 @@ module NcsNavigator::Warehouse::Models::TwoPointZero
34
34
  belongs_to :inc_staff_supervisor,
35
35
  'NcsNavigator::Warehouse::Models::TwoPointZero::Staff',
36
36
  :child_key => [ :inc_staff_supervisor_id ], :required => false
37
- belongs_to :inc_recip_is_participant,
37
+ belongs_to :inc_recip_is_participant_record,
38
38
  'NcsNavigator::Warehouse::Models::TwoPointZero::Participant',
39
39
  :child_key => [ :inc_recip_is_participant ], :required => false
40
- belongs_to :inc_recip_is_du,
40
+ belongs_to :inc_recip_is_du_record,
41
41
  'NcsNavigator::Warehouse::Models::TwoPointZero::DwellingUnit',
42
42
  :child_key => [ :inc_recip_is_du ], :required => false
43
- belongs_to :inc_recip_is_staff,
43
+ belongs_to :inc_recip_is_staff_record,
44
44
  'NcsNavigator::Warehouse::Models::TwoPointZero::Staff',
45
45
  :child_key => [ :inc_recip_is_staff ], :required => false
46
- belongs_to :inc_recip_is_family,
46
+ belongs_to :inc_recip_is_family_record,
47
47
  'NcsNavigator::Warehouse::Models::TwoPointZero::Person',
48
48
  :child_key => [ :inc_recip_is_family ], :required => false
49
- belongs_to :inc_recip_is_acquaintance,
49
+ belongs_to :inc_recip_is_acquaintance_record,
50
50
  'NcsNavigator::Warehouse::Models::TwoPointZero::Person',
51
51
  :child_key => [ :inc_recip_is_acquaintance ], :required => false
52
52
  property :inc_recip_is_other,
53
53
  NcsNavigator::Warehouse::DataMapper::NcsString,
54
54
  { :format => /^([-+]?[\d]{1,9})?$/ }
55
- belongs_to :inc_contact_person,
55
+ belongs_to :inc_contact_person_record,
56
56
  'NcsNavigator::Warehouse::Models::TwoPointZero::Person',
57
57
  :child_key => [ :inc_contact_person ], :required => false
58
58
  property :inctype,
@@ -25,7 +25,7 @@ module NcsNavigator::Warehouse::Models::TwoPointZero
25
25
  property :rvis_language_oth,
26
26
  NcsNavigator::Warehouse::DataMapper::NcsString,
27
27
  { :pii => :possible, :length => 0..255 }
28
- belongs_to :rvis_person,
28
+ belongs_to :rvis_person_record,
29
29
  'NcsNavigator::Warehouse::Models::TwoPointZero::Person',
30
30
  :child_key => [ :rvis_person ], :required => false
31
31
  property :rvis_who_consented,
@@ -40,7 +40,7 @@ module NcsNavigator::Warehouse::Models::TwoPointZero
40
40
  property :shipment_issues_oth,
41
41
  NcsNavigator::Warehouse::DataMapper::NcsString,
42
42
  { :pii => :possible, :length => 0..255 }
43
- belongs_to :staff_id_track,
43
+ belongs_to :staff_id_track_record,
44
44
  'NcsNavigator::Warehouse::Models::TwoPointZero::Staff',
45
45
  :child_key => [ :staff_id_track ], :required => false
46
46
  property :sample_shipped_by,
@@ -11,8 +11,12 @@ module NcsNavigator
11
11
  autoload :DataMapper, 'ncs_navigator/warehouse/data_mapper'
12
12
  autoload :DatabaseInitializer, 'ncs_navigator/warehouse/database_initializer'
13
13
  autoload :Models, 'ncs_navigator/warehouse/models'
14
+ autoload :PostgreSQL, 'ncs_navigator/warehouse/postgresql'
14
15
  autoload :TableModeler, 'ncs_navigator/warehouse/table_modeler'
15
16
  autoload :Transformers, 'ncs_navigator/warehouse/transformers'
17
+ autoload :TransformError, 'ncs_navigator/warehouse/transform_status'
18
+ autoload :TransformLoad, 'ncs_navigator/warehouse/transform_load'
19
+ autoload :TransformStatus, 'ncs_navigator/warehouse/transform_status'
16
20
  autoload :UpdatingShell, 'ncs_navigator/warehouse/updating_shell'
17
21
  autoload :VERSION, 'ncs_navigator/warehouse/version'
18
22
  autoload :XmlEmitter, 'ncs_navigator/warehouse/xml_emitter'
@@ -9,7 +9,7 @@ module NcsNavigator::Warehouse
9
9
  class_option :quiet, :type => :boolean, :aliases => %w(-q),
10
10
  :desc => 'Suppress the status messages printed to standard error'
11
11
  class_option 'config', :type => :string, :aliases => %w(-c),
12
- :desc => 'Supply an alternate configuration file instead of the default /etc/nubic/ncs/warehouse/{env_name}.rb'
12
+ :desc => "Supply an alternate configuration file instead of the default #{Configuration.environment_file}"
13
13
 
14
14
  no_tasks {
15
15
  def configuration
@@ -40,6 +40,12 @@ module NcsNavigator::Warehouse
40
40
  db.replace_schema
41
41
  end
42
42
 
43
+ desc 'clone-working', 'Copies the contents of the working database to the reporting database'
44
+ def clone_working
45
+ db = DatabaseInitializer.new(configuration)
46
+ db.clone_working_to_reporting
47
+ end
48
+
43
49
  desc 'emit-xml [FILENAME]', 'Generates the VDR submission XML'
44
50
  long_desc <<-DESC
45
51
  Generates and zips the vanguard data repository submission XML from
@@ -52,6 +58,30 @@ DESC
52
58
 
53
59
  XmlEmitter.new(configuration, filename).emit_xml
54
60
  end
61
+
62
+ desc 'etl', 'Performs the full extract-transform-load process for this configuration'
63
+ long_desc <<-DESC
64
+ Clears the working schema and repopulates it with the results of running
65
+ the all the configured transforms. If the transforms are successful, the
66
+ reporting schema is wiped and replaced with the results.
67
+ DESC
68
+ method_option 'force', :type => 'boolean',
69
+ :desc => 'Copy the working schema to production even if there are errors'
70
+ def etl
71
+ db = DatabaseInitializer.new(configuration)
72
+ db.set_up_repository(:both)
73
+ db.replace_schema
74
+
75
+ success = TransformLoad.new(configuration).run
76
+ if success || options['force']
77
+ db.clone_working_to_reporting
78
+ else
79
+ configuration.shell.say_line "There were errors during ETL. Reporting database not updated."
80
+ configuration.shell.say_line "See the log and the database table wh_transform_error for more details."
81
+
82
+ exit 1
83
+ end
84
+ end
55
85
  end
56
86
  end
57
87
 
@@ -2,6 +2,7 @@ require 'ncs_navigator/warehouse'
2
2
  require 'ncs_navigator/configuration'
3
3
  require 'ncs_navigator/mdes'
4
4
 
5
+ require 'data_mapper'
5
6
  require 'active_support/core_ext/object/try'
6
7
  require 'pathname'
7
8
 
@@ -276,15 +277,7 @@ module NcsNavigator::Warehouse
276
277
  #
277
278
  # @param [Pathname,String,nil] fn
278
279
  def log_file=(fn)
279
- @log_directory =
280
- case fn
281
- when nil
282
- nil
283
- when Pathname
284
- fn
285
- else
286
- Pathname.new(fn)
287
- end
280
+ @log_directory = coerce_to_pathname(fn)
288
281
  end
289
282
 
290
283
  def set_up_logs
@@ -310,6 +303,53 @@ module NcsNavigator::Warehouse
310
303
  set_up_logs unless @log
311
304
  @log
312
305
  end
306
+
307
+ ####
308
+ #### pg_bin
309
+ ####
310
+
311
+ ##
312
+ # The path where the PostgreSQL command line utilities can be
313
+ # found. If they are on the search path, this may be `nil` (the
314
+ # default).
315
+ #
316
+ # @return [Pathname, nil]
317
+ attr_reader :pg_bin_path
318
+
319
+ ##
320
+ # Specify the path where the PostgreSQL command line utilities can
321
+ # be found.
322
+ #
323
+ # @param [Pathname,String,nil] fn
324
+ # @return [void]
325
+ def pg_bin_path=(fn)
326
+ @pg_bin_path = coerce_to_pathname(fn)
327
+ end
328
+
329
+ ##
330
+ # @return [Pathname] the executable for the given PostgreSQL
331
+ # utility.
332
+ # @param [Pathname, String] command the name of the command
333
+ def pg_bin(command)
334
+ if pg_bin_path
335
+ pg_bin_path + command
336
+ else
337
+ coerce_to_pathname command
338
+ end
339
+ end
340
+
341
+ private
342
+
343
+ def coerce_to_pathname(value)
344
+ case value
345
+ when nil
346
+ nil
347
+ when Pathname
348
+ value
349
+ else
350
+ Pathname.new(value)
351
+ end
352
+ end
313
353
  end
314
354
  end
315
355
 
@@ -57,6 +57,8 @@ module NcsNavigator::Warehouse
57
57
  def connect_one(which_one, dm_name=nil)
58
58
  dm_name ||= :"mdes_warehouse_#{which_one}"
59
59
  log.info "Connecting DataMapper repository #{dm_name.inspect}"
60
+ p = params(which_one)
61
+ log.debug " using #{p.merge('password' => 'SUPPRESSED').inspect}"
60
62
  adapter = ::DataMapper.setup(dm_name, params(which_one))
61
63
  end
62
64
  private :connect_one
@@ -78,7 +80,6 @@ module NcsNavigator::Warehouse
78
80
  #
79
81
  # @return [void]
80
82
  def replace_schema
81
- # TODO: actual logging, too
82
83
  shell.say "Dropping everything"
83
84
  log.info "Dropping everything in working schema"
84
85
  ::DataMapper.repository(:mdes_warehouse_working).adapter.
@@ -89,12 +90,69 @@ module NcsNavigator::Warehouse
89
90
  log.info "Initializing schema for MDES #{configuration.mdes.specification_version}"
90
91
  # In DM 1.2, DataMapper.auto_migrate! only works for the
91
92
  # :default repo
92
- configuration.models_module.mdes_order.each do |m|
93
- shell.clear_line_then_say "Adding #{m.mdes_table_name}..."
93
+ ::DataMapper::Model.descendants.each do |m|
94
+ shell.clear_line_then_say "Adding #{m.storage_name(:mdes_warehouse_working)}..."
94
95
  m.auto_migrate!(:mdes_warehouse_working)
95
96
  end
96
- shell.clear_line_then_say "Added #{configuration.models_module.mdes_order.size} tables.\n"
97
+ shell.clear_line_then_say(
98
+ "Added #{configuration.models_module.mdes_order.size} MDES tables.\n")
97
99
  end
100
+
101
+ ##
102
+ # Replaces the reporting database with a clone of the working
103
+ # database. This method relies on the command line `pg_dump` and
104
+ # `pg_restore` commands.
105
+ #
106
+ # @see Configuration#pg_bin_path
107
+ def clone_working_to_reporting
108
+ PostgreSQL::Pgpass.new.tap do |pgpass|
109
+ pgpass.update params(:working)
110
+ pgpass.update params(:reporting)
111
+ end
112
+
113
+ dump_cmd = [
114
+ configuration.pg_bin('pg_dump'),
115
+ pg_params(params(:working)),
116
+ '--format=custom',
117
+ params(:working)['database']
118
+ ].flatten
119
+
120
+ restore_cmd = [
121
+ configuration.pg_bin('pg_restore'),
122
+ pg_params(params(:reporting)),
123
+ '--schema', 'public',
124
+ '--clean',
125
+ '--dbname', params(:reporting)['database']
126
+ ].flatten
127
+
128
+ command = "#{escape_cmd dump_cmd} | #{escape_cmd restore_cmd}"
129
+ log.info('Cloning working schema into reporting schema')
130
+ log.debug("Clone command: #{command.inspect}")
131
+ unless system(command)
132
+ configuration.shell.say_line "Clone from working to reporting failed. See above for detail."
133
+ exit 1
134
+ end
135
+ end
136
+
137
+ def pg_params(p)
138
+ [
139
+ pg_param(p, 'host'),
140
+ pg_param(p, 'port'),
141
+ pg_param(p, 'username'),
142
+ '-w'
143
+ ].compact.flatten
144
+ end
145
+ private :pg_params
146
+
147
+ def pg_param(p, param_name)
148
+ ["--#{param_name}", p[param_name]] if p[param_name]
149
+ end
150
+ private :pg_param
151
+
152
+ def escape_cmd(parts)
153
+ parts.collect { |p| "'#{p}'" }.join(' ')
154
+ end
155
+ private :escape_cmd
98
156
  end
99
157
  end
100
158
 
@@ -1,5 +1,8 @@
1
1
  require 'ncs_navigator/warehouse'
2
2
 
3
+ # ensure that this model is loaded along with the others
4
+ require 'ncs_navigator/warehouse/transform_status'
5
+
3
6
  module NcsNavigator::Warehouse
4
7
  module Models
5
8
  autoload :MdesModel, 'ncs_navigator/warehouse/models/mdes_model'
@@ -0,0 +1,7 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ module NcsNavigator::Warehouse
4
+ module PostgreSQL
5
+ autoload :Pgpass, 'ncs_navigator/warehouse/postgresql/pgpass'
6
+ end
7
+ end
@@ -0,0 +1,79 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ require 'pathname'
4
+ require 'forwardable'
5
+
6
+ module NcsNavigator::Warehouse::PostgreSQL
7
+ class Pgpass
8
+ extend Forwardable
9
+
10
+ ##
11
+ # Converts a database configuration hash into the elements of the
12
+ # corresponding .pgpass line.
13
+ #
14
+ # @param [Hash<String, String>] the configuration
15
+ # @return [Array<String>]
16
+ def self.line(entry)
17
+ [
18
+ entry['host'] || 'localhost',
19
+ (entry['port'] || 5432).to_s,
20
+ '*',
21
+ entry['username'] || fail("No username in configuration #{entry.inspect}"),
22
+ entry['password'] || fail("No password in configuration #{entry.inspect}")
23
+ ]
24
+ end
25
+
26
+ def_delegator self, :line
27
+
28
+ attr_reader :file
29
+
30
+ def initialize
31
+ @file = Pathname.new(ENV['HOME']) + '.pgpass'
32
+ end
33
+
34
+ ##
35
+ # Updates the `.pgpass` file so that it includes the current
36
+ # password for the given configuration. This may involve adding an
37
+ # entry, replacing an entry, or even creating the file entirely.
38
+ #
39
+ # @param [Hash] entry a database configuration hash
40
+ # @return [void]
41
+ def update(entry)
42
+ ensure_file_exists_and_is_writable
43
+
44
+ new_line = self.line(entry)
45
+ contents = file.readlines.collect { |l| l.chomp.split(':') }
46
+ match = contents.detect { |line| line[0..3] == new_line[0..3] }
47
+
48
+ if match
49
+ match[4] = entry['password']
50
+ else
51
+ contents << new_line
52
+ end
53
+
54
+ file.open('w') do |f|
55
+ contents.each do |l|
56
+ f.puts l.join(':')
57
+ end
58
+ end
59
+ file.chmod(0600)
60
+ end
61
+
62
+ private
63
+
64
+ def ensure_file_exists_and_is_writable
65
+ if file.exist?
66
+ if file.writable?
67
+ # do nothing
68
+ else
69
+ fail "Cannot update #{file}"
70
+ end
71
+ elsif file.parent.writable?
72
+ # touch
73
+ file.open('w') { |f| }
74
+ else
75
+ fail "Cannot create #{file}"
76
+ end
77
+ end
78
+ end
79
+ end
@@ -73,6 +73,15 @@ module NcsNavigator
73
73
  end
74
74
  ].compact
75
75
  end
76
+
77
+ def wh_reference_name
78
+ fail 'Does not apply' unless self.table_reference
79
+ if self.name =~ /_id$/
80
+ self.name.sub(/_id$/, '')
81
+ else
82
+ self.name + '_record'
83
+ end
84
+ end
76
85
  end
77
86
 
78
87
  class VariableType
@@ -12,7 +12,7 @@ module <%= module_name %>
12
12
 
13
13
  <% t.wh_variables.each do |v| -%>
14
14
  <% if v.table_reference -%>
15
- belongs_to :<%= v.name.sub(/_id$/, '') %>,
15
+ belongs_to :<%= v.wh_reference_name %>,
16
16
  '<%= v.table_reference.wh_model_name(module_name) %>',
17
17
  :child_key => [ :<%= v.name %> ], :required => <%= !!v.required %>
18
18
  <% else -%>