ncs_mdes_warehouse 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. data/CHANGELOG.md +16 -0
  2. data/generated_models/ncs_navigator/warehouse/models/two_point_zero/env_equipment_prob_log.rb +1 -1
  3. data/generated_models/ncs_navigator/warehouse/models/two_point_zero/incident.rb +6 -6
  4. data/generated_models/ncs_navigator/warehouse/models/two_point_zero/participant_rvis.rb +1 -1
  5. data/generated_models/ncs_navigator/warehouse/models/two_point_zero/sample_shipping.rb +1 -1
  6. data/lib/ncs_navigator/warehouse.rb +4 -0
  7. data/lib/ncs_navigator/warehouse/cli.rb +31 -1
  8. data/lib/ncs_navigator/warehouse/configuration.rb +49 -9
  9. data/lib/ncs_navigator/warehouse/database_initializer.rb +62 -4
  10. data/lib/ncs_navigator/warehouse/models.rb +3 -0
  11. data/lib/ncs_navigator/warehouse/postgresql.rb +7 -0
  12. data/lib/ncs_navigator/warehouse/postgresql/pgpass.rb +79 -0
  13. data/lib/ncs_navigator/warehouse/table_modeler/mdes_ext.rb +9 -0
  14. data/lib/ncs_navigator/warehouse/table_modeler/model_template.rb.erb +1 -1
  15. data/lib/ncs_navigator/warehouse/transform_load.rb +55 -0
  16. data/lib/ncs_navigator/warehouse/transform_status.rb +63 -0
  17. data/lib/ncs_navigator/warehouse/transformers.rb +0 -1
  18. data/lib/ncs_navigator/warehouse/transformers/database.rb +91 -85
  19. data/lib/ncs_navigator/warehouse/transformers/enum_transformer.rb +26 -8
  20. data/lib/ncs_navigator/warehouse/transformers/vdr_xml.rb +1 -1
  21. data/lib/ncs_navigator/warehouse/transformers/vdr_xml/reader.rb +11 -4
  22. data/lib/ncs_navigator/warehouse/version.rb +1 -1
  23. data/spec/bcdatabase/test_sqlite.yml +4 -0
  24. data/spec/ncs_navigator/warehouse/configuration_spec.rb +42 -0
  25. data/spec/ncs_navigator/warehouse/postgresql/pgpass_spec.rb +187 -0
  26. data/spec/ncs_navigator/warehouse/table_modeler_spec.rb +15 -1
  27. data/spec/ncs_navigator/warehouse/transform_load_spec.rb +152 -0
  28. data/spec/ncs_navigator/warehouse/transformers/database_spec.rb +24 -28
  29. data/spec/ncs_navigator/warehouse/transformers/enum_transformer_spec.rb +16 -10
  30. data/spec/ncs_navigator/warehouse/transformers/vdr_xml/made_up_vdr_xml.xml +4 -4
  31. data/spec/ncs_navigator/warehouse/transformers/vdr_xml/reader_spec.rb +8 -3
  32. data/spec/spec_helper.rb +1 -1
  33. metadata +44 -37
  34. data/lib/ncs_navigator/warehouse/transformers/transform_status.rb +0 -23
data/CHANGELOG.md CHANGED
@@ -1,6 +1,22 @@
1
1
  NCS Navigator MDES Warehouse History
2
2
  ====================================
3
3
 
4
+ 0.1.0
5
+ -----
6
+
7
+ - Implement actual ETL runner. It's accessible via the `etl`
8
+ subcommand of the `mdes-wh` executable.
9
+
10
+ - Correct generated models so that association reference names and
11
+ foreign key column names do not collide. This was previously
12
+ possible when an MDES foreign key was not suffixed with "_id".
13
+
14
+ - Replace `model_row` helper in `Transformers::Database` with its own
15
+ top-level production method, `produce_one_for_one`. The options and
16
+ behavior are mostly the same as `model_row`, but this refactoring
17
+ allows the results of the column mapping heuristic to be exposed to
18
+ assist in writing DRYer importers.
19
+
4
20
  0.0.2
5
21
  -----
6
22
 
@@ -40,7 +40,7 @@ module NcsNavigator::Warehouse::Models::TwoPointZero
40
40
  property :equip_action,
41
41
  NcsNavigator::Warehouse::DataMapper::NcsString,
42
42
  { :required => true, :pii => :possible, :length => 1..2, :set => ["1", "2", "3", "-7", "-4"] }
43
- belongs_to :staff_id_reviewer,
43
+ belongs_to :staff_id_reviewer_record,
44
44
  'NcsNavigator::Warehouse::Models::TwoPointZero::Staff',
45
45
  :child_key => [ :staff_id_reviewer ], :required => false
46
46
 
@@ -34,25 +34,25 @@ module NcsNavigator::Warehouse::Models::TwoPointZero
34
34
  belongs_to :inc_staff_supervisor,
35
35
  'NcsNavigator::Warehouse::Models::TwoPointZero::Staff',
36
36
  :child_key => [ :inc_staff_supervisor_id ], :required => false
37
- belongs_to :inc_recip_is_participant,
37
+ belongs_to :inc_recip_is_participant_record,
38
38
  'NcsNavigator::Warehouse::Models::TwoPointZero::Participant',
39
39
  :child_key => [ :inc_recip_is_participant ], :required => false
40
- belongs_to :inc_recip_is_du,
40
+ belongs_to :inc_recip_is_du_record,
41
41
  'NcsNavigator::Warehouse::Models::TwoPointZero::DwellingUnit',
42
42
  :child_key => [ :inc_recip_is_du ], :required => false
43
- belongs_to :inc_recip_is_staff,
43
+ belongs_to :inc_recip_is_staff_record,
44
44
  'NcsNavigator::Warehouse::Models::TwoPointZero::Staff',
45
45
  :child_key => [ :inc_recip_is_staff ], :required => false
46
- belongs_to :inc_recip_is_family,
46
+ belongs_to :inc_recip_is_family_record,
47
47
  'NcsNavigator::Warehouse::Models::TwoPointZero::Person',
48
48
  :child_key => [ :inc_recip_is_family ], :required => false
49
- belongs_to :inc_recip_is_acquaintance,
49
+ belongs_to :inc_recip_is_acquaintance_record,
50
50
  'NcsNavigator::Warehouse::Models::TwoPointZero::Person',
51
51
  :child_key => [ :inc_recip_is_acquaintance ], :required => false
52
52
  property :inc_recip_is_other,
53
53
  NcsNavigator::Warehouse::DataMapper::NcsString,
54
54
  { :format => /^([-+]?[\d]{1,9})?$/ }
55
- belongs_to :inc_contact_person,
55
+ belongs_to :inc_contact_person_record,
56
56
  'NcsNavigator::Warehouse::Models::TwoPointZero::Person',
57
57
  :child_key => [ :inc_contact_person ], :required => false
58
58
  property :inctype,
@@ -25,7 +25,7 @@ module NcsNavigator::Warehouse::Models::TwoPointZero
25
25
  property :rvis_language_oth,
26
26
  NcsNavigator::Warehouse::DataMapper::NcsString,
27
27
  { :pii => :possible, :length => 0..255 }
28
- belongs_to :rvis_person,
28
+ belongs_to :rvis_person_record,
29
29
  'NcsNavigator::Warehouse::Models::TwoPointZero::Person',
30
30
  :child_key => [ :rvis_person ], :required => false
31
31
  property :rvis_who_consented,
@@ -40,7 +40,7 @@ module NcsNavigator::Warehouse::Models::TwoPointZero
40
40
  property :shipment_issues_oth,
41
41
  NcsNavigator::Warehouse::DataMapper::NcsString,
42
42
  { :pii => :possible, :length => 0..255 }
43
- belongs_to :staff_id_track,
43
+ belongs_to :staff_id_track_record,
44
44
  'NcsNavigator::Warehouse::Models::TwoPointZero::Staff',
45
45
  :child_key => [ :staff_id_track ], :required => false
46
46
  property :sample_shipped_by,
@@ -11,8 +11,12 @@ module NcsNavigator
11
11
  autoload :DataMapper, 'ncs_navigator/warehouse/data_mapper'
12
12
  autoload :DatabaseInitializer, 'ncs_navigator/warehouse/database_initializer'
13
13
  autoload :Models, 'ncs_navigator/warehouse/models'
14
+ autoload :PostgreSQL, 'ncs_navigator/warehouse/postgresql'
14
15
  autoload :TableModeler, 'ncs_navigator/warehouse/table_modeler'
15
16
  autoload :Transformers, 'ncs_navigator/warehouse/transformers'
17
+ autoload :TransformError, 'ncs_navigator/warehouse/transform_status'
18
+ autoload :TransformLoad, 'ncs_navigator/warehouse/transform_load'
19
+ autoload :TransformStatus, 'ncs_navigator/warehouse/transform_status'
16
20
  autoload :UpdatingShell, 'ncs_navigator/warehouse/updating_shell'
17
21
  autoload :VERSION, 'ncs_navigator/warehouse/version'
18
22
  autoload :XmlEmitter, 'ncs_navigator/warehouse/xml_emitter'
@@ -9,7 +9,7 @@ module NcsNavigator::Warehouse
9
9
  class_option :quiet, :type => :boolean, :aliases => %w(-q),
10
10
  :desc => 'Suppress the status messages printed to standard error'
11
11
  class_option 'config', :type => :string, :aliases => %w(-c),
12
- :desc => 'Supply an alternate configuration file instead of the default /etc/nubic/ncs/warehouse/{env_name}.rb'
12
+ :desc => "Supply an alternate configuration file instead of the default #{Configuration.environment_file}"
13
13
 
14
14
  no_tasks {
15
15
  def configuration
@@ -40,6 +40,12 @@ module NcsNavigator::Warehouse
40
40
  db.replace_schema
41
41
  end
42
42
 
43
+ desc 'clone-working', 'Copies the contents of the working database to the reporting database'
44
+ def clone_working
45
+ db = DatabaseInitializer.new(configuration)
46
+ db.clone_working_to_reporting
47
+ end
48
+
43
49
  desc 'emit-xml [FILENAME]', 'Generates the VDR submission XML'
44
50
  long_desc <<-DESC
45
51
  Generates and zips the vanguard data repository submission XML from
@@ -52,6 +58,30 @@ DESC
52
58
 
53
59
  XmlEmitter.new(configuration, filename).emit_xml
54
60
  end
61
+
62
+ desc 'etl', 'Performs the full extract-transform-load process for this configuration'
63
+ long_desc <<-DESC
64
+ Clears the working schema and repopulates it with the results of running
65
+ the all the configured transforms. If the transforms are successful, the
66
+ reporting schema is wiped and replaced with the results.
67
+ DESC
68
+ method_option 'force', :type => 'boolean',
69
+ :desc => 'Copy the working schema to production even if there are errors'
70
+ def etl
71
+ db = DatabaseInitializer.new(configuration)
72
+ db.set_up_repository(:both)
73
+ db.replace_schema
74
+
75
+ success = TransformLoad.new(configuration).run
76
+ if success || options['force']
77
+ db.clone_working_to_reporting
78
+ else
79
+ configuration.shell.say_line "There were errors during ETL. Reporting database not updated."
80
+ configuration.shell.say_line "See the log and the database table wh_transform_error for more details."
81
+
82
+ exit 1
83
+ end
84
+ end
55
85
  end
56
86
  end
57
87
 
@@ -2,6 +2,7 @@ require 'ncs_navigator/warehouse'
2
2
  require 'ncs_navigator/configuration'
3
3
  require 'ncs_navigator/mdes'
4
4
 
5
+ require 'data_mapper'
5
6
  require 'active_support/core_ext/object/try'
6
7
  require 'pathname'
7
8
 
@@ -276,15 +277,7 @@ module NcsNavigator::Warehouse
276
277
  #
277
278
  # @param [Pathname,String,nil] fn
278
279
  def log_file=(fn)
279
- @log_directory =
280
- case fn
281
- when nil
282
- nil
283
- when Pathname
284
- fn
285
- else
286
- Pathname.new(fn)
287
- end
280
+ @log_directory = coerce_to_pathname(fn)
288
281
  end
289
282
 
290
283
  def set_up_logs
@@ -310,6 +303,53 @@ module NcsNavigator::Warehouse
310
303
  set_up_logs unless @log
311
304
  @log
312
305
  end
306
+
307
+ ####
308
+ #### pg_bin
309
+ ####
310
+
311
+ ##
312
+ # The path where the PostgreSQL command line utilities can be
313
+ # found. If they are on the search path, this may be `nil` (the
314
+ # default).
315
+ #
316
+ # @return [Pathname, nil]
317
+ attr_reader :pg_bin_path
318
+
319
+ ##
320
+ # Specify the path where the PostgreSQL command line utilities can
321
+ # be found.
322
+ #
323
+ # @param [Pathname,String,nil] fn
324
+ # @return [void]
325
+ def pg_bin_path=(fn)
326
+ @pg_bin_path = coerce_to_pathname(fn)
327
+ end
328
+
329
+ ##
330
+ # @return [Pathname] the executable for the given PostgreSQL
331
+ # utility.
332
+ # @param [Pathname, String] command the name of the command
333
+ def pg_bin(command)
334
+ if pg_bin_path
335
+ pg_bin_path + command
336
+ else
337
+ coerce_to_pathname command
338
+ end
339
+ end
340
+
341
+ private
342
+
343
+ def coerce_to_pathname(value)
344
+ case value
345
+ when nil
346
+ nil
347
+ when Pathname
348
+ value
349
+ else
350
+ Pathname.new(value)
351
+ end
352
+ end
313
353
  end
314
354
  end
315
355
 
@@ -57,6 +57,8 @@ module NcsNavigator::Warehouse
57
57
  def connect_one(which_one, dm_name=nil)
58
58
  dm_name ||= :"mdes_warehouse_#{which_one}"
59
59
  log.info "Connecting DataMapper repository #{dm_name.inspect}"
60
+ p = params(which_one)
61
+ log.debug " using #{p.merge('password' => 'SUPPRESSED').inspect}"
60
62
  adapter = ::DataMapper.setup(dm_name, params(which_one))
61
63
  end
62
64
  private :connect_one
@@ -78,7 +80,6 @@ module NcsNavigator::Warehouse
78
80
  #
79
81
  # @return [void]
80
82
  def replace_schema
81
- # TODO: actual logging, too
82
83
  shell.say "Dropping everything"
83
84
  log.info "Dropping everything in working schema"
84
85
  ::DataMapper.repository(:mdes_warehouse_working).adapter.
@@ -89,12 +90,69 @@ module NcsNavigator::Warehouse
89
90
  log.info "Initializing schema for MDES #{configuration.mdes.specification_version}"
90
91
  # In DM 1.2, DataMapper.auto_migrate! only works for the
91
92
  # :default repo
92
- configuration.models_module.mdes_order.each do |m|
93
- shell.clear_line_then_say "Adding #{m.mdes_table_name}..."
93
+ ::DataMapper::Model.descendants.each do |m|
94
+ shell.clear_line_then_say "Adding #{m.storage_name(:mdes_warehouse_working)}..."
94
95
  m.auto_migrate!(:mdes_warehouse_working)
95
96
  end
96
- shell.clear_line_then_say "Added #{configuration.models_module.mdes_order.size} tables.\n"
97
+ shell.clear_line_then_say(
98
+ "Added #{configuration.models_module.mdes_order.size} MDES tables.\n")
97
99
  end
100
+
101
+ ##
102
+ # Replaces the reporting database with a clone of the working
103
+ # database. This method relies on the command line `pg_dump` and
104
+ # `pg_restore` commands.
105
+ #
106
+ # @see Configuration#pg_bin_path
107
+ def clone_working_to_reporting
108
+ PostgreSQL::Pgpass.new.tap do |pgpass|
109
+ pgpass.update params(:working)
110
+ pgpass.update params(:reporting)
111
+ end
112
+
113
+ dump_cmd = [
114
+ configuration.pg_bin('pg_dump'),
115
+ pg_params(params(:working)),
116
+ '--format=custom',
117
+ params(:working)['database']
118
+ ].flatten
119
+
120
+ restore_cmd = [
121
+ configuration.pg_bin('pg_restore'),
122
+ pg_params(params(:reporting)),
123
+ '--schema', 'public',
124
+ '--clean',
125
+ '--dbname', params(:reporting)['database']
126
+ ].flatten
127
+
128
+ command = "#{escape_cmd dump_cmd} | #{escape_cmd restore_cmd}"
129
+ log.info('Cloning working schema into reporting schema')
130
+ log.debug("Clone command: #{command.inspect}")
131
+ unless system(command)
132
+ configuration.shell.say_line "Clone from working to reporting failed. See above for detail."
133
+ exit 1
134
+ end
135
+ end
136
+
137
+ def pg_params(p)
138
+ [
139
+ pg_param(p, 'host'),
140
+ pg_param(p, 'port'),
141
+ pg_param(p, 'username'),
142
+ '-w'
143
+ ].compact.flatten
144
+ end
145
+ private :pg_params
146
+
147
+ def pg_param(p, param_name)
148
+ ["--#{param_name}", p[param_name]] if p[param_name]
149
+ end
150
+ private :pg_param
151
+
152
+ def escape_cmd(parts)
153
+ parts.collect { |p| "'#{p}'" }.join(' ')
154
+ end
155
+ private :escape_cmd
98
156
  end
99
157
  end
100
158
 
@@ -1,5 +1,8 @@
1
1
  require 'ncs_navigator/warehouse'
2
2
 
3
+ # ensure that this model is loaded along with the others
4
+ require 'ncs_navigator/warehouse/transform_status'
5
+
3
6
  module NcsNavigator::Warehouse
4
7
  module Models
5
8
  autoload :MdesModel, 'ncs_navigator/warehouse/models/mdes_model'
@@ -0,0 +1,7 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ module NcsNavigator::Warehouse
4
+ module PostgreSQL
5
+ autoload :Pgpass, 'ncs_navigator/warehouse/postgresql/pgpass'
6
+ end
7
+ end
@@ -0,0 +1,79 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ require 'pathname'
4
+ require 'forwardable'
5
+
6
+ module NcsNavigator::Warehouse::PostgreSQL
7
+ class Pgpass
8
+ extend Forwardable
9
+
10
+ ##
11
+ # Converts a database configuration hash into the elements of the
12
+ # corresponding .pgpass line.
13
+ #
14
+ # @param [Hash<String, String>] the configuration
15
+ # @return [Array<String>]
16
+ def self.line(entry)
17
+ [
18
+ entry['host'] || 'localhost',
19
+ (entry['port'] || 5432).to_s,
20
+ '*',
21
+ entry['username'] || fail("No username in configuration #{entry.inspect}"),
22
+ entry['password'] || fail("No password in configuration #{entry.inspect}")
23
+ ]
24
+ end
25
+
26
+ def_delegator self, :line
27
+
28
+ attr_reader :file
29
+
30
+ def initialize
31
+ @file = Pathname.new(ENV['HOME']) + '.pgpass'
32
+ end
33
+
34
+ ##
35
+ # Updates the `.pgpass` file so that it includes the current
36
+ # password for the given configuration. This may involve adding an
37
+ # entry, replacing an entry, or even creating the file entirely.
38
+ #
39
+ # @param [Hash] entry a database configuration hash
40
+ # @return [void]
41
+ def update(entry)
42
+ ensure_file_exists_and_is_writable
43
+
44
+ new_line = self.line(entry)
45
+ contents = file.readlines.collect { |l| l.chomp.split(':') }
46
+ match = contents.detect { |line| line[0..3] == new_line[0..3] }
47
+
48
+ if match
49
+ match[4] = entry['password']
50
+ else
51
+ contents << new_line
52
+ end
53
+
54
+ file.open('w') do |f|
55
+ contents.each do |l|
56
+ f.puts l.join(':')
57
+ end
58
+ end
59
+ file.chmod(0600)
60
+ end
61
+
62
+ private
63
+
64
+ def ensure_file_exists_and_is_writable
65
+ if file.exist?
66
+ if file.writable?
67
+ # do nothing
68
+ else
69
+ fail "Cannot update #{file}"
70
+ end
71
+ elsif file.parent.writable?
72
+ # touch
73
+ file.open('w') { |f| }
74
+ else
75
+ fail "Cannot create #{file}"
76
+ end
77
+ end
78
+ end
79
+ end
@@ -73,6 +73,15 @@ module NcsNavigator
73
73
  end
74
74
  ].compact
75
75
  end
76
+
77
+ def wh_reference_name
78
+ fail 'Does not apply' unless self.table_reference
79
+ if self.name =~ /_id$/
80
+ self.name.sub(/_id$/, '')
81
+ else
82
+ self.name + '_record'
83
+ end
84
+ end
76
85
  end
77
86
 
78
87
  class VariableType
@@ -12,7 +12,7 @@ module <%= module_name %>
12
12
 
13
13
  <% t.wh_variables.each do |v| -%>
14
14
  <% if v.table_reference -%>
15
- belongs_to :<%= v.name.sub(/_id$/, '') %>,
15
+ belongs_to :<%= v.wh_reference_name %>,
16
16
  '<%= v.table_reference.wh_model_name(module_name) %>',
17
17
  :child_key => [ :<%= v.name %> ], :required => <%= !!v.required %>
18
18
  <% else -%>