ncs_mdes_warehouse 0.10.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -92,7 +92,12 @@ class NcsNavigator::Warehouse::Transformers::VdrXml
92
92
  else
93
93
  # node is the start tag of a table variable
94
94
  var = node.local_name.to_sym
95
- val = node.inner_xml.strip.gsub('&#13;', "\r")
95
+ val =
96
+ if node.attribute('xsi:nil') == 'true'
97
+ nil
98
+ else
99
+ node.inner_xml.strip.gsub('&#13;', "\r")
100
+ end
96
101
 
97
102
  unless node.self_closing?
98
103
  # Skip to closing tag
@@ -1,5 +1,5 @@
1
1
  module NcsNavigator
2
2
  module Warehouse
3
- VERSION = '0.10.1'
3
+ VERSION = '0.11.0'
4
4
  end
5
5
  end
@@ -19,16 +19,18 @@ module NcsNavigator::Warehouse
19
19
  # emitter.
20
20
  attr_reader :configuration
21
21
 
22
- ##
23
- # @return [Pathname] the file to which the XML will be emitted.
24
- attr_reader :filename
25
-
26
22
  ##
27
23
  # @return [Array<Models::MdesModel>] the models whose data will be
28
24
  # emitted. This is determined from the `:tables` option to
29
25
  # {#initialize}.
30
26
  attr_reader :models
31
27
 
28
+ ##
29
+ # @private exposed for testing
30
+ # @return [Array] the configuration objects related to each separate XML
31
+ # file emitted in one run.
32
+ attr_reader :xml_files
33
+
32
34
  def_delegators :@configuration, :shell, :log
33
35
 
34
36
  HEADER_TEMPLATE = ERB.new(<<-XML_ERB)
@@ -99,18 +101,13 @@ XML
99
101
  # produced alongside the XML file?
100
102
  def initialize(config, filename, options={})
101
103
  @configuration = config
102
- @include_pii = options[:'include-pii']
103
- @filename = case filename
104
- when Pathname
105
- filename
106
- when nil
107
- self.class.default_filename(configuration, @include_pii)
108
- else
109
- Pathname.new(filename.to_s)
110
- end
111
104
  @record_count = 0
112
105
  @block_size = options[:'block-size'] || 5000
113
106
  @zip = options.has_key?(:zip) ? options[:zip] : true
107
+
108
+ @xml_files = determine_files_to_create(filename, options)
109
+
110
+
114
111
  @models =
115
112
  if options[:tables]
116
113
  options[:tables].collect { |t| t.to_s }.collect { |t|
@@ -126,70 +123,108 @@ XML
126
123
  #
127
124
  # @return [void]
128
125
  def emit_xml
129
- shell.say_line("Exporting to #{filename}#{include_pii? ? ' with PII' : ''}")
130
- log.info("Beginning XML export to #{filename}")
126
+ shell.say_line("Exporting to #{xml_files.collect(&:describe).join(', ')}")
127
+ log.info("Beginning XML export to #{xml_files.collect(&:describe).join(', ')}")
131
128
 
132
129
  @start = Time.now
133
- filename.open('w') do |f|
134
- f.write HEADER_TEMPLATE.result(binding)
135
-
136
- models.each do |model|
137
- shell.clear_line_then_say('Writing XML for %33s' % model.mdes_table_name)
138
-
139
- write_all_xml_for_model(f, model)
140
- end
141
-
142
- f.write FOOTER_TEMPLATE
130
+ xml_files.each { |xf| xf.write HEADER_TEMPLATE.result(binding) }
131
+ models.each do |model|
132
+ shell.clear_line_then_say('Writing XML for %33s' % model.mdes_table_name)
133
+ write_all_xml_for_model(model)
143
134
  end
135
+ xml_files.each { |xf| xf.write FOOTER_TEMPLATE }
136
+ xml_files.each { |xf| xf.close }
144
137
  @end = Time.now
138
+
145
139
  msg = "%d records written in %d seconds (%.1f/sec).\n" % [@record_count, emit_time, emit_rate]
146
140
  shell.clear_line_then_say(msg)
147
141
  log.info(msg)
148
142
 
149
- if zip?
150
- shell.say_line("Zipping to #{zip_filename}")
151
- log.info("Zipping to #{zip_filename}")
152
- Zip::ZipFile.open(zip_filename, Zip::ZipFile::CREATE) do |zf|
153
- zf.add(filename.basename, filename)
154
- end
155
- log.info("XML export complete")
143
+ xml_files.each { |xf| xf.zip_if_desired }
144
+ log.info("XML export complete")
145
+ end
146
+
147
+ ##
148
+ # @return [Pathname] the single file to which the XML will be emitted.
149
+ # Raises an exception if writing to multiple files.
150
+ def filename
151
+ if xml_files.size == 1
152
+ xml_files.first.filename
153
+ else
154
+ fail "Emitting more than one file. Use `xml_files` to interrogate."
156
155
  end
157
156
  end
158
157
 
159
158
  ##
160
- # Will PII be included in the exported XML?
161
- #
162
- # @return [Boolean]
159
+ # @return [Boolean] Will PII be included in the exported XML?
160
+ # Raises an exception if writing to multiple files.
163
161
  def include_pii?
164
- @include_pii
162
+ if xml_files.size == 1
163
+ xml_files.first.include_pii?
164
+ else
165
+ fail "Emitting more than one file. Use `xml_files` to interrogate."
166
+ end
165
167
  end
166
168
 
167
169
  ##
168
- # Will a ZIP archive be created along with the XML?
170
+ # Will ZIP archive(s) be created along with the XML?
169
171
  #
170
172
  # @return [Boolean]
171
173
  def zip?
172
174
  @zip
173
175
  end
174
176
 
175
- ##
176
- # @return [Pathname] the filename for the ZIP archive of the XML,
177
- # if any. Currently this is always {#filename} + '.zip'.
178
- # @see #zip?
179
- def zip_filename
180
- @zip_filename ||= filename.to_s + '.zip'
177
+ private
178
+
179
+ def determine_files_to_create(filename, options)
180
+ if options[:'and-pii']
181
+ # two files, one with and one without PII
182
+ no_pii_filename = select_filename(filename, false, options[:directory])
183
+ with_pii_filename = Pathname.new(no_pii_filename.to_s.sub(/^(.*?)(\..*)$/, '\1-PII\2'))
184
+ [
185
+ [false, no_pii_filename],
186
+ [true, with_pii_filename]
187
+ ].collect do |include_pii, fn|
188
+ XmlFile.new(fn, include_pii, @zip, shell, log)
189
+ end
190
+ else
191
+ # one file, PII determined by --include-pii
192
+ include_pii = options[:'include-pii']
193
+ actual_filename = select_filename(filename, include_pii, options[:directory])
194
+ [
195
+ XmlFile.new(actual_filename, include_pii, @zip, shell, log)
196
+ ]
197
+ end
181
198
  end
182
199
 
183
- private
200
+ def select_filename(filename, include_pii, directory_for_default_files)
201
+ if filename && directory_for_default_files
202
+ fail "It does not make sense to specify both a filename and the :directory option."
203
+ end
184
204
 
185
- def write_all_xml_for_model(f, model)
205
+ case filename
206
+ when Pathname
207
+ filename
208
+ when nil
209
+ default_name = self.class.default_filename(configuration, include_pii)
210
+ if directory_for_default_files
211
+ Pathname.new(directory_for_default_files) + default_name
212
+ else
213
+ default_name
214
+ end
215
+ else
216
+ Pathname.new(filename.to_s)
217
+ end
218
+ end
219
+
220
+ def write_all_xml_for_model(model)
186
221
  shell.say(' %20s' % '[loading]')
187
222
  count = model.count
188
223
  offset = 0
189
224
  while offset < count
190
225
  shell.back_up_and_say(20, '%20s' % '[loading]')
191
226
  model.all(:limit => @block_size, :offset => offset).each do |instance|
192
- instance.write_mdes_xml(f, :indent => 3, :margin => 1, :pii => include_pii?)
227
+ xml_files.each { |xf| xf.write_instance(instance) }
193
228
  @record_count += 1
194
229
 
195
230
  shell.back_up_and_say(20, '%5d (%5.1f/sec)' % [@record_count, emit_rate])
@@ -217,5 +252,52 @@ XML
217
252
  def emit_rate
218
253
  @record_count / emit_time
219
254
  end
255
+
256
+ ##
257
+ # @private
258
+ #
259
+ # Encapsulates the data and operations related to one of the files produced
260
+ # in a run. Consider "related to one of the files" versus "related to
261
+ # loading the records".
262
+ class XmlFile < Struct.new(:filename, :include_pii, :zip, :shell, :log)
263
+ alias :include_pii? :include_pii
264
+ alias :zip? :zip
265
+
266
+ def describe
267
+ "#{filename} #{include_pii? ? 'with' : 'without'} PII"
268
+ end
269
+
270
+ def zip_filename
271
+ @zip_filename ||= filename.to_s + '.zip'
272
+ end
273
+
274
+ def open
275
+ @handle ||= filename.open('w')
276
+ end
277
+ alias :handle :open
278
+
279
+ def write(s)
280
+ handle.write(s)
281
+ end
282
+
283
+ def write_instance(instance)
284
+ instance.write_mdes_xml(handle, :indent => 3, :margin => 1, :pii => include_pii?)
285
+ end
286
+
287
+ def close
288
+ @handle && @handle.close
289
+ end
290
+
291
+ def zip_if_desired
292
+ if zip?
293
+ shell.say("Zipping to #{zip_filename}")
294
+ log.info("Zipping to #{zip_filename}")
295
+ Zip::ZipFile.open(zip_filename, Zip::ZipFile::CREATE) do |zf|
296
+ zf.add(filename.basename, filename)
297
+ end
298
+ shell.clear_line_then_say("Zipped #{zip_filename}.")
299
+ end
300
+ end
301
+ end
220
302
  end
221
303
  end
@@ -42,6 +42,7 @@ Gem::Specification.new do |s|
42
42
  s.add_dependency 'actionmailer', '~> 3.0'
43
43
 
44
44
  s.add_dependency 'treetop'
45
+ s.add_dependency 'rgl'
45
46
 
46
47
  s.add_development_dependency 'rspec', '~> 2.6'
47
48
  s.add_development_dependency 'rake', '~> 0.9.2'
@@ -43,7 +43,7 @@ module NcsNavigator::Warehouse
43
43
 
44
44
  describe '#foreign_key_index' do
45
45
  it 'provides an index by default' do
46
- config.foreign_key_index.should respond_to(:record_and_verify)
46
+ config.foreign_key_index.should respond_to(:verify_or_defer)
47
47
  end
48
48
 
49
49
  it 'allows the index to be overridden' do
@@ -17,7 +17,7 @@ module NcsNavigator::Warehouse::Transformers
17
17
  include ::DataMapper::Resource
18
18
 
19
19
  property :id, Integer, :key => true
20
- property :name, String
20
+ property :name, String, :length => (1..100)
21
21
  property :age, Integer
22
22
  belongs_to :sample,
23
23
  'NcsNavigator::Warehouse::Transformers::Sample', :child_key => [ :sample_id ], :required => false
@@ -64,7 +64,7 @@ module NcsNavigator::Warehouse::Transformers
64
64
  end
65
65
 
66
66
  subject.transform(transform_status)
67
- transform_status.transform_errors.should be_empty
67
+ transform_status.transform_errors.should == []
68
68
  end
69
69
 
70
70
  describe 'with an invalid instance' do
@@ -122,6 +122,7 @@ module NcsNavigator::Warehouse::Transformers
122
122
  records[2].should_receive(:save).and_return(true)
123
123
 
124
124
  records[1].psu_id = '20000041'
125
+ records[1].recruit_type = 'A'
125
126
 
126
127
  subject.transform(transform_status)
127
128
  end
@@ -135,11 +136,11 @@ module NcsNavigator::Warehouse::Transformers
135
136
  end
136
137
 
137
138
  it 'records an error' do
138
- transform_status.transform_errors.collect(&:record_id).should == ['2']
139
+ transform_status.transform_errors.collect(&:record_id).uniq.should == ['2']
139
140
  end
140
141
 
141
142
  describe 'the recorded error' do
142
- let(:error) { transform_status.transform_errors.first }
143
+ let(:error) { transform_status.transform_errors.detect { |err| err.message =~ /PSU/ } }
143
144
 
144
145
  it 'has the correct model class' do
145
146
  error.model_class.should == Sample.to_s
@@ -163,28 +164,135 @@ module NcsNavigator::Warehouse::Transformers
163
164
  end
164
165
  end
165
166
 
167
+ describe 'and another invalidity' do
168
+ let(:validation_error) {
169
+ transform_status.transform_errors.detect { |err| err.message =~ /format/i }
170
+ }
171
+
172
+ it 'simultaneously reports an error about each' do
173
+ validation_error.attribute_name.should == 'recruit_type'
174
+ end
175
+ end
176
+
166
177
  let(:expected_saved_record_ids) { [1, 3] }
167
178
  let(:expected_not_saved_record_ids) { [2] }
168
179
 
169
180
  include_examples 'a foreign key index updater'
170
181
  end
171
182
 
172
- describe 'with an unsatisfied foreign key' do
173
- let(:error) { transform_status.transform_errors.first }
183
+ describe 'with an initially unsatisfied foreign key' do
184
+ let(:unsatisfied) { Subsample.new(:id => 3, :sample_id => 912, :name => '') }
185
+ let(:satisfier) { Sample.new(:id => unsatisfied.sample_id, :psu_id => '20000030', :name => 'Nine') }
174
186
 
175
- before do
176
- records << Subsample.new(:id => 3, :sample_id => 912)
187
+ let(:fk_error) {
188
+ transform_status.transform_errors.detect { |error| error.message =~ /foreign/i }
189
+ }
190
+
191
+ let(:validation_error) {
192
+ transform_status.transform_errors.detect { |error| error.message =~ /valid/i }
193
+ }
177
194
 
195
+ before do
178
196
  records.each do |m|
179
197
  m.stub!(:valid?).and_return(true)
180
198
  m.stub!(:save).and_return(true)
181
199
  end
182
200
 
183
- subject.transform(transform_status)
201
+ records << unsatisfied
202
+ end
203
+
204
+ describe 'when the record is otherwise valid' do
205
+ before do
206
+ unsatisfied.name = 'Foo'
207
+ unsatisfied.should be_valid # setup
208
+ end
209
+
210
+ describe 'and the foreign key is never satisfied' do
211
+ it 'reports the unsatisfied foreign key' do
212
+ unsatisfied.stub!(:save).and_return(true)
213
+ subject.transform(transform_status)
214
+
215
+ [fk_error.attribute_name, fk_error.attribute_value].should == ['sample_id', '912']
216
+ end
217
+
218
+ it 'does not save the record with the unsatisfied key' do
219
+ unsatisfied.should_not_receive(:save)
220
+
221
+ subject.transform(transform_status)
222
+ end
223
+ end
224
+
225
+ describe 'and the foreign key is eventually satisfied' do
226
+
227
+ before do
228
+ satisfier.stub!(:valid?).and_return(true)
229
+ satisfier.stub!(:save).and_return(true)
230
+ records << satisfier
231
+ end
232
+
233
+ it 'does not report any errors' do
234
+ transform_status.transform_errors.should == []
235
+ end
236
+
237
+ it 'saves the record with the eventually satisfied key' do
238
+ satisfier.should_receive(:save)
239
+
240
+ subject.transform(transform_status)
241
+ end
242
+ end
184
243
  end
185
244
 
186
- it 'reports the unsatisfied foreign key' do
187
- error.message.should =~ /Unsatisfied foreign key sample_id=912/
245
+ describe 'when the record has invalid properties' do
246
+ before do
247
+ unsatisfied.name = ''
248
+ unsatisfied.should_not be_valid # setup
249
+ end
250
+
251
+ describe 'and the foreign key is never satisfied' do
252
+ it 'reports the unsatisfied foreign key' do
253
+ subject.transform(transform_status)
254
+
255
+ [fk_error.attribute_name, fk_error.attribute_value].should == ['sample_id', '912']
256
+ end
257
+
258
+ it 'reports the invalid values' do
259
+ subject.transform(transform_status)
260
+
261
+ validation_error.attribute_name.should == 'name'
262
+ end
263
+
264
+ it 'does not save the record with the unsatisfied key' do
265
+ unsatisfied.should_not_receive(:save)
266
+
267
+ subject.transform(transform_status)
268
+ end
269
+ end
270
+
271
+ describe 'and the foreign key is eventually satisfied' do
272
+ before do
273
+ satisfier.stub!(:valid?).and_return(true)
274
+ satisfier.stub!(:save).and_return(true)
275
+ records << satisfier
276
+ end
277
+
278
+ it 'does not report a foreign key problem' do
279
+ subject.transform(transform_status)
280
+
281
+ fk_error.should be_nil
282
+ end
283
+
284
+ it 'does report the invalid value' do
285
+ subject.transform(transform_status)
286
+
287
+ validation_error.attribute_name.should == 'name'
288
+ end
289
+
290
+ it 'does not save the record' do
291
+ unsatisfied.should_not_receive(:save)
292
+
293
+ subject.transform(transform_status)
294
+ end
295
+ end
188
296
  end
189
297
  end
190
298