ncs_mdes_warehouse 0.10.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,32 @@ module NcsNavigator::Warehouse::Transformers
19
19
  property :f_id, Integer, :key => true
20
20
  end
21
21
 
22
+ class Subcircle1
23
+ include ::DataMapper::Resource
24
+
25
+ property :id, Integer, :key => true
26
+ belongs_to :subcircle2, 'NcsNavigator::Warehouse::Transformers::Subcircle2',
27
+ :required => false, :child_key => [ :subcircle2_id ]
28
+
29
+ belongs_to :addr, 'NcsNavigator::Warehouse::Transformers::Addr', :child_key => [ :addr_id ]
30
+ end
31
+
32
+ class Subcircle2
33
+ include ::DataMapper::Resource
34
+
35
+ property :id, Integer, :key => true
36
+ belongs_to :subcircle3, 'NcsNavigator::Warehouse::Transformers::Subcircle3',
37
+ :required => false, :child_key => [ :subcircle3_id ]
38
+ end
39
+
40
+ class Subcircle3
41
+ include ::DataMapper::Resource
42
+
43
+ property :id, Integer, :key => true
44
+ belongs_to :subcircle1, 'NcsNavigator::Warehouse::Transformers::Subcircle1',
45
+ :required => false, :child_key => [ :subcircle1_id ]
46
+ end
47
+
22
48
  before(:each) do
23
49
  DataMapper.finalize
24
50
  end
@@ -28,6 +54,13 @@ module NcsNavigator::Warehouse::Transformers
28
54
  let(:transform_status) { NcsNavigator::Warehouse::TransformStatus.memory_only('test') }
29
55
  let(:errors) { transform_status.transform_errors }
30
56
 
57
+ def verify_and_record_if_appropriate(rec, index=fk_index)
58
+ if v = index.verify_or_defer(rec)
59
+ index.record(rec)
60
+ end
61
+ v
62
+ end
63
+
31
64
  describe '#initialize' do
32
65
  describe ':existing_key_provider' do
33
66
  it 'defaults to a database provider' do
@@ -47,45 +80,47 @@ module NcsNavigator::Warehouse::Transformers
47
80
 
48
81
  describe 'reporting errors' do
49
82
  before do
50
- fk_index.record_and_verify(Frob.new(:f_id => 8))
51
- fk_index.record_and_verify(Frob.new(:f_id => 16))
83
+ fk_index.start_transform(transform_status)
84
+ verify_and_record_if_appropriate(Frob.new(:f_id => 8))
85
+ verify_and_record_if_appropriate(Frob.new(:f_id => 16))
52
86
  end
53
87
 
54
88
  it 'does not report for a key which is satisfied when it is first recorded' do
55
- fk_index.record_and_verify(Addr.new(:frob_id => 8, :a_id => 1))
56
- fk_index.report_errors(transform_status)
89
+ verify_and_record_if_appropriate(Addr.new(:frob_id => 8, :a_id => 1))
90
+ fk_index.end_transform
57
91
 
58
92
  errors.should == []
59
93
  end
60
94
 
61
95
  it 'does not report for a key which is not initially satisfied but is later' do
62
- fk_index.record_and_verify(Addr.new(:frob_id => 4, :a_id => 1))
63
- fk_index.record_and_verify(Frob.new(:f_id => 4))
64
- fk_index.report_errors(transform_status)
96
+ verify_and_record_if_appropriate(Addr.new(:frob_id => 4, :a_id => 1))
97
+ verify_and_record_if_appropriate(Frob.new(:f_id => 4))
98
+ fk_index.end_transform
65
99
 
66
100
  errors.should == []
67
101
  end
68
102
 
69
103
  it 'does not report for a key which is provided by the external key provider' do
70
- fk_index.record_and_verify(Addr.new(:a_id => 8, :old_one_id => 120))
71
- fk_index.report_errors(transform_status)
104
+ verify_and_record_if_appropriate(Addr.new(:a_id => 8, :old_one_id => 120))
105
+ fk_index.end_transform
72
106
 
73
107
  errors.should == []
74
108
  end
75
109
 
76
110
  it 'does not report for a key which is provided by the external key provider when the associated model class has not previously been referenced' do
77
111
  another_index = ForeignKeyIndex.new(:existing_key_provider => key_provider)
112
+ another_index.start_transform(transform_status)
78
113
 
79
- another_index.record_and_verify(Addr.new(:a_id => 8, :frob_id => 80))
80
- another_index.report_errors(transform_status)
114
+ verify_and_record_if_appropriate(Addr.new(:a_id => 8, :frob_id => 80), another_index)
115
+ another_index.end_transform
81
116
 
82
117
  errors.should == []
83
118
  end
84
119
 
85
120
  describe 'when a key is never satisfied' do
86
121
  before do
87
- fk_index.record_and_verify(Addr.new(:frob_id => 4, :a_id => 1))
88
- fk_index.report_errors(transform_status)
122
+ verify_and_record_if_appropriate(Addr.new(:frob_id => 4, :a_id => 1))
123
+ fk_index.end_transform
89
124
  end
90
125
 
91
126
  it 'reports the error' do
@@ -102,22 +137,235 @@ module NcsNavigator::Warehouse::Transformers
102
137
 
103
138
  it 'includes a useful message' do
104
139
  errors.first.message.should ==
105
- 'Unsatisfied foreign key frob_id=4 referencing NcsNavigator::Warehouse::Transformers::Frob.'
140
+ 'Unsatisfied foreign key referencing NcsNavigator::Warehouse::Transformers::Frob.'
106
141
  end
107
142
 
108
- it 'only reports the error once if #report_errors is called multiple times' do
109
- fk_index.report_errors(transform_status)
143
+ it 'includes the referencing attribute name' do
144
+ errors.first.attribute_name.should == 'frob_id'
145
+ end
146
+
147
+ it 'includes the unsatisfied value' do
148
+ errors.first.attribute_value.should == '4'
149
+ end
150
+
151
+ it 'only reports the error for the transform in which it occurs' do
152
+ fk_index.start_transform(NcsNavigator::Warehouse::TransformStatus.memory_only('test2'))
153
+ fk_index.end_transform
110
154
 
111
155
  errors.size.should == 1
112
156
  end
113
157
  end
114
158
 
115
159
  it 'reports multiple failed references for a single record' do
116
- fk_index.record_and_verify(Addr.new(:a_id => 1, :frob_id => 2, :old_one_id => 3))
117
- fk_index.report_errors(transform_status)
160
+ verify_and_record_if_appropriate(Addr.new(:a_id => 1, :frob_id => 2, :old_one_id => 3))
161
+ fk_index.end_transform
162
+
163
+ errors.collect { |e| [e.attribute_name, e.attribute_value] }.sort.should ==
164
+ [['frob_id', '2'], ['old_one_id', '3']]
165
+ end
166
+ end
167
+
168
+ describe 'complex deferred foreign key resolution' do
169
+ # The full set of records produces a consistent graph like this:
170
+ # frob1 <- addr1 <- circle11 -> circle21 -> circle31
171
+ # ^ |
172
+ # \_______________________|
173
+ # The various test contexts simulate difficulties by presenting the
174
+ # records in a different order or by removing some of them.
175
+ let(:frob1) { Frob.new(:f_id => 1) }
176
+ let(:addr1) { Addr.new(:a_id => 1, :frob_id => 1)}
177
+ let(:circle11) { Subcircle1.new(:id => 1, :addr_id => 1, :subcircle2_id => 1)}
178
+ let(:circle21) { Subcircle2.new(:id => 1, :subcircle3_id => 1)}
179
+ let(:circle31) { Subcircle3.new(:id => 1, :subcircle1_id => 1)}
180
+
181
+ before do
182
+ fk_index.start_transform(transform_status)
183
+ end
184
+
185
+ def record_idents(records)
186
+ records.collect { |rec| [rec.class.to_s.split(':').last, rec.key.first].join('-') }
187
+ end
188
+
189
+ describe 'nested out-of-order records which are eventually resolvable' do
190
+ let!(:record_and_verify_results) {
191
+ circle21.subcircle3_id = nil
192
+
193
+ [circle21, circle11, addr1, frob1].each_with_object({}) do |rec, results|
194
+ results[rec] = verify_and_record_if_appropriate(rec)
195
+ end
196
+ }
197
+
198
+ it 'defers records with unresolvable FKs' do
199
+ record_and_verify_results[addr1].should be_false
200
+ end
201
+
202
+ it 'defers records that refer to records with unresolvable FKs' do
203
+ record_and_verify_results[circle11].should be_false
204
+ end
205
+
206
+ it 'accepts records which are initially resolved' do
207
+ [circle21, frob1].collect { |rec| record_and_verify_results[rec] }.should == [true, true]
208
+ end
209
+
210
+ describe 'after all records are visited' do
211
+ let!(:deferred_records) { fk_index.end_transform }
212
+
213
+ it 'reports no errors' do
214
+ errors.should == []
215
+ end
216
+
217
+ it 'returns the records which may now be saved' do
218
+ record_idents(deferred_records).sort.should == %w(Addr-1 Subcircle1-1)
219
+ end
220
+ end
221
+ end
222
+
223
+ describe 'nested out-of-order records which are not all eventually resolvable' do
224
+ let!(:record_and_verify_results) {
225
+ [circle21, circle11, addr1, frob1].each_with_object({}) do |rec, results|
226
+ results[rec] = verify_and_record_if_appropriate(rec)
227
+ end
228
+ }
118
229
 
119
- errors.collect { |e| e.message.match(/foreign key (\S+)/)[1] }.sort.should ==
120
- %w(frob_id=2 old_one_id=3)
230
+ it 'defers records with unresolvable FKs' do
231
+ [circle21, addr1].collect { |rec| record_and_verify_results[rec] }.should == [false, false]
232
+ end
233
+
234
+ it 'defers records that refer to records with unresolvable FKs' do
235
+ record_and_verify_results[circle11].should be_false
236
+ end
237
+
238
+ it 'accepts records which are initially resolved' do
239
+ record_and_verify_results[frob1].should be_true
240
+ end
241
+
242
+ describe 'after all records are visited' do
243
+ let!(:deferred_records) { fk_index.end_transform }
244
+
245
+ it 'returns the records which may now be saved' do
246
+ record_idents(deferred_records).should == %w(Addr-1)
247
+ end
248
+
249
+ it 'reports errors for the remaining unresolvable items' do
250
+ errors.collect(&:model_class).sort.should == [Subcircle1.to_s, Subcircle2.to_s]
251
+ end
252
+
253
+ describe 'the error for a record which has an unresolvable FK' do
254
+ let(:the_error) { errors.find { |e| e.model_class == Subcircle2.to_s } }
255
+
256
+ it 'reports the error' do
257
+ the_error.message.should == 'Unsatisfied foreign key referencing NcsNavigator::Warehouse::Transformers::Subcircle3.'
258
+ end
259
+
260
+ it 'includes the attribute name' do
261
+ the_error.attribute_name.should == 'subcircle3_id'
262
+ end
263
+
264
+ it 'includes the attribute value' do
265
+ the_error.attribute_value.should == '1'
266
+ end
267
+ end
268
+
269
+ describe 'the error for a record which refers to a record that has an unresolvable FK' do
270
+ let(:the_error) { errors.find { |e| e.model_class == Subcircle1.to_s } }
271
+
272
+ it 'reports the error' do
273
+ the_error.message.should == 'Associated NcsNavigator::Warehouse::Transformers::Subcircle2 record contains one or more unsatisfed foreign keys or refers to other records that do.'
274
+ end
275
+
276
+ it 'includes the attribute name' do
277
+ the_error.attribute_name.should == 'subcircle2_id'
278
+ end
279
+
280
+ it 'includes the attribute value' do
281
+ the_error.attribute_value.should == '1'
282
+ end
283
+ end
284
+ end
285
+ end
286
+
287
+ describe 'circular records which are eventually resolvable' do
288
+ let!(:record_and_verify_results) {
289
+ [frob1, circle21, circle11, addr1, circle31].each_with_object({}) do |rec, results|
290
+ results[rec] = verify_and_record_if_appropriate(rec)
291
+ end
292
+ }
293
+
294
+ it 'defers all the records in the circle' do
295
+ [circle11, circle21, circle31].collect { |rec| record_and_verify_results[rec] }.should == [false] * 3
296
+ end
297
+
298
+ it 'accepts records which are initially resolved' do
299
+ [frob1, addr1].collect { |rec| record_and_verify_results[rec] }.should == [true] * 2
300
+ end
301
+
302
+ describe 'after all records are visited' do
303
+ let!(:deferred_records) { fk_index.end_transform }
304
+
305
+ it 'reports no errors' do
306
+ errors.should == []
307
+ end
308
+
309
+ it 'returns all the records in the circle' do
310
+ record_idents(deferred_records).sort.should == %w(Subcircle1-1 Subcircle2-1 Subcircle3-1)
311
+ end
312
+ end
313
+ end
314
+
315
+ describe 'circular records which are not eventually resolvable' do
316
+ let!(:record_and_verify_results) {
317
+ [circle21, circle11, circle31].each_with_object({}) do |rec, results|
318
+ results[rec] = verify_and_record_if_appropriate(rec)
319
+ end
320
+ }
321
+
322
+ it 'defers all the records in the circle' do
323
+ [circle11, circle21, circle31].collect { |rec| record_and_verify_results[rec] }.should == [false] * 3
324
+ end
325
+
326
+ describe 'after all records are visited' do
327
+ let!(:deferred_records) { fk_index.end_transform }
328
+
329
+ it 'reports an error for each record in the circle, including one for the bad external ref' do
330
+ errors.collect(&:model_class).sort.should == [Subcircle1.to_s, Subcircle1.to_s, Subcircle2.to_s, Subcircle3.to_s]
331
+ end
332
+
333
+ describe 'the error for the element with the unresolvable FK' do
334
+ let(:the_error) { errors.find { |e| e.model_class == Subcircle1.to_s && e.attribute_name == 'addr_id' } }
335
+
336
+ it 'reports the error' do
337
+ the_error.message.should == 'Unsatisfied foreign key referencing NcsNavigator::Warehouse::Transformers::Addr.'
338
+ end
339
+
340
+ it 'includes the attribute name' do
341
+ the_error.attribute_name.should == 'addr_id'
342
+ end
343
+
344
+ it 'includes the attribute value' do
345
+ the_error.attribute_value.should == '1'
346
+ end
347
+ end
348
+
349
+ describe 'an error for an element in the circle' do
350
+ let(:the_error) { errors.find { |e| e.model_class == Subcircle3.to_s } }
351
+
352
+ it 'reports the error' do
353
+ the_error.message.should == 'Associated NcsNavigator::Warehouse::Transformers::Subcircle1 record contains one or more unsatisfed foreign keys or refers to other records that do.'
354
+ end
355
+
356
+ it 'includes the attribute name' do
357
+ the_error.attribute_name.should == 'subcircle1_id'
358
+ end
359
+
360
+ it 'includes the attribute value' do
361
+ the_error.attribute_value.should == '1'
362
+ end
363
+ end
364
+
365
+ it 'returns nothing' do
366
+ deferred_records.should == []
367
+ end
368
+ end
121
369
  end
122
370
  end
123
371
  end
@@ -62,7 +62,7 @@
62
62
  <sex>-6</sex>
63
63
  <age>49</age>
64
64
  <age_range>5</age_range>
65
- <person_dob>1962-01-01</person_dob>
65
+ <person_dob xsi:nil="true"/>
66
66
  <deceased>2</deceased>
67
67
  <ethnic_group>2</ethnic_group>
68
68
  <person_lang>1</person_lang>
@@ -50,6 +50,10 @@ class NcsNavigator::Warehouse::Transformers::VdrXml
50
50
  it 'strips left over encoded CRs (#1940)' do
51
51
  person.person_comment.should == "Likes\r\nline\r\nbreaks"
52
52
  end
53
+
54
+ it 'reads xsi:nil as nil' do
55
+ person.person_dob.should be_nil
56
+ end
53
57
  end
54
58
  end
55
59
  end
@@ -5,9 +5,10 @@ require 'zip/zip'
5
5
  module NcsNavigator::Warehouse
6
6
  describe XmlEmitter, :use_mdes do
7
7
  let(:filename) { tmpdir + 'export.xml' }
8
- let(:options) { {} }
8
+ let(:options) { { :zip => false } }
9
+ let(:emitter) { XmlEmitter.new(spec_config, filename, options) }
9
10
  let(:xml) {
10
- XmlEmitter.new(spec_config, filename, options).emit_xml
11
+ emitter.emit_xml
11
12
  Nokogiri::XML(File.read(filename))
12
13
  }
13
14
 
@@ -110,7 +111,7 @@ module NcsNavigator::Warehouse
110
111
  xml.xpath('//person/person_id').collect { |e| e.text.strip }.sort.should == %w(QX9 XQ4)
111
112
  end
112
113
 
113
- describe 'and PII' do
114
+ describe 'and including PII in a single file' do
114
115
  let(:xml_first_names) {
115
116
  xml.xpath('//person/first_name').collect { |e| e.text.strip }.sort
116
117
  }
@@ -130,6 +131,39 @@ module NcsNavigator::Warehouse
130
131
  end
131
132
  end
132
133
 
134
+ describe 'and creating both with- and without-PII variants in parallel' do
135
+ let(:no_pii_file) { emitter.xml_files.find { |xf| !xf.include_pii? }.filename }
136
+ let(:pii_file) { emitter.xml_files.find { |xf| xf.include_pii? }.filename }
137
+
138
+ let(:no_pii_xml) { Nokogiri::XML(no_pii_file.read) }
139
+ let(:pii_xml) { Nokogiri::XML(pii_file.read) }
140
+
141
+ def first_names_in(xml)
142
+ xml.xpath('//person/first_name').collect { |e| e.text.strip }.sort
143
+ end
144
+
145
+ before do
146
+ options[:'and-pii'] = true
147
+ emitter.emit_xml
148
+ end
149
+
150
+ it 'creates the expected with-PII file' do
151
+ pii_file.exist?.should be_true
152
+ end
153
+
154
+ it 'creates the expected without-PII file' do
155
+ no_pii_file.exist?.should be_true
156
+ end
157
+
158
+ it 'includes PII in the PII file' do
159
+ first_names_in(pii_xml).should == %w(Quentin Xavier)
160
+ end
161
+
162
+ it 'does not include PII in the no-PII file' do
163
+ first_names_in(no_pii_xml).should == ['', '']
164
+ end
165
+ end
166
+
133
167
  describe 'and selected output' do
134
168
  let(:people_count) { xml.xpath('//person').size }
135
169
  let(:p_count) { xml.xpath('//participant').size }
@@ -177,6 +211,7 @@ module NcsNavigator::Warehouse
177
211
 
178
212
  describe 'the generated ZIP file', :slow do
179
213
  let(:expected_zipfile) { Pathname.new(filename.to_s + '.zip') }
214
+ let(:options) { {} }
180
215
 
181
216
  before do
182
217
  stub_model(person_model)
@@ -256,5 +291,76 @@ module NcsNavigator::Warehouse
256
291
  subject.should be_a Pathname
257
292
  end
258
293
  end
294
+
295
+ describe 'generating PII XML in parallel' do
296
+ let(:emitter) { XmlEmitter.new(spec_config, provided_filename, options) }
297
+ let(:options) { { :'and-pii' => true } }
298
+ let(:provided_filename) { 'emitted.xml' }
299
+
300
+ let(:xml_files) { emitter.xml_files }
301
+ let(:pii_xml_file) { emitter.xml_files.find { |xf| xf.include_pii? } }
302
+ let(:no_pii_xml_file) { emitter.xml_files.find { |xf| !xf.include_pii? } }
303
+
304
+ describe 'filenames' do
305
+ describe 'when none specified', :slow, :use_mdes do
306
+ let(:provided_filename) { nil }
307
+
308
+ before do
309
+ NcsNavigator.configuration.psus.first.id = '20000216'
310
+
311
+ # Time.parse uses Time.now internally, so this needs to be
312
+ # defined before starting to register the mock.
313
+ t = Time.parse('2011-07-28')
314
+ Time.stub!(:now).and_return(t)
315
+ end
316
+
317
+ it 'uses the normal one for the non-PII variant' do
318
+ no_pii_xml_file.filename.to_s.should == 'bear_lake-20110728.xml'
319
+ end
320
+
321
+ it 'uses the -PII variant for the one with PII' do
322
+ pii_xml_file.filename.to_s.should == 'bear_lake-20110728-PII.xml'
323
+ end
324
+
325
+ describe 'but a directory is specified' do
326
+ before do
327
+ options[:directory] = '/baz/zap'
328
+ end
329
+
330
+ it 'includes the directory in the with-PII name' do
331
+ pii_xml_file.filename.to_s.should == '/baz/zap/bear_lake-20110728-PII.xml'
332
+ end
333
+
334
+ it 'includes the directory in the without-PII name' do
335
+ no_pii_xml_file.filename.to_s.should == '/baz/zap/bear_lake-20110728.xml'
336
+ end
337
+ end
338
+ end
339
+
340
+ describe 'when one is specified' do
341
+ let(:provided_filename) { 'emitted.xml' }
342
+
343
+ it 'uses the specified name for the non-PII variant' do
344
+ no_pii_xml_file.filename.to_s.should == 'emitted.xml'
345
+ end
346
+
347
+ it 'it adds a -PII infix before the extension for the PII variant' do
348
+ pii_xml_file.filename.to_s.should == 'emitted-PII.xml'
349
+ end
350
+ end
351
+ end
352
+
353
+ it 'produces two files' do
354
+ emitter.xml_files.size.should == 2
355
+ end
356
+
357
+ it 'produces a with-PII file' do
358
+ pii_xml_file.should_not be_nil
359
+ end
360
+
361
+ it 'produces a without-PII file' do
362
+ no_pii_xml_file.should_not be_nil
363
+ end
364
+ end
259
365
  end
260
366
  end