ncs_mdes_warehouse 0.10.1 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +37 -0
- data/lib/ncs_navigator/warehouse/cli.rb +5 -1
- data/lib/ncs_navigator/warehouse/configuration.rb +2 -2
- data/lib/ncs_navigator/warehouse/transformers/database.rb +5 -4
- data/lib/ncs_navigator/warehouse/transformers/enum_transformer.rb +145 -58
- data/lib/ncs_navigator/warehouse/transformers/foreign_key_index.rb +266 -34
- data/lib/ncs_navigator/warehouse/transformers/vdr_xml/reader.rb +6 -1
- data/lib/ncs_navigator/warehouse/version.rb +1 -1
- data/lib/ncs_navigator/warehouse/xml_emitter.rb +128 -46
- data/ncs_mdes_warehouse.gemspec +1 -0
- data/spec/ncs_navigator/warehouse/configuration_spec.rb +1 -1
- data/spec/ncs_navigator/warehouse/transformers/enum_transformer_spec.rb +119 -11
- data/spec/ncs_navigator/warehouse/transformers/foreign_key_index_spec.rb +268 -20
- data/spec/ncs_navigator/warehouse/transformers/vdr_xml/made_up_vdr_xml.xml +1 -1
- data/spec/ncs_navigator/warehouse/transformers/vdr_xml/reader_spec.rb +4 -0
- data/spec/ncs_navigator/warehouse/xml_emitter_spec.rb +109 -3
- metadata +20 -4
@@ -92,7 +92,12 @@ class NcsNavigator::Warehouse::Transformers::VdrXml
|
|
92
92
|
else
|
93
93
|
# node is the start tag of a table variable
|
94
94
|
var = node.local_name.to_sym
|
95
|
-
val =
|
95
|
+
val =
|
96
|
+
if node.attribute('xsi:nil') == 'true'
|
97
|
+
nil
|
98
|
+
else
|
99
|
+
node.inner_xml.strip.gsub('&#13;', "\r")
|
100
|
+
end
|
96
101
|
|
97
102
|
unless node.self_closing?
|
98
103
|
# Skip to closing tag
|
@@ -19,16 +19,18 @@ module NcsNavigator::Warehouse
|
|
19
19
|
# emitter.
|
20
20
|
attr_reader :configuration
|
21
21
|
|
22
|
-
##
|
23
|
-
# @return [Pathname] the file to which the XML will be emitted.
|
24
|
-
attr_reader :filename
|
25
|
-
|
26
22
|
##
|
27
23
|
# @return [Array<Models::MdesModel>] the models whose data will be
|
28
24
|
# emitted. This is determined from the `:tables` option to
|
29
25
|
# {#initialize}.
|
30
26
|
attr_reader :models
|
31
27
|
|
28
|
+
##
|
29
|
+
# @private exposed for testing
|
30
|
+
# @return [Array] the configuration objects related to each separate XML
|
31
|
+
# file emitted in one run.
|
32
|
+
attr_reader :xml_files
|
33
|
+
|
32
34
|
def_delegators :@configuration, :shell, :log
|
33
35
|
|
34
36
|
HEADER_TEMPLATE = ERB.new(<<-XML_ERB)
|
@@ -99,18 +101,13 @@ XML
|
|
99
101
|
# produced alongside the XML file?
|
100
102
|
def initialize(config, filename, options={})
|
101
103
|
@configuration = config
|
102
|
-
@include_pii = options[:'include-pii']
|
103
|
-
@filename = case filename
|
104
|
-
when Pathname
|
105
|
-
filename
|
106
|
-
when nil
|
107
|
-
self.class.default_filename(configuration, @include_pii)
|
108
|
-
else
|
109
|
-
Pathname.new(filename.to_s)
|
110
|
-
end
|
111
104
|
@record_count = 0
|
112
105
|
@block_size = options[:'block-size'] || 5000
|
113
106
|
@zip = options.has_key?(:zip) ? options[:zip] : true
|
107
|
+
|
108
|
+
@xml_files = determine_files_to_create(filename, options)
|
109
|
+
|
110
|
+
|
114
111
|
@models =
|
115
112
|
if options[:tables]
|
116
113
|
options[:tables].collect { |t| t.to_s }.collect { |t|
|
@@ -126,70 +123,108 @@ XML
|
|
126
123
|
#
|
127
124
|
# @return [void]
|
128
125
|
def emit_xml
|
129
|
-
shell.say_line("Exporting to #{
|
130
|
-
log.info("Beginning XML export to #{
|
126
|
+
shell.say_line("Exporting to #{xml_files.collect(&:describe).join(', ')}")
|
127
|
+
log.info("Beginning XML export to #{xml_files.collect(&:describe).join(', ')}")
|
131
128
|
|
132
129
|
@start = Time.now
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
shell.clear_line_then_say('Writing XML for %33s' % model.mdes_table_name)
|
138
|
-
|
139
|
-
write_all_xml_for_model(f, model)
|
140
|
-
end
|
141
|
-
|
142
|
-
f.write FOOTER_TEMPLATE
|
130
|
+
xml_files.each { |xf| xf.write HEADER_TEMPLATE.result(binding) }
|
131
|
+
models.each do |model|
|
132
|
+
shell.clear_line_then_say('Writing XML for %33s' % model.mdes_table_name)
|
133
|
+
write_all_xml_for_model(model)
|
143
134
|
end
|
135
|
+
xml_files.each { |xf| xf.write FOOTER_TEMPLATE }
|
136
|
+
xml_files.each { |xf| xf.close }
|
144
137
|
@end = Time.now
|
138
|
+
|
145
139
|
msg = "%d records written in %d seconds (%.1f/sec).\n" % [@record_count, emit_time, emit_rate]
|
146
140
|
shell.clear_line_then_say(msg)
|
147
141
|
log.info(msg)
|
148
142
|
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
143
|
+
xml_files.each { |xf| xf.zip_if_desired }
|
144
|
+
log.info("XML export complete")
|
145
|
+
end
|
146
|
+
|
147
|
+
##
|
148
|
+
# @return [Pathname] the single file to which the XML will be emitted.
|
149
|
+
# Raises an exception if writing to multiple files.
|
150
|
+
def filename
|
151
|
+
if xml_files.size == 1
|
152
|
+
xml_files.first.filename
|
153
|
+
else
|
154
|
+
fail "Emitting more than one file. Use `xml_files` to interrogate."
|
156
155
|
end
|
157
156
|
end
|
158
157
|
|
159
158
|
##
|
160
|
-
# Will PII be included in the exported XML?
|
161
|
-
#
|
162
|
-
# @return [Boolean]
|
159
|
+
# @return [Boolean] Will PII be included in the exported XML?
|
160
|
+
# Raises an exception if writing to multiple files.
|
163
161
|
def include_pii?
|
164
|
-
|
162
|
+
if xml_files.size == 1
|
163
|
+
xml_files.first.include_pii?
|
164
|
+
else
|
165
|
+
fail "Emitting more than one file. Use `xml_files` to interrogate."
|
166
|
+
end
|
165
167
|
end
|
166
168
|
|
167
169
|
##
|
168
|
-
# Will
|
170
|
+
# Will ZIP archive(s) be created along with the XML?
|
169
171
|
#
|
170
172
|
# @return [Boolean]
|
171
173
|
def zip?
|
172
174
|
@zip
|
173
175
|
end
|
174
176
|
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
177
|
+
private
|
178
|
+
|
179
|
+
def determine_files_to_create(filename, options)
|
180
|
+
if options[:'and-pii']
|
181
|
+
# two files, one with and one without PII
|
182
|
+
no_pii_filename = select_filename(filename, false, options[:directory])
|
183
|
+
with_pii_filename = Pathname.new(no_pii_filename.to_s.sub(/^(.*?)(\..*)$/, '\1-PII\2'))
|
184
|
+
[
|
185
|
+
[false, no_pii_filename],
|
186
|
+
[true, with_pii_filename]
|
187
|
+
].collect do |include_pii, fn|
|
188
|
+
XmlFile.new(fn, include_pii, @zip, shell, log)
|
189
|
+
end
|
190
|
+
else
|
191
|
+
# one file, PII determined by --include-pii
|
192
|
+
include_pii = options[:'include-pii']
|
193
|
+
actual_filename = select_filename(filename, include_pii, options[:directory])
|
194
|
+
[
|
195
|
+
XmlFile.new(actual_filename, include_pii, @zip, shell, log)
|
196
|
+
]
|
197
|
+
end
|
181
198
|
end
|
182
199
|
|
183
|
-
|
200
|
+
def select_filename(filename, include_pii, directory_for_default_files)
|
201
|
+
if filename && directory_for_default_files
|
202
|
+
fail "It does not make sense to specify both a filename and the :directory option."
|
203
|
+
end
|
184
204
|
|
185
|
-
|
205
|
+
case filename
|
206
|
+
when Pathname
|
207
|
+
filename
|
208
|
+
when nil
|
209
|
+
default_name = self.class.default_filename(configuration, include_pii)
|
210
|
+
if directory_for_default_files
|
211
|
+
Pathname.new(directory_for_default_files) + default_name
|
212
|
+
else
|
213
|
+
default_name
|
214
|
+
end
|
215
|
+
else
|
216
|
+
Pathname.new(filename.to_s)
|
217
|
+
end
|
218
|
+
end
|
219
|
+
|
220
|
+
def write_all_xml_for_model(model)
|
186
221
|
shell.say(' %20s' % '[loading]')
|
187
222
|
count = model.count
|
188
223
|
offset = 0
|
189
224
|
while offset < count
|
190
225
|
shell.back_up_and_say(20, '%20s' % '[loading]')
|
191
226
|
model.all(:limit => @block_size, :offset => offset).each do |instance|
|
192
|
-
|
227
|
+
xml_files.each { |xf| xf.write_instance(instance) }
|
193
228
|
@record_count += 1
|
194
229
|
|
195
230
|
shell.back_up_and_say(20, '%5d (%5.1f/sec)' % [@record_count, emit_rate])
|
@@ -217,5 +252,52 @@ XML
|
|
217
252
|
def emit_rate
|
218
253
|
@record_count / emit_time
|
219
254
|
end
|
255
|
+
|
256
|
+
##
|
257
|
+
# @private
|
258
|
+
#
|
259
|
+
# Encapsulates the data and operations related to one of the files produced
|
260
|
+
# in a run. Consider "related to one of the files" versus "related to
|
261
|
+
# loading the records".
|
262
|
+
class XmlFile < Struct.new(:filename, :include_pii, :zip, :shell, :log)
|
263
|
+
alias :include_pii? :include_pii
|
264
|
+
alias :zip? :zip
|
265
|
+
|
266
|
+
def describe
|
267
|
+
"#{filename} #{include_pii? ? 'with' : 'without'} PII"
|
268
|
+
end
|
269
|
+
|
270
|
+
def zip_filename
|
271
|
+
@zip_filename ||= filename.to_s + '.zip'
|
272
|
+
end
|
273
|
+
|
274
|
+
def open
|
275
|
+
@handle ||= filename.open('w')
|
276
|
+
end
|
277
|
+
alias :handle :open
|
278
|
+
|
279
|
+
def write(s)
|
280
|
+
handle.write(s)
|
281
|
+
end
|
282
|
+
|
283
|
+
def write_instance(instance)
|
284
|
+
instance.write_mdes_xml(handle, :indent => 3, :margin => 1, :pii => include_pii?)
|
285
|
+
end
|
286
|
+
|
287
|
+
def close
|
288
|
+
@handle && @handle.close
|
289
|
+
end
|
290
|
+
|
291
|
+
def zip_if_desired
|
292
|
+
if zip?
|
293
|
+
shell.say("Zipping to #{zip_filename}")
|
294
|
+
log.info("Zipping to #{zip_filename}")
|
295
|
+
Zip::ZipFile.open(zip_filename, Zip::ZipFile::CREATE) do |zf|
|
296
|
+
zf.add(filename.basename, filename)
|
297
|
+
end
|
298
|
+
shell.clear_line_then_say("Zipped #{zip_filename}.")
|
299
|
+
end
|
300
|
+
end
|
301
|
+
end
|
220
302
|
end
|
221
303
|
end
|
data/ncs_mdes_warehouse.gemspec
CHANGED
@@ -43,7 +43,7 @@ module NcsNavigator::Warehouse
|
|
43
43
|
|
44
44
|
describe '#foreign_key_index' do
|
45
45
|
it 'provides an index by default' do
|
46
|
-
config.foreign_key_index.should respond_to(:
|
46
|
+
config.foreign_key_index.should respond_to(:verify_or_defer)
|
47
47
|
end
|
48
48
|
|
49
49
|
it 'allows the index to be overridden' do
|
@@ -17,7 +17,7 @@ module NcsNavigator::Warehouse::Transformers
|
|
17
17
|
include ::DataMapper::Resource
|
18
18
|
|
19
19
|
property :id, Integer, :key => true
|
20
|
-
property :name, String
|
20
|
+
property :name, String, :length => (1..100)
|
21
21
|
property :age, Integer
|
22
22
|
belongs_to :sample,
|
23
23
|
'NcsNavigator::Warehouse::Transformers::Sample', :child_key => [ :sample_id ], :required => false
|
@@ -64,7 +64,7 @@ module NcsNavigator::Warehouse::Transformers
|
|
64
64
|
end
|
65
65
|
|
66
66
|
subject.transform(transform_status)
|
67
|
-
transform_status.transform_errors.should
|
67
|
+
transform_status.transform_errors.should == []
|
68
68
|
end
|
69
69
|
|
70
70
|
describe 'with an invalid instance' do
|
@@ -122,6 +122,7 @@ module NcsNavigator::Warehouse::Transformers
|
|
122
122
|
records[2].should_receive(:save).and_return(true)
|
123
123
|
|
124
124
|
records[1].psu_id = '20000041'
|
125
|
+
records[1].recruit_type = 'A'
|
125
126
|
|
126
127
|
subject.transform(transform_status)
|
127
128
|
end
|
@@ -135,11 +136,11 @@ module NcsNavigator::Warehouse::Transformers
|
|
135
136
|
end
|
136
137
|
|
137
138
|
it 'records an error' do
|
138
|
-
transform_status.transform_errors.collect(&:record_id).should == ['2']
|
139
|
+
transform_status.transform_errors.collect(&:record_id).uniq.should == ['2']
|
139
140
|
end
|
140
141
|
|
141
142
|
describe 'the recorded error' do
|
142
|
-
let(:error) { transform_status.transform_errors.
|
143
|
+
let(:error) { transform_status.transform_errors.detect { |err| err.message =~ /PSU/ } }
|
143
144
|
|
144
145
|
it 'has the correct model class' do
|
145
146
|
error.model_class.should == Sample.to_s
|
@@ -163,28 +164,135 @@ module NcsNavigator::Warehouse::Transformers
|
|
163
164
|
end
|
164
165
|
end
|
165
166
|
|
167
|
+
describe 'and another invalidity' do
|
168
|
+
let(:validation_error) {
|
169
|
+
transform_status.transform_errors.detect { |err| err.message =~ /format/i }
|
170
|
+
}
|
171
|
+
|
172
|
+
it 'simultaneously reports an error about each' do
|
173
|
+
validation_error.attribute_name.should == 'recruit_type'
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
166
177
|
let(:expected_saved_record_ids) { [1, 3] }
|
167
178
|
let(:expected_not_saved_record_ids) { [2] }
|
168
179
|
|
169
180
|
include_examples 'a foreign key index updater'
|
170
181
|
end
|
171
182
|
|
172
|
-
describe 'with an unsatisfied foreign key' do
|
173
|
-
let(:
|
183
|
+
describe 'with an initially unsatisfied foreign key' do
|
184
|
+
let(:unsatisfied) { Subsample.new(:id => 3, :sample_id => 912, :name => '') }
|
185
|
+
let(:satisfier) { Sample.new(:id => unsatisfied.sample_id, :psu_id => '20000030', :name => 'Nine') }
|
174
186
|
|
175
|
-
|
176
|
-
|
187
|
+
let(:fk_error) {
|
188
|
+
transform_status.transform_errors.detect { |error| error.message =~ /foreign/i }
|
189
|
+
}
|
190
|
+
|
191
|
+
let(:validation_error) {
|
192
|
+
transform_status.transform_errors.detect { |error| error.message =~ /valid/i }
|
193
|
+
}
|
177
194
|
|
195
|
+
before do
|
178
196
|
records.each do |m|
|
179
197
|
m.stub!(:valid?).and_return(true)
|
180
198
|
m.stub!(:save).and_return(true)
|
181
199
|
end
|
182
200
|
|
183
|
-
|
201
|
+
records << unsatisfied
|
202
|
+
end
|
203
|
+
|
204
|
+
describe 'when the record is otherwise valid' do
|
205
|
+
before do
|
206
|
+
unsatisfied.name = 'Foo'
|
207
|
+
unsatisfied.should be_valid # setup
|
208
|
+
end
|
209
|
+
|
210
|
+
describe 'and the foreign key is never satisfied' do
|
211
|
+
it 'reports the unsatisfied foreign key' do
|
212
|
+
unsatisfied.stub!(:save).and_return(true)
|
213
|
+
subject.transform(transform_status)
|
214
|
+
|
215
|
+
[fk_error.attribute_name, fk_error.attribute_value].should == ['sample_id', '912']
|
216
|
+
end
|
217
|
+
|
218
|
+
it 'does not save the record with the unsatisfied key' do
|
219
|
+
unsatisfied.should_not_receive(:save)
|
220
|
+
|
221
|
+
subject.transform(transform_status)
|
222
|
+
end
|
223
|
+
end
|
224
|
+
|
225
|
+
describe 'and the foreign key is eventually satisfied' do
|
226
|
+
|
227
|
+
before do
|
228
|
+
satisfier.stub!(:valid?).and_return(true)
|
229
|
+
satisfier.stub!(:save).and_return(true)
|
230
|
+
records << satisfier
|
231
|
+
end
|
232
|
+
|
233
|
+
it 'does not report any errors' do
|
234
|
+
transform_status.transform_errors.should == []
|
235
|
+
end
|
236
|
+
|
237
|
+
it 'saves the record with the eventually satisfied key' do
|
238
|
+
satisfier.should_receive(:save)
|
239
|
+
|
240
|
+
subject.transform(transform_status)
|
241
|
+
end
|
242
|
+
end
|
184
243
|
end
|
185
244
|
|
186
|
-
|
187
|
-
|
245
|
+
describe 'when the record has invalid properties' do
|
246
|
+
before do
|
247
|
+
unsatisfied.name = ''
|
248
|
+
unsatisfied.should_not be_valid # setup
|
249
|
+
end
|
250
|
+
|
251
|
+
describe 'and the foreign key is never satisfied' do
|
252
|
+
it 'reports the unsatisfied foreign key' do
|
253
|
+
subject.transform(transform_status)
|
254
|
+
|
255
|
+
[fk_error.attribute_name, fk_error.attribute_value].should == ['sample_id', '912']
|
256
|
+
end
|
257
|
+
|
258
|
+
it 'reports the invalid values' do
|
259
|
+
subject.transform(transform_status)
|
260
|
+
|
261
|
+
validation_error.attribute_name.should == 'name'
|
262
|
+
end
|
263
|
+
|
264
|
+
it 'does not save the record with the unsatisfied key' do
|
265
|
+
unsatisfied.should_not_receive(:save)
|
266
|
+
|
267
|
+
subject.transform(transform_status)
|
268
|
+
end
|
269
|
+
end
|
270
|
+
|
271
|
+
describe 'and the foreign key is eventually satisfied' do
|
272
|
+
before do
|
273
|
+
satisfier.stub!(:valid?).and_return(true)
|
274
|
+
satisfier.stub!(:save).and_return(true)
|
275
|
+
records << satisfier
|
276
|
+
end
|
277
|
+
|
278
|
+
it 'does not report a foreign key problem' do
|
279
|
+
subject.transform(transform_status)
|
280
|
+
|
281
|
+
fk_error.should be_nil
|
282
|
+
end
|
283
|
+
|
284
|
+
it 'does report the invalid value' do
|
285
|
+
subject.transform(transform_status)
|
286
|
+
|
287
|
+
validation_error.attribute_name.should == 'name'
|
288
|
+
end
|
289
|
+
|
290
|
+
it 'does not save the record' do
|
291
|
+
unsatisfied.should_not_receive(:save)
|
292
|
+
|
293
|
+
subject.transform(transform_status)
|
294
|
+
end
|
295
|
+
end
|
188
296
|
end
|
189
297
|
end
|
190
298
|
|