ncs_mdes_warehouse 0.12.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. data/CHANGELOG.md +23 -0
  2. data/lib/ncs_navigator/warehouse.rb +2 -0
  3. data/lib/ncs_navigator/warehouse/cli.rb +10 -2
  4. data/lib/ncs_navigator/warehouse/configuration.rb +80 -0
  5. data/lib/ncs_navigator/warehouse/configuration/file_evaluator.rb +1 -1
  6. data/lib/ncs_navigator/warehouse/contents.rb +77 -0
  7. data/lib/ncs_navigator/warehouse/filters.rb +19 -0
  8. data/lib/ncs_navigator/warehouse/filters/add_id_prefix_filter.rb +32 -0
  9. data/lib/ncs_navigator/warehouse/{transformers → filters}/apply_global_values_filter.rb +1 -1
  10. data/lib/ncs_navigator/warehouse/{transformers → filters}/coded_as_missing_filter.rb +1 -1
  11. data/lib/ncs_navigator/warehouse/filters/composite_filter.rb +77 -0
  12. data/lib/ncs_navigator/warehouse/{transformers → filters}/no_blank_foreign_keys_filter.rb +1 -1
  13. data/lib/ncs_navigator/warehouse/{transformers → filters}/no_ssu_outreach_all_ssus_filter.rb +1 -1
  14. data/lib/ncs_navigator/warehouse/{transformers → filters}/no_ssu_outreach_placeholder_filter.rb +1 -1
  15. data/lib/ncs_navigator/warehouse/filters/record_id_changing_filter_support.rb +71 -0
  16. data/lib/ncs_navigator/warehouse/filters/remove_id_prefix_filter.rb +32 -0
  17. data/lib/ncs_navigator/warehouse/transformers.rb +0 -6
  18. data/lib/ncs_navigator/warehouse/transformers/database.rb +14 -8
  19. data/lib/ncs_navigator/warehouse/transformers/enum_transformer.rb +5 -3
  20. data/lib/ncs_navigator/warehouse/version.rb +1 -1
  21. data/lib/ncs_navigator/warehouse/xml_emitter.rb +101 -50
  22. data/sample_configuration.rb +12 -0
  23. data/spec/ncs_navigator/warehouse/configuration_spec.rb +102 -0
  24. data/spec/ncs_navigator/warehouse/contents_spec.rb +166 -0
  25. data/spec/ncs_navigator/warehouse/filters/add_id_prefix_filter_spec.rb +82 -0
  26. data/spec/ncs_navigator/warehouse/{transformers → filters}/apply_global_values_filter_spec.rb +1 -1
  27. data/spec/ncs_navigator/warehouse/{transformers → filters}/coded_as_missing_filter_spec.rb +1 -1
  28. data/spec/ncs_navigator/warehouse/{transformers/filters_spec.rb → filters/composite_filter_spec.rb} +8 -8
  29. data/spec/ncs_navigator/warehouse/{transformers → filters}/no_blank_foreign_keys_filter_spec.rb +1 -1
  30. data/spec/ncs_navigator/warehouse/{transformers → filters}/no_ssu_outreach_all_ssus_filter_spec.rb +1 -1
  31. data/spec/ncs_navigator/warehouse/{transformers → filters}/no_ssu_outreach_placeholder_filter_spec.rb +1 -1
  32. data/spec/ncs_navigator/warehouse/filters/remove_id_prefix_filter_spec.rb +95 -0
  33. data/spec/ncs_navigator/warehouse/xml_emitter_spec.rb +94 -0
  34. metadata +33 -22
  35. data/lib/ncs_navigator/warehouse/transformers/filters.rb +0 -66
@@ -0,0 +1,32 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ module NcsNavigator::Warehouse::Filters
4
+ ##
5
+ # A {CompositeFilter filter} which removes a prefix from the ID for every
6
+ # record of a given type. This filter is stateful and so must be instantiated.
7
+ class RemoveIdPrefixFilter
8
+ include RecordIdChangingFilterSupport
9
+
10
+ ##
11
+ # Creates the filter.
12
+ #
13
+ # In addition to the options specified here, this constructor accepts the
14
+ # options defined on {RecordIdChangingFilterSupport#initialize}.
15
+ #
16
+ # @param [Configuration] configuration the warehouse configuration.
17
+ # @param [Hash] options
18
+ #
19
+ # @option options [String] :prefix the prefix to remove.
20
+ def initialize(configuration, options={})
21
+ super
22
+ @prefix = options[:prefix] or fail 'Please specify a :prefix.'
23
+ end
24
+
25
+ ##
26
+ # @param [String] original_id the incoming ID.
27
+ # @return [String] the ID with the prefix removed.
28
+ def changed_id(original_id)
29
+ original_id.sub(/\A#{@prefix}/, '') if original_id
30
+ end
31
+ end
32
+ end
@@ -11,17 +11,11 @@ module NcsNavigator::Warehouse
11
11
  # `Enumerable` of MDES Warehouse model instances, validates and
12
12
  # saves them. It's a good general base for your own transformers.
13
13
  module Transformers
14
- autoload :ApplyGlobalValuesFilter, 'ncs_navigator/warehouse/transformers/apply_global_values_filter'
15
- autoload :CodedAsMissingFilter, 'ncs_navigator/warehouse/transformers/coded_as_missing_filter'
16
14
  autoload :Database, 'ncs_navigator/warehouse/transformers/database'
17
15
  autoload :EnumTransformer, 'ncs_navigator/warehouse/transformers/enum_transformer'
18
16
  autoload :EventStartFromContactTransformer, 'ncs_navigator/warehouse/transformers/event_start_from_contact_transformer'
19
- autoload :Filters, 'ncs_navigator/warehouse/transformers/filters'
20
17
  autoload :ForeignKeyIndex, 'ncs_navigator/warehouse/transformers/foreign_key_index'
21
18
  autoload :MdesCsv, 'ncs_navigator/warehouse/transformers/mdes_csv'
22
- autoload :NoBlankForeignKeysFilter, 'ncs_navigator/warehouse/transformers/no_blank_foreign_keys_filter'
23
- autoload :NoSsuOutreachAllSsusFilter, 'ncs_navigator/warehouse/transformers/no_ssu_outreach_all_ssus_filter'
24
- autoload :NoSsuOutreachPlaceholderFilter, 'ncs_navigator/warehouse/transformers/no_ssu_outreach_placeholder_filter'
25
19
  autoload :SamplingUnits, 'ncs_navigator/warehouse/transformers/sampling_units'
26
20
  autoload :SqlTransformer, 'ncs_navigator/warehouse/transformers/sql_transformer'
27
21
  autoload :SubprocessTransformer, 'ncs_navigator/warehouse/transformers/subprocess_transformer'
@@ -13,14 +13,11 @@ module NcsNavigator::Warehouse::Transformers
13
13
  # class StaffPortalTransformer
14
14
  # include NcsNavigator::Warehouse::Transformers::Database
15
15
  #
16
- # # Include the models for the version of the MDES this
17
- # # transformer is compatible with
18
- # include NcsNavigator::Warehouse::Models::TwoPointZero
19
- #
20
16
  # bcdatabase :name => 'ncs_staff_portal'
21
17
  #
22
- # produce_records(:staff) do |row|
23
- # Staff.new(
18
+ # produce_records(:staff) do |row, meta|
19
+ # staff_model = meta[:configuration].model(:staff)
20
+ # staff_model.new(
24
21
  # :staff_id => 'SP' + row.username
25
22
  # # etc.
26
23
  # )
@@ -32,8 +29,9 @@ module NcsNavigator::Warehouse::Transformers
32
29
  # SELECT sub.*, s.username
33
30
  # FROM staff_languages sub INNER JOIN staff s ON sub.staff_id=s.id
34
31
  # )
35
- # ) do |row|
36
- # StaffLanguages.new(
32
+ # ) do |row, meta|
33
+ # sl_model = meta[:configuration].model(:staff_language)
34
+ # sl_model.new(
37
35
  # :staff_language_id => 'SP' + row.id,
38
36
  # :staff_id => 'SP' + row.username,
39
37
  # :staff_lang => row.lang_code
@@ -232,6 +230,14 @@ module NcsNavigator::Warehouse::Transformers
232
230
  # Define a translation from the results of a query into one or
233
231
  # more warehouse records.
234
232
  #
233
+ # The optional second parameter to the `logic` proc/block is a hash of
234
+ # metadata. That metadata contains a single key:
235
+ #
236
+ # * :configuration. Provides the {Configuration} in use by the warehouse
237
+ # that is executing this transformer. Among other things, this allows
238
+ # {Configuration#model dynamic lookup of models} based on the in-use
239
+ # MDES version.
240
+ #
235
241
  # @param [Symbol] name the name of this producer; if you don't
236
242
  # specify a `:query`, the default is to return every row from
237
243
  # the application table with this name.
@@ -1,4 +1,5 @@
1
1
  require 'ncs_navigator/warehouse'
2
+ require 'ncs_navigator/warehouse/data_mapper_patches'
2
3
 
3
4
  require 'forwardable'
4
5
 
@@ -32,7 +33,7 @@ module NcsNavigator::Warehouse::Transformers
32
33
  attr_reader :enum
33
34
 
34
35
  ##
35
- # @return [Filters] the filters in use on this transformer.
36
+ # @return [CompositeFilter] the filters in use on this transformer.
36
37
  attr_reader :filters
37
38
 
38
39
  ##
@@ -54,12 +55,13 @@ module NcsNavigator::Warehouse::Transformers
54
55
  # `:ignore` means do not attempt to save the duplicate.
55
56
  # `:replace` means substitute the duplicate for the existing record.
56
57
  #
57
- # @see Filters
58
+ # @see CompositeFilter
58
59
  def initialize(configuration, enum, options={})
59
60
  @configuration = configuration
60
61
  @enum = enum
61
62
  filter_list = options.delete(:filters)
62
- @filters = Filters.new(filter_list ? [*filter_list].compact : [])
63
+ @filters = NcsNavigator::Warehouse::Filters::CompositeFilter.new(
64
+ filter_list ? [*filter_list].compact : [])
63
65
  @duplicates = options.delete(:duplicates) || :error
64
66
  @duplicates_strategy = select_duplicates_strategy
65
67
 
@@ -1,5 +1,5 @@
1
1
  module NcsNavigator
2
2
  module Warehouse
3
- VERSION = '0.12.0'
3
+ VERSION = '0.13.0'
4
4
  end
5
5
  end
@@ -88,34 +88,56 @@ XML
88
88
  # @param [Pathname,#to_s,nil] filename the filename to which the output
89
89
  # will be written. If `nil`, the {.default_filename} is used.
90
90
  #
91
+ # @option options [Boolean] :include-pii (false) should PII
92
+ # variable values be included in the XML?
93
+ # @option options [Boolean] :zip (true) should a ZIP file be
94
+ # produced alongside the XML file?
95
+ # @option options [Enumerable] :content an enumerable over the records to
96
+ # emit. A {Contents} will be created using the other options
97
+ # if this is not specified.
91
98
  # @option options [Fixnum] :block-size (5000) the maximum number
92
99
  # of records to load into memory before writing them to the XML
93
100
  # file. Reduce this to reduce the memory load of the emitter.
94
101
  # Increasing it will probably not improve performance, even if
95
- # you have sufficient memory to load more records.
96
- # @option options [Boolean] :include-pii (false) should PII
97
- # variable values be included in the XML?
102
+ # you have sufficient memory to load more records. Only used if `:content`
103
+ # is not specified.
98
104
  # @option options [Array<#to_s>] :tables (all for current MDES
99
- # version) the tables to include in the emitted XML.
100
- # @option options [Boolean] :zip (true) should a ZIP file be
101
- # produced alongside the XML file?
105
+ # version) the tables to include in the emitted XML. Only used if
106
+ # `:content` is not specified.
107
+ # @option options [Array<#to_sym>,nil] :filters (config.default_xml_filter_set)
108
+ # named filter sets to apply to the data before emitting XML. Only used if
109
+ # `:content` is not specified. If the option is not specified at all, the
110
+ # {Configuration#default_xml_filter_set default in the configuration} will
111
+ # be used, if any. To avoid using even this default, include
112
+ # `:filters=>nil` in the options.
102
113
  def initialize(config, filename, options={})
103
114
  @configuration = config
104
- @record_count = 0
105
- @block_size = options[:'block-size'] || 5000
106
115
  @zip = options.has_key?(:zip) ? options[:zip] : true
107
116
 
108
117
  @xml_files = determine_files_to_create(filename, options)
109
118
 
119
+ @tracker = ProgressTracker.new(@configuration)
110
120
 
111
- @models =
112
- if options[:tables]
113
- options[:tables].collect { |t| t.to_s }.collect { |t|
114
- config.models_module.mdes_order.find { |model| model.mdes_table_name == t }
115
- }
116
- else
117
- config.models_module.mdes_order
118
- end
121
+ if options[:content]
122
+ @content_enumerator = options[:content]
123
+ else
124
+ filter_names =
125
+ if options.has_key?(:filters)
126
+ options[:filters] ? options[:filters] : []
127
+ else
128
+ [config.default_xml_filter_set].compact
129
+ end
130
+ filters =
131
+ unless filter_names.empty?
132
+ filter_names.collect { |n| config.filter_set(n) }
133
+ end
134
+
135
+ @content_enumerator = Contents.new(config, {
136
+ :tables => options[:tables],
137
+ :'block-size' => options[:'block-size'],
138
+ :filters => filters
139
+ })
140
+ end
119
141
  end
120
142
 
121
143
  ##
@@ -126,19 +148,16 @@ XML
126
148
  shell.say_line("Exporting to #{xml_files.collect(&:describe).join(', ')}")
127
149
  log.info("Beginning XML export to #{xml_files.collect(&:describe).join(', ')}")
128
150
 
129
- @start = Time.now
151
+ @tracker.start!
130
152
  xml_files.each { |xf| xf.write HEADER_TEMPLATE.result(binding) }
131
- models.each do |model|
132
- shell.clear_line_then_say('Writing XML for %33s' % model.mdes_table_name)
133
- write_all_xml_for_model(model)
153
+ @content_enumerator.each do |instance|
154
+ @tracker.starting_instance(instance)
155
+ xml_files.each { |xf| xf.write_instance(instance) }
156
+ @tracker.finish_instance(instance)
134
157
  end
135
158
  xml_files.each { |xf| xf.write FOOTER_TEMPLATE }
136
159
  xml_files.each { |xf| xf.close }
137
- @end = Time.now
138
-
139
- msg = "%d records written in %d seconds (%.1f/sec).\n" % [@record_count, emit_time, emit_rate]
140
- shell.clear_line_then_say(msg)
141
- log.info(msg)
160
+ @tracker.stop!
142
161
 
143
162
  xml_files.each { |xf| xf.zip_if_desired }
144
163
  log.info("XML export complete")
@@ -217,23 +236,6 @@ XML
217
236
  end
218
237
  end
219
238
 
220
- def write_all_xml_for_model(model)
221
- shell.say(' %20s' % '[loading]')
222
- key = model.key.first.name.to_sym
223
- count = model.count
224
- offset = 0
225
- while offset < count
226
- shell.back_up_and_say(20, '%20s' % '[loading]')
227
- model.all(:limit => @block_size, :offset => offset, :order => key.asc).each do |instance|
228
- xml_files.each { |xf| xf.write_instance(instance) }
229
- @record_count += 1
230
-
231
- shell.back_up_and_say(20, '%5d (%5.1f/sec)' % [@record_count, emit_rate])
232
- end
233
- offset += @block_size
234
- end
235
- end
236
-
237
239
  def sc_id
238
240
  configuration.navigator.sc_id
239
241
  end
@@ -246,14 +248,6 @@ XML
246
248
  configuration.mdes.specification_version
247
249
  end
248
250
 
249
- def emit_time
250
- (@end || Time.now) - @start
251
- end
252
-
253
- def emit_rate
254
- @record_count / emit_time
255
- end
256
-
257
251
  ##
258
252
  # @private
259
253
  #
@@ -300,5 +294,62 @@ XML
300
294
  end
301
295
  end
302
296
  end
297
+
298
+ ##
299
+ # @private
300
+ class ProgressTracker
301
+ extend Forwardable
302
+
303
+ attr_reader :start, :stop, :record_count, :shell
304
+
305
+ def_delegators :@configuration, :shell, :log
306
+
307
+ def initialize(config)
308
+ @record_count = 0
309
+ @current_model = nil
310
+ @configuration = config
311
+ end
312
+
313
+ def start!
314
+ @start = Time.now
315
+ end
316
+
317
+ def starting_instance(i)
318
+ model_changed!(i.class) unless i.class == @current_model
319
+ end
320
+
321
+ def model_changed!(new_model)
322
+ shell.clear_line_then_say('Writing XML for %33s' % new_model.mdes_table_name)
323
+ shell.say(' %20s' % '[loading]')
324
+ @current_model = new_model
325
+ end
326
+ private :model_changed!
327
+
328
+ def finish_instance(i)
329
+ @record_count += 1
330
+ update_status
331
+ end
332
+
333
+ def update_status
334
+ shell.back_up_and_say(20, '%5d (%5.1f/sec)' % [record_count, emit_rate])
335
+ end
336
+ private :update_status
337
+
338
+ def stop!
339
+ @stop = Time.now
340
+
341
+ msg = "%d records written in %d seconds (%.1f/sec).\n" % [record_count, emit_time, emit_rate]
342
+ shell.clear_line_then_say(msg)
343
+ log.info(msg)
344
+ end
345
+
346
+ def emit_time
347
+ (stop || Time.now) - start
348
+ end
349
+
350
+ def emit_rate
351
+ record_count / emit_time
352
+ end
353
+ end
303
354
  end
304
355
  end
@@ -46,3 +46,15 @@ c.add_transformer Bar.new(configuration)
46
46
  # :normal or :quiet. The default is :normal; it will usually make more
47
47
  # sense to control this from the command line.
48
48
  #c.output_level = :normal
49
+
50
+ # Give a set of one or more filters a name.
51
+ # This name can be used with `emit-xml` to apply the filter during export.
52
+ c.add_filter_set :quux, [FilterOne, lambda { |recs| recs }, FilterThree.new(c)]
53
+
54
+ # Filter sets may include other filter sets:
55
+ c.add_filter_set :baz, [FilterSeven.new, :quux]
56
+
57
+ # Specify a filter set to use by default in emit-xml. Any filter set named
58
+ # using --filters in the invocation of emit-xml will be used _instead_ of this
59
+ # default.
60
+ c.default_xml_filter_set = :quux
@@ -102,6 +102,100 @@ module NcsNavigator::Warehouse
102
102
  end
103
103
  end
104
104
 
105
+ describe '#add_filter_set' do
106
+ let(:a_filter) { lambda { |recs| recs + ['A'] } }
107
+ let(:b_filter) { lambda { |recs| ['B'] + recs } }
108
+ let(:c_filter) { lambda { |recs| ['C'] + recs + ['C'] } }
109
+
110
+ describe 'with one valid filter' do
111
+ before do
112
+ config.add_filter_set(:quux, a_filter)
113
+ end
114
+
115
+ it 'adds the filter set' do
116
+ config.filter_set(:quux).should_not be_nil
117
+ end
118
+
119
+ it 'produces a callable filter' do
120
+ config.filter_set(:quux).call(['D']).should == %w(D A)
121
+ end
122
+ end
123
+
124
+ describe 'with an array valid filters' do
125
+ before do
126
+ config.add_filter_set(:some, [b_filter, c_filter, a_filter])
127
+ end
128
+
129
+ it 'adds the filter set' do
130
+ config.filter_set(:some).should_not be_nil
131
+ end
132
+
133
+ it 'produces a single callable filter' do
134
+ config.filter_set(:some).call(['D']).should == %w(C B D C A)
135
+ end
136
+ end
137
+
138
+ describe 'with a reference to another filter set' do
139
+ it 'expands the reference if the name is known' do
140
+ config.add_filter_set(:gamma, c_filter)
141
+ config.add_filter_set(:beta, [:gamma, b_filter])
142
+
143
+ config.filter_set(:beta).call(['D']).should == %w(B C D C)
144
+ end
145
+
146
+ it 'fails if the name is unknown' do
147
+ expect {
148
+ config.add_filter_set(:beta, [:gamma, b_filter])
149
+ }.to raise_error('Unknown filter set :gamma.')
150
+ end
151
+ end
152
+
153
+ describe 'with an object without a #call method' do
154
+ it 'gives a helpful message if the object is constructable' do
155
+ lambda { config.add_filter_set(:quux, String) }.
156
+ should raise_error('String does not have a call method. Perhaps you meant String.new?')
157
+ end
158
+
159
+ it 'gives a helpful message if the object is just an instance' do
160
+ lambda { config.add_filter_set(:quux, "not a filter") }.
161
+ should raise_error('"not a filter" does not have a call method.')
162
+ end
163
+
164
+ it 'does not add the filter set' do
165
+ config.add_filter_set(:quux, 'not a filter') rescue nil
166
+ config.filter_sets.should_not have_key(:quux)
167
+ end
168
+ end
169
+
170
+ describe 'when there is a set with the same name' do
171
+ it 'throws a useful error message' do
172
+ config.add_filter_set(:beta, b_filter)
173
+ expect { config.add_filter_set(:beta, c_filter) }.
174
+ to raise_error('There is already a filter set named :beta.')
175
+ end
176
+ end
177
+ end
178
+
179
+ describe '#default_xml_filter_set' do
180
+ it 'defaults to nil' do
181
+ config.default_xml_filter_set.should be_nil
182
+ end
183
+ end
184
+
185
+ describe '#default_xml_filter_set=' do
186
+ it 'accepts a known filter set name' do
187
+ config.add_filter_set :foo, lambda { |recs| recs }
188
+ config.default_xml_filter_set = :foo
189
+
190
+ config.default_xml_filter_set.should == :foo
191
+ end
192
+
193
+ it 'fails with an unknown filter set name' do
194
+ expect { config.default_xml_filter_set = :foo }.
195
+ to raise_error("Unknown filter set :foo.")
196
+ end
197
+ end
198
+
105
199
  describe '#mdes_version=' do
106
200
  context 'for a known version', :slow, :use_mdes, :modifies_warehouse_state do
107
201
  it 'makes the models available' do
@@ -542,6 +636,14 @@ module NcsNavigator::Warehouse
542
636
  subject.log_file.to_s.should == 'NcsNavigator::Warehouse::Transformers::EnumTransformer.log'
543
637
  end
544
638
 
639
+ it 'evaluates constants from NcsNavigator::Warehouse::Filters' do
640
+ write_file do |f|
641
+ f.puts 'c.log_file = "#{CompositeFilter}.log"'
642
+ end
643
+
644
+ subject.log_file.to_s.should == 'NcsNavigator::Warehouse::Filters::CompositeFilter.log'
645
+ end
646
+
545
647
  it 'reports missing constants as bare' do
546
648
  write_file do |f|
547
649
  f.puts 'c.add_transformer = ATransformerIForgotToRequire'