ncs_mdes_warehouse 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. data/CHANGELOG.md +23 -0
  2. data/lib/ncs_navigator/warehouse.rb +2 -0
  3. data/lib/ncs_navigator/warehouse/cli.rb +10 -2
  4. data/lib/ncs_navigator/warehouse/configuration.rb +80 -0
  5. data/lib/ncs_navigator/warehouse/configuration/file_evaluator.rb +1 -1
  6. data/lib/ncs_navigator/warehouse/contents.rb +77 -0
  7. data/lib/ncs_navigator/warehouse/filters.rb +19 -0
  8. data/lib/ncs_navigator/warehouse/filters/add_id_prefix_filter.rb +32 -0
  9. data/lib/ncs_navigator/warehouse/{transformers → filters}/apply_global_values_filter.rb +1 -1
  10. data/lib/ncs_navigator/warehouse/{transformers → filters}/coded_as_missing_filter.rb +1 -1
  11. data/lib/ncs_navigator/warehouse/filters/composite_filter.rb +77 -0
  12. data/lib/ncs_navigator/warehouse/{transformers → filters}/no_blank_foreign_keys_filter.rb +1 -1
  13. data/lib/ncs_navigator/warehouse/{transformers → filters}/no_ssu_outreach_all_ssus_filter.rb +1 -1
  14. data/lib/ncs_navigator/warehouse/{transformers → filters}/no_ssu_outreach_placeholder_filter.rb +1 -1
  15. data/lib/ncs_navigator/warehouse/filters/record_id_changing_filter_support.rb +71 -0
  16. data/lib/ncs_navigator/warehouse/filters/remove_id_prefix_filter.rb +32 -0
  17. data/lib/ncs_navigator/warehouse/transformers.rb +0 -6
  18. data/lib/ncs_navigator/warehouse/transformers/database.rb +14 -8
  19. data/lib/ncs_navigator/warehouse/transformers/enum_transformer.rb +5 -3
  20. data/lib/ncs_navigator/warehouse/version.rb +1 -1
  21. data/lib/ncs_navigator/warehouse/xml_emitter.rb +101 -50
  22. data/sample_configuration.rb +12 -0
  23. data/spec/ncs_navigator/warehouse/configuration_spec.rb +102 -0
  24. data/spec/ncs_navigator/warehouse/contents_spec.rb +166 -0
  25. data/spec/ncs_navigator/warehouse/filters/add_id_prefix_filter_spec.rb +82 -0
  26. data/spec/ncs_navigator/warehouse/{transformers → filters}/apply_global_values_filter_spec.rb +1 -1
  27. data/spec/ncs_navigator/warehouse/{transformers → filters}/coded_as_missing_filter_spec.rb +1 -1
  28. data/spec/ncs_navigator/warehouse/{transformers/filters_spec.rb → filters/composite_filter_spec.rb} +8 -8
  29. data/spec/ncs_navigator/warehouse/{transformers → filters}/no_blank_foreign_keys_filter_spec.rb +1 -1
  30. data/spec/ncs_navigator/warehouse/{transformers → filters}/no_ssu_outreach_all_ssus_filter_spec.rb +1 -1
  31. data/spec/ncs_navigator/warehouse/{transformers → filters}/no_ssu_outreach_placeholder_filter_spec.rb +1 -1
  32. data/spec/ncs_navigator/warehouse/filters/remove_id_prefix_filter_spec.rb +95 -0
  33. data/spec/ncs_navigator/warehouse/xml_emitter_spec.rb +94 -0
  34. metadata +33 -22
  35. data/lib/ncs_navigator/warehouse/transformers/filters.rb +0 -66
@@ -0,0 +1,32 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ module NcsNavigator::Warehouse::Filters
4
+ ##
5
+ # A {CompositeFilter filter} which removes a prefix from the ID for every
6
+ # record of a given type. This filter is stateful and so must be instantiated.
7
+ class RemoveIdPrefixFilter
8
+ include RecordIdChangingFilterSupport
9
+
10
+ ##
11
+ # Creates the filter.
12
+ #
13
+ # In addition to the options specified here, this constructor accepts the
14
+ # options defined on {RecordIdChangingFilterSupport#initialize}.
15
+ #
16
+ # @param [Configuration] configuration the warehouse configuration.
17
+ # @param [Hash] options
18
+ #
19
+ # @option options [String] :prefix the prefix to remove.
20
+ def initialize(configuration, options={})
21
+ super
22
+ @prefix = options[:prefix] or fail 'Please specify a :prefix.'
23
+ end
24
+
25
+ ##
26
+ # @param [String] original_id the incoming ID.
27
+ # @return [String] the ID with the prefix removed.
28
+ def changed_id(original_id)
29
+ original_id.sub(/\A#{@prefix}/, '') if original_id
30
+ end
31
+ end
32
+ end
@@ -11,17 +11,11 @@ module NcsNavigator::Warehouse
11
11
  # `Enumerable` of MDES Warehouse model instances, validates and
12
12
  # saves them. It's a good general base for your own transformers.
13
13
  module Transformers
14
- autoload :ApplyGlobalValuesFilter, 'ncs_navigator/warehouse/transformers/apply_global_values_filter'
15
- autoload :CodedAsMissingFilter, 'ncs_navigator/warehouse/transformers/coded_as_missing_filter'
16
14
  autoload :Database, 'ncs_navigator/warehouse/transformers/database'
17
15
  autoload :EnumTransformer, 'ncs_navigator/warehouse/transformers/enum_transformer'
18
16
  autoload :EventStartFromContactTransformer, 'ncs_navigator/warehouse/transformers/event_start_from_contact_transformer'
19
- autoload :Filters, 'ncs_navigator/warehouse/transformers/filters'
20
17
  autoload :ForeignKeyIndex, 'ncs_navigator/warehouse/transformers/foreign_key_index'
21
18
  autoload :MdesCsv, 'ncs_navigator/warehouse/transformers/mdes_csv'
22
- autoload :NoBlankForeignKeysFilter, 'ncs_navigator/warehouse/transformers/no_blank_foreign_keys_filter'
23
- autoload :NoSsuOutreachAllSsusFilter, 'ncs_navigator/warehouse/transformers/no_ssu_outreach_all_ssus_filter'
24
- autoload :NoSsuOutreachPlaceholderFilter, 'ncs_navigator/warehouse/transformers/no_ssu_outreach_placeholder_filter'
25
19
  autoload :SamplingUnits, 'ncs_navigator/warehouse/transformers/sampling_units'
26
20
  autoload :SqlTransformer, 'ncs_navigator/warehouse/transformers/sql_transformer'
27
21
  autoload :SubprocessTransformer, 'ncs_navigator/warehouse/transformers/subprocess_transformer'
@@ -13,14 +13,11 @@ module NcsNavigator::Warehouse::Transformers
13
13
  # class StaffPortalTransformer
14
14
  # include NcsNavigator::Warehouse::Transformers::Database
15
15
  #
16
- # # Include the models for the version of the MDES this
17
- # # transformer is compatible with
18
- # include NcsNavigator::Warehouse::Models::TwoPointZero
19
- #
20
16
  # bcdatabase :name => 'ncs_staff_portal'
21
17
  #
22
- # produce_records(:staff) do |row|
23
- # Staff.new(
18
+ # produce_records(:staff) do |row, meta|
19
+ # staff_model = meta[:configuration].model(:staff)
20
+ # staff_model.new(
24
21
  # :staff_id => 'SP' + row.username
25
22
  # # etc.
26
23
  # )
@@ -32,8 +29,9 @@ module NcsNavigator::Warehouse::Transformers
32
29
  # SELECT sub.*, s.username
33
30
  # FROM staff_languages sub INNER JOIN staff s ON sub.staff_id=s.id
34
31
  # )
35
- # ) do |row|
36
- # StaffLanguages.new(
32
+ # ) do |row, meta|
33
+ # sl_model = meta[:configuration].model(:staff_language)
34
+ # sl_model.new(
37
35
  # :staff_language_id => 'SP' + row.id,
38
36
  # :staff_id => 'SP' + row.username,
39
37
  # :staff_lang => row.lang_code
@@ -232,6 +230,14 @@ module NcsNavigator::Warehouse::Transformers
232
230
  # Define a translation from the results of a query into one or
233
231
  # more warehouse records.
234
232
  #
233
+ # The optional second parameter to the `logic` proc/block is a hash of
234
+ # metadata. That metadata contains a single key:
235
+ #
236
+ # * :configuration. Provides the {Configuration} in use by the warehouse
237
+ # that is executing this transformer. Among other things, this allows
238
+ # {dynamic lookup of models Configuration#model} based on the in-use
239
+ # MDES version.
240
+ #
235
241
  # @param [Symbol] name the name of this producer; if you don't
236
242
  # specify a `:query`, the default is to return every row from
237
243
  # the application table with this name.
@@ -1,4 +1,5 @@
1
1
  require 'ncs_navigator/warehouse'
2
+ require 'ncs_navigator/warehouse/data_mapper_patches'
2
3
 
3
4
  require 'forwardable'
4
5
 
@@ -32,7 +33,7 @@ module NcsNavigator::Warehouse::Transformers
32
33
  attr_reader :enum
33
34
 
34
35
  ##
35
- # @return [Filters] the filters in use on this transformer.
36
+ # @return [CompositeFilter] the filters in use on this transformer.
36
37
  attr_reader :filters
37
38
 
38
39
  ##
@@ -54,12 +55,13 @@ module NcsNavigator::Warehouse::Transformers
54
55
  # `:ignore` means do not attempt to save the duplicate.
55
56
  # `:replace` means substitute the duplicate for the existing record.
56
57
  #
57
- # @see Filters
58
+ # @see CompositeFilter
58
59
  def initialize(configuration, enum, options={})
59
60
  @configuration = configuration
60
61
  @enum = enum
61
62
  filter_list = options.delete(:filters)
62
- @filters = Filters.new(filter_list ? [*filter_list].compact : [])
63
+ @filters = NcsNavigator::Warehouse::Filters::CompositeFilter.new(
64
+ filter_list ? [*filter_list].compact : [])
63
65
  @duplicates = options.delete(:duplicates) || :error
64
66
  @duplicates_strategy = select_duplicates_strategy
65
67
 
@@ -1,5 +1,5 @@
1
1
  module NcsNavigator
2
2
  module Warehouse
3
- VERSION = '0.12.0'
3
+ VERSION = '0.13.0'
4
4
  end
5
5
  end
@@ -88,34 +88,56 @@ XML
88
88
  # @param [Pathname,#to_s,nil] filename the filename to which the output
89
89
  # will be written. If `nil`, the {.default_filename} is used.
90
90
  #
91
+ # @option options [Boolean] :include-pii (false) should PII
92
+ # variable values be included in the XML?
93
+ # @option options [Boolean] :zip (true) should a ZIP file be
94
+ # produced alongside the XML file?
95
+ # @option options [Enumerable] :content an enumerable over the records to
96
+ # emit. A {Contents} will be created using the other options
97
+ # if this is not specified.
91
98
  # @option options [Fixnum] :block-size (5000) the maximum number
92
99
  # of records to load into memory before writing them to the XML
93
100
  # file. Reduce this to reduce the memory load of the emitter.
94
101
  # Increasing it will probably not improve performance, even if
95
- # you have sufficient memory to load more records.
96
- # @option options [Boolean] :include-pii (false) should PII
97
- # variable values be included in the XML?
102
+ # you have sufficient memory to load more records. Only used if `:content`
103
+ # is not specified.
98
104
  # @option options [Array<#to_s>] :tables (all for current MDES
99
- # version) the tables to include in the emitted XML.
100
- # @option options [Boolean] :zip (true) should a ZIP file be
101
- # produced alongside the XML file?
105
+ # version) the tables to include in the emitted XML. Only used if
106
+ # `:content` is not specified.
107
+ # @option options [Array<#to_sym>,nil] :filters (config.default_xml_filter_set)
108
+ # named filter sets to apply to the data before emitting XML. Only used if
109
+ # `:content` is not specified. If the option is not specified at all, the
110
+ # {default in the configuration Configuration#default_xml_filter_set} will
111
+ # be used, if any. To avoid using even this default, include
112
+ # `:filters=>nil` in the options.
102
113
  def initialize(config, filename, options={})
103
114
  @configuration = config
104
- @record_count = 0
105
- @block_size = options[:'block-size'] || 5000
106
115
  @zip = options.has_key?(:zip) ? options[:zip] : true
107
116
 
108
117
  @xml_files = determine_files_to_create(filename, options)
109
118
 
119
+ @tracker = ProgressTracker.new(@configuration)
110
120
 
111
- @models =
112
- if options[:tables]
113
- options[:tables].collect { |t| t.to_s }.collect { |t|
114
- config.models_module.mdes_order.find { |model| model.mdes_table_name == t }
115
- }
116
- else
117
- config.models_module.mdes_order
118
- end
121
+ if options[:content]
122
+ @content_enumerator = options[:content]
123
+ else
124
+ filter_names =
125
+ if options.has_key?(:filters)
126
+ options[:filters] ? options[:filters] : []
127
+ else
128
+ [config.default_xml_filter_set].compact
129
+ end
130
+ filters =
131
+ unless filter_names.empty?
132
+ filter_names.collect { |n| config.filter_set(n) }
133
+ end
134
+
135
+ @content_enumerator = Contents.new(config, {
136
+ :tables => options[:tables],
137
+ :'block-size' => options[:'block-size'],
138
+ :filters => filters
139
+ })
140
+ end
119
141
  end
120
142
 
121
143
  ##
@@ -126,19 +148,16 @@ XML
126
148
  shell.say_line("Exporting to #{xml_files.collect(&:describe).join(', ')}")
127
149
  log.info("Beginning XML export to #{xml_files.collect(&:describe).join(', ')}")
128
150
 
129
- @start = Time.now
151
+ @tracker.start!
130
152
  xml_files.each { |xf| xf.write HEADER_TEMPLATE.result(binding) }
131
- models.each do |model|
132
- shell.clear_line_then_say('Writing XML for %33s' % model.mdes_table_name)
133
- write_all_xml_for_model(model)
153
+ @content_enumerator.each do |instance|
154
+ @tracker.starting_instance(instance)
155
+ xml_files.each { |xf| xf.write_instance(instance) }
156
+ @tracker.finish_instance(instance)
134
157
  end
135
158
  xml_files.each { |xf| xf.write FOOTER_TEMPLATE }
136
159
  xml_files.each { |xf| xf.close }
137
- @end = Time.now
138
-
139
- msg = "%d records written in %d seconds (%.1f/sec).\n" % [@record_count, emit_time, emit_rate]
140
- shell.clear_line_then_say(msg)
141
- log.info(msg)
160
+ @tracker.stop!
142
161
 
143
162
  xml_files.each { |xf| xf.zip_if_desired }
144
163
  log.info("XML export complete")
@@ -217,23 +236,6 @@ XML
217
236
  end
218
237
  end
219
238
 
220
- def write_all_xml_for_model(model)
221
- shell.say(' %20s' % '[loading]')
222
- key = model.key.first.name.to_sym
223
- count = model.count
224
- offset = 0
225
- while offset < count
226
- shell.back_up_and_say(20, '%20s' % '[loading]')
227
- model.all(:limit => @block_size, :offset => offset, :order => key.asc).each do |instance|
228
- xml_files.each { |xf| xf.write_instance(instance) }
229
- @record_count += 1
230
-
231
- shell.back_up_and_say(20, '%5d (%5.1f/sec)' % [@record_count, emit_rate])
232
- end
233
- offset += @block_size
234
- end
235
- end
236
-
237
239
  def sc_id
238
240
  configuration.navigator.sc_id
239
241
  end
@@ -246,14 +248,6 @@ XML
246
248
  configuration.mdes.specification_version
247
249
  end
248
250
 
249
- def emit_time
250
- (@end || Time.now) - @start
251
- end
252
-
253
- def emit_rate
254
- @record_count / emit_time
255
- end
256
-
257
251
  ##
258
252
  # @private
259
253
  #
@@ -300,5 +294,62 @@ XML
300
294
  end
301
295
  end
302
296
  end
297
+
298
+ ##
299
+ # @private
300
+ class ProgressTracker
301
+ extend Forwardable
302
+
303
+ attr_reader :start, :stop, :record_count, :shell
304
+
305
+ def_delegators :@configuration, :shell, :log
306
+
307
+ def initialize(config)
308
+ @record_count = 0
309
+ @current_model = nil
310
+ @configuration = config
311
+ end
312
+
313
+ def start!
314
+ @start = Time.now
315
+ end
316
+
317
+ def starting_instance(i)
318
+ model_changed!(i.class) unless i.class == @current_model
319
+ end
320
+
321
+ def model_changed!(new_model)
322
+ shell.clear_line_then_say('Writing XML for %33s' % new_model.mdes_table_name)
323
+ shell.say(' %20s' % '[loading]')
324
+ @current_model = new_model
325
+ end
326
+ private :model_changed!
327
+
328
+ def finish_instance(i)
329
+ @record_count += 1
330
+ update_status
331
+ end
332
+
333
+ def update_status
334
+ shell.back_up_and_say(20, '%5d (%5.1f/sec)' % [record_count, emit_rate])
335
+ end
336
+ private :update_status
337
+
338
+ def stop!
339
+ @stop = Time.now
340
+
341
+ msg = "%d records written in %d seconds (%.1f/sec).\n" % [record_count, emit_time, emit_rate]
342
+ shell.clear_line_then_say(msg)
343
+ log.info(msg)
344
+ end
345
+
346
+ def emit_time
347
+ (stop || Time.now) - start
348
+ end
349
+
350
+ def emit_rate
351
+ record_count / emit_time
352
+ end
353
+ end
303
354
  end
304
355
  end
@@ -46,3 +46,15 @@ c.add_transformer Bar.new(configuration)
46
46
  # :normal or :quiet. The default is :normal; it will usually make more
47
47
  # sense to control this from the command line.
48
48
  #c.output_level = :normal
49
+
50
+ # Give a set of one or more filters a name.
51
+ # This name can be used with `emit-xml` to apply the filter during export.
52
+ c.add_filter_set :quux, [FilterOne, lambda { |recs| recs }, FilterThree.new(c)]
53
+
54
+ # Filter sets may include other filter sets:
55
+ c.add_filter_set :baz, [FilterSeven.new, :quux]
56
+
57
+ # Specify a filter set to use by default in emit-xml. Any filter referenced
58
+ # using --filters in the invocation of emit-xml will be used _instead_ of this
59
+ # filter set.
60
+ c.default_xml_filter_set = :quux
@@ -102,6 +102,100 @@ module NcsNavigator::Warehouse
102
102
  end
103
103
  end
104
104
 
105
+ describe '#add_filter_set' do
106
+ let(:a_filter) { lambda { |recs| recs + ['A'] } }
107
+ let(:b_filter) { lambda { |recs| ['B'] + recs } }
108
+ let(:c_filter) { lambda { |recs| ['C'] + recs + ['C'] } }
109
+
110
+ describe 'with one valid filter' do
111
+ before do
112
+ config.add_filter_set(:quux, a_filter)
113
+ end
114
+
115
+ it 'adds the filter set' do
116
+ config.filter_set(:quux).should_not be_nil
117
+ end
118
+
119
+ it 'produces a callable filter' do
120
+ config.filter_set(:quux).call(['D']).should == %w(D A)
121
+ end
122
+ end
123
+
124
+ describe 'with an array valid filters' do
125
+ before do
126
+ config.add_filter_set(:some, [b_filter, c_filter, a_filter])
127
+ end
128
+
129
+ it 'adds the filter set' do
130
+ config.filter_set(:some).should_not be_nil
131
+ end
132
+
133
+ it 'produces a single callable filter' do
134
+ config.filter_set(:some).call(['D']).should == %w(C B D C A)
135
+ end
136
+ end
137
+
138
+ describe 'with a reference to another filter set' do
139
+ it 'expands the reference if the name is known' do
140
+ config.add_filter_set(:gamma, c_filter)
141
+ config.add_filter_set(:beta, [:gamma, b_filter])
142
+
143
+ config.filter_set(:beta).call(['D']).should == %w(B C D C)
144
+ end
145
+
146
+ it 'fails if the name is unknown' do
147
+ expect {
148
+ config.add_filter_set(:beta, [:gamma, b_filter])
149
+ }.to raise_error('Unknown filter set :gamma.')
150
+ end
151
+ end
152
+
153
+ describe 'with an object without a #call method' do
154
+ it 'gives a helpful message if the object is constructable' do
155
+ lambda { config.add_filter_set(:quux, String) }.
156
+ should raise_error('String does not have a call method. Perhaps you meant String.new?')
157
+ end
158
+
159
+ it 'gives a helpful message if the object is just an instance' do
160
+ lambda { config.add_filter_set(:quux, "not a filter") }.
161
+ should raise_error('"not a filter" does not have a call method.')
162
+ end
163
+
164
+ it 'does not add the filter set' do
165
+ config.add_filter_set(:quux, 'not a filter') rescue nil
166
+ config.filter_sets.should_not have_key(:quux)
167
+ end
168
+ end
169
+
170
+ describe 'when there is a set with the same name' do
171
+ it 'throws a useful error message' do
172
+ config.add_filter_set(:beta, b_filter)
173
+ expect { config.add_filter_set(:beta, c_filter) }.
174
+ to raise_error('There is already a filter set named :beta.')
175
+ end
176
+ end
177
+ end
178
+
179
+ describe '#default_xml_filter_set' do
180
+ it 'defaults to nil' do
181
+ config.default_xml_filter_set.should be_nil
182
+ end
183
+ end
184
+
185
+ describe '#default_xml_filter_set=' do
186
+ it 'accepts a known filter set name' do
187
+ config.add_filter_set :foo, lambda { |recs| recs }
188
+ config.default_xml_filter_set = :foo
189
+
190
+ config.default_xml_filter_set.should == :foo
191
+ end
192
+
193
+ it 'fails with an unknown filter set name' do
194
+ expect { config.default_xml_filter_set = :foo }.
195
+ to raise_error("Unknown filter set :foo.")
196
+ end
197
+ end
198
+
105
199
  describe '#mdes_version=' do
106
200
  context 'for a known version', :slow, :use_mdes, :modifies_warehouse_state do
107
201
  it 'makes the models available' do
@@ -542,6 +636,14 @@ module NcsNavigator::Warehouse
542
636
  subject.log_file.to_s.should == 'NcsNavigator::Warehouse::Transformers::EnumTransformer.log'
543
637
  end
544
638
 
639
+ it 'evaluates constants from NcsNavigator::Warehouse::Filters' do
640
+ write_file do |f|
641
+ f.puts 'c.log_file = "#{CompositeFilter}.log"'
642
+ end
643
+
644
+ subject.log_file.to_s.should == 'NcsNavigator::Warehouse::Filters::CompositeFilter.log'
645
+ end
646
+
545
647
  it 'reports missing constants as bare' do
546
648
  write_file do |f|
547
649
  f.puts 'c.add_transformer = ATransformerIForgotToRequire'