ncs_mdes_warehouse 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. data/CHANGELOG.md +23 -0
  2. data/lib/ncs_navigator/warehouse.rb +2 -0
  3. data/lib/ncs_navigator/warehouse/cli.rb +10 -2
  4. data/lib/ncs_navigator/warehouse/configuration.rb +80 -0
  5. data/lib/ncs_navigator/warehouse/configuration/file_evaluator.rb +1 -1
  6. data/lib/ncs_navigator/warehouse/contents.rb +77 -0
  7. data/lib/ncs_navigator/warehouse/filters.rb +19 -0
  8. data/lib/ncs_navigator/warehouse/filters/add_id_prefix_filter.rb +32 -0
  9. data/lib/ncs_navigator/warehouse/{transformers → filters}/apply_global_values_filter.rb +1 -1
  10. data/lib/ncs_navigator/warehouse/{transformers → filters}/coded_as_missing_filter.rb +1 -1
  11. data/lib/ncs_navigator/warehouse/filters/composite_filter.rb +77 -0
  12. data/lib/ncs_navigator/warehouse/{transformers → filters}/no_blank_foreign_keys_filter.rb +1 -1
  13. data/lib/ncs_navigator/warehouse/{transformers → filters}/no_ssu_outreach_all_ssus_filter.rb +1 -1
  14. data/lib/ncs_navigator/warehouse/{transformers → filters}/no_ssu_outreach_placeholder_filter.rb +1 -1
  15. data/lib/ncs_navigator/warehouse/filters/record_id_changing_filter_support.rb +71 -0
  16. data/lib/ncs_navigator/warehouse/filters/remove_id_prefix_filter.rb +32 -0
  17. data/lib/ncs_navigator/warehouse/transformers.rb +0 -6
  18. data/lib/ncs_navigator/warehouse/transformers/database.rb +14 -8
  19. data/lib/ncs_navigator/warehouse/transformers/enum_transformer.rb +5 -3
  20. data/lib/ncs_navigator/warehouse/version.rb +1 -1
  21. data/lib/ncs_navigator/warehouse/xml_emitter.rb +101 -50
  22. data/sample_configuration.rb +12 -0
  23. data/spec/ncs_navigator/warehouse/configuration_spec.rb +102 -0
  24. data/spec/ncs_navigator/warehouse/contents_spec.rb +166 -0
  25. data/spec/ncs_navigator/warehouse/filters/add_id_prefix_filter_spec.rb +82 -0
  26. data/spec/ncs_navigator/warehouse/{transformers → filters}/apply_global_values_filter_spec.rb +1 -1
  27. data/spec/ncs_navigator/warehouse/{transformers → filters}/coded_as_missing_filter_spec.rb +1 -1
  28. data/spec/ncs_navigator/warehouse/{transformers/filters_spec.rb → filters/composite_filter_spec.rb} +8 -8
  29. data/spec/ncs_navigator/warehouse/{transformers → filters}/no_blank_foreign_keys_filter_spec.rb +1 -1
  30. data/spec/ncs_navigator/warehouse/{transformers → filters}/no_ssu_outreach_all_ssus_filter_spec.rb +1 -1
  31. data/spec/ncs_navigator/warehouse/{transformers → filters}/no_ssu_outreach_placeholder_filter_spec.rb +1 -1
  32. data/spec/ncs_navigator/warehouse/filters/remove_id_prefix_filter_spec.rb +95 -0
  33. data/spec/ncs_navigator/warehouse/xml_emitter_spec.rb +94 -0
  34. metadata +33 -22
  35. data/lib/ncs_navigator/warehouse/transformers/filters.rb +0 -66
data/CHANGELOG.md CHANGED
@@ -1,6 +1,29 @@
1
1
  NCS Navigator MDES Warehouse History
2
2
  ====================================
3
3
 
4
+ 0.13.0
5
+ ------
6
+
7
+ - Move all existing filters to new `Filters` module. Any fully-qualified
8
+ references to those filters will need to be updated. Filter-name-only
9
+ references made from within a configuration file will still resolve fine.
10
+ (#3940)
11
+
12
+ - Add the concept of named "filter sets" to the warehouse configuration. (#3940)
13
+
14
+ - Support specifying one or more named filter sets to use with `emit-xml`.
15
+ (#3940)
16
+
17
+ - Support specifying a default filter set to use with `emit-xml` if none are
18
+ specified on the command line. (#3940)
19
+
20
+ - Split out content enumeration from `XmlEmitter`. `Contents` is an `Enumerable`
21
+ providing memory-efficient access to every record in a set of tables (or all
22
+ tables). (#3936)
23
+
24
+ - Add filters for adding and removing prefixes to/from the keys for all records
25
+ of a particular type. (#3968)
26
+
4
27
  0.12.0
5
28
  ------
6
29
 
@@ -9,9 +9,11 @@ module NcsNavigator
9
9
  autoload :CLI, 'ncs_navigator/warehouse/cli'
10
10
  autoload :Comparator, 'ncs_navigator/warehouse/comparator'
11
11
  autoload :Configuration, 'ncs_navigator/warehouse/configuration'
12
+ autoload :Contents, 'ncs_navigator/warehouse/contents'
12
13
  autoload :DataMapper, 'ncs_navigator/warehouse/data_mapper'
13
14
  autoload :DatabaseInitializer, 'ncs_navigator/warehouse/database_initializer'
14
15
  autoload :Hooks, 'ncs_navigator/warehouse/hooks'
16
+ autoload :Filters, 'ncs_navigator/warehouse/filters'
15
17
  autoload :Models, 'ncs_navigator/warehouse/models'
16
18
  autoload :PostgreSQL, 'ncs_navigator/warehouse/postgresql'
17
19
  autoload :StringifyTrace, 'ncs_navigator/warehouse/stringify_trace'
@@ -64,11 +64,19 @@ DESC
64
64
  :desc => 'The target directory for automatically-named files. (Default is CWD.)'
65
65
  method_option 'tables', :type => :string,
66
66
  :desc => 'Emit XML for a subset of tables.', :banner => 'TABLE,TABLE,TABLE'
67
+ method_option 'filters', :type => :string, :banner => 'FILTER_SET,FILTER_SET',
68
+ :desc => 'Use these named filter sets when producing the XML. Default is set in configuration. Use --no-filters to disable default without providing an alternative.'
67
69
  def emit_xml(filename=nil)
68
70
  use_database
69
71
 
70
- XmlEmitter.new(configuration, filename,
71
- options.merge(:tables => options[:tables].try(:split, /\s*,\s*/))).emit_xml
72
+ options[:tables] = options[:tables].try(:split, /\s*,\s*/)
73
+ # need to prevent the addition of a :filters key entirely in order
74
+ # to detect --no-filters, which shows up as :filters=>nil.
75
+ if options[:filters]
76
+ options[:filters] = options[:filters].split(/\s*,\s*/)
77
+ end
78
+
79
+ XmlEmitter.new(configuration, filename, options).emit_xml
72
80
  end
73
81
 
74
82
  desc 'etl', 'Performs the full extract-transform-load process for this configuration'
@@ -133,6 +133,86 @@ module NcsNavigator::Warehouse
133
133
  end
134
134
  end
135
135
 
136
+ ####
137
+ #### Filters
138
+ ####
139
+
140
+ ##
141
+ # @return [Hash<Symbol,#call>] an index of named {CompositeFilter}s
142
+ # reflecting the configured named filter sets
143
+ def filter_sets
144
+ @filter_sets ||= {}
145
+ end
146
+
147
+ ##
148
+ # Looks up a filter set by name. Errors out if no match found.
149
+ #
150
+ # If you need to refer to a named filter set in your configuration file,
151
+ # use this method instead of {#filter_sets}. E.g.:
152
+ #
153
+ # c.add_transformer SomeDatabase.create_transformer(c, :filters => [c.filter_set(:some_filters)])
154
+ #
155
+ # Using this method instead of {#filter_sets} ensures that you will get a
156
+ # useful error message if you have a typo in your filter name.
157
+ #
158
+ # @return [#call] the {CompositeFilter} registered under the given
159
+ # name. If none, it raises {Error}.
160
+ def filter_set(name)
161
+ filter_sets[name.to_sym] or raise Error, "Unknown filter set #{name.inspect}."
162
+ end
163
+
164
+ ##
165
+ # Define a named filter set.
166
+ #
167
+ # @param [Symbol,#to_sym] name the name for this set.
168
+ # @param [#call, Array<#call>] one_or_more_filters the filters to use in
169
+ # the set
170
+ # @return [void]
171
+ def add_filter_set(name, one_or_more_filters)
172
+ if filter_sets.has_key?(name.to_sym)
173
+ raise Error, "There is already a filter set named #{name.inspect}."
174
+ end
175
+
176
+ filters = [*one_or_more_filters].collect do |candidate|
177
+ case candidate
178
+ when Symbol
179
+ filter_set(candidate)
180
+ else
181
+ candidate
182
+ end
183
+ end
184
+
185
+ filters.each do |candidate|
186
+ unless candidate.respond_to?(:call)
187
+ if candidate.respond_to?(:new)
188
+ raise Error, "#{candidate.inspect} does not have a call method. Perhaps you meant #{candidate.inspect}.new?"
189
+ else
190
+ raise Error, "#{candidate.inspect} does not have a call method."
191
+ end
192
+ end
193
+ end
194
+
195
+ filter_sets[name.to_sym] = Filters::CompositeFilter.new(filters)
196
+ end
197
+
198
+ ##
199
+ # @return [Symbol,nil] the filter set to use with the XML emitter if none is
200
+ # specified.
201
+ attr_reader :default_xml_filter_set
202
+
203
+ ##
204
+ # @param [Symbol,#to_sym] name the name of a known filter set to use by
205
+ # default in the XML emitter.
206
+ # @return [void]
207
+ def default_xml_filter_set=(name)
208
+ if name
209
+ filter_set(name) # fails if unknown
210
+ @default_xml_filter_set = name.to_sym
211
+ else
212
+ @default_xml_filter_set = nil
213
+ end
214
+ end
215
+
136
216
  ####
137
217
  #### MDES version
138
218
  ####
@@ -28,7 +28,7 @@ class NcsNavigator::Warehouse::Configuration
28
28
  end
29
29
 
30
30
  def self.const_missing(const)
31
- [NcsNavigator::Warehouse::Transformers].each do |ns|
31
+ [NcsNavigator::Warehouse::Transformers, NcsNavigator::Warehouse::Filters].each do |ns|
32
32
  if ns.const_defined?(const)
33
33
  return ns.const_get(const)
34
34
  end
@@ -0,0 +1,77 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ module NcsNavigator::Warehouse
4
+ ##
5
+ # Provides an Enumerable over all of the content for some or all of the
6
+ # tables in a warehouse instance. Provides streaming results over batches
7
+ # for large content sets.
8
+ class Contents
9
+ include Enumerable
10
+
11
+ ##
12
+ # @return [Array<Class>] the warehouse models whose records will be enumerated
13
+ # by this instance.
14
+ attr_reader :models
15
+
16
+ ##
17
+ # @return [CompositeFilter] the filters applied to each record enumerated by this instance.
18
+ attr_reader :filters
19
+
20
+ ##
21
+ # @return [Numeric] the maximum number of records to load into memory before
22
+ # yielding them to the consumer.
23
+ attr_reader :block_size
24
+
25
+ ##
26
+ # Create a new {Contents}.
27
+ #
28
+ # @param [Configuration] config the configuration for the
29
+ # warehouse from which to iterate over records.
30
+ #
31
+ # @option options [Array<#call>,#call] :filters a list of
32
+ # filters to apply to the enumerated records
33
+ # @option options [Fixnum] :block-size (5000) the maximum number
34
+ # of records to load into memory before yielding them to the consumer.
35
+ # Reduce this to reduce the memory load of the emitter. Increasing it
36
+ # will probably not improve performance, even if you have sufficient
37
+ # memory to load more records.
38
+ # @option options [Array<#to_s>] :tables (all for current MDES
39
+ # version) the tables to include in the iteration.
40
+ def initialize(config, options={})
41
+ @configuration = config
42
+ @record_count = 0
43
+ @block_size = options[:'block-size'] || 5000
44
+
45
+ @models =
46
+ if options[:tables]
47
+ options[:tables].collect { |t| t.to_s }.collect { |t|
48
+ config.models_module.mdes_order.find { |model| model.mdes_table_name == t }
49
+ }
50
+ else
51
+ config.models_module.mdes_order
52
+ end
53
+
54
+ filter_list = options[:filters]
55
+ @filters = NcsNavigator::Warehouse::Filters::CompositeFilter.new(
56
+ filter_list ? [*filter_list].compact : [])
57
+ end
58
+
59
+ ##
60
+ # Yields each instance of every configured {#models model} in turn.
61
+ def each
62
+ models.each do |model|
63
+ key = model.key.first.name.to_sym
64
+ count = model.count
65
+ offset = 0
66
+ while offset < count
67
+ model.all(:limit => block_size, :offset => offset, :order => key.asc).each do |instance|
68
+ filters.call([instance]).each do |filtered_record|
69
+ yield filtered_record
70
+ end
71
+ end
72
+ offset += block_size
73
+ end
74
+ end
75
+ end
76
+ end
77
+ end
@@ -0,0 +1,19 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ module NcsNavigator::Warehouse
4
+ ##
5
+ # Record filters and related.
6
+ module Filters
7
+ autoload :CompositeFilter, 'ncs_navigator/warehouse/filters/composite_filter'
8
+
9
+ autoload :AddIdPrefixFilter, 'ncs_navigator/warehouse/filters/add_id_prefix_filter'
10
+ autoload :ApplyGlobalValuesFilter, 'ncs_navigator/warehouse/filters/apply_global_values_filter'
11
+ autoload :CodedAsMissingFilter, 'ncs_navigator/warehouse/filters/coded_as_missing_filter'
12
+ autoload :NoBlankForeignKeysFilter, 'ncs_navigator/warehouse/filters/no_blank_foreign_keys_filter'
13
+ autoload :NoSsuOutreachAllSsusFilter, 'ncs_navigator/warehouse/filters/no_ssu_outreach_all_ssus_filter'
14
+ autoload :NoSsuOutreachPlaceholderFilter, 'ncs_navigator/warehouse/filters/no_ssu_outreach_placeholder_filter'
15
+ autoload :RemoveIdPrefixFilter, 'ncs_navigator/warehouse/filters/remove_id_prefix_filter'
16
+
17
+ autoload :RecordIdChangingFilterSupport, 'ncs_navigator/warehouse/filters/record_id_changing_filter_support'
18
+ end
19
+ end
@@ -0,0 +1,32 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ module NcsNavigator::Warehouse::Filters
4
+ ##
5
+ # A {CompositeFilter filter} which adds a prefix to the ID for every record of
6
+ # a given type. This filter is stateful and so must be instantiated.
7
+ class AddIdPrefixFilter
8
+ include RecordIdChangingFilterSupport
9
+
10
+ ##
11
+ # Creates the filter.
12
+ #
13
+ # In addition to the options specified here, this constructor accepts the
14
+ # options defined on {RecordIdChangingFilterSupport#initialize}.
15
+ #
16
+ # @param [Configuration] configuration the warehouse configuration.
17
+ # @param [Hash] options
18
+ #
19
+ # @option options [String] :prefix the prefix to apply.
20
+ def initialize(configuration, options={})
21
+ super
22
+ @prefix = options[:prefix] or fail 'Please specify a :prefix.'
23
+ end
24
+
25
+ ##
26
+ # @param [String] original_id the incoming ID.
27
+ # @return [String] the ID with the prefix applied.
28
+ def changed_id(original_id)
29
+ "#{@prefix}#{original_id}" if original_id
30
+ end
31
+ end
32
+ end
@@ -1,6 +1,6 @@
1
1
  require 'ncs_navigator/warehouse'
2
2
 
3
- module NcsNavigator::Warehouse::Transformers
3
+ module NcsNavigator::Warehouse::Filters
4
4
  ##
5
5
  # A filter which sets certain variables to the same values on all
6
6
  # records if those variables are not already set when the filter
@@ -1,6 +1,6 @@
1
1
  require 'ncs_navigator/warehouse'
2
2
 
3
- module NcsNavigator::Warehouse::Transformers
3
+ module NcsNavigator::Warehouse::Filters
4
4
  ##
5
5
  # Some source data sets include unnecessary placeholder records and
6
6
  # values. This filter excludes the following variables and records:
@@ -0,0 +1,77 @@
1
+ require 'forwardable'
2
+
3
+ module NcsNavigator::Warehouse::Filters
4
+ ##
5
+ # Encapsulates an ordered list of filters that can be applied to one or more
6
+ # records. Filters of this type are used in {EnumTransformer} and {Contents}.
7
+ #
8
+ # Each filter is an object with a `call` method. The `call` method takes an
9
+ # array of zero or more records as input and returns an array of zero or more
10
+ # records. The input and output arrays may be the same or different. A filter
11
+ # may add, remove, or mutate records (or all three).
12
+ #
13
+ # A filter will be called multiple times for a single transform run, but it
14
+ # will never see the same record twice. To put it another way, the first
15
+ # filter will be invoked exactly once per record yielded from the underlying
16
+ # enumeration. Subsequent filters are invoked on the return value from the
17
+ # previous filter.
18
+ #
19
+ # Depending on the filter order, a particular filter may never see some of the
20
+ # eventually transformed records. This will happen if records are created by a
21
+ # filter lower in the filter chain.
22
+ #
23
+ # Leaky abstraction note: if a filter needs to change the primary key for a
24
+ # record, there is unfortunate DataMapper behavior to contend with. DM
25
+ # memoizes the result of `Resource#key` after the first time it is invoked.
26
+ # The warehouse infrastructure will certainly have invoked `#key` on a record
27
+ # which is passed to a filter's call method. So that subsequent calls
28
+ # to `#key` reflect the filter's changes, it needs to work around this. Two
29
+ # possibilities:
30
+ #
31
+ # * Instead of changing the key on the passed-in record, create a new record
32
+ # with the new key (and all the other attributes) and return that.
33
+ # * `record.instance_eval { remove_instance_variable(:@_key) }`
34
+ class CompositeFilter
35
+ include Enumerable
36
+ extend Forwardable
37
+
38
+ def_delegators :@filters, :each
39
+
40
+ attr_accessor :filters
41
+
42
+ def initialize(filter_objects)
43
+ check_filters(filter_objects)
44
+
45
+ @filters = filter_objects
46
+ end
47
+
48
+ ##
49
+ # @param records [Array] zero or more records to filter.
50
+ # @return [Array] the filtered records.
51
+ def call(records)
52
+ filters.inject(ensure_array(records)) { |result, filter| ensure_array(filter.call(result)) }
53
+ end
54
+
55
+ private
56
+
57
+ def check_filters(candidates)
58
+ candidates.each_with_index do |f, i|
59
+ unless f.respond_to?(:call)
60
+ fail "Filter #{i} (#{f.class}) does not have a call method"
61
+ end
62
+ end
63
+ end
64
+
65
+ def ensure_array(records)
66
+ case records
67
+ when nil
68
+ []
69
+ when Enumerable
70
+ records.to_a
71
+ else
72
+ [records]
73
+ end
74
+ end
75
+ end
76
+ end
77
+
@@ -1,6 +1,6 @@
1
1
  require 'ncs_navigator/warehouse'
2
2
 
3
- module NcsNavigator::Warehouse::Transformers
3
+ module NcsNavigator::Warehouse::Filters
4
4
  ##
5
5
  # This filter transforms any blank foreign keys into nil. Blank
6
6
  # foreign keys are a common issue in source XML data.
@@ -1,6 +1,6 @@
1
1
  require 'ncs_navigator/warehouse'
2
2
 
3
- module NcsNavigator::Warehouse::Transformers
3
+ module NcsNavigator::Warehouse::Filters
4
4
  ##
5
5
  # A filter which duplicates outreach event records that don't have
6
6
  # an SSU ID across all of a center's SSUs.
@@ -1,6 +1,6 @@
1
1
  require 'ncs_navigator/warehouse'
2
2
 
3
- module NcsNavigator::Warehouse::Transformers
3
+ module NcsNavigator::Warehouse::Filters
4
4
  ##
5
5
  # A filter which associates outreach event records that don't have
6
6
  # an SSU ID with an automatically-created placeholder SSU.
@@ -0,0 +1,71 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ module NcsNavigator::Warehouse::Filters
4
+ ##
5
+ # A framework for a filter which modifies all ID values for a particular
6
+ # record type. The class into which it is mixed must implement `changed_id`,
7
+ # a method which takes an incoming ID value and provides the replacement
8
+ # value.
9
+ module RecordIdChangingFilterSupport
10
+ ##
11
+ # @return [Class] the warehouse model for which the ID will be changed.
12
+ attr_reader :model
13
+
14
+ ##
15
+ # An inheritable constructor for filters which mix in this module.
16
+ #
17
+ # @param [Configuration] configuration the warehouse configuration
18
+ # @param [Hash] options
19
+ # @option options [String,Symbol] :table the name of the table for the
20
+ # target record type.
21
+ # @option options [String,Symbol] :model the unqualified name of the
22
+ # warehouse model for the target record type. If both this and `:table`
23
+ # are specified, `:table` wins.
24
+ def initialize(configuration, options={})
25
+ @model =
26
+ if options[:table]
27
+ configuration.model(options[:table])
28
+ elsif options[:model]
29
+ configuration.model(options[:model])
30
+ else
31
+ fail 'Please specify either :table or :model.'
32
+ end
33
+ unless self.respond_to?(:changed_id)
34
+ fail "#{self.class} does not implement changed_id"
35
+ end
36
+ end
37
+
38
+ ##
39
+ # Modifies all IDs for the target record type according to the
40
+ # consumer-defined `changed_id` method.
41
+ #
42
+ # @param [Array<MdesModel>] records the records to review and modify
43
+ # @return [Array<MdesModel>] the same records.
44
+ # Any IDs will be updated in place.
45
+ def call(records)
46
+ records.each do |rec|
47
+ if rec.is_a?(model)
48
+ change_primary_key(rec)
49
+ # see the class comment on {CompositeFilter}
50
+ rec.instance_eval { remove_instance_variable(:@_key) if defined?(@_key) }
51
+ end
52
+ change_foreign_keys_if_any(rec)
53
+ end
54
+ end
55
+
56
+ def change_primary_key(record)
57
+ key_name = model.key.first.name
58
+ record[key_name] = changed_id(record[key_name])
59
+ end
60
+ protected :change_primary_key
61
+
62
+ def change_foreign_keys_if_any(record)
63
+ record.class.relationships.each do |rel|
64
+ next unless rel.parent_model == model
65
+ foreign_key = rel.child_key.first.name
66
+ record[foreign_key] = changed_id(record[foreign_key])
67
+ end
68
+ end
69
+ protected :change_foreign_keys_if_any
70
+ end
71
+ end