ncs_mdes_warehouse 0.12.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. data/CHANGELOG.md +23 -0
  2. data/lib/ncs_navigator/warehouse.rb +2 -0
  3. data/lib/ncs_navigator/warehouse/cli.rb +10 -2
  4. data/lib/ncs_navigator/warehouse/configuration.rb +80 -0
  5. data/lib/ncs_navigator/warehouse/configuration/file_evaluator.rb +1 -1
  6. data/lib/ncs_navigator/warehouse/contents.rb +77 -0
  7. data/lib/ncs_navigator/warehouse/filters.rb +19 -0
  8. data/lib/ncs_navigator/warehouse/filters/add_id_prefix_filter.rb +32 -0
  9. data/lib/ncs_navigator/warehouse/{transformers → filters}/apply_global_values_filter.rb +1 -1
  10. data/lib/ncs_navigator/warehouse/{transformers → filters}/coded_as_missing_filter.rb +1 -1
  11. data/lib/ncs_navigator/warehouse/filters/composite_filter.rb +77 -0
  12. data/lib/ncs_navigator/warehouse/{transformers → filters}/no_blank_foreign_keys_filter.rb +1 -1
  13. data/lib/ncs_navigator/warehouse/{transformers → filters}/no_ssu_outreach_all_ssus_filter.rb +1 -1
  14. data/lib/ncs_navigator/warehouse/{transformers → filters}/no_ssu_outreach_placeholder_filter.rb +1 -1
  15. data/lib/ncs_navigator/warehouse/filters/record_id_changing_filter_support.rb +71 -0
  16. data/lib/ncs_navigator/warehouse/filters/remove_id_prefix_filter.rb +32 -0
  17. data/lib/ncs_navigator/warehouse/transformers.rb +0 -6
  18. data/lib/ncs_navigator/warehouse/transformers/database.rb +14 -8
  19. data/lib/ncs_navigator/warehouse/transformers/enum_transformer.rb +5 -3
  20. data/lib/ncs_navigator/warehouse/version.rb +1 -1
  21. data/lib/ncs_navigator/warehouse/xml_emitter.rb +101 -50
  22. data/sample_configuration.rb +12 -0
  23. data/spec/ncs_navigator/warehouse/configuration_spec.rb +102 -0
  24. data/spec/ncs_navigator/warehouse/contents_spec.rb +166 -0
  25. data/spec/ncs_navigator/warehouse/filters/add_id_prefix_filter_spec.rb +82 -0
  26. data/spec/ncs_navigator/warehouse/{transformers → filters}/apply_global_values_filter_spec.rb +1 -1
  27. data/spec/ncs_navigator/warehouse/{transformers → filters}/coded_as_missing_filter_spec.rb +1 -1
  28. data/spec/ncs_navigator/warehouse/{transformers/filters_spec.rb → filters/composite_filter_spec.rb} +8 -8
  29. data/spec/ncs_navigator/warehouse/{transformers → filters}/no_blank_foreign_keys_filter_spec.rb +1 -1
  30. data/spec/ncs_navigator/warehouse/{transformers → filters}/no_ssu_outreach_all_ssus_filter_spec.rb +1 -1
  31. data/spec/ncs_navigator/warehouse/{transformers → filters}/no_ssu_outreach_placeholder_filter_spec.rb +1 -1
  32. data/spec/ncs_navigator/warehouse/filters/remove_id_prefix_filter_spec.rb +95 -0
  33. data/spec/ncs_navigator/warehouse/xml_emitter_spec.rb +94 -0
  34. metadata +33 -22
  35. data/lib/ncs_navigator/warehouse/transformers/filters.rb +0 -66
data/CHANGELOG.md CHANGED
@@ -1,6 +1,29 @@
1
1
  NCS Navigator MDES Warehouse History
2
2
  ====================================
3
3
 
4
+ 0.13.0
5
+ ------
6
+
7
+ - Move all existing filters to new `Filters` module. Any fully-qualified
8
+ references to those filters will need to be updated. Filter-name-only
9
+ references made from within a configuration file will still resolve fine.
10
+ (#3940)
11
+
12
+ - Add the concept of named "filter sets" to the warehouse configuration. (#3940)
13
+
14
+ - Support specifying one or more named filter sets to use with `emit-xml`.
15
+ (#3940)
16
+
17
+ - Support specifying a default filter set to use with `emit-xml` if none are
18
+ specified on the command line. (#3940)
19
+
20
+ - Split out content enumeration from `XmlEmitter`. `Contents` is an `Enumerable`
21
+ providing memory-efficient access to every record in a set of tables (or all
22
+ tables). (#3936)
23
+
24
+ - Add filters for adding and removing prefixes to/from the keys for all records
25
+ of a particular type. (#3968)
26
+
4
27
  0.12.0
5
28
  ------
6
29
 
@@ -9,9 +9,11 @@ module NcsNavigator
9
9
  autoload :CLI, 'ncs_navigator/warehouse/cli'
10
10
  autoload :Comparator, 'ncs_navigator/warehouse/comparator'
11
11
  autoload :Configuration, 'ncs_navigator/warehouse/configuration'
12
+ autoload :Contents, 'ncs_navigator/warehouse/contents'
12
13
  autoload :DataMapper, 'ncs_navigator/warehouse/data_mapper'
13
14
  autoload :DatabaseInitializer, 'ncs_navigator/warehouse/database_initializer'
14
15
  autoload :Hooks, 'ncs_navigator/warehouse/hooks'
16
+ autoload :Filters, 'ncs_navigator/warehouse/filters'
15
17
  autoload :Models, 'ncs_navigator/warehouse/models'
16
18
  autoload :PostgreSQL, 'ncs_navigator/warehouse/postgresql'
17
19
  autoload :StringifyTrace, 'ncs_navigator/warehouse/stringify_trace'
@@ -64,11 +64,19 @@ DESC
64
64
  :desc => 'The target directory for automatically-named files. (Default is CWD.)'
65
65
  method_option 'tables', :type => :string,
66
66
  :desc => 'Emit XML for a subset of tables.', :banner => 'TABLE,TABLE,TABLE'
67
+ method_option 'filters', :type => :string, :banner => 'FILTER_SET,FILTER_SET',
68
+ :desc => 'Use these named filter sets when producing the XML. Default is set in configuration. Use --no-filters to disable default without providing an alternative.'
67
69
  def emit_xml(filename=nil)
68
70
  use_database
69
71
 
70
- XmlEmitter.new(configuration, filename,
71
- options.merge(:tables => options[:tables].try(:split, /\s*,\s*/))).emit_xml
72
+ options[:tables] = options[:tables].try(:split, /\s*,\s*/)
73
+ # need to prevent the addition of a :filters key entirely in order
74
+ # to detect --no-filters, which shows up as :filters=>nil.
75
+ if options[:filters]
76
+ options[:filters] = options[:filters].split(/\s*,\s*/)
77
+ end
78
+
79
+ XmlEmitter.new(configuration, filename, options).emit_xml
72
80
  end
73
81
 
74
82
  desc 'etl', 'Performs the full extract-transform-load process for this configuration'
@@ -133,6 +133,86 @@ module NcsNavigator::Warehouse
133
133
  end
134
134
  end
135
135
 
136
+ ####
137
+ #### Filters
138
+ ####
139
+
140
+ ##
141
+ # @return [Hash<Symbol,#call>] an index of named {CompositeFilter}s
142
+ # reflecting the configured named filter sets
143
+ def filter_sets
144
+ @filter_sets ||= {}
145
+ end
146
+
147
+ ##
148
+ # Looks up a filter set by name. Errors out if no match found.
149
+ #
150
+ # If you need to refer to a named filter set in your configuration file,
151
+ # use this method instead of {#filter_sets}. E.g.:
152
+ #
153
+ # c.add_transformer SomeDatabase.create_transformer(c, :filters => [c.filter_set(:some_filters)])
154
+ #
155
+ # Using this method instead of {#filter_sets} ensures that you will get a
156
+ # useful error message if you have a typo in your filter name.
157
+ #
158
+ # @return [#call] the {CompositeFilter} registered under the given
159
+ # name. If none, it raises {Error}.
160
+ def filter_set(name)
161
+ filter_sets[name.to_sym] or raise Error, "Unknown filter set #{name.inspect}."
162
+ end
163
+
164
+ ##
165
+ # Define a named filter set.
166
+ #
167
+ # @param [Symbol,#to_sym] name the name for this set.
168
+ # @param [#call, Array<#call>] one_or_more_filters the filters to use in
169
+ # the set
170
+ # @return [void]
171
+ def add_filter_set(name, one_or_more_filters)
172
+ if filter_sets.has_key?(name.to_sym)
173
+ raise Error, "There is already a filter set named #{name.inspect}."
174
+ end
175
+
176
+ filters = [*one_or_more_filters].collect do |candidate|
177
+ case candidate
178
+ when Symbol
179
+ filter_set(candidate)
180
+ else
181
+ candidate
182
+ end
183
+ end
184
+
185
+ filters.each do |candidate|
186
+ unless candidate.respond_to?(:call)
187
+ if candidate.respond_to?(:new)
188
+ raise Error, "#{candidate.inspect} does not have a call method. Perhaps you meant #{candidate.inspect}.new?"
189
+ else
190
+ raise Error, "#{candidate.inspect} does not have a call method."
191
+ end
192
+ end
193
+ end
194
+
195
+ filter_sets[name.to_sym] = Filters::CompositeFilter.new(filters)
196
+ end
197
+
198
+ ##
199
+ # @return [Symbol,nil] the filter set to use with the XML emitter if none is
200
+ # specified.
201
+ attr_reader :default_xml_filter_set
202
+
203
+ ##
204
+ # @param [Symbol,#to_sym] name the name of a known filter set to use by
205
+ # default in the XML emitter.
206
+ # @return [void]
207
+ def default_xml_filter_set=(name)
208
+ if name
209
+ filter_set(name) # fails if unknown
210
+ @default_xml_filter_set = name.to_sym
211
+ else
212
+ @default_xml_filter_set = nil
213
+ end
214
+ end
215
+
136
216
  ####
137
217
  #### MDES version
138
218
  ####
@@ -28,7 +28,7 @@ class NcsNavigator::Warehouse::Configuration
28
28
  end
29
29
 
30
30
  def self.const_missing(const)
31
- [NcsNavigator::Warehouse::Transformers].each do |ns|
31
+ [NcsNavigator::Warehouse::Transformers, NcsNavigator::Warehouse::Filters].each do |ns|
32
32
  if ns.const_defined?(const)
33
33
  return ns.const_get(const)
34
34
  end
@@ -0,0 +1,77 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ module NcsNavigator::Warehouse
4
+ ##
5
+ # Provides an Enumerable over all of the content for some or all of the
6
+ # tables in a warehouse instance. Provides streaming results over batches
7
+ # for large content sets.
8
+ class Contents
9
+ include Enumerable
10
+
11
+ ##
12
+ # @return [Array<Class>] the warehouse models whose records will be enumerated
13
+ # by this instance.
14
+ attr_reader :models
15
+
16
+ ##
17
+ # @return [CompositeFilter] the filters applied to each record before it is yielded.
18
+ attr_reader :filters
19
+
20
+ ##
21
+ # @return [Numeric] the maximum number of records to load into memory before
22
+ # yielding them to the consumer.
23
+ attr_reader :block_size
24
+
25
+ ##
26
+ # Create a new {Contents}.
27
+ #
28
+ # @param [Configuration] config the configuration for the
29
+ # warehouse from which to iterate over records.
30
+ #
31
+ # @option options [Array<#call>,#call] :filters a list of
32
+ # filters to apply to each record before it is yielded
33
+ # @option options [Fixnum] :block-size (5000) the maximum number
34
+ # of records to load into memory before yielding them to the consumer.
35
+ # Reduce this to reduce the memory load of the emitter. Increasing it
36
+ # will probably not improve performance, even if you have sufficient
37
+ # memory to load more records.
38
+ # @option options [Array<#to_s>] :tables (all for current MDES
39
+ # version) the tables to include in the iteration.
40
+ def initialize(config, options={})
41
+ @configuration = config
42
+ @record_count = 0
43
+ @block_size = options[:'block-size'] || 5000
44
+
45
+ @models =
46
+ if options[:tables]
47
+ options[:tables].collect { |t| t.to_s }.collect { |t|
48
+ config.models_module.mdes_order.find { |model| model.mdes_table_name == t }
49
+ }
50
+ else
51
+ config.models_module.mdes_order
52
+ end
53
+
54
+ filter_list = options[:filters]
55
+ @filters = NcsNavigator::Warehouse::Filters::CompositeFilter.new(
56
+ filter_list ? [*filter_list].compact : [])
57
+ end
58
+
59
+ ##
60
+ # Yields each instance of every configured {#models model} in turn.
61
+ def each
62
+ models.each do |model|
63
+ key = model.key.first.name.to_sym
64
+ count = model.count
65
+ offset = 0
66
+ while offset < count
67
+ model.all(:limit => block_size, :offset => offset, :order => key.asc).each do |instance|
68
+ filters.call([instance]).each do |filtered_record|
69
+ yield filtered_record
70
+ end
71
+ end
72
+ offset += block_size
73
+ end
74
+ end
75
+ end
76
+ end
77
+ end
@@ -0,0 +1,19 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ module NcsNavigator::Warehouse
4
+ ##
5
+ # Record filters and related.
6
+ module Filters
7
+ autoload :CompositeFilter, 'ncs_navigator/warehouse/filters/composite_filter'
8
+
9
+ autoload :AddIdPrefixFilter, 'ncs_navigator/warehouse/filters/add_id_prefix_filter'
10
+ autoload :ApplyGlobalValuesFilter, 'ncs_navigator/warehouse/filters/apply_global_values_filter'
11
+ autoload :CodedAsMissingFilter, 'ncs_navigator/warehouse/filters/coded_as_missing_filter'
12
+ autoload :NoBlankForeignKeysFilter, 'ncs_navigator/warehouse/filters/no_blank_foreign_keys_filter'
13
+ autoload :NoSsuOutreachAllSsusFilter, 'ncs_navigator/warehouse/filters/no_ssu_outreach_all_ssus_filter'
14
+ autoload :NoSsuOutreachPlaceholderFilter, 'ncs_navigator/warehouse/filters/no_ssu_outreach_placeholder_filter'
15
+ autoload :RemoveIdPrefixFilter, 'ncs_navigator/warehouse/filters/remove_id_prefix_filter'
16
+
17
+ autoload :RecordIdChangingFilterSupport, 'ncs_navigator/warehouse/filters/record_id_changing_filter_support'
18
+ end
19
+ end
@@ -0,0 +1,32 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ module NcsNavigator::Warehouse::Filters
4
+ ##
5
+ # A {CompositeFilter filter} which adds a prefix to the ID for every record of
6
+ # a given type. This filter is stateful and so must be instantiated.
7
+ class AddIdPrefixFilter
8
+ include RecordIdChangingFilterSupport
9
+
10
+ ##
11
+ # Creates the filter.
12
+ #
13
+ # In addition to the options specified here, this constructor accepts the
14
+ # options defined on {RecordIdChangingFilterSupport#initialize}.
15
+ #
16
+ # @param [Configuration] configuration the warehouse configuration.
17
+ # @param [Hash] options
18
+ #
19
+ # @option options [String] :prefix the prefix to apply.
20
+ def initialize(configuration, options={})
21
+ super
22
+ @prefix = options[:prefix] or fail 'Please specify a :prefix.'
23
+ end
24
+
25
+ ##
26
+ # @param [String] original_id the incoming ID.
27
+ # @return [String] the ID with the prefix applied.
28
+ def changed_id(original_id)
29
+ "#{@prefix}#{original_id}" if original_id
30
+ end
31
+ end
32
+ end
@@ -1,6 +1,6 @@
1
1
  require 'ncs_navigator/warehouse'
2
2
 
3
- module NcsNavigator::Warehouse::Transformers
3
+ module NcsNavigator::Warehouse::Filters
4
4
  ##
5
5
  # A filter which sets certain variables to the same values on all
6
6
  # records if those variables are not already set when the filter
@@ -1,6 +1,6 @@
1
1
  require 'ncs_navigator/warehouse'
2
2
 
3
- module NcsNavigator::Warehouse::Transformers
3
+ module NcsNavigator::Warehouse::Filters
4
4
  ##
5
5
  # Some source data sets include unnecessary placeholder records and
6
6
  # values. This filter excludes the following variables and records:
@@ -0,0 +1,77 @@
1
+ require 'forwardable'
2
+
3
+ module NcsNavigator::Warehouse::Filters
4
+ ##
5
+ # Encapsulates an ordered list of filters that can be applied to one or more
6
+ # records. Filters of this type are used in {EnumTransformer} and {Contents}.
7
+ #
8
+ # Each filter is an object with a `call` method. The `call` method takes an
9
+ # array of zero or more records as input and returns an array of zero or more
10
+ # records. The input and output arrays may be the same or different. A filter
11
+ # may add, remove, or mutate records (or all three).
12
+ #
13
+ # A filter will be called multiple times for a single transform run, but it
14
+ # will never see the same record twice. To put it another way, the first
15
+ # filter will be invoked exactly once per record yielded from the underlying
16
+ # enumeration. Subsequent filters are invoked on the return value from the
17
+ # previous filter.
18
+ #
19
+ # Depending on the filter order, a particular filter may never see some of the
20
+ # eventually transformed records. This will happen if records are created by a
21
+ # filter lower in the filter chain.
22
+ #
23
+ # Leaky abstraction note: if a filter needs to change the primary key for a
24
+ # record, there is unfortunate DataMapper behavior to contend with. DM
25
+ # memoizes the result of `Resource#key` after the first time it is invoked.
26
+ # The warehouse infrastructure will certainly have invoked `#key` on a record
27
+ # which is passed to a filter's call method. For subsequent calls to `#key`
28
+ # to reflect the filter's changes, the filter needs to work around this. Two
29
+ # possibilities:
30
+ #
31
+ # * Instead of changing the key on the passed-in record, create a new record
32
+ # with the new key (and all the other attributes) and return that.
33
+ # * `record.instance_eval { remove_instance_variable(:@_key) }`
34
+ class CompositeFilter
35
+ include Enumerable
36
+ extend Forwardable
37
+
38
+ def_delegators :@filters, :each
39
+
40
+ attr_accessor :filters
41
+
42
+ def initialize(filter_objects)
43
+ check_filters(filter_objects)
44
+
45
+ @filters = filter_objects
46
+ end
47
+
48
+ ##
49
+ # @param records [Array] zero or more records to filter.
50
+ # @return [Array] the filtered records.
51
+ def call(records)
52
+ filters.inject(ensure_array(records)) { |result, filter| ensure_array(filter.call(result)) }
53
+ end
54
+
55
+ private
56
+
57
+ def check_filters(candidates)
58
+ candidates.each_with_index do |f, i|
59
+ unless f.respond_to?(:call)
60
+ fail "Filter #{i} (#{f.class}) does not have a call method"
61
+ end
62
+ end
63
+ end
64
+
65
+ def ensure_array(records)
66
+ case records
67
+ when nil
68
+ []
69
+ when Enumerable
70
+ records.to_a
71
+ else
72
+ [records]
73
+ end
74
+ end
75
+ end
76
+ end
77
+
@@ -1,6 +1,6 @@
1
1
  require 'ncs_navigator/warehouse'
2
2
 
3
- module NcsNavigator::Warehouse::Transformers
3
+ module NcsNavigator::Warehouse::Filters
4
4
  ##
5
5
  # This filter transforms any blank foreign keys into nil. Blank
6
6
  # foreign keys are a common issue in source XML data.
@@ -1,6 +1,6 @@
1
1
  require 'ncs_navigator/warehouse'
2
2
 
3
- module NcsNavigator::Warehouse::Transformers
3
+ module NcsNavigator::Warehouse::Filters
4
4
  ##
5
5
  # A filter which duplicates outreach event records that don't have
6
6
  # an SSU ID across all of a center's SSUs.
@@ -1,6 +1,6 @@
1
1
  require 'ncs_navigator/warehouse'
2
2
 
3
- module NcsNavigator::Warehouse::Transformers
3
+ module NcsNavigator::Warehouse::Filters
4
4
  ##
5
5
  # A filter which associates outreach event records that don't have
6
6
  # an SSU ID with an automatically-created placeholder SSU.
@@ -0,0 +1,71 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ module NcsNavigator::Warehouse::Filters
4
+ ##
5
+ # A framework for a filter which modifies all ID values for a particular
6
+ # record type. The class into which it is mixed must implement `changed_id`,
7
+ # a method which takes an incoming ID value and provides the replacement
8
+ # value.
9
+ module RecordIdChangingFilterSupport
10
+ ##
11
+ # @return [Class] the warehouse model for which the ID will be changed.
12
+ attr_reader :model
13
+
14
+ ##
15
+ # An inheritable constructor for filters which mix in this module.
16
+ #
17
+ # @param [Configuration] configuration the warehouse configuration
18
+ # @param [Hash] options
19
+ # @option options [String,Symbol] :table the name of the table for the
20
+ # target record type.
21
+ # @option options [String,Symbol] :model the unqualified name of the
22
+ # warehouse model for the target record type. If both this and `:table`
23
+ # are specified, `:table` wins.
24
+ def initialize(configuration, options={})
25
+ @model =
26
+ if options[:table]
27
+ configuration.model(options[:table])
28
+ elsif options[:model]
29
+ configuration.model(options[:model])
30
+ else
31
+ fail 'Please specify either :table or :model.'
32
+ end
33
+ unless self.respond_to?(:changed_id)
34
+ fail "#{self.class} does not implement changed_id"
35
+ end
36
+ end
37
+
38
+ ##
39
+ # Modifies all IDs for the target record type according to the
40
+ # consumer-defined `changed_id` method.
41
+ #
42
+ # @param [Array<MdesModel>] records the records to review and modify
43
+ # @return [Array<MdesModel>] the same records.
44
+ # Any IDs will be updated in place.
45
+ def call(records)
46
+ records.each do |rec|
47
+ if rec.is_a?(model)
48
+ change_primary_key(rec)
49
+ # see the class comment on {CompositeFilter}
50
+ rec.instance_eval { remove_instance_variable(:@_key) if defined?(@_key) }
51
+ end
52
+ change_foreign_keys_if_any(rec)
53
+ end
54
+ end
55
+
56
+ def change_primary_key(record)
57
+ key_name = model.key.first.name
58
+ record[key_name] = changed_id(record[key_name])
59
+ end
60
+ protected :change_primary_key
61
+
62
+ def change_foreign_keys_if_any(record)
63
+ record.class.relationships.each do |rel|
64
+ next unless rel.parent_model == model
65
+ foreign_key = rel.child_key.first.name
66
+ record[foreign_key] = changed_id(record[foreign_key])
67
+ end
68
+ end
69
+ protected :change_foreign_keys_if_any
70
+ end
71
+ end