ncs_mdes_warehouse 0.10.1 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +37 -0
- data/lib/ncs_navigator/warehouse/cli.rb +5 -1
- data/lib/ncs_navigator/warehouse/configuration.rb +2 -2
- data/lib/ncs_navigator/warehouse/transformers/database.rb +5 -4
- data/lib/ncs_navigator/warehouse/transformers/enum_transformer.rb +145 -58
- data/lib/ncs_navigator/warehouse/transformers/foreign_key_index.rb +266 -34
- data/lib/ncs_navigator/warehouse/transformers/vdr_xml/reader.rb +6 -1
- data/lib/ncs_navigator/warehouse/version.rb +1 -1
- data/lib/ncs_navigator/warehouse/xml_emitter.rb +128 -46
- data/ncs_mdes_warehouse.gemspec +1 -0
- data/spec/ncs_navigator/warehouse/configuration_spec.rb +1 -1
- data/spec/ncs_navigator/warehouse/transformers/enum_transformer_spec.rb +119 -11
- data/spec/ncs_navigator/warehouse/transformers/foreign_key_index_spec.rb +268 -20
- data/spec/ncs_navigator/warehouse/transformers/vdr_xml/made_up_vdr_xml.xml +1 -1
- data/spec/ncs_navigator/warehouse/transformers/vdr_xml/reader_spec.rb +4 -0
- data/spec/ncs_navigator/warehouse/xml_emitter_spec.rb +109 -3
- metadata +20 -4
data/CHANGELOG.md
CHANGED
@@ -1,11 +1,48 @@
|
|
1
1
|
NCS Navigator MDES Warehouse History
|
2
2
|
====================================
|
3
3
|
|
4
|
+
0.11.0
|
5
|
+
------
|
6
|
+
|
7
|
+
- Add `--and-pii` option to `emit-xml` to allow for simultaneously producing
|
8
|
+
with- and without-PII XML from the same database reads. (#2285)
|
9
|
+
|
10
|
+
- Add `--directory` option to `emit-xml` to allow for writing files with the
|
11
|
+
default names somewhere other than the current working directory. (#3073)
|
12
|
+
|
13
|
+
- Change `EnumTransformer` foreign key error handling. Foreign keys are
|
14
|
+
now completely resolved in memory. Previously, foreign key errors were
|
15
|
+
reported out of `EnumTransformer`, but the records were saved anyway. This
|
16
|
+
was mostly in order to allow the database to handle circular reference
|
17
|
+
resolution, but also because that implementation was simpler. The new
|
18
|
+
implementation is expected to handle resolving all possible foreign key issues
|
19
|
+
and so does not save records it finds to have bad FKs (or records that refer
|
20
|
+
to those records, etc.) (#3188)
|
21
|
+
|
22
|
+
- In order to support the previous change, the interface for the object expected to be
|
23
|
+
in the configuration property `foreign_key_index` has changed. In the very
|
24
|
+
unlikely event that you were using a custom implementation, see the
|
25
|
+
`Configuration#foreign_key_index` docs for the new protocol.
|
26
|
+
|
27
|
+
- Handle `xsi:nil` in `VdrXml::Reader`. (#3217)
|
28
|
+
|
29
|
+
- Limit caught exceptions in `EnumTransformer` to `StandardError` & subclasses.
|
30
|
+
(#3243)
|
31
|
+
|
32
|
+
- Use `attribute_name` and `attribute_value` when creating TransformErrors for
|
33
|
+
foreign key violations. (#3189)
|
34
|
+
|
35
|
+
- Report invalid properties or unresolvable FKs on records that have invalid
|
36
|
+
PSU IDs. (#3264)
|
37
|
+
|
38
|
+
- Clean up shell output for `Database`-based enumerators. (Includes #3072.)
|
39
|
+
|
4
40
|
0.10.1
|
5
41
|
------
|
6
42
|
|
7
43
|
- Eliminate method name collisions when a generated model would have had
|
8
44
|
both a property and a belongs_to with the same name. (#3184)
|
45
|
+
|
9
46
|
- Update MDES 3.1 models to 3.1.01.00. (#2750)
|
10
47
|
|
11
48
|
0.10.0
|
@@ -54,10 +54,14 @@ DESC
|
|
54
54
|
method_option 'block-size', :type => :numeric, :aliases => %w(-b),
|
55
55
|
:desc => 'The maximum number of records to have in memory at once.',
|
56
56
|
:default => 5000
|
57
|
+
method_option 'and-pii', :type => :boolean, :default => false,
|
58
|
+
:desc => 'Emit one XML file without PII and one with.'
|
57
59
|
method_option 'include-pii', :type => :boolean, :default => false,
|
58
|
-
:desc => 'Include PII values in the emitted XML.'
|
60
|
+
:desc => 'Include PII values in the emitted XML. (Usually you should prefer --and-pii.)'
|
59
61
|
method_option 'zip', :type => :boolean, :default => true,
|
60
62
|
:desc => 'Create a zip file alongside the XML. (Use --no-zip to disable.)'
|
63
|
+
method_option 'directory', :type => :string, :default => nil,
|
64
|
+
:desc => 'The target directory for automatically-named files. (Default is CWD.)'
|
61
65
|
method_option 'tables', :type => :string,
|
62
66
|
:desc => 'Emit XML for a subset of tables.', :banner => 'TABLE,TABLE,TABLE'
|
63
67
|
def emit_xml(filename=nil)
|
@@ -97,8 +97,8 @@ module NcsNavigator::Warehouse
|
|
97
97
|
# value is correct for virtually any case.
|
98
98
|
#
|
99
99
|
# @return [void]
|
100
|
-
# @param [#
|
101
|
-
# foreign key index implementation.
|
100
|
+
# @param [#start_transform,#verify_or_defer,#record,#end_transform] index
|
101
|
+
# the replacement foreign key index implementation.
|
102
102
|
def foreign_key_index=(index)
|
103
103
|
@foreign_key_index = index
|
104
104
|
end
|
@@ -121,7 +121,7 @@ module NcsNavigator::Warehouse::Transformers
|
|
121
121
|
|
122
122
|
producers.each do |rp|
|
123
123
|
shell.clear_line_then_say(
|
124
|
-
"Producing records from %-#{producer_name_length}s (%-
|
124
|
+
"Producing records from %-#{producer_name_length}s (%-24s)" % [rp.name, 'loading'])
|
125
125
|
log.debug("Executing query for producer #{rp.name}:\n#{rp.query}")
|
126
126
|
repository.adapter.select(rp.query).each do |row|
|
127
127
|
meta = { :configuration => @configuration }
|
@@ -132,13 +132,14 @@ module NcsNavigator::Warehouse::Transformers
|
|
132
132
|
[*rp.row_processor.call(*args)].compact.each do |result|
|
133
133
|
yield result
|
134
134
|
result_count += 1
|
135
|
-
shell.back_up_and_say(
|
135
|
+
shell.back_up_and_say(26, "(%-7d in / %-7d out)" % [row_count, result_count])
|
136
136
|
end
|
137
|
-
shell.back_up_and_say(
|
137
|
+
shell.back_up_and_say(26, "(%-7d in / %-7d out)" % [row_count, result_count])
|
138
138
|
end
|
139
|
+
shell.back_up_and_say(26, "(%-7d in / %-7d out)" % [row_count, result_count])
|
139
140
|
log.debug("Producer #{rp.name} complete")
|
140
141
|
end
|
141
|
-
shell.
|
142
|
+
shell.clear_line_then_say("#{self.class} complete (%-7d in / %-7d out)\n" % [row_count, result_count])
|
142
143
|
|
143
144
|
log.info(
|
144
145
|
"Production from #{self.class} complete. " +
|
@@ -62,6 +62,12 @@ module NcsNavigator::Warehouse::Transformers
|
|
62
62
|
@filters = Filters.new(filter_list ? [*filter_list].compact : [])
|
63
63
|
@duplicates = options.delete(:duplicates) || :error
|
64
64
|
@duplicates_strategy = select_duplicates_strategy
|
65
|
+
|
66
|
+
@record_checkers = {
|
67
|
+
:validation => ValidateRecordChecker.new(log),
|
68
|
+
:foreign_key => ForeignKeyChecker.new(log, foreign_key_index),
|
69
|
+
:psus => PsuIdChecker.new(log, @configuration.navigator.psus)
|
70
|
+
}
|
65
71
|
end
|
66
72
|
|
67
73
|
##
|
@@ -86,7 +92,7 @@ module NcsNavigator::Warehouse::Transformers
|
|
86
92
|
def transform(status)
|
87
93
|
begin
|
88
94
|
do_transform(status)
|
89
|
-
rescue
|
95
|
+
rescue => e
|
90
96
|
err = NcsNavigator::Warehouse::TransformError.for_exception(e, 'Enumeration failed.')
|
91
97
|
log.error err.message
|
92
98
|
status.transform_errors << err
|
@@ -96,55 +102,137 @@ module NcsNavigator::Warehouse::Transformers
|
|
96
102
|
private
|
97
103
|
|
98
104
|
def do_transform(status)
|
105
|
+
foreign_key_index.start_transform(status)
|
99
106
|
enum.each do |record|
|
100
107
|
case record
|
101
108
|
when NcsNavigator::Warehouse::TransformError
|
102
109
|
receive_transform_error(record, status)
|
103
110
|
else
|
104
111
|
filters.call([record]).each do |filtered_record|
|
105
|
-
|
106
|
-
foreign_key_index.record_and_verify(saved_record) if saved_record
|
112
|
+
save_model_instance(filtered_record, status, [:psus, :validation, :foreign_key])
|
107
113
|
end
|
108
114
|
end
|
109
115
|
end
|
110
|
-
foreign_key_index.
|
116
|
+
late_resolved_records = foreign_key_index.end_transform
|
117
|
+
late_resolved_records.each do |record|
|
118
|
+
save_model_instance(record, status, []) unless has_reported_errors?(record, status)
|
119
|
+
end
|
111
120
|
end
|
112
121
|
|
113
|
-
|
122
|
+
##
|
123
|
+
# @return [void]
|
124
|
+
def save_model_instance(incoming_record, status, record_check_kinds)
|
125
|
+
status.record_count += 1
|
126
|
+
|
114
127
|
record = process_duplicate_if_appropriate(incoming_record)
|
115
128
|
unless record
|
116
129
|
log.info("Ignoring duplicate record #{record_ident incoming_record}.")
|
117
|
-
status.record_count += 1
|
118
130
|
return
|
119
131
|
end
|
120
132
|
|
121
|
-
|
122
|
-
|
123
|
-
|
133
|
+
record_checks = record_check_kinds.map { |kind| @record_checkers[kind] }
|
134
|
+
|
135
|
+
saveable = verify_record_or_report_errors(record, status, record_checks)
|
136
|
+
|
137
|
+
if saveable
|
138
|
+
log.debug("Saving verified record #{record_ident record}.")
|
139
|
+
begin
|
140
|
+
if record.save
|
141
|
+
record
|
142
|
+
foreign_key_index.record(record)
|
143
|
+
else
|
144
|
+
msg = "Could not save valid record #{record.inspect}."
|
145
|
+
log.error msg
|
146
|
+
status.unsuccessful_record(record, msg)
|
147
|
+
end
|
148
|
+
rescue => e
|
149
|
+
msg = "Error on save. #{e.class}: #{e}."
|
150
|
+
log.error msg
|
151
|
+
status.unsuccessful_record(record, msg)
|
152
|
+
end
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
def receive_transform_error(error, status)
|
157
|
+
error.id = nil
|
158
|
+
status.transform_errors << error
|
159
|
+
end
|
160
|
+
|
161
|
+
def has_reported_errors?(record, status)
|
162
|
+
status.transform_errors.any? { |error|
|
163
|
+
error.model_class.to_s == record.class.to_s &&
|
164
|
+
error.record_id.to_s == record.key.first.to_s
|
165
|
+
}
|
166
|
+
end
|
167
|
+
|
168
|
+
module RecordIdent
|
169
|
+
def record_ident(rec)
|
170
|
+
# No composite keys in the MDES
|
171
|
+
'%s %s=%s' % [
|
172
|
+
rec.class.name.demodulize, rec.class.key.first.name, rec.key.try(:first).inspect]
|
173
|
+
end
|
174
|
+
end
|
175
|
+
include RecordIdent
|
176
|
+
|
177
|
+
###### CHECKING FOR ERRORS IN A RECORD
|
178
|
+
|
179
|
+
def verify_record_or_report_errors(record, status, record_checks)
|
180
|
+
record_checks.collect { |check| check.verify_or_report_errors(record, status) }.
|
181
|
+
reject { |r| r }.empty? # does the result contain anything that isn't truthy?
|
182
|
+
end
|
183
|
+
|
184
|
+
class PsuIdChecker
|
185
|
+
include RecordIdent
|
186
|
+
|
187
|
+
attr_reader :log
|
188
|
+
|
189
|
+
def initialize(log, psus)
|
190
|
+
@log = log
|
191
|
+
@psus = psus
|
192
|
+
end
|
193
|
+
|
194
|
+
##
|
195
|
+
# Has valid PSU is true if:
|
196
|
+
# * The record has no PSU reference, or
|
197
|
+
# * The record's PSU ID is one of those configured for the
|
198
|
+
# study center
|
199
|
+
def has_valid_psu?(record)
|
200
|
+
if record.respond_to?(:psu_id)
|
201
|
+
@psus.collect(&:id).include?(record.psu_id)
|
202
|
+
else
|
203
|
+
true
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
def verify_or_report_errors(record, status)
|
208
|
+
if has_valid_psu?(record)
|
209
|
+
true
|
210
|
+
else
|
211
|
+
msg = "Invalid PSU ID. The list of valid PSU IDs for this Study Center is #{@psus.collect(&:id).inspect}."
|
124
212
|
log.error "#{record_ident record}: #{msg}"
|
125
213
|
status.unsuccessful_record(record, msg,
|
126
214
|
:attribute_name => 'psu_id',
|
127
215
|
:attribute_value => record.psu_id.inspect)
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
216
|
+
false
|
217
|
+
end
|
218
|
+
end
|
219
|
+
end
|
220
|
+
|
221
|
+
class ValidateRecordChecker
|
222
|
+
include RecordIdent
|
223
|
+
|
224
|
+
attr_reader :log
|
225
|
+
|
226
|
+
def initialize(log)
|
227
|
+
@log = log
|
228
|
+
end
|
229
|
+
|
230
|
+
def verify_or_report_errors(record, status)
|
231
|
+
if record.valid?
|
232
|
+
log.debug "#{record_ident record} is valid."
|
233
|
+
true
|
146
234
|
else
|
147
|
-
log.error "
|
235
|
+
log.error "#{record_ident record} is not valid. #{record_messages(record).join(' ')}"
|
148
236
|
record.errors.keys.each do |prop|
|
149
237
|
record.errors[prop].each do |e|
|
150
238
|
status.unsuccessful_record(
|
@@ -154,45 +242,44 @@ module NcsNavigator::Warehouse::Transformers
|
|
154
242
|
)
|
155
243
|
end
|
156
244
|
end
|
157
|
-
|
245
|
+
false
|
158
246
|
end
|
159
|
-
|
160
|
-
saved_record
|
161
|
-
end
|
247
|
+
end
|
162
248
|
|
163
|
-
|
164
|
-
|
165
|
-
|
249
|
+
def record_messages(record)
|
250
|
+
record.errors.keys.collect { |prop|
|
251
|
+
record.errors[prop].collect { |e|
|
252
|
+
v = record.send(prop)
|
253
|
+
"#{e} (#{prop}=#{v.inspect})."
|
254
|
+
}
|
255
|
+
}.flatten
|
256
|
+
end
|
166
257
|
end
|
167
258
|
|
168
|
-
|
169
|
-
|
170
|
-
'%s %s=%s' % [
|
171
|
-
rec.class.name.demodulize, rec.class.key.first.name, rec.key.try(:first).inspect]
|
172
|
-
end
|
259
|
+
class ForeignKeyChecker
|
260
|
+
include RecordIdent
|
173
261
|
|
174
|
-
|
175
|
-
record.errors.keys.collect { |prop|
|
176
|
-
record.errors[prop].collect { |e|
|
177
|
-
v = record.send(prop)
|
178
|
-
"#{e} (#{prop}=#{v.inspect})."
|
179
|
-
}
|
180
|
-
}.flatten
|
181
|
-
end
|
262
|
+
attr_reader :log, :fk_index
|
182
263
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
264
|
+
def initialize(log, fk_index)
|
265
|
+
@log = log
|
266
|
+
@fk_index = fk_index
|
267
|
+
end
|
268
|
+
|
269
|
+
def verify_or_report_errors(record, status)
|
270
|
+
log.debug "Verifying FKs for #{record_ident record}"
|
271
|
+
fk_index.verify_or_defer(record).tap do |result|
|
272
|
+
if result
|
273
|
+
log.debug "- All FKs currently resolved."
|
274
|
+
else
|
275
|
+
log.debug "- Deferring because one or more FKs are not resolved."
|
276
|
+
end
|
277
|
+
end
|
193
278
|
end
|
194
279
|
end
|
195
280
|
|
281
|
+
###### HANDLING DUPLICATES
|
282
|
+
|
196
283
|
def process_duplicate_if_appropriate(record)
|
197
284
|
if @duplicates_strategy.duplicate?(record)
|
198
285
|
@duplicates_strategy.to_save(record)
|
@@ -41,27 +41,56 @@ module NcsNavigator::Warehouse::Transformers
|
|
41
41
|
DatabaseKeyProvider.new
|
42
42
|
end
|
43
43
|
@seen_keys = {}
|
44
|
+
@current_transform_tracker = nil
|
44
45
|
end
|
45
46
|
|
46
47
|
##
|
47
|
-
#
|
48
|
-
#
|
49
|
-
#
|
48
|
+
# Indicates the beginning of a new transform. This method must be called
|
49
|
+
# before the first time {#verify_or_defer} is called for a particular transform.
|
50
|
+
#
|
51
|
+
# Error reporting and deferred foreign key resolution are scoped to a
|
52
|
+
# transform.
|
53
|
+
def start_transform(transform_status)
|
54
|
+
if @current_transform_tracker
|
55
|
+
fail "#start_transform called before previous transform's #end_transform called. This will lose deferred records."
|
56
|
+
else
|
57
|
+
@current_transform_tracker = TransformTracker.new(transform_status)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
##
|
62
|
+
# Indicates whether a record's foreign keys can immediately be satisfied. If
|
63
|
+
# not, it stores the record for processing in and possible return from
|
64
|
+
# {#end_transform}.
|
65
|
+
#
|
66
|
+
# @param [DataMapper::Resource] record the record whose foreign references
|
67
|
+
# we want to verify.
|
68
|
+
# @return [Boolean]
|
69
|
+
def verify_or_defer(record)
|
70
|
+
deferrer = DeferredRecord.create_if_appropriate(record, self)
|
71
|
+
if deferrer
|
72
|
+
@current_transform_tracker.defer(deferrer)
|
73
|
+
false
|
74
|
+
else
|
75
|
+
true
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
##
|
80
|
+
# Records the key for this record in the index. By calling this method,
|
81
|
+
# the caller affirms that the record should be considered available for
|
82
|
+
# resolution of future foreign keys.
|
50
83
|
#
|
51
84
|
# @param [DataMapper::Resource] record the record whose key we
|
52
|
-
# want to record
|
85
|
+
# want to record.
|
53
86
|
# @return [void]
|
54
|
-
def
|
87
|
+
def record(record)
|
55
88
|
seen_keys(record.class) << record.key.first # no CPKs in MDES
|
56
|
-
|
57
|
-
record.class.relationships.each do |belongs_to|
|
58
|
-
verify_relationship(record, belongs_to)
|
59
|
-
end
|
60
89
|
end
|
61
90
|
|
62
91
|
##
|
63
92
|
# Reviews any references that initially failed against the final
|
64
|
-
# set of keys. If any are still
|
93
|
+
# set of keys. If any are still unresolvable, it records errors
|
65
94
|
# against the provided transform status.
|
66
95
|
#
|
67
96
|
# Each failed reference will be reported by this method only
|
@@ -69,15 +98,14 @@ module NcsNavigator::Warehouse::Transformers
|
|
69
98
|
# this class across multiple transformers, reporting only the
|
70
99
|
# newly encountered errors for each transform in turn.
|
71
100
|
#
|
72
|
-
# @return [
|
73
|
-
#
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
101
|
+
# @return [Array<DataMapper::Resource>] any records which were previously
|
102
|
+
# deferred but which now are fully resolved and are candidates to be
|
103
|
+
# persisted.
|
104
|
+
def end_transform
|
105
|
+
fail "No current transform" unless @current_transform_tracker
|
106
|
+
@current_transform_tracker.end_it(self).tap do |x|
|
107
|
+
@current_transform_tracker = nil
|
79
108
|
end
|
80
|
-
interim_unsatisfied.clear
|
81
109
|
end
|
82
110
|
|
83
111
|
##
|
@@ -89,18 +117,6 @@ module NcsNavigator::Warehouse::Transformers
|
|
89
117
|
|
90
118
|
private
|
91
119
|
|
92
|
-
def verify_relationship(record, belongs_to)
|
93
|
-
reference_name = belongs_to.child_key.first.name
|
94
|
-
reference_value = record.send(reference_name)
|
95
|
-
foreign_model = belongs_to.parent_model
|
96
|
-
|
97
|
-
if reference_value && !seen?(foreign_model, reference_value)
|
98
|
-
interim_unsatisfied << RelationshipInstance.new(
|
99
|
-
record.key.first, record.class.to_s, foreign_model, reference_name, reference_value
|
100
|
-
)
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
120
|
def seen_keys(model_class)
|
105
121
|
@seen_keys[model_class.to_s] ||= begin
|
106
122
|
existing_keys = existing_key_provider.existing_keys(model_class) || []
|
@@ -108,19 +124,235 @@ module NcsNavigator::Warehouse::Transformers
|
|
108
124
|
end
|
109
125
|
end
|
110
126
|
|
111
|
-
|
112
|
-
|
127
|
+
##
|
128
|
+
# @private
|
129
|
+
class TransformTracker
|
130
|
+
def initialize(transform_status)
|
131
|
+
@transform_status = transform_status
|
132
|
+
@deferred_records = []
|
133
|
+
end
|
134
|
+
|
135
|
+
def defer(deferred_record)
|
136
|
+
@deferred_records << deferred_record
|
137
|
+
end
|
138
|
+
|
139
|
+
def end_it(fk_index)
|
140
|
+
update_satisfied_by_for_deferred(fk_index)
|
141
|
+
build_deferred_graph
|
142
|
+
compute_record_resolvabilities
|
143
|
+
|
144
|
+
deferred_satisfied, deferred_unsatisfied = partition_deferred
|
145
|
+
|
146
|
+
deferred_unsatisfied.each { |deferred_record| deferred_record.report_errors(@transform_status) }
|
147
|
+
|
148
|
+
deferred_satisfied.collect(&:record)
|
149
|
+
end
|
150
|
+
|
151
|
+
private
|
152
|
+
|
153
|
+
def update_satisfied_by_for_deferred(fk_index)
|
154
|
+
@deferred_records.each do |deferred_record|
|
155
|
+
deferred_record.update_satisfied_by_for_relationships(fk_index, @deferred_records)
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
def build_deferred_graph
|
160
|
+
require 'rgl/base'
|
161
|
+
require 'rgl/adjacency'
|
162
|
+
require 'rgl/connected_components'
|
163
|
+
require 'rgl/condensation'
|
164
|
+
|
165
|
+
@graph = RGL::DirectedAdjacencyGraph.new
|
166
|
+
@deferred_records.each do |rec|
|
167
|
+
rec.deferred_relationships.each do |rel|
|
168
|
+
@graph.add_edge(rec, rel.satisfied_by)
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
def compute_record_resolvabilities
|
174
|
+
# scc == strongly connected component; i.e., a single record or a complete cycle
|
175
|
+
@graph.condensation_graph.depth_first_search do |scc|
|
176
|
+
# skip the special terminal nodes :unsatisfied and :already_saved
|
177
|
+
next if Symbol === scc.first
|
178
|
+
|
179
|
+
# find all the external deferred relationships for the SCC
|
180
|
+
external_relationships = scc.collect { |rec|
|
181
|
+
rec.deferred_relationships.reject { |dr| scc.include?(dr.satisfied_by) }
|
182
|
+
}.flatten
|
183
|
+
|
184
|
+
# combine the resolvabilities for those to determine the resolvability for the SCC
|
185
|
+
net_resolvability = external_relationships.collect(&:resolvable?).
|
186
|
+
reject { |r| r }.empty? # are there any false ones?
|
187
|
+
|
188
|
+
# update each DeferredRecord with the determined resolvability for the SCC
|
189
|
+
scc.each do |rec|
|
190
|
+
rec.resolvability = net_resolvability
|
191
|
+
end
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
def partition_deferred
|
196
|
+
@deferred_records.partition { |rec| rec.resolvable? }
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
##
|
201
|
+
# @private
|
202
|
+
#
|
203
|
+
# These track records which are pending until the end of the transform
|
204
|
+
# and then are used as vertices in the graph which is used for analyzing
|
205
|
+
# remaining unsatisfied relationships.
|
206
|
+
class DeferredRecord
|
207
|
+
attr_reader :record, :deferred_relationships
|
208
|
+
|
209
|
+
##
|
210
|
+
# The externally determined total resolvability for this record.
|
211
|
+
# @see TransformTracker#compute_record_resolvabilities
|
212
|
+
attr_writer :resolvability
|
213
|
+
|
214
|
+
##
|
215
|
+
# @return [DeferredRecord,nil] if the record has any currently unsatisfied
|
216
|
+
# FKs, return a properly initialized DeferredRecord.
|
217
|
+
def self.create_if_appropriate(record, fk_index)
|
218
|
+
relationships = record.class.relationships.collect { |belongs_to|
|
219
|
+
DeferredRelationship.create_if_appropriate(record, belongs_to, fk_index)
|
220
|
+
}.compact
|
221
|
+
if relationships.empty?
|
222
|
+
nil
|
223
|
+
else
|
224
|
+
DeferredRecord.new(record, relationships)
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
228
|
+
def initialize(record, deferred_relationships)
|
229
|
+
@record = record
|
230
|
+
@deferred_relationships = deferred_relationships
|
231
|
+
end
|
232
|
+
|
233
|
+
def model_class
|
234
|
+
record.class.to_s
|
235
|
+
end
|
236
|
+
|
237
|
+
def record_id
|
238
|
+
record.key.first
|
239
|
+
end
|
240
|
+
|
241
|
+
def update_satisfied_by_for_relationships(fk_index, deferred_pool)
|
242
|
+
deferred_relationships.each do |rel|
|
243
|
+
rel.update_satisfied_by(fk_index, deferred_pool)
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
def resolvability_determined?
|
248
|
+
!@resolvability.nil?
|
249
|
+
end
|
250
|
+
|
251
|
+
def resolvable?
|
252
|
+
unless resolvability_determined?
|
253
|
+
fail "Graph iteration failure: DR #{self} #resolvable? called before resolvability determined."
|
254
|
+
end
|
255
|
+
@resolvability
|
256
|
+
end
|
257
|
+
|
258
|
+
def report_errors(transform_status)
|
259
|
+
deferred_relationships.collect { |rel| rel.create_error }.compact.each do |error|
|
260
|
+
transform_status.transform_errors << error
|
261
|
+
end
|
262
|
+
end
|
263
|
+
|
264
|
+
def to_s
|
265
|
+
"#{model_class}##{record_id}"
|
266
|
+
end
|
113
267
|
end
|
114
268
|
|
115
269
|
##
|
116
270
|
# @private
|
117
|
-
class
|
271
|
+
class DeferredRelationship < Struct.new(:record, :foreign_model, :reference_key, :reference_value)
|
272
|
+
def self.create_if_appropriate(record, belongs_to, fk_index)
|
273
|
+
reference_name = belongs_to.child_key.first.name
|
274
|
+
reference_value = record.send(reference_name)
|
275
|
+
foreign_model = belongs_to.parent_model
|
276
|
+
|
277
|
+
if reference_value && !fk_index.seen?(foreign_model, reference_value)
|
278
|
+
DeferredRelationship.new(
|
279
|
+
record, foreign_model, reference_name, reference_value
|
280
|
+
)
|
281
|
+
else
|
282
|
+
nil
|
283
|
+
end
|
284
|
+
end
|
285
|
+
|
286
|
+
attr_accessor :satisfied_by
|
287
|
+
|
288
|
+
def initialize(*)
|
289
|
+
super
|
290
|
+
@satisfied_by = :unsatisfied
|
291
|
+
end
|
292
|
+
|
293
|
+
def record_id
|
294
|
+
record.key.first # No CPKs in MDES
|
295
|
+
end
|
296
|
+
|
297
|
+
def model_class
|
298
|
+
record.class.to_s
|
299
|
+
end
|
300
|
+
|
301
|
+
def update_satisfied_by(fk_index, deferred_pool)
|
302
|
+
if seen_in?(fk_index)
|
303
|
+
self.satisfied_by = :already_saved
|
304
|
+
elsif satisfier = deferred_pool.find { |deferred_record| self.satisfied_by_deferred?(deferred_record) }
|
305
|
+
self.satisfied_by = satisfier
|
306
|
+
end
|
307
|
+
end
|
308
|
+
|
309
|
+
def seen_in?(fk_index)
|
310
|
+
fk_index.seen?(foreign_model, reference_value)
|
311
|
+
end
|
312
|
+
|
313
|
+
def satisfied_by_deferred?(deferred_record)
|
314
|
+
self.foreign_model.to_s == deferred_record.model_class.to_s &&
|
315
|
+
self.reference_value == deferred_record.record_id
|
316
|
+
end
|
317
|
+
|
118
318
|
def create_error
|
319
|
+
return nil if resolvable?
|
320
|
+
|
321
|
+
message =
|
322
|
+
case satisfied_by
|
323
|
+
when Symbol # i.e., :unsatisfied
|
324
|
+
"Unsatisfied foreign key referencing #{foreign_model}."
|
325
|
+
else
|
326
|
+
"Associated #{foreign_model} record contains one or more unsatisfed foreign keys or refers to other records that do."
|
327
|
+
end
|
328
|
+
|
119
329
|
NcsNavigator::Warehouse::TransformError.new(
|
120
330
|
:record_id => record_id, :model_class => model_class,
|
121
|
-
:
|
331
|
+
:attribute_name => reference_key, :attribute_value => reference_value.inspect,
|
332
|
+
:message => message
|
122
333
|
)
|
123
334
|
end
|
335
|
+
|
336
|
+
def resolvable?
|
337
|
+
case satisfied_by
|
338
|
+
when :unsatisfied
|
339
|
+
false
|
340
|
+
when :already_saved
|
341
|
+
true
|
342
|
+
else
|
343
|
+
if satisfied_by.resolvability_determined?
|
344
|
+
satisfied_by.resolvable?
|
345
|
+
else
|
346
|
+
# The DFS over the coalesced graph should prevent this from happening,
|
347
|
+
# so this is a check against a programming fault or misconception.
|
348
|
+
fail "Graph iteration failure: FK #{self} #resolvable? called before satisfying-record resolvability determined."
|
349
|
+
end
|
350
|
+
end
|
351
|
+
end
|
352
|
+
|
353
|
+
def to_s
|
354
|
+
"#{model_class}##{record_id}.#{reference_key} => #{foreign_model}##{reference_value}"
|
355
|
+
end
|
124
356
|
end
|
125
357
|
end
|
126
358
|
end
|