jinx-migrate 2.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. data/.gitignore +14 -0
  2. data/.rspec +3 -0
  3. data/.yardopts +1 -0
  4. data/Gemfile +8 -0
  5. data/Gemfile.lock +38 -0
  6. data/History.md +6 -0
  7. data/LEGAL +5 -0
  8. data/LICENSE +22 -0
  9. data/README.md +33 -0
  10. data/Rakefile +40 -0
  11. data/bin/csvjoin +24 -0
  12. data/examples/family/README.md +24 -0
  13. data/examples/family/conf/children/fields.yaml +2 -0
  14. data/examples/family/conf/parents/defaults.yaml +3 -0
  15. data/examples/family/conf/parents/fields.yaml +6 -0
  16. data/examples/family/conf/parents/values.yaml +4 -0
  17. data/examples/family/data/children.csv +1 -0
  18. data/examples/family/data/parents.csv +1 -0
  19. data/examples/family/lib/shims.rb +17 -0
  20. data/jinx-migrate.gemspec +26 -0
  21. data/lib/jinx/csv/csvio.rb +214 -0
  22. data/lib/jinx/csv/joiner.rb +196 -0
  23. data/lib/jinx/migration/filter.rb +167 -0
  24. data/lib/jinx/migration/migratable.rb +244 -0
  25. data/lib/jinx/migration/migrator.rb +1029 -0
  26. data/lib/jinx/migration/reader.rb +16 -0
  27. data/lib/jinx/migration/version.rb +5 -0
  28. data/spec/bad/bad_spec.rb +25 -0
  29. data/spec/bad/fields.yaml +1 -0
  30. data/spec/bad/parents.csv +1 -0
  31. data/spec/bad/shims.rb +16 -0
  32. data/spec/csv/join/join_helper.rb +35 -0
  33. data/spec/csv/join/join_spec.rb +100 -0
  34. data/spec/csv/join/jumbled_src.csv +7 -0
  35. data/spec/csv/join/jumbled_tgt.csv +7 -0
  36. data/spec/csv/join/source.csv +7 -0
  37. data/spec/csv/join/target.csv +7 -0
  38. data/spec/extract/extract.rb +13 -0
  39. data/spec/extract/extract_spec.rb +33 -0
  40. data/spec/extract/fields.yaml +1 -0
  41. data/spec/extract/parents.csv +1 -0
  42. data/spec/family/child_spec.rb +27 -0
  43. data/spec/family/family.rb +13 -0
  44. data/spec/family/parent_spec.rb +57 -0
  45. data/spec/filter/fields.yaml +1 -0
  46. data/spec/filter/filter_spec.rb +20 -0
  47. data/spec/filter/parents.csv +1 -0
  48. data/spec/filter/values.yaml +4 -0
  49. data/spec/primitive/children.csv +1 -0
  50. data/spec/primitive/fields.yaml +4 -0
  51. data/spec/primitive/primitive_spec.rb +24 -0
  52. data/spec/skip/fields.yaml +1 -0
  53. data/spec/skip/parents.csv +1 -0
  54. data/spec/skip/skip_spec.rb +17 -0
  55. data/spec/spec_helper.rb +17 -0
  56. data/spec/support/model.rb +7 -0
  57. data/spec/unique/fields.yaml +1 -0
  58. data/spec/unique/parent.rb +6 -0
  59. data/spec/unique/parents.csv +1 -0
  60. data/spec/unique/shims.rb +10 -0
  61. data/spec/unique/unique_spec.rb +20 -0
  62. data/test/fixtures/csv/data/empty.csv +1 -0
  63. data/test/fixtures/csv/data/variety.csv +1 -0
  64. data/test/lib/csv/csvio_test.rb +74 -0
  65. metadata +206 -0
@@ -0,0 +1,196 @@
1
+ require 'set'
2
+
3
+ module Jinx
4
+ module Csv
5
+ # Merges two CSV files on common fields.
6
+ class Joiner
7
+ include Enumerable
8
+
9
# Creates a joiner which merges the source into the target.
#
# @param [String, IO] source the join source
# @param [String, IO, nil] target the join target (default stdin)
# @param [String, IO, nil] output the output file name or device (default stdout)
def initialize(source, target=nil, output=nil)
  # A missing target or output falls back to the standard stream.
  @source, @target, @output = source, (target || STDIN), (output || STDOUT)
end
17
+
18
# Joins the source to the target and writes the output. The source fields used are
# given by the +fields+ argument, if given. By default, all source fields are used.
#
# The output fields consist of the qualified source fields and all target fields.
# The output fields are in the following order:
# 1. The common fields, in order of occurrence in the source file.
# 2. The qualified source-specific fields, in order of occurrence in the source file.
# 3. The target-specific fields, in order of occurrence in the target file.
#
# The match is on the common qualified source and target fields.
# Both files must be sorted in order of the common fields, sequenced by their
# occurrence in the source header.
#
# Each joined record is written to the output given to the constructor.
# If a block is given, then the block is called on each record prior to writing
# the record to the output. If the block returns nil, then the record is not
# written.
#
# @param [<String>] fields the optional source fields to merge
#   (default is all source fields)
# @yield [rec] process the output record and return the record to write
# @yieldparam [Array] rec the output record values in output header order
# @raise [ArgumentError] if a requested field is not in the source header
def join(*fields, &block)
  CsvIO.open(@target) do |tgt|
    CsvIO.open(@source) do |src|
      # all source fields (unordered)
      usflds = src.field_names.to_set
      fields.each do |fld|
        unless usflds.include?(fld) then
          raise ArgumentError.new("CSV join field #{fld} not found in the source file #{@source}.")
        end
      end
      # the qualified source fields (ordered)
      qsflds = fields.empty? ? src.field_names : fields
      tflds = tgt.field_names
      @common = qsflds & tflds
      # The headers consist of the common fields followed by the qualified
      # source-specific fields followed by the target-specific fields.
      hdrs = @common | qsflds | tflds
      CsvIO.open(@output, :mode => 'w', :headers => hdrs) do |out|
        merge(src, tgt, out, &block)
      end
    end
  end
end

# Enumerable iterates over the joined records. The alias is declared at the
# class level rather than inside the join method body, so that +each+ exists
# before join is called for the first time.
alias :each :join
65
+
66
+ private
67
+
68
+ Buffer ||= Struct.new(:key, :record, :lookahead)
69
+
70
# Merges the given source into the target as the output.
# The output headers must be in the order specified by {#join}.
#
# Each output record has three sections: the common key fields, then the
# source-specific fields, then the target-specific fields. A section is left
# nil when the corresponding side has no current record or does not take part
# in the current match.
#
# @param [CsvIO] source the source CSV IO
# @param [CsvIO] target the target CSV IO
# @param [CsvIO] output the merged output CSV IO
# @yield (see #join)
# @yieldparam (see #join)
# @see #join
def merge(source, target, output)
  # the qualified source field accessors, i.e. the source fields in the output
  src_flds = source.accessors & output.accessors
  # all of the target field accessors
  tgt_flds = target.accessors
  # the accessors shared by the source and target form the match key
  @common = src_flds & tgt_flds
  # the accessors which occur only in the target
  tgt_only = tgt_flds - @common
  # the accessors which occur only in the source
  src_only = output.accessors - tgt_only - @common
  # the output record, refilled for every emitted row
  row = Array.new(output.accessors.size)
  # Prime each buffer with its first record and look-ahead.
  src_buf = shift(source)
  tgt_buf = shift(target)
  # The comparison is nil, ending the loop, once both buffers are exhausted.
  order = compare(src_buf, tgt_buf)
  while order do
    row.fill do |col|
      if col < @common.size then
        # The key comes from whichever side takes part in this row.
        (order <= 0 ? src_buf : tgt_buf).key[col]
      elsif col < src_flds.size then
        # Source values are used only if there is a current source record
        # and the target does not precede the source.
        src_buf.record[src_only[col - @common.size]] if src_buf and order <= 0
      elsif tgt_buf and order >= 0 then
        # Target values are used only if there is a current target record
        # and the source does not precede the target.
        tgt_buf.record[tgt_only[col - src_flds.size]]
      end
    end
    out_rec = block_given? ? yield(row) : row
    # Emit the output record unless the block vetoed it.
    output << out_rec if out_rec
    # Decide both shifts before performing either one, since a shift
    # changes the buffer which the other decision inspects.
    advance_src = shift?(src_buf, tgt_buf, order)
    advance_tgt = shift?(tgt_buf, src_buf, -order)
    src_buf = shift(source, src_buf) if advance_src
    tgt_buf = shift(target, tgt_buf) if advance_tgt
    order = compare(src_buf, tgt_buf)
  end
end
121
+
122
# Returns whether to shift the given buffer as follows:
# * If the buffer precedes the other buffer (order -1), then true.
# * If the buffer succeeds the other buffer (order 1), then false.
# * If the order is 0 and the lookahead record has the same key as the
#   buffer record, then true.
# * If the order is 0 and the other lookahead record has a different key
#   than the other record, then true.
# * If the order is 0 and neither condition holds, then false.
# * For any other order value, nil.
#
# @param [Buffer] buf the record buffer to check
# @param [Buffer] other the other record buffer
# @param [-1, 0, 1] order the buffer comparison
# @return [Boolean, nil] whether to shift the buffer
def shift?(buf, other, order)
  return true if order == -1
  return false if order == 1
  return unless order == 0
  # Keys match: advance this side while it still repeats the key; otherwise
  # advance only once the other side has no further record with this key.
  compare(buf, buf.lookahead) == 0 or compare(other, other.lookahead) != 0
end
142
+
143
# Advances the given buffer to the next record of the stream.
#
# If no buffer is given, then a new buffer is created, its look-ahead is
# primed from the stream and the buffer is then advanced once. Otherwise the
# look-ahead record and key become the current record and key, and the
# look-ahead is refilled from the stream.
#
# @param [CsvIO] csvio the open CSV stream to read
# @param [Buffer, nil] buf the current record buffer
# @return [Buffer, nil] the next current buffer, or nil if end of file
def shift(csvio, buf=nil)
  unless buf then
    # prime the look-ahead, then advance the fresh buffer
    return shift(csvio, Buffer.new(nil, nil, look_ahead(csvio)))
  end
  ahead = buf.lookahead
  # no look-ahead means the stream is exhausted
  return if ahead.nil?
  buf.record = ahead.record
  buf.key = ahead.key
  # the look-ahead buffer is handed back to be refilled
  buf.lookahead = look_ahead(csvio, ahead)
  buf
end
161
+
162
# Reads the next record of the stream into the look-ahead buffer. The buffer
# key is the record value of each common field.
#
# @param [CsvIO] csvio the CSV file stream
# @param [Buffer, nil] buf the look-ahead buffer to reuse (default is a new buffer)
# @return [Buffer, nil] the modified look-ahead, or nil if end of file
def look_ahead(csvio, buf=nil)
  rec = csvio.next
  return unless rec
  buf = Buffer.new unless buf
  buf.key = @common.map { |k| rec[k] }
  buf.record = rec
  buf
end
172
+
173
# Compares the given source and target buffers with result as follows:
# * If source and target are nil, then nil
# * If source is nil and target is not nil, then 1, i.e. an exhausted
#   source sorts after every target record
# * If target is nil and source is not nil, then -1
# * Otherwise, the pair-wise comparison of the source and target keys,
#   where a nil key value precedes a non-nil key value
#
# @param [#key, nil] source the source key holder
# @param [#key, nil] target the target key holder
# @return [-1, 0 , 1, nil] the comparison result
# @raise [ArgumentError] if a pair of non-nil key values cannot be ordered
def compare(source, target)
  if source.nil? then
    return target.nil? ? nil : 1
  end
  return -1 if target.nil?
  source.key.each_with_index do |v1, i|
    v2 = target.key[i]
    next if v1.nil? and v2.nil?
    return -1 if v1.nil?
    return 1 if v2.nil?
    cmp = v1 <=> v2
    # A nil comparison result would otherwise be returned to the caller,
    # which treats nil as the end of both inputs and stops merging.
    if cmp.nil? then
      raise ArgumentError.new("CSV join key values are not comparable: #{v1.inspect} and #{v2.inspect}")
    end
    return cmp unless cmp == 0
  end
  0
end
194
+ end
195
+ end
196
+ end
@@ -0,0 +1,167 @@
1
+ require 'jinx/helpers/validation'
2
+
3
+ module Jinx
4
+ module Migration
5
+ # Transforms input values to a result based on a migration filter configuration.
6
+ # Each configuration entry is one of the following:
7
+ # * literal: literal
8
+ # * regexp: literal
9
+ # * regexp: template
10
+ #
11
+ # The regexp template can include match references (+$1+, +$2+, etc.) corresponding to the regexp captures.
12
+ # If the input value equals a literal, then the mapped literal is returned. Otherwise, if the input value
13
+ # matches a regexp, then the mapped transformation is returned after reference substitution. Otherwise,
14
+ # the input value is returned unchanged.
15
+ #
16
+ # For example, the config:
17
+ # /(\d{1,2})\/x\/(\d{1,2})/ : $1/15/$2
18
+ # n/a : ~
19
+ # converts the input value as follows:
20
+ # 3/12/02 => 3/12/02 (no match)
21
+ # 5/x/04 => 5/15/04
22
+ # n/a => nil
23
+ #
24
+ # A catch-all +/.*/+ regexp transforms any value which does not match another value or regexp, e.g.:
25
+ # /^(\d+(\.\d*)?)( g(ram)?s?)?$/ : $1
26
+ # /.*/ : 0
27
+ # converts the input value as follows:
28
+ # 3 => 3
29
+ # 4.3 grams => 4.3
30
+ # unknown => 0
31
+ class Filter
32
# Builds the filter proc from the given specification or block.
# If both a specification and a block are given, then the block is intended
# to be applied before the specification.
#
# @param [{String => Object}, nil] spec the filter configuration specification
# @yield [value] converts the input field value into a property value
# @yieldparam value the CSV input value
# @raise [ArgumentError] if neither a specification nor a block is given
def initialize(spec=nil, &block)
  # Without a specification, the block alone is the filter.
  @proc = if spec then to_proc(spec, &block) else block end
  if @proc.nil? then
    raise ArgumentError.new("Migration filter is missing both a specification and a block")
  end
end
43
+
44
# Applies this filter to the given input value.
#
# @param [String] value the input string
# @return the transformed result
def transform(value)
  @proc[value]
end
49
+
50
+ private
51
+
52
+ # The pattern to match a regular expression with captures.
53
+ # @private
54
+ REGEXP_PAT = /^\/(.*[^\\])\/([inx]+)?$/
55
+
56
# Builds the filter proc from the given specification.
# If both a specification and a block are given, then the block is applied before
# the specification.
#
# @param (see #initialize)
# @yield (see #initialize)
# @yieldparam (see #initialize)
# @return [Proc] a proc which converts the input field value into a property value
def to_proc(spec=nil, &block)
  # Split the filter spec into a pattern => value hash and a straight value => value hash.
  ph, vh = spec.split { |k, v| k =~ REGEXP_PAT }
  # The Regexp => value hash is built from the pattern => value hash.
  reh = regexp_hash(ph)
  # The value proc. The block must be forwarded, since the value proc is
  # where the block is applied to the input value.
  value_proc(reh, vh, &block)
end
72
+
73
# Builds the proc which maps an input value to its filtered result.
#
# @param [{Regexp => (Object, <Integer>)}] regexp_hash the regexp => (result, indexes) hash
# @param [{String => Object}] value_hash the value => result hash
# @yield (see #to_proc)
# @yieldparam (see #to_proc)
# @return [Proc] a proc which converts the input field value into a property value
def value_proc(regexp_hash, value_hash)
  # The new proc matches preferentially on the literal value, then the first matching regexp.
  # If there is no match on either a literal or a regexp, then the catch-all result is
  # used, if there is one, otherwise the value is preserved.
  Proc.new do |value|
    value = yield(value) if block_given?
    if value_hash.has_key?(value) then
      value_hash[value]
    else
      # The first regex which matches the value. Only a value which supports
      # =~ is matched, since e.g. a number does not respond to =~ in Ruby 3.2
      # and later.
      regexp = regexp_hash.detect_key { |re| value.respond_to?(:=~) and value =~ re }
      # If there is a match, then apply the filter to the match data.
      # Otherwise, pass the value through unmodified.
      if regexp then
        reval, ndxs = regexp_hash[regexp]
        if ndxs.empty? or not String === reval then
          reval
        else
          # The match captures (cpts[i - 1] is $i match). $~ still holds the
          # successful match made in the detect block above.
          cpts = $~.captures
          # Substitute the capture index specified in the configuration for the corresponding
          # template variable, e.g. the value filter:
          #   /(Grade )?(\d)/ : $2
          # is parsed as (reval, ndxs) = ('%s', [1])
          # and transforms 'Grade 3' to cpts[1], or '3'.
          fmtd = reval % ndxs.map { |i| cpts[i] }
          fmtd unless fmtd.blank?
        end
      elsif defined? @catch_all then
        @catch_all
      else
        value
      end
    end
  end
end
113
+
114
# Parses the configuration pattern string => value hash into a regexp => value hash
# qualified by the match indexes used to substitute match captures into the hash value.
#
# The pattern hash value can include match references ($1, $2, etc.). In that case,
# the match captures substitute into a %s format reference in the result.
#
# The catch-all pattern +/.*/+ is not added to the result. Its value is instead
# held aside as the catch-all result.
#
# @example
#   regexp_hash({'/Golf/i' => 1}) #=> {/Golf/i => [1, []]}
#   regexp_hash({'/Hole (\d{1,2})/' => '$1'}) #=> {/Hole (\d{1,2})/ => ['%s', [0]]}
#
# @param [{String => Object}] pat_hash the string => value hash
# @return [{Regexp => (Object, <Integer>)}] the corresponding regexp => (value, indexes) hash
def regexp_hash(pat_hash)
  reh = {}
  pat_hash.each do |k, v|
    # The /pattern/opts string is parsed to the pattern and options.
    pat, opt = REGEXP_PAT.match(k).captures
    if pat == '.*' then
      # the catch-all matcher
      @catch_all = v
    else
      # Convert the regexp i option character to a Regexp initializer parameter.
      reopt = case opt
        when nil then nil
        when 'i' then Regexp::IGNORECASE
        else Jinx.fail(MigrationError, "Migration value filter regular expression #{k} qualifier not supported: expected 'i', found '#{opt}'")
      end
      # Each $ match reference in the value is replaced with a %s format reference.
      reh[Regexp.new(pat, reopt)] = parse_regexp_value(v)
    end
  end
  reh
end
152
+
153
# Converts a configuration regexp entry value into a format template and the
# zero-based capture indexes to substitute into that template. A value which
# is not a String or which has no +$n+ match reference is returned as is,
# paired with an empty index array.
#
# @example
#   parse_regexp_value('Grade $2') #=> ['Grade %s', [1]]
#   parse_regexp_value(1) #=> [1, []]
# @param value the value in the configuration regexp => value entry
# @return (Object, <Integer>) the parsed (value, indexes)
# @see #regexp_hash
def parse_regexp_value(value)
  # Only a String can hold a match reference. The type check is made first,
  # since a number does not respond to =~ in Ruby 3.2 and later.
  return [value, Array::EMPTY_ARRAY] unless String === value and value =~ /\$\d/
  tmpl = value.gsub(/\$\d/, '%s')
  # Look for match references of the form $n.
  ndxs = value.scan(/\$(\d)/).map { |matches| matches.first.to_i - 1 }
  [tmpl, ndxs]
end
165
+ end
166
+ end
167
+ end
@@ -0,0 +1,244 @@
1
+ module Jinx
2
+ # The Migratable mix-in adds migration support for Resource domain objects.
3
+ # For each migration Resource created by a Migrator, the migration process
4
+ # is as follows:
5
+ #
6
+ # 1. The migrator creates the Resource using the empty constructor.
7
+ #
8
+ # 2. Each input field value which maps to a Resource attribute is obtained from the
9
+ # migration source.
10
+ #
11
+ # 3. If the Resource class implements a method +migrate_+_attribute_ for the
12
+ # migration _attribute_, then that migrate method is called with the input value
13
+ # argument. If there is a migrate method, then the attribute is set to the
14
+ # result of calling that method, otherwise the attribute is set to the original
15
+ # input value.
16
+ #
17
+ # For example, if the +Name+ input field maps to +Parent.name+, then a
18
+ # custom +Parent+ +migrate_name+ shim method can be defined to reformat
19
+ # the input name.
20
+ #
21
+ # 4. The Resource attribute is set to the (possibly modified) value.
22
+ #
23
+ # 5. After all input fields are processed, then {#migration_valid?} is called to
24
+ # determine whether the migrated object can be used. {#migration_valid?} is true
25
+ # by default, but a migration shim can override this method in the
26
+ # migrated Resource class to return false for special cases.
27
+ #
28
+ # For example, a custom +Parent+ +migration_valid?+ shim method can be
29
+ # defined to return whether there is a non-empty input field value.
30
+ #
31
+ # 6. After the migrated objects are validated, then the Migrator fills in
32
+ # dependency hierarchy gaps. For example, if the Resource class +Parent+
33
+ # owns the +household+ dependent which in turn owns the +address+ dependent
34
+ # and the migration has created a +Parent+ and an +Address+ but no +Household+,
35
+ # then an empty +Household+ is created which is owned by the migrated +Parent+
36
+ # and owns the migrated +Address+.
37
+ #
38
+ # 7. After all dependencies are filled in, then the independent references are set
39
+ # for each created Resource (including the new dependents). If a created
40
+ # Resource has an independent non-collection Resource reference attribute
41
+ # and there is a migrated instance of that attribute type, then the attribute
42
+ # is set to that migrated instance.
43
+ #
44
+ # For example, if +Household+ has a +address+ attribute and there is a
45
+ # single migrated +Address+ instance, then the +address+ attribute is set
46
+ # to that migrated +Address+ instance.
47
+ #
48
+ # If the referencing class implements a method +migrate_+_attribute_ for the
49
+ # migration _attribute_, then that migrate method is called with the referenced
50
+ # instance argument. The result is used to set the attribute. Otherwise, the
51
+ # attribute is set to the original referenced instance.
52
+ #
53
+ # There must be a single unambiguous candidate independent instance, e.g. in the
54
+ # unlikely but conceivable case that two +Address+ instances are migrated, then the
55
+ # +address+ attribute is not set. Similarly, collection attributes are not set,
56
+ # e.g. a +Address+ +protocols+ attribute is not set to a migrated +Protocol+
57
+ # instance.
58
+ #
59
+ # 8. The {#migrate} method is called to complete the migration. As described in the
60
+ # method documentation, a migration shim Resource subclass can override the
61
+ # method for custom migration processing, e.g. to migrate the ambiguous or
62
+ # collection attributes mentioned above, or to fill in missing values.
63
+ #
64
+ # Note that there is an extensive set of attribute defaults defined in the +Jinx::Resource+
65
+ # application domain classes. These defaults are applied in a migration database save
66
+ # action and need not be set in a migration shim. For example, if an acceptable
67
+ # default for an +Address.country+ property is defined in the +Address+ meta-data,
68
+ # then the country does not need to be set in a migration shim.
69
+ module Migratable
70
# Completes setting this Migratable domain object's attributes from the given input row.
# This method is responsible for migrating attributes which are not mapped
# in the configuration. It is called after the configuration attributes for
# the given row are migrated and before {#migrate_references}.
#
# This base implementation does nothing and returns nil.
# Subclasses can override this method to complete the migration. The overriding
# methods should call +super+ to pick up the superclass migration.
#
# @param [{Symbol => Object}] row the input row field => value hash
# @param [<Resource>] migrated the migrated instances, including this domain object
def migrate(row, migrated)
  nil
end
83
+
84
# Returns whether this migration target domain object is valid.
#
# This base implementation always returns true. A migration shim should
# override this method on the target if there are conditions which determine
# whether the migration should be skipped for this target object.
#
# @return [Boolean] whether this migration target domain object is valid
def migration_valid?
  true
end
92
+
93
# Migrates this domain object's migratable references. This method is called by the
# Migrator and should not be overridden by subclasses. Subclasses tailor
# individual reference attribute migration by defining a +migrate_+_attribute_ method
# for the _attribute_ to modify.
#
# The owner reference is migrated first, followed by the attributes given by
# {#migratable_independent_attributes} and then the unidirectional dependent
# attributes of this object's class. For each such attribute, if there is a
# single instance of the attribute type in the given migrated domain objects,
# then that migrated instance is the value to set.
#
# If the attribute is associated with a proc in proc_hash, then that proc is called
# with this object, the migrated instance and the input row. The proc return value
# is then used in place of the migrated instance.
#
# @param [{Symbol => Object}] row the input row field => value hash
# @param [<Resource>] migrated the migrated instances, including this Resource
# @param [Class] target the migration target class
# @param [{Symbol => Proc}, nil] proc_hash a hash that associates this domain object's
#   attributes to a migration shim block
def migrate_references(row, migrated, target, proc_hash=nil)
  # the owner comes first
  migratable__migrate_owner(row, migrated, target, proc_hash)
  # then the independent references
  independents = migratable_independent_attributes
  migratable__set_nonowner_references(independents, row, migrated, proc_hash)
  # and finally the unidirectional dependents
  dependents = self.class.unidirectional_dependent_attributes
  migratable__set_nonowner_references(dependents, row, migrated, proc_hash)
end
121
+
122
# Returns the independent attributes of this Resource's class.
# Applications can override this implementation to restrict the independent
# attributes which are migrated, e.g. to include only saved independent attributes.
#
# @return the attributes to migrate
def migratable_independent_attributes
  klass = self.class
  klass.independent_attributes
end
130
+
131
# Extracts the content of this migration target to the given file.
#
# This base implementation does nothing and returns nil.
# Subclasses can override this method to write data to the extract.
#
# @param [IO] file the extract output stream
def extract(file)
  nil
end
139
+
140
+ private
141
+
142
# Migrates the owner as follows:
# * If there is exactly one owner attribute with a migrated candidate, then
#   that owner reference is set to the candidate.
# * Otherwise, if exactly one of the candidates is an instance of the given
#   target class, then that candidate is the owner.
# * Otherwise, if all of the candidates are the same object, then that object
#   is set as the owner using the first candidate attribute.
# * Otherwise, if {#migratable__preferred_owner} selects a candidate, then
#   that candidate is the owner.
# * Otherwise, no reference is set.
#
# @param row (see #migrate_references)
# @param migrated (see #migrate_references)
# @param target (see #migrate_references)
# @param proc_hash (see #migrate_references)
# @return [Resource, nil] the migrated owner, if any
def migratable__migrate_owner(row, migrated, target, proc_hash=nil)
  # the owner attribute => migrated reference hash
  ovh = self.class.owner_attributes.to_compact_hash do |mattr|
    migratable__target_value(self.class.property(mattr), row, migrated, proc_hash)
  end
  # Narrow more than one owner candidate down to a single entry, if possible.
  if ovh.size > 1 then
    tvh = ovh.filter_on_value { |ov| target === ov }.to_hash
    if tvh.size == 1 then
      # exactly one candidate is an instance of the target class
      ovh = tvh
    elsif (ownrs = ovh.values.uniq).size == 1 then
      # every attribute references the same owner
      ovh = {ovh.keys.first => ownrs.first}
    else
      logger.debug { "The migrated dependent #{qp} has ambiguous migrated owner references #{ovh.qp}." }
      preferred = migratable__preferred_owner(ownrs)
      if preferred then
        logger.debug { "The preferred dependent #{qp} migrated owner reference is #{preferred.qp}." }
        ovh = {ovh.keys.detect { |k| ovh[k] == preferred } => preferred}
      end
    end
  end
  # No owner is set if there is no candidate or the ambiguity remains.
  return unless ovh.size == 1
  oattr, oref = ovh.first
  set_property_value(oattr, oref)
  logger.debug { "Set the #{qp} #{oattr} owner to the migrated #{oref.qp}." }
  oref
end
188
+
189
# Selects the owner to use when there is more than one migrated owner candidate.
#
# This base implementation returns nil, i.e. there is no preference.
# Subclasses can override this to select a preferred owner.
#
# @param [<Resource>] candidates the migrated owners
# @return [Resource, nil] the preferred owner, if any
def migratable__preferred_owner(candidates)
  nil
end
196
+
197
# Sets each non-owner reference attribute in the given filter to its migrated
# target value, if there is one. The target value is added to a collection
# attribute which has a current value. A non-collection attribute is set only
# if it does not already have a value.
#
# @param [Property::Filter] attr_filter the attributes to set
# @param row (see #migrate_references)
# @param migrated (see #migrate_references)
# @param proc_hash (see #migrate_references)
def migratable__set_nonowner_references(attr_filter, row, migrated, proc_hash=nil)
  attr_filter.each_pair do |mattr, pa|
    # skip owners
    next if pa.owner?
    # the target value; there is nothing to do without one
    ref = migratable__target_value(pa, row, migrated, proc_hash)
    next unless ref
    if pa.collection? then
      # the current value; skip the attribute if there is no collection to add to
      value = send(pa.reader)
      next unless value
      value << ref
      logger.debug { "Added the migrated #{ref.qp} to #{qp} #{mattr}." }
    elsif (current = send(mattr)) then
      logger.debug { "Ignoring the migrated #{ref.qp} since #{qp} #{mattr} is already set to #{current.qp}." }
    else
      set_property_value(mattr, ref)
      logger.debug { "Set the #{qp} #{mattr} to the migrated #{ref.qp}." }
    end
  end
end
223
+
224
# Finds the value to set for the given reference attribute. The candidates are
# the migrated objects other than this object which are instances of the
# attribute type. If there is a shim proc for the attribute, then the result is
# the value of calling that proc with this object, the candidate and the row.
#
# @param [Property] pa the reference attribute
# @param row (see #migrate_references)
# @param migrated (see #migrate_references)
# @param proc_hash (see #migrate_references)
# @return [Resource, nil] the migrated instance of the attribute type, possibly
#   modified by the shim, or nil if there is not exactly one such instance
def migratable__target_value(pa, row, migrated, proc_hash=nil)
  # the migrated references which are instances of the attribute type
  refs = migrated.select { |other| other != self and pa.type === other }
  case refs.size
  when 0 then
    nil
  when 1 then
    # the single reference
    ref = refs.first
    # the shim proc, if any
    shim = proc_hash && proc_hash[pa.to_sym]
    shim ? shim.call(self, ref, row) : ref
  else
    # skip ambiguous references
    logger.debug { "Migrator did not set references to ambiguous targets #{refs.pp_s}." }
    nil
  end
end
243
+ end
244
+ end