jinx-migrate 2.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +14 -0
- data/.rspec +3 -0
- data/.yardopts +1 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +38 -0
- data/History.md +6 -0
- data/LEGAL +5 -0
- data/LICENSE +22 -0
- data/README.md +33 -0
- data/Rakefile +40 -0
- data/bin/csvjoin +24 -0
- data/examples/family/README.md +24 -0
- data/examples/family/conf/children/fields.yaml +2 -0
- data/examples/family/conf/parents/defaults.yaml +3 -0
- data/examples/family/conf/parents/fields.yaml +6 -0
- data/examples/family/conf/parents/values.yaml +4 -0
- data/examples/family/data/children.csv +1 -0
- data/examples/family/data/parents.csv +1 -0
- data/examples/family/lib/shims.rb +17 -0
- data/jinx-migrate.gemspec +26 -0
- data/lib/jinx/csv/csvio.rb +214 -0
- data/lib/jinx/csv/joiner.rb +196 -0
- data/lib/jinx/migration/filter.rb +167 -0
- data/lib/jinx/migration/migratable.rb +244 -0
- data/lib/jinx/migration/migrator.rb +1029 -0
- data/lib/jinx/migration/reader.rb +16 -0
- data/lib/jinx/migration/version.rb +5 -0
- data/spec/bad/bad_spec.rb +25 -0
- data/spec/bad/fields.yaml +1 -0
- data/spec/bad/parents.csv +1 -0
- data/spec/bad/shims.rb +16 -0
- data/spec/csv/join/join_helper.rb +35 -0
- data/spec/csv/join/join_spec.rb +100 -0
- data/spec/csv/join/jumbled_src.csv +7 -0
- data/spec/csv/join/jumbled_tgt.csv +7 -0
- data/spec/csv/join/source.csv +7 -0
- data/spec/csv/join/target.csv +7 -0
- data/spec/extract/extract.rb +13 -0
- data/spec/extract/extract_spec.rb +33 -0
- data/spec/extract/fields.yaml +1 -0
- data/spec/extract/parents.csv +1 -0
- data/spec/family/child_spec.rb +27 -0
- data/spec/family/family.rb +13 -0
- data/spec/family/parent_spec.rb +57 -0
- data/spec/filter/fields.yaml +1 -0
- data/spec/filter/filter_spec.rb +20 -0
- data/spec/filter/parents.csv +1 -0
- data/spec/filter/values.yaml +4 -0
- data/spec/primitive/children.csv +1 -0
- data/spec/primitive/fields.yaml +4 -0
- data/spec/primitive/primitive_spec.rb +24 -0
- data/spec/skip/fields.yaml +1 -0
- data/spec/skip/parents.csv +1 -0
- data/spec/skip/skip_spec.rb +17 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/support/model.rb +7 -0
- data/spec/unique/fields.yaml +1 -0
- data/spec/unique/parent.rb +6 -0
- data/spec/unique/parents.csv +1 -0
- data/spec/unique/shims.rb +10 -0
- data/spec/unique/unique_spec.rb +20 -0
- data/test/fixtures/csv/data/empty.csv +1 -0
- data/test/fixtures/csv/data/variety.csv +1 -0
- data/test/lib/csv/csvio_test.rb +74 -0
- metadata +206 -0
@@ -0,0 +1,196 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
module Jinx
|
4
|
+
module Csv
|
5
|
+
# Merges two CSV files on common fields.
|
6
|
+
class Joiner
|
7
|
+
include Enumerable
|
8
|
+
|
9
|
+
# Creates a joiner on the given source, target and output.
#
# @param [String, IO] source the join source
# @param [String, IO, nil] target the join target (default stdin)
# @param [String, IO, nil] output the output file name or device (default stdout)
def initialize(source, target=nil, output=nil)
  @source = source
  # An omitted target or output falls back on the corresponding standard stream.
  @target, @output = (target || STDIN), (output || STDOUT)
end
|
17
|
+
|
18
|
+
# Joins the source to the target and writes the output. The source fields used are
# given by the +fields+ argument, if given. By default, all source fields are used.
#
# The output fields consist of the qualified source fields and all target fields.
# The output fields are in the following order:
# 1. The common fields, in order of occurrence in the source file.
# 2. The qualified source-specific fields, in order of occurrence in the source file.
# 3. The target-specific fields, in order of occurrence in the target file.
#
# The match is on the common qualified source and target fields.
# Both files must be sorted in order of the common fields, sequenced by their
# occurrence in the source header.
#
# Each joined record is written to the output. If a block is given, then the block
# is called on each record prior to writing the record to the output. If the block
# returns nil, then the record is not written.
#
# @param [<String>] fields the optional source fields to merge
#   (default is all source fields)
# @yield [rec] process the output record and return the record to write
# @yieldparam [Array] rec the output record values, in output header order
# @raise [ArgumentError] if a given field is not a source file field
def join(*fields, &block)
  CsvIO.open(@target) do |tgt|
    CsvIO.open(@source) do |src|
      # all source fields (unordered)
      usflds = src.field_names.to_set
      fields.each do |fld|
        unless usflds.include?(fld) then
          raise ArgumentError.new("CSV join field #{fld} not found in the source file #{@source}.")
        end
      end
      # the qualified source fields (ordered)
      qsflds = fields.empty? ? src.field_names : fields
      tflds = tgt.field_names
      @common = qsflds & tflds
      # The headers consist of the common fields followed by the qualified
      # source-specific fields followed by the target-specific fields.
      hdrs = @common | qsflds | tflds
      CsvIO.open(@output, :mode => 'w', :headers => hdrs) do |out|
        merge(src, tgt, out, &block)
      end
    end
  end
end

# Enumerable iteration performs the join. The alias is made at the class level,
# rather than within the join method body, so that +each+ is defined before the
# first join call.
alias :each :join
|
65
|
+
|
66
|
+
private
|
67
|
+
|
68
|
+
# A record buffer holds the join key values and the record of a CSV stream position,
# along with the look-ahead buffer for the record which follows it.
Buffer ||= Struct.new(:key, :record, :lookahead)
|
69
|
+
|
70
|
+
# Merges the source records with the target records and appends each merged record
# to the output. The output headers must be in the order specified by {#join}.
#
# Each pass of the merge loop compares the current source and target buffers, fills
# the output record from whichever side does not follow the other, emits the record
# and then advances the buffers. The loop ends when both streams are exhausted.
#
# @param [CsvIO] source the source CSV IO
# @param [CsvIO] target the target CSV IO
# @param [CsvIO] output the merged output CSV IO
# @yield (see #join)
# @yieldparam (see #join)
# @see #join
def merge(source, target, output)
  # the qualified source field accessors
  src_accessors = source.accessors & output.accessors
  # the target field accessors
  tgt_accessors = target.accessors
  # the common accessors, which are used to build the record keys
  @common = src_accessors & tgt_accessors
  # the accessors which occur only in the target
  tgt_only = tgt_accessors - @common
  # the accessors which occur only in the source
  src_only = output.accessors - tgt_only - @common
  # the output record, which is refilled on each pass
  out_rec = Array.new(output.accessors.size)
  # Read the first record of each stream into the source and target buffers.
  src_buf = shift(source)
  tgt_buf = shift(target)
  while order = compare(src_buf, tgt_buf) do
    # The source contributes unless it follows the target, and vice versa.
    from_source = order <= 0
    from_target = order >= 0
    # Fill the output record in three sections: the common, source and target fields.
    out_rec.fill do |i|
      if i < @common.size then
        (from_source ? src_buf : tgt_buf).key[i]
      elsif i < src_accessors.size then
        src_buf.record[src_only[i - @common.size]] if src_buf && from_source
      elsif tgt_buf && from_target then
        tgt_buf.record[tgt_only[i - src_accessors.size]]
      end
    end
    # The block, if any, can replace or veto the record.
    emitted = block_given? ? yield(out_rec) : out_rec
    output << emitted if emitted
    # Decide on both shifts before either buffer is modified.
    advance_source = shift?(src_buf, tgt_buf, order)
    advance_target = shift?(tgt_buf, src_buf, -order)
    src_buf = shift(source, src_buf) if advance_source
    tgt_buf = shift(target, tgt_buf) if advance_target
  end
end
|
121
|
+
|
122
|
+
# Determines whether the given buffer should advance to its next record as follows:
# * If the buffer precedes the other buffer (order -1), then true.
# * If the buffer follows the other buffer (order 1), then false.
# * If the buffers match (order 0), then true if the buffer look-ahead has the same
#   key as the buffer record, or if the other look-ahead does not have the same key
#   as the other record.
# * For any other order, nil.
#
# @param [Buffer] buf the record buffer to check
# @param [Buffer] other the other record buffer
# @param [-1, 0, 1] order the buffer comparison
# @return [Boolean] whether to shift the buffer
def shift?(buf, other, order)
  return true if order == -1
  return false if order == 1
  return unless order == 0
  compare(buf, buf.lookahead) == 0 || compare(other, other.lookahead) != 0
end
|
142
|
+
|
143
|
+
# Advances the given buffer to the next record of the CSV stream. The buffer takes
# on the record and key of its look-ahead, and the look-ahead is then refilled from
# the stream. If there is no buffer yet, then a new buffer is created and primed
# with the first look-ahead.
#
# @param [CsvIO] csvio the open CSV stream to read
# @param [Buffer, nil] buf the current record buffer
# @return [Buffer, nil] the next current buffer, or nil if end of file
def shift(csvio, buf=nil)
  # prime the look-ahead on the first read
  buf ||= Buffer.new(nil, nil, look_ahead(csvio))
  ahead = buf.lookahead
  # No look-ahead means that the stream is exhausted.
  return if ahead.nil?
  buf.record = ahead.record
  buf.key = ahead.key
  # The look-ahead buffer is reused for the following record.
  buf.lookahead = look_ahead(csvio, ahead)
  buf
end
|
161
|
+
|
162
|
+
# Reads the next record of the CSV stream into a look-ahead buffer. The buffer key
# is set to the record values of the common fields.
#
# @param [CsvIO] csvio the CSV file stream
# @param [Buffer, nil] buf the look-ahead buffer to reuse, if any
# @return [Buffer, nil] the modified look-ahead, or nil if end of file
def look_ahead(csvio, buf=nil)
  row = csvio.next
  return unless row
  buf ||= Buffer.new
  buf.record = row
  buf.key = @common.map { |field| row[field] }
  buf
end
|
172
|
+
|
173
|
+
# Compares the given source and target buffers with result as follows:
# * If source and target are nil, then nil
# * If source is nil and target is not nil, then 1
# * If target is nil and source is not nil, then -1
# * Otherwise, the pair-wise comparison of the source and target keys, where
#   a nil key value precedes a non-nil key value
#
# @param [:key, nil] source the source key holder
# @param [:key, nil] target the target key holder
# @return [-1, 0 , 1, nil] the comparison result
def compare(source, target)
  if source.nil? then
    return target.nil? ? nil : 1
  end
  return -1 if target.nil?
  source.key.zip(target.key).each do |sval, tval|
    next if sval.nil? && tval.nil?
    return -1 if sval.nil?
    return 1 if tval.nil?
    order = sval <=> tval
    return order unless order == 0
  end
  # all key values match
  0
end
|
194
|
+
end
|
195
|
+
end
|
196
|
+
end
|
@@ -0,0 +1,167 @@
|
|
1
|
+
require 'jinx/helpers/validation'
|
2
|
+
|
3
|
+
module Jinx
|
4
|
+
module Migration
|
5
|
+
# Transforms input values to a result based on a migration filter configuration.
|
6
|
+
# Each configuration entry is one of the following:
|
7
|
+
# * literal: literal
|
8
|
+
# * regexp: literal
|
9
|
+
# * regexp: template
|
10
|
+
#
|
11
|
+
# The regexp template can include match references (+$1+, +$2+, etc.) corresponding to the regexp captures.
|
12
|
+
# If the input value equals a literal, then the mapped literal is returned. Otherwise, if the input value
|
13
|
+
# matches a regexp, then the mapped transformation is returned after reference substitution. Otherwise,
|
14
|
+
# the input value is returned unchanged.
|
15
|
+
#
|
16
|
+
# For example, the config:
|
17
|
+
# /(\d{1,2})\/x\/(\d{1,2})/ : $1/15/$2
|
18
|
+
# n/a : ~
|
19
|
+
# converts the input value as follows:
|
20
|
+
# 3/12/02 => 3/12/02 (no match)
|
21
|
+
# 5/x/04 => 5/15/04
|
22
|
+
# n/a => nil
|
23
|
+
#
|
24
|
+
# A catch-all +/.*/+ regexp transforms any value which does not match another value or regexp, e.g.:
|
25
|
+
# /^(\d+(\.\d*)?)( g(ram)?s?)?$/ : $1
|
26
|
+
# /.*/ : 0
|
27
|
+
# converts the input value as follows:
|
28
|
+
# 3 => 3
|
29
|
+
# 4.3 grams => 4.3
|
30
|
+
# unknown => 0
|
31
|
+
class Filter
|
32
|
+
# Builds the filter proc from the given specification or block.
# If a specification is given, then the proc is built by {#to_proc} from the
# specification and the optional block. Otherwise, the block is the filter proc.
#
# @param [{String => Object}, nil] spec the filter configuration specification
# @yield [value] converts the input field value into a caTissue property value
# @yieldparam value the CSV input value
# @raise [ArgumentError] if there is neither a specification nor a block
def initialize(spec=nil, &block)
  @proc = if spec then to_proc(spec, &block) else block end
  if @proc.nil? then
    raise ArgumentError.new("Migration filter is missing both a specification and a block")
  end
end
|
43
|
+
|
44
|
+
# Applies this filter's proc to the given input value.
#
# @param [String] value the input string
# @return the transformed result
def transform(value)
  @proc[value]
end
|
49
|
+
|
50
|
+
private
|
51
|
+
|
52
|
+
# The pattern to match a regular expression with captures.
|
53
|
+
# @private
|
54
|
+
# Matches a +/pattern/options+ string. Capture 1 is the text between the enclosing
# slashes and capture 2 is the optional run of trailing option characters.
REGEXP_PAT = /^\/(.*[^\\])\/([inx]+)?$/
|
55
|
+
|
56
|
+
# Builds the filter proc from the given specification.
# If both a specification and a block are given, then the block is forwarded to
# {#value_proc}, whose proc applies the block to the input value before the
# specification is consulted.
#
# @param (see #initialize)
# @yield (see #initialize)
# @yieldparam (see #initialize)
# @return [Proc] a proc which converts the input field value into a caTissue property value
def to_proc(spec=nil, &block)
  # Split the filter spec into a pattern => value hash and a straight value => value hash.
  # NOTE(review): Hash#split is a Jinx extension which is assumed to partition the hash
  # on the block result - confirm.
  ph, vh = spec.split { |k, v| k =~ REGEXP_PAT }
  # The Regexp => value hash is built from the pattern => value hash.
  reh = regexp_hash(ph)
  # The value proc. The block must be passed along, since otherwise it is silently dropped.
  value_proc(reh, vh, &block)
end
|
72
|
+
|
73
|
+
# Makes the proc which transforms an input value based on the given hashes.
# The proc matches preferentially on the literal value, then on the first matching
# regexp. If there is no match on either a literal or a regexp, then the result is
# the catch-all value, if one is set, and the unchanged input value otherwise.
# If a block is given to this method, then the proc first applies the block to
# the input value.
#
# @param {Regexp => (Object, <Integer>)} regexp_hash the regexp => (result, indexes) hash
# @param {String => Object} value_hash the value => result hash
# @yield (see #to_proc)
# @yieldparam (see #to_proc)
# @return [Proc] a proc which converts the input field value into a caTissue property value
def value_proc(regexp_hash, value_hash)
  Proc.new do |value|
    value = yield(value) if block_given?
    # A literal entry wins over every regexp.
    next value_hash[value] if value_hash.has_key?(value)
    # The first regexp which matches the value. The match is made here rather than
    # in a helper method, since the captures are read from $~ below.
    regexp = regexp_hash.detect_key { |re| value =~ re }
    # Without a match, fall back on the catch-all or pass the value through unmodified.
    unless regexp then
      next(defined?(@catch_all) ? @catch_all : value)
    end
    reval, ndxs = regexp_hash[regexp]
    # A result without match references is returned as is.
    next reval if ndxs.empty? || !(String === reval)
    # The match captures (cpts[i - 1] is the $i match).
    cpts = $~.captures
    # Substitute the capture at each index specified in the configuration for the
    # corresponding format reference, e.g. the value filter:
    #   /(Grade )?(\d)/ : $2
    # is parsed as (reval, ndxs) = ('%s', [1])
    # and transforms 'Grade 3' to cpts[1], or '3'.
    fmtd = reval % ndxs.map { |i| cpts[i] }
    # A blank result is converted to nil.
    fmtd unless fmtd.blank?
  end
end
|
113
|
+
|
114
|
+
# Parses the configuration pattern string => value hash into a regexp => value hash
# qualified by the match indexes used to substitute match captures into the hash value.
#
# The pattern hash value can include match references ($1, $2, etc.). In that case,
# the match captures substitute into a %s format reference in the result.
#
# The catch-all +/.*/+ pattern is not added to the result. Its value is captured
# as this filter's catch-all value instead.
#
# @example
#   regexp_hash({'/Golf/i' => 1}) #=> {/Golf/i => [1, []]}
#   regexp_hash({'/Hole (\d{1,2})/' => '$1'}) #=> {/Hole (\d{1,2})/ => ['%s', [0]]}
#
# @param [{String => Object}] pat_hash the string => value hash
# @return [{Regexp => (Object, <Integer>)}] the corresponding regexp => (value, indexes) hash
def regexp_hash(pat_hash)
  parsed = {}
  pat_hash.each do |k, value|
    # The /pattern/opts string is parsed to the pattern and options.
    body, opt = REGEXP_PAT.match(k).captures
    # the catch-all matcher
    if body == '.*' then
      @catch_all = value
      next
    end
    # Convert the regexp i option character to a Regexp initializer parameter.
    flags = case opt
      when nil then nil
      when 'i' then Regexp::IGNORECASE
      else Jinx.fail(MigrationError, "Migration value filter regular expression #{k} qualifier not supported: expected 'i', found '#{opt}'")
    end
    # Map the Regexp object to the value with each $ match reference replaced
    # by a %s format reference.
    parsed[Regexp.new(body, flags)] = parse_regexp_value(value)
  end
  parsed
end
|
152
|
+
|
153
|
+
# Parses the value of a configuration regexp => value entry into a format template
# and the capture indexes to substitute into the template. A value without a match
# reference, including a value which is not a String, is returned unchanged with
# empty indexes.
#
# @example
#   parse_regexp_value('Grade $2') #=> ['Grade %s', [1]]
# @param value the value in the configuration regexp => value entry
# @return (Object, <Integer>) the parsed (value, indexes)
# @see #regexp_hash
def parse_regexp_value(value)
  # Only a String can hold a match reference. The type check avoids calling =~ on
  # a number or other non-String configuration value, which is an error as of Ruby 3.2.
  return [value, Array::EMPTY_ARRAY] unless String === value && value =~ /\$\d/
  # Replace each $n match reference with a %s format reference.
  tmpl = value.gsub(/\$\d/, '%s')
  # The zero-based capture index for each $n match reference, in order of occurrence.
  ndxs = value.scan(/\$(\d)/).map { |matches| matches.first.to_i - 1 }
  [tmpl, ndxs]
end
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
@@ -0,0 +1,244 @@
|
|
1
|
+
module Jinx
|
2
|
+
# The Migratable mix-in adds migration support for Resource domain objects.
|
3
|
+
# For each migration Resource created by a Migrator, the migration process
|
4
|
+
# is as follows:
|
5
|
+
#
|
6
|
+
# 1. The migrator creates the Resource using the empty constructor.
|
7
|
+
#
|
8
|
+
# 2. Each input field value which maps to a Resource attribute is obtained from the
|
9
|
+
# migration source.
|
10
|
+
#
|
11
|
+
# 3. If the Resource class implements a method +migrate_+_attribute_ for the
|
12
|
+
# migration _attribute_, then that migrate method is called with the input value
|
13
|
+
# argument. If there is a migrate method, then the attribute is set to the
|
14
|
+
# result of calling that method, otherwise the attribute is set to the original
|
15
|
+
# input value.
|
16
|
+
#
|
17
|
+
# For example, if the +Name+ input field maps to +Parent.name+, then a
|
18
|
+
# custom +Parent+ +migrate_name+ shim method can be defined to reformat
|
19
|
+
# the input name.
|
20
|
+
#
|
21
|
+
# 4. The Resource attribute is set to the (possibly modified) value.
|
22
|
+
#
|
23
|
+
# 5. After all input fields are processed, then {#migration_valid?} is called to
|
24
|
+
# determine whether the migrated object can be used. {#migration_valid?} is true
|
25
|
+
# by default, but a migration shim can override this method in the
|
26
|
+
# migrated Resource class to return false for special cases.
|
27
|
+
#
|
28
|
+
# For example, a custom +Parent+ +migration_valid?+ shim method can be
|
29
|
+
# defined to return whether there is a non-empty input field value.
|
30
|
+
#
|
31
|
+
# 6. After the migrated objects are validated, then the Migrator fills in
|
32
|
+
# dependency hierarchy gaps. For example, if the Resource class +Parent+
|
33
|
+
# owns the +household+ dependent which in turn owns the +address+ dependent
|
34
|
+
# and the migration has created a +Parent+ and an +Address+ but no +Household+,
|
35
|
+
# then an empty +Household+ is created which is owned by the migrated +Parent+
|
36
|
+
# and owns the migrated +Address+.
|
37
|
+
#
|
38
|
+
# 7. After all dependencies are filled in, then the independent references are set
|
39
|
+
# for each created Resource (including the new dependents). If a created
|
40
|
+
# Resource has an independent non-collection Resource reference attribute
|
41
|
+
# and there is a migrated instance of that attribute type, then the attribute
|
42
|
+
# is set to that migrated instance.
|
43
|
+
#
|
44
|
+
# For example, if +Household+ has a +address+ attribute and there is a
|
45
|
+
# single migrated +Address+ instance, then the +address+ attribute is set
|
46
|
+
# to that migrated +Address+ instance.
|
47
|
+
#
|
48
|
+
# If the referencing class implements a method +migrate_+_attribute_ for the
|
49
|
+
# migration _attribute_, then that migrate method is called with the referenced
|
50
|
+
# instance argument. The result is used to set the attribute. Otherwise, the
|
51
|
+
# attribute is set to the original referenced instance.
|
52
|
+
#
|
53
|
+
# There must be a single unambiguous candidate independent instance, e.g. in the
|
54
|
+
# unlikely but conceivable case that two +Address+ instances are migrated, then the
|
55
|
+
# +address+ attribute is not set. Similarly, collection attributes are not set,
|
56
|
+
# e.g. a +Address+ +protocols+ attribute is not set to a migrated +Protocol+
|
57
|
+
# instance.
|
58
|
+
#
|
59
|
+
# 8. The {#migrate} method is called to complete the migration. As described in the
|
60
|
+
# method documentation, a migration shim Resource subclass can override the
|
61
|
+
# method for custom migration processing, e.g. to migrate the ambiguous or
|
62
|
+
# collection attributes mentioned above, or to fill in missing values.
|
63
|
+
#
|
64
|
+
# Note that there is an extensive set of attribute defaults defined in the +Jinx::Resource+
|
65
|
+
# application domain classes. These defaults are applied in a migration database save
|
66
|
+
# action and need not be set in a migration shim. For example, if an acceptable
|
67
|
+
# default for an +Address.country+ property is defined in the +Address+ meta-data,
|
68
|
+
# then the country does not need to be set in a migration shim.
|
69
|
+
module Migratable
|
70
|
+
# Completes setting this Migratable domain object's attributes from the given input row.
# This method is the hook for migrating attributes which are not mapped in the
# configuration. It is called after the configuration attributes for the given row
# are migrated and before {#migrate_references}.
#
# This base implementation does nothing. Subclasses can override this method to
# complete the migration. An override should call +super+ to pick up the
# superclass migration.
#
# @param [{Symbol => Object}] row the input row field => value hash
# @param [<Resource>] migrated the migrated instances, including this domain object
def migrate(row, migrated); end
|
83
|
+
|
84
|
+
# Returns whether this migration target domain object is valid. This base
# implementation always returns true. A migration shim should override this method
# on the target if there are conditions which determine whether the migration
# should be skipped for this target object.
#
# @return [Boolean] whether this migration target domain object is valid
def migration_valid?; true; end
|
92
|
+
|
93
|
+
# Migrates this domain object's migratable references. This method is called by the
# Migrator and should not be overridden by subclasses. Subclasses tailor
# individual reference attribute migration by defining a +migrate_+_attribute_ method
# for the _attribute_ to modify.
#
# The owner reference is migrated first, followed by the {#migratable_independent_attributes}
# and then this object's class unidirectional dependent attributes.
# For each such migratable attribute, if there is a single instance of the attribute
# type in the given migrated domain objects, then the attribute is set to that
# migrated instance.
#
# If the attribute is associated with a proc in proc_hash, then that proc is called
# with this domain object, the migrated instance and the input row. The attribute
# is set to the proc return value.
#
# @param [{Symbol => Object}] row the input row field => value hash
# @param [<Resource>] migrated the migrated instances, including this Resource
# @param [Class] target the migration target class
# @param [{Symbol => Proc}, nil] proc_hash a hash that associates this domain object's
#   attributes to a migration shim block
def migrate_references(row, migrated, target, proc_hash=nil)
  # The owner is resolved before the other references.
  migratable__migrate_owner(row, migrated, target, proc_hash)
  # the independent references
  independents = migratable_independent_attributes
  migratable__set_nonowner_references(independents, row, migrated, proc_hash)
  # the unidirectional dependent references
  dependents = self.class.unidirectional_dependent_attributes
  migratable__set_nonowner_references(dependents, row, migrated, proc_hash)
end
|
121
|
+
|
122
|
+
# Returns the independent attributes of this Resource's class.
# Applications can override this implementation to restrict the independent attributes
# which are migrated, e.g. to include only saved independent attributes.
#
# @return the attributes to migrate
def migratable_independent_attributes
  klass = self.class
  klass.independent_attributes
end
|
130
|
+
|
131
|
+
# Extracts the content of this migration target to the given file.
#
# This base implementation does nothing. Subclasses can override this method
# to write data to the extract.
#
# @param [IO] file the extract output stream
def extract(file); end
|
139
|
+
|
140
|
+
private
|
141
|
+
|
142
|
+
# Migrates the owner as follows:
# * If there is exactly one owner attribute with a migrated reference, then that
#   owner reference is set.
# * Otherwise, if exactly one of the migrated owner references is an instance of
#   the given target class, then that owner reference is set.
# * Otherwise, if all of the migrated owner references are the same owner, then the
#   first owner attribute is set to that owner.
# * Otherwise, if {#migratable__preferred_owner} selects an owner, then the
#   attribute which references the preferred owner is set.
# * Otherwise, no reference is set.
#
# @param row (see #migrate_references)
# @param migrated (see #migrate_references)
# @param target (see #migrate_references)
# @param proc_hash (see #migrate_references)
# @return [Resource, nil] the migrated owner, if any
def migratable__migrate_owner(row, migrated, target, proc_hash=nil)
  # the owner attribute => migrated reference hash
  ovh = self.class.owner_attributes.to_compact_hash do |owner_attr|
    migratable__target_value(self.class.property(owner_attr), row, migrated, proc_hash)
  end
  # Narrow down more than one owner candidate, if possible.
  if ovh.size > 1 then
    # the candidates which reference the target
    on_target = ovh.filter_on_value { |ov| target === ov }.to_hash
    if on_target.size == 1 then
      ovh = on_target
    elsif (owners = ovh.values.uniq).size == 1 then
      # Every attribute references the same owner, so the first attribute will do.
      ovh = {ovh.keys.first => owners.first}
    else
      logger.debug { "The migrated dependent #{qp} has ambiguous migrated owner references #{ovh.qp}." }
      preferred = migratable__preferred_owner(owners)
      if preferred then
        logger.debug { "The preferred dependent #{qp} migrated owner reference is #{preferred.qp}." }
        ovh = {ovh.keys.detect { |k| ovh[k] == preferred } => preferred}
      end
    end
  end
  # There is no owner to set unless there is exactly one remaining candidate.
  return unless ovh.size == 1
  oattr, oref = ovh.first
  set_property_value(oattr, oref)
  logger.debug { "Set the #{qp} #{oattr} owner to the migrated #{oref.qp}." }
  oref
end
|
188
|
+
|
189
|
+
# Selects the owner to use when there is more than one migrated owner candidate.
# This base implementation does not select an owner. Subclasses can override this
# method to select a preferred owner.
#
# @param [<Resource>] candidates the migrated owners
# @return [Resource, nil] the preferred owner, if any
def migratable__preferred_owner(candidates); end
|
196
|
+
|
197
|
+
# Sets each non-owner reference attribute in the given filter to its migrated target
# value, if there is one. A migrated reference is added to a collection attribute
# value, provided that the current collection is not nil. A non-collection attribute
# is set only if it does not already have a value.
#
# @param [Property::Filter] attr_filter the attributes to set
# @param row (see #migrate_references)
# @param migrated (see #migrate_references)
# @param proc_hash (see #migrate_references)
def migratable__set_nonowner_references(attr_filter, row, migrated, proc_hash=nil)
  attr_filter.each_pair do |mattr, pa|
    # Owners are handled separately.
    next if pa.owner?
    # the target value
    ref = migratable__target_value(pa, row, migrated, proc_hash)
    next unless ref
    if pa.collection? then
      # Add the reference to the current collection, if there is one.
      members = send(pa.reader)
      next unless members
      members << ref
      logger.debug { "Added the migrated #{ref.qp} to #{qp} #{mattr}." }
      next
    end
    # A value which is already set is not replaced.
    current = send(mattr)
    if current then
      logger.debug { "Ignoring the migrated #{ref.qp} since #{qp} #{mattr} is already set to #{current.qp}." }
    else
      set_property_value(mattr, ref)
      logger.debug { "Set the #{qp} #{mattr} to the migrated #{ref.qp}." }
    end
  end
end
|
223
|
+
|
224
|
+
# Finds the migrated instance to reference from the given attribute. The candidates
# are the migrated objects other than this object which are instances of the
# attribute type. If proc_hash has a shim proc for the attribute, then the result is
# the value of calling that proc with this object, the candidate and the input row.
#
# @param [Property] pa the reference attribute
# @param row (see #migrate_references)
# @param migrated (see #migrate_references)
# @param proc_hash (see #migrate_references)
# @return [Resource, nil] the migrated instance of the given class, or nil if there is not
#   exactly one such instance
def migratable__target_value(pa, row, migrated, proc_hash=nil)
  # the migrated references which are instances of the attribute type
  refs = migrated.select { |other| other != self && pa.type === other }
  # skip ambiguous references
  if refs.size > 1 then
    logger.debug { "Migrator did not set references to ambiguous targets #{refs.pp_s}." }
  end
  return unless refs.size == 1
  # the single reference
  candidate = refs.first
  # the shim proc, if any
  shim = proc_hash ? proc_hash[pa.to_sym] : nil
  return candidate unless shim
  shim.call(self, candidate, row)
end
|
243
|
+
end
|
244
|
+
end
|