jinx-migrate 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +14 -0
- data/.rspec +3 -0
- data/.yardopts +1 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +38 -0
- data/History.md +6 -0
- data/LEGAL +5 -0
- data/LICENSE +22 -0
- data/README.md +33 -0
- data/Rakefile +40 -0
- data/bin/csvjoin +24 -0
- data/examples/family/README.md +24 -0
- data/examples/family/conf/children/fields.yaml +2 -0
- data/examples/family/conf/parents/defaults.yaml +3 -0
- data/examples/family/conf/parents/fields.yaml +6 -0
- data/examples/family/conf/parents/values.yaml +4 -0
- data/examples/family/data/children.csv +1 -0
- data/examples/family/data/parents.csv +1 -0
- data/examples/family/lib/shims.rb +17 -0
- data/jinx-migrate.gemspec +26 -0
- data/lib/jinx/csv/csvio.rb +214 -0
- data/lib/jinx/csv/joiner.rb +196 -0
- data/lib/jinx/migration/filter.rb +167 -0
- data/lib/jinx/migration/migratable.rb +244 -0
- data/lib/jinx/migration/migrator.rb +1029 -0
- data/lib/jinx/migration/reader.rb +16 -0
- data/lib/jinx/migration/version.rb +5 -0
- data/spec/bad/bad_spec.rb +25 -0
- data/spec/bad/fields.yaml +1 -0
- data/spec/bad/parents.csv +1 -0
- data/spec/bad/shims.rb +16 -0
- data/spec/csv/join/join_helper.rb +35 -0
- data/spec/csv/join/join_spec.rb +100 -0
- data/spec/csv/join/jumbled_src.csv +7 -0
- data/spec/csv/join/jumbled_tgt.csv +7 -0
- data/spec/csv/join/source.csv +7 -0
- data/spec/csv/join/target.csv +7 -0
- data/spec/extract/extract.rb +13 -0
- data/spec/extract/extract_spec.rb +33 -0
- data/spec/extract/fields.yaml +1 -0
- data/spec/extract/parents.csv +1 -0
- data/spec/family/child_spec.rb +27 -0
- data/spec/family/family.rb +13 -0
- data/spec/family/parent_spec.rb +57 -0
- data/spec/filter/fields.yaml +1 -0
- data/spec/filter/filter_spec.rb +20 -0
- data/spec/filter/parents.csv +1 -0
- data/spec/filter/values.yaml +4 -0
- data/spec/primitive/children.csv +1 -0
- data/spec/primitive/fields.yaml +4 -0
- data/spec/primitive/primitive_spec.rb +24 -0
- data/spec/skip/fields.yaml +1 -0
- data/spec/skip/parents.csv +1 -0
- data/spec/skip/skip_spec.rb +17 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/support/model.rb +7 -0
- data/spec/unique/fields.yaml +1 -0
- data/spec/unique/parent.rb +6 -0
- data/spec/unique/parents.csv +1 -0
- data/spec/unique/shims.rb +10 -0
- data/spec/unique/unique_spec.rb +20 -0
- data/test/fixtures/csv/data/empty.csv +1 -0
- data/test/fixtures/csv/data/variety.csv +1 -0
- data/test/lib/csv/csvio_test.rb +74 -0
- metadata +206 -0
@@ -0,0 +1,196 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
module Jinx
|
4
|
+
module Csv
|
5
|
+
# Merges two CSV files on common fields.
|
6
|
+
class Joiner
|
7
|
+
include Enumerable
|
8
|
+
|
9
|
+
# @param [String, IO] source the join source
|
10
|
+
# @param [String, IO] target the join target (default stdin)
|
11
|
+
# @param [String, IO, nil] output the output file name or device (default stdout)
|
12
|
+
def initialize(source, target=nil, output=nil)
  # A missing target is read from stdin and a missing output is written to stdout.
  @source, @target, @output = source, (target || STDIN), (output || STDOUT)
end
|
17
|
+
|
18
|
+
# Joins the source to the target and writes the output. The source fields used are
|
19
|
+
# given by the +fields+ argument, if given. By default, all source fields are used.
|
20
|
+
#
|
21
|
+
# The output fields consist of the qualified source fields and all target fields.
|
22
|
+
# The output fields are in the following order:
|
23
|
+
# 1. The common fields, in order of occurrence in the source file.
|
24
|
+
# 2. The qualified source-specific fields, in order of occurrence in the source file.
|
25
|
+
# 3. The target-specific fields, in order of occurrence in the target file.
|
26
|
+
#
|
27
|
+
# The match is on the common qualified source and target fields.
|
28
|
+
# Both files must be sorted in order of the common fields, sequenced by their
|
29
|
+
# occurrence in the source header.
|
30
|
+
#
|
31
|
+
# If an output argument is given, then the joined record is written to the output.
|
32
|
+
# If a block is given, then the block is called on each record prior to writing
|
33
|
+
# the record to the output. If the block returns nil, then the record is not
|
34
|
+
# written.
|
35
|
+
#
|
36
|
+
# @param [<String>] fields the optional source fields to merge
|
37
|
+
# (default is all source fields)
|
38
|
+
# @yield [rec] process the output record and return the record to write
|
39
|
+
# @yieldparam [FasterCSV::Record] rec the output record
|
40
|
+
def join(*fields, &block)
  CsvIO.open(@target) do |tgt|
    CsvIO.open(@source) do |src|
      # all source fields (unordered), used to validate the requested join fields
      usflds = src.field_names.to_set
      fields.each do |fld|
        unless usflds.include?(fld) then
          raise ArgumentError.new("CSV join field #{fld} not found in the source file #{@source}.")
        end
      end
      # the qualified source fields (ordered)
      qsflds = fields.empty? ? src.field_names : fields
      tflds = tgt.field_names
      # The match key consists of the qualified source fields which also occur in the target.
      @common = qsflds & tflds
      # The headers consist of the common fields followed by the qualified
      # source-specific fields followed by the target-specific fields.
      hdrs = @common | qsflds | tflds
      CsvIO.open(@output, :mode => 'w', :headers => hdrs) do |out|
        merge(src, tgt, out, &block)
      end
    end
  end
end

# The Enumerable iterator is the join on all source fields.
# The alias is declared beside the method rather than inside the method body,
# so that +each+ is defined as soon as the class is loaded instead of only
# after the first call to +join+.
alias :each :join
|
65
|
+
|
66
|
+
private
|
67
|
+
|
68
|
+
# The merge record holder: +key+ is the array of common field values for the record,
# +record+ is the CSV record itself and +lookahead+ is the Buffer for the record
# which follows it, or nil at end of file.
Buffer ||= Struct.new(:key, :record, :lookahead)
|
69
|
+
|
70
|
+
# Merges the given source into the target as the output.
|
71
|
+
# The output headers must be in the order specified by {#join}.
|
72
|
+
#
|
73
|
+
# @param [CsvIO] source the source CSV IO
|
74
|
+
# @param [CsvIO] target the target CSV IO
|
75
|
+
# @param [CsvIO] output the merged output CSV IO
|
76
|
+
# @yield (see #join)
|
77
|
+
# @yieldparam (see #join)
|
78
|
+
# @see #join
|
79
|
+
def merge(source, target, output)
  # Partition the accessors: the qualified source fields, the target fields,
  # the fields common to both, and what remains on each side.
  sflds = source.accessors & output.accessors
  tflds = target.accessors
  @common = sflds & tflds
  trest = tflds - @common
  srest = output.accessors - trest - @common
  # The output record is allocated once and refilled for every emitted row.
  obuf = Array.new(output.accessors.size)
  # Prime the source and target buffers with their first records.
  sbuf = shift(source)
  tbuf = shift(target)
  # The comparison is nil only when both inputs are exhausted.
  while (cmp = compare(sbuf, tbuf)) do
    obuf.each_index do |i|
      obuf[i] =
        if i < @common.size then
          # the key comes from whichever side does not lag behind
          (cmp <= 0 ? sbuf : tbuf).key[i]
        elsif i < sflds.size then
          # source values are used only if there is a current source record
          # and the target does not precede the source
          sbuf.record[srest[i - @common.size]] if sbuf and cmp <= 0
        elsif tbuf and cmp >= 0 then
          # target values are used only if there is a current target record
          # and the source does not precede the target
          tbuf.record[trest[i - sflds.size]]
        end
    end
    # The optional block can replace or veto the output record.
    orec = obuf
    orec = yield(obuf) if block_given?
    output << orec if orec
    # Decide both shifts before performing either, since a shift changes the buffers.
    advance_source = shift?(sbuf, tbuf, cmp)
    advance_target = shift?(tbuf, sbuf, -cmp)
    sbuf = shift(source, sbuf) if advance_source
    tbuf = shift(target, tbuf) if advance_target
  end
end
|
121
|
+
|
122
|
+
# Returns whether to shift the given buffer as follows:
|
123
|
+
# * If the buffer precedes the other buffer, then true.
|
124
|
+
# * If the buffer succeeds the other buffer, then false.
|
125
|
+
# * Otherwise, if the lookahead record has the same key as the buffer record then true.
|
126
|
+
# * Otherwise, if the other lookahead record has a different key than the other record, then true.
|
127
|
+
#
|
128
|
+
# @param [Buffer] buf the record buffer to check
|
129
|
+
# @param [Buffer] other the other record buffer
|
130
|
+
# @param [-1, 0, 1] order the buffer comparison
|
131
|
+
# @return [Boolean] whether to shift the buffer
|
132
|
+
def shift?(buf, other, order)
  # A buffer which precedes the other always advances; one which succeeds it never does.
  return true if order == -1
  return false if order == 1
  # Any order other than a tie has no answer.
  return unless order == 0
  # On a tie, advance if the next record repeats this key...
  return true if compare(buf, buf.lookahead) == 0
  # ...or if the other side is about to move on to a different key.
  compare(other, other.lookahead) != 0
end
|
142
|
+
|
143
|
+
# Reads a record into the given buffers.
|
144
|
+
#
|
145
|
+
# @param [CsvIO] csvio the open CSV stream to read
|
146
|
+
# @param [Buffer, nil] buf the current record buffer
|
147
|
+
# @return [Buffer, nil] the next current buffer, or nil if end of file
|
148
|
+
def shift(csvio, buf=nil)
  unless buf then
    # First call: make a buffer whose look-ahead holds the first record, then advance into it.
    return shift(csvio, Buffer.new(nil, nil, look_ahead(csvio)))
  end
  pending = buf.lookahead
  # no look-ahead means the stream is exhausted
  return if pending.nil?
  # Promote the look-ahead content to the current record...
  buf.record, buf.key = pending.record, pending.key
  # ...and refill the look-ahead holder from the stream.
  buf.lookahead = look_ahead(csvio, pending)
  buf
end
|
161
|
+
|
162
|
+
# @param [CsvIO] csvio the CSV file stream
|
163
|
+
# @param [Buffer, nil] buf the look-ahead buffer to reuse
|
164
|
+
# @return [Buffer, nil] the modified look-ahead, or nil if end of file
|
165
|
+
def look_ahead(csvio, buf=nil)
  rec = csvio.next
  # nothing left to read
  return unless rec
  # Reuse the given holder if there is one, otherwise make a new one.
  buf = Buffer.new unless buf
  # The key is the record's values for the common fields, in common field order.
  buf.key = @common.map { |k| rec[k] }
  buf.record = rec
  buf
end
|
172
|
+
|
173
|
+
# Compares the given source and target buffers with result as follows:
|
174
|
+
# * If source and target are nil, then nil
|
175
|
+
# * If source is nil and target is not nil, then 1
|
176
|
+
# * If target is nil and source is not nil, then -1
|
177
|
+
# * Otherwise, the pair-wise comparison of the source and target keys
|
178
|
+
#
|
179
|
+
# @param [:key] the key holder
|
180
|
+
# @return [-1, 0 , 1, nil] the comparison result
|
181
|
+
def compare(source, target)
  # An exhausted source sorts after a remaining target, and vice versa.
  # The result is nil only if both are exhausted.
  return target.nil? ? nil : 1 if source.nil?
  return -1 if target.nil?
  source.key.each_with_index do |v1, i|
    v2 = target.key[i]
    # a missing key value sorts before a non-missing key value
    next if v1.nil? and v2.nil?
    return -1 if v1.nil?
    return 1 if v2.nil?
    cmp = v1 <=> v2
    # A nil result would be mistaken by the caller for the end of both inputs
    # and silently end the merge, so report the incomparable values instead.
    if cmp.nil? then
      raise ArgumentError.new("CSV join key values cannot be compared: #{v1.inspect} (#{v1.class}) and #{v2.inspect} (#{v2.class})")
    end
    # Normalize to -1 or 1, since the shift decision only recognizes -1, 0 and 1.
    return cmp <=> 0 unless cmp == 0
  end
  0
end
|
194
|
+
end
|
195
|
+
end
|
196
|
+
end
|
@@ -0,0 +1,167 @@
|
|
1
|
+
require 'jinx/helpers/validation'
|
2
|
+
|
3
|
+
module Jinx
|
4
|
+
module Migration
|
5
|
+
# Transforms input values to a result based on a migration filter configuration.
|
6
|
+
# Each configuration entry is one of the following:
|
7
|
+
# * literal: literal
|
8
|
+
# * regexp: literal
|
9
|
+
# * regexp: template
|
10
|
+
#
|
11
|
+
# The regexp template can include match references (+$1+, +$2+, etc.) corresponding to the regexp captures.
|
12
|
+
# If the input value equals a literal, then the mapped literal is returned. Otherwise, if the input value
|
13
|
+
# matches a regexp, then the mapped transformation is returned after reference substitution. Otherwise,
|
14
|
+
# the input value is returned unchanged.
|
15
|
+
#
|
16
|
+
# For example, the config:
|
17
|
+
# /(\d{1,2})\/x\/(\d{1,2})/ : $1/15/$2
|
18
|
+
# n/a : ~
|
19
|
+
# converts the input value as follows:
|
20
|
+
# 3/12/02 => 3/12/02 (no match)
|
21
|
+
# 5/x/04 => 5/15/04
|
22
|
+
# n/a => nil
|
23
|
+
#
|
24
|
+
# A catch-all +/.*/+ regexp transforms any value which does not match another value or regexp, e.g.:
|
25
|
+
# /^(\d+(\.\d*)?)( g(ram)?s?)?$/ : $1
|
26
|
+
# /.*/ : 0
|
27
|
+
# converts the input value as follows:
|
28
|
+
# 3 => 3
|
29
|
+
# 4.3 grams => 4.3
|
30
|
+
# unknown => 0
|
31
|
+
class Filter
|
32
|
+
# Builds the filter proc from the given specification or block.
|
33
|
+
# If both a specification and a block are given, then the block is applied before
|
34
|
+
# the specification.
|
35
|
+
#
|
36
|
+
# @param [String] spec the filter configuration specification.
|
37
|
+
# @yield [value] converts the input field value into a caTissue property value
|
38
|
+
# @yieldparam value the CSV input value
|
39
|
+
def initialize(spec=nil, &block)
  # A specification is compiled into a proc (which takes the optional block);
  # without a specification the block itself is the filter.
  @proc = if spec then to_proc(spec, &block) else block end
  if @proc.nil? then
    raise ArgumentError.new("Migration filter is missing both a specification and a block")
  end
end
|
43
|
+
|
44
|
+
# @param [String] value the input string
|
45
|
+
# @return the transformed result
|
46
|
+
def transform(value)
  # Delegate to the filter proc built by the initializer.
  @proc.(value)
end
|
49
|
+
|
50
|
+
private
|
51
|
+
|
52
|
+
# The pattern to match a regular expression with captures.
|
53
|
+
# @private
|
54
|
+
# Matches a +/pattern/+ specification key with optional trailing qualifier characters.
# Capture 1 is the pattern body and capture 2 is the qualifier string, if any.
REGEXP_PAT = /^\/(.*[^\\])\/([inx]+)?$/
|
55
|
+
|
56
|
+
# Builds the filter proc from the given specification.
|
57
|
+
# If both a specification and a block are given, then the block is applied before
|
58
|
+
# the specification.
|
59
|
+
#
|
60
|
+
# @param (see #initialize)
|
61
|
+
# @yield (see #initialize)
|
62
|
+
# @yieldparam (see #initialize)
|
63
|
+
# @return [Proc] a proc which convert the input field value into a caTissue property value
|
64
|
+
def to_proc(spec=nil, &block)
  # Split the filter spec into a pattern => value hash and a straight value => value hash.
  # Only keys which support pattern matching can be a /pattern/ string; other keys
  # (e.g. numbers) are literals.
  ph, vh = spec.split { |k, v| k.respond_to?(:=~) and k =~ REGEXP_PAT }
  # The Regexp => value hash is built from the pattern => value hash.
  reh = regexp_hash(ph)
  # The value proc. The block is passed along so that it is applied to the
  # input value before the specification.
  value_proc(reh, vh, &block)
end
|
72
|
+
|
73
|
+
# @param {Regexp => (Object, <Integer>)} regexp_hash the regexp => (result, indexes) hash
|
74
|
+
# @param {String => Object} value_hash the value => result hash
|
75
|
+
# @yield (see #to_proc)
|
76
|
+
# @yieldparam (see #to_proc)
|
77
|
+
# @return [Proc] a proc which convert the input field value into a caTissue property value
|
78
|
+
def value_proc(regexp_hash, value_hash)
  # The new proc matches preferentially on the literal value, then the first matching regexp.
  # If there is no match on either a literal or a regexp, then the catch-all result is
  # returned if one is configured, otherwise the value is preserved.
  Proc.new do |value|
    # Apply the optional block given to this method before consulting the configuration.
    value = yield(value) if block_given?
    if value_hash.has_key?(value) then
      value_hash[value]
    else
      # Only values which support =~ can match a regexp. Non-string values such as
      # numbers no longer respond to =~ as of Ruby 3.2 and are treated as no match.
      matchable = value.respond_to?(:=~)
      # The first regexp which matches the value.
      regexp = regexp_hash.detect_key { |re| matchable and value =~ re }
      if regexp then
        reval, ndxs = regexp_hash[regexp]
        if ndxs.empty? or not String === reval then
          # a literal result without match references
          reval
        else
          # The match captures (cpts[i - 1] is the $i match). The match data is
          # that of the successful match made in the detect block above.
          cpts = $~.captures
          # Substitute the captures selected by the configuration into the template,
          # e.g. the value filter:
          #   /(Grade )?(\d)/ : $2
          # is parsed as (reval, ndxs) = ('%s', [1])
          # and transforms 'Grade 3' to cpts[1], or '3'.
          fmtd = reval % ndxs.map { |i| cpts[i] }
          # a blank substitution result is nil
          fmtd unless fmtd.blank?
        end
      elsif defined? @catch_all then
        @catch_all
      else
        value
      end
    end
  end
end
|
113
|
+
|
114
|
+
# Parses the configuration pattern string => value hash into a regexp => value hash
|
115
|
+
# qualified by the match indexes used to substitute match captures into the hash value.
|
116
|
+
#
|
117
|
+
# The pattern hash value can include match references ($1, $2, etc.). In that case,
|
118
|
+
# the match captures substitute into a %s format reference in the result.
|
119
|
+
#
|
120
|
+
# @example
|
121
|
+
# regexp_hash({'/Golf/i' => 1}) #=> {/Golf/i => [1, []]}
|
122
|
+
# regexp_hash({'/Hole (\d{1,2})/' => '$1'}) #=> {/Hole (\d{1,2})/ => ['%s', [0]]}
|
123
|
+
#
|
124
|
+
# @param [{String => Object}] pat_hash the string => value hash
|
125
|
+
# @return [{Regexp => (Object, <Integer>)}] the corresponding regexp => (value, indexes) hash
|
126
|
+
def regexp_hash(pat_hash)
  # the Regexp => (value, indexes) result
  reh = {}
  pat_hash.each do |k, v|
    # Parse the /pattern/opts string into the pattern and the options.
    pat, opt = REGEXP_PAT.match(k).captures
    if pat == '.*' then
      # The catch-all is held aside rather than compiled into a matcher.
      @catch_all = v
      next
    end
    # Convert the option string to a Regexp initializer parameter.
    # Only the i option is supported.
    reopt = case opt
      when nil then nil
      when 'i' then Regexp::IGNORECASE
      else Jinx.fail(MigrationError, "Migration value filter regular expression #{k} qualifier not supported: expected 'i', found '#{opt}'")
    end
    # Map the compiled matcher to the parsed (template, capture indexes) result.
    reh[Regexp.new(pat, reopt)] = parse_regexp_value(v)
  end
  reh
end
|
152
|
+
|
153
|
+
# @example
|
154
|
+
# parse_regexp_value('Grade $2') #=> ['Grade %s', [1]]
|
155
|
+
# @param value the value in the configuration regexp => value entry
|
156
|
+
# @return (Object, <Integer>) the parsed (value, indexes)
|
157
|
+
# @see #regexp_hash
|
158
|
+
def parse_regexp_value(value)
  # Only a string can hold match references. Other configuration values (numbers, nil,
  # booleans) are literal results; numbers do not respond to =~ as of Ruby 3.2.
  return [value, Array::EMPTY_ARRAY] unless String === value and value =~ /\$\d/
  # Escape each literal percent sign so that it survives the String#% formatting
  # applied to the template, then replace each $n match reference with a %s
  # format reference.
  tmpl = value.gsub('%', '%%').gsub(/\$\d/, '%s')
  # The zero-based capture index for each $n match reference, in order of occurrence.
  ndxs = value.scan(/\$(\d)/).map { |matches| matches.first.to_i - 1 }
  [tmpl, ndxs]
end
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
@@ -0,0 +1,244 @@
|
|
1
|
+
module Jinx
|
2
|
+
# The Migratable mix-in adds migration support for Resource domain objects.
|
3
|
+
# For each migration Resource created by a Migrator, the migration process
|
4
|
+
# is as follows:
|
5
|
+
#
|
6
|
+
# 1. The migrator creates the Resource using the empty constructor.
|
7
|
+
#
|
8
|
+
# 2. Each input field value which maps to a Resource attribute is obtained from the
|
9
|
+
# migration source.
|
10
|
+
#
|
11
|
+
# 3. If the Resource class implements a method +migrate_+_attribute_ for the
|
12
|
+
# migration _attribute_, then that migrate method is called with the input value
|
13
|
+
# argument. If there is a migrate method, then the attribute is set to the
|
14
|
+
# result of calling that method, otherwise the attribute is set to the original
|
15
|
+
# input value.
|
16
|
+
#
|
17
|
+
# For example, if the +Name+ input field maps to +Parent.name+, then a
|
18
|
+
# custom +Parent+ +migrate_name+ shim method can be defined to reformat
|
19
|
+
# the input name.
|
20
|
+
#
|
21
|
+
# 4. The Resource attribute is set to the (possibly modified) value.
|
22
|
+
#
|
23
|
+
# 5. After all input fields are processed, then {#migration_valid?} is called to
|
24
|
+
# determine whether the migrated object can be used. {#migration_valid?} is true
|
25
|
+
# by default, but a migration shim can override this method in the
|
26
|
+
# migrated Resource class to return false for special cases.
|
27
|
+
#
|
28
|
+
# For example, a custom +Parent+ +migration_valid?+ shim method can be
|
29
|
+
# defined to return whether there is a non-empty input field value.
|
30
|
+
#
|
31
|
+
# 6. After the migrated objects are validated, then the Migrator fills in
|
32
|
+
# dependency hierarchy gaps. For example, if the Resource class +Parent+
|
33
|
+
# owns the +household+ dependent which in turn owns the +address+ dependent
|
34
|
+
# and the migration has created a +Parent+ and an +Address+ but no +Household+,
|
35
|
+
# then an empty +Household+ is created which is owned by the migrated +Parent+
|
36
|
+
# and owns the migrated +Address+.
|
37
|
+
#
|
38
|
+
# 7. After all dependencies are filled in, then the independent references are set
|
39
|
+
# for each created Resource (including the new dependents). If a created
|
40
|
+
# Resource has an independent non-collection Resource reference attribute
|
41
|
+
# and there is a migrated instance of that attribute type, then the attribute
|
42
|
+
# is set to that migrated instance.
|
43
|
+
#
|
44
|
+
# For example, if +Household+ has a +address+ attribute and there is a
|
45
|
+
# single migrated +Address+ instance, then the +address+ attribute is set
|
46
|
+
# to that migrated +Address+ instance.
|
47
|
+
#
|
48
|
+
# If the referencing class implements a method +migrate_+_attribute_ for the
|
49
|
+
# migration _attribute_, then that migrate method is called with the referenced
|
50
|
+
# instance argument. The result is used to set the attribute. Otherwise, the
|
51
|
+
# attribute is set to the original referenced instance.
|
52
|
+
#
|
53
|
+
# There must be a single unambiguous candidate independent instance, e.g. in the
|
54
|
+
# unlikely but conceivable case that two +Address+ instances are migrated, then the
|
55
|
+
# +address+ attribute is not set. Similarly, collection attributes are not set,
|
56
|
+
# e.g. a +Address+ +protocols+ attribute is not set to a migrated +Protocol+
|
57
|
+
# instance.
|
58
|
+
#
|
59
|
+
# 8. The {#migrate} method is called to complete the migration. As described in the
|
60
|
+
# method documentation, a migration shim Resource subclass can override the
|
61
|
+
# method for custom migration processing, e.g. to migrate the ambiguous or
|
62
|
+
# collection attributes mentioned above, or to fill in missing values.
|
63
|
+
#
|
64
|
+
# Note that there is an extensive set of attribute defaults defined in the +Jinx::Resource+
|
65
|
+
# application domain classes. These defaults are applied in a migration database save
|
66
|
+
# action and need not be set in a migration shim. For example, if an acceptable
|
67
|
+
# default for an +Address.country+ property is defined in the +Address+ meta-data,
|
68
|
+
# then the country does not need to be set in a migration shim.
|
69
|
+
module Migratable
|
70
|
+
# Completes setting this Migratable domain object's attributes from the given input row.
|
71
|
+
# This method is responsible for migrating attributes which are not mapped
|
72
|
+
# in the configuration. It is called after the configuration attributes for
|
73
|
+
# the given row are migrated and before {#migrate_references}.
|
74
|
+
#
|
75
|
+
# This base implementation is a no-op.
|
76
|
+
# Subclasses can modify this method to complete the migration. The overridden
|
77
|
+
# methods should call +super+ to pick up the superclass migration.
|
78
|
+
#
|
79
|
+
# @param [{Symbol => Object}] row the input row field => value hash
|
80
|
+
# @param [<Resource>] migrated the migrated instances, including this domain object
|
81
|
+
def migrate(row, migrated)
  # Nothing to do here: this is the hook which migration shims override.
end
|
83
|
+
|
84
|
+
# Returns whether this migration target domain object is valid. The default is true.
|
85
|
+
# A migration shim should override this method on the target if there are conditions
|
86
|
+
# which determine whether the migration should be skipped for this target object.
|
87
|
+
#
|
88
|
+
# @return [Boolean] whether this migration target domain object is valid
|
89
|
+
def migration_valid?
  # Every migrated object is accepted unless a shim overrides this check.
  true
end
|
92
|
+
|
93
|
+
# Migrates this domain object's migratable references. This method is called by the
|
94
|
+
# Migrator and should not be overridden by subclasses. Subclasses tailor
|
95
|
+
# individual reference attribute migration by defining a +migrate_+_attribute_ method
|
96
|
+
# for the _attribute_ to modify.
|
97
|
+
#
|
98
|
+
# The migratable reference attributes consist of the non-collection saved independent
|
99
|
+
# attributes and the unidirectional dependent attributes which don't already have a value.
|
100
|
+
# For each such migratable attribute, if there is a single instance of the attribute
|
101
|
+
# type in the given migrated domain objects, then the attribute is set to that
|
102
|
+
# migrated instance.
|
103
|
+
#
|
104
|
+
# If the attribute is associated with a method in proc_hash, then that method is called
|
105
|
+
# on the migrated instance and input row. The attribute is set to the method return value.
|
106
|
+
# proc_hash includes an entry for each +migrate_+_attribute_ method defined by this
|
107
|
+
# Resource's class.
|
108
|
+
#
|
109
|
+
# @param [{Symbol => Object}] row the input row field => value hash
|
110
|
+
# @param [<Resource>] migrated the migrated instances, including this Resource
|
111
|
+
# @param [Class] target the migration target class
|
112
|
+
# @param [{Symbol => Proc}, nil] proc_hash a hash that associates this domain object's
|
113
|
+
# attributes to a migration shim block
|
114
|
+
def migrate_references(row, migrated, target, proc_hash=nil)
  # The owner reference is resolved first.
  migratable__migrate_owner(row, migrated, target, proc_hash)
  # Then the independent references...
  independents = migratable_independent_attributes
  migratable__set_nonowner_references(independents, row, migrated, proc_hash)
  # ...followed by the unidirectional dependent references.
  dependents = self.class.unidirectional_dependent_attributes
  migratable__set_nonowner_references(dependents, row, migrated, proc_hash)
end
|
121
|
+
|
122
|
+
# Returns this Resource's class {Propertied#independent_attributes}.
|
123
|
+
# Applications can override this implementation to restrict the independent attributes which
|
124
|
+
# are migrated, e.g. to include only saved independent attributes.
|
125
|
+
#
|
126
|
+
# @return the attributes to migrate
|
127
|
+
def migratable_independent_attributes
  # By default, all of the class independent attributes are migrated.
  klass = self.class
  klass.independent_attributes
end
|
130
|
+
|
131
|
+
# Extracts the content of this migration target to the given file.
|
132
|
+
#
|
133
|
+
# This base implementation is a no-op.
|
134
|
+
# Subclasses can modify this method to write data to the extract.
|
135
|
+
#
|
136
|
+
# @param [IO] file the extract output stream
|
137
|
+
def extract(file)
  # Nothing is written by default: this is the hook which subclasses override.
end
|
139
|
+
|
140
|
+
private
|
141
|
+
|
142
|
+
# Migrates the owner as follows:
|
143
|
+
# * If there is exactly one migrated owner, then the owner reference is
|
144
|
+
# set to that owner.
|
145
|
+
# * Otherwise, if there is more than one owner but only one owner instance
|
146
|
+
# of the given target class, then that target instance is that owner.
|
147
|
+
# * Otherwise, no reference is set.
|
148
|
+
#
|
149
|
+
# @param row (see #migrate_references)
|
150
|
+
# @param migrated (see #migrate_references)
|
151
|
+
# @param target (see #migrate_references)
|
152
|
+
# @param proc_hash (see #migrate_references)
|
153
|
+
# @return [Resource, nil] the migrated owner, if any
|
154
|
+
def migratable__migrate_owner(row, migrated, target, proc_hash=nil)
  # the owner attribute => migrated reference hash, omitting attributes without a candidate
  ovh = self.class.owner_attributes.to_compact_hash do |mattr|
    migratable__target_value(self.class.property(mattr), row, migrated, proc_hash)
  end
  # Narrow down multiple owner candidates.
  if ovh.size > 1 then
    # the candidates which are instances of the migration target class
    tvh = ovh.filter_on_value { |ov| target === ov }.to_hash
    if tvh.size == 1 then
      ovh = tvh
    elsif (ownrs = ovh.values.uniq).size == 1 then
      # Several attributes reference the same owner: use the first attribute.
      ovh = {ovh.keys.first => ownrs.first}
    else
      logger.debug { "The migrated dependent #{qp} has ambiguous migrated owner references #{ovh.qp}." }
      # Let the subclass hook break the tie, if it can.
      preferred = migratable__preferred_owner(ownrs)
      if preferred then
        logger.debug { "The preferred dependent #{qp} migrated owner reference is #{preferred.qp}." }
        ovh = {ovh.keys.detect { |k| ovh[k] == preferred } => preferred}
      end
    end
  end
  # Without exactly one remaining candidate, no owner is set.
  return unless ovh.size == 1
  oattr, oref = ovh.first
  set_property_value(oattr, oref)
  logger.debug { "Set the #{qp} #{oattr} owner to the migrated #{oref.qp}." }
  oref
end
|
188
|
+
|
189
|
+
# This base implementation returns nil. Subclasses can override this to select a preferred owner.
|
190
|
+
#
|
191
|
+
# @param [<Resource>] candidates the migrated owners
|
192
|
+
# @return [Resource] the preferred owner
|
193
|
+
def migratable__preferred_owner(candidates)
  # No candidate is preferred by default.
  nil
end
|
196
|
+
|
197
|
+
# @param [Property::Filter] the attributes to set
|
198
|
+
# @param row (see #migrate_references)
|
199
|
+
# @param migrated (see #migrate_references)
|
200
|
+
# @param proc_hash (see #migrate_references)
|
201
|
+
def migratable__set_nonowner_references(attr_filter, row, migrated, proc_hash=nil)
  attr_filter.each_pair do |mattr, pa|
    # Owner attributes are not set here.
    next if pa.owner?
    # the migrated object to reference, if there is one
    ref = migratable__target_value(pa, row, migrated, proc_hash)
    next unless ref
    if pa.collection? then
      # Add to the current collection value, if there is one.
      value = send(pa.reader)
      next unless value
      value << ref
      logger.debug { "Added the migrated #{ref.qp} to #{qp} #{mattr}." }
    elsif (current = send(mattr)) then
      # An existing value is not overwritten.
      logger.debug { "Ignoring the migrated #{ref.qp} since #{qp} #{mattr} is already set to #{current.qp}." }
    else
      set_property_value(mattr, ref)
      logger.debug { "Set the #{qp} #{mattr} to the migrated #{ref.qp}." }
    end
  end
end
|
223
|
+
|
224
|
+
# @param [Property] pa the reference attribute
|
225
|
+
# @param row (see #migrate_references)
|
226
|
+
# @param migrated (see #migrate_references)
|
227
|
+
# @param proc_hash (see #migrate_references)
|
228
|
+
# @return [Resource, nil] the migrated instance of the given class, or nil if there is not
|
229
|
+
# exactly one such instance
|
230
|
+
def migratable__target_value(pa, row, migrated, proc_hash=nil)
  # the migrated objects, other than this one, which are instances of the attribute type
  refs = migrated.select { |other| other != self and pa.type === other }
  case refs.size
  when 1 then
    ref = refs.first
    # Call the shim registered for the attribute, if any, otherwise use the reference as is.
    shim = proc_hash && proc_hash[pa.to_sym]
    shim ? shim.call(self, ref, row) : ref
  when 0 then
    nil
  else
    # More than one candidate is ambiguous and therefore skipped.
    logger.debug { "Migrator did not set references to ambiguous targets #{refs.pp_s}." }
    nil
  end
end
|
243
|
+
end
|
244
|
+
end
|