jinx-migrate 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. data/.gitignore +14 -0
  2. data/.rspec +3 -0
  3. data/.yardopts +1 -0
  4. data/Gemfile +8 -0
  5. data/Gemfile.lock +38 -0
  6. data/History.md +6 -0
  7. data/LEGAL +5 -0
  8. data/LICENSE +22 -0
  9. data/README.md +33 -0
  10. data/Rakefile +40 -0
  11. data/bin/csvjoin +24 -0
  12. data/examples/family/README.md +24 -0
  13. data/examples/family/conf/children/fields.yaml +2 -0
  14. data/examples/family/conf/parents/defaults.yaml +3 -0
  15. data/examples/family/conf/parents/fields.yaml +6 -0
  16. data/examples/family/conf/parents/values.yaml +4 -0
  17. data/examples/family/data/children.csv +1 -0
  18. data/examples/family/data/parents.csv +1 -0
  19. data/examples/family/lib/shims.rb +17 -0
  20. data/jinx-migrate.gemspec +26 -0
  21. data/lib/jinx/csv/csvio.rb +214 -0
  22. data/lib/jinx/csv/joiner.rb +196 -0
  23. data/lib/jinx/migration/filter.rb +167 -0
  24. data/lib/jinx/migration/migratable.rb +244 -0
  25. data/lib/jinx/migration/migrator.rb +1029 -0
  26. data/lib/jinx/migration/reader.rb +16 -0
  27. data/lib/jinx/migration/version.rb +5 -0
  28. data/spec/bad/bad_spec.rb +25 -0
  29. data/spec/bad/fields.yaml +1 -0
  30. data/spec/bad/parents.csv +1 -0
  31. data/spec/bad/shims.rb +16 -0
  32. data/spec/csv/join/join_helper.rb +35 -0
  33. data/spec/csv/join/join_spec.rb +100 -0
  34. data/spec/csv/join/jumbled_src.csv +7 -0
  35. data/spec/csv/join/jumbled_tgt.csv +7 -0
  36. data/spec/csv/join/source.csv +7 -0
  37. data/spec/csv/join/target.csv +7 -0
  38. data/spec/extract/extract.rb +13 -0
  39. data/spec/extract/extract_spec.rb +33 -0
  40. data/spec/extract/fields.yaml +1 -0
  41. data/spec/extract/parents.csv +1 -0
  42. data/spec/family/child_spec.rb +27 -0
  43. data/spec/family/family.rb +13 -0
  44. data/spec/family/parent_spec.rb +57 -0
  45. data/spec/filter/fields.yaml +1 -0
  46. data/spec/filter/filter_spec.rb +20 -0
  47. data/spec/filter/parents.csv +1 -0
  48. data/spec/filter/values.yaml +4 -0
  49. data/spec/primitive/children.csv +1 -0
  50. data/spec/primitive/fields.yaml +4 -0
  51. data/spec/primitive/primitive_spec.rb +24 -0
  52. data/spec/skip/fields.yaml +1 -0
  53. data/spec/skip/parents.csv +1 -0
  54. data/spec/skip/skip_spec.rb +17 -0
  55. data/spec/spec_helper.rb +17 -0
  56. data/spec/support/model.rb +7 -0
  57. data/spec/unique/fields.yaml +1 -0
  58. data/spec/unique/parent.rb +6 -0
  59. data/spec/unique/parents.csv +1 -0
  60. data/spec/unique/shims.rb +10 -0
  61. data/spec/unique/unique_spec.rb +20 -0
  62. data/test/fixtures/csv/data/empty.csv +1 -0
  63. data/test/fixtures/csv/data/variety.csv +1 -0
  64. data/test/lib/csv/csvio_test.rb +74 -0
  65. metadata +206 -0
@@ -0,0 +1,1029 @@
1
+ require 'yaml'
2
+ require 'jinx/csv/csvio'
3
+ require 'jinx/helpers/boolean'
4
+ require 'jinx/helpers/class'
5
+ require 'jinx/helpers/collections'
6
+ require 'jinx/helpers/lazy_hash'
7
+ require 'jinx/helpers/log'
8
+ require 'jinx/helpers/inflector'
9
+ require 'jinx/helpers/pretty_print'
10
+ require 'jinx/helpers/stopwatch'
11
+ require 'jinx/helpers/transitive_closure'
12
+ require 'jinx/migration/migratable'
13
+ require 'jinx/migration/reader'
14
+ require 'jinx/migration/filter'
15
+
16
+ module Jinx
17
+ class MigrationError < RuntimeError; end
18
+
19
+ # Migrates a CSV extract to a caBIG application.
20
+ class Migrator
21
+ include Enumerable
22
+
23
+ # Creates a new Migrator. The input can be either a file name or a source factory. The factory
24
+ # implements the +open+ method, which returns a {Migration::Reader}
25
+ #
26
+ # @param [{Symbol => Object}] opts the migration options
27
+ # @option opts [String] :target the required target domain class
28
+ # @option opts [<String>, String] :mapping the required input field => caTissue attribute mapping file(s)
29
+ # @option opts [String, Migration::Reader] :input the required input file name or an adapter which
30
+ # implements the {Migration::Reader} methods
31
+ # @option opts [Jinx::Database] :database the optional destination +Jinx::Database+
32
+ # @option opts [<String>, String] :defaults the optional caTissue attribute => value default mapping file(s)
33
+ # @option opts [<String>, String] :filters the optional caTissue attribute input value => caTissue value filter file(s)
34
+ # @option opts [<String>, String] :shims the optional shim file(s) to load
35
+ # @option opts [String] :unique the optional flag which ensures that migrator calls the +uniquify+ method on
36
+ # those migrated objects whose class includes the +Unique+ module
37
+ # @option opts [String] :create the optional flag indicating that existing target objects are ignored
38
+ # @option opts [String] :bad the optional invalid record file
39
+ # @option opts [String, IO] :extract the optional extract file or object that responds to +<<+
40
+ # @option opts [<String>] :extract_headers the optional extract CSV field headers
41
+ # @option opts [Integer] :from the optional starting source record number to process
42
+ # @option opts [Integer] :to the optional ending source record number to process
43
+ # @option opts [Boolean] :quiet the optional flag which suppresses output messages
44
+ # @option opts [Boolean] :verbose the optional flag to print the migration progress
45
+ def initialize(opts)
46
+ @rec_cnt = 0
47
+ @mgt_mths = {}
48
+ parse_options(opts)
49
+ build
50
+ end
51
+
52
+ # Imports this migrator's file into the database with the given connect options.
53
+ # This method creates or updates the domain objects mapped from the import source.
54
+ # If a block is given to this method, then the block is called on each stored
55
+ # migration target object.
56
+ #
57
+ # If the +:create+ option is set, then an input record for a target object which already
58
+ # exists in the database is noted in a debug log message and ignored rather than updated.
59
+ #
60
+ # @yield (see #migrate)
61
+ # @yieldparam (see #migrate)
62
+ # @return (see #migrate)
63
+ def migrate_to_database(&block)
64
+ # migrate with save
65
+ tm = Stopwatch.measure { execute_save(&block) }.elapsed
66
+ logger.debug { format_migration_time_log_message(tm) }
67
+ end
68
+
69
+ # Imports this migrator's CSV file and calls the given block on each migrated target
70
+ # domain object. If no block is given, then this method returns an array of the
71
+ # migrated target objects.
72
+ #
73
+ # @yield [target, row] operation performed on the migration target
74
+ # @yieldparam [Jinx::Resource] target the migrated target domain object
75
+ # @yieldparam [{Symbol => Object}] row the migration source record
76
+ def migrate(&block)
77
+ unless block_given? then
78
+ return migrate { |tgt, row| tgt }
79
+ end
80
+ # If there is an extract, then wrap the migration in an extract
81
+ # writer block.
82
+ if @extract then
83
+ if String === @extract then
84
+ logger.debug { "Opening migration extract #{@extract}..." }
85
+ FileUtils::mkdir_p(File.dirname(@extract))
86
+ if @extract_hdrs then
87
+ logger.debug { "Migration extract headers: #{@extract_hdrs.join(', ')}." }
88
+ CsvIO.open(@extract, :mode => 'w', :headers => @extract_hdrs) do |io|
89
+ @extract = io
90
+ return migrate(&block)
91
+ end
92
+ else
93
+ File.open(@extract, 'w') do |io|
94
+ @extract = io
95
+ return migrate(&block)
96
+ end
97
+ end
98
+ end
99
+ # Copy the extract into a local variable and clear the extract i.v.
100
+ # prior to a recursive call with an extract writer block.
101
+ io, @extract = @extract, nil
102
+ return migrate do |tgt, row|
103
+ res = yield(tgt, row)
104
+ tgt.extract(io)
105
+ res
106
+ end
107
+ end
108
+ begin
109
+ migrate_rows(&block)
110
+ ensure
111
+ @rejects.close if @rejects
112
+ remove_migration_methods
113
+ end
114
+ end
115
+
116
+ # @yield [target] iterate on each migration target
117
+ # @yieldparam [Jinx::Resource] the migration target
118
+ def each
119
+ migrate { |tgt, row| yield tgt }
120
+ end
121
+
122
+ private
123
+
124
+ # Cleans up after the migration by removing the methods injected by migration
125
+ # shims.
126
+ def remove_migration_methods
127
+ # remove the migrate_<attribute> methods
128
+ @mgt_mths.each do | klass, hash|
129
+ hash.each_value do |sym|
130
+ while klass.method_defined?(sym)
131
+ klass.instance_method(sym).owner.module_eval { remove_method(sym) }
132
+ end
133
+ end
134
+ end
135
+ # remove the migrate method
136
+ @creatable_classes.each do |klass|
137
+ while (k = klass.instance_method(:migrate).owner) < Migratable
138
+ k.module_eval { remove_method(:migrate) }
139
+ end
140
+ end
141
+ # remove the target extract method
142
+ remove_extract_method(@target) if @extract
143
+ end
144
+
145
+ def remove_extract_method(klass)
146
+ if (klass.method_defined?(:extract)) then
147
+ klass.module_eval { remove_method(:extract) }
148
+ sc = klass.superclass
149
+ remove_extract_method(sc) if sc < Migratable
150
+ end
151
+ end
152
+
153
+ # {#migrate} with a {#save} block on the migration target. Each migrated object
154
+ # is created, if necessary, after the target save.
155
+ def execute_save
156
+ if @database.nil? then
157
+ Jinx.fail(MigrationError, "Migrator cannot save records since the database option was not specified.")
158
+ end
159
+ @database.open do |db|
160
+ migrate do |tgt, rec|
161
+ save(tgt, db)
162
+ # Ensure that each migrated object is created if necessary.
163
+ @migrated.each do |obj|
164
+ next if obj.identifier
165
+ logger.debug { "The migrator is saving the migrated #{obj}..." }
166
+ save(obj, db)
167
+ logger.debug { "The migrator saved the migrated #{obj}." }
168
+ end
169
+ yield(tgt, rec) if block_given?
170
+ db.clear
171
+ end
172
+ end
173
+ end
174
+
175
+ # @return a log message String for the given migration time in seconds
176
+ def format_migration_time_log_message(time)
177
+ # the database execution time
178
+ dt = @database.execution_time
179
+ if time > 120 then
180
+ time /= 60
181
+ dt /= 60
182
+ unit = "minutes"
183
+ else
184
+ unit = "seconds"
185
+ end
186
+ "Migration took #{'%.2f' % time} #{unit}, of which #{'%.2f' % dt} were database operations."
187
+ end
188
+
189
+ def parse_options(opts)
190
+ @fld_map_files = opts[:mapping]
191
+ if @fld_map_files.nil? then
192
+ Jinx.fail(MigrationError, "Migrator missing required field mapping file parameter")
193
+ end
194
+ @def_files = opts[:defaults]
195
+ @flt_files = opts[:filters]
196
+ shims_opt = opts[:shims] ||= []
197
+ # Make a single shims file into an array.
198
+ @shims = shims_opt.collection? ? shims_opt : [shims_opt]
199
+ @unique = opts[:unique]
200
+ @from = opts[:from] ||= 1
201
+ @input = opts[:input]
202
+ if @input.nil? then
203
+ Jinx.fail(MigrationError, "Migrator missing required source file parameter")
204
+ end
205
+ @database = opts[:database]
206
+ @target_class = opts[:target]
207
+ if @target_class.nil? then
208
+ Jinx.fail(MigrationError, "Migrator missing required target class parameter")
209
+ end
210
+ @bad_file = opts[:bad]
211
+ @extract = opts[:extract]
212
+ @extract_hdrs = opts[:extract_headers]
213
+ @create = opts[:create]
214
+ logger.info("Migration options: #{printable_options(opts).pp_s}.")
215
+ # flag indicating whether to print a progress monitor
216
+ @verbose = opts[:verbose]
217
+ end
218
+
219
+ def printable_options(opts)
220
+ popts = opts.reject { |option, value| value.nil_or_empty? }
221
+ # The target class should be a simple class name rather than the class metadata.
222
+ popts[:target] = popts[:target].qp if popts.has_key?(:target)
223
+ popts
224
+ end
225
+
226
+ def build
227
+ # the current source class => instance map
228
+ Jinx.fail(MigrationError, "No file to migrate") if @input.nil?
229
+
230
+ # If the input is a file name, then make a CSV loader which only converts input fields
231
+ # corresponding to non-String attributes.
232
+ if String === @input then
233
+ @reader = CsvIO.new(@input, &method(:convert))
234
+ logger.debug { "Migration data input file #{@input} headers: #{@reader.headers.qp}" }
235
+ else
236
+ @reader = @input
237
+ end
238
+
239
+ # add shim modifiers
240
+ load_shims(@shims)
241
+
242
+ # create the class => path => default value hash
243
+ @def_hash = @def_files ? load_defaults_files(@def_files) : {}
244
+ # create the class => path => input value => caTissue value filter hash
245
+ @flt_hash = @flt_files ? load_filter_files(@flt_files) : {}
246
+ # the missing owner classes
247
+ @owners = Set.new
248
+ # create the class => path => header hash
249
+ fld_map = load_field_map_files(@fld_map_files)
250
+ # create the class => paths hash
251
+ @cls_paths_hash = create_class_paths_hash(fld_map, @def_hash)
252
+ # create the path => class => header hash
253
+ @header_map = create_header_map(fld_map)
254
+ # Order the creatable classes by dependency, owners first, to smooth the migration process.
255
+ @creatable_classes = @cls_paths_hash.keys.sort do |klass, other|
256
+ other.depends_on?(klass) ? -1 : (klass.depends_on?(other) ? 1 : 0)
257
+ end
258
+ # An abstract class cannot be instantiated.
259
+ @creatable_classes.each do |klass|
260
+ if klass.abstract? then
261
+ Jinx.fail(MigrationError, "Migrator cannot create the abstract class #{klass}; specify a subclass instead in the mapping file.")
262
+ end
263
+ end
264
+
265
+ logger.info { "Migration creatable classes: #{@creatable_classes.qp}." }
266
+ unless @def_hash.empty? then logger.info { "Migration defaults: #{@def_hash.qp}." } end
267
+
268
+ # the class => attribute migration methods hash
269
+ create_migration_method_hashes
270
+
271
+ # Print the input field => attribute map and collect the String input fields for
272
+ # the custom CSVLoader converter.
273
+ @nonstring_headers = Set.new
274
+ logger.info("Migration attributes:")
275
+ @header_map.each do |path, cls_hdr_hash|
276
+ prop = path.last
277
+ cls_hdr_hash.each do |klass, hdr|
278
+ type_s = prop.type ? prop.type.qp : 'Object'
279
+ logger.info(" #{hdr} => #{klass.qp}.#{path.join('.')} (#{type_s})")
280
+ end
281
+ @nonstring_headers.merge!(cls_hdr_hash.values) if prop.type != Java::JavaLang::String
282
+ end
283
+ end
284
+
285
+ # Converts the given input field value as follows:
286
+ # * If the info header is a String field, then return the value unchanged.
287
+ # * Otherwise, return nil which will delegate to the generic CsvIO converter.
288
+ # @param (see CsvIO#convert)
289
+ # @yield (see CsvIO#convert)
290
+ def convert(value, info)
291
+ value unless @nonstring_headers.include?(info.header)
292
+ end
293
+
294
+ # Adds missing owner classes to the migration class path hash (with empty paths)
295
+ # for the classes in the given hash.
296
+ #
297
+ # @param [{Class => Object}] hash the class map
298
+ # @yield the map entry for a new owner
299
+ def add_owners(hash, &factory)
300
+ hash.keys.each { |klass| add_owners_for(klass, hash, &factory) }
301
+ end
302
+
303
+ # Adds missing owner classes to the migration class path hash (with empty paths)
304
+ # for the given migration class.
305
+ #
306
+ # @param [Class] klass the migration class
307
+ # @param [{Class => Object}] hash the class map
308
+ # @yield the map entry for a new owner
309
+ def add_owners_for(klass, hash, &factory)
310
+ owner = missing_owner_for(klass, hash) || return
311
+ logger.debug { "The migrator is adding #{klass.qp} owner #{owner}..." }
312
+ @owners << owner
313
+ hash[owner] = yield
314
+ add_owners_for(owner, hash, &factory)
315
+ end
316
+
317
+ # @param [Class] klass the migration class
318
+ # @param [{Class => Object}] hash the class map
319
+ # @return [Class, nil] the missing class owner, if any
320
+ def missing_owner_for(klass, hash)
321
+ # check for an owner among the current migration classes
322
+ return if klass.owners.any? do |owner|
323
+ hash.detect_key { |other| other <= owner }
324
+ end
325
+ # Find the first non-abstract candidate owner that is a dependent
326
+ # of a migrated class.
327
+ klass.owners.detect do |owner|
328
+ not owner.abstract? and hash.detect_key { |other| owner.depends_on?(other, true) }
329
+ end
330
+ end
331
+
332
+ # Creates the class => +migrate_+_<attribute>_ hash for the given klasses.
333
+ def create_migration_method_hashes
334
+ # the class => attribute => migration filter hash
335
+ @attr_flt_hash = {}
336
+ customizable_class_attributes.each do |klass, pas|
337
+ flts = migration_filters(klass) || next
338
+ @attr_flt_hash[klass] = flts
339
+ end
340
+ # print the migration shim methods
341
+ unless @mgt_mths.empty? then
342
+ logger.info("Migration shim methods:\n#{@mgt_mths.qp}")
343
+ end
344
+ end
345
+
346
+ # @return the class => attributes hash for terminal path attributes which can be customized by +migrate_+ methods
347
+ def customizable_class_attributes
348
+ # The customizable classes set, starting with creatable classes and adding in
349
+ # the migration path terminal attribute declarer classes below.
350
+ klasses = @creatable_classes.to_set
351
+ # the class => path terminal attributes hash
352
+ cls_attrs_hash = LazyHash.new { Set.new }
353
+ # add each path terminal attribute and its declarer class
354
+ @cls_paths_hash.each_value do |paths|
355
+ paths.each do |path|
356
+ prop = path.last
357
+ type = prop.declarer
358
+ klasses << type
359
+ cls_attrs_hash[type] << prop
360
+ end
361
+ end
362
+
363
+ # Merge each redundant customizable superclass into its concrete customizable subclasses.
364
+ klasses.dup.each do |cls|
365
+ redundant = false
366
+ klasses.each do |other|
367
+ # cls is redundant if it is a superclass of other
368
+ redundant = other < cls
369
+ if redundant then
370
+ cls_attrs_hash[other].merge!(cls_attrs_hash[cls])
371
+ end
372
+ end
373
+ # remove the redundant class
374
+ if redundant then
375
+ cls_attrs_hash.delete(cls)
376
+ klasses.delete(cls)
377
+ end
378
+ end
379
+
380
+ cls_attrs_hash
381
+ end
382
+
383
+ # Discovers methods of the form +migrate+__attribute_ implemented for the paths
384
+ # in the given class => paths hash for the given klass. The migrate method is called
385
+ # on the input field value corresponding to the path.
386
+ def migration_filters(klass)
387
+ # the attribute => migration method hash
388
+ mh = attribute_method_hash(klass)
389
+ @mgt_mths[klass] = mh unless mh.empty?
390
+ fh = attribute_filter_hash(klass)
391
+ return if mh.empty? and fh.empty?
392
+ # For each class path terminal attribute metadata, add the migration filters
393
+ # to the attribute metadata => proc hash.
394
+ klass.attributes.to_compact_hash do |pa|
395
+ # the filter
396
+ flt = fh[pa]
397
+ # the migration shim method
398
+ mth = mh[pa]
399
+ # the filter proc
400
+ Proc.new do |obj, value, row|
401
+ # filter the value
402
+ value = flt.transform(value) if flt and not value.nil?
403
+ # apply the migrate_<attribute> method, if defined
404
+ if mth then
405
+ obj.send(mth, value, row) unless value.nil?
406
+ else
407
+ value
408
+ end
409
+ end
410
+ end
411
+ end
412
+
413
+ def attribute_method_hash(klass)
414
+ # the migrate methods, excluding the Migratable migrate_references method
415
+ mths = klass.instance_methods(true).select { |mth| mth =~ /^migrate.(?!references)/ }
416
+ # the attribute => migration method hash
417
+ mh = {}
418
+ mths.each do |mth|
419
+ # the attribute suffix, e.g. name for migrate_name or Name for migrateName
420
+ suffix = /^migrate(_)?(.*)/.match(mth).captures[1]
421
+ # the attribute name
422
+ attr_nm = suffix[0, 1].downcase + suffix[1..-1]
423
+ # the attribute for the name, or skip if no such attribute
424
+ pa = klass.standard_attribute(attr_nm) rescue next
425
+ # associate the attribute => method
426
+ mh[pa] = mth
427
+ end
428
+ mh
429
+ end
430
+
431
+ # Builds the property => filter hash. The filter is specified in the +--filter+ migration
432
+ # option. A Boolean property has a default String => Boolean filter which converts the
433
+ # input string to a Boolean as specified in the +Jinx::Boolean+ +to_boolean+ methods.
434
+ #
435
+ # @param [Class] klass the migration class
436
+ # @return [Property => Proc] the filter migration methods
437
+ def attribute_filter_hash(klass)
438
+ hash = @flt_hash[klass]
439
+ fh = {}
440
+ klass.each_property do |prop|
441
+ pa = prop.attribute
442
+ spec = hash[pa] if hash
443
+ # If the property is boolean, then make a filter that operates on the parsed string input.
444
+ if prop.type == Java::JavaLang::Boolean then
445
+ fh[pa] = boolean_filter(spec)
446
+ logger.debug { "The migrator added the default text -> boolean filter for #{klass.qp} #{pa}." }
447
+ elsif spec then
448
+ fh[pa] = Migration::Filter.new(spec)
449
+ end
450
+ end
451
+ unless fh.empty? then
452
+ logger.debug { "The migration filters were loaded for #{klass.qp} #{fh.keys.to_series}." }
453
+ end
454
+ fh
455
+ end
456
+
457
+ # @param [String, nil] spec the value filter, if any
458
+ # @return [Migration::Filter] the boolean property migration filter
459
+ def boolean_filter(spec=nil)
460
+ # break up the spec into two specs, one on strings and one on booleans
461
+ bspec, sspec = spec.split { |k, v| Boolean === k } if spec
462
+ bf = Migration::Filter.new(bspec) if bspec and not bspec.empty?
463
+ sf = Migration::Filter.new(sspec) if sspec and not sspec.empty?
464
+ # make the composite filter
465
+ Migration::Filter.new do |value|
466
+ fv = sf.transform(value) if sf
467
+ if fv.nil? then
468
+ bv = Jinx::Boolean.for(value) rescue nil
469
+ fv = bf.nil? || bv.nil? ? bv : bf.transform(bv)
470
+ end
471
+ fv
472
+ end
473
+ end
474
+
475
+ # Loads the shim files.
476
+ #
477
+ # @param [<String>, String] files the file or file array
478
+ def load_shims(files)
479
+ logger.debug { "Loading the migration shims with load path #{$:.pp_s}..." }
480
+ files.enumerate do |file|
481
+ load file
482
+ logger.info { "The migrator loaded the shim file #{file}." }
483
+ end
484
+ end
485
+
486
+ # Migrates all rows in the input.
487
+ #
488
+ # @yield (see #migrate)
489
+ # @yieldparam (see #migrate)
490
+ def migrate_rows
491
+ # open a CSV output for rejects if the bad option is set
492
+ if @bad_file then
493
+ @rejects = open_rejects(@bad_file)
494
+ logger.info("Unmigrated records will be written to #{File.expand_path(@bad_file)}.")
495
+ end
496
+
497
+ @rec_cnt = mgt_cnt = 0
498
+ logger.info { "Migrating #{@input}..." }
499
+ puts "Migrating #{@input}..." if @verbose
500
+ @reader.each do |row|
501
+ # the one-based current record number
502
+ rec_no = @rec_cnt + 1
503
+ # skip if the row precedes the from option
504
+ if rec_no == @from and @rec_cnt > 0 then
505
+ logger.info("Skipped the initial #{@rec_cnt} records.")
506
+ elsif rec_no == @to then
507
+ logger.info("Ending the migration after processing record #{@rec_cnt}.")
508
+ return
509
+ elsif rec_no < @from then
510
+ @rec_cnt += 1
511
+ next
512
+ end
513
+ begin
514
+ # migrate the row
515
+ logger.debug { "Migrating record #{rec_no}..." }
516
+ tgt = migrate_row(row)
517
+ # call the block on the migrated target
518
+ if tgt then
519
+ logger.debug { "The migrator built #{tgt} with the following content:\n#{tgt.dump}" }
520
+ yield(tgt, row)
521
+ end
522
+ rescue Exception => e
523
+ logger.error("Migration error on record #{rec_no} - #{e.message}:\n#{e.backtrace.pp_s}")
524
+ # If there is a reject file, then don't propagate the error.
525
+ raise unless @rejects
526
+ # try to clear the migration state
527
+ clear(tgt) rescue nil
528
+ # clear the target
529
+ tgt = nil
530
+ end
531
+ if tgt then
532
+ # replace the log message below with the commented alternative to detect a memory leak
533
+ logger.info { "Migrated record #{rec_no}." }
534
+ #memory_usage = `ps -o rss= -p #{Process.pid}`.to_f / 1024 # in megabytes
535
+ #logger.debug { "Migrated rec #{@rec_cnt}; memory usage: #{sprintf("%.1f", memory_usage)} MB." }
536
+ mgt_cnt += 1
537
+ if @verbose then print_progress(mgt_cnt) end
538
+ # clear the migration state
539
+ clear(tgt)
540
+ elsif @rejects then
541
+ # If there is a rejects file then warn, write the reject and continue.
542
+ logger.warn("Migration not performed on record #{rec_no}.")
543
+ @rejects << row
544
+ @rejects.flush
545
+ logger.debug("Invalid record #{rec_no} was written to the rejects file #{@bad_file}.")
546
+ else
547
+ Jinx.fail(MigrationError, "Migration not performed on record #{rec_no}")
548
+ end
549
+ # Bump the record count.
550
+ @rec_cnt += 1
551
+ end
552
+ logger.info("Migrated #{mgt_cnt} of #{@rec_cnt} records.")
553
+ if @verbose then
554
+ puts
555
+ puts "Migrated #{mgt_cnt} of #{@rec_cnt} records."
556
+ end
557
+ end
558
+
559
+ # Makes the rejects CSV output file.
560
+ #
561
+ # @param [String] file the output file
562
+ # @return [IO] the reject stream
563
+ def open_rejects(file)
564
+ # Make the parent directory.
565
+ FileUtils.mkdir_p(File.dirname(file))
566
+ # Open the file.
567
+ FasterCSV.open(file, 'w', :headers => true, :header_converters => :symbol, :write_headers => true)
568
+ end
569
+
570
+ # Prints a '+' progress indicator after each migrated record to stdout.
571
+ #
572
+ # @param [Integer] count the migrated record count
573
+ def print_progress(count)
574
+ # If the line is 72 characters, then print a line break
575
+ puts if count % 72 == 0
576
+ # Print the progress indicator
577
+ print "+"
578
+ end
579
+
580
+ # Clears references to objects allocated for migration of a single row into the given target.
581
+ # This method does nothing. Subclasses can override.
582
+ #
583
+ # This method is overridden by subclasses to clear the migration state to conserve memory,
584
+ # since this migrator should consume O(1) rather than O(n) memory for n migration records.
585
+ def clear(target)
586
+ end
587
+
588
+ # Imports the given CSV row into a target object.
589
+ #
590
+ # @param [{Symbol => Object}] row the input row field => value hash
591
+ # @return the migrated target object if the migration is valid, nil otherwise
592
+ def migrate_row(row)
593
+ # create an instance for each creatable class
594
+ created = Set.new
595
+ # the migrated objects
596
+ migrated = @creatable_classes.map { |klass| create(klass, row, created) }
597
+ # migrate each object from the input row
598
+ migrated.each do |obj|
599
+ # First uniquify the object if necessary.
600
+ if @unique and Unique === obj then
601
+ logger.debug { "The migrator is making #{obj} unique..." }
602
+ obj.uniquify
603
+ end
604
+ obj.migrate(row, migrated)
605
+ end
606
+ # the valid migrated objects
607
+ @migrated = migrate_valid_references(row, migrated)
608
+ # the candidate target objects
609
+ tgts = @migrated.select { |obj| @target_class === obj }
610
+ if tgts.size > 1 then
611
+ raise MigrationError.new("Ambiguous #{@target_class} targets #{tgts.to_series}")
612
+ end
613
+ target = tgts.first || return
614
+
615
+ logger.debug { "Migrated target #{target}." }
616
+ target
617
+ end
618
+
619
+ # Sets the migration references for each valid migrated object.
620
+ #
621
+ # @param row (see #migrate_row)
622
+ # @param [Array] migrated the migrated objects
623
+ # @return [Array] the valid migrated objects
624
+ def migrate_valid_references(row, migrated)
625
+ # Split the valid and invalid objects. The iteration is in reverse dependency order,
626
+ # since invalidating a dependent can invalidate the owner.
627
+ ordered = migrated.transitive_closure(:dependents)
628
+ ordered.keep_if { |obj| migrated.include?(obj) }.reverse!
629
+ valid, invalid = ordered.partition do |obj|
630
+ if migration_valid?(obj) then
631
+ obj.migrate_references(row, migrated, @target_class, @attr_flt_hash[obj.class])
632
+ true
633
+ else
634
+ obj.class.owner_attributes.each { |pa| obj.clear_attribute(pa) }
635
+ false
636
+ end
637
+ end
638
+
639
+ # Go back through the valid objects in dependency order to invalidate dependents
640
+ # whose owner is invalid.
641
+ valid.reverse.each do |obj|
642
+ unless owner_valid?(obj, valid, invalid) then
643
+ invalid << valid.delete(obj)
644
+ logger.debug { "The migrator invalidated #{obj} since it does not have a valid owner." }
645
+ end
646
+ end
647
+
648
+ # Go back through the valid objects in reverse dependency order to invalidate owners
649
+ # created only to hold a dependent which was subsequently invalidated.
650
+ valid.reject do |obj|
651
+ if @owners.include?(obj.class) and obj.dependents.all? { |dep| invalid.include?(dep) } then
652
+ # clear all references from the invalidated owner
653
+ obj.class.domain_attributes.each { |pa| obj.clear_attribute(pa) }
654
+ invalid << obj
655
+ logger.debug { "The migrator invalidated #{obj.qp} since it was created solely to hold subsequently invalidated dependents." }
656
+ true
657
+ end
658
+ end
659
+ end
660
+
661
+ # Returns whether the given domain object satisfies at least one of the following conditions:
662
+ # * it does not have an owner among the invalid objects
663
+ # * it has an owner among the valid objects
664
+ #
665
+ # @param [Resource] obj the domain object to check
666
+ # @param [<Resource>] valid the valid migrated objects
667
+ # @param [<Resource>] invalid the invalid migrated objects
668
+ # @return [Boolean] whether the owner is valid
669
+ def owner_valid?(obj, valid, invalid)
670
+ otypes = obj.class.owners
671
+ invalid.all? { |other| not otypes.include?(other.class) } or
672
+ valid.any? { |other| otypes.include?(other.class) }
673
+ end
674
+
675
+ # @param [Migratable] obj the migrated object
676
+ # @return [Boolean] whether the migration is successful
677
+ def migration_valid?(obj)
678
+ if obj.migration_valid? then
679
+ true
680
+ else
681
+ logger.debug { "The migrated #{obj.qp} is invalid." }
682
+ false
683
+ end
684
+ end
685
+
686
+ # Creates an instance of the given klass from the given row.
687
+ # The new klass instance and all intermediate migrated instances are added to the
688
+ # created set.
689
+ #
690
+ # @param [Class] klass
691
+ # @param [{Symbol => Object}] row the input row
692
+ # @param [<Resource>] created the migrated instances for this row
693
+ # @return [Resource] the new instance
694
+ def create(klass, row, created)
695
+ # the new object
696
+ logger.debug { "The migrator is building #{klass.qp}..." }
697
+ created << obj = klass.new
698
+ migrate_properties(obj, row, created)
699
+ add_defaults(obj, row, created)
700
+ logger.debug { "The migrator built #{obj}." }
701
+ obj
702
+ end
703
+
704
+ # Migrates each input field to the associated domain object attribute.
705
+ # String input values are stripped. Missing input values are ignored.
706
+ #
707
+ # @param [Resource] the migration object
708
+ # @param row (see #create)
709
+ # @param [<Resource>] created (see #create)
710
+ def migrate_properties(obj, row, created)
711
+ # for each input header which maps to a migratable target attribute metadata path,
712
+ # set the target attribute, creating intermediate objects as needed.
713
+ @cls_paths_hash[obj.class].each do |path|
714
+ header = @header_map[path][obj.class]
715
+ # the input value
716
+ value = row[header]
717
+ value.strip! if String === value
718
+ next if value.nil?
719
+ # fill the reference path
720
+ ref = fill_path(obj, path[0...-1], row, created)
721
+ # set the attribute
722
+ migrate_property(ref, path.last, value, row)
723
+ end
724
+ end
725
+
726
+ # @param [Resource] the migration object
727
+ # @param row (see #create)
728
+ # @param [<Resource>] created (see #create)
729
+ def add_defaults(obj, row, created)
730
+ dh = @def_hash[obj.class] || return
731
+ dh.each do |path, value|
732
+ # fill the reference path
733
+ ref = fill_path(obj, path[0...-1], row, created)
734
+ # set the attribute to the default value unless there is already a value
735
+ ref.merge_attribute(path.last.to_sym, value)
736
+ end
737
+ end
738
+
739
+ # Fills the given reference Property path starting at obj.
740
+ #
741
+ # @param row (see #create)
742
+ # @param created (see #create)
743
+ # @return the last domain object in the path
744
+ def fill_path(obj, path, row, created)
745
+ # create the intermediate objects as needed (or return obj if path is empty)
746
+ path.inject(obj) do |parent, prop|
747
+ # the referenced object
748
+ parent.send(prop.reader) or create_reference(parent, prop, row, created)
749
+ end
750
+ end
751
+
752
+ # Sets the given migrated object's reference attribute to a new referenced domain object.
753
+ #
754
+ # @param [Resource] obj the domain object being migrated
755
+ # @param [Property] property the property being migrated
756
+ # @param row (see #create)
757
+ # @param created (see #create)
758
+ # @return the new object
759
+ def create_reference(obj, property, row, created)
760
+ if property.type.abstract? then
761
+ Jinx.fail(MigrationError, "Cannot create #{obj.qp} #{property} with abstract type #{property.type}")
762
+ end
763
+ ref = property.type.new
764
+ ref.migrate(row, Array::EMPTY_ARRAY)
765
+ obj.send(property.writer, ref)
766
+ created << ref
767
+ logger.debug { "The migrator created #{obj.qp} #{property} #{ref}." }
768
+ ref
769
+ end
770
+
771
+ # Sets the given property value to the filtered input value. If there is a filter
772
+ # defined for the property, then that filter is applied. If there is a migration
773
+ # shim method with name +migrate_+_attribute_, then that method is called on the
774
+ # (possibly filtered) value. The target object property is set to the resulting
775
+ # filtered value.
776
+ #
777
+ # @param [Migratable] obj the target domain object
778
+ # @param [Property] property the property to set
779
+ # @param value the input value
780
+ # @param [{Symbol => Object}] row the input row
781
+ def migrate_property(obj, property, value, row)
782
+ # if there is a shim migrate_<attribute> method, then call it on the input value
783
+ value = filter_value(obj, property, value, row)
784
+ return if value.nil?
785
+ # set the attribute
786
+ begin
787
+ obj.send(property.writer, value)
788
+ rescue Exception => e
789
+ Jinx.fail(MigrationError, "Could not set #{obj.qp} #{property} to #{value.qp}", e)
790
+ end
791
+ logger.debug { "Migrated #{obj.qp} #{property} to #{value}." }
792
+ end
793
+
794
+ # Calls the shim migrate_<attribute> method or config filter on the input value.
795
+ #
796
+ # @param value the input value
797
+ # @param [Property] property the property to set
798
+ # @return the input value, if there is no filter, otherwise the filtered value
799
+ def filter_value(obj, property, value, row)
800
+ flt = filter_for(obj, property.to_sym)
801
+ return value if flt.nil?
802
+ fval = flt.call(obj, value, row)
803
+ unless value == fval then
804
+ logger.debug { "The migration filter transformed the #{obj.qp} #{property} value from #{value.qp} to #{fval}." }
805
+ end
806
+ fval
807
+ end
808
+
809
+ def filter_for(obj, attribute)
810
+ flts = @attr_flt_hash[obj.class] || return
811
+ flts[attribute]
812
+ end
813
+
814
+ # @param [Resource] obj the domain object to save in the database
815
+ # @return [Resource, nil] obj if the save is successful, nil otherwise
816
+ def save(obj, database)
817
+ if @create then
818
+ logger.debug { "Migrator creating #{obj}..." }
819
+ database.create(obj)
820
+ logger.debug { "Migrator created #{obj}." }
821
+ else
822
+ logger.debug { "Migrator saving #{obj}..." }
823
+ database.save(obj)
824
+ logger.debug { "Migrator saved #{obj}." }
825
+ end
826
+ end
827
+
828
+ def current_record
829
+ @rec_cnt + 1
830
+ end
831
+
832
+ # @param [<String>, String] files the migration fields mapping file or file array
833
+ # @return [{Class => {Property => Symbol}}] the class => path => header hash
834
+ # loaded from the mapping files
835
+ def load_field_map_files(files)
836
+ map = LazyHash.new { Hash.new }
837
+ files.enumerate { |file| load_field_map_file(file, map) }
838
+
839
+ # include the target class
840
+ map[@target_class] ||= Hash.new
841
+ # add the default classes
842
+ @def_hash.each_key { |klass| map[klass] ||= Hash.new }
843
+ # add the owners
844
+ add_owners(map) { Hash.new }
845
+
846
+ # Include only concrete classes that are not a superclass of another migration class.
847
+ classes = map.keys
848
+ sub_hash = classes.to_compact_hash do |klass|
849
+ subs = classes.select { |other| other < klass }
850
+ subs.delete_if { |klass| subs.any? { |other| other < klass } }
851
+ end
852
+
853
+ # Merge the superclass paths into the subclass paths.
854
+ sub_hash.each do |klass, subs|
855
+ paths = map.delete(klass)
856
+ # Add, but don't replace, path => header entries from the superclass.
857
+ subs.each do |sub|
858
+ map[sub].merge!(paths) { |key, old, new| old }
859
+ logger.debug { "Migrator merged #{klass.qp} mappings into the subclass #{sub.qp}." }
860
+ end
861
+ end
862
+
863
+ # Validate that there are no abstract classes in the mapping.
864
+ map.each_key do |klass|
865
+ if klass.abstract? then
866
+ raise MigrationError.new("Cannot migrate to the abstract class #{klass}")
867
+ end
868
+ end
869
+
870
+ map
871
+ end
872
+
873
+ # @param [String] file the migration fields configuration file
874
+ # @param [{Class => {Property => Symbol}}] hash the class => path => header hash
875
+ # to populate from the loaded configuration
876
+ def load_field_map_file(file, hash)
877
+ # load the field mapping config file
878
+ begin
879
+ config = YAML.load_file(file)
880
+ rescue
881
+ Jinx.fail(MigrationError, "Could not read field map file #{file}: " + $!)
882
+ end
883
+ populate_field_map(config, hash)
884
+ end
885
+
886
+ # @param [{String => String}] config the attribute => header specification
887
+ # @param hash (see #load_field_map_file)
888
+ def populate_field_map(config, hash)
889
+ # collect the class => path => header entries
890
+ config.each do |field, attr_list|
891
+ next if attr_list.blank?
892
+ # the header accessor method for the field
893
+ header = @reader.accessor(field)
894
+ if header.nil? then
895
+ Jinx.fail(MigrationError, "Field defined in migration configuration not found in input file #{@input} headers: #{field}")
896
+ end
897
+ # associate each attribute path in the property value with the header
898
+ attr_list.split(/,\s*/).each do |path_s|
899
+ klass, path = create_attribute_path(path_s)
900
+ hash[klass][path] = header
901
+ end
902
+ end
903
+ end
904
+
905
+ # Loads the defaults configuration files.
906
+ #
907
+ # @param [<String>, String] files the file or file array to load
908
+ # @return [<Class => <String => Object>>] the class => path => default value entries
909
+ def load_defaults_files(files)
910
+ # collect the class => path => value entries from each defaults file
911
+ hash = LazyHash.new { Hash.new }
912
+ files.enumerate { |file| load_defaults_file(file, hash) }
913
+ hash
914
+ end
915
+
916
+ # Loads the defaults config file into the given hash.
917
+ #
918
+ # @param [String] file the file to load
919
+ # @param [<Class => <String => Object>>] hash the class => path => default value entries
920
+ def load_defaults_file(file, hash)
921
+ begin
922
+ config = YAML::load_file(file)
923
+ rescue
924
+ Jinx.fail(MigrationError, "Could not read defaults file #{file}: " + $!)
925
+ end
926
+ # collect the class => path => value entries
927
+ config.each do |path_s, value|
928
+ next if value.nil_or_empty?
929
+ klass, path = create_attribute_path(path_s)
930
+ hash[klass][path] = value
931
+ end
932
+ end
933
+ # Loads the filter config files.
934
+ #
935
+ # @param [<String>, String] files the file or file array to load
936
+ # @return [<Class => <String => Object>>] the class => path => default value entries
937
+ def load_filter_files(files)
938
+ # collect the class => path => value entries from each defaults file
939
+ hash = {}
940
+ files.enumerate { |file| load_filter_file(file, hash) }
941
+ logger.debug { "The migrator loaded the filters #{hash.qp}." }
942
+ hash
943
+ end
944
+
945
+ # Loads the filter config file into the given hash.
946
+ #
947
+ # @param [String] file the file to load
948
+ # @param [<Class => <String => <Object => Object>>>] hash the class => path => input value => caTissue value entries
949
+ def load_filter_file(file, hash)
950
+ # collect the class => attribute => filter entries
951
+ logger.debug { "Loading the migration filter configuration #{file}..." }
952
+ begin
953
+ config = YAML::load_file(file)
954
+ rescue
955
+ Jinx.fail(MigrationError, "Could not read filter file #{file}: " + $!)
956
+ end
957
+ config.each do |path_s, flt|
958
+ next if flt.nil_or_empty?
959
+ klass, path = create_attribute_path(path_s)
960
+ unless path.size == 1 then
961
+ Jinx.fail(MigrationError, "Migration filter configuration path not supported: #{path_s}")
962
+ end
963
+ pa = klass.standard_attribute(path.first.to_sym)
964
+ flt_hash = hash[klass] ||= {}
965
+ flt_hash[pa] = flt
966
+ end
967
+ end
968
+
969
    # Parses a period-delimited attribute path string into its starting class
    # and Property metadata chain.
    #
    # @param [String] path_s a period-delimited path string in the form
    #   _class_(._attribute_)+, or (_attribute_)+ relative to the target class
    # @return [(Class, <Property>)] the starting class and the corresponding
    #   attribute metadata path
    # @raise [MigrationError] if the path string is malformed or an attribute is not found
    def create_attribute_path(path_s)
      names = path_s.split('.')
      # If the path starts with a capitalized class name, then resolve the class.
      # Otherwise, the target class is the start of the path.
      klass = names.first =~ /^[A-Z]/ ? context_module.module_for_name(names.shift) : @target_class
      # There must be at least one attribute.
      if names.empty? then
        Jinx.fail(MigrationError, "Property entry in migration configuration is not in <class>.<attribute> format: #{path_s}")
      end

      # Build the attribute path.
      path = []
      names.inject(klass) do |parent, name|
        pa = name.to_sym
        # Resolve the attribute's Property metadata; a missing attribute is
        # reported as a configuration error rather than a raw NameError.
        prop = begin
          parent.property(pa)
        rescue NameError => e
          Jinx.fail(MigrationError, "Migration field mapping attribute #{parent.qp}.#{pa} not found", e)
        end
        # Collection attributes cannot be traversed by a migration path.
        if prop.collection? then
          Jinx.fail(MigrationError, "Migration field mapping attribute #{parent.qp}.#{prop} is a collection, which is not supported")
        end
        path << prop
        # The property's type becomes the parent of the next path segment.
        prop.type
      end

      # Return the starting class and Property path.
      # Note that the starting class is not necessarily the first path attribute declarer, since the
      # starting class could be the concrete target class rather than an abstract declarer. this is
      # important, since the class must be instantiated.
      [klass, path]
    end
1004
+
1005
    # The context module is given by the target class {ResourceClass#domain_module}.
    # Capitalized class names in a configuration path string are resolved in
    # this module (see #create_attribute_path).
    #
    # @return [Module] the class name resolution context
    def context_module
      @target_class.domain_module
    end
1011
+
1012
+ # @return a new class => [paths] hash from the migration fields configuration map
1013
+ def create_class_paths_hash(fld_map, def_map)
1014
+ hash = {}
1015
+ fld_map.each { |klass, path_hdr_hash| hash[klass] = path_hdr_hash.keys.to_set }
1016
+ def_map.each { |klass, path_val_hash| (hash[klass] ||= Set.new).merge(path_val_hash.keys) }
1017
+ hash
1018
+ end
1019
+
1020
+ # @return a new path => class => header hash from the migration fields configuration map
1021
+ def create_header_map(fld_map)
1022
+ hash = LazyHash.new { Hash.new }
1023
+ fld_map.each do |klass, path_hdr_hash|
1024
+ path_hdr_hash.each { |path, hdr| hash[path][klass] = hdr }
1025
+ end
1026
+ hash
1027
+ end
1028
+ end
1029
+ end