jinx-migrate 2.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. data/.gitignore +14 -0
  2. data/.rspec +3 -0
  3. data/.yardopts +1 -0
  4. data/Gemfile +8 -0
  5. data/Gemfile.lock +38 -0
  6. data/History.md +6 -0
  7. data/LEGAL +5 -0
  8. data/LICENSE +22 -0
  9. data/README.md +33 -0
  10. data/Rakefile +40 -0
  11. data/bin/csvjoin +24 -0
  12. data/examples/family/README.md +24 -0
  13. data/examples/family/conf/children/fields.yaml +2 -0
  14. data/examples/family/conf/parents/defaults.yaml +3 -0
  15. data/examples/family/conf/parents/fields.yaml +6 -0
  16. data/examples/family/conf/parents/values.yaml +4 -0
  17. data/examples/family/data/children.csv +1 -0
  18. data/examples/family/data/parents.csv +1 -0
  19. data/examples/family/lib/shims.rb +17 -0
  20. data/jinx-migrate.gemspec +26 -0
  21. data/lib/jinx/csv/csvio.rb +214 -0
  22. data/lib/jinx/csv/joiner.rb +196 -0
  23. data/lib/jinx/migration/filter.rb +167 -0
  24. data/lib/jinx/migration/migratable.rb +244 -0
  25. data/lib/jinx/migration/migrator.rb +1029 -0
  26. data/lib/jinx/migration/reader.rb +16 -0
  27. data/lib/jinx/migration/version.rb +5 -0
  28. data/spec/bad/bad_spec.rb +25 -0
  29. data/spec/bad/fields.yaml +1 -0
  30. data/spec/bad/parents.csv +1 -0
  31. data/spec/bad/shims.rb +16 -0
  32. data/spec/csv/join/join_helper.rb +35 -0
  33. data/spec/csv/join/join_spec.rb +100 -0
  34. data/spec/csv/join/jumbled_src.csv +7 -0
  35. data/spec/csv/join/jumbled_tgt.csv +7 -0
  36. data/spec/csv/join/source.csv +7 -0
  37. data/spec/csv/join/target.csv +7 -0
  38. data/spec/extract/extract.rb +13 -0
  39. data/spec/extract/extract_spec.rb +33 -0
  40. data/spec/extract/fields.yaml +1 -0
  41. data/spec/extract/parents.csv +1 -0
  42. data/spec/family/child_spec.rb +27 -0
  43. data/spec/family/family.rb +13 -0
  44. data/spec/family/parent_spec.rb +57 -0
  45. data/spec/filter/fields.yaml +1 -0
  46. data/spec/filter/filter_spec.rb +20 -0
  47. data/spec/filter/parents.csv +1 -0
  48. data/spec/filter/values.yaml +4 -0
  49. data/spec/primitive/children.csv +1 -0
  50. data/spec/primitive/fields.yaml +4 -0
  51. data/spec/primitive/primitive_spec.rb +24 -0
  52. data/spec/skip/fields.yaml +1 -0
  53. data/spec/skip/parents.csv +1 -0
  54. data/spec/skip/skip_spec.rb +17 -0
  55. data/spec/spec_helper.rb +17 -0
  56. data/spec/support/model.rb +7 -0
  57. data/spec/unique/fields.yaml +1 -0
  58. data/spec/unique/parent.rb +6 -0
  59. data/spec/unique/parents.csv +1 -0
  60. data/spec/unique/shims.rb +10 -0
  61. data/spec/unique/unique_spec.rb +20 -0
  62. data/test/fixtures/csv/data/empty.csv +1 -0
  63. data/test/fixtures/csv/data/variety.csv +1 -0
  64. data/test/lib/csv/csvio_test.rb +74 -0
  65. metadata +206 -0
@@ -0,0 +1,1029 @@
1
+ require 'yaml'
2
+ require 'jinx/csv/csvio'
3
+ require 'jinx/helpers/boolean'
4
+ require 'jinx/helpers/class'
5
+ require 'jinx/helpers/collections'
6
+ require 'jinx/helpers/lazy_hash'
7
+ require 'jinx/helpers/log'
8
+ require 'jinx/helpers/inflector'
9
+ require 'jinx/helpers/pretty_print'
10
+ require 'jinx/helpers/stopwatch'
11
+ require 'jinx/helpers/transitive_closure'
12
+ require 'jinx/migration/migratable'
13
+ require 'jinx/migration/reader'
14
+ require 'jinx/migration/filter'
15
+
16
+ module Jinx
17
+ class MigrationError < RuntimeError; end
18
+
19
+ # Migrates a CSV extract to a caBIG application.
20
+ class Migrator
21
+ include Enumerable
22
+
23
+ # Creates a new Migrator. The input can be either a file name or a source factory. The factory
24
+ # implements the +open+ method, which returns a {Migration::Reader}
25
+ #
26
+ # @param [{Symbol => Object}] opts the migration options
27
+ # @option opts [String] :target the required target domain class
28
+ # @option opts [<String>, String] :mapping the required input field => caTissue attribute mapping file(s)
29
+ # @option opts [String, Migration::Reader] :input the required input file name or an adapter which
30
+ # implements the {Migration::Reader} methods
31
+ # @option opts [Jinx::Database] :database the optional destination +Jinx::Database+
32
+ # @option opts [<String>, String] :defaults the optional caTissue attribute => value default mapping file(s)
33
+ # @option opts [<String>, String] :filters the optional caTissue attribute input value => caTissue value filter file(s)
34
+ # @option opts [<String>, String] :shims the optional shim file(s) to load
35
+ # @option opts [String] :unique the optional flag which ensures that migrator calls the +uniquify+ method on
36
+ # those migrated objects whose class includes the +Unique+ module
37
+ # @option opts [String] :create the optional flag indicating that existing target objects are ignored
38
+ # @option opts [String] :bad the optional invalid record file
39
+ # @option opts [String, IO] :extract the optional extract file or object that responds to +<<+
40
+ # @option opts [<String>] :extract_headers the optional extract CSV field headers
41
+ # @option opts [Integer] :from the optional starting source record number to process
42
+ # @option opts [Integer] :to the optional ending source record number to process
43
+ # @option opts [Boolean] :quiet the optional flag which suppress output messages
44
+ # @option opts [Boolean] :verbose the optional flag to print the migration progress
45
def initialize(opts)
  # Start with no processed records and no discovered shim methods.
  # @mgt_mths is the class => attribute => migrate_<attribute> method hash.
  @rec_cnt, @mgt_mths = 0, {}
  # Validate and capture the options, then assemble the migration machinery.
  parse_options(opts)
  build
end
51
+
52
+ # Imports this migrator's file into the database with the given connect options.
53
+ # This method creates or updates the domain objects mapped from the import source.
54
+ # If a block is given to this method, then the block is called on each stored
55
+ # migration target object.
56
+ #
57
+ # If the +:create+ option is set, then an input record for a target object which already
58
+ # exists in the database is noted in a debug log message and ignored rather than updated.
59
+ #
60
+ # @yield (see #migrate)
61
+ # @yieldparam (see #migrate)
62
+ # @return (see #migrate)
63
def migrate_to_database(&block)
  # Time the save-enabled migration and log how long it took.
  elapsed = Stopwatch.measure { execute_save(&block) }.elapsed
  logger.debug { format_migration_time_log_message(elapsed) }
end
68
+
69
+ # Imports this migrator's CSV file and calls the given block on each migrated target
70
+ # domain object. If no block is given, then this method returns an array of the
71
+ # migrated target objects.
72
+ #
73
+ # @yield [target, row] operation performed on the migration target
74
+ # @yieldparam [Jinx::Resource] target the migrated target domain object
75
+ # @yieldparam [{Symbol => Object}] row the migration source record
76
def migrate(&block)
  unless block_given? then
    # No block: collect the targets by recursing with an identity block.
    return migrate { |tgt, row| tgt }
  end
  # If there is an extract, then wrap the migration in an extract
  # writer block.
  if @extract then
    if String === @extract then
      # The extract is a file name: create its directory, open it, and recurse
      # with @extract rebound to the open IO.
      logger.debug { "Opening migration extract #{@extract}..." }
      FileUtils::mkdir_p(File.dirname(@extract))
      if @extract_hdrs then
        logger.debug { "Migration extract headers: #{@extract_hdrs.join(', ')}." }
        CsvIO.open(@extract, :mode => 'w', :headers => @extract_hdrs) do |io|
          @extract = io
          return migrate(&block)
        end
      else
        File.open(@extract, 'w') do |io|
          @extract = io
          return migrate(&block)
        end
      end
    end
    # Copy the extract into a local variable and clear the extract i.v.
    # prior to a recursive call with an extract writer block.
    io, @extract = @extract, nil
    return migrate do |tgt, row|
      # Run the caller's block first, then write the target to the extract.
      res = yield(tgt, row)
      tgt.extract(io)
      res
    end
  end
  begin
    migrate_rows(&block)
  ensure
    # Always close the rejects stream, if any, and strip the shim methods
    # injected for this run.
    @rejects.close if @rejects
    remove_migration_methods
  end
end
115
+
116
+ # @yield [target] iterate on each migration target
117
+ # @yieldparam [Jinx::Resource] the migration target
118
def each
  # Delegate to migrate, discarding the source row.
  migrate { |tgt, _row| yield tgt }
end
121
+
122
+ private
123
+
124
+ # Cleans up after the migration by removing the methods injected by migration
125
+ # shims.
126
def remove_migration_methods
  # remove the migrate_<attribute> shim methods, walking up the ancestor
  # chain until no definition remains
  @mgt_mths.each do |klass, hash|
    hash.each_value do |sym|
      while klass.method_defined?(sym)
        klass.instance_method(sym).owner.module_eval { remove_method(sym) }
      end
    end
  end
  # remove the shim migrate method overrides
  @creatable_classes.each do |klass|
    while (k = klass.instance_method(:migrate).owner) < Migratable
      k.module_eval { remove_method(:migrate) }
    end
  end
  # Remove the target extract method.
  # NOTE(review): this formerly referenced @target, which is never assigned in
  # this class (parse_options sets @target_class) and would be nil here;
  # @target_class is the class whose extract shim was added.
  remove_extract_method(@target_class) if @extract
end
144
+
145
def remove_extract_method(klass)
  # Nothing to do unless an extract shim is visible on this class.
  return unless klass.method_defined?(:extract)
  klass.module_eval { remove_method(:extract) }
  # Recurse up the superclass chain while it remains within Migratable.
  parent = klass.superclass
  remove_extract_method(parent) if parent < Migratable
end
152
+
153
+ # {#migrate} with a {#save} block on the migration target. Each migrated object
154
+ # is created, if necessary, after the target save.
155
def execute_save
  if @database.nil? then
    Jinx.fail(MigrationError, "Migrator cannot save records since the database option was not specified.")
  end
  @database.open do |db|
    migrate do |tgt, rec|
      # Save the target first; its save can assign identifiers to satellites.
      save(tgt, db)
      # Ensure that each migrated object is created if necessary.
      @migrated.each do |obj|
        # Skip objects already persisted (identifier assigned) by the target save.
        next if obj.identifier
        logger.debug { "The migrator is saving the migrated #{obj}..." }
        save(obj, db)
        logger.debug { "The migrator saved the migrated #{obj}." }
      end
      yield(tgt, rec) if block_given?
      # Release the database cache before the next record to bound memory use.
      db.clear
    end
  end
end
174
+
175
+ # @return a log message String for the given migration time in seconds
176
def format_migration_time_log_message(time)
  # the portion of the elapsed time spent in database operations
  db_time = @database.execution_time
  # Report in minutes when the run exceeded two minutes, otherwise in seconds.
  if time > 120
    time = time / 60
    db_time = db_time / 60
    unit = "minutes"
  else
    unit = "seconds"
  end
  "Migration took #{'%.2f' % time} #{unit}, of which #{'%.2f' % db_time} were database operations."
end
188
+
189
# Validates and captures the migration options.
#
# @param (see #initialize)
# @raise [MigrationError] if the required mapping, input or target option is missing
def parse_options(opts)
  @fld_map_files = opts[:mapping]
  if @fld_map_files.nil? then
    Jinx.fail(MigrationError, "Migrator missing required field mapping file parameter")
  end
  @def_files = opts[:defaults]
  @flt_files = opts[:filters]
  shims_opt = opts[:shims] ||= []
  # Make a single shims file into an array.
  @shims = shims_opt.collection? ? shims_opt : [shims_opt]
  @unique = opts[:unique]
  @from = opts[:from] ||= 1
  # The optional inclusive ending record number. This was previously never
  # captured, so the documented :to option was silently ignored by migrate_rows.
  @to = opts[:to]
  @input = opts[:input]
  if @input.nil? then
    Jinx.fail(MigrationError, "Migrator missing required source file parameter")
  end
  @database = opts[:database]
  @target_class = opts[:target]
  if @target_class.nil? then
    Jinx.fail(MigrationError, "Migrator missing required target class parameter")
  end
  @bad_file = opts[:bad]
  @extract = opts[:extract]
  @extract_hdrs = opts[:extract_headers]
  @create = opts[:create]
  logger.info("Migration options: #{printable_options(opts).pp_s}.")
  # flag indicating whether to print a progress monitor
  @verbose = opts[:verbose]
end
218
+
219
# @param [{Symbol => Object}] opts the migration options
# @return [{Symbol => Object}] the options with empty values removed and the
#   target rendered as a simple class name
def printable_options(opts)
  printable = opts.reject { |_option, value| value.nil_or_empty? }
  # The target class should be a simple class name rather than the class metadata.
  printable[:target] = printable[:target].qp if printable.has_key?(:target)
  printable
end
225
+
226
def build
  # the current source class => instance map
  Jinx.fail(MigrationError, "No file to migrate") if @input.nil?

  # If the input is a file name, then make a CSV loader which only converts input fields
  # corresponding to non-String attributes.
  if String === @input then
    @reader = CsvIO.new(@input, &method(:convert))
    logger.debug { "Migration data input file #{@input} headers: #{@reader.headers.qp}" }
  else
    # The input is already a Migration::Reader adapter.
    @reader = @input
  end

  # add shim modifiers
  load_shims(@shims)

  # create the class => path => default value hash
  @def_hash = @def_files ? load_defaults_files(@def_files) : {}
  # create the class => path => filter value hash
  @flt_hash = @flt_files ? load_filter_files(@flt_files) : {}
  # the missing owner classes
  @owners = Set.new
  # create the class => path => header hash
  fld_map = load_field_map_files(@fld_map_files)
  # create the class => paths hash
  @cls_paths_hash = create_class_paths_hash(fld_map, @def_hash)
  # create the path => class => header hash
  @header_map = create_header_map(fld_map)
  # Order the creatable classes by dependency, owners first, to smooth the migration process.
  @creatable_classes = @cls_paths_hash.keys.sort do |klass, other|
    other.depends_on?(klass) ? -1 : (klass.depends_on?(other) ? 1 : 0)
  end
  # An abstract class cannot be instantiated.
  @creatable_classes.each do |klass|
    if klass.abstract? then
      Jinx.fail(MigrationError, "Migrator cannot create the abstract class #{klass}; specify a subclass instead in the mapping file.")
    end
  end

  logger.info { "Migration creatable classes: #{@creatable_classes.qp}." }
  unless @def_hash.empty? then logger.info { "Migration defaults: #{@def_hash.qp}." } end

  # the class => attribute migration methods hash
  create_migration_method_hashes

  # Print the input field => attribute map and collect the String input fields for
  # the custom CSVLoader converter.
  @nonstring_headers = Set.new
  logger.info("Migration attributes:")
  @header_map.each do |path, cls_hdr_hash|
    prop = path.last
    cls_hdr_hash.each do |klass, hdr|
      type_s = prop.type ? prop.type.qp : 'Object'
      logger.info(" #{hdr} => #{klass.qp}.#{path.join('.')} (#{type_s})")
    end
    # Non-String fields are delegated to the generic CsvIO converter (see #convert).
    @nonstring_headers.merge!(cls_hdr_hash.values) if prop.type != Java::JavaLang::String
  end
end
284
+
285
+ # Converts the given input field value as follows:
286
+ # * If the info header is a String field, then return the value unchanged.
287
+ # * Otherwise, return nil which will delegate to the generic CsvIO converter.
288
+ # @param (see CsvIO#convert)
289
+ # @yield (see CsvIO#convert)
290
def convert(value, info)
  # A String field passes through unchanged; any other field yields nil,
  # which delegates conversion to the generic CsvIO converter.
  if @nonstring_headers.include?(info.header) then
    nil
  else
    value
  end
end
293
+
294
+ # Adds missing owner classes to the migration class path hash (with empty paths)
295
+ # for the classes in the given hash.
296
+ #
297
+ # @param [{Class => Object}] hash the class map
298
+ # @yield the map entry for a new owner
299
def add_owners(hash, &factory)
  # Snapshot the keys, since add_owners_for can add entries to the hash.
  hash.keys.each do |klass|
    add_owners_for(klass, hash, &factory)
  end
end
302
+
303
+ # Adds missing owner classes to the migration class path hash (with empty paths)
304
+ # for the given migration class.
305
+ #
306
+ # @param [Class] klass the migration class
307
+ # @param [{Class => Object}] hash the class map
308
+ # @yield the map entry for a new owner
309
def add_owners_for(klass, hash, &factory)
  # Done if there is no missing owner for this class.
  owner = missing_owner_for(klass, hash)
  return if owner.nil?
  logger.debug { "The migrator is adding #{klass.qp} owner #{owner}..." }
  @owners << owner
  # Seed the new owner's map entry from the factory, then recurse in case the
  # owner itself has a missing owner.
  hash[owner] = factory.call
  add_owners_for(owner, hash, &factory)
end
316
+
317
+ # @param [Class] klass the migration class
318
+ # @param [{Class => Object}] hash the class map
319
+ # @return [Class, nil] the missing class owner, if any
320
def missing_owner_for(klass, hash)
  owners = klass.owners
  # Done if an owner is already among the current migration classes.
  covered = owners.any? { |owner| hash.detect_key { |other| other <= owner } }
  return if covered
  # Find the first non-abstract candidate owner that is a dependent
  # of a migrated class.
  owners.detect do |owner|
    !owner.abstract? && hash.detect_key { |other| owner.depends_on?(other, true) }
  end
end
331
+
332
+ # Creates the class => +migrate_+_<attribute>_ hash for the given klasses.
333
def create_migration_method_hashes
  # the class => attribute => migration filter hash
  @attr_flt_hash = {}
  customizable_class_attributes.each do |klass, _pas|
    flts = migration_filters(klass)
    @attr_flt_hash[klass] = flts if flts
  end
  # print the migration shim methods
  logger.info("Migration shim methods:\n#{@mgt_mths.qp}") unless @mgt_mths.empty?
end
345
+
346
+ # @return the class => attributes hash for terminal path attributes which can be customized by +migrate_+ methods
347
def customizable_class_attributes
  # The customizable classes set, starting with creatable classes and adding in
  # the migration path terminal attribute declarer classes below.
  klasses = @creatable_classes.to_set
  # the class => path terminal attributes hash
  cls_attrs_hash = LazyHash.new { Set.new }
  # add each path terminal attribute and its declarer class
  @cls_paths_hash.each_value do |paths|
    paths.each do |path|
      prop = path.last
      type = prop.declarer
      klasses << type
      cls_attrs_hash[type] << prop
    end
  end

  # Merge each redundant customizable superclass into its concrete customizable subclasses.
  klasses.dup.each do |cls|
    # The redundancy flag must hold whether ANY other class is a subclass of cls.
    # (It was formerly reassigned on every iteration, so removal depended on the
    # nondeterministic Set iteration order.)
    redundant = false
    klasses.each do |other|
      # cls is redundant if it is a superclass of other
      if other < cls then
        redundant = true
        cls_attrs_hash[other].merge!(cls_attrs_hash[cls])
      end
    end
    # remove the redundant class
    if redundant then
      cls_attrs_hash.delete(cls)
      klasses.delete(cls)
    end
  end

  cls_attrs_hash
end
382
+
383
+ # Discovers methods of the form +migrate+__attribute_ implemented for the paths
384
+ # in the given class => paths hash the given klass. The migrate method is called
385
+ # on the input field value corresponding to the path.
386
def migration_filters(klass)
  # the attribute => migration method hash
  mh = attribute_method_hash(klass)
  # Record the shim methods so they can be removed after the migration.
  @mgt_mths[klass] = mh unless mh.empty?
  # the attribute => value filter hash
  fh = attribute_filter_hash(klass)
  # Nothing to build if there are neither shim methods nor filters.
  return if mh.empty? and fh.empty?
  # For each class path terminal attribute metadata, add the migration filters
  # to the attribute metadata => proc hash.
  klass.attributes.to_compact_hash do |pa|
    # the filter
    flt = fh[pa]
    # the migration shim method
    mth = mh[pa]
    # The filter proc captures flt and mth for this attribute.
    Proc.new do |obj, value, row|
      # filter the value
      value = flt.transform(value) if flt and not value.nil?
      # apply the migrate_<attribute> method, if defined
      if mth then
        obj.send(mth, value, row) unless value.nil?
      else
        value
      end
    end
  end
end
412
+
413
# Collects the +migrate_+_<attribute>_ shim methods defined on the given class.
#
# @param [Class] klass the migration class
# @return [{Symbol => Symbol}] the attribute => migration method hash
def attribute_method_hash(klass)
  # the migrate methods, excluding the Migratable migrate_references method
  migrate_methods = klass.instance_methods(true).select { |mth| mth =~ /^migrate.(?!references)/ }
  # build the attribute => migration method hash
  migrate_methods.each_with_object({}) do |mth, mh|
    # the attribute suffix, e.g. name for migrate_name or Name for migrateName
    suffix = /^migrate(_)?(.*)/.match(mth).captures[1]
    # the attribute name
    attr_nm = suffix[0, 1].downcase + suffix[1..-1]
    # the attribute for the name, or skip if no such attribute
    begin
      pa = klass.standard_attribute(attr_nm)
    rescue
      next
    end
    # associate the attribute => method
    mh[pa] = mth
  end
end
430
+
431
+ # Builds the property => filter hash. The filter is specified in the +--filter+ migration
432
+ # option. A Boolean property has a default String => Boolean filter which converts the
433
+ # input string to a Boolean as specified in the +Jinx::Boolean+ +to_boolean+ methods.
434
+ #
435
+ # @param [Class] klass the migration class
436
+ # @return [Property => Proc] the filter migration methods
437
def attribute_filter_hash(klass)
  spec_hash = @flt_hash[klass]
  fh = {}
  klass.each_property do |prop|
    pa = prop.attribute
    spec = spec_hash[pa] if spec_hash
    if prop.type == Java::JavaLang::Boolean then
      # A boolean property gets a default filter which operates on the parsed string input.
      fh[pa] = boolean_filter(spec)
      logger.debug { "The migrator added the default text -> boolean filter for #{klass.qp} #{pa}." }
    elsif spec then
      fh[pa] = Migration::Filter.new(spec)
    end
  end
  logger.debug { "The migration filters were loaded for #{klass.qp} #{fh.keys.to_series}." } unless fh.empty?
  fh
end
456
+
457
+ # @param [String, nil] the value filter, if any
458
+ # @return [Migration::Filter] the boolean property migration filter
459
def boolean_filter(spec=nil)
  # break up the spec into two specs, one on strings and one on booleans
  bspec, sspec = spec.split { |k, v| Boolean === k } if spec
  # the boolean => boolean filter, if any boolean keys were specified
  bf = Migration::Filter.new(bspec) if bspec and not bspec.empty?
  # the string => boolean filter, if any string keys were specified
  sf = Migration::Filter.new(sspec) if sspec and not sspec.empty?
  # make the composite filter
  Migration::Filter.new do |value|
    # First try the string filter on the raw input value.
    fv = sf.transform(value) if sf
    if fv.nil? then
      # Fall back on the default string => boolean parse, then apply the
      # boolean filter, if any, to the parsed value.
      bv = Jinx::Boolean.for(value) rescue nil
      fv = bf.nil? || bv.nil? ? bv : bf.transform(bv)
    end
    fv
  end
end
474
+
475
+ # Loads the shim files.
476
+ #
477
+ # @param [<String>, String] files the file or file array
478
def load_shims(files)
  logger.debug { "Loading the migration shims with load path #{$:.pp_s}..." }
  files.enumerate do |file|
    # Kernel#load each shim so its class modifications take effect.
    load(file)
    logger.info { "The migrator loaded the shim file #{file}." }
  end
end
485
+
486
+ # Migrates all rows in the input.
487
+ #
488
+ # @yield (see #migrate)
489
+ # @yieldparam (see #migrate)
490
def migrate_rows
  # open an CSV output for rejects if the bad option is set
  if @bad_file then
    @rejects = open_rejects(@bad_file)
    logger.info("Unmigrated records will be written to #{File.expand_path(@bad_file)}.")
  end

  @rec_cnt = mgt_cnt = 0
  logger.info { "Migrating #{@input}..." }
  puts "Migrating #{@input}..." if @verbose
  @reader.each do |row|
    # the one-based current record number
    rec_no = @rec_cnt + 1
    # skip if the row precedes the from option
    if rec_no == @from and @rec_cnt > 0 then
      logger.info("Skipped the initial #{@rec_cnt} records.")
    elsif @to and rec_no > @to then
      # The :to option is the documented ending record number to process, so it
      # is inclusive: end only after record @to has been migrated. (The previous
      # rec_no == @to check stopped before processing record @to.)
      logger.info("Ending the migration after processing record #{@rec_cnt}.")
      return
    elsif rec_no < @from then
      @rec_cnt += 1
      next
    end
    begin
      # migrate the row
      logger.debug { "Migrating record #{rec_no}..." }
      tgt = migrate_row(row)
      # call the block on the migrated target
      if tgt then
        logger.debug { "The migrator built #{tgt} with the following content:\n#{tgt.dump}" }
        yield(tgt, row)
      end
    rescue Exception => e
      # Deliberately broad: any failure on a record is logged and, when a
      # rejects file exists, written there instead of aborting the run.
      logger.error("Migration error on record #{rec_no} - #{e.message}:\n#{e.backtrace.pp_s}")
      # If there is a reject file, then don't propagate the error.
      raise unless @rejects
      # try to clear the migration state
      clear(tgt) rescue nil
      # clear the target
      tgt = nil
    end
    if tgt then
      # replace the log message below with the commented alternative to detect a memory leak
      logger.info { "Migrated record #{rec_no}." }
      #memory_usage = `ps -o rss= -p #{Process.pid}`.to_f / 1024 # in megabytes
      #logger.debug { "Migrated rec #{@rec_cnt}; memory usage: #{sprintf("%.1f", memory_usage)} MB." }
      mgt_cnt += 1
      if @verbose then print_progress(mgt_cnt) end
      # clear the migration state
      clear(tgt)
    elsif @rejects then
      # If there is a rejects file then warn, write the reject and continue.
      logger.warn("Migration not performed on record #{rec_no}.")
      @rejects << row
      @rejects.flush
      logger.debug("Invalid record #{rec_no} was written to the rejects file #{@bad_file}.")
    else
      Jinx.fail(MigrationError, "Migration not performed on record #{rec_no}")
    end
    # Bump the record count.
    @rec_cnt += 1
  end
  logger.info("Migrated #{mgt_cnt} of #{@rec_cnt} records.")
  if @verbose then
    puts
    puts "Migrated #{mgt_cnt} of #{@rec_cnt} records."
  end
end
558
+
559
+ # Makes the rejects CSV output file.
560
+ #
561
+ # @param [String] file the output file
562
+ # @return [IO] the reject stream
563
def open_rejects(file)
  # Ensure the parent directory exists before opening the file.
  FileUtils.mkdir_p(File.dirname(file))
  # Open a CSV writer which emits a header row and symbolizes headers.
  csv_opts = { :headers => true, :header_converters => :symbol, :write_headers => true }
  FasterCSV.open(file, 'w', csv_opts)
end
569
+
570
+ # Prints a '+' progress indicator after each migrated record to stdout.
571
+ #
572
+ # @param [Integer] count the migrated record count
573
def print_progress(count)
  # Wrap the progress line after 72 indicators.
  puts if (count % 72).zero?
  # Emit one indicator per migrated record.
  print "+"
end
579
+
580
+ # Clears references to objects allocated for migration of a single row into the given target.
581
+ # This method does nothing. Subclasses can override.
582
+ #
583
+ # This method is overridden by subclasses to clear the migration state to conserve memory,
584
+ # since this migrator should consume O(1) rather than O(n) memory for n migration records.
585
def clear(target)
  # Intentionally empty: subclasses override this hook to release per-row
  # migration state so memory stays bounded over many records.
end
587
+
588
+ # Imports the given CSV row into a target object.
589
+ #
590
+ # @param [{Symbol => Object}] row the input row field => value hash
591
+ # @return the migrated target object if the migration is valid, nil otherwise
592
def migrate_row(row)
  # create an instance for each creatable class
  created = Set.new
  # the migrated objects
  migrated = @creatable_classes.map { |klass| create(klass, row, created) }
  # migrate each object from the input row
  migrated.each do |obj|
    # First uniquify the object if necessary.
    if @unique and Unique === obj then
      logger.debug { "The migrator is making #{obj} unique..." }
      obj.uniquify
    end
    obj.migrate(row, migrated)
  end
  # the valid migrated objects
  @migrated = migrate_valid_references(row, migrated)
  # the candidate target objects
  tgts = @migrated.select { |obj| @target_class === obj }
  if tgts.size > 1 then
    # Use Jinx.fail for consistency with every other error path in this class.
    Jinx.fail(MigrationError, "Ambiguous #{@target_class} targets #{tgts.to_series}")
  end
  # Bail out with nil if no target was migrated from this row.
  target = tgts.first || return

  logger.debug { "Migrated target #{target}." }
  target
end
618
+
619
+ # Sets the migration references for each valid migrated object.
620
+ #
621
+ # @param row (see #migrate_row)
622
+ # @param [Array] migrated the migrated objects
623
+ # @return [Array] the valid migrated objects
624
def migrate_valid_references(row, migrated)
  # Split the valid and invalid objects. The iteration is in reverse dependency order,
  # since invalidating a dependent can invalidate the owner.
  ordered = migrated.transitive_closure(:dependents)
  ordered.keep_if { |obj| migrated.include?(obj) }.reverse!
  valid, invalid = ordered.partition do |obj|
    if migration_valid?(obj) then
      obj.migrate_references(row, migrated, @target_class, @attr_flt_hash[obj.class])
      true
    else
      # Disconnect the invalid object from its owners.
      obj.class.owner_attributes.each { |pa| obj.clear_attribute(pa) }
      false
    end
  end

  # Go back through the valid objects in dependency order to invalidate dependents
  # whose owner is invalid.
  valid.reverse.each do |obj|
    unless owner_valid?(obj, valid, invalid) then
      invalid << valid.delete(obj)
      logger.debug { "The migrator invalidated #{obj} since it does not have a valid owner." }
    end
  end

  # Go back through the valid objects in reverse dependency order to invalidate owners
  # created only to hold a dependent which was subsequently invalidated.
  # Note: Array#reject is non-destructive here, so its result — the surviving
  # valid objects — is this method's return value.
  valid.reject do |obj|
    if @owners.include?(obj.class) and obj.dependents.all? { |dep| invalid.include?(dep) } then
      # clear all references from the invalidated owner
      obj.class.domain_attributes.each { |pa| obj.clear_attribute(pa) }
      invalid << obj
      logger.debug { "The migrator invalidated #{obj.qp} since it was created solely to hold subsequently invalidated dependents." }
      true
    end
  end
end
660
+
661
+ # Returns whether the given domain object satisfies at least one of the following conditions:
662
+ # * it does not have an owner among the invalid objects
663
+ # * it has an owner among the valid objects
664
+ #
665
+ # @param [Resource] obj the domain object to check
666
+ # @param [<Resource>] valid the valid migrated objects
667
+ # @param [<Resource>] invalid the invalid migrated objects
668
+ # @return [Boolean] whether the owner is valid
669
def owner_valid?(obj, valid, invalid)
  owner_types = obj.class.owners
  # Valid if no invalid object is an owner, or some valid object is an owner.
  no_invalid_owner = invalid.none? { |other| owner_types.include?(other.class) }
  no_invalid_owner || valid.any? { |other| owner_types.include?(other.class) }
end
674
+
675
+ # @param [Migratable] obj the migrated object
676
+ # @return [Boolean] whether the migration is successful
677
def migration_valid?(obj)
  # Delegate the validity check to the object; log the rejects.
  return true if obj.migration_valid?
  logger.debug { "The migrated #{obj.qp} is invalid." }
  false
end
685
+
686
+ # Creates an instance of the given klass from the given row.
687
+ # The new klass instance and all intermediate migrated instances are added to the
688
+ # created set.
689
+ #
690
+ # @param [Class] klass
691
+ # @param [{Symbol => Object}] row the input row
692
+ # @param [<Resource>] created the migrated instances for this row
693
+ # @return [Resource] the new instance
694
def create(klass, row, created)
  logger.debug { "The migrator is building #{klass.qp}..." }
  # Instantiate the class and record the new object before populating it.
  obj = klass.new
  created << obj
  # Populate from the mapped input fields, then apply the configured defaults.
  migrate_properties(obj, row, created)
  add_defaults(obj, row, created)
  logger.debug { "The migrator built #{obj}." }
  obj
end
703
+
704
+ # Migrates each input field to the associated domain object attribute.
705
+ # String input values are stripped. Missing input values are ignored.
706
+ #
707
+ # @param [Resource] the migration object
708
+ # @param row (see #create)
709
+ # @param [<Resource>] created (see #create)
710
def migrate_properties(obj, row, created)
  # For each input header which maps to a migratable target attribute path,
  # set the target attribute, creating intermediate objects as needed.
  paths = @cls_paths_hash[obj.class]
  paths.each do |path|
    header = @header_map[path][obj.class]
    # the input value for this header
    value = row[header]
    # String input values are stripped in place; missing values are skipped.
    value.strip! if String === value
    next if value.nil?
    # fill the reference path up to, but not including, the terminal property
    ref = fill_path(obj, path[0...-1], row, created)
    # set the terminal attribute
    migrate_property(ref, path.last, value, row)
  end
end
725
+
726
+ # @param [Resource] the migration object
727
+ # @param row (see #create)
728
+ # @param [<Resource>] created (see #create)
729
def add_defaults(obj, row, created)
  # Done if there are no defaults configured for this class.
  defaults = @def_hash[obj.class]
  return if defaults.nil?
  defaults.each do |path, value|
    # fill the reference path
    ref = fill_path(obj, path[0...-1], row, created)
    # set the attribute to the default value unless there is already a value
    ref.merge_attribute(path.last.to_sym, value)
  end
end
738
+
739
# Fills the given reference Property path starting at obj, creating each
# missing intermediate reference along the way.
#
# @param [Resource] obj the starting domain object
# @param [<Property>] path the reference properties to traverse
# @param row (see #create)
# @param created (see #create)
# @return the last domain object in the path (obj itself if path is empty)
def fill_path(obj, path, row, created)
  path.inject(obj) do |parent, prop|
    # Reuse the existing referenced object when present; otherwise build it.
    existing = parent.send(prop.reader)
    existing || create_reference(parent, prop, row, created)
  end
end
751
+
752
# Sets the given migrated object's reference attribute to a new referenced
# domain object of the property's declared type.
#
# @param [Resource] obj the domain object being migrated
# @param [Property] property the property being migrated
# @param row (see #create)
# @param created (see #create)
# @return the new referenced object
# @raise [MigrationError] if the property type is abstract and so cannot
#   be instantiated
def create_reference(obj, property, row, created)
  type = property.type
  if type.abstract?
    Jinx.fail(MigrationError, "Cannot create #{obj.qp} #{property} with abstract type #{property.type}")
  end
  # Build, migrate and attach the new reference.
  ref = type.new
  ref.migrate(row, Array::EMPTY_ARRAY)
  obj.send(property.writer, ref)
  created << ref
  logger.debug { "The migrator created #{obj.qp} #{property} #{ref}." }
  ref
end
770
+
771
# Sets the given property value to the filtered input value. If there is a
# filter defined for the property, then that filter is applied. If there is a
# migration shim method with name +migrate_+_attribute_, then that method is
# called on the (possibly filtered) value. The target object property is set
# to the resulting filtered value.
#
# @param [Migratable] obj the target domain object
# @param [Property] property the property to set
# @param value the input value
# @param [{Symbol => Object}] row the input row
# @raise [MigrationError] if the attribute writer fails
def migrate_property(obj, property, value, row)
  # If there is a shim migrate_<attribute> method, then call it on the input value.
  value = filter_value(obj, property, value, row)
  return if value.nil?
  # Set the attribute.
  begin
    obj.send(property.writer, value)
  rescue StandardError => e
    # Rescue StandardError rather than Exception: rescuing Exception would
    # also capture Interrupt, SystemExit and NoMemoryError and convert them
    # into a MigrationError, which must not happen.
    Jinx.fail(MigrationError, "Could not set #{obj.qp} #{property} to #{value.qp}", e)
  end
  logger.debug { "Migrated #{obj.qp} #{property} to #{value}." }
end
793
+
794
# Calls the shim migrate_<attribute> method or config filter on the input value.
#
# @param [Migratable] obj the target domain object
# @param [Property] property the property to set
# @param value the input value
# @param [{Symbol => Object}] row the input row
# @return the input value, if there is no filter, otherwise the filtered value
def filter_value(obj, property, value, row)
  filter = filter_for(obj, property.to_sym)
  return value if filter.nil?
  filtered = filter.call(obj, value, row)
  if value != filtered
    logger.debug { "The migration filter transformed the #{obj.qp} #{property} value from #{value.qp} to #{filtered}." }
  end
  filtered
end
808
+
809
# Looks up the migration filter configured for the given object's class
# and attribute.
#
# @param [Resource] obj the domain object being migrated
# @param [Symbol] attribute the standard attribute symbol
# @return [Proc, nil] the filter, or nil if none is configured
def filter_for(obj, attribute)
  filters = @attr_flt_hash[obj.class]
  filters && filters[attribute]
end
813
+
814
# Saves the given object in the database, either by an unconditional create
# or a create-or-update save, depending on the migrator's @create option.
#
# @param [Resource] obj the domain object to save in the database
# @param database the database to save to
# @return [Resource, nil] obj if the save is successful, nil otherwise
def save(obj, database)
  # Capture and return the database result. The original returned the value
  # of the trailing logger.debug call instead of the database operation
  # result, contradicting the documented return value.
  if @create
    logger.debug { "Migrator creating #{obj}..." }
    result = database.create(obj)
    logger.debug { "Migrator created #{obj}." }
  else
    logger.debug { "Migrator saving #{obj}..." }
    result = database.save(obj)
    logger.debug { "Migrator saved #{obj}." }
  end
  result
end
827
+
828
# @return [Integer] the 1-based number of the record currently being migrated
def current_record
  1 + @rec_cnt
end
831
+
832
# @param [<String>, String] files the migration fields mapping file or file array
# @return [{Class => {Property => Symbol}}] the class => path => header hash
#   loaded from the mapping files
# @raise [MigrationError] if a mapped class is abstract
def load_field_map_files(files)
  map = LazyHash.new { Hash.new }
  files.enumerate { |file| load_field_map_file(file, map) }

  # Include the target class.
  map[@target_class] ||= Hash.new
  # Add the default classes.
  @def_hash.each_key { |klass| map[klass] ||= Hash.new }
  # Add the owners.
  add_owners(map) { Hash.new }

  # Include only concrete classes that are not a superclass of another migration class.
  classes = map.keys
  sub_hash = classes.to_compact_hash do |klass|
    subs = classes.select { |other| other < klass }
    # Keep only the most-derived subclasses. The original used
    # subs.delete_if { |klass| ... }, which both shadowed the outer klass
    # block variable and mutated subs while the inner any? re-read it.
    # reject over a stable snapshot is equivalent, since < is transitive:
    # any deleted witness implies a surviving deeper witness.
    subs.reject { |sub| subs.any? { |other| other < sub } }
  end

  # Merge the superclass paths into the subclass paths.
  sub_hash.each do |klass, subs|
    paths = map.delete(klass)
    # Add, but don't replace, path => header entries from the superclass.
    subs.each do |sub|
      map[sub].merge!(paths) { |key, old, new| old }
      logger.debug { "Migrator merged #{klass.qp} mappings into the subclass #{sub.qp}." }
    end
  end

  # Validate that there are no abstract classes in the mapping.
  map.each_key do |klass|
    if klass.abstract? then
      raise MigrationError.new("Cannot migrate to the abstract class #{klass}")
    end
  end

  map
end
872
+
873
# Loads one migration fields configuration file into the given hash.
#
# @param [String] file the migration fields configuration file
# @param [{Class => {Property => Symbol}}] hash the class => path => header hash
#   to populate from the loaded configuration
# @raise [MigrationError] if the file cannot be read
def load_field_map_file(file, hash)
  # Load the field mapping config file.
  begin
    config = YAML.load_file(file)
  rescue
    # Interpolate $! rather than concatenating with String#+: the original
    # "..." + $! raised a TypeError (no implicit conversion of the exception
    # into String) whenever this rescue fired, masking the real error.
    Jinx.fail(MigrationError, "Could not read field map file #{file}: #{$!}")
  end
  populate_field_map(config, hash)
end
885
+
886
# Collects the class => path => header entries from one parsed field mapping
# configuration into the given hash.
#
# @param [{String => String}] config the attribute => header specification
# @param hash (see #load_field_map_file)
# @raise [MigrationError] if a configured field is not an input file header
def populate_field_map(config, hash)
  config.each do |field, attr_list|
    next if attr_list.blank?
    # The header accessor method for the field.
    header = @reader.accessor(field)
    if header.nil?
      Jinx.fail(MigrationError, "Field defined in migration configuration not found in input file #{@input} headers: #{field}")
    end
    # Associate each comma-separated attribute path with the header.
    attr_list.split(/,\s*/).each do |path_s|
      klass, path = create_attribute_path(path_s)
      hash[klass][path] = header
    end
  end
end
904
+
905
# Loads the defaults configuration files.
#
# @param [<String>, String] files the file or file array to load
# @return [<Class => <String => Object>>] the class => path => default value entries
def load_defaults_files(files)
  # Accumulate the class => path => value entries from each defaults file.
  defaults = LazyHash.new { Hash.new }
  files.enumerate { |file| load_defaults_file(file, defaults) }
  defaults
end
915
+
916
# Loads the defaults config file into the given hash.
#
# @param [String] file the file to load
# @param [<Class => <String => Object>>] hash the class => path => default value entries
# @raise [MigrationError] if the file cannot be read
def load_defaults_file(file, hash)
  begin
    config = YAML::load_file(file)
  rescue
    # Interpolate $! rather than concatenating with String#+: the original
    # "..." + $! raised a TypeError (exception is not a String) whenever
    # this rescue fired, masking the real error.
    Jinx.fail(MigrationError, "Could not read defaults file #{file}: #{$!}")
  end
  # Collect the class => path => value entries.
  config.each do |path_s, value|
    next if value.nil_or_empty?
    klass, path = create_attribute_path(path_s)
    hash[klass][path] = value
  end
end
933
# Loads the filter config files.
#
# @param [<String>, String] files the file or file array to load
# @return [<Class => <Symbol => Object>>] the class => attribute => filter entries
def load_filter_files(files)
  # Accumulate the class => attribute => filter entries from each filter file.
  filters = {}
  files.enumerate { |file| load_filter_file(file, filters) }
  logger.debug { "The migrator loaded the filters #{filters.qp}." }
  filters
end
944
+
945
# Loads the filter config file into the given hash.
#
# @param [String] file the file to load
# @param [<Class => <String => <Object => Object>>>] hash the class => path => input value => caTissue value entries
# @raise [MigrationError] if the file cannot be read or a filter path has
#   more than one attribute
def load_filter_file(file, hash)
  # Collect the class => attribute => filter entries.
  logger.debug { "Loading the migration filter configuration #{file}..." }
  begin
    config = YAML::load_file(file)
  rescue
    # Interpolate $! rather than concatenating with String#+: the original
    # "..." + $! raised a TypeError (exception is not a String) whenever
    # this rescue fired, masking the real error.
    Jinx.fail(MigrationError, "Could not read filter file #{file}: #{$!}")
  end
  config.each do |path_s, flt|
    next if flt.nil_or_empty?
    klass, path = create_attribute_path(path_s)
    # Only a single-attribute path is supported for filters.
    unless path.size == 1 then
      Jinx.fail(MigrationError, "Migration filter configuration path not supported: #{path_s}")
    end
    pa = klass.standard_attribute(path.first.to_sym)
    flt_hash = hash[klass] ||= {}
    flt_hash[pa] = flt
  end
end
968
+
969
# Parses a period-delimited path string into its starting class and the
# corresponding attribute metadata path.
#
# @param [String] path_s a period-delimited path string path_s in the form
#   _class_(._attribute_)+
# @return [(Class, <Property>)] the starting class and attribute metadata path
# @raise [MigrationError] if the path string is malformed or an attribute is
#   not found
def create_attribute_path(path_s)
  names = path_s.split('.')
  # If the path starts with a capitalized class name, then resolve the class.
  # Otherwise, the target class is the start of the path.
  klass = if names.first =~ /^[A-Z]/
    context_module.module_for_name(names.shift)
  else
    @target_class
  end
  # There must be at least one attribute.
  if names.empty?
    Jinx.fail(MigrationError, "Property entry in migration configuration is not in <class>.<attribute> format: #{path_s}")
  end

  # Build the attribute path, walking from the starting class through each
  # attribute's declared type.
  path = []
  names.inject(klass) do |parent, name|
    pa = name.to_sym
    prop = begin
      parent.property(pa)
    rescue NameError => e
      Jinx.fail(MigrationError, "Migration field mapping attribute #{parent.qp}.#{pa} not found", e)
    end
    # Collection attributes cannot be migrated through a path.
    if prop.collection?
      Jinx.fail(MigrationError, "Migration field mapping attribute #{parent.qp}.#{prop} is a collection, which is not supported")
    end
    path << prop
    prop.type
  end

  # Return the starting class and Property path.
  # Note that the starting class is not necessarily the first path attribute
  # declarer, since the starting class could be the concrete target class
  # rather than an abstract declarer. This is important, since the class must
  # be instantiated.
  [klass, path]
end
1004
+
1005
# The context module is given by the target class {ResourceClass#domain_module}.
# It is the namespace within which capitalized class names in a mapping path
# are resolved.
#
# @return [Module] the class name resolution context
def context_module
  @target_class.domain_module
end
1011
+
1012
# Builds the class => paths hash from the migration fields configuration map
# and the defaults map.
#
# @param [{Class => {Property => Symbol}}] fld_map the field mapping hash
# @param [{Class => {Property => Object}}] def_map the defaults hash
# @return [{Class => Set}] a new class => paths hash combining both maps
def create_class_paths_hash(fld_map, def_map)
  hash = {}
  fld_map.each_pair do |klass, path_hdr_hash|
    hash[klass] = path_hdr_hash.keys.to_set
  end
  def_map.each_pair do |klass, path_val_hash|
    (hash[klass] ||= Set.new).merge(path_val_hash.keys)
  end
  hash
end
1019
+
1020
# Inverts the migration fields configuration map into a path-first lookup.
#
# @param [{Class => {Property => Symbol}}] fld_map the field mapping hash
# @return a new path => class => header hash from the migration fields
#   configuration map
def create_header_map(fld_map)
  map = LazyHash.new { Hash.new }
  fld_map.each do |klass, path_hdr_hash|
    path_hdr_hash.each_pair { |path, hdr| map[path][klass] = hdr }
  end
  map
end
1028
+ end
1029
+ end