hekenga 0.2.13 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,15 @@
 require 'hekenga/invalid'
 require 'hekenga/context'
 require 'hekenga/parallel_job'
+require 'hekenga/parallel_task'
 require 'hekenga/master_process'
+require 'hekenga/document_task_record'
+require 'hekenga/document_task_executor'
 require 'hekenga/log'
 module Hekenga
   class Migration
-    attr_accessor :stamp, :description, :batch_size
-    attr_reader :tasks
+    attr_accessor :stamp, :description, :batch_size, :active_idx
+    attr_reader :tasks, :session, :test_mode
 
     def initialize
       @tasks = []
@@ -50,15 +53,19 @@ module Hekenga
     def reload_logs
       @logs = {}
     end
+
     def performing?
       Hekenga::Log.where(pkey: self.to_key, done: false).any?
     end
+
    def performed?
       !!log(self.tasks.length - 1).done
     end
+
     def test_mode!
       @test_mode = true
     end
+
     def perform!
       if Hekenga.status(self) == :naught
         Hekenga::MasterProcess.new(self).run!
@@ -67,322 +74,102 @@ module Hekenga
         return false
       end
     end
-    def perform_task!(task_idx = 0, scope = nil)
+
+    def perform_task!(task_idx)
       task = @tasks[task_idx] or return
       @active_idx = task_idx
       case task
       when Hekenga::SimpleTask
         start_simple_task(task)
       when Hekenga::DocumentTask
-        # TODO - online migration support (have log.total update, requeue)
-        scope ||= task.scope.asc(:_id)
         if task.parallel
-          start_parallel_task(task, task_idx, scope)
-        else
-          start_document_task(task, task_idx, scope)
-        end
-      end
-    end
-    def recover!
-      # NOTE - can't find a way to check this automatically with ActiveJob right now
-      return false unless prompt "Check that the migration queue has processed before recovering. Continue?"
-      # Write failures
-      @tasks.each.with_index do |task, idx|
-        # If no log, run the task now
-        unless Hekenga::Log.where(pkey: self.to_key, task_idx: idx).any?
-          return false unless retry_task!(task, idx)
-          next
-        end
-        # Did this task fail?
-        failedP = log(idx).cancel || Hekenga::Failure.where(pkey: to_key, task_idx: idx).any?
-        # If it didn't, keep searching
-        next unless failedP
-        # This is the first failure we've detected - recover from it
-        case task
-        when Hekenga::DocumentTask
-          ret = recover_document_task!(task, idx)
-        when Hekenga::SimpleTask
-          ret = recover_simple!(task, idx)
-        end
-
-        case ret
-        when :next
-          next
-        when :cancel
-          return false
+          start_parallel_task(task, task_idx)
         else
-          return false unless retry_task!(task, idx, ret)
+          start_document_task(task, task_idx)
         end
       end
-      return true
     end
 
-    def retry_task!(task, idx, scope = nil)
-      Hekenga.log "Retrying task##{idx}"
-      unless Hekenga::MasterProcess.new(self).retry!(idx, scope)
-        Hekenga.log "Failed to retry the task. Aborting.."
-        return false
-      end
-      return true
-    end
-
-    def recover_simple!(task, idx)
-      # Simple tasks just get retried - no fuss
-      Hekenga.log("Found failed simple task. Retrying..")
-      return
-    end
-
-    def recover_document_task!(task, idx)
-      # Document tasks are a bit more involved.
-      validation_failures = Hekenga::Failure::Validation.where(pkey: to_key, task_idx: idx)
-      write_failures = Hekenga::Failure::Write.where(pkey: to_key, task_idx: idx)
-      error_failures = Hekenga::Failure::Error.where(pkey: to_key, task_idx: idx)
-      cancelled_failures = Hekenga::Failure::Cancelled.where(pkey: to_key, task_idx: idx)
-
-      # Stats
-      validation_failure_ctr = validation_failures.count
-      write_failure_ctr = write_failures.count
-      error_failure_ctr = error_failures.count
-      cancelled_failure_ctr = cancelled_failures.count
-
-      # Prompt for recovery
-      recoverP = prompt(
-        "Found #{validation_failure_ctr} invalid, "+
-        "#{write_failure_ctr} failed writes, "+
-        "#{error_failure_ctr} errors, "+
-        "#{cancelled_failure_ctr} cancelled on migration. Recover?"
-      )
-      return :next unless recoverP
-
-      # Recover from critical write failures (DB records potentially lost)
-      unless write_failure_ctr.zero?
-        Hekenga.log "Recovering old data from #{write_failure_ctr} write failure(s)"
-        recover_data(write_failures, task.scope.klass)
-      end
-
-      # Resume task from point of error
-      if task.parallel
-        # TODO - support for recovery on huge # IDs
-        failed_ids = [
-          write_failures.pluck(:document_ids),
-          error_failures.pluck(:batch_start),
-          cancelled_failures.pluck(:document_ids),
-          validation_failures.pluck(:doc_id)
-        ].flatten.compact
-        resume_scope = task.scope.klass.asc(:_id).in(_id: failed_ids)
-      else
-        first_id = error_failures.first&.batch_start || write_failures.first&.batch_start
-        invalid_ids = validation_failures.pluck(:doc_id)
-        if first_id && invalid_ids.any?
-          resume_scope = task.scope.klass.asc(:_id).and(
-            task.scope.selector,
-            task.scope.klass.or(
-              {_id: {:$gte => first_id}},
-              {_id: {:$in => invalid_ids}}
-            ).selector
-          )
-        elsif first_id
-          resume_scope = task.scope.asc(:_id).gte(_id: first_id)
-        elsif invalid_ids.any?
-          resume_scope = task.scope.klass.asc(:_id).in(_id: invalid_ids)
-        else
-          resume_scope = :next
-        end
-      end
-
-      return resume_scope
-    end
-
-    def recover_data(write_failures, klass)
-      write_failures.each do |write_failure|
-        failed_ids = write_failure.document_ids
-        extant = klass.in(_id: failed_ids).pluck(:_id)
-        to_recover = (failed_ids - extant)
-        docs = write_failure.documents.find_all {|x| to_recover.include?(x["_id"])}
-        next if docs.empty?
-        Hekenga.log "Recovering #{docs.length} documents.."
-        klass.collection.insert_many(docs)
-      end
-    end
-
-    def prompt(str)
-      loop do
-        print "#{str} (Y/N):\n"
-        case gets.chomp.downcase
-        when "y"
-          return true
-        when "n"
-          return false
-        end
-      end
-    end
-
-    def rollback!
-      # TODO
+    def recover!
+      Hekenga::MasterProcess.new(self).recover!
     end
 
     # Internal perform methods
     def start_simple_task(task)
       create_log!
       begin
-        with_setup do
-          task.up!(@context)
-        end
+        @context = Hekenga::Context.new(test_mode: test_mode)
+        task.up!(@context)
       rescue => e
         simple_failure!(e)
         return
+      ensure
+        @context = nil
       end
       log_done!
     end
 
-    def check_for_completion
-      if log.processed == log.total
-        log_done!
-      end
-    end
     def log_done!
-      log.set(done: true, finished: Time.now)
+      log.set_without_session({done: true, finished: Time.now})
     end
-    def start_parallel_task(task, task_idx, scope)
-      # TODO - support for crazy numbers of documents where pluck is too big
-      scope.asc(:_id).pluck(:_id).tap do |all_ids|
-        create_log!(total: all_ids.length)
-      end.each_slice(batch_size).each do |ids|
-        Hekenga::ParallelJob.perform_later(
-          self.to_key, task_idx, ids.map(&:to_s), !!@test_mode
-        )
-      end
-      check_for_completion # if 0 items to migrate
-    end
-    def run_parallel_task(task_idx, ids)
-      @active_idx = task_idx
-      if log(task_idx).cancel
-        failed_cancelled!(ids)
-        return
-      end
-      task = self.tasks[task_idx] or return
-      with_setup(task) do
-        process_batch(task, task.scope.klass.asc(:_id).in(_id: ids).to_a)
-        unless @skipped.empty?
-          failed_cancelled!(@skipped.map(&:_id))
-        end
-      end
+
+    def start_parallel_task(task, task_idx)
+      create_log!
+      Hekenga::ParallelTask.new(
+        migration: self,
+        task: task,
+        task_idx: task_idx,
+        test_mode: test_mode
+      ).start!
     end
-    def with_setup(task = nil)
-      @context = Hekenga::Context.new(@test_mode)
-      task&.setups&.each do |block|
-        @context.instance_exec(&block)
-      end
-      begin
-        yield
-      ensure
-        @context = nil
-      end
+
+    def task_records(task_idx)
+      Hekenga::DocumentTaskRecord.where(migration_key: to_key, task_idx: task_idx)
     end
-    def start_document_task(task, task_idx, scope)
-      create_log!(total: scope.count)
+
+    def start_document_task(task, task_idx, recover: false)
+      create_log!
       records = []
-      with_setup(task) do
-        scope.asc(:_id).no_timeout.each do |record|
-          records.push(record)
-          if records.length == batch_size
-            process_batch(task, records)
-            return if log.cancel
-            records = []
-          end
-        end
-        process_batch(task, records) if records.any?
-      end
+      task_records(task_idx).delete_all unless recover
+      executor_key = BSON::ObjectId.new
+      task.scope.asc(:_id).no_timeout.each do |record|
+        records.push(record)
+        next unless records.length == (task.batch_size || batch_size)
+
+        records = filter_out_processed(task, task_idx, records) if recover
+        next unless records.length == (task.batch_size || batch_size)
+
+        execute_document_task(task_idx, executor_key, records)
+        records = []
+        return if log.cancel
+      end
+      records = filter_out_processed(task, task_idx, records) if recover
+      execute_document_task(task_idx, executor_key, records) if records.any?
+      return if log.cancel
       log_done!
     end
-    def run_filters(task, record)
-      task.filters.all? do |block|
-        @context.instance_exec(record, &block)
-      end
-    end
-    def deep_clone(record)
-      record.as_document.deep_dup
-    end
-    def process_batch(task, records)
-      @skipped = []
-      to_persist = []
-      fallbacks = []
 
-      filtered = records.group_by do |record|
-        run_filters(task, record)
+    def filter_out_processed(task, task_idx, records)
+      return records if records.empty?
+
+      selector = task_records(task_idx).in(ids: records.map(&:id))
+      processed_ids = selector.pluck(:ids).flatten.to_set
+      records.reject do |record|
+        processed_ids.include?(record._id)
       end
-      log_skipped(task, filtered[false]) if filtered[false]
-      return unless filtered[true]
-      filtered[true].map.with_index do |record, idx|
-        original_record = deep_clone(record)
-        begin
-          task.up!(@context, record)
-        rescue => e
-          failed_apply!(e, record, records[0].id)
-          @skipped = filtered[true][idx+1..-1]
-          return
-        end
-        if validate_record(task, record)
-          to_persist.push(record)
-          fallbacks.push(original_record)
-        else
-          if log.cancel
-            @skipped = filtered[true][idx+1..-1]
-            return
-          end
-        end
-      end.compact
-      persist_batch(task, to_persist, fallbacks)
-    end
-    def log_skipped(task, records)
-      log.incr_and_return(
-        skipped: records.length,
-        processed: records.length
-      )
-      check_for_completion
     end
-    def log_success(task, records)
-      log.incr_and_return(
-        processed: records.length
+
+    def execute_document_task(task_idx, executor_key, records)
+      task_record = Hekenga::DocumentTaskRecord.create(
+        migration_key: to_key,
+        task_idx: task_idx,
+        executor_key: executor_key,
+        test_mode: test_mode,
+        ids: records.map(&:id)
       )
-      check_for_completion
+      Hekenga::DocumentTaskExecutor.new(task_record, records: records).run!
     end
 
-    def persist_batch(task, records, original_records)
-      if @test_mode
-        log_success(task, records)
-        return
-      end
-      # NOTE - edgecase where callbacks cause the record to become invalid is
-      # not covered
-      records.each do |record|
-        begin
-          next if task.skip_prepare
-          if task.timeless
-            record.timeless.send(:prepare_update) {}
-          else
-            record.send(:prepare_update) {}
-          end
-        rescue => e
-          # If prepare_update throws an error, we're in trouble - crash out now
-          failed_apply!(e, record, records[0].id)
-          return
-        end
-      end
-      begin
-        delete_records!(task.scope.klass, records.map(&:_id))
-        write_records!(task.scope.klass, records)
-        log_success(task, records)
-      rescue => e
-        failed_write!(e, original_records)
-      end
-    end
-    def delete_records!(klass, ids)
-      klass.in(_id: ids).delete_all
-    end
-    def write_records!(klass, records)
-      klass.collection.insert_many(records.map(&:as_document))
-    end
     def simple_failure!(error)
       log.add_failure({
         message: error.to_s,
@@ -391,57 +178,10 @@ module Hekenga
       }, Hekenga::Failure::Error)
       log_cancel!
     end
-    def failed_cancelled!(ids)
-      log.add_failure({
-        document_ids: ids,
-        batch_start: ids[0]
-      }, Hekenga::Failure::Cancelled)
-    end
-    def failed_apply!(error, record, batch_start_id)
-      log.add_failure({
-        message: error.to_s,
-        backtrace: error.backtrace,
-        document: deep_clone(record),
-        batch_start: batch_start_id
-      }, Hekenga::Failure::Error)
-      log_cancel!
-    end
+
     def log_cancel!
-      log.set(cancel: true, error: true, done: true, finished: Time.now)
-    end
-    def failed_write!(error, original_records)
-      log.add_failure({
-        message: error.to_s,
-        backtrace: error.backtrace,
-        documents: original_records,
-        document_ids: original_records.map {|x| x["_id"]},
-        batch_start: original_records[0]["_id"]
-      }, Hekenga::Failure::Write)
-      log_cancel!
-    end
-    def failed_validation!(task, record)
-      log.add_failure({
-        doc_id: record.id,
-        errs: record.errors.full_messages,
-        document: deep_clone(record),
-      }, Hekenga::Failure::Validation)
-      log.set(error: true)
-      log.incr_and_return(processed: 1, unvalid: 1)
-      if task.invalid_strategy == :cancel
-        log_cancel!
-      else
-        check_for_completion
-      end
-    end
-    def validate_record(task, record)
-      # TODO - ability to skip validation
-      # TODO - handle errors on validation
-      if record.valid?
-        true
-      else
-        failed_validation!(task, record)
-        false
-      end
+      # Bypass the active transaction if there is one
+      log.set_without_session({cancel: true, error: true, done: true, finished: Time.now})
     end
 
     # Validations
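
The rewritten start_document_task in the Migration diff above replaces the old resume-scope reconstruction: on recovery, each candidate batch is filtered against existing DocumentTaskRecords and only executed once it has filled back up to the batch size, with the trailing partial batch running as-is. A minimal standalone sketch of that slice/filter/execute loop, using integers for documents and a Set in place of task_records (everything below is illustrative, not part of the gem):

    require 'set'

    batch_size        = 3
    already_processed = Set[2, 3, 7]  # IDs covered by existing task records
    batch             = []

    (1..10).each do |id|
      batch.push(id)
      next unless batch.length == batch_size

      batch.reject! { |i| already_processed.include?(i) }  # recover mode: drop done IDs
      next unless batch.length == batch_size               # refill before executing

      puts "execute batch #{batch.inspect}"
      batch = []
    end
    batch.reject! { |i| already_processed.include?(i) }
    puts "execute final batch #{batch.inspect}" if batch.any?

This prints batches [1, 4, 5] and [6, 8, 9] followed by a final short batch [10], mirroring how IDs 2, 3 and 7 are skipped without being re-run.
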
@@ -4,10 +4,17 @@ module Hekenga
     queue_as do
       ENV["HEKENGA_QUEUE"] || :migration
     end
-    def perform(migration_key, task_idx, ids, test_mode)
-      migration = Hekenga.find_migration(migration_key)
-      migration.test_mode! if test_mode
-      migration.run_parallel_task(task_idx, ids)
+    def perform(document_task_record_id, executor_key)
+      record = Hekenga::DocumentTaskRecord.where(_id: document_task_record_id).first
+      return if record.nil?
+      return if record.executor_key != BSON::ObjectId(executor_key)
+      return if record.complete?
+
+      executor = Hekenga::DocumentTaskExecutor.new(record)
+      return if executor.migration_cancelled?
+
+      executor.run!
+      executor.check_for_completion!
     end
   end
 end
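
In the ParallelJob diff above, the job arguments change from (migration_key, task_idx, ids, test_mode) to (document_task_record_id, executor_key): each job now owns one pre-written DocumentTaskRecord instead of a raw ID slice. Because a resumed run stamps every record with a fresh executor key (see ParallelTask#resume! below) before requeueing, a job left over from an earlier run fails the key comparison and exits without doing work. A hedged sketch of that guard using only the bson gem (the gem's own code parses the string with BSON::ObjectId(...); from_string is the equivalent spelled-out call):

    require 'bson'

    current_key = BSON::ObjectId.new  # stamped onto the record by a resumed run
    stale_key   = BSON::ObjectId.new  # serialized into a job queued earlier

    # Jobs carry the key as a string, so parse it back before comparing,
    # as ParallelJob#perform does.
    if current_key != BSON::ObjectId.from_string(stale_key.to_s)
      puts "executor key mismatch - skipping stale job"
    end

Since ObjectIds compare by value, only jobs enqueued under the current key proceed; everything else is idempotently dropped.
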
@@ -0,0 +1,110 @@
+require 'hekenga/iterator'
+require 'hekenga/document_task_executor'
+require 'hekenga/task_splitter'
+
+module Hekenga
+  class ParallelTask
+    attr_reader :migration, :task, :task_idx, :test_mode
+
+    def initialize(migration:, task:, task_idx:, test_mode:)
+      @migration = migration
+      @task = task
+      @task_idx = task_idx
+      @test_mode = test_mode
+    end
+
+    def start!
+      clear_task_records!
+      @executor_key = BSON::ObjectId.new
+      generate_for_scope(task.scope)
+      check_for_completion!
+    end
+
+    def resume!
+      @executor_key = BSON::ObjectId.new
+      task_records.set(executor_key: @executor_key)
+      queue_jobs!(task_records.incomplete)
+      generate_new_records!
+      recover_failed_records!
+      check_for_completion!
+    end
+
+    def complete?
+      task_records.incomplete.none?
+    end
+
+    def check_for_completion!
+      if complete?
+        migration.log(task_idx).set_without_session(done: true, finished: Time.now)
+      end
+    end
+
+    private
+
+    def generate_for_scope(scope)
+      Hekenga::Iterator.new(scope, size: 100_000).each do |id_block|
+        task_records = id_block.each_slice(batch_size).map do |id_slice|
+          generate_task_records!(id_slice)
+        end
+        write_task_records!(task_records)
+        queue_jobs!(task_records)
+      end
+    end
+
+    def generate_new_records!
+      last_record = task_records.desc(:_id).first
+      last_id = last_record&.ids&.last
+      scope = task.scope
+      scope = task.scope.and(_id: {'$gt': last_id}) if last_id
+      generate_for_scope(scope)
+    end
+
+    # Any records with a failure or a validation failure get moved into
+    # a new task record which is incomplete and gets a job queued
+    def recover_failed_records!
+      task_records.complete.no_timeout.each do |record|
+        Hekenga::TaskSplitter.new(record, @executor_key).call.tap do |new_record|
+          next if new_record.nil?
+
+          Hekenga::ParallelJob.perform_later(new_record.id.to_s, @executor_key.to_s)
+        end
+      end
+    end
+
+    def batch_size
+      task.batch_size || migration.batch_size
+    end
+
+    def clear_task_records!
+      task_records.delete_all
+    end
+
+    def task_records
+      migration.task_records(task_idx)
+    end
+
+    def generate_task_records!(id_slice)
+      Hekenga::DocumentTaskRecord.new(
+        migration_key: migration.to_key,
+        task_idx: task_idx,
+        executor_key: @executor_key,
+        test_mode: test_mode,
+        ids: id_slice
+      ).tap do |record|
+        record.send(:prepare_insert) {}
+      end
+    end
+
+    def write_task_records!(records)
+      Hekenga::DocumentTaskRecord.collection.bulk_write(records.map do |record|
+        { insert_one: record.as_document }
+      end)
+    end
+
+    def queue_jobs!(records)
+      records.each do |record|
+        Hekenga::ParallelJob.perform_later(record.id.to_s, @executor_key.to_s)
+      end
+    end
+  end
+end
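
The new ParallelTask class above replaces the old approach of plucking every ID up front: Hekenga::Iterator walks the scope in 100,000-ID blocks, each block is sliced into batch-sized DocumentTaskRecords that are bulk-written and queued, and completion is judged by whether any incomplete records remain. A hypothetical driver, assuming an already-loaded `migration` with a parallel document task at index 0 (in the gem itself, MasterProcess plays this role):

    task_idx = 0
    runner = Hekenga::ParallelTask.new(
      migration: migration,
      task:      migration.tasks[task_idx],
      task_idx:  task_idx,
      test_mode: false
    )

    runner.start!  # clears stale task records, writes fresh ones, queues jobs

    # After a crash or deploy interruption, the same work can be resumed:
    # incomplete records are requeued under a new executor key, IDs created
    # since the last record are picked up, and failed batches are split out.
    runner.resume! unless runner.complete?
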
@@ -16,8 +16,7 @@ module Hekenga
     end
 
     def to_path
-      @path ||= File.join(Hekenga.config.abs_dir, @migration.to_key.gsub(/\:/, '-') +
-                 '.rb')
+      @path ||= File.join(Hekenga.config.abs_dir, @migration.to_key+".rb")
     end
 
     def to_s
@@ -28,33 +27,38 @@ module Hekenga
   created #{@migration.timestamp.sub("T", " ").inspect}
 
   ## Optional
-  # batch_size 10
+  #batch_size 25
 
   ## Simple tasks
-  # task "task description" do
-  #   up do
-  #   end
-  # end
+  #task "task description" do
+  #  up do
+  #  end
+  #end
 
   ## Per document tasks
-  # per_document "task description" do
-  #   ## Required
-  #   scope MyModel.all
+  #per_document "task description" do
+  #  ## Required
+  #  scope MyModel.all
   #
-  #   ## Optional config
-  #   # parallel!
-  #   # timeless!
-  #   # skip_prepare!
-  #   # when_invalid :prompt # :prompt, :cancel, :stop, :continue
-  #   #
-  #   # setup do
-  #   # end
-  #   # filter do
-  #   # end
+  #  ## Optional config
+  #  #parallel!
+  #  #timeless!
+  #  #always_write!
+  #  #skip_prepare!
+  #  #batch_size 25
+  #  #write_strategy :update # :delete_then_insert
   #
-  #   up do |doc|
-  #   end
-  # end
+  #  # Called once per batch, instance variables will be accessible
+  #  # in the filter & up blocks
+  #  #setup do
+  #  #end
+  #
+  #  #filter do |doc|
+  #  #end
+  #
+  #  up do |doc|
+  #  end
+  #end
 end
      EOF
     end
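
The scaffold diff above regenerates the migration template: the per_document block now advertises the 1.0.0 options (always_write!, a per-task batch_size, and write_strategy :update or :delete_then_insert) and drops the old when_invalid prompt strategy. Uncommented and filled in, a per_document task from the new template might look like this (MyModel and the email field are placeholders):

    per_document "lowercase emails" do
      scope MyModel.all
      batch_size 25
      write_strategy :update

      # Only touch documents that actually need the change
      filter do |doc|
        doc.email =~ /[A-Z]/
      end

      up do |doc|
        doc.email = doc.email.downcase
      end
    end
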
@@ -0,0 +1,4 @@
+module Hekenga
+  class TaskFailedError < Hekenga::BaseError
+  end
+end
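
Finally, the release adds Hekenga::TaskFailedError, a subclass of the gem's existing Hekenga::BaseError; the sites that raise it live outside this diff. A hedged sketch of how calling code might handle it (the rescue site is illustrative, not prescribed by the gem):

    begin
      migration.perform!
    rescue Hekenga::TaskFailedError => e
      # Surface the failure to whatever is driving the migration run.
      Hekenga.log "Task failed: #{e.message}"
      raise
    end
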