hekenga 0.2.13 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,12 +1,15 @@
1
1
  require 'hekenga/invalid'
2
2
  require 'hekenga/context'
3
3
  require 'hekenga/parallel_job'
4
+ require 'hekenga/parallel_task'
4
5
  require 'hekenga/master_process'
6
+ require 'hekenga/document_task_record'
7
+ require 'hekenga/document_task_executor'
5
8
  require 'hekenga/log'
6
9
  module Hekenga
7
10
  class Migration
8
- attr_accessor :stamp, :description, :batch_size
9
- attr_reader :tasks
11
+ attr_accessor :stamp, :description, :batch_size, :active_idx
12
+ attr_reader :tasks, :session, :test_mode
10
13
 
11
14
  def initialize
12
15
  @tasks = []
@@ -50,15 +53,19 @@ module Hekenga
50
53
  def reload_logs
51
54
  @logs = {}
52
55
  end
56
+
53
57
  def performing?
54
58
  Hekenga::Log.where(pkey: self.to_key, done: false).any?
55
59
  end
60
+
56
61
  def performed?
57
62
  !!log(self.tasks.length - 1).done
58
63
  end
64
+
59
65
  def test_mode!
60
66
  @test_mode = true
61
67
  end
68
+
62
69
  def perform!
63
70
  if Hekenga.status(self) == :naught
64
71
  Hekenga::MasterProcess.new(self).run!
@@ -67,322 +74,102 @@ module Hekenga
67
74
  return false
68
75
  end
69
76
  end
70
- def perform_task!(task_idx = 0, scope = nil)
77
+
78
+ def perform_task!(task_idx)
71
79
  task = @tasks[task_idx] or return
72
80
  @active_idx = task_idx
73
81
  case task
74
82
  when Hekenga::SimpleTask
75
83
  start_simple_task(task)
76
84
  when Hekenga::DocumentTask
77
- # TODO - online migration support (have log.total update, requeue)
78
- scope ||= task.scope.asc(:_id)
79
85
  if task.parallel
80
- start_parallel_task(task, task_idx, scope)
81
- else
82
- start_document_task(task, task_idx, scope)
83
- end
84
- end
85
- end
86
- def recover!
87
- # NOTE - can't find a way to check this automatically with ActiveJob right now
88
- return false unless prompt "Check that the migration queue has processed before recovering. Continue?"
89
- # Write failures
90
- @tasks.each.with_index do |task, idx|
91
- # If no log, run the task now
92
- unless Hekenga::Log.where(pkey: self.to_key, task_idx: idx).any?
93
- return false unless retry_task!(task, idx)
94
- next
95
- end
96
- # Did this task fail?
97
- failedP = log(idx).cancel || Hekenga::Failure.where(pkey: to_key, task_idx: idx).any?
98
- # If it didn't, keep searching
99
- next unless failedP
100
- # This is the first failure we've detected - recover from it
101
- case task
102
- when Hekenga::DocumentTask
103
- ret = recover_document_task!(task, idx)
104
- when Hekenga::SimpleTask
105
- ret = recover_simple!(task, idx)
106
- end
107
-
108
- case ret
109
- when :next
110
- next
111
- when :cancel
112
- return false
86
+ start_parallel_task(task, task_idx)
113
87
  else
114
- return false unless retry_task!(task, idx, ret)
88
+ start_document_task(task, task_idx)
115
89
  end
116
90
  end
117
- return true
118
91
  end
119
92
 
120
- def retry_task!(task, idx, scope = nil)
121
- Hekenga.log "Retrying task##{idx}"
122
- unless Hekenga::MasterProcess.new(self).retry!(idx, scope)
123
- Hekenga.log "Failed to retry the task. Aborting.."
124
- return false
125
- end
126
- return true
127
- end
128
-
129
- def recover_simple!(task, idx)
130
- # Simple tasks just get retried - no fuss
131
- Hekenga.log("Found failed simple task. Retrying..")
132
- return
133
- end
134
-
135
- def recover_document_task!(task, idx)
136
- # Document tasks are a bit more involved.
137
- validation_failures = Hekenga::Failure::Validation.where(pkey: to_key, task_idx: idx)
138
- write_failures = Hekenga::Failure::Write.where(pkey: to_key, task_idx: idx)
139
- error_failures = Hekenga::Failure::Error.where(pkey: to_key, task_idx: idx)
140
- cancelled_failures = Hekenga::Failure::Cancelled.where(pkey: to_key, task_idx: idx)
141
-
142
- # Stats
143
- validation_failure_ctr = validation_failures.count
144
- write_failure_ctr = write_failures.count
145
- error_failure_ctr = error_failures.count
146
- cancelled_failure_ctr = cancelled_failures.count
147
-
148
- # Prompt for recovery
149
- recoverP = prompt(
150
- "Found #{validation_failure_ctr} invalid, "+
151
- "#{write_failure_ctr} failed writes, "+
152
- "#{error_failure_ctr} errors, "+
153
- "#{cancelled_failure_ctr} cancelled on migration. Recover?"
154
- )
155
- return :next unless recoverP
156
-
157
- # Recover from critical write failures (DB records potentially lost)
158
- unless write_failure_ctr.zero?
159
- Hekenga.log "Recovering old data from #{write_failure_ctr} write failure(s)"
160
- recover_data(write_failures, task.scope.klass)
161
- end
162
-
163
- # Resume task from point of error
164
- if task.parallel
165
- # TODO - support for recovery on huge # IDs
166
- failed_ids = [
167
- write_failures.pluck(:document_ids),
168
- error_failures.pluck(:batch_start),
169
- cancelled_failures.pluck(:document_ids),
170
- validation_failures.pluck(:doc_id)
171
- ].flatten.compact
172
- resume_scope = task.scope.klass.asc(:_id).in(_id: failed_ids)
173
- else
174
- first_id = error_failures.first&.batch_start || write_failures.first&.batch_start
175
- invalid_ids = validation_failures.pluck(:doc_id)
176
- if first_id && invalid_ids.any?
177
- resume_scope = task.scope.klass.asc(:_id).and(
178
- task.scope.selector,
179
- task.scope.klass.or(
180
- {_id: {:$gte => first_id}},
181
- {_id: {:$in => invalid_ids}}
182
- ).selector
183
- )
184
- elsif first_id
185
- resume_scope = task.scope.asc(:_id).gte(_id: first_id)
186
- elsif invalid_ids.any?
187
- resume_scope = task.scope.klass.asc(:_id).in(_id: invalid_ids)
188
- else
189
- resume_scope = :next
190
- end
191
- end
192
-
193
- return resume_scope
194
- end
195
-
196
- def recover_data(write_failures, klass)
197
- write_failures.each do |write_failure|
198
- failed_ids = write_failure.document_ids
199
- extant = klass.in(_id: failed_ids).pluck(:_id)
200
- to_recover = (failed_ids - extant)
201
- docs = write_failure.documents.find_all {|x| to_recover.include?(x["_id"])}
202
- next if docs.empty?
203
- Hekenga.log "Recovering #{docs.length} documents.."
204
- klass.collection.insert_many(docs)
205
- end
206
- end
207
-
208
- def prompt(str)
209
- loop do
210
- print "#{str} (Y/N):\n"
211
- case gets.chomp.downcase
212
- when "y"
213
- return true
214
- when "n"
215
- return false
216
- end
217
- end
218
- end
219
-
220
- def rollback!
221
- # TODO
93
+ def recover!
94
+ Hekenga::MasterProcess.new(self).recover!
222
95
  end
223
96
 
224
97
  # Internal perform methods
225
98
  def start_simple_task(task)
226
99
  create_log!
227
100
  begin
228
- with_setup do
229
- task.up!(@context)
230
- end
101
+ @context = Hekenga::Context.new(test_mode: test_mode)
102
+ task.up!(@context)
231
103
  rescue => e
232
104
  simple_failure!(e)
233
105
  return
106
+ ensure
107
+ @context = nil
234
108
  end
235
109
  log_done!
236
110
  end
237
111
 
238
- def check_for_completion
239
- if log.processed == log.total
240
- log_done!
241
- end
242
- end
243
112
  def log_done!
244
- log.set(done: true, finished: Time.now)
113
+ log.set_without_session({done: true, finished: Time.now})
245
114
  end
246
- def start_parallel_task(task, task_idx, scope)
247
- # TODO - support for crazy numbers of documents where pluck is too big
248
- scope.asc(:_id).pluck(:_id).tap do |all_ids|
249
- create_log!(total: all_ids.length)
250
- end.each_slice(batch_size).each do |ids|
251
- Hekenga::ParallelJob.perform_later(
252
- self.to_key, task_idx, ids.map(&:to_s), !!@test_mode
253
- )
254
- end
255
- check_for_completion # if 0 items to migrate
256
- end
257
- def run_parallel_task(task_idx, ids)
258
- @active_idx = task_idx
259
- if log(task_idx).cancel
260
- failed_cancelled!(ids)
261
- return
262
- end
263
- task = self.tasks[task_idx] or return
264
- with_setup(task) do
265
- process_batch(task, task.scope.klass.asc(:_id).in(_id: ids).to_a)
266
- unless @skipped.empty?
267
- failed_cancelled!(@skipped.map(&:_id))
268
- end
269
- end
115
+
116
+ def start_parallel_task(task, task_idx)
117
+ create_log!
118
+ Hekenga::ParallelTask.new(
119
+ migration: self,
120
+ task: task,
121
+ task_idx: task_idx,
122
+ test_mode: test_mode
123
+ ).start!
270
124
  end
271
- def with_setup(task = nil)
272
- @context = Hekenga::Context.new(@test_mode)
273
- task&.setups&.each do |block|
274
- @context.instance_exec(&block)
275
- end
276
- begin
277
- yield
278
- ensure
279
- @context = nil
280
- end
125
+
126
+ def task_records(task_idx)
127
+ Hekenga::DocumentTaskRecord.where(migration_key: to_key, task_idx: task_idx)
281
128
  end
282
- def start_document_task(task, task_idx, scope)
283
- create_log!(total: scope.count)
129
+
130
+ def start_document_task(task, task_idx, recover: false)
131
+ create_log!
284
132
  records = []
285
- with_setup(task) do
286
- scope.asc(:_id).no_timeout.each do |record|
287
- records.push(record)
288
- if records.length == batch_size
289
- process_batch(task, records)
290
- return if log.cancel
291
- records = []
292
- end
293
- end
294
- process_batch(task, records) if records.any?
295
- end
133
+ task_records(task_idx).delete_all unless recover
134
+ executor_key = BSON::ObjectId.new
135
+ task.scope.asc(:_id).no_timeout.each do |record|
136
+ records.push(record)
137
+ next unless records.length == (task.batch_size || batch_size)
138
+
139
+ records = filter_out_processed(task, task_idx, records) if recover
140
+ next unless records.length == (task.batch_size || batch_size)
141
+
142
+ execute_document_task(task_idx, executor_key, records)
143
+ records = []
144
+ return if log.cancel
145
+ end
146
+ records = filter_out_processed(task, task_idx, records) if recover
147
+ execute_document_task(task_idx, executor_key, records) if records.any?
148
+ return if log.cancel
296
149
  log_done!
297
150
  end
298
- def run_filters(task, record)
299
- task.filters.all? do |block|
300
- @context.instance_exec(record, &block)
301
- end
302
- end
303
- def deep_clone(record)
304
- record.as_document.deep_dup
305
- end
306
- def process_batch(task, records)
307
- @skipped = []
308
- to_persist = []
309
- fallbacks = []
310
151
 
311
- filtered = records.group_by do |record|
312
- run_filters(task, record)
152
+ def filter_out_processed(task, task_idx, records)
153
+ return records if records.empty?
154
+
155
+ selector = task_records(task_idx).in(ids: records.map(&:id))
156
+ processed_ids = selector.pluck(:ids).flatten.to_set
157
+ records.reject do |record|
158
+ processed_ids.include?(record._id)
313
159
  end
314
- log_skipped(task, filtered[false]) if filtered[false]
315
- return unless filtered[true]
316
- filtered[true].map.with_index do |record, idx|
317
- original_record = deep_clone(record)
318
- begin
319
- task.up!(@context, record)
320
- rescue => e
321
- failed_apply!(e, record, records[0].id)
322
- @skipped = filtered[true][idx+1..-1]
323
- return
324
- end
325
- if validate_record(task, record)
326
- to_persist.push(record)
327
- fallbacks.push(original_record)
328
- else
329
- if log.cancel
330
- @skipped = filtered[true][idx+1..-1]
331
- return
332
- end
333
- end
334
- end.compact
335
- persist_batch(task, to_persist, fallbacks)
336
- end
337
- def log_skipped(task, records)
338
- log.incr_and_return(
339
- skipped: records.length,
340
- processed: records.length
341
- )
342
- check_for_completion
343
160
  end
344
- def log_success(task, records)
345
- log.incr_and_return(
346
- processed: records.length
161
+
162
+ def execute_document_task(task_idx, executor_key, records)
163
+ task_record = Hekenga::DocumentTaskRecord.create(
164
+ migration_key: to_key,
165
+ task_idx: task_idx,
166
+ executor_key: executor_key,
167
+ test_mode: test_mode,
168
+ ids: records.map(&:id)
347
169
  )
348
- check_for_completion
170
+ Hekenga::DocumentTaskExecutor.new(task_record, records: records).run!
349
171
  end
350
172
 
351
- def persist_batch(task, records, original_records)
352
- if @test_mode
353
- log_success(task, records)
354
- return
355
- end
356
- # NOTE - edgecase where callbacks cause the record to become invalid is
357
- # not covered
358
- records.each do |record|
359
- begin
360
- next if task.skip_prepare
361
- if task.timeless
362
- record.timeless.send(:prepare_update) {}
363
- else
364
- record.send(:prepare_update) {}
365
- end
366
- rescue => e
367
- # If prepare_update throws an error, we're in trouble - crash out now
368
- failed_apply!(e, record, records[0].id)
369
- return
370
- end
371
- end
372
- begin
373
- delete_records!(task.scope.klass, records.map(&:_id))
374
- write_records!(task.scope.klass, records)
375
- log_success(task, records)
376
- rescue => e
377
- failed_write!(e, original_records)
378
- end
379
- end
380
- def delete_records!(klass, ids)
381
- klass.in(_id: ids).delete_all
382
- end
383
- def write_records!(klass, records)
384
- klass.collection.insert_many(records.map(&:as_document))
385
- end
386
173
  def simple_failure!(error)
387
174
  log.add_failure({
388
175
  message: error.to_s,
@@ -391,57 +178,10 @@ module Hekenga
391
178
  }, Hekenga::Failure::Error)
392
179
  log_cancel!
393
180
  end
394
- def failed_cancelled!(ids)
395
- log.add_failure({
396
- document_ids: ids,
397
- batch_start: ids[0]
398
- }, Hekenga::Failure::Cancelled)
399
- end
400
- def failed_apply!(error, record, batch_start_id)
401
- log.add_failure({
402
- message: error.to_s,
403
- backtrace: error.backtrace,
404
- document: deep_clone(record),
405
- batch_start: batch_start_id
406
- }, Hekenga::Failure::Error)
407
- log_cancel!
408
- end
181
+
409
182
  def log_cancel!
410
- log.set(cancel: true, error: true, done: true, finished: Time.now)
411
- end
412
- def failed_write!(error, original_records)
413
- log.add_failure({
414
- message: error.to_s,
415
- backtrace: error.backtrace,
416
- documents: original_records,
417
- document_ids: original_records.map {|x| x["_id"]},
418
- batch_start: original_records[0]["_id"]
419
- }, Hekenga::Failure::Write)
420
- log_cancel!
421
- end
422
- def failed_validation!(task, record)
423
- log.add_failure({
424
- doc_id: record.id,
425
- errs: record.errors.full_messages,
426
- document: deep_clone(record),
427
- }, Hekenga::Failure::Validation)
428
- log.set(error: true)
429
- log.incr_and_return(processed: 1, unvalid: 1)
430
- if task.invalid_strategy == :cancel
431
- log_cancel!
432
- else
433
- check_for_completion
434
- end
435
- end
436
- def validate_record(task, record)
437
- # TODO - ability to skip validation
438
- # TODO - handle errors on validation
439
- if record.valid?
440
- true
441
- else
442
- failed_validation!(task, record)
443
- false
444
- end
183
+ # Bypass the active transaction if there is one
184
+ log.set_without_session({cancel: true, error: true, done: true, finished: Time.now})
445
185
  end
446
186
 
447
187
  # Validations
@@ -4,10 +4,17 @@ module Hekenga
4
4
  queue_as do
5
5
  ENV["HEKENGA_QUEUE"] || :migration
6
6
  end
7
- def perform(migration_key, task_idx, ids, test_mode)
8
- migration = Hekenga.find_migration(migration_key)
9
- migration.test_mode! if test_mode
10
- migration.run_parallel_task(task_idx, ids)
7
+ def perform(document_task_record_id, executor_key)
8
+ record = Hekenga::DocumentTaskRecord.where(_id: document_task_record_id).first
9
+ return if record.nil?
10
+ return if record.executor_key != BSON::ObjectId(executor_key)
11
+ return if record.complete?
12
+
13
+ executor = Hekenga::DocumentTaskExecutor.new(record)
14
+ return if executor.migration_cancelled?
15
+
16
+ executor.run!
17
+ executor.check_for_completion!
11
18
  end
12
19
  end
13
20
  end
@@ -0,0 +1,110 @@
1
+ require 'hekenga/iterator'
2
+ require 'hekenga/document_task_executor'
3
+ require 'hekenga/task_splitter'
4
+
5
+ module Hekenga
6
+ class ParallelTask
7
+ attr_reader :migration, :task, :task_idx, :test_mode
8
+
9
+ def initialize(migration:, task:, task_idx:, test_mode:)
10
+ @migration = migration
11
+ @task = task
12
+ @task_idx = task_idx
13
+ @test_mode = test_mode
14
+ end
15
+
16
+ def start!
17
+ clear_task_records!
18
+ @executor_key = BSON::ObjectId.new
19
+ generate_for_scope(task.scope)
20
+ check_for_completion!
21
+ end
22
+
23
+ def resume!
24
+ @executor_key = BSON::ObjectId.new
25
+ task_records.set(executor_key: @executor_key)
26
+ queue_jobs!(task_records.incomplete)
27
+ generate_new_records!
28
+ recover_failed_records!
29
+ check_for_completion!
30
+ end
31
+
32
+ def complete?
33
+ task_records.incomplete.none?
34
+ end
35
+
36
+ def check_for_completion!
37
+ if complete?
38
+ migration.log(task_idx).set_without_session(done: true, finished: Time.now)
39
+ end
40
+ end
41
+
42
+ private
43
+
44
+ def generate_for_scope(scope)
45
+ Hekenga::Iterator.new(scope, size: 100_000).each do |id_block|
46
+ task_records = id_block.each_slice(batch_size).map do |id_slice|
47
+ generate_task_records!(id_slice)
48
+ end
49
+ write_task_records!(task_records)
50
+ queue_jobs!(task_records)
51
+ end
52
+ end
53
+
54
+ def generate_new_records!
55
+ last_record = task_records.desc(:_id).first
56
+ last_id = last_record&.ids&.last
57
+ scope = task.scope
58
+ scope = task.scope.and(_id: {'$gt': last_id}) if last_id
59
+ generate_for_scope(scope)
60
+ end
61
+
62
+ # Any records with a failure or a validation failure get moved into
63
+ # a new task record which is incomplete and gets a job queued
64
+ def recover_failed_records!
65
+ task_records.complete.no_timeout.each do |record|
66
+ Hekenga::TaskSplitter.new(record, @executor_key).call.tap do |new_record|
67
+ next if new_record.nil?
68
+
69
+ Hekenga::ParallelJob.perform_later(new_record.id.to_s, @executor_key.to_s)
70
+ end
71
+ end
72
+ end
73
+
74
+ def batch_size
75
+ task.batch_size || migration.batch_size
76
+ end
77
+
78
+ def clear_task_records!
79
+ task_records.delete_all
80
+ end
81
+
82
+ def task_records
83
+ migration.task_records(task_idx)
84
+ end
85
+
86
+ def generate_task_records!(id_slice)
87
+ Hekenga::DocumentTaskRecord.new(
88
+ migration_key: migration.to_key,
89
+ task_idx: task_idx,
90
+ executor_key: @executor_key,
91
+ test_mode: test_mode,
92
+ ids: id_slice
93
+ ).tap do |record|
94
+ record.send(:prepare_insert) {}
95
+ end
96
+ end
97
+
98
+ def write_task_records!(records)
99
+ Hekenga::DocumentTaskRecord.collection.bulk_write(records.map do |record|
100
+ { insert_one: record.as_document }
101
+ end)
102
+ end
103
+
104
+ def queue_jobs!(records)
105
+ records.each do |record|
106
+ Hekenga::ParallelJob.perform_later(record.id.to_s, @executor_key.to_s)
107
+ end
108
+ end
109
+ end
110
+ end
@@ -28,33 +28,38 @@ module Hekenga
28
28
  created #{@migration.timestamp.sub("T", " ").inspect}
29
29
 
30
30
  ## Optional
31
- # batch_size 10
31
+ #batch_size 25
32
32
 
33
33
  ## Simple tasks
34
- # task "task description" do
35
- # up do
36
- # end
37
- # end
34
+ #task "task description" do
35
+ # up do
36
+ # end
37
+ #end
38
38
 
39
39
  ## Per document tasks
40
- # per_document "task description" do
41
- # ## Required
42
- # scope MyModel.all
40
+ #per_document "task description" do
41
+ # ## Required
42
+ # scope MyModel.all
43
43
  #
44
- # ## Optional config
45
- # # parallel!
46
- # # timeless!
47
- # # skip_prepare!
48
- # # when_invalid :prompt # :prompt, :cancel, :stop, :continue
49
- # #
50
- # # setup do
51
- # # end
52
- # # filter do
53
- # # end
44
+ # ## Optional config
45
+ # #parallel!
46
+ # #timeless!
47
+ # #always_write!
48
+ # #skip_prepare!
49
+ # #batch_size 25
50
+ # #write_strategy :update # :delete_then_insert
54
51
  #
55
- # up do |doc|
56
- # end
57
- # end
52
+ # # Called once per batch, instance variables will be accessible
53
+ # # in the filter & up blocks
54
+ # #setup do
55
+ # #end
56
+ #
57
+ # #filter do |doc|
58
+ # #end
59
+ #
60
+ # up do |doc|
61
+ # end
62
+ #end
58
63
  end
59
64
  EOF
60
65
  end
@@ -0,0 +1,4 @@
1
+ module Hekenga
2
+ class TaskFailedError < Hekenga::BaseError
3
+ end
4
+ end