hekenga 0.2.13 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/CHANGELOG.md +33 -0
- data/docker-compose.yml +30 -0
- data/exe/hekenga +25 -38
- data/hekenga.gemspec +5 -4
- data/lib/hekenga/context.rb +7 -6
- data/lib/hekenga/document_task.rb +9 -3
- data/lib/hekenga/document_task_executor.rb +264 -0
- data/lib/hekenga/document_task_record.rb +29 -0
- data/lib/hekenga/dsl/document_task.rb +28 -1
- data/lib/hekenga/dsl/migration.rb +3 -0
- data/lib/hekenga/failure/write.rb +1 -0
- data/lib/hekenga/iterator.rb +26 -0
- data/lib/hekenga/log.rb +14 -19
- data/lib/hekenga/master_process.rb +184 -105
- data/lib/hekenga/migration.rb +70 -330
- data/lib/hekenga/parallel_job.rb +11 -4
- data/lib/hekenga/parallel_task.rb +110 -0
- data/lib/hekenga/scaffold.rb +26 -21
- data/lib/hekenga/task_failed_error.rb +4 -0
- data/lib/hekenga/task_splitter.rb +30 -0
- data/lib/hekenga/version.rb +1 -1
- data/lib/hekenga.rb +22 -10
- metadata +22 -13
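The headline change in 1.0.1: per-document batch execution moves out of Hekenga::Migration into the new Hekenga::DocumentTaskExecutor, Hekenga::DocumentTaskRecord, and Hekenga::ParallelTask classes, and the old interactive, prompt-driven recovery is replaced by Hekenga::MasterProcess#recover!. A minimal sketch of the public entry points visible in this diff; how you obtain the migration object depends on your app, so `migration` here is an assumption:

  migration.test_mode!  # dry run: tasks execute but writes are not persisted
  migration.perform!    # delegates to Hekenga::MasterProcess#run! when status is :naught
  migration.recover!    # 1.0.1: a one-liner delegating to Hekenga::MasterProcess#recover!
  migration.performed?  # true once the final task's log is marked done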
data/lib/hekenga/migration.rb
CHANGED
@@ -1,12 +1,15 @@
 require 'hekenga/invalid'
 require 'hekenga/context'
 require 'hekenga/parallel_job'
+require 'hekenga/parallel_task'
 require 'hekenga/master_process'
+require 'hekenga/document_task_record'
+require 'hekenga/document_task_executor'
 require 'hekenga/log'
 module Hekenga
   class Migration
-    attr_accessor :stamp, :description, :batch_size
-    attr_reader :tasks
+    attr_accessor :stamp, :description, :batch_size, :active_idx
+    attr_reader :tasks, :session, :test_mode
 
     def initialize
       @tasks = []
@@ -50,15 +53,19 @@ module Hekenga
     def reload_logs
       @logs = {}
     end
+
     def performing?
       Hekenga::Log.where(pkey: self.to_key, done: false).any?
     end
+
     def performed?
       !!log(self.tasks.length - 1).done
     end
+
     def test_mode!
       @test_mode = true
     end
+
     def perform!
       if Hekenga.status(self) == :naught
         Hekenga::MasterProcess.new(self).run!
@@ -67,322 +74,102 @@ module Hekenga
         return false
       end
     end
-
+
+    def perform_task!(task_idx)
       task = @tasks[task_idx] or return
       @active_idx = task_idx
       case task
       when Hekenga::SimpleTask
         start_simple_task(task)
       when Hekenga::DocumentTask
-        # TODO - online migration support (have log.total update, requeue)
-        scope ||= task.scope.asc(:_id)
         if task.parallel
-          start_parallel_task(task, task_idx
-        else
-          start_document_task(task, task_idx, scope)
-        end
-      end
-    end
-    def recover!
-      # NOTE - can't find a way to check this automatically with ActiveJob right now
-      return false unless prompt "Check that the migration queue has processed before recovering. Continue?"
-      # Write failures
-      @tasks.each.with_index do |task, idx|
-        # If no log, run the task now
-        unless Hekenga::Log.where(pkey: self.to_key, task_idx: idx).any?
-          return false unless retry_task!(task, idx)
-          next
-        end
-        # Did this task fail?
-        failedP = log(idx).cancel || Hekenga::Failure.where(pkey: to_key, task_idx: idx).any?
-        # If it didn't, keep searching
-        next unless failedP
-        # This is the first failure we've detected - recover from it
-        case task
-        when Hekenga::DocumentTask
-          ret = recover_document_task!(task, idx)
-        when Hekenga::SimpleTask
-          ret = recover_simple!(task, idx)
-        end
-
-        case ret
-        when :next
-          next
-        when :cancel
-          return false
+          start_parallel_task(task, task_idx)
         else
-
+          start_document_task(task, task_idx)
         end
       end
-      return true
     end
 
-    def
-      Hekenga.
-      unless Hekenga::MasterProcess.new(self).retry!(idx, scope)
-        Hekenga.log "Failed to retry the task. Aborting.."
-        return false
-      end
-      return true
-    end
-
-    def recover_simple!(task, idx)
-      # Simple tasks just get retried - no fuss
-      Hekenga.log("Found failed simple task. Retrying..")
-      return
-    end
-
-    def recover_document_task!(task, idx)
-      # Document tasks are a bit more involved.
-      validation_failures = Hekenga::Failure::Validation.where(pkey: to_key, task_idx: idx)
-      write_failures = Hekenga::Failure::Write.where(pkey: to_key, task_idx: idx)
-      error_failures = Hekenga::Failure::Error.where(pkey: to_key, task_idx: idx)
-      cancelled_failures = Hekenga::Failure::Cancelled.where(pkey: to_key, task_idx: idx)
-
-      # Stats
-      validation_failure_ctr = validation_failures.count
-      write_failure_ctr = write_failures.count
-      error_failure_ctr = error_failures.count
-      cancelled_failure_ctr = cancelled_failures.count
-
-      # Prompt for recovery
-      recoverP = prompt(
-        "Found #{validation_failure_ctr} invalid, "+
-        "#{write_failure_ctr} failed writes, "+
-        "#{error_failure_ctr} errors, "+
-        "#{cancelled_failure_ctr} cancelled on migration. Recover?"
-      )
-      return :next unless recoverP
-
-      # Recover from critical write failures (DB records potentially lost)
-      unless write_failure_ctr.zero?
-        Hekenga.log "Recovering old data from #{write_failure_ctr} write failure(s)"
-        recover_data(write_failures, task.scope.klass)
-      end
-
-      # Resume task from point of error
-      if task.parallel
-        # TODO - support for recovery on huge # IDs
-        failed_ids = [
-          write_failures.pluck(:document_ids),
-          error_failures.pluck(:batch_start),
-          cancelled_failures.pluck(:document_ids),
-          validation_failures.pluck(:doc_id)
-        ].flatten.compact
-        resume_scope = task.scope.klass.asc(:_id).in(_id: failed_ids)
-      else
-        first_id = error_failures.first&.batch_start || write_failures.first&.batch_start
-        invalid_ids = validation_failures.pluck(:doc_id)
-        if first_id && invalid_ids.any?
-          resume_scope = task.scope.klass.asc(:_id).and(
-            task.scope.selector,
-            task.scope.klass.or(
-              {_id: {:$gte => first_id}},
-              {_id: {:$in => invalid_ids}}
-            ).selector
-          )
-        elsif first_id
-          resume_scope = task.scope.asc(:_id).gte(_id: first_id)
-        elsif invalid_ids.any?
-          resume_scope = task.scope.klass.asc(:_id).in(_id: invalid_ids)
-        else
-          resume_scope = :next
-        end
-      end
-
-      return resume_scope
-    end
-
-    def recover_data(write_failures, klass)
-      write_failures.each do |write_failure|
-        failed_ids = write_failure.document_ids
-        extant = klass.in(_id: failed_ids).pluck(:_id)
-        to_recover = (failed_ids - extant)
-        docs = write_failure.documents.find_all {|x| to_recover.include?(x["_id"])}
-        next if docs.empty?
-        Hekenga.log "Recovering #{docs.length} documents.."
-        klass.collection.insert_many(docs)
-      end
-    end
-
-    def prompt(str)
-      loop do
-        print "#{str} (Y/N):\n"
-        case gets.chomp.downcase
-        when "y"
-          return true
-        when "n"
-          return false
-        end
-      end
-    end
-
-    def rollback!
-      # TODO
+    def recover!
+      Hekenga::MasterProcess.new(self).recover!
     end
 
     # Internal perform methods
     def start_simple_task(task)
       create_log!
       begin
-
-
-        end
+        @context = Hekenga::Context.new(test_mode: test_mode)
+        task.up!(@context)
       rescue => e
         simple_failure!(e)
         return
+      ensure
+        @context = nil
       end
       log_done!
     end
 
-    def check_for_completion
-      if log.processed == log.total
-        log_done!
-      end
-    end
     def log_done!
-      log.
+      log.set_without_session({done: true, finished: Time.now})
     end
-
-
-
-
-
-
-
-
-
-      check_for_completion # if 0 items to migrate
-    end
-    def run_parallel_task(task_idx, ids)
-      @active_idx = task_idx
-      if log(task_idx).cancel
-        failed_cancelled!(ids)
-        return
-      end
-      task = self.tasks[task_idx] or return
-      with_setup(task) do
-        process_batch(task, task.scope.klass.asc(:_id).in(_id: ids).to_a)
-        unless @skipped.empty?
-          failed_cancelled!(@skipped.map(&:_id))
-        end
-      end
+
+    def start_parallel_task(task, task_idx)
+      create_log!
+      Hekenga::ParallelTask.new(
+        migration: self,
+        task: task,
+        task_idx: task_idx,
+        test_mode: test_mode
+      ).start!
     end
-
-
-
-        @context.instance_exec(&block)
-      end
-      begin
-        yield
-      ensure
-        @context = nil
-      end
+
+    def task_records(task_idx)
+      Hekenga::DocumentTaskRecord.where(migration_key: to_key, task_idx: task_idx)
    end
-
-
+
+    def start_document_task(task, task_idx, recover: false)
+      create_log!
       records = []
-
-
-
-
-
-
-
-
-
-
-
+      task_records(task_idx).delete_all unless recover
+      executor_key = BSON::ObjectId.new
+      task.scope.asc(:_id).no_timeout.each do |record|
+        records.push(record)
+        next unless records.length == (task.batch_size || batch_size)
+
+        records = filter_out_processed(task, task_idx, records) if recover
+        next unless records.length == (task.batch_size || batch_size)
+
+        execute_document_task(task_idx, executor_key, records)
+        records = []
+        return if log.cancel
+      end
+      records = filter_out_processed(task, task_idx, records) if recover
+      execute_document_task(task_idx, executor_key, records) if records.any?
+      return if log.cancel
       log_done!
     end
-    def run_filters(task, record)
-      task.filters.all? do |block|
-        @context.instance_exec(record, &block)
-      end
-    end
-    def deep_clone(record)
-      record.as_document.deep_dup
-    end
-    def process_batch(task, records)
-      @skipped = []
-      to_persist = []
-      fallbacks = []
 
-
-
+    def filter_out_processed(task, task_idx, records)
+      return records if records.empty?
+
+      selector = task_records(task_idx).in(ids: records.map(&:id))
+      processed_ids = selector.pluck(:ids).flatten.to_set
+      records.reject do |record|
+        processed_ids.include?(record._id)
       end
-      log_skipped(task, filtered[false]) if filtered[false]
-      return unless filtered[true]
-      filtered[true].map.with_index do |record, idx|
-        original_record = deep_clone(record)
-        begin
-          task.up!(@context, record)
-        rescue => e
-          failed_apply!(e, record, records[0].id)
-          @skipped = filtered[true][idx+1..-1]
-          return
-        end
-        if validate_record(task, record)
-          to_persist.push(record)
-          fallbacks.push(original_record)
-        else
-          if log.cancel
-            @skipped = filtered[true][idx+1..-1]
-            return
-          end
-        end
-      end.compact
-      persist_batch(task, to_persist, fallbacks)
-    end
-    def log_skipped(task, records)
-      log.incr_and_return(
-        skipped: records.length,
-        processed: records.length
-      )
-      check_for_completion
     end
-
-
-
+
+    def execute_document_task(task_idx, executor_key, records)
+      task_record = Hekenga::DocumentTaskRecord.create(
+        migration_key: to_key,
+        task_idx: task_idx,
+        executor_key: executor_key,
+        test_mode: test_mode,
+        ids: records.map(&:id)
       )
-
+      Hekenga::DocumentTaskExecutor.new(task_record, records: records).run!
     end
 
-    def persist_batch(task, records, original_records)
-      if @test_mode
-        log_success(task, records)
-        return
-      end
-      # NOTE - edgecase where callbacks cause the record to become invalid is
-      # not covered
-      records.each do |record|
-        begin
-          next if task.skip_prepare
-          if task.timeless
-            record.timeless.send(:prepare_update) {}
-          else
-            record.send(:prepare_update) {}
-          end
-        rescue => e
-          # If prepare_update throws an error, we're in trouble - crash out now
-          failed_apply!(e, record, records[0].id)
-          return
-        end
-      end
-      begin
-        delete_records!(task.scope.klass, records.map(&:_id))
-        write_records!(task.scope.klass, records)
-        log_success(task, records)
-      rescue => e
-        failed_write!(e, original_records)
-      end
-    end
-    def delete_records!(klass, ids)
-      klass.in(_id: ids).delete_all
-    end
-    def write_records!(klass, records)
-      klass.collection.insert_many(records.map(&:as_document))
-    end
     def simple_failure!(error)
       log.add_failure({
         message: error.to_s,
@@ -391,57 +178,10 @@ module Hekenga
       }, Hekenga::Failure::Error)
       log_cancel!
     end
-
-      log.add_failure({
-        document_ids: ids,
-        batch_start: ids[0]
-      }, Hekenga::Failure::Cancelled)
-    end
-    def failed_apply!(error, record, batch_start_id)
-      log.add_failure({
-        message: error.to_s,
-        backtrace: error.backtrace,
-        document: deep_clone(record),
-        batch_start: batch_start_id
-      }, Hekenga::Failure::Error)
-      log_cancel!
-    end
+
     def log_cancel!
-
-
-    def failed_write!(error, original_records)
-      log.add_failure({
-        message: error.to_s,
-        backtrace: error.backtrace,
-        documents: original_records,
-        document_ids: original_records.map {|x| x["_id"]},
-        batch_start: original_records[0]["_id"]
-      }, Hekenga::Failure::Write)
-      log_cancel!
-    end
-    def failed_validation!(task, record)
-      log.add_failure({
-        doc_id: record.id,
-        errs: record.errors.full_messages,
-        document: deep_clone(record),
-      }, Hekenga::Failure::Validation)
-      log.set(error: true)
-      log.incr_and_return(processed: 1, unvalid: 1)
-      if task.invalid_strategy == :cancel
-        log_cancel!
-      else
-        check_for_completion
-      end
-    end
-    def validate_record(task, record)
-      # TODO - ability to skip validation
-      # TODO - handle errors on validation
-      if record.valid?
-        true
-      else
-        failed_validation!(task, record)
-        false
-      end
+      # Bypass the active transaction if there is one
+      log.set_without_session({cancel: true, error: true, done: true, finished: Time.now})
     end
 
     # Validations
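In the new start_document_task above, every flushed batch leaves behind a Hekenga::DocumentTaskRecord stamped with a per-run executor_key, which is what makes recovery resumable: filter_out_processed drops ids already covered by a previous run. A sketch of that filtering step in isolation, with an illustrative migration_key and batch:

  # ids already covered by existing task records for this task
  processed_ids = Hekenga::DocumentTaskRecord
    .where(migration_key: migration_key, task_idx: 0)
    .in(ids: batch.map(&:id))
    .pluck(:ids).flatten.to_set
  batch.reject { |doc| processed_ids.include?(doc._id) }  # only unseen docs re-run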
data/lib/hekenga/parallel_job.rb
CHANGED
@@ -4,10 +4,17 @@ module Hekenga
     queue_as do
       ENV["HEKENGA_QUEUE"] || :migration
     end
-    def perform(
-
-
-
+    def perform(document_task_record_id, executor_key)
+      record = Hekenga::DocumentTaskRecord.where(_id: document_task_record_id).first
+      return if record.nil?
+      return if record.executor_key != BSON::ObjectId(executor_key)
+      return if record.complete?
+
+      executor = Hekenga::DocumentTaskExecutor.new(record)
+      return if executor.migration_cancelled?
+
+      executor.run!
+      executor.check_for_completion!
     end
   end
 end
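The reworked ParallelJob#perform is self-fencing: it re-fetches its DocumentTaskRecord and bails out if the record is gone, already complete, or stamped with a different executor_key. A sketch of the enqueue side this implies (ParallelTask, in the next file, does this internally; the values are illustrative):

  record       = Hekenga::DocumentTaskRecord.first  # one batch of document ids
  executor_key = BSON::ObjectId.new                 # minted once per run
  Hekenga::ParallelJob.perform_later(record.id.to_s, executor_key.to_s)
  # On resume, a fresh executor_key is stamped onto the records, so any job
  # still queued from an earlier attempt fails the executor_key guard and exits.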
data/lib/hekenga/parallel_task.rb
ADDED
@@ -0,0 +1,110 @@
+require 'hekenga/iterator'
+require 'hekenga/document_task_executor'
+require 'hekenga/task_splitter'
+
+module Hekenga
+  class ParallelTask
+    attr_reader :migration, :task, :task_idx, :test_mode
+
+    def initialize(migration:, task:, task_idx:, test_mode:)
+      @migration = migration
+      @task = task
+      @task_idx = task_idx
+      @test_mode = test_mode
+    end
+
+    def start!
+      clear_task_records!
+      @executor_key = BSON::ObjectId.new
+      generate_for_scope(task.scope)
+      check_for_completion!
+    end
+
+    def resume!
+      @executor_key = BSON::ObjectId.new
+      task_records.set(executor_key: @executor_key)
+      queue_jobs!(task_records.incomplete)
+      generate_new_records!
+      recover_failed_records!
+      check_for_completion!
+    end
+
+    def complete?
+      task_records.incomplete.none?
+    end
+
+    def check_for_completion!
+      if complete?
+        migration.log(task_idx).set_without_session(done: true, finished: Time.now)
+      end
+    end
+
+    private
+
+    def generate_for_scope(scope)
+      Hekenga::Iterator.new(scope, size: 100_000).each do |id_block|
+        task_records = id_block.each_slice(batch_size).map do |id_slice|
+          generate_task_records!(id_slice)
+        end
+        write_task_records!(task_records)
+        queue_jobs!(task_records)
+      end
+    end
+
+    def generate_new_records!
+      last_record = task_records.desc(:_id).first
+      last_id = last_record&.ids&.last
+      scope = task.scope
+      scope = task.scope.and(_id: {'$gt': last_id}) if last_id
+      generate_for_scope(scope)
+    end
+
+    # Any records with a failure or a validation failure get moved into
+    # a new task record which is incomplete and gets a job queued
+    def recover_failed_records!
+      task_records.complete.no_timeout.each do |record|
+        Hekenga::TaskSplitter.new(record, @executor_key).call.tap do |new_record|
+          next if new_record.nil?
+
+          Hekenga::ParallelJob.perform_later(new_record.id.to_s, @executor_key.to_s)
+        end
+      end
+    end
+
+    def batch_size
+      task.batch_size || migration.batch_size
+    end
+
+    def clear_task_records!
+      task_records.delete_all
+    end
+
+    def task_records
+      migration.task_records(task_idx)
+    end
+
+    def generate_task_records!(id_slice)
+      Hekenga::DocumentTaskRecord.new(
+        migration_key: migration.to_key,
+        task_idx: task_idx,
+        executor_key: @executor_key,
+        test_mode: test_mode,
+        ids: id_slice
+      ).tap do |record|
+        record.send(:prepare_insert) {}
+      end
+    end
+
+    def write_task_records!(records)
+      Hekenga::DocumentTaskRecord.collection.bulk_write(records.map do |record|
+        { insert_one: record.as_document }
+      end)
+    end
+
+    def queue_jobs!(records)
+      records.each do |record|
+        Hekenga::ParallelJob.perform_later(record.id.to_s, @executor_key.to_s)
+      end
+    end
+  end
+end
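ParallelTask's start!/resume! pair makes the fan-out shape explicit: ids are pulled in 100_000-id blocks via Hekenga::Iterator, sliced into batch_size groups, bulk-inserted as DocumentTaskRecords, and one ParallelJob is queued per record. A back-of-envelope sketch of the resulting job volume, with illustrative numbers:

  ids_per_block = 100_000       # Hekenga::Iterator block size from the diff
  batch_size    = 25            # task.batch_size || migration.batch_size
  ids_per_block / batch_size    # => 4_000 task records (and queued jobs) per block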
data/lib/hekenga/scaffold.rb
CHANGED
@@ -28,33 +28,38 @@ module Hekenga
 created #{@migration.timestamp.sub("T", " ").inspect}
 
 ## Optional
-#
+#batch_size 25
 
 ## Simple tasks
-#
-#
-#
-#
+#task "task description" do
+# up do
+# end
+#end
 
 ## Per document tasks
-#
-#
-#
+#per_document "task description" do
+# ## Required
+# scope MyModel.all
 #
-#
-#
-#
-#
-#
-#
-#
-# # end
-# # filter do
-# # end
+# ## Optional config
+# #parallel!
+# #timeless!
+# #always_write!
+# #skip_prepare!
+# #batch_size 25
+# #write_strategy :update # :delete_then_insert
 #
-#
-#
-#
+# # Called once per batch, instance variables will be accessible
+# # in the filter & up blocks
+# #setup do
+# #end
+#
+# #filter do |doc|
+# #end
+#
+# up do |doc|
+# end
+#end
 end
 EOF
 end