hekenga 0.2.13 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/CHANGELOG.md +29 -0
- data/docker-compose.yml +30 -0
- data/exe/hekenga +25 -38
- data/hekenga.gemspec +5 -4
- data/lib/hekenga/context.rb +7 -6
- data/lib/hekenga/document_task.rb +9 -3
- data/lib/hekenga/document_task_executor.rb +264 -0
- data/lib/hekenga/document_task_record.rb +29 -0
- data/lib/hekenga/dsl/document_task.rb +28 -1
- data/lib/hekenga/dsl/migration.rb +3 -0
- data/lib/hekenga/failure/write.rb +1 -0
- data/lib/hekenga/iterator.rb +26 -0
- data/lib/hekenga/log.rb +14 -19
- data/lib/hekenga/master_process.rb +184 -105
- data/lib/hekenga/migration.rb +70 -330
- data/lib/hekenga/parallel_job.rb +11 -4
- data/lib/hekenga/parallel_task.rb +110 -0
- data/lib/hekenga/scaffold.rb +27 -23
- data/lib/hekenga/task_failed_error.rb +4 -0
- data/lib/hekenga/task_splitter.rb +30 -0
- data/lib/hekenga/version.rb +1 -1
- data/lib/hekenga.rb +22 -10
- metadata +22 -13
data/lib/hekenga/migration.rb
CHANGED
@@ -1,12 +1,15 @@
|
|
1
1
|
require 'hekenga/invalid'
|
2
2
|
require 'hekenga/context'
|
3
3
|
require 'hekenga/parallel_job'
|
4
|
+
require 'hekenga/parallel_task'
|
4
5
|
require 'hekenga/master_process'
|
6
|
+
require 'hekenga/document_task_record'
|
7
|
+
require 'hekenga/document_task_executor'
|
5
8
|
require 'hekenga/log'
|
6
9
|
module Hekenga
|
7
10
|
class Migration
|
8
|
-
attr_accessor :stamp, :description, :batch_size
|
9
|
-
attr_reader :tasks
|
11
|
+
attr_accessor :stamp, :description, :batch_size, :active_idx
|
12
|
+
attr_reader :tasks, :session, :test_mode
|
10
13
|
|
11
14
|
def initialize
|
12
15
|
@tasks = []
|
@@ -50,15 +53,19 @@ module Hekenga
|
|
50
53
|
def reload_logs
|
51
54
|
@logs = {}
|
52
55
|
end
|
56
|
+
|
53
57
|
def performing?
|
54
58
|
Hekenga::Log.where(pkey: self.to_key, done: false).any?
|
55
59
|
end
|
60
|
+
|
56
61
|
def performed?
|
57
62
|
!!log(self.tasks.length - 1).done
|
58
63
|
end
|
64
|
+
|
59
65
|
def test_mode!
|
60
66
|
@test_mode = true
|
61
67
|
end
|
68
|
+
|
62
69
|
def perform!
|
63
70
|
if Hekenga.status(self) == :naught
|
64
71
|
Hekenga::MasterProcess.new(self).run!
|
@@ -67,322 +74,102 @@ module Hekenga
|
|
67
74
|
return false
|
68
75
|
end
|
69
76
|
end
|
70
|
-
|
77
|
+
|
78
|
+
def perform_task!(task_idx)
|
71
79
|
task = @tasks[task_idx] or return
|
72
80
|
@active_idx = task_idx
|
73
81
|
case task
|
74
82
|
when Hekenga::SimpleTask
|
75
83
|
start_simple_task(task)
|
76
84
|
when Hekenga::DocumentTask
|
77
|
-
# TODO - online migration support (have log.total update, requeue)
|
78
|
-
scope ||= task.scope.asc(:_id)
|
79
85
|
if task.parallel
|
80
|
-
start_parallel_task(task, task_idx
|
81
|
-
else
|
82
|
-
start_document_task(task, task_idx, scope)
|
83
|
-
end
|
84
|
-
end
|
85
|
-
end
|
86
|
-
def recover!
|
87
|
-
# NOTE - can't find a way to check this automatically with ActiveJob right now
|
88
|
-
return false unless prompt "Check that the migration queue has processed before recovering. Continue?"
|
89
|
-
# Write failures
|
90
|
-
@tasks.each.with_index do |task, idx|
|
91
|
-
# If no log, run the task now
|
92
|
-
unless Hekenga::Log.where(pkey: self.to_key, task_idx: idx).any?
|
93
|
-
return false unless retry_task!(task, idx)
|
94
|
-
next
|
95
|
-
end
|
96
|
-
# Did this task fail?
|
97
|
-
failedP = log(idx).cancel || Hekenga::Failure.where(pkey: to_key, task_idx: idx).any?
|
98
|
-
# If it didn't, keep searching
|
99
|
-
next unless failedP
|
100
|
-
# This is the first failure we've detected - recover from it
|
101
|
-
case task
|
102
|
-
when Hekenga::DocumentTask
|
103
|
-
ret = recover_document_task!(task, idx)
|
104
|
-
when Hekenga::SimpleTask
|
105
|
-
ret = recover_simple!(task, idx)
|
106
|
-
end
|
107
|
-
|
108
|
-
case ret
|
109
|
-
when :next
|
110
|
-
next
|
111
|
-
when :cancel
|
112
|
-
return false
|
86
|
+
start_parallel_task(task, task_idx)
|
113
87
|
else
|
114
|
-
|
88
|
+
start_document_task(task, task_idx)
|
115
89
|
end
|
116
90
|
end
|
117
|
-
return true
|
118
91
|
end
|
119
92
|
|
120
|
-
def
|
121
|
-
Hekenga.
|
122
|
-
unless Hekenga::MasterProcess.new(self).retry!(idx, scope)
|
123
|
-
Hekenga.log "Failed to retry the task. Aborting.."
|
124
|
-
return false
|
125
|
-
end
|
126
|
-
return true
|
127
|
-
end
|
128
|
-
|
129
|
-
def recover_simple!(task, idx)
|
130
|
-
# Simple tasks just get retried - no fuss
|
131
|
-
Hekenga.log("Found failed simple task. Retrying..")
|
132
|
-
return
|
133
|
-
end
|
134
|
-
|
135
|
-
def recover_document_task!(task, idx)
|
136
|
-
# Document tasks are a bit more involved.
|
137
|
-
validation_failures = Hekenga::Failure::Validation.where(pkey: to_key, task_idx: idx)
|
138
|
-
write_failures = Hekenga::Failure::Write.where(pkey: to_key, task_idx: idx)
|
139
|
-
error_failures = Hekenga::Failure::Error.where(pkey: to_key, task_idx: idx)
|
140
|
-
cancelled_failures = Hekenga::Failure::Cancelled.where(pkey: to_key, task_idx: idx)
|
141
|
-
|
142
|
-
# Stats
|
143
|
-
validation_failure_ctr = validation_failures.count
|
144
|
-
write_failure_ctr = write_failures.count
|
145
|
-
error_failure_ctr = error_failures.count
|
146
|
-
cancelled_failure_ctr = cancelled_failures.count
|
147
|
-
|
148
|
-
# Prompt for recovery
|
149
|
-
recoverP = prompt(
|
150
|
-
"Found #{validation_failure_ctr} invalid, "+
|
151
|
-
"#{write_failure_ctr} failed writes, "+
|
152
|
-
"#{error_failure_ctr} errors, "+
|
153
|
-
"#{cancelled_failure_ctr} cancelled on migration. Recover?"
|
154
|
-
)
|
155
|
-
return :next unless recoverP
|
156
|
-
|
157
|
-
# Recover from critical write failures (DB records potentially lost)
|
158
|
-
unless write_failure_ctr.zero?
|
159
|
-
Hekenga.log "Recovering old data from #{write_failure_ctr} write failure(s)"
|
160
|
-
recover_data(write_failures, task.scope.klass)
|
161
|
-
end
|
162
|
-
|
163
|
-
# Resume task from point of error
|
164
|
-
if task.parallel
|
165
|
-
# TODO - support for recovery on huge # IDs
|
166
|
-
failed_ids = [
|
167
|
-
write_failures.pluck(:document_ids),
|
168
|
-
error_failures.pluck(:batch_start),
|
169
|
-
cancelled_failures.pluck(:document_ids),
|
170
|
-
validation_failures.pluck(:doc_id)
|
171
|
-
].flatten.compact
|
172
|
-
resume_scope = task.scope.klass.asc(:_id).in(_id: failed_ids)
|
173
|
-
else
|
174
|
-
first_id = error_failures.first&.batch_start || write_failures.first&.batch_start
|
175
|
-
invalid_ids = validation_failures.pluck(:doc_id)
|
176
|
-
if first_id && invalid_ids.any?
|
177
|
-
resume_scope = task.scope.klass.asc(:_id).and(
|
178
|
-
task.scope.selector,
|
179
|
-
task.scope.klass.or(
|
180
|
-
{_id: {:$gte => first_id}},
|
181
|
-
{_id: {:$in => invalid_ids}}
|
182
|
-
).selector
|
183
|
-
)
|
184
|
-
elsif first_id
|
185
|
-
resume_scope = task.scope.asc(:_id).gte(_id: first_id)
|
186
|
-
elsif invalid_ids.any?
|
187
|
-
resume_scope = task.scope.klass.asc(:_id).in(_id: invalid_ids)
|
188
|
-
else
|
189
|
-
resume_scope = :next
|
190
|
-
end
|
191
|
-
end
|
192
|
-
|
193
|
-
return resume_scope
|
194
|
-
end
|
195
|
-
|
196
|
-
def recover_data(write_failures, klass)
|
197
|
-
write_failures.each do |write_failure|
|
198
|
-
failed_ids = write_failure.document_ids
|
199
|
-
extant = klass.in(_id: failed_ids).pluck(:_id)
|
200
|
-
to_recover = (failed_ids - extant)
|
201
|
-
docs = write_failure.documents.find_all {|x| to_recover.include?(x["_id"])}
|
202
|
-
next if docs.empty?
|
203
|
-
Hekenga.log "Recovering #{docs.length} documents.."
|
204
|
-
klass.collection.insert_many(docs)
|
205
|
-
end
|
206
|
-
end
|
207
|
-
|
208
|
-
def prompt(str)
|
209
|
-
loop do
|
210
|
-
print "#{str} (Y/N):\n"
|
211
|
-
case gets.chomp.downcase
|
212
|
-
when "y"
|
213
|
-
return true
|
214
|
-
when "n"
|
215
|
-
return false
|
216
|
-
end
|
217
|
-
end
|
218
|
-
end
|
219
|
-
|
220
|
-
def rollback!
|
221
|
-
# TODO
|
93
|
+
def recover!
|
94
|
+
Hekenga::MasterProcess.new(self).recover!
|
222
95
|
end
|
223
96
|
|
224
97
|
# Internal perform methods
|
225
98
|
def start_simple_task(task)
|
226
99
|
create_log!
|
227
100
|
begin
|
228
|
-
|
229
|
-
|
230
|
-
end
|
101
|
+
@context = Hekenga::Context.new(test_mode: test_mode)
|
102
|
+
task.up!(@context)
|
231
103
|
rescue => e
|
232
104
|
simple_failure!(e)
|
233
105
|
return
|
106
|
+
ensure
|
107
|
+
@context = nil
|
234
108
|
end
|
235
109
|
log_done!
|
236
110
|
end
|
237
111
|
|
238
|
-
def check_for_completion
|
239
|
-
if log.processed == log.total
|
240
|
-
log_done!
|
241
|
-
end
|
242
|
-
end
|
243
112
|
def log_done!
|
244
|
-
log.
|
113
|
+
log.set_without_session({done: true, finished: Time.now})
|
245
114
|
end
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
check_for_completion # if 0 items to migrate
|
256
|
-
end
|
257
|
-
def run_parallel_task(task_idx, ids)
|
258
|
-
@active_idx = task_idx
|
259
|
-
if log(task_idx).cancel
|
260
|
-
failed_cancelled!(ids)
|
261
|
-
return
|
262
|
-
end
|
263
|
-
task = self.tasks[task_idx] or return
|
264
|
-
with_setup(task) do
|
265
|
-
process_batch(task, task.scope.klass.asc(:_id).in(_id: ids).to_a)
|
266
|
-
unless @skipped.empty?
|
267
|
-
failed_cancelled!(@skipped.map(&:_id))
|
268
|
-
end
|
269
|
-
end
|
115
|
+
|
116
|
+
def start_parallel_task(task, task_idx)
|
117
|
+
create_log!
|
118
|
+
Hekenga::ParallelTask.new(
|
119
|
+
migration: self,
|
120
|
+
task: task,
|
121
|
+
task_idx: task_idx,
|
122
|
+
test_mode: test_mode
|
123
|
+
).start!
|
270
124
|
end
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
@context.instance_exec(&block)
|
275
|
-
end
|
276
|
-
begin
|
277
|
-
yield
|
278
|
-
ensure
|
279
|
-
@context = nil
|
280
|
-
end
|
125
|
+
|
126
|
+
def task_records(task_idx)
|
127
|
+
Hekenga::DocumentTaskRecord.where(migration_key: to_key, task_idx: task_idx)
|
281
128
|
end
|
282
|
-
|
283
|
-
|
129
|
+
|
130
|
+
def start_document_task(task, task_idx, recover: false)
|
131
|
+
create_log!
|
284
132
|
records = []
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
133
|
+
task_records(task_idx).delete_all unless recover
|
134
|
+
executor_key = BSON::ObjectId.new
|
135
|
+
task.scope.asc(:_id).no_timeout.each do |record|
|
136
|
+
records.push(record)
|
137
|
+
next unless records.length == (task.batch_size || batch_size)
|
138
|
+
|
139
|
+
records = filter_out_processed(task, task_idx, records) if recover
|
140
|
+
next unless records.length == (task.batch_size || batch_size)
|
141
|
+
|
142
|
+
execute_document_task(task_idx, executor_key, records)
|
143
|
+
records = []
|
144
|
+
return if log.cancel
|
145
|
+
end
|
146
|
+
records = filter_out_processed(task, task_idx, records) if recover
|
147
|
+
execute_document_task(task_idx, executor_key, records) if records.any?
|
148
|
+
return if log.cancel
|
296
149
|
log_done!
|
297
150
|
end
|
298
|
-
def run_filters(task, record)
|
299
|
-
task.filters.all? do |block|
|
300
|
-
@context.instance_exec(record, &block)
|
301
|
-
end
|
302
|
-
end
|
303
|
-
def deep_clone(record)
|
304
|
-
record.as_document.deep_dup
|
305
|
-
end
|
306
|
-
def process_batch(task, records)
|
307
|
-
@skipped = []
|
308
|
-
to_persist = []
|
309
|
-
fallbacks = []
|
310
151
|
|
311
|
-
|
312
|
-
|
152
|
+
def filter_out_processed(task, task_idx, records)
|
153
|
+
return records if records.empty?
|
154
|
+
|
155
|
+
selector = task_records(task_idx).in(ids: records.map(&:id))
|
156
|
+
processed_ids = selector.pluck(:ids).flatten.to_set
|
157
|
+
records.reject do |record|
|
158
|
+
processed_ids.include?(record._id)
|
313
159
|
end
|
314
|
-
log_skipped(task, filtered[false]) if filtered[false]
|
315
|
-
return unless filtered[true]
|
316
|
-
filtered[true].map.with_index do |record, idx|
|
317
|
-
original_record = deep_clone(record)
|
318
|
-
begin
|
319
|
-
task.up!(@context, record)
|
320
|
-
rescue => e
|
321
|
-
failed_apply!(e, record, records[0].id)
|
322
|
-
@skipped = filtered[true][idx+1..-1]
|
323
|
-
return
|
324
|
-
end
|
325
|
-
if validate_record(task, record)
|
326
|
-
to_persist.push(record)
|
327
|
-
fallbacks.push(original_record)
|
328
|
-
else
|
329
|
-
if log.cancel
|
330
|
-
@skipped = filtered[true][idx+1..-1]
|
331
|
-
return
|
332
|
-
end
|
333
|
-
end
|
334
|
-
end.compact
|
335
|
-
persist_batch(task, to_persist, fallbacks)
|
336
|
-
end
|
337
|
-
def log_skipped(task, records)
|
338
|
-
log.incr_and_return(
|
339
|
-
skipped: records.length,
|
340
|
-
processed: records.length
|
341
|
-
)
|
342
|
-
check_for_completion
|
343
160
|
end
|
344
|
-
|
345
|
-
|
346
|
-
|
161
|
+
|
162
|
+
def execute_document_task(task_idx, executor_key, records)
|
163
|
+
task_record = Hekenga::DocumentTaskRecord.create(
|
164
|
+
migration_key: to_key,
|
165
|
+
task_idx: task_idx,
|
166
|
+
executor_key: executor_key,
|
167
|
+
test_mode: test_mode,
|
168
|
+
ids: records.map(&:id)
|
347
169
|
)
|
348
|
-
|
170
|
+
Hekenga::DocumentTaskExecutor.new(task_record, records: records).run!
|
349
171
|
end
|
350
172
|
|
351
|
-
def persist_batch(task, records, original_records)
|
352
|
-
if @test_mode
|
353
|
-
log_success(task, records)
|
354
|
-
return
|
355
|
-
end
|
356
|
-
# NOTE - edgecase where callbacks cause the record to become invalid is
|
357
|
-
# not covered
|
358
|
-
records.each do |record|
|
359
|
-
begin
|
360
|
-
next if task.skip_prepare
|
361
|
-
if task.timeless
|
362
|
-
record.timeless.send(:prepare_update) {}
|
363
|
-
else
|
364
|
-
record.send(:prepare_update) {}
|
365
|
-
end
|
366
|
-
rescue => e
|
367
|
-
# If prepare_update throws an error, we're in trouble - crash out now
|
368
|
-
failed_apply!(e, record, records[0].id)
|
369
|
-
return
|
370
|
-
end
|
371
|
-
end
|
372
|
-
begin
|
373
|
-
delete_records!(task.scope.klass, records.map(&:_id))
|
374
|
-
write_records!(task.scope.klass, records)
|
375
|
-
log_success(task, records)
|
376
|
-
rescue => e
|
377
|
-
failed_write!(e, original_records)
|
378
|
-
end
|
379
|
-
end
|
380
|
-
def delete_records!(klass, ids)
|
381
|
-
klass.in(_id: ids).delete_all
|
382
|
-
end
|
383
|
-
def write_records!(klass, records)
|
384
|
-
klass.collection.insert_many(records.map(&:as_document))
|
385
|
-
end
|
386
173
|
def simple_failure!(error)
|
387
174
|
log.add_failure({
|
388
175
|
message: error.to_s,
|
@@ -391,57 +178,10 @@ module Hekenga
|
|
391
178
|
}, Hekenga::Failure::Error)
|
392
179
|
log_cancel!
|
393
180
|
end
|
394
|
-
|
395
|
-
log.add_failure({
|
396
|
-
document_ids: ids,
|
397
|
-
batch_start: ids[0]
|
398
|
-
}, Hekenga::Failure::Cancelled)
|
399
|
-
end
|
400
|
-
def failed_apply!(error, record, batch_start_id)
|
401
|
-
log.add_failure({
|
402
|
-
message: error.to_s,
|
403
|
-
backtrace: error.backtrace,
|
404
|
-
document: deep_clone(record),
|
405
|
-
batch_start: batch_start_id
|
406
|
-
}, Hekenga::Failure::Error)
|
407
|
-
log_cancel!
|
408
|
-
end
|
181
|
+
|
409
182
|
def log_cancel!
|
410
|
-
|
411
|
-
|
412
|
-
def failed_write!(error, original_records)
|
413
|
-
log.add_failure({
|
414
|
-
message: error.to_s,
|
415
|
-
backtrace: error.backtrace,
|
416
|
-
documents: original_records,
|
417
|
-
document_ids: original_records.map {|x| x["_id"]},
|
418
|
-
batch_start: original_records[0]["_id"]
|
419
|
-
}, Hekenga::Failure::Write)
|
420
|
-
log_cancel!
|
421
|
-
end
|
422
|
-
def failed_validation!(task, record)
|
423
|
-
log.add_failure({
|
424
|
-
doc_id: record.id,
|
425
|
-
errs: record.errors.full_messages,
|
426
|
-
document: deep_clone(record),
|
427
|
-
}, Hekenga::Failure::Validation)
|
428
|
-
log.set(error: true)
|
429
|
-
log.incr_and_return(processed: 1, unvalid: 1)
|
430
|
-
if task.invalid_strategy == :cancel
|
431
|
-
log_cancel!
|
432
|
-
else
|
433
|
-
check_for_completion
|
434
|
-
end
|
435
|
-
end
|
436
|
-
def validate_record(task, record)
|
437
|
-
# TODO - ability to skip validation
|
438
|
-
# TODO - handle errors on validation
|
439
|
-
if record.valid?
|
440
|
-
true
|
441
|
-
else
|
442
|
-
failed_validation!(task, record)
|
443
|
-
false
|
444
|
-
end
|
183
|
+
# Bypass the active transaction if there is one
|
184
|
+
log.set_without_session({cancel: true, error: true, done: true, finished: Time.now})
|
445
185
|
end
|
446
186
|
|
447
187
|
# Validations
|
data/lib/hekenga/parallel_job.rb
CHANGED
@@ -4,10 +4,17 @@ module Hekenga
|
|
4
4
|
queue_as do
|
5
5
|
ENV["HEKENGA_QUEUE"] || :migration
|
6
6
|
end
|
7
|
-
def perform(
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
def perform(document_task_record_id, executor_key)
|
8
|
+
record = Hekenga::DocumentTaskRecord.where(_id: document_task_record_id).first
|
9
|
+
return if record.nil?
|
10
|
+
return if record.executor_key != BSON::ObjectId(executor_key)
|
11
|
+
return if record.complete?
|
12
|
+
|
13
|
+
executor = Hekenga::DocumentTaskExecutor.new(record)
|
14
|
+
return if executor.migration_cancelled?
|
15
|
+
|
16
|
+
executor.run!
|
17
|
+
executor.check_for_completion!
|
11
18
|
end
|
12
19
|
end
|
13
20
|
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
require 'hekenga/iterator'
|
2
|
+
require 'hekenga/document_task_executor'
|
3
|
+
require 'hekenga/task_splitter'
|
4
|
+
|
5
|
+
module Hekenga
|
6
|
+
class ParallelTask
|
7
|
+
attr_reader :migration, :task, :task_idx, :test_mode
|
8
|
+
|
9
|
+
def initialize(migration:, task:, task_idx:, test_mode:)
|
10
|
+
@migration = migration
|
11
|
+
@task = task
|
12
|
+
@task_idx = task_idx
|
13
|
+
@test_mode = test_mode
|
14
|
+
end
|
15
|
+
|
16
|
+
def start!
|
17
|
+
clear_task_records!
|
18
|
+
@executor_key = BSON::ObjectId.new
|
19
|
+
generate_for_scope(task.scope)
|
20
|
+
check_for_completion!
|
21
|
+
end
|
22
|
+
|
23
|
+
def resume!
|
24
|
+
@executor_key = BSON::ObjectId.new
|
25
|
+
task_records.set(executor_key: @executor_key)
|
26
|
+
queue_jobs!(task_records.incomplete)
|
27
|
+
generate_new_records!
|
28
|
+
recover_failed_records!
|
29
|
+
check_for_completion!
|
30
|
+
end
|
31
|
+
|
32
|
+
def complete?
|
33
|
+
task_records.incomplete.none?
|
34
|
+
end
|
35
|
+
|
36
|
+
def check_for_completion!
|
37
|
+
if complete?
|
38
|
+
migration.log(task_idx).set_without_session(done: true, finished: Time.now)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
def generate_for_scope(scope)
|
45
|
+
Hekenga::Iterator.new(scope, size: 100_000).each do |id_block|
|
46
|
+
task_records = id_block.each_slice(batch_size).map do |id_slice|
|
47
|
+
generate_task_records!(id_slice)
|
48
|
+
end
|
49
|
+
write_task_records!(task_records)
|
50
|
+
queue_jobs!(task_records)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
def generate_new_records!
|
55
|
+
last_record = task_records.desc(:_id).first
|
56
|
+
last_id = last_record&.ids&.last
|
57
|
+
scope = task.scope
|
58
|
+
scope = task.scope.and(_id: {'$gt': last_id}) if last_id
|
59
|
+
generate_for_scope(scope)
|
60
|
+
end
|
61
|
+
|
62
|
+
# Any records with a failure or a validation failure get moved into
|
63
|
+
# a new task record which is incomplete and gets a job queued
|
64
|
+
def recover_failed_records!
|
65
|
+
task_records.complete.no_timeout.each do |record|
|
66
|
+
Hekenga::TaskSplitter.new(record, @executor_key).call.tap do |new_record|
|
67
|
+
next if new_record.nil?
|
68
|
+
|
69
|
+
Hekenga::ParallelJob.perform_later(new_record.id.to_s, @executor_key.to_s)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def batch_size
|
75
|
+
task.batch_size || migration.batch_size
|
76
|
+
end
|
77
|
+
|
78
|
+
def clear_task_records!
|
79
|
+
task_records.delete_all
|
80
|
+
end
|
81
|
+
|
82
|
+
def task_records
|
83
|
+
migration.task_records(task_idx)
|
84
|
+
end
|
85
|
+
|
86
|
+
def generate_task_records!(id_slice)
|
87
|
+
Hekenga::DocumentTaskRecord.new(
|
88
|
+
migration_key: migration.to_key,
|
89
|
+
task_idx: task_idx,
|
90
|
+
executor_key: @executor_key,
|
91
|
+
test_mode: test_mode,
|
92
|
+
ids: id_slice
|
93
|
+
).tap do |record|
|
94
|
+
record.send(:prepare_insert) {}
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
def write_task_records!(records)
|
99
|
+
Hekenga::DocumentTaskRecord.collection.bulk_write(records.map do |record|
|
100
|
+
{ insert_one: record.as_document }
|
101
|
+
end)
|
102
|
+
end
|
103
|
+
|
104
|
+
def queue_jobs!(records)
|
105
|
+
records.each do |record|
|
106
|
+
Hekenga::ParallelJob.perform_later(record.id.to_s, @executor_key.to_s)
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
data/lib/hekenga/scaffold.rb
CHANGED
@@ -16,8 +16,7 @@ module Hekenga
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def to_path
|
19
|
-
@path ||= File.join(Hekenga.config.abs_dir, @migration.to_key.
|
20
|
-
'.rb')
|
19
|
+
@path ||= File.join(Hekenga.config.abs_dir, @migration.to_key+".rb")
|
21
20
|
end
|
22
21
|
|
23
22
|
def to_s
|
@@ -28,33 +27,38 @@ module Hekenga
|
|
28
27
|
created #{@migration.timestamp.sub("T", " ").inspect}
|
29
28
|
|
30
29
|
## Optional
|
31
|
-
#
|
30
|
+
#batch_size 25
|
32
31
|
|
33
32
|
## Simple tasks
|
34
|
-
#
|
35
|
-
#
|
36
|
-
#
|
37
|
-
#
|
33
|
+
#task "task description" do
|
34
|
+
# up do
|
35
|
+
# end
|
36
|
+
#end
|
38
37
|
|
39
38
|
## Per document tasks
|
40
|
-
#
|
41
|
-
#
|
42
|
-
#
|
39
|
+
#per_document "task description" do
|
40
|
+
# ## Required
|
41
|
+
# scope MyModel.all
|
43
42
|
#
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
#
|
48
|
-
#
|
49
|
-
#
|
50
|
-
#
|
51
|
-
# # end
|
52
|
-
# # filter do
|
53
|
-
# # end
|
43
|
+
# ## Optional config
|
44
|
+
# #parallel!
|
45
|
+
# #timeless!
|
46
|
+
# #always_write!
|
47
|
+
# #skip_prepare!
|
48
|
+
# #batch_size 25
|
49
|
+
# #write_strategy :update # :delete_then_insert
|
54
50
|
#
|
55
|
-
#
|
56
|
-
#
|
57
|
-
#
|
51
|
+
# # Called once per batch, instance variables will be accessible
|
52
|
+
# # in the filter & up blocks
|
53
|
+
# #setup do
|
54
|
+
# #end
|
55
|
+
#
|
56
|
+
# #filter do |doc|
|
57
|
+
# #end
|
58
|
+
#
|
59
|
+
# up do |doc|
|
60
|
+
# end
|
61
|
+
#end
|
58
62
|
end
|
59
63
|
EOF
|
60
64
|
end
|