hekenga 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,149 @@
1
+ module Hekenga
2
+ class MasterProcess
3
# Creates a coordinator for a single migration.
#
# migration - the object whose tasks are run; the other methods of this
#             class call to_key, tasks, log, reload_logs and perform_task!
#             on it.
def initialize(migration)
  @migration = migration
end
6
+
7
# Runs every task of the migration in order, one at a time.
#
# For each task: launch it, report progress until it finishes, then stop
# early (returning nil) if the task's log is flagged as cancelled - either
# by the task itself or by the validation-error handling.
def run!
  Hekenga.log "Launching migration #{@migration.to_key}"
  @migration.tasks.each_with_index do |task, position|
    launch_task(task, position)
    report_while_active(task, position)

    if @migration.log(position).cancel
      Hekenga.log "TERMINATING DUE TO CRITICAL ERRORS"
      report_errors(position)
      return
    end

    if any_validation_errors?(position)
      handle_validation_errors(task, position)
      return if @migration.log(position).cancel
    end

    cleanup
  end
end
23
+
24
# Re-runs a single task from scratch, optionally restricted to `scope`.
#
# Deletes the Log and Failure entries recorded for the task, launches it
# again and reports on it like run! does. Returns false as soon as the
# task's log is flagged as cancelled, true otherwise.
def retry!(task_idx, scope)
  task = @migration.tasks[task_idx]

  # Reset logs completely
  [Hekenga::Log, Hekenga::Failure].each do |model|
    model.where(pkey: @migration.to_key, task_idx: task_idx).delete_all
  end
  @migration.reload_logs

  # Start the task based on the passed scope - similar to run! but we exit
  # directly on failure.
  launch_task(task, task_idx, scope)
  report_while_active(task, task_idx)
  return false if @migration.log(task_idx).cancel

  if any_validation_errors?(task_idx)
    handle_validation_errors(task, task_idx)
    return false if @migration.log(task_idx).cancel
  end

  cleanup
  true
end
45
+
46
# True when at least one validation failure is recorded for task `idx` of
# this migration.
def any_validation_errors?(idx)
  recorded = Hekenga::Failure::Validation.where(pkey: @migration.to_key, task_idx: idx)
  recorded.any?
end
49
+
50
# Applies the task's invalid_strategy after validation errors were found.
#
# Does nothing for tasks without an invalid_strategy or for the last task
# of the migration. With :prompt the operator is asked whether to continue;
# with :stop the run is always ended. Ending the run means flagging the
# task's log with cancel: true. Always returns nil.
def handle_validation_errors(task, idx)
  return unless task.respond_to?(:invalid_strategy)
  return if idx == @migration.tasks.length - 1

  cancel =
    case task.invalid_strategy
    when :prompt
      !continue_prompt?("There were validation errors in the last task.")
    when :stop
      Hekenga.log "TERMINATING DUE TO VALIDATION ERRORS"
      true
    else
      false
    end

  @migration.log(idx).set(cancel: true) if cancel
  nil
end
65
+
66
# Logs a summary of the failures recorded for task `idx`: validation
# failures (count only), then write failures and migration errors (count
# plus the distinct messages). Each section ends with the console
# expression that lists the affected entries.
def report_errors(idx)
  failures = @migration.log(idx).failures
  log_id = @migration.log(idx).id
  bullets = ->(rows) { rows.pluck(:message).uniq.map { |msg| "- #{msg}" }.join("\n") }

  # Validation errors
  invalid = failures.where(_type: "Hekenga::Failure::Validation")
  invalid_total = invalid.count
  unless invalid_total.zero?
    Hekenga.log "#{invalid_total} records failed validation. To get a list:"
    Hekenga.log "Hekenga::Failure::Validation.lookup('#{log_id}', #{idx})"
  end

  # Write failures
  unwritten = failures.where(_type: "Hekenga::Failure::Write")
  unwritten_total = unwritten.count
  unless unwritten_total.zero?
    Hekenga.log "#{unwritten_total} write errors detected. Error messages:"
    Hekenga.log(bullets.call(unwritten))
    Hekenga.log "To get a list:"
    Hekenga.log "Hekenga::Failure::Write.lookup('#{log_id}', #{idx})"
    # TODO - recover message
  end

  # Migration errors
  errored = failures.where(_type: "Hekenga::Failure::Error")
  errored_total = errored.count
  unless errored_total.zero?
    Hekenga.log "#{errored_total} migration errors detected. Error messages:"
    Hekenga.log(bullets.call(errored))
    Hekenga.log "To get a list:"
    Hekenga.log "Hekenga::Failure::Error.lookup('#{log_id}', #{idx})"
    # TODO - recover message
  end
end
97
# Starts task `idx` on a background thread and remembers that thread in
# @active_thread. abort_on_exception is switched on so that an error raised
# inside the worker is not silently lost. Returns the thread.
def launch_task(task, idx, scope = nil)
  Hekenga.log "Launching task##{idx}: #{task.description}"
  worker = Thread.new { @migration.perform_task!(idx, scope) }
  worker.abort_on_exception = true
  @active_thread = worker
end
103
# Blocks until task `idx` is finished, printing progress along the way.
#
# First polls once per second until the task's log can be fetched (any
# StandardError raised by the lookup counts as "not there yet"). Then
# reports status every Hekenga.config.report_sleep seconds until the
# reloaded log says done. Returns early, without the error summary, when
# the log is flagged as cancelled.
def report_while_active(task, idx)
  # Wait for the log to be generated
  loop do
    created =
      begin
        @migration.log(idx)
      rescue StandardError
        nil
      end
    break if created

    sleep 1
  end

  # Periodically report on thread progress
  loop do
    break if @migration.log(idx).reload.done

    # Joining a dead worker re-raises whatever killed it
    @active_thread.join unless @active_thread.alive?
    report_status(task, idx)
    return if @migration.log(idx).cancel

    sleep Hekenga.config.report_sleep
  end

  report_status(task, idx)
  return if @migration.log(idx).cancel

  report_errors(idx)
  Hekenga.log "Completed"
end
120
# Logs a one-line progress message for task `idx`. Document tasks report
# processed/total plus invalid and skipped counts taken from the task's
# log; simple tasks only report that they are still running.
def report_status(task, idx)
  case task
  when Hekenga::DocumentTask
    failures = @migration.log(idx).failures
    skipped = @migration.log(idx).skipped
    invalid = failures.where(_type: "Hekenga::Failure::Validation").count
    Hekenga.log "Processed #{@migration.log(idx).processed} of #{@migration.log(idx).total} (#{invalid} invalid, #{skipped} skipped)"
  when Hekenga::SimpleTask
    Hekenga.log "Waiting on task"
  end
end
133
# Forgets the worker thread of the task that just finished.
def cleanup
  @active_thread = nil
end
136
+
137
# Asks the operator a yes/no question on the console and keeps asking
# until the answer is "y" or "n" (case-insensitive). Returns true for yes,
# false for no.
def continue_prompt?(str)
  loop do
    print "#{str} Continue? (Y/N)\n"
    answer = gets.chomp.downcase
    return true if answer == "y"
    return false if answer == "n"
  end
end
148
+ end
149
+ end
@@ -0,0 +1,464 @@
1
+ require 'hekenga/invalid'
2
+ require 'hekenga/context'
3
+ require 'hekenga/parallel_job'
4
+ require 'hekenga/master_process'
5
+ require 'hekenga/log'
6
+ module Hekenga
7
+ class Migration
8
+ attr_accessor :stamp, :description, :batch_size
9
+ attr_reader :tasks
10
+
11
# Starts with no tasks, an empty per-task log cache and a batch size of 25.
def initialize
  @tasks = []
  @logs = {}
  @batch_size = 25
end
16
+
17
+ # Internal
18
# The migration's stamp formatted to minute precision,
# e.g. "2020-01-02T03:04".
def timestamp
  stamp.strftime("%Y-%m-%dT%H:%M")
end
21
+
22
# The description reduced to letters, with every run of other characters
# replaced by a single underscore and leading/trailing underscores dropped.
# The result is memoized, so later changes to the description are ignored.
def desc_to_token
  return @desc_to_token if @desc_to_token

  underscored = description.gsub(/[^A-Za-z]+/, "_")
  @desc_to_token = underscored.gsub(/(^_|_$)/, "")
end
25
+
26
+ def inspect
27
+ "<Hekenga::Migration #{self.to_key}>"
28
+ end
29
+
30
# The migration's key: "<timestamp>-<description token>". Memoized in @pkey.
def to_key
  return @pkey if @pkey

  @pkey = "#{timestamp}-#{desc_to_token}"
end
33
+
34
# Returns the Hekenga::Log entry for a task, defaulting to the task that is
# currently active. Entries are cached per task index once found.
#
# Raises a RuntimeError ("Missing task index") when no index is given and
# no task is active.
def log(task_idx = @active_idx)
  raise "Missing task index" if task_idx.nil?

  cached = @logs[task_idx]
  return cached if cached

  @logs[task_idx] = Hekenga::Log.find_by(pkey: to_key, task_idx: task_idx)
end
41
+
42
# Creates the Hekenga::Log entry for the active task and caches it.
# `attrs` are passed through to Hekenga::Log.create together with this
# migration and the active task index.
def create_log!(attrs = {})
  payload = attrs.merge(migration: self, task_idx: @active_idx)
  @logs[@active_idx] = Hekenga::Log.create(payload)
end
48
+
49
+ # API
50
# Drops every cached log entry so the next `log` call looks it up again.
def reload_logs
  @logs = {}
end
53
# True while at least one log entry of this migration is not marked done.
def performing?
  unfinished = Hekenga::Log.where(pkey: to_key, done: false)
  unfinished.any?
end
56
# True when the log entry of the final task is marked done.
#
# Answers false instead of raising NoMethodError when the lookup returns
# nil, i.e. when the final task has no log entry yet.
# NOTE(review): if Hekenga::Log.find_by is configured to raise on a missing
# document, that error still propagates from `log` - confirm which
# behaviour callers expect.
def performed?
  final_log = log(tasks.length - 1)
  !!final_log&.done
end
59
# Switches the migration into test mode; persist_batch then only counts
# records as processed and writes nothing.
def test_mode!
  @test_mode = true
end
62
# Runs the migration through a MasterProcess, but only when Hekenga.status
# reports :naught for it. Otherwise logs a notice and returns false.
def perform!
  return Hekenga::MasterProcess.new(self).run! if Hekenga.status(self) == :naught

  Hekenga.log "This migration has already been run! Aborting."
  false
end
70
# Executes the task at `task_idx` and makes it the active task.
#
# Simple tasks are run directly. Document tasks get a log entry holding the
# number of documents in scope (the task's own scope ordered by _id unless
# `scope` is given) and are then run either as parallel jobs or in-process.
# Returns nil when there is no task at that index.
def perform_task!(task_idx = 0, scope = nil)
  task = @tasks[task_idx]
  return unless task

  @active_task = task
  @active_idx = task_idx

  case task
  when Hekenga::SimpleTask
    start_simple_task(task)
  when Hekenga::DocumentTask
    # TODO - online migration support (have log.total update, requeue)
    scope ||= task.scope.asc(:_id)
    create_log!(total: scope.count)
    if task.parallel
      start_parallel_task(task, task_idx, scope)
    else
      start_document_task(task, task_idx, scope)
    end
  end
end
88
# Interactive recovery of a partially failed migration.
#
# After confirmation, walks the tasks in order. A task without any log
# entry is simply retried. A task whose log is cancelled or that has
# recorded failures is handed to the matching recover_* helper, whose
# answer decides what happens next: :next moves on, :cancel aborts, and
# anything else is used as the scope for retrying the task. Returns false
# when the operator declines or any retry fails, true otherwise.
def recover!
  # NOTE - can't find a way to check this automatically with ActiveJob right now
  confirmed = prompt "Check that the migration queue has processed before recovering. Continue?"
  return false unless confirmed

  @tasks.each_with_index do |task, position|
    if Hekenga::Log.where(pkey: to_key, task_idx: position).any?
      # Did this task fail? If it didn't, keep searching
      failed = log(position).cancel || Hekenga::Failure.where(pkey: to_key, task_idx: position).any?
      next unless failed

      # This is the first failure we've detected - recover from it
      outcome =
        case task
        when Hekenga::DocumentTask then recover_document_task!(task, position)
        when Hekenga::SimpleTask then recover_simple!(task, position)
        end

      case outcome
      when :next
        next
      when :cancel
        return false
      else
        return false unless retry_task!(task, position, outcome)
      end
    else
      # If no log, run the task now
      return false unless retry_task!(task, position)
    end
  end

  true
end
121
+
122
# Retries task `idx` through a fresh MasterProcess, optionally limited to
# `scope`. Returns true on success; logs a notice and returns false when
# the retry reports failure.
def retry_task!(task, idx, scope = nil)
  Hekenga.log "Retrying task##{idx}"
  return true if Hekenga::MasterProcess.new(self).retry!(idx, scope)

  Hekenga.log "Failed to retry the task. Aborting.."
  false
end
130
+
131
# Simple tasks need no preparation before a retry: log a notice and return
# nil, which recover! treats as "retry without a scope".
def recover_simple!(task, idx)
  Hekenga.log("Found failed simple task. Retrying..")
  nil
end
136
+
137
# Prepares the retry of a failed document task.
#
# Counts the recorded failures by kind and asks the operator whether to
# recover; answers :next when declined. Otherwise re-inserts documents
# captured by write failures and returns the scope the task should be
# retried with:
# - parallel tasks: exactly the ids recorded on the failure entries;
# - serial tasks: everything from the first failed batch onwards plus any
#   documents that failed validation, or :next when neither exists.
def recover_document_task!(task, idx)
  lookup = ->(model) { model.where(pkey: to_key, task_idx: idx) }
  invalid = lookup.call(Hekenga::Failure::Validation)
  unwritten = lookup.call(Hekenga::Failure::Write)
  errored = lookup.call(Hekenga::Failure::Error)
  cancelled = lookup.call(Hekenga::Failure::Cancelled)

  # Stats
  invalid_total = invalid.count
  unwritten_total = unwritten.count
  errored_total = errored.count
  cancelled_total = cancelled.count

  # Prompt for recovery
  question = "Found #{invalid_total} invalid, " +
             "#{unwritten_total} failed writes, " +
             "#{errored_total} errors, " +
             "#{cancelled_total} cancelled on migration. Recover?"
  return :next unless prompt(question)

  # Recover from critical write failures (DB records potentially lost)
  unless unwritten_total.zero?
    Hekenga.log "Recovering old data from #{unwritten_total} write failure(s)"
    recover_data(unwritten, task.scope.klass)
  end

  # Resume task from point of error
  if task.parallel
    # TODO - support for recovery on huge # IDs
    failed_ids = [
      unwritten.pluck(:document_ids),
      errored.pluck(:batch_start),
      cancelled.pluck(:document_ids),
      invalid.pluck(:doc_id)
    ].flatten.compact
    return task.scope.klass.asc(:_id).in(_id: failed_ids)
  end

  first_id = errored.first&.batch_start || unwritten.first&.batch_start
  invalid_ids = invalid.pluck(:doc_id)
  if first_id && invalid_ids.any?
    task.scope.klass.asc(:_id).and(
      task.scope.selector,
      task.scope.klass.or(
        {_id: {:$gte => first_id}},
        {_id: {:$in => invalid_ids}}
      ).selector
    )
  elsif first_id
    task.scope.asc(:_id).gte(_id: first_id)
  elsif invalid_ids.any?
    task.scope.klass.asc(:_id).in(_id: invalid_ids)
  else
    :next
  end
end
197
+
198
# Re-inserts documents that were captured by write failures and are no
# longer present in the collection of `klass`. Failures whose documents all
# still exist are skipped.
def recover_data(write_failures, klass)
  write_failures.each do |failure|
    ids = failure.document_ids
    missing = ids - klass.in(_id: ids).pluck(:_id)
    lost = failure.documents.select { |doc| missing.include?(doc["_id"]) }
    next if lost.empty?

    Hekenga.log "Recovering #{lost.length} documents.."
    klass.collection.insert_many(lost)
  end
end
209
+
210
# Prints `str` as a yes/no question and keeps asking until the answer is
# "y" or "n" (case-insensitive). Returns true for yes, false for no.
def prompt(str)
  loop do
    print "#{str} (Y/N):\n"
    answer = gets.chomp.downcase
    return true if answer == "y"
    return false if answer == "n"
  end
end
221
+
222
# Placeholder - rolling a migration back is not implemented yet.
def rollback!
  # TODO
end
225
+
226
+ # Internal perform methods
227
# Runs a simple task inside a fresh context. Creates the log entry first;
# an error raised by the task is recorded via simple_failure! (returning
# nil), otherwise the log is marked done.
def start_simple_task(task)
  create_log!
  begin
    with_setup { task.up!(@context) }
  rescue => error
    simple_failure!(error)
    nil
  else
    log_done!
  end
end
239
+
240
# Marks the active task's log as done once every document is accounted for.
def check_for_completion
  log_done! if log.processed == log.total
end
245
# Flags the active task's log as done and stamps the finish time.
def log_done!
  finished_at = Time.now
  log.set(done: true, finished: finished_at)
end
248
# Queues one Hekenga::ParallelJob per batch of document ids.
#
# Takes the ids in scope in _id order, limits them to the total recorded on
# the log and enqueues them in slices of batch_size, passing the ids as
# strings together with the migration key, task index and test-mode flag.
def start_parallel_task(task, task_idx, scope)
  # TODO - support for crazy numbers of documents where pluck is too big
  ordered_ids = scope.asc(:_id).pluck(:_id).take(log.total)
  ordered_ids.each_slice(batch_size) do |batch|
    Hekenga::ParallelJob.perform_later(to_key, task_idx, batch.map(&:to_s), !!@test_mode)
  end
end
256
# Processes one batch of a parallel document task; called by
# Hekenga::ParallelJob with the ids of the batch.
#
# When the task's log is already cancelled the whole batch is recorded as
# cancelled and nothing is processed. Otherwise the documents are loaded in
# _id order and processed; documents that process_batch left untouched
# (because it stopped early) are recorded as cancelled as well.
def run_parallel_task(task_idx, ids)
  # Select the task before anything is recorded: the failure helpers call
  # `log` without an index, which raises when @active_idx is unset - the
  # case for a migration object that never went through perform_task!.
  @active_idx = task_idx
  if log(task_idx).cancel
    failed_cancelled!(ids)
    return
  end
  task = tasks[task_idx] or return
  # failed_validation! reads the strategy from @active_task
  @active_task = task
  with_setup(task) do
    process_batch(task, task.scope.asc(:_id).in(_id: ids).to_a)
    # @skipped holds records, while failure entries are keyed by id
    failed_cancelled!(@skipped.map(&:_id)) unless @skipped.empty?
  end
end
270
# Yields with a fresh Hekenga::Context in @context.
#
# When a task is given, its setup blocks are evaluated in the context and
# the callbacks named by its disable_rules are skipped for the duration of
# the block. The context is cleared and the callbacks are set again
# afterwards, even if the block raises. Returns the block's value.
def with_setup(task = nil)
  @context = Hekenga::Context.new(@test_mode)
  task&.setups&.each { |setup| @context.instance_exec(&setup) }

  begin
    # Disable specific callbacks
    task&.disable_rules&.each { |rule| rule[:klass].skip_callback(rule[:callback]) }
    yield
  ensure
    @context = nil
    # Make sure the callbacks make it back
    task&.disable_rules&.each { |rule| rule[:klass].set_callback(rule[:callback]) }
  end
end
289
# Runs a document task in-process: walks the scope in _id order and hands
# the documents to process_batch in groups of batch_size. Stops without
# marking the log done as soon as a full batch leaves the log cancelled;
# otherwise processes the remainder and marks the log done.
def start_document_task(task, task_idx, scope)
  with_setup(task) do
    pending = []
    scope.asc(:_id).each do |record|
      pending << record
      next if pending.length != batch_size

      process_batch(task, pending)
      return if log.cancel

      pending = []
    end
    process_batch(task, pending) unless pending.empty?
  end
  log_done!
end
304
# True when every filter block of the task, evaluated in the current
# context with the record as argument, returns a truthy value.
def run_filters(task, record)
  task.filters.all? { |filter| @context.instance_exec(record, &filter) }
end
309
# Applies the task to one batch of records and persists the valid results.
#
# Records rejected by the task's filters are logged as skipped. Each
# remaining record is snapshotted, transformed and validated. Processing
# stops early - leaving the not-yet-touched records in @skipped - when the
# transformation raises or when a validation failure cancelled the log.
# Otherwise the valid records are handed to persist_batch together with
# their pre-change snapshots.
def process_batch(task, records)
  @skipped = []
  to_persist = []
  fallbacks = []

  accepted, rejected = records.partition { |record| run_filters(task, record) }
  log_skipped(task, rejected) unless rejected.empty?
  return if accepted.empty?

  accepted.each_with_index do |record, position|
    # Deep copy of the document as it was before the task touched it
    snapshot = Marshal.load(Marshal.dump(record.as_document))
    begin
      task.up!(@context, record)
    rescue => error
      failed_apply!(error, record, records[0].id)
      @skipped = accepted[position + 1..-1]
      return
    end

    if validate_record(record)
      to_persist << record
      fallbacks << snapshot
    elsif log.cancel
      @skipped = accepted[position + 1..-1]
      return
    end
  end

  persist_batch(task, to_persist, fallbacks)
end
340
# Counts the given records as skipped (and processed) on the active log,
# then checks whether the task is complete.
def log_skipped(task, records)
  amount = records.length
  log.incr_and_return(skipped: amount, processed: amount)
  check_for_completion
end
347
# Counts the given records as processed on the active log, then checks
# whether the task is complete.
def log_success(task, records)
  amount = records.length
  log.incr_and_return(processed: amount)
  check_for_completion
end
353
+
354
# Writes a batch of transformed records back to the database.
#
# task             - the document task being run
# records          - the transformed records that passed validation
# original_records - their pre-change documents, stored on the failure
#                    entry if the write fails
#
# In test mode the records are only counted as processed. Otherwise each
# record is prepared for update (unless the task sets skip_prepare), then
# the old documents are deleted and the new ones inserted. A failure while
# preparing is recorded via failed_apply!, a failure while writing via
# failed_write!.
def persist_batch(task, records, original_records)
  if @test_mode
    log_success(task, records)
    return
  end
  # A batch in which every record failed validation arrives here empty.
  # There is nothing to delete or insert, and an error raised by an empty
  # insert would end up in failed_write!, which reads original_records[0].
  return if records.empty?

  # NOTE - edgecase where callbacks cause the record to become invalid is
  # not covered
  records.each do |record|
    begin
      next if task.skip_prepare
      if task.timeless
        record.timeless.send(:prepare_update) {}
      else
        record.send(:prepare_update) {}
      end
    rescue => e
      # If prepare_update throws an error, we're in trouble - crash out now
      failed_apply!(e, record, records[0].id)
      return
    end
  end
  begin
    delete_records!(task.scope.klass, records.map(&:_id))
    write_records!(task.scope.klass, records)
    log_success(task, records)
  rescue => e
    failed_write!(e, original_records)
  end
end
383
# Deletes the documents of `klass` whose _id is in `ids`.
def delete_records!(klass, ids)
  doomed = klass.in(_id: ids)
  doomed.delete_all
end
386
# Inserts the records' documents straight into the collection of `klass`.
def write_records!(klass, records)
  documents = records.map(&:as_document)
  klass.collection.insert_many(documents)
end
389
# Records an error raised by a simple task on the active log and cancels
# the log.
def simple_failure!(error)
  details = {
    message: error.to_s,
    backtrace: error.backtrace,
    simple: true
  }
  log.add_failure(details, Hekenga::Failure::Error)
  log_cancel!
end
397
# Records a batch of document ids as cancelled on the active log. The
# first id is stored as the start of the batch.
def failed_cancelled!(ids)
  details = {
    document_ids: ids,
    batch_start: ids[0]
  }
  log.add_failure(details, Hekenga::Failure::Cancelled)
end
403
# Records an error raised while transforming or preparing `record`,
# together with a deep copy of the record's document and the id the batch
# started at, then cancels the active log.
def failed_apply!(error, record, batch_start_id)
  details = {
    message: error.to_s,
    backtrace: error.backtrace,
    document: Marshal.load(Marshal.dump(record.as_document)),
    batch_start: batch_start_id
  }
  log.add_failure(details, Hekenga::Failure::Error)
  log_cancel!
end
412
# Flags the active log as cancelled, errored and done, and stamps the
# finish time.
def log_cancel!
  stopped_at = Time.now
  log.set(cancel: true, error: true, done: true, finished: stopped_at)
end
415
# Records a failed write on the active log, keeping the pre-change
# documents and their ids so they can be restored later, then cancels the
# log.
def failed_write!(error, original_records)
  details = {
    message: error.to_s,
    backtrace: error.backtrace,
    documents: original_records,
    document_ids: original_records.map { |doc| doc["_id"] },
    batch_start: original_records[0]["_id"]
  }
  log.add_failure(details, Hekenga::Failure::Write)
  log_cancel!
end
425
# Records a validation failure for `record` on the active log: the id, the
# error messages and a deep copy of the document. The record still counts
# as processed. With the :cancel strategy the log is cancelled; otherwise
# the task is checked for completion.
def failed_validation!(record)
  details = {
    doc_id: record.id,
    errs: record.errors.full_messages,
    document: Marshal.load(Marshal.dump(record.as_document))
  }
  log.add_failure(details, Hekenga::Failure::Validation)
  log.set(error: true)
  log.incr_and_return(processed: 1, unvalid: 1)

  return log_cancel! if @active_task.invalid_strategy == :cancel

  check_for_completion
end
439
# True when the record is valid; otherwise records the validation failure
# and returns false.
def validate_record(record)
  # TODO - ability to skip validation
  return true if record.valid?

  failed_validation!(record)
  false
end
448
+
449
+ # Validations
450
# The description token has to be longer than this many characters.
MIN_TOKEN_LENGTH = 5

# Raises Hekenga::Invalid for `field` of this migration with `reason`.
def validation_error(field, reason)
  raise Hekenga::Invalid.new(self, field, reason)
end

# Checks that the migration is complete enough to run: a Time stamp, a
# description whose token is longer than MIN_TOKEN_LENGTH, and at least one
# task. Returns true, or raises Hekenga::Invalid for the first problem
# found.
def validate!
  validation_error(:stamp, "missing") unless stamp.is_a?(Time)
  validation_error(:description, "missing") unless description
  validation_error(:description, "too short") unless desc_to_token.length > MIN_TOKEN_LENGTH
  validation_error(:tasks, "missing") if tasks.length.zero?
  true
end
463
+ end
464
+ end
@@ -0,0 +1,11 @@
1
+ require 'active_job'
2
module Hekenga
  # Background job that processes one batch of a parallel document task.
  # Jobs are placed on the :migration queue.
  class ParallelJob < ActiveJob::Base
    queue_as :migration

    # migration_key - key used to look the migration up again
    # task_idx      - index of the task within the migration
    # ids           - ids of the documents in this batch
    # test_mode     - when truthy the migration is switched to test mode
    #                 before the batch runs
    def perform(migration_key, task_idx, ids, test_mode)
      target = Hekenga.find_migration(migration_key)
      target.test_mode! if test_mode
      target.run_parallel_task(task_idx, ids)
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'hekenga/irreversible'
2
module Hekenga
  # A migration step made of plain blocks: `ups` are run to apply the step
  # and `downs` to revert it.
  class SimpleTask
    attr_reader :ups, :downs
    attr_accessor :description

    def initialize
      @ups = []
      @downs = []
    end

    # Raises Hekenga::Invalid unless at least one up block is registered.
    def validate!
      return if ups.any?

      raise Hekenga::Invalid.new(self, :ups, "missing")
    end

    # Evaluates every up block, in order, with `context` as self.
    def up!(context)
      @ups.each { |step| context.instance_exec(&step) }
    end

    # Calls every down block in order. Raises Hekenga::Irreversible when no
    # down block is registered.
    def down!
      raise Hekenga::Irreversible.new(self) unless reversible?

      @downs.each(&:call)
    end

    # True when at least one down block is registered.
    def reversible?
      downs.any?
    end
  end
end
@@ -0,0 +1,3 @@
1
module Hekenga
  # Version of the gem.
  VERSION = '0.1.0'
end
@@ -0,0 +1,8 @@
1
+ require 'hekenga/base_error'
2
module Hekenga
  # Error whose message names a method that has not been implemented,
  # in the form "Klass#method has not been implemented."
  class VirtualMethod < Hekenga::BaseError
    def initialize(klass, method)
      super("#{klass}##{method} has not been implemented.")
    end
  end
end