hekenga 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,149 @@
1
module Hekenga
  # Runs the tasks of a migration one at a time from the controlling process.
  # Each task is executed in a background thread while this object polls the
  # task's log, prints progress through Hekenga.log, and decides whether to
  # continue to the next task.
  class MasterProcess
    # @param migration the migration to drive; it must respond to the calls
    #   made below (to_key, tasks, log, perform_task!, reload_logs).
    def initialize(migration)
      @migration = migration
    end

    # Launches every task in order. Stops early (returning nil) when a task's
    # log is flagged as cancelled, either by the task itself or by the
    # validation-error strategy.
    def run!
      Hekenga.log "Launching migration #{@migration.to_key}"
      @migration.tasks.each_with_index do |task, idx|
        launch_task(task, idx)
        report_while_active(task, idx)
        if @migration.log(idx).cancel
          Hekenga.log "TERMINATING DUE TO CRITICAL ERRORS"
          report_errors(idx)
          return
        elsif any_validation_errors?(idx)
          handle_validation_errors(task, idx)
          return if @migration.log(idx).cancel
        end
        cleanup
      end
    end

    # Re-runs a single task after wiping its existing logs and failures.
    #
    # @param task_idx [Integer] index of the task within the migration
    # @param scope passed through to Migration#perform_task!
    # @return [Boolean] false when the task's log ends up cancelled, else true
    def retry!(task_idx, scope)
      task = @migration.tasks[task_idx]
      # Reset logs completely
      Hekenga::Log.where(pkey: @migration.to_key, task_idx: task_idx).delete_all
      Hekenga::Failure.where(pkey: @migration.to_key, task_idx: task_idx).delete_all
      @migration.reload_logs
      # Start the task based on the passed scope - similar to run! but we exit
      # directly on failure.
      launch_task(task, task_idx, scope)
      report_while_active(task, task_idx)
      return false if @migration.log(task_idx).cancel

      if any_validation_errors?(task_idx)
        handle_validation_errors(task, task_idx)
        return false if @migration.log(task_idx).cancel
      end
      cleanup
      true
    end

    # @return [Boolean] whether any validation failure is recorded for the task
    def any_validation_errors?(idx)
      Hekenga::Failure::Validation.where(pkey: @migration.to_key, task_idx: idx).any?
    end

    # Applies the task's invalid_strategy after validation failures were found.
    # Nothing happens for tasks without an invalid_strategy or for the final
    # task of the migration. :prompt asks the operator and cancels on "no";
    # :stop cancels unconditionally. Any other strategy is a no-op here.
    def handle_validation_errors(task, idx)
      return unless task.respond_to?(:invalid_strategy)
      return if idx == @migration.tasks.length - 1

      case task.invalid_strategy
      when :prompt
        unless continue_prompt?("There were validation errors in the last task.")
          @migration.log(idx).set(cancel: true)
        end
      when :stop
        Hekenga.log "TERMINATING DUE TO VALIDATION ERRORS"
        @migration.log(idx).set(cancel: true)
      end
      nil
    end

    # Logs a summary of the failures recorded against the task's log, grouped
    # by failure type, together with the lookup call that lists them.
    def report_errors(idx)
      scope  = @migration.log(idx).failures
      log_id = @migration.log(idx).id

      # Validation errors
      valid_errs_ctr = scope.where(_type: "Hekenga::Failure::Validation").count
      unless valid_errs_ctr.zero?
        Hekenga.log "#{valid_errs_ctr} records failed validation. To get a list:"
        Hekenga.log "Hekenga::Failure::Validation.lookup('#{log_id}', #{idx})"
      end

      # Write failures
      report_failure_messages(
        scope.where(_type: "Hekenga::Failure::Write"),
        "write errors detected. Error messages:",
        "Hekenga::Failure::Write.lookup('#{log_id}', #{idx})"
      )
      # Migration errors
      report_failure_messages(
        scope.where(_type: "Hekenga::Failure::Error"),
        "migration errors detected. Error messages:",
        "Hekenga::Failure::Error.lookup('#{log_id}', #{idx})"
      )
    end

    # Starts the task in a background thread and remembers the thread.
    #
    # @return [Thread] the thread running Migration#perform_task!
    def launch_task(task, idx, scope = nil)
      Hekenga.log "Launching task##{idx}: #{task.description}"
      @active_thread = Thread.new do
        # Set from inside the thread so there is no window in which the
        # thread can fail before the flag has been applied.
        Thread.current.abort_on_exception = true
        @migration.perform_task!(idx, scope)
      end
    end

    # Blocks until the task's log reports done (or cancel), printing the
    # status every Hekenga.config.report_sleep seconds. On normal completion
    # the recorded errors are reported as well.
    def report_while_active(task, idx)
      # Wait for the log to be generated
      sleep 1 until log_available?(idx)
      # Periodically report on thread progress
      until @migration.log(idx).reload.done
        # Joining a dead thread re-raises whatever killed it.
        @active_thread.join unless @active_thread.alive?
        report_status(task, idx)
        return if @migration.log(idx).cancel
        sleep Hekenga.config.report_sleep
      end
      report_status(task, idx)
      return if @migration.log(idx).cancel
      report_errors(idx)
      Hekenga.log "Completed"
    end

    # Prints one progress line for the task; the format depends on task type.
    def report_status(task, idx)
      case task
      when Hekenga::DocumentTask
        task_log       = @migration.log(idx)
        skipped_ctr    = task_log.skipped
        valid_errs_ctr = task_log.failures.where(_type: "Hekenga::Failure::Validation").count
        Hekenga.log "Processed #{@migration.log(idx).processed} of #{@migration.log(idx).total} (#{valid_errs_ctr} invalid, #{skipped_ctr} skipped)"
      when Hekenga::SimpleTask
        Hekenga.log "Waiting on task"
      end
    end

    # Forgets the thread of the task that just finished.
    def cleanup
      @active_thread = nil
    end

    # Asks a yes/no question on standard output and reads the answer from
    # $stdin, repeating until "y" or "n" (any case) is entered.
    #
    # Reads $stdin explicitly: bare Kernel#gets reads from ARGF, which opens
    # files named in ARGV when arguments are present. End of input is treated
    # as "no" so a non-interactive run declines instead of crashing.
    #
    # @param str [String] the question to show
    # @return [Boolean] true for "y", false for "n" or end of input
    def continue_prompt?(str)
      loop do
        print "#{str} Continue? (Y/N)\n"
        answer = $stdin.gets
        return false if answer.nil?

        case answer.chomp.downcase
        when "y" then return true
        when "n" then return false
        end
      end
    end

    private

    # True once the migration can return a log for the task. Any StandardError
    # raised by the lookup is treated as "not yet available".
    def log_available?(idx)
      @migration.log(idx) ? true : false
    rescue StandardError
      false
    end

    # Logs the count, the distinct messages and the lookup hint for one
    # failure type; does nothing when there are no such failures.
    def report_failure_messages(failures, summary, lookup)
      count = failures.count
      return if count.zero?

      Hekenga.log "#{count} #{summary}"
      Hekenga.log(failures.pluck(:message).uniq.map { |msg| "- #{msg}" }.join("\n"))
      Hekenga.log "To get a list:"
      Hekenga.log lookup
      # TODO - recover message
    end
  end
end
@@ -0,0 +1,464 @@
1
+ require 'hekenga/invalid'
2
+ require 'hekenga/context'
3
+ require 'hekenga/parallel_job'
4
+ require 'hekenga/master_process'
5
+ require 'hekenga/log'
6
module Hekenga
  # A migration: an ordered list of tasks plus the bookkeeping needed to run
  # them, track progress in Hekenga::Log records, record failures, and recover
  # from a failed run.
  class Migration
    # Threshold used by validate!: the description token must be strictly
    # longer than this many characters.
    MIN_TOKEN_LENGTH = 5

    attr_accessor :stamp, :description, :batch_size
    attr_reader :tasks

    def initialize
      @tasks = []
      @logs = {}
      @batch_size = 25
    end

    # Internal

    # @return [String] the stamp formatted to minute precision
    def timestamp
      stamp.strftime("%Y-%m-%dT%H:%M")
    end

    # @return [String] the description with every run of non-letters replaced
    #   by "_" and leading/trailing underscores removed (memoized)
    def desc_to_token
      @desc_to_token ||= description.gsub(/[^A-Za-z]+/, "_").gsub(/(^_|_$)/, "")
    end

    def inspect
      "<Hekenga::Migration #{to_key}>"
    end

    # @return [String] "<timestamp>-<description token>" (memoized)
    def to_key
      @pkey ||= "#{timestamp}-#{desc_to_token}"
    end

    # Returns the (cached) log for a task, defaulting to the active task.
    #
    # @raise [RuntimeError] when no index is given and no task is active
    def log(task_idx = @active_idx)
      raise "Missing task index" if task_idx.nil?
      @logs[task_idx] ||= Hekenga::Log.find_by(
        pkey: to_key,
        task_idx: task_idx
      )
    end

    # Creates the log for the active task and caches it.
    def create_log!(attrs = {})
      @logs[@active_idx] = Hekenga::Log.create(
        attrs.merge(migration: self, task_idx: @active_idx)
      )
    end

    # API

    # Drops all cached logs so the next #log call queries again.
    def reload_logs
      @logs = {}
    end

    # @return [Boolean] whether any log of this migration is not done yet
    def performing?
      Hekenga::Log.where(pkey: to_key, done: false).any?
    end

    # @return [Boolean] whether the log of the last task is flagged done
    def performed?
      !!log(tasks.length - 1).done
    end

    # Switches the migration to test mode: batches are counted as processed
    # but nothing is written (see #persist_batch).
    def test_mode!
      @test_mode = true
    end

    # Runs the migration through a MasterProcess unless Hekenga.status reports
    # anything other than :naught, in which case it logs and returns false.
    def perform!
      if Hekenga.status(self) == :naught
        Hekenga::MasterProcess.new(self).run!
      else
        Hekenga.log "This migration has already been run! Aborting."
        return false
      end
    end

    # Runs one task and marks it as the active task. Does nothing when there
    # is no task at the given index.
    #
    # @param scope optional replacement for the document task's own scope
    def perform_task!(task_idx = 0, scope = nil)
      task = @tasks[task_idx]
      return unless task

      @active_task = task
      @active_idx = task_idx
      case task
      when Hekenga::SimpleTask
        start_simple_task(task)
      when Hekenga::DocumentTask
        # TODO - online migration support (have log.total update, requeue)
        scope ||= task.scope.asc(:_id)
        create_log!(total: scope.count)
        if task.parallel
          start_parallel_task(task, task_idx, scope)
        else
          start_document_task(task, task_idx, scope)
        end
      end
    end

    # Walks the tasks in order: tasks without a log are run now, and tasks
    # that failed are recovered and retried after confirmation.
    #
    # @return [Boolean] false when the operator declines or a retry fails
    def recover!
      # NOTE - can't find a way to check this automatically with ActiveJob right now
      return false unless prompt "Check that the migration queue has processed before recovering. Continue?"
      # Write failures
      @tasks.each.with_index do |task, idx|
        # If no log, run the task now
        unless Hekenga::Log.where(pkey: to_key, task_idx: idx).any?
          return false unless retry_task!(task, idx)
          next
        end
        # Did this task fail?
        failed = log(idx).cancel || Hekenga::Failure.where(pkey: to_key, task_idx: idx).any?
        # If it didn't, keep searching
        next unless failed
        # This is the first failure we've detected - recover from it
        ret = case task
              when Hekenga::DocumentTask then recover_document_task!(task, idx)
              when Hekenga::SimpleTask then recover_simple!(task, idx)
              end

        case ret
        when :next
          next
        when :cancel
          return false
        else
          # Anything else is the scope to retry with (nil for simple tasks).
          return false unless retry_task!(task, idx, ret)
        end
      end
      true
    end

    # Retries one task through a fresh MasterProcess.
    #
    # @return [Boolean] whether the retry succeeded
    def retry_task!(task, idx, scope = nil)
      Hekenga.log "Retrying task##{idx}"
      unless Hekenga::MasterProcess.new(self).retry!(idx, scope)
        Hekenga.log "Failed to retry the task. Aborting.."
        return false
      end
      true
    end

    # Simple tasks just get retried - no fuss. Returns nil, which #recover!
    # treats as "retry without a scope".
    def recover_simple!(task, idx)
      Hekenga.log("Found failed simple task. Retrying..")
      nil
    end

    # Prepares recovery of a failed document task: reports the failure counts,
    # asks for confirmation, restores documents recorded in write failures,
    # and builds the scope to retry with.
    #
    # @return [:next, Object] :next when declined or when there is nothing to
    #   resume from; otherwise the scope to retry
    def recover_document_task!(task, idx)
      validation_failures = Hekenga::Failure::Validation.where(pkey: to_key, task_idx: idx)
      write_failures      = Hekenga::Failure::Write.where(pkey: to_key, task_idx: idx)
      error_failures      = Hekenga::Failure::Error.where(pkey: to_key, task_idx: idx)
      cancelled_failures  = Hekenga::Failure::Cancelled.where(pkey: to_key, task_idx: idx)

      # Stats
      validation_failure_ctr = validation_failures.count
      write_failure_ctr      = write_failures.count
      error_failure_ctr      = error_failures.count
      cancelled_failure_ctr  = cancelled_failures.count

      # Prompt for recovery
      recover = prompt(
        "Found #{validation_failure_ctr} invalid, "+
        "#{write_failure_ctr} failed writes, "+
        "#{error_failure_ctr} errors, "+
        "#{cancelled_failure_ctr} cancelled on migration. Recover?"
      )
      return :next unless recover

      # Recover from critical write failures (DB records potentially lost)
      unless write_failure_ctr.zero?
        Hekenga.log "Recovering old data from #{write_failure_ctr} write failure(s)"
        recover_data(write_failures, task.scope.klass)
      end

      # Resume task from point of error
      if task.parallel
        # TODO - support for recovery on huge # IDs
        failed_ids = [
          write_failures.pluck(:document_ids),
          error_failures.pluck(:batch_start),
          cancelled_failures.pluck(:document_ids),
          validation_failures.pluck(:doc_id)
        ].flatten.compact
        resume_scope = task.scope.klass.asc(:_id).in(_id: failed_ids)
      else
        first_id = error_failures.first&.batch_start || write_failures.first&.batch_start
        invalid_ids = validation_failures.pluck(:doc_id)
        if first_id && invalid_ids.any?
          # Everything from the failed batch onwards, plus earlier invalid docs.
          resume_scope = task.scope.klass.asc(:_id).and(
            task.scope.selector,
            task.scope.klass.or(
              {_id: {:$gte => first_id}},
              {_id: {:$in => invalid_ids}}
            ).selector
          )
        elsif first_id
          resume_scope = task.scope.asc(:_id).gte(_id: first_id)
        elsif invalid_ids.any?
          resume_scope = task.scope.klass.asc(:_id).in(_id: invalid_ids)
        else
          resume_scope = :next
        end
      end

      resume_scope
    end

    # Re-inserts the original documents stored on each write failure when
    # they are no longer present in the collection.
    def recover_data(write_failures, klass)
      write_failures.each do |write_failure|
        failed_ids = write_failure.document_ids
        extant     = klass.in(_id: failed_ids).pluck(:_id)
        to_recover = failed_ids - extant
        docs = write_failure.documents.select { |doc| to_recover.include?(doc["_id"]) }
        next if docs.empty?
        Hekenga.log "Recovering #{docs.length} documents.."
        klass.collection.insert_many(docs)
      end
    end

    # Asks a yes/no question on standard output and reads the answer from
    # $stdin, repeating until "y" or "n" (any case) is entered.
    #
    # Reads $stdin explicitly: bare Kernel#gets reads from ARGF, which opens
    # files named in ARGV when arguments are present. End of input is treated
    # as "no" so a non-interactive run declines instead of crashing.
    #
    # @return [Boolean] true for "y", false for "n" or end of input
    def prompt(str)
      loop do
        print "#{str} (Y/N):\n"
        answer = $stdin.gets
        return false if answer.nil?

        case answer.chomp.downcase
        when "y" then return true
        when "n" then return false
        end
      end
    end

    def rollback!
      # TODO
    end

    # Internal perform methods

    # Creates the log, runs the simple task, and marks the log done. Any
    # StandardError is recorded as a failure and cancels the task instead.
    def start_simple_task(task)
      create_log!
      begin
        with_setup do
          task.up!(@context)
        end
      rescue => e
        simple_failure!(e)
        return
      end
      log_done!
    end

    # Marks the active log done once every document has been accounted for.
    def check_for_completion
      log_done! if log.processed == log.total
    end

    def log_done!
      log.set(done: true, finished: Time.now)
    end

    # Enqueues one ParallelJob per batch of document ids (as strings).
    def start_parallel_task(task, task_idx, scope)
      # TODO - support for crazy numbers of documents where pluck is too big
      scope.asc(:_id).pluck(:_id).take(log.total).each_slice(batch_size) do |ids|
        Hekenga::ParallelJob.perform_later(
          to_key, task_idx, ids.map(&:to_s), !!@test_mode
        )
      end
    end

    # Processes one batch of a parallel task; called from ParallelJob.
    #
    # The active index and active task are set here because this method is
    # the entry point on a migration object that has not gone through
    # #perform_task!: the failure helpers use #log without an index, and
    # #failed_validation! reads @active_task.
    def run_parallel_task(task_idx, ids)
      @active_idx = task_idx
      if log(task_idx).cancel
        failed_cancelled!(ids)
        return
      end
      task = tasks[task_idx]
      return unless task

      @active_task = task
      with_setup(task) do
        process_batch(task, task.scope.asc(:_id).in(_id: ids).to_a)
        # @skipped holds records; failures are stored by document id.
        failed_cancelled!(@skipped.map(&:id)) unless @skipped.empty?
      end
    end

    # Builds the context, runs the task's setup blocks in it, and yields with
    # the task's disable_rules callbacks skipped. The callbacks are set again
    # and the context cleared even when the block raises.
    def with_setup(task = nil)
      @context = Hekenga::Context.new(@test_mode)
      task&.setups&.each do |block|
        @context.instance_exec(&block)
      end
      # Disable specific callbacks
      begin
        task&.disable_rules&.each do |rule|
          rule[:klass].skip_callback rule[:callback]
        end
        yield
      ensure
        @context = nil
        # Make sure the callbacks make it back
        task&.disable_rules&.each do |rule|
          rule[:klass].set_callback rule[:callback]
        end
      end
    end

    # Iterates the scope in _id order, processing batch_size records at a
    # time, and stops without marking the log done once it is cancelled.
    def start_document_task(task, task_idx, scope)
      records = []
      with_setup(task) do
        scope.asc(:_id).each do |record|
          records.push(record)
          if records.length == batch_size
            process_batch(task, records)
            return if log.cancel
            records = []
          end
        end
        process_batch(task, records) if records.any?
      end
      log_done!
    end

    # @return [Boolean] whether every filter block of the task accepts the record
    def run_filters(task, record)
      task.filters.all? do |block|
        @context.instance_exec(record, &block)
      end
    end

    # Filters, migrates, validates and persists one batch.
    #
    # Records rejected by the filters are logged as skipped. When applying the
    # task raises, or a validation failure cancels the task, the remaining
    # records of the batch are left in @skipped and nothing is persisted.
    def process_batch(task, records)
      @skipped   = []
      to_persist = []
      fallbacks  = []

      filtered = records.group_by do |record|
        run_filters(task, record)
      end
      log_skipped(task, filtered[false]) if filtered[false]
      accepted = filtered[true]
      return unless accepted

      accepted.each_with_index do |record, idx|
        # Deep copy of the untouched document, kept for write-failure recovery.
        original_record = Marshal.load(Marshal.dump(record.as_document))
        begin
          task.up!(@context, record)
        rescue => e
          failed_apply!(e, record, records[0].id)
          @skipped = accepted[idx+1..-1]
          return
        end
        if validate_record(record)
          to_persist.push(record)
          fallbacks.push(original_record)
        elsif log.cancel
          @skipped = accepted[idx+1..-1]
          return
        end
      end
      persist_batch(task, to_persist, fallbacks)
    end

    # Counts filtered-out records as both skipped and processed.
    def log_skipped(task, records)
      log.incr_and_return(
        skipped: records.length,
        processed: records.length
      )
      check_for_completion
    end

    def log_success(task, records)
      log.incr_and_return(
        processed: records.length
      )
      check_for_completion
    end

    # Writes a batch by deleting the existing documents and inserting the
    # migrated ones. In test mode the batch is only counted. A failure while
    # preparing a record or while writing is recorded and cancels the task.
    def persist_batch(task, records, original_records)
      if @test_mode
        log_success(task, records)
        return
      end
      # NOTE - edgecase where callbacks cause the record to become invalid is
      # not covered
      records.each do |record|
        begin
          next if task.skip_prepare
          if task.timeless
            record.timeless.send(:prepare_update) {}
          else
            record.send(:prepare_update) {}
          end
        rescue => e
          # If prepare_update throws an error, we're in trouble - crash out now
          failed_apply!(e, record, records[0].id)
          return
        end
      end
      begin
        delete_records!(task.scope.klass, records.map(&:_id))
        write_records!(task.scope.klass, records)
        log_success(task, records)
      rescue => e
        failed_write!(e, original_records)
      end
    end

    def delete_records!(klass, ids)
      klass.in(_id: ids).delete_all
    end

    def write_records!(klass, records)
      klass.collection.insert_many(records.map(&:as_document))
    end

    # Records an error raised by a simple task and cancels the active log.
    def simple_failure!(error)
      log.add_failure({
        message: error.to_s,
        backtrace: error.backtrace,
        simple: true
      }, Hekenga::Failure::Error)
      log_cancel!
    end

    # Records documents that were not processed because the task was cancelled.
    #
    # @param ids [Array] document ids
    def failed_cancelled!(ids)
      log.add_failure({
        document_ids: ids,
        batch_start: ids[0]
      }, Hekenga::Failure::Cancelled)
    end

    # Records an error raised while migrating a record and cancels the log.
    def failed_apply!(error, record, batch_start_id)
      log.add_failure({
        message: error.to_s,
        backtrace: error.backtrace,
        document: Marshal.load(Marshal.dump(record.as_document)),
        batch_start: batch_start_id
      }, Hekenga::Failure::Error)
      log_cancel!
    end

    def log_cancel!
      log.set(cancel: true, error: true, done: true, finished: Time.now)
    end

    # Records a failed batch write together with the original documents (used
    # by #recover_data) and cancels the log.
    def failed_write!(error, original_records)
      log.add_failure({
        message: error.to_s,
        backtrace: error.backtrace,
        documents: original_records,
        document_ids: original_records.map { |doc| doc["_id"] },
        batch_start: original_records[0]["_id"]
      }, Hekenga::Failure::Write)
      log_cancel!
    end

    # Records an invalid record, counts it as processed, and cancels the log
    # when the active task's invalid_strategy is :cancel.
    def failed_validation!(record)
      log.add_failure({
        doc_id: record.id,
        errs: record.errors.full_messages,
        document: Marshal.load(Marshal.dump(record.as_document))
      }, Hekenga::Failure::Validation)
      log.set(error: true)
      log.incr_and_return(processed: 1, unvalid: 1)
      if @active_task.invalid_strategy == :cancel
        log_cancel!
      else
        check_for_completion
      end
    end

    # @return [Boolean] true when the record is valid; otherwise the failure
    #   is recorded and false is returned
    def validate_record(record)
      # TODO - ability to skip validation
      if record.valid?
        true
      else
        failed_validation!(record)
        false
      end
    end

    # Validations

    # @raise [Hekenga::Invalid] always
    def validation_error(field, reason)
      raise Hekenga::Invalid.new(self, field, reason)
    end

    # Checks that the migration is well formed.
    #
    # @return [true]
    # @raise [Hekenga::Invalid] when the stamp is not a Time, the description
    #   is missing or its token is not longer than MIN_TOKEN_LENGTH, or there
    #   are no tasks
    def validate!
      validation_error(:stamp, "missing") unless stamp.is_a?(Time)
      validation_error(:description, "missing") unless description
      validation_error(:description, "too short") unless desc_to_token.length > MIN_TOKEN_LENGTH
      validation_error(:tasks, "missing") if tasks.length.zero?
      true
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require 'active_job'
2
module Hekenga
  # ActiveJob job, queued on :migration, that runs one batch of a parallel
  # task.
  class ParallelJob < ActiveJob::Base
    queue_as :migration

    # Looks the migration up by key, switches it to test mode when requested,
    # and hands the batch of ids to Migration#run_parallel_task.
    def perform(migration_key, task_idx, ids, test_mode)
      Hekenga.find_migration(migration_key).tap do |migration|
        migration.test_mode! if test_mode
      end.run_parallel_task(task_idx, ids)
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'hekenga/irreversible'
2
module Hekenga
  # A task made of plain blocks: "ups" applied when migrating and "downs"
  # applied when reverting.
  class SimpleTask
    attr_accessor :description
    attr_reader :ups, :downs

    def initialize
      @ups, @downs = [], []
    end

    # @raise [Hekenga::Invalid] when no up block has been registered
    def validate!
      raise Hekenga::Invalid.new(self, :ups, "missing") if ups.none?
    end

    # Evaluates every up block, in order, with +context+ as self.
    def up!(context)
      @ups.each { |step| context.instance_exec(&step) }
    end

    # Calls every down block in order.
    #
    # @raise [Hekenga::Irreversible] when the task has no down blocks
    def down!
      raise Hekenga::Irreversible.new(self) unless reversible?
      @downs.each(&:call)
    end

    # @return [Boolean] whether at least one down block is registered
    def reversible?
      downs.any?
    end
  end
end
@@ -0,0 +1,3 @@
1
module Hekenga
  # Version string of the gem.
  VERSION = "0.1.0"
end
@@ -0,0 +1,8 @@
1
+ require 'hekenga/base_error'
2
module Hekenga
  # Error whose message states that +klass#method+ has not been implemented.
  class VirtualMethod < Hekenga::BaseError
    # @param klass the class (or its name) that lacks the method
    # @param method the name of the unimplemented method
    def initialize(klass, method)
      message = "#{klass}##{method} has not been implemented."
      super(message)
    end
  end
end