hekenga 0.2.13 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/hekenga/log.rb CHANGED
@@ -18,12 +18,7 @@ module Hekenga
18
18
  field :skip, default: false
19
19
 
20
20
  # Used by document tasks
21
- field :total
22
- field :processed, default: 0
23
- field :skipped, default: 0
24
- field :unvalid, default: 0
25
- field :started, default: ->{ Time.now }
26
- field :finished, type: Time
21
+ field :finished, type: Time
27
22
 
28
23
  has_many :failures, class_name: "Hekenga::Failure"
29
24
 
@@ -36,21 +31,21 @@ module Hekenga
36
31
  end
37
32
 
38
33
  def add_failure(attrs, klass)
39
- self.failures.create({
40
- pkey: self.pkey,
41
- task_idx: self.task_idx
42
- }.merge(attrs), klass)
34
+ failure = klass.new(attrs.merge(pkey: pkey, task_idx: task_idx, log_id: _id))
35
+ failure.send(:prepare_insert) {}
36
+ Hekenga::Failure.collection.insert_one(
37
+ failure.as_document,
38
+ session: nil
39
+ )
43
40
  end
44
41
 
45
- def incr_and_return(fields)
46
- doc = self.class.where(_id: self.id).find_one_and_update({
47
- :$inc => fields
48
- }, return_document: :after, projection: fields.keys.map {|x| [x, 1]}.to_h, upsert: true)
49
- fields.map do |field, _|
50
- value = doc.send(field)
51
- send("#{field}=", value)
52
- [field, value]
53
- end.to_h
42
+ def set_without_session(attrs)
43
+ self.class.collection.update_one(
44
+ { _id: _id },
45
+ {'$set': attrs},
46
+ session: nil
47
+ )
48
+ self.attributes = attrs
54
49
  end
55
50
  end
56
51
  end
@@ -1,3 +1,6 @@
1
+ require 'hekenga/task_failed_error'
2
+ require 'hekenga/task_splitter'
3
+
1
4
  module Hekenga
2
5
  class MasterProcess
3
6
  def initialize(migration)
@@ -5,101 +8,33 @@ module Hekenga
5
8
  end
6
9
 
7
10
  def run!
8
- Hekenga.log "Launching migration #{@migration.to_key}"
11
+ Hekenga.log "Launching migration #{@migration.to_key}: #{@migration.description}"
9
12
  @migration.tasks.each.with_index do |task, idx|
10
13
  launch_task(task, idx)
11
14
  report_while_active(task, idx)
12
- if @migration.log(idx).cancel
13
- Hekenga.log "TERMINATING DUE TO CRITICAL ERRORS"
14
- report_errors(idx)
15
- return
16
- elsif any_validation_errors?(idx)
17
- handle_validation_errors(task, idx)
18
- return if @migration.log(idx).cancel
19
- end
20
- cleanup
15
+ rescue Hekenga::TaskFailedError
16
+ return false
17
+ ensure
18
+ @active_thread = nil
21
19
  end
20
+ true
22
21
  end
23
22
 
24
- def retry!(task_idx, scope)
25
- task = @migration.tasks[task_idx]
26
- # Reset logs completely
27
- Hekenga::Log.where(pkey: @migration.to_key, task_idx: task_idx).delete_all
28
- Hekenga::Failure.where(pkey: @migration.to_key, task_idx: task_idx).delete_all
29
- @migration.reload_logs
30
- # Start the task based on the passed scope - similar to run! but we exit
31
- # directly on failure.
32
- launch_task(task, task_idx, scope)
33
- report_while_active(task, task_idx)
34
- if @migration.log(task_idx).cancel
23
+ def recover!
24
+ Hekenga.log "Recovering migration #{@migration.to_key}: #{@migration.description}"
25
+ @migration.tasks.each.with_index do |task, idx|
26
+ recover_task(task, idx)
27
+ report_while_active(task, idx) if @active_thread
28
+ rescue Hekenga::TaskFailedError
35
29
  return false
36
- elsif any_validation_errors?(task_idx)
37
- handle_validation_errors(task, task_idx)
38
- if @migration.log(task_idx).cancel
39
- return false
40
- end
30
+ ensure
31
+ @active_thread = nil
41
32
  end
42
- cleanup
43
33
  true
44
34
  end
45
35
 
46
- def any_validation_errors?(idx)
47
- Hekenga::Failure::Validation.where(pkey: @migration.to_key, task_idx: idx).any?
48
- end
49
-
50
- def handle_validation_errors(task, idx)
51
- return unless task.respond_to?(:invalid_strategy)
52
- return if idx == @migration.tasks.length - 1
53
- case task.invalid_strategy
54
- when :prompt
55
- unless continue_prompt?("There were validation errors in the last task.")
56
- @migration.log(idx).set(cancel: true)
57
- return
58
- end
59
- when :stop
60
- Hekenga.log "TERMINATING DUE TO VALIDATION ERRORS"
61
- @migration.log(idx).set(cancel: true)
62
- return
63
- end
64
- end
36
+ private
65
37
 
66
- def report_errors(idx)
67
- scope = @migration.log(idx).failures
68
- log_id = @migration.log(idx).id
69
- # Validation errors
70
- valid_errs = scope.where(_type: "Hekenga::Failure::Validation")
71
- valid_errs_ctr = valid_errs.count
72
- unless valid_errs_ctr.zero?
73
- Hekenga.log "#{valid_errs_ctr} records failed validation. To get a list:"
74
- Hekenga.log "Hekenga::Failure::Validation.lookup('#{log_id}', #{idx})"
75
- end
76
- # Write failures
77
- write_errs = scope.where(_type: "Hekenga::Failure::Write")
78
- write_errs_ctr = write_errs.count
79
- unless write_errs_ctr.zero?
80
- Hekenga.log "#{write_errs_ctr} write errors detected. Error messages:"
81
- Hekenga.log(write_errs.pluck(:message).uniq.map {|x| "- #{x}"}.join("\n"))
82
- Hekenga.log "To get a list:"
83
- Hekenga.log "Hekenga::Failure::Write.lookup('#{log_id}', #{idx})"
84
- # TODO - recover message
85
- end
86
- # Migration errors
87
- general_errs = scope.where(_type: "Hekenga::Failure::Error")
88
- general_errs_ctr = general_errs.count
89
- unless general_errs_ctr.zero?
90
- Hekenga.log "#{general_errs_ctr} migration errors detected. Error messages:"
91
- Hekenga.log(general_errs.pluck(:message).uniq.map {|x| "- #{x}"}.join("\n"))
92
- Hekenga.log "To get a list:"
93
- Hekenga.log "Hekenga::Failure::Error.lookup('#{log_id}', #{idx})"
94
- # TODO - recover message
95
- end
96
- end
97
- def launch_task(task, idx, scope = nil)
98
- Hekenga.log "Launching task##{idx}: #{task.description}"
99
- @active_thread = Thread.new do
100
- @migration.perform_task!(idx, scope)
101
- end.tap {|t| t.abort_on_exception = true }
102
- end
103
38
  def report_while_active(task, idx)
104
39
  # Wait for the log to be generated
105
40
  until (@migration.log(idx) rescue nil)
@@ -107,43 +42,187 @@ module Hekenga
107
42
  end
108
43
  # Periodically report on thread progress
109
44
  until @migration.log(idx).reload.done
110
- @active_thread.join unless @active_thread.alive?
45
+ @active_thread.join
111
46
  report_status(task, idx)
112
- return if @migration.log(idx).cancel
113
47
  sleep Hekenga.config.report_sleep
114
48
  end
115
- report_status(task, idx)
116
- return if @migration.log(idx).cancel
117
- report_errors(idx)
49
+ report_status(task, idx) if task.is_a?(Hekenga::DocumentTask)
50
+ report_result(task, idx)
118
51
  Hekenga.log "Completed"
119
52
  end
53
+
54
+ def recover_task(task, idx)
55
+ case task
56
+ when Hekenga::DocumentTask
57
+ recover_document_task(task, idx)
58
+ when Hekenga::SimpleTask
59
+ recover_simple_task(task, idx)
60
+ end
61
+ end
62
+
63
+ def recover_document_task(task, idx)
64
+ log = @migration.log(idx) rescue nil
65
+ if log.nil?
66
+ launch_task(task, idx)
67
+ elsif document_task_failed?(log, idx, fail_on_invalid: true)
68
+ Hekenga.log "Recovering task##{idx}: #{task.description}"
69
+ log.set_without_session({
70
+ done: false,
71
+ error: false,
72
+ cancel: false,
73
+ finished: nil,
74
+ })
75
+ recover_write_failures(task, log)
76
+ task_records = @migration.task_records(idx)
77
+ if task.parallel
78
+ in_thread do
79
+ Hekenga::ParallelTask.new(
80
+ migration: @migration,
81
+ task: task,
82
+ task_idx: idx,
83
+ test_mode: @migration.test_mode
84
+ ).resume!
85
+ end
86
+ else
87
+ # Strategy: clear failures; reset state
88
+ log.failures.delete_all
89
+ task_records.incomplete.delete_all
90
+ task_records.each do |record|
91
+ Hekenga::TaskSplitter.new(record, @executor_key).call&.destroy
92
+ end
93
+ @migration.active_idx = idx
94
+ in_thread do
95
+ @migration.start_document_task(task, idx, recover: true)
96
+ end
97
+ end
98
+
99
+ else
100
+ Hekenga.log "Skipping completed task##{idx}: #{task.description}"
101
+ end
102
+ end
103
+
104
+ def document_task_failed?(log, idx, fail_on_invalid:)
105
+ return true if log.nil?
106
+ return true if log.error
107
+ return true if log.cancel
108
+ return true if @migration.task_records(idx).incomplete.any?
109
+
110
+ stats = combined_stats(idx)
111
+ return false if stats.blank?
112
+ return true if stats['failed'].positive?
113
+ return true if fail_on_invalid && stats['invalid'].positive?
114
+
115
+ false
116
+ end
117
+
118
+ def recover_write_failures(task, log)
119
+ klass = task.scope.klass
120
+ log.failures.where(_type: "Hekenga::Failure::Write").each do |write_failure|
121
+ next unless write_failure.documents.any?
122
+
123
+ existing = klass.in(_id: write_failure.documents.map {|doc| doc["_id"]}).pluck(:_id).to_set
124
+ to_write = write_failure.documents.reject {|doc| existing.include?(doc["_id"])}
125
+ next if to_write.empty?
126
+
127
+ Hekenga.log("Recovering #{to_write.length} write failures")
128
+ klass.collection.insert_many(to_write)
129
+ end.delete_all
130
+ end
131
+
132
+ def recover_simple_task(task, idx)
133
+ log = @migration.log(idx) rescue nil
134
+ if log.nil?
135
+ Hekenga.log "Recovering task##{idx}: #{task.description}"
136
+ launch_task(task, idx)
137
+ elsif log.error
138
+ Hekenga.log "Recovering task##{idx}: #{task.description}"
139
+ # Strategy: clear logs + rerun
140
+ log.failures.delete_all
141
+ log.destroy
142
+ @migration.reload_logs
143
+ launch_task(task, idx)
144
+ else
145
+ Hekenga.log "Skipping completed task##{idx}: #{task.description}"
146
+ end
147
+ end
148
+
149
+ def launch_task(task, idx)
150
+ Hekenga.log "Launching task##{idx}: #{task.description}"
151
+ in_thread do
152
+ @migration.perform_task!(idx)
153
+ end
154
+ end
155
+
156
+ def in_thread(&block)
157
+ @active_thread = Thread.new(&block).tap do |t|
158
+ t.report_on_exception = false
159
+ t.abort_on_exception = true
160
+ end
161
+ end
162
+
120
163
  def report_status(task, idx)
121
- # Simple tasks
122
164
  case task
123
165
  when Hekenga::DocumentTask
124
- scope = @migration.log(idx).failures
125
- skipped_ctr = @migration.log(idx).skipped
126
- valid_errs = scope.where(_type: "Hekenga::Failure::Validation")
127
- valid_errs_ctr = valid_errs.count
128
- Hekenga.log "Processed #{@migration.log(idx).processed} of #{@migration.log(idx).total} (#{valid_errs_ctr} invalid, #{skipped_ctr} skipped)"
166
+ if task.parallel
167
+ Hekenga.log "#{@migration.task_records(idx).complete.count} of #{@migration.task_records(idx).count} batches processed"
168
+ check_for_completion!(idx)
169
+ else
170
+ Hekenga.log "#{@migration.task_records(idx).complete.count} batches processed"
171
+ end
129
172
  when Hekenga::SimpleTask
130
- Hekenga.log "Waiting on task"
173
+ Hekenga.log "Waiting for task to complete"
131
174
  end
132
175
  end
133
- def cleanup
134
- @active_thread = nil
176
+
177
+ def report_result(task, idx)
178
+ case task
179
+ when Hekenga::DocumentTask
180
+ Hekenga.log "Migration result:"
181
+ combined_stats(idx)&.each do |stat, count|
182
+ Hekenga.log " - #{stat.capitalize}: #{count}"
183
+ end
184
+ if document_task_failed?(@migration.log(idx), idx, fail_on_invalid: false)
185
+ Hekenga.log "There were failures while running the task. Stopping"
186
+ raise Hekenga::TaskFailedError
187
+ end
188
+ when Hekenga::SimpleTask
189
+ report_simple_result(idx)
190
+ end
191
+ end
192
+
193
+ def combined_stats(idx)
194
+ Hekenga::DocumentTaskRecord.collection.aggregate([
195
+ { "$match" => @migration.task_records(idx).selector },
196
+ { "$group" => {
197
+ "_id" => "1",
198
+ "failed" => { "$sum" => "$stats.failed" },
199
+ "invalid" => { "$sum" => "$stats.invalid" },
200
+ "written" => { "$sum" => "$stats.written" },
201
+ }}
202
+ ]).to_a[0]&.except("_id")
135
203
  end
136
204
 
137
- def continue_prompt?(str)
138
- loop do
139
- print "#{str} Continue? (Y/N)\n"
140
- case gets.chomp.downcase
141
- when "y"
142
- return true
143
- when "n"
144
- return false
205
+ def report_simple_result(idx)
206
+ if @migration.log(idx).failures.any?
207
+ Hekenga.log "The task crashed with the following error message:"
208
+ @migration.log(idx).failures.each do |failure|
209
+ Hekenga.log failure.message
145
210
  end
211
+ raise Hekenga::TaskFailedError
212
+ else
213
+ Hekenga.log "Task succeeded"
146
214
  end
147
215
  end
216
+
217
+ def check_for_completion!(idx)
218
+ complete = @migration.task_records(idx).incomplete.none?
219
+ return unless complete
220
+
221
+ @migration.log(idx).set_without_session(
222
+ done: true,
223
+ finished: Time.now,
224
+ error: @migration.task_records(idx).failed.any?
225
+ )
226
+ end
148
227
  end
149
228
  end