hekenga 0.2.13 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/CHANGELOG.md +29 -0
- data/docker-compose.yml +30 -0
- data/exe/hekenga +25 -38
- data/hekenga.gemspec +5 -4
- data/lib/hekenga/context.rb +7 -6
- data/lib/hekenga/document_task.rb +9 -3
- data/lib/hekenga/document_task_executor.rb +264 -0
- data/lib/hekenga/document_task_record.rb +29 -0
- data/lib/hekenga/dsl/document_task.rb +28 -1
- data/lib/hekenga/dsl/migration.rb +3 -0
- data/lib/hekenga/failure/write.rb +1 -0
- data/lib/hekenga/iterator.rb +26 -0
- data/lib/hekenga/log.rb +14 -19
- data/lib/hekenga/master_process.rb +184 -105
- data/lib/hekenga/migration.rb +70 -330
- data/lib/hekenga/parallel_job.rb +11 -4
- data/lib/hekenga/parallel_task.rb +110 -0
- data/lib/hekenga/scaffold.rb +27 -23
- data/lib/hekenga/task_failed_error.rb +4 -0
- data/lib/hekenga/task_splitter.rb +30 -0
- data/lib/hekenga/version.rb +1 -1
- data/lib/hekenga.rb +22 -10
- metadata +22 -13
data/lib/hekenga/log.rb
CHANGED
@@ -18,12 +18,7 @@ module Hekenga
|
|
18
18
|
field :skip, default: false
|
19
19
|
|
20
20
|
# Used by document tasks
|
21
|
-
field :
|
22
|
-
field :processed, default: 0
|
23
|
-
field :skipped, default: 0
|
24
|
-
field :unvalid, default: 0
|
25
|
-
field :started, default: ->{ Time.now }
|
26
|
-
field :finished, type: Time
|
21
|
+
field :finished, type: Time
|
27
22
|
|
28
23
|
has_many :failures, class_name: "Hekenga::Failure"
|
29
24
|
|
@@ -36,21 +31,21 @@ module Hekenga
|
|
36
31
|
end
|
37
32
|
|
38
33
|
def add_failure(attrs, klass)
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
34
|
+
failure = klass.new(attrs.merge(pkey: pkey, task_idx: task_idx, log_id: _id))
|
35
|
+
failure.send(:prepare_insert) {}
|
36
|
+
Hekenga::Failure.collection.insert_one(
|
37
|
+
failure.as_document,
|
38
|
+
session: nil
|
39
|
+
)
|
43
40
|
end
|
44
41
|
|
45
|
-
def
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
[field, value]
|
53
|
-
end.to_h
|
42
|
+
def set_without_session(attrs)
|
43
|
+
self.class.collection.update_one(
|
44
|
+
{ _id: _id },
|
45
|
+
{'$set': attrs},
|
46
|
+
session: nil
|
47
|
+
)
|
48
|
+
self.attributes = attrs
|
54
49
|
end
|
55
50
|
end
|
56
51
|
end
|
data/lib/hekenga/master_process.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
require 'hekenga/task_failed_error'
|
2
|
+
require 'hekenga/task_splitter'
|
3
|
+
|
1
4
|
module Hekenga
|
2
5
|
class MasterProcess
|
3
6
|
def initialize(migration)
|
@@ -5,101 +8,33 @@ module Hekenga
|
|
5
8
|
end
|
6
9
|
|
7
10
|
def run!
|
8
|
-
Hekenga.log "Launching migration #{@migration.to_key}"
|
11
|
+
Hekenga.log "Launching migration #{@migration.to_key}: #{@migration.description}"
|
9
12
|
@migration.tasks.each.with_index do |task, idx|
|
10
13
|
launch_task(task, idx)
|
11
14
|
report_while_active(task, idx)
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
elsif any_validation_errors?(idx)
|
17
|
-
handle_validation_errors(task, idx)
|
18
|
-
return if @migration.log(idx).cancel
|
19
|
-
end
|
20
|
-
cleanup
|
15
|
+
rescue Hekenga::TaskFailedError
|
16
|
+
return false
|
17
|
+
ensure
|
18
|
+
@active_thread = nil
|
21
19
|
end
|
20
|
+
true
|
22
21
|
end
|
23
22
|
|
24
|
-
def
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
# Start the task based on the passed scope - similar to run! but we exit
|
31
|
-
# directly on failure.
|
32
|
-
launch_task(task, task_idx, scope)
|
33
|
-
report_while_active(task, task_idx)
|
34
|
-
if @migration.log(task_idx).cancel
|
23
|
+
def recover!
|
24
|
+
Hekenga.log "Recovering migration #{@migration.to_key}: #{@migration.description}"
|
25
|
+
@migration.tasks.each.with_index do |task, idx|
|
26
|
+
recover_task(task, idx)
|
27
|
+
report_while_active(task, idx) if @active_thread
|
28
|
+
rescue Hekenga::TaskFailedError
|
35
29
|
return false
|
36
|
-
|
37
|
-
|
38
|
-
if @migration.log(task_idx).cancel
|
39
|
-
return false
|
40
|
-
end
|
30
|
+
ensure
|
31
|
+
@active_thread = nil
|
41
32
|
end
|
42
|
-
cleanup
|
43
33
|
true
|
44
34
|
end
|
45
35
|
|
46
|
-
|
47
|
-
Hekenga::Failure::Validation.where(pkey: @migration.to_key, task_idx: idx).any?
|
48
|
-
end
|
49
|
-
|
50
|
-
def handle_validation_errors(task, idx)
|
51
|
-
return unless task.respond_to?(:invalid_strategy)
|
52
|
-
return if idx == @migration.tasks.length - 1
|
53
|
-
case task.invalid_strategy
|
54
|
-
when :prompt
|
55
|
-
unless continue_prompt?("There were validation errors in the last task.")
|
56
|
-
@migration.log(idx).set(cancel: true)
|
57
|
-
return
|
58
|
-
end
|
59
|
-
when :stop
|
60
|
-
Hekenga.log "TERMINATING DUE TO VALIDATION ERRORS"
|
61
|
-
@migration.log(idx).set(cancel: true)
|
62
|
-
return
|
63
|
-
end
|
64
|
-
end
|
36
|
+
private
|
65
37
|
|
66
|
-
def report_errors(idx)
|
67
|
-
scope = @migration.log(idx).failures
|
68
|
-
log_id = @migration.log(idx).id
|
69
|
-
# Validation errors
|
70
|
-
valid_errs = scope.where(_type: "Hekenga::Failure::Validation")
|
71
|
-
valid_errs_ctr = valid_errs.count
|
72
|
-
unless valid_errs_ctr.zero?
|
73
|
-
Hekenga.log "#{valid_errs_ctr} records failed validation. To get a list:"
|
74
|
-
Hekenga.log "Hekenga::Failure::Validation.lookup('#{log_id}', #{idx})"
|
75
|
-
end
|
76
|
-
# Write failures
|
77
|
-
write_errs = scope.where(_type: "Hekenga::Failure::Write")
|
78
|
-
write_errs_ctr = write_errs.count
|
79
|
-
unless write_errs_ctr.zero?
|
80
|
-
Hekenga.log "#{write_errs_ctr} write errors detected. Error messages:"
|
81
|
-
Hekenga.log(write_errs.pluck(:message).uniq.map {|x| "- #{x}"}.join("\n"))
|
82
|
-
Hekenga.log "To get a list:"
|
83
|
-
Hekenga.log "Hekenga::Failure::Write.lookup('#{log_id}', #{idx})"
|
84
|
-
# TODO - recover message
|
85
|
-
end
|
86
|
-
# Migration errors
|
87
|
-
general_errs = scope.where(_type: "Hekenga::Failure::Error")
|
88
|
-
general_errs_ctr = general_errs.count
|
89
|
-
unless general_errs_ctr.zero?
|
90
|
-
Hekenga.log "#{general_errs_ctr} migration errors detected. Error messages:"
|
91
|
-
Hekenga.log(general_errs.pluck(:message).uniq.map {|x| "- #{x}"}.join("\n"))
|
92
|
-
Hekenga.log "To get a list:"
|
93
|
-
Hekenga.log "Hekenga::Failure::Error.lookup('#{log_id}', #{idx})"
|
94
|
-
# TODO - recover message
|
95
|
-
end
|
96
|
-
end
|
97
|
-
def launch_task(task, idx, scope = nil)
|
98
|
-
Hekenga.log "Launching task##{idx}: #{task.description}"
|
99
|
-
@active_thread = Thread.new do
|
100
|
-
@migration.perform_task!(idx, scope)
|
101
|
-
end.tap {|t| t.abort_on_exception = true }
|
102
|
-
end
|
103
38
|
def report_while_active(task, idx)
|
104
39
|
# Wait for the log to be generated
|
105
40
|
until (@migration.log(idx) rescue nil)
|
@@ -107,43 +42,187 @@ module Hekenga
|
|
107
42
|
end
|
108
43
|
# Periodically report on thread progress
|
109
44
|
until @migration.log(idx).reload.done
|
110
|
-
@active_thread.join
|
45
|
+
@active_thread.join
|
111
46
|
report_status(task, idx)
|
112
|
-
return if @migration.log(idx).cancel
|
113
47
|
sleep Hekenga.config.report_sleep
|
114
48
|
end
|
115
|
-
report_status(task, idx)
|
116
|
-
|
117
|
-
report_errors(idx)
|
49
|
+
report_status(task, idx) if task.is_a?(Hekenga::DocumentTask)
|
50
|
+
report_result(task, idx)
|
118
51
|
Hekenga.log "Completed"
|
119
52
|
end
|
53
|
+
|
54
|
+
def recover_task(task, idx)
|
55
|
+
case task
|
56
|
+
when Hekenga::DocumentTask
|
57
|
+
recover_document_task(task, idx)
|
58
|
+
when Hekenga::SimpleTask
|
59
|
+
recover_simple_task(task, idx)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def recover_document_task(task, idx)
|
64
|
+
log = @migration.log(idx) rescue nil
|
65
|
+
if log.nil?
|
66
|
+
launch_task(task, idx)
|
67
|
+
elsif document_task_failed?(log, idx, fail_on_invalid: true)
|
68
|
+
Hekenga.log "Recovering task##{idx}: #{task.description}"
|
69
|
+
log.set_without_session({
|
70
|
+
done: false,
|
71
|
+
error: false,
|
72
|
+
cancel: false,
|
73
|
+
finished: nil,
|
74
|
+
})
|
75
|
+
recover_write_failures(task, log)
|
76
|
+
task_records = @migration.task_records(idx)
|
77
|
+
if task.parallel
|
78
|
+
in_thread do
|
79
|
+
Hekenga::ParallelTask.new(
|
80
|
+
migration: @migration,
|
81
|
+
task: task,
|
82
|
+
task_idx: idx,
|
83
|
+
test_mode: @migration.test_mode
|
84
|
+
).resume!
|
85
|
+
end
|
86
|
+
else
|
87
|
+
# Strategy: clear failures; reset state
|
88
|
+
log.failures.delete_all
|
89
|
+
task_records.incomplete.delete_all
|
90
|
+
task_records.each do |record|
|
91
|
+
Hekenga::TaskSplitter.new(record, @executor_key).call&.destroy
|
92
|
+
end
|
93
|
+
@migration.active_idx = idx
|
94
|
+
in_thread do
|
95
|
+
@migration.start_document_task(task, idx, recover: true)
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
else
|
100
|
+
Hekenga.log "Skipping completed task##{idx}: #{task.description}"
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
def document_task_failed?(log, idx, fail_on_invalid:)
|
105
|
+
return true if log.nil?
|
106
|
+
return true if log.error
|
107
|
+
return true if log.cancel
|
108
|
+
return true if @migration.task_records(idx).incomplete.any?
|
109
|
+
|
110
|
+
stats = combined_stats(idx)
|
111
|
+
return false if stats.blank?
|
112
|
+
return true if stats['failed'].positive?
|
113
|
+
return true if fail_on_invalid && stats['invalid'].positive?
|
114
|
+
|
115
|
+
false
|
116
|
+
end
|
117
|
+
|
118
|
+
def recover_write_failures(task, log)
|
119
|
+
klass = task.scope.klass
|
120
|
+
log.failures.where(_type: "Hekenga::Failure::Write").each do |write_failure|
|
121
|
+
next unless write_failure.documents.any?
|
122
|
+
|
123
|
+
existing = klass.in(_id: write_failure.documents.map {|doc| doc["_id"]}).pluck(:_id).to_set
|
124
|
+
to_write = write_failure.documents.reject {|doc| existing.include?(doc["_id"])}
|
125
|
+
next if to_write.empty?
|
126
|
+
|
127
|
+
Hekenga.log("Recovering #{to_write.length} write failures")
|
128
|
+
klass.collection.insert_many(to_write)
|
129
|
+
end.delete_all
|
130
|
+
end
|
131
|
+
|
132
|
+
def recover_simple_task(task, idx)
|
133
|
+
log = @migration.log(idx) rescue nil
|
134
|
+
if log.nil?
|
135
|
+
Hekenga.log "Recovering task##{idx}: #{task.description}"
|
136
|
+
launch_task(task, idx)
|
137
|
+
elsif log.error
|
138
|
+
Hekenga.log "Recovering task##{idx}: #{task.description}"
|
139
|
+
# Strategy: clear logs + rerun
|
140
|
+
log.failures.delete_all
|
141
|
+
log.destroy
|
142
|
+
@migration.reload_logs
|
143
|
+
launch_task(task, idx)
|
144
|
+
else
|
145
|
+
Hekenga.log "Skipping completed task##{idx}: #{task.description}"
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
def launch_task(task, idx)
|
150
|
+
Hekenga.log "Launching task##{idx}: #{task.description}"
|
151
|
+
in_thread do
|
152
|
+
@migration.perform_task!(idx)
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
def in_thread(&block)
|
157
|
+
@active_thread = Thread.new(&block).tap do |t|
|
158
|
+
t.report_on_exception = false
|
159
|
+
t.abort_on_exception = true
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
120
163
|
def report_status(task, idx)
|
121
|
-
# Simple tasks
|
122
164
|
case task
|
123
165
|
when Hekenga::DocumentTask
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
166
|
+
if task.parallel
|
167
|
+
Hekenga.log "#{@migration.task_records(idx).complete.count} of #{@migration.task_records(idx).count} batches processed"
|
168
|
+
check_for_completion!(idx)
|
169
|
+
else
|
170
|
+
Hekenga.log "#{@migration.task_records(idx).complete.count} batches processed"
|
171
|
+
end
|
129
172
|
when Hekenga::SimpleTask
|
130
|
-
Hekenga.log "Waiting
|
173
|
+
Hekenga.log "Waiting for task to complete"
|
131
174
|
end
|
132
175
|
end
|
133
|
-
|
134
|
-
|
176
|
+
|
177
|
+
def report_result(task, idx)
|
178
|
+
case task
|
179
|
+
when Hekenga::DocumentTask
|
180
|
+
Hekenga.log "Migration result:"
|
181
|
+
combined_stats(idx)&.each do |stat, count|
|
182
|
+
Hekenga.log " - #{stat.capitalize}: #{count}"
|
183
|
+
end
|
184
|
+
if document_task_failed?(@migration.log(idx), idx, fail_on_invalid: false)
|
185
|
+
Hekenga.log "There were failures while running the task. Stopping"
|
186
|
+
raise Hekenga::TaskFailedError
|
187
|
+
end
|
188
|
+
when Hekenga::SimpleTask
|
189
|
+
report_simple_result(idx)
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
def combined_stats(idx)
|
194
|
+
Hekenga::DocumentTaskRecord.collection.aggregate([
|
195
|
+
{ "$match" => @migration.task_records(idx).selector },
|
196
|
+
{ "$group" => {
|
197
|
+
"_id" => "1",
|
198
|
+
"failed" => { "$sum" => "$stats.failed" },
|
199
|
+
"invalid" => { "$sum" => "$stats.invalid" },
|
200
|
+
"written" => { "$sum" => "$stats.written" },
|
201
|
+
}}
|
202
|
+
]).to_a[0]&.except("_id")
|
135
203
|
end
|
136
204
|
|
137
|
-
def
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
return true
|
143
|
-
when "n"
|
144
|
-
return false
|
205
|
+
def report_simple_result(idx)
|
206
|
+
if @migration.log(idx).failures.any?
|
207
|
+
Hekenga.log "The task crashed with the following error message:"
|
208
|
+
@migration.log(idx).failures.each do |failure|
|
209
|
+
Hekenga.log failure.message
|
145
210
|
end
|
211
|
+
raise Hekenga::TaskFailedError
|
212
|
+
else
|
213
|
+
Hekenga.log "Task succeeded"
|
146
214
|
end
|
147
215
|
end
|
216
|
+
|
217
|
+
def check_for_completion!(idx)
|
218
|
+
complete = @migration.task_records(idx).incomplete.none?
|
219
|
+
return unless complete
|
220
|
+
|
221
|
+
@migration.log(idx).set_without_session(
|
222
|
+
done: true,
|
223
|
+
finished: Time.now,
|
224
|
+
error: @migration.task_records(idx).failed.any?
|
225
|
+
)
|
226
|
+
end
|
148
227
|
end
|
149
228
|
end
|