hekenga 0.2.13 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/CHANGELOG.md +29 -0
- data/docker-compose.yml +30 -0
- data/exe/hekenga +25 -38
- data/hekenga.gemspec +5 -4
- data/lib/hekenga/context.rb +7 -6
- data/lib/hekenga/document_task.rb +9 -3
- data/lib/hekenga/document_task_executor.rb +264 -0
- data/lib/hekenga/document_task_record.rb +29 -0
- data/lib/hekenga/dsl/document_task.rb +28 -1
- data/lib/hekenga/dsl/migration.rb +3 -0
- data/lib/hekenga/failure/write.rb +1 -0
- data/lib/hekenga/iterator.rb +26 -0
- data/lib/hekenga/log.rb +14 -19
- data/lib/hekenga/master_process.rb +184 -105
- data/lib/hekenga/migration.rb +70 -330
- data/lib/hekenga/parallel_job.rb +11 -4
- data/lib/hekenga/parallel_task.rb +110 -0
- data/lib/hekenga/scaffold.rb +27 -23
- data/lib/hekenga/task_failed_error.rb +4 -0
- data/lib/hekenga/task_splitter.rb +30 -0
- data/lib/hekenga/version.rb +1 -1
- data/lib/hekenga.rb +22 -10
- metadata +22 -13
data/lib/hekenga/log.rb
CHANGED
@@ -18,12 +18,7 @@ module Hekenga
|
|
18
18
|
field :skip, default: false
|
19
19
|
|
20
20
|
# Used by document tasks
|
21
|
-
field :
|
22
|
-
field :processed, default: 0
|
23
|
-
field :skipped, default: 0
|
24
|
-
field :unvalid, default: 0
|
25
|
-
field :started, default: ->{ Time.now }
|
26
|
-
field :finished, type: Time
|
21
|
+
field :finished, type: Time
|
27
22
|
|
28
23
|
has_many :failures, class_name: "Hekenga::Failure"
|
29
24
|
|
@@ -36,21 +31,21 @@ module Hekenga
|
|
36
31
|
end
|
37
32
|
|
38
33
|
def add_failure(attrs, klass)
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
34
|
+
failure = klass.new(attrs.merge(pkey: pkey, task_idx: task_idx, log_id: _id))
|
35
|
+
failure.send(:prepare_insert) {}
|
36
|
+
Hekenga::Failure.collection.insert_one(
|
37
|
+
failure.as_document,
|
38
|
+
session: nil
|
39
|
+
)
|
43
40
|
end
|
44
41
|
|
45
|
-
def
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
[field, value]
|
53
|
-
end.to_h
|
42
|
+
def set_without_session(attrs)
|
43
|
+
self.class.collection.update_one(
|
44
|
+
{ _id: _id },
|
45
|
+
{'$set': attrs},
|
46
|
+
session: nil
|
47
|
+
)
|
48
|
+
self.attributes = attrs
|
54
49
|
end
|
55
50
|
end
|
56
51
|
end
|
data/lib/hekenga/master_process.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
require 'hekenga/task_failed_error'
|
2
|
+
require 'hekenga/task_splitter'
|
3
|
+
|
1
4
|
module Hekenga
|
2
5
|
class MasterProcess
|
3
6
|
def initialize(migration)
|
@@ -5,101 +8,33 @@ module Hekenga
|
|
5
8
|
end
|
6
9
|
|
7
10
|
def run!
|
8
|
-
Hekenga.log "Launching migration #{@migration.to_key}"
|
11
|
+
Hekenga.log "Launching migration #{@migration.to_key}: #{@migration.description}"
|
9
12
|
@migration.tasks.each.with_index do |task, idx|
|
10
13
|
launch_task(task, idx)
|
11
14
|
report_while_active(task, idx)
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
elsif any_validation_errors?(idx)
|
17
|
-
handle_validation_errors(task, idx)
|
18
|
-
return if @migration.log(idx).cancel
|
19
|
-
end
|
20
|
-
cleanup
|
15
|
+
rescue Hekenga::TaskFailedError
|
16
|
+
return false
|
17
|
+
ensure
|
18
|
+
@active_thread = nil
|
21
19
|
end
|
20
|
+
true
|
22
21
|
end
|
23
22
|
|
24
|
-
def
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
# Start the task based on the passed scope - similar to run! but we exit
|
31
|
-
# directly on failure.
|
32
|
-
launch_task(task, task_idx, scope)
|
33
|
-
report_while_active(task, task_idx)
|
34
|
-
if @migration.log(task_idx).cancel
|
23
|
+
def recover!
|
24
|
+
Hekenga.log "Recovering migration #{@migration.to_key}: #{@migration.description}"
|
25
|
+
@migration.tasks.each.with_index do |task, idx|
|
26
|
+
recover_task(task, idx)
|
27
|
+
report_while_active(task, idx) if @active_thread
|
28
|
+
rescue Hekenga::TaskFailedError
|
35
29
|
return false
|
36
|
-
|
37
|
-
|
38
|
-
if @migration.log(task_idx).cancel
|
39
|
-
return false
|
40
|
-
end
|
30
|
+
ensure
|
31
|
+
@active_thread = nil
|
41
32
|
end
|
42
|
-
cleanup
|
43
33
|
true
|
44
34
|
end
|
45
35
|
|
46
|
-
|
47
|
-
Hekenga::Failure::Validation.where(pkey: @migration.to_key, task_idx: idx).any?
|
48
|
-
end
|
49
|
-
|
50
|
-
def handle_validation_errors(task, idx)
|
51
|
-
return unless task.respond_to?(:invalid_strategy)
|
52
|
-
return if idx == @migration.tasks.length - 1
|
53
|
-
case task.invalid_strategy
|
54
|
-
when :prompt
|
55
|
-
unless continue_prompt?("There were validation errors in the last task.")
|
56
|
-
@migration.log(idx).set(cancel: true)
|
57
|
-
return
|
58
|
-
end
|
59
|
-
when :stop
|
60
|
-
Hekenga.log "TERMINATING DUE TO VALIDATION ERRORS"
|
61
|
-
@migration.log(idx).set(cancel: true)
|
62
|
-
return
|
63
|
-
end
|
64
|
-
end
|
36
|
+
private
|
65
37
|
|
66
|
-
def report_errors(idx)
|
67
|
-
scope = @migration.log(idx).failures
|
68
|
-
log_id = @migration.log(idx).id
|
69
|
-
# Validation errors
|
70
|
-
valid_errs = scope.where(_type: "Hekenga::Failure::Validation")
|
71
|
-
valid_errs_ctr = valid_errs.count
|
72
|
-
unless valid_errs_ctr.zero?
|
73
|
-
Hekenga.log "#{valid_errs_ctr} records failed validation. To get a list:"
|
74
|
-
Hekenga.log "Hekenga::Failure::Validation.lookup('#{log_id}', #{idx})"
|
75
|
-
end
|
76
|
-
# Write failures
|
77
|
-
write_errs = scope.where(_type: "Hekenga::Failure::Write")
|
78
|
-
write_errs_ctr = write_errs.count
|
79
|
-
unless write_errs_ctr.zero?
|
80
|
-
Hekenga.log "#{write_errs_ctr} write errors detected. Error messages:"
|
81
|
-
Hekenga.log(write_errs.pluck(:message).uniq.map {|x| "- #{x}"}.join("\n"))
|
82
|
-
Hekenga.log "To get a list:"
|
83
|
-
Hekenga.log "Hekenga::Failure::Write.lookup('#{log_id}', #{idx})"
|
84
|
-
# TODO - recover message
|
85
|
-
end
|
86
|
-
# Migration errors
|
87
|
-
general_errs = scope.where(_type: "Hekenga::Failure::Error")
|
88
|
-
general_errs_ctr = general_errs.count
|
89
|
-
unless general_errs_ctr.zero?
|
90
|
-
Hekenga.log "#{general_errs_ctr} migration errors detected. Error messages:"
|
91
|
-
Hekenga.log(general_errs.pluck(:message).uniq.map {|x| "- #{x}"}.join("\n"))
|
92
|
-
Hekenga.log "To get a list:"
|
93
|
-
Hekenga.log "Hekenga::Failure::Error.lookup('#{log_id}', #{idx})"
|
94
|
-
# TODO - recover message
|
95
|
-
end
|
96
|
-
end
|
97
|
-
def launch_task(task, idx, scope = nil)
|
98
|
-
Hekenga.log "Launching task##{idx}: #{task.description}"
|
99
|
-
@active_thread = Thread.new do
|
100
|
-
@migration.perform_task!(idx, scope)
|
101
|
-
end.tap {|t| t.abort_on_exception = true }
|
102
|
-
end
|
103
38
|
def report_while_active(task, idx)
|
104
39
|
# Wait for the log to be generated
|
105
40
|
until (@migration.log(idx) rescue nil)
|
@@ -107,43 +42,187 @@ module Hekenga
|
|
107
42
|
end
|
108
43
|
# Periodically report on thread progress
|
109
44
|
until @migration.log(idx).reload.done
|
110
|
-
@active_thread.join
|
45
|
+
@active_thread.join
|
111
46
|
report_status(task, idx)
|
112
|
-
return if @migration.log(idx).cancel
|
113
47
|
sleep Hekenga.config.report_sleep
|
114
48
|
end
|
115
|
-
report_status(task, idx)
|
116
|
-
|
117
|
-
report_errors(idx)
|
49
|
+
report_status(task, idx) if task.is_a?(Hekenga::DocumentTask)
|
50
|
+
report_result(task, idx)
|
118
51
|
Hekenga.log "Completed"
|
119
52
|
end
|
53
|
+
|
54
|
+
def recover_task(task, idx)
|
55
|
+
case task
|
56
|
+
when Hekenga::DocumentTask
|
57
|
+
recover_document_task(task, idx)
|
58
|
+
when Hekenga::SimpleTask
|
59
|
+
recover_simple_task(task, idx)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def recover_document_task(task, idx)
|
64
|
+
log = @migration.log(idx) rescue nil
|
65
|
+
if log.nil?
|
66
|
+
launch_task(task, idx)
|
67
|
+
elsif document_task_failed?(log, idx, fail_on_invalid: true)
|
68
|
+
Hekenga.log "Recovering task##{idx}: #{task.description}"
|
69
|
+
log.set_without_session({
|
70
|
+
done: false,
|
71
|
+
error: false,
|
72
|
+
cancel: false,
|
73
|
+
finished: nil,
|
74
|
+
})
|
75
|
+
recover_write_failures(task, log)
|
76
|
+
task_records = @migration.task_records(idx)
|
77
|
+
if task.parallel
|
78
|
+
in_thread do
|
79
|
+
Hekenga::ParallelTask.new(
|
80
|
+
migration: @migration,
|
81
|
+
task: task,
|
82
|
+
task_idx: idx,
|
83
|
+
test_mode: @migration.test_mode
|
84
|
+
).resume!
|
85
|
+
end
|
86
|
+
else
|
87
|
+
# Strategy: clear failures; reset state
|
88
|
+
log.failures.delete_all
|
89
|
+
task_records.incomplete.delete_all
|
90
|
+
task_records.each do |record|
|
91
|
+
Hekenga::TaskSplitter.new(record, @executor_key).call&.destroy
|
92
|
+
end
|
93
|
+
@migration.active_idx = idx
|
94
|
+
in_thread do
|
95
|
+
@migration.start_document_task(task, idx, recover: true)
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
else
|
100
|
+
Hekenga.log "Skipping completed task##{idx}: #{task.description}"
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
def document_task_failed?(log, idx, fail_on_invalid:)
|
105
|
+
return true if log.nil?
|
106
|
+
return true if log.error
|
107
|
+
return true if log.cancel
|
108
|
+
return true if @migration.task_records(idx).incomplete.any?
|
109
|
+
|
110
|
+
stats = combined_stats(idx)
|
111
|
+
return false if stats.blank?
|
112
|
+
return true if stats['failed'].positive?
|
113
|
+
return true if fail_on_invalid && stats['invalid'].positive?
|
114
|
+
|
115
|
+
false
|
116
|
+
end
|
117
|
+
|
118
|
+
def recover_write_failures(task, log)
|
119
|
+
klass = task.scope.klass
|
120
|
+
log.failures.where(_type: "Hekenga::Failure::Write").each do |write_failure|
|
121
|
+
next unless write_failure.documents.any?
|
122
|
+
|
123
|
+
existing = klass.in(_id: write_failure.documents.map {|doc| doc["_id"]}).pluck(:_id).to_set
|
124
|
+
to_write = write_failure.documents.reject {|doc| existing.include?(doc["_id"])}
|
125
|
+
next if to_write.empty?
|
126
|
+
|
127
|
+
Hekenga.log("Recovering #{to_write.length} write failures")
|
128
|
+
klass.collection.insert_many(to_write)
|
129
|
+
end.delete_all
|
130
|
+
end
|
131
|
+
|
132
|
+
def recover_simple_task(task, idx)
|
133
|
+
log = @migration.log(idx) rescue nil
|
134
|
+
if log.nil?
|
135
|
+
Hekenga.log "Recovering task##{idx}: #{task.description}"
|
136
|
+
launch_task(task, idx)
|
137
|
+
elsif log.error
|
138
|
+
Hekenga.log "Recovering task##{idx}: #{task.description}"
|
139
|
+
# Strategy: clear logs + rerun
|
140
|
+
log.failures.delete_all
|
141
|
+
log.destroy
|
142
|
+
@migration.reload_logs
|
143
|
+
launch_task(task, idx)
|
144
|
+
else
|
145
|
+
Hekenga.log "Skipping completed task##{idx}: #{task.description}"
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
def launch_task(task, idx)
|
150
|
+
Hekenga.log "Launching task##{idx}: #{task.description}"
|
151
|
+
in_thread do
|
152
|
+
@migration.perform_task!(idx)
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
def in_thread(&block)
|
157
|
+
@active_thread = Thread.new(&block).tap do |t|
|
158
|
+
t.report_on_exception = false
|
159
|
+
t.abort_on_exception = true
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
120
163
|
def report_status(task, idx)
|
121
|
-
# Simple tasks
|
122
164
|
case task
|
123
165
|
when Hekenga::DocumentTask
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
166
|
+
if task.parallel
|
167
|
+
Hekenga.log "#{@migration.task_records(idx).complete.count} of #{@migration.task_records(idx).count} batches processed"
|
168
|
+
check_for_completion!(idx)
|
169
|
+
else
|
170
|
+
Hekenga.log "#{@migration.task_records(idx).complete.count} batches processed"
|
171
|
+
end
|
129
172
|
when Hekenga::SimpleTask
|
130
|
-
Hekenga.log "Waiting
|
173
|
+
Hekenga.log "Waiting for task to complete"
|
131
174
|
end
|
132
175
|
end
|
133
|
-
|
134
|
-
|
176
|
+
|
177
|
+
def report_result(task, idx)
|
178
|
+
case task
|
179
|
+
when Hekenga::DocumentTask
|
180
|
+
Hekenga.log "Migration result:"
|
181
|
+
combined_stats(idx)&.each do |stat, count|
|
182
|
+
Hekenga.log " - #{stat.capitalize}: #{count}"
|
183
|
+
end
|
184
|
+
if document_task_failed?(@migration.log(idx), idx, fail_on_invalid: false)
|
185
|
+
Hekenga.log "There were failures while running the task. Stopping"
|
186
|
+
raise Hekenga::TaskFailedError
|
187
|
+
end
|
188
|
+
when Hekenga::SimpleTask
|
189
|
+
report_simple_result(idx)
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
def combined_stats(idx)
|
194
|
+
Hekenga::DocumentTaskRecord.collection.aggregate([
|
195
|
+
{ "$match" => @migration.task_records(idx).selector },
|
196
|
+
{ "$group" => {
|
197
|
+
"_id" => "1",
|
198
|
+
"failed" => { "$sum" => "$stats.failed" },
|
199
|
+
"invalid" => { "$sum" => "$stats.invalid" },
|
200
|
+
"written" => { "$sum" => "$stats.written" },
|
201
|
+
}}
|
202
|
+
]).to_a[0]&.except("_id")
|
135
203
|
end
|
136
204
|
|
137
|
-
def
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
return true
|
143
|
-
when "n"
|
144
|
-
return false
|
205
|
+
def report_simple_result(idx)
|
206
|
+
if @migration.log(idx).failures.any?
|
207
|
+
Hekenga.log "The task crashed with the following error message:"
|
208
|
+
@migration.log(idx).failures.each do |failure|
|
209
|
+
Hekenga.log failure.message
|
145
210
|
end
|
211
|
+
raise Hekenga::TaskFailedError
|
212
|
+
else
|
213
|
+
Hekenga.log "Task succeeded"
|
146
214
|
end
|
147
215
|
end
|
216
|
+
|
217
|
+
def check_for_completion!(idx)
|
218
|
+
complete = @migration.task_records(idx).incomplete.none?
|
219
|
+
return unless complete
|
220
|
+
|
221
|
+
@migration.log(idx).set_without_session(
|
222
|
+
done: true,
|
223
|
+
finished: Time.now,
|
224
|
+
error: @migration.task_records(idx).failed.any?
|
225
|
+
)
|
226
|
+
end
|
148
227
|
end
|
149
228
|
end
|