evilution 0.32.0 → 0.34.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.beads/interactions.jsonl +28 -0
- data/.rubocop_todo.yml +1 -0
- data/CHANGELOG.md +31 -0
- data/README.md +12 -10
- data/docs/integrations.md +15 -0
- data/docs/isolation.md +46 -2
- data/lib/evilution/baseline.rb +11 -4
- data/lib/evilution/cli/parser/options_builder.rb +17 -0
- data/lib/evilution/config/validators/example_targeting_strategy.rb +22 -0
- data/lib/evilution/config.rb +16 -2
- data/lib/evilution/coverage/digest.rb +16 -0
- data/lib/evilution/coverage/map.rb +64 -0
- data/lib/evilution/coverage/map_builder.rb +82 -0
- data/lib/evilution/coverage/map_store.rb +87 -0
- data/lib/evilution/coverage/recorder.rb +85 -0
- data/lib/evilution/coverage.rb +8 -0
- data/lib/evilution/coverage_example_filter.rb +41 -0
- data/lib/evilution/integration/loading/test_load_path.rb +76 -0
- data/lib/evilution/integration/minitest.rb +5 -1
- data/lib/evilution/integration/rspec/state_guard/configuration_state.rb +72 -0
- data/lib/evilution/integration/rspec/state_guard/configuration_streams.rb +45 -0
- data/lib/evilution/integration/rspec/state_guard.rb +3 -1
- data/lib/evilution/integration/test_unit.rb +12 -4
- data/lib/evilution/isolation/fork.rb +38 -50
- data/lib/evilution/parallel/work_queue/dispatcher/deadline_tracker.rb +63 -0
- data/lib/evilution/parallel/work_queue/dispatcher.rb +70 -25
- data/lib/evilution/parallel/work_queue/worker.rb +50 -14
- data/lib/evilution/parallel/work_queue.rb +8 -0
- data/lib/evilution/process_supervisor.rb +259 -0
- data/lib/evilution/reporter/cli/line_formatters/unresolved_rate_warning.rb +50 -0
- data/lib/evilution/reporter/cli/metrics_block.rb +2 -0
- data/lib/evilution/runner/baseline_runner.rb +52 -0
- data/lib/evilution/runner/isolation_resolver.rb +106 -12
- data/lib/evilution/runner/mutation_executor/strategy/parallel.rb +28 -1
- data/lib/evilution/runner.rb +7 -0
- data/lib/evilution/spec_resolver.rb +147 -9
- data/lib/evilution/spec_selector.rb +14 -4
- data/lib/evilution/version.rb +1 -1
- data/lib/evilution.rb +1 -0
- data/lib/tasks/stress.rake +15 -0
- data/scripts/canary_manifest.yml +47 -0
- data/scripts/compare_targeting +277 -0
- data/scripts/compare_targeting.example.yml +24 -0
- metadata +20 -2
|
@@ -5,7 +5,7 @@ require "tmpdir"
|
|
|
5
5
|
require_relative "../memory"
|
|
6
6
|
require_relative "../temp_dir_tracker"
|
|
7
7
|
require_relative "../child_output"
|
|
8
|
-
require_relative "../
|
|
8
|
+
require_relative "../process_supervisor"
|
|
9
9
|
|
|
10
10
|
require_relative "../isolation"
|
|
11
11
|
|
|
@@ -15,21 +15,25 @@ class Evilution::Isolation::Fork
|
|
|
15
15
|
|
|
16
16
|
def initialize(hooks: nil)
|
|
17
17
|
@hooks = hooks
|
|
18
|
+
# EV-3aw3 / EV-5rrh step 2: the supervisor owns this path's lifecycle --
|
|
19
|
+
# spawn + process-group isolation, the TERM/grace/KILL ladder, and reap +
|
|
20
|
+
# sandbox removal. fork.rb keeps only the marshal-pipe read protocol.
|
|
21
|
+
@supervisor = Evilution::ProcessSupervisor.new
|
|
18
22
|
end
|
|
19
23
|
|
|
20
24
|
def call(mutation:, test_command:, timeout:)
|
|
21
|
-
|
|
25
|
+
handle = nil
|
|
22
26
|
sandbox_dir = Dir.mktmpdir("evilution-run")
|
|
23
27
|
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
24
28
|
parent_rss = Evilution::Memory.rss_kb
|
|
25
29
|
read_io, write_io = binary_pipe
|
|
26
|
-
|
|
30
|
+
handle = spawn_child(read_io, write_io, sandbox_dir, mutation, test_command)
|
|
27
31
|
write_io.close
|
|
28
|
-
result = wait_for_result(
|
|
32
|
+
result = wait_for_result(handle, read_io, timeout)
|
|
29
33
|
duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
|
|
30
34
|
build_mutation_result(mutation, result, duration, parent_rss)
|
|
31
35
|
ensure
|
|
32
|
-
cleanup_resources(read_io, write_io,
|
|
36
|
+
cleanup_resources(read_io, write_io, handle, sandbox_dir)
|
|
33
37
|
end
|
|
34
38
|
|
|
35
39
|
private
|
|
@@ -46,13 +50,18 @@ class Evilution::Isolation::Fork
|
|
|
46
50
|
[read_io, write_io]
|
|
47
51
|
end
|
|
48
52
|
|
|
49
|
-
|
|
50
|
-
|
|
53
|
+
# Supervisor.spawn makes the child its own process-group leader (setpgid)
|
|
54
|
+
# before this block runs, so any grandchildren test_command forks inherit the
|
|
55
|
+
# group and the TERM/KILL ladder sweeps the whole subtree on timeout (EV-2sh8
|
|
56
|
+
# / GH #1330). The block keeps the marshal-pipe protocol: write a
|
|
57
|
+
# length-prefixed payload, then exit with the pass/fail code.
|
|
58
|
+
def spawn_child(read_io, write_io, sandbox_dir, mutation, test_command)
|
|
59
|
+
@supervisor.spawn(sandbox_dir: sandbox_dir) do
|
|
51
60
|
ENV["TMPDIR"] = sandbox_dir
|
|
52
61
|
# Path-relativizing mutations (e.g. File.join(dir, name) -> name) would
|
|
53
62
|
# otherwise write into the parent's CWD (typically the repo root) and
|
|
54
63
|
# leak past the run. chdir here keeps such writes inside sandbox_dir,
|
|
55
|
-
# which the
|
|
64
|
+
# which the supervisor removes on reap. The in_isolated_worker! flag
|
|
56
65
|
# signals the rest of evilution (SpecResolver/SpecSelector/SpecAstCache/
|
|
57
66
|
# MutationApplier/SourceEvaluator/Integration) to anchor project-relative
|
|
58
67
|
# paths to Evilution::PROJECT_ROOT instead of the sandbox CWD.
|
|
@@ -70,12 +79,20 @@ class Evilution::Isolation::Fork
|
|
|
70
79
|
end
|
|
71
80
|
end
|
|
72
81
|
|
|
73
|
-
|
|
82
|
+
# The parent owns read_io/write_io (write_io is closed right after spawn so
|
|
83
|
+
# read_io can see EOF), so they are closed here rather than handed to the
|
|
84
|
+
# supervisor. The supervisor reaps the child and removes the sandbox dir; on
|
|
85
|
+
# the early-failure path (binary_pipe raised before spawn) handle is nil, so
|
|
86
|
+
# the orphaned sandbox is removed directly.
|
|
87
|
+
def cleanup_resources(read_io, write_io, handle, sandbox_dir)
|
|
74
88
|
read_io.close unless read_io.nil?
|
|
75
89
|
write_io.close unless write_io.nil?
|
|
76
|
-
|
|
90
|
+
if handle
|
|
91
|
+
@supervisor.terminate(handle, grace: GRACE_PERIOD)
|
|
92
|
+
elsif sandbox_dir
|
|
93
|
+
FileUtils.rm_rf(sandbox_dir)
|
|
94
|
+
end
|
|
77
95
|
restore_original_source
|
|
78
|
-
FileUtils.rm_rf(sandbox_dir) if sandbox_dir
|
|
79
96
|
end
|
|
80
97
|
|
|
81
98
|
def restore_original_source
|
|
@@ -109,21 +126,21 @@ class Evilution::Isolation::Fork
|
|
|
109
126
|
# never sees EOF and hangs forever. The length prefix makes payload reads
|
|
110
127
|
# bounded; the waitpid-WNOHANG check inside the poll loop lets us exit
|
|
111
128
|
# promptly when the child died without writing anything.
|
|
112
|
-
def wait_for_result(
|
|
129
|
+
def wait_for_result(handle, read_io, timeout)
|
|
113
130
|
deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
|
|
114
131
|
loop do
|
|
115
132
|
remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
116
|
-
return timeout_result(
|
|
133
|
+
return timeout_result(handle) if remaining <= 0
|
|
117
134
|
|
|
118
135
|
if read_io.wait_readable([remaining, 0.5].min)
|
|
119
136
|
payload = read_payload(read_io, deadline)
|
|
120
|
-
return reap_and_decode(
|
|
137
|
+
return reap_and_decode(handle, payload) if payload
|
|
121
138
|
end
|
|
122
139
|
|
|
123
|
-
next unless
|
|
140
|
+
next unless @supervisor.reap_nonblock(handle)
|
|
124
141
|
|
|
125
142
|
# Child exited. Drain any final payload that arrived between
|
|
126
|
-
# wait_readable timeout and
|
|
143
|
+
# wait_readable timeout and the reap (race) before declaring empty.
|
|
127
144
|
final = read_payload(read_io, Process.clock_gettime(Process::CLOCK_MONOTONIC) + 0.1)
|
|
128
145
|
return decode_payload(final) if final
|
|
129
146
|
|
|
@@ -137,13 +154,13 @@ class Evilution::Isolation::Fork
|
|
|
137
154
|
# in execute_in_child waiting on a subject grandchild the mutation broke.
|
|
138
155
|
# wait_for_result has already returned by this point, so the per-mutation
|
|
139
156
|
# timeout cannot fire. Bound the wait and fall back to the TERM/KILL ladder.
|
|
140
|
-
def reap_and_decode(
|
|
157
|
+
def reap_and_decode(handle, payload)
|
|
141
158
|
deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + REAP_DEADLINE
|
|
142
159
|
loop do
|
|
143
|
-
break if
|
|
160
|
+
break if @supervisor.reap_nonblock(handle)
|
|
144
161
|
|
|
145
162
|
if Process.clock_gettime(Process::CLOCK_MONOTONIC) >= deadline
|
|
146
|
-
|
|
163
|
+
@supervisor.terminate(handle, grace: GRACE_PERIOD)
|
|
147
164
|
break
|
|
148
165
|
end
|
|
149
166
|
sleep 0.05
|
|
@@ -194,40 +211,11 @@ class Evilution::Isolation::Fork
|
|
|
194
211
|
{ timeout: false, passed: false, error: "empty result from child" }
|
|
195
212
|
end
|
|
196
213
|
|
|
197
|
-
def timeout_result(
|
|
198
|
-
|
|
214
|
+
def timeout_result(handle)
|
|
215
|
+
@supervisor.terminate(handle, grace: GRACE_PERIOD)
|
|
199
216
|
{ timeout: true }
|
|
200
217
|
end
|
|
201
218
|
|
|
202
|
-
# Defensive reap: if normal control flow raised before wait_for_result
|
|
203
|
-
# reaped the child (e.g. Marshal.load on corrupt payload), the child becomes
|
|
204
|
-
# a zombie. Reuse terminate_child for the bounded TERM + GRACE_PERIOD + KILL
|
|
205
|
-
# ladder so this never hangs the ensure path; swallow SystemCallError so
|
|
206
|
-
# cleanup can't mask the primary failure.
|
|
207
|
-
def ensure_reaped(pid)
|
|
208
|
-
return unless pid
|
|
209
|
-
|
|
210
|
-
reaped = ::Process.waitpid(pid, ::Process::WNOHANG)
|
|
211
|
-
return if reaped
|
|
212
|
-
|
|
213
|
-
terminate_child(pid)
|
|
214
|
-
rescue SystemCallError
|
|
215
|
-
nil
|
|
216
|
-
end
|
|
217
|
-
|
|
218
|
-
def terminate_child(pid)
|
|
219
|
-
Evilution::ProcessCleanup.safe_kill("TERM", pid)
|
|
220
|
-
_, status = ::Process.waitpid2(pid, ::Process::WNOHANG)
|
|
221
|
-
return if status
|
|
222
|
-
|
|
223
|
-
sleep(GRACE_PERIOD)
|
|
224
|
-
_, status = ::Process.waitpid2(pid, ::Process::WNOHANG)
|
|
225
|
-
return if status
|
|
226
|
-
|
|
227
|
-
Evilution::ProcessCleanup.safe_kill("KILL", pid)
|
|
228
|
-
Evilution::ProcessCleanup.safe_wait(pid)
|
|
229
|
-
end
|
|
230
|
-
|
|
231
219
|
def classify_status(result)
|
|
232
220
|
return :timeout if result[:timeout]
|
|
233
221
|
return :killed if result[:test_crashed]
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../dispatcher"
|
|
4
|
+
|
|
5
|
+
# Owns the per-worker item-timeout deadline clock for the Dispatcher: arming a
|
|
6
|
+
# worker's deadline when it goes busy, re-arming it on each result, surfacing the
|
|
7
|
+
# workers whose deadline has passed, and computing how long IO.select may block.
|
|
8
|
+
# Each worker carries its own deadline so a single stuck worker is reaped in
|
|
9
|
+
# isolation rather than aborting the whole pool (EV-gl1e). Pulling this cohesive
|
|
10
|
+
# timeout concern out of the Dispatcher keeps the dispatcher focused on the
|
|
11
|
+
# collect/recycle orchestration (EV-9mij).
|
|
12
|
+
#
|
|
13
|
+
# `workers` is the Dispatcher's live array (mutated in place as workers recycle),
|
|
14
|
+
# so the tracker always reads the current pool. `clock` is injectable for tests.
|
|
15
|
+
class Evilution::Parallel::WorkQueue::Dispatcher::DeadlineTracker
|
|
16
|
+
def initialize(item_timeout:, workers:, clock: -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) })
|
|
17
|
+
@item_timeout = item_timeout
|
|
18
|
+
@workers = workers
|
|
19
|
+
@clock = clock
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def enabled?
|
|
23
|
+
!@item_timeout.nil?
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
# Seconds IO.select may block: until the nearest worker deadline (never
|
|
27
|
+
# negative), or the raw timeout when no worker is currently on the clock.
|
|
28
|
+
def select_timeout
|
|
29
|
+
return @item_timeout unless enabled?
|
|
30
|
+
|
|
31
|
+
deadlines = @workers.filter_map(&:deadline)
|
|
32
|
+
return @item_timeout if deadlines.empty?
|
|
33
|
+
|
|
34
|
+
[deadlines.min - now, 0].max
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Workers whose deadline has passed while still holding in-flight work.
|
|
38
|
+
def overdue
|
|
39
|
+
return [] unless enabled?
|
|
40
|
+
|
|
41
|
+
moment = now
|
|
42
|
+
@workers.select { |worker| worker.deadline && worker.deadline <= moment && worker.pending.positive? }
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Arm a worker's clock when it first goes busy; idempotent for the in-flight
|
|
46
|
+
# item so a refresh does not extend an already-running deadline.
|
|
47
|
+
def start(worker)
|
|
48
|
+
return unless enabled?
|
|
49
|
+
|
|
50
|
+
worker.deadline ||= now + @item_timeout
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# After a result: re-arm while work remains, otherwise stop the clock.
|
|
54
|
+
def refresh(worker)
|
|
55
|
+
worker.deadline = (now + @item_timeout if enabled? && worker.pending.positive?)
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
private
|
|
59
|
+
|
|
60
|
+
def now
|
|
61
|
+
@clock.call
|
|
62
|
+
end
|
|
63
|
+
end
|
|
@@ -12,11 +12,11 @@ class Evilution::Parallel::WorkQueue::Dispatcher
|
|
|
12
12
|
@workers = workers
|
|
13
13
|
@items = items
|
|
14
14
|
@prefetch = prefetch
|
|
15
|
-
@item_timeout = item_timeout
|
|
16
15
|
@worker_max_items = worker_max_items
|
|
17
16
|
@recycle_factory = recycle_factory
|
|
18
17
|
@state = Evilution::Parallel::WorkQueue.send(:const_get, :CollectionState).new(items.length)
|
|
19
18
|
@retired = []
|
|
19
|
+
@deadlines = DeadlineTracker.new(item_timeout:, workers: @workers)
|
|
20
20
|
end
|
|
21
21
|
|
|
22
22
|
def run
|
|
@@ -38,24 +38,34 @@ class Evilution::Parallel::WorkQueue::Dispatcher
|
|
|
38
38
|
end
|
|
39
39
|
end
|
|
40
40
|
|
|
41
|
+
# Each worker carries its own deadline (set when it goes busy, refreshed on
|
|
42
|
+
# every result). The select blocks only until the nearest worker deadline,
|
|
43
|
+
# so a single stuck worker is reaped in isolation -- its in-flight item gets
|
|
44
|
+
# the WorkQueue::TIMED_OUT sentinel and the worker is recycled -- instead of
|
|
45
|
+
# the old pool-wide watchdog that SIGKILLed every worker and aborted the run.
|
|
41
46
|
def collect
|
|
42
47
|
io_to_worker = @workers.to_h { |w| [w.res_io, w] }
|
|
43
48
|
result_ios = io_to_worker.keys
|
|
44
49
|
|
|
45
50
|
while @state.in_flight.positive?
|
|
46
|
-
readable, = IO.select(result_ios, nil, nil, @
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
break
|
|
50
|
-
end
|
|
51
|
+
readable, = IO.select(result_ios, nil, nil, @deadlines.select_timeout)
|
|
52
|
+
reap_timed_out(io_to_worker, result_ios)
|
|
53
|
+
next if readable.nil?
|
|
51
54
|
|
|
52
|
-
readable.each
|
|
55
|
+
readable.each do |io|
|
|
56
|
+
process_readable(io, io_to_worker, result_ios) if result_ios.include?(io)
|
|
57
|
+
end
|
|
53
58
|
end
|
|
54
59
|
end
|
|
55
60
|
|
|
56
|
-
def
|
|
57
|
-
|
|
58
|
-
|
|
61
|
+
def reap_timed_out(io_to_worker, result_ios)
|
|
62
|
+
@deadlines.overdue.each { |worker| time_out_worker(worker, io_to_worker, result_ios) }
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def time_out_worker(worker, io_to_worker, result_ios)
|
|
66
|
+
worker.kill
|
|
67
|
+
mark_unfinished(worker, Evilution::Parallel::WorkQueue::TIMED_OUT)
|
|
68
|
+
retire_or_replace(worker, io_to_worker, result_ios)
|
|
59
69
|
end
|
|
60
70
|
|
|
61
71
|
def process_readable(io, io_to_worker, result_ios)
|
|
@@ -65,7 +75,7 @@ class Evilution::Parallel::WorkQueue::Dispatcher
|
|
|
65
75
|
|
|
66
76
|
def handle(worker, io_to_worker, result_ios)
|
|
67
77
|
message = worker.read_result
|
|
68
|
-
return handle_dead(worker) if message.nil?
|
|
78
|
+
return handle_dead(worker, io_to_worker, result_ios) if message.nil?
|
|
69
79
|
|
|
70
80
|
record(message, worker)
|
|
71
81
|
return false if recycle_and_dispatch(worker, io_to_worker, result_ios)
|
|
@@ -82,13 +92,24 @@ class Evilution::Parallel::WorkQueue::Dispatcher
|
|
|
82
92
|
@state.in_flight -= 1
|
|
83
93
|
worker.pending -= 1
|
|
84
94
|
worker.items_completed += 1
|
|
95
|
+
worker.in_flight_indices.delete(index)
|
|
96
|
+
@deadlines.refresh(worker)
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
# A worker that exited without replying loses only its in-flight item(s)
|
|
100
|
+
# (marked :died) and is recycled; the run continues rather than aborting.
|
|
101
|
+
def handle_dead(worker, io_to_worker, result_ios)
|
|
102
|
+
mark_unfinished(worker, Evilution::Parallel::WorkQueue::DIED)
|
|
103
|
+
retire_or_replace(worker, io_to_worker, result_ios)
|
|
104
|
+
false
|
|
85
105
|
end
|
|
86
106
|
|
|
87
|
-
def
|
|
88
|
-
|
|
107
|
+
def mark_unfinished(worker, sentinel)
|
|
108
|
+
worker.in_flight_indices.each { |index| @state.results[index] = sentinel }
|
|
89
109
|
@state.in_flight -= worker.pending
|
|
90
110
|
worker.pending = 0
|
|
91
|
-
|
|
111
|
+
worker.in_flight_indices.clear
|
|
112
|
+
worker.deadline = nil
|
|
92
113
|
end
|
|
93
114
|
|
|
94
115
|
def draining_for_recycle?(worker)
|
|
@@ -113,28 +134,52 @@ class Evilution::Parallel::WorkQueue::Dispatcher
|
|
|
113
134
|
end
|
|
114
135
|
|
|
115
136
|
def recycle(old_worker, io_to_worker, result_ios)
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
@retired << old_worker.retire
|
|
119
|
-
|
|
137
|
+
index = @workers.index(old_worker)
|
|
138
|
+
detach(old_worker, io_to_worker, result_ios)
|
|
120
139
|
new_worker = @recycle_factory.call(old_worker)
|
|
121
|
-
@workers[
|
|
122
|
-
|
|
123
|
-
result_ios << new_worker.res_io
|
|
140
|
+
@workers[index] = new_worker
|
|
141
|
+
attach(new_worker, io_to_worker, result_ios)
|
|
124
142
|
new_worker
|
|
125
143
|
end
|
|
126
144
|
|
|
145
|
+
# Shared failure-path recovery: retire the worker, and as long as work
|
|
146
|
+
# remains spin up a replacement to keep the pool full and hand it the next
|
|
147
|
+
# item. When the queue is already drained, just drop the worker.
|
|
148
|
+
def retire_or_replace(worker, io_to_worker, result_ios)
|
|
149
|
+
index = @workers.index(worker)
|
|
150
|
+
detach(worker, io_to_worker, result_ios)
|
|
151
|
+
|
|
152
|
+
if more_to_send? && @state.first_error.nil?
|
|
153
|
+
new_worker = @recycle_factory.call(worker)
|
|
154
|
+
@workers[index] = new_worker
|
|
155
|
+
attach(new_worker, io_to_worker, result_ios)
|
|
156
|
+
send_item(new_worker)
|
|
157
|
+
else
|
|
158
|
+
@workers.delete_at(index)
|
|
159
|
+
end
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
def detach(worker, io_to_worker, result_ios)
|
|
163
|
+
io_to_worker.delete(worker.res_io)
|
|
164
|
+
result_ios.delete(worker.res_io)
|
|
165
|
+
@retired << worker.retire
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
def attach(worker, io_to_worker, result_ios)
|
|
169
|
+
io_to_worker[worker.res_io] = worker
|
|
170
|
+
result_ios << worker.res_io
|
|
171
|
+
end
|
|
172
|
+
|
|
127
173
|
def send_item(worker)
|
|
128
174
|
worker.send_item(@state.next_index, @items[@state.next_index])
|
|
129
175
|
@state.next_index += 1
|
|
130
176
|
@state.in_flight += 1
|
|
177
|
+
@deadlines.start(worker)
|
|
131
178
|
end
|
|
132
179
|
|
|
133
180
|
def more_to_send?
|
|
134
181
|
@state.next_index < @items.length
|
|
135
182
|
end
|
|
136
|
-
|
|
137
|
-
def terminate_stuck
|
|
138
|
-
@workers.each(&:kill)
|
|
139
|
-
end
|
|
140
183
|
end
|
|
184
|
+
|
|
185
|
+
require_relative "dispatcher/deadline_tracker"
|
|
@@ -2,23 +2,34 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "../work_queue"
|
|
4
4
|
require_relative "../../child_output"
|
|
5
|
+
require_relative "../../process_supervisor"
|
|
6
|
+
require_relative "../../temp_dir_tracker"
|
|
5
7
|
require_relative "channel"
|
|
6
8
|
require_relative "channel/frame"
|
|
7
9
|
|
|
8
10
|
class Evilution::Parallel::WorkQueue::Worker
|
|
9
11
|
Timing = Data.define(:busy, :wall)
|
|
10
12
|
|
|
11
|
-
attr_reader :pid, :worker_index
|
|
12
|
-
attr_accessor :items_completed, :pending, :busy_time, :wall_time
|
|
13
|
-
|
|
14
|
-
|
|
13
|
+
attr_reader :pid, :worker_index, :in_flight_indices
|
|
14
|
+
attr_accessor :items_completed, :pending, :busy_time, :wall_time, :deadline
|
|
15
|
+
|
|
16
|
+
# EV-dg69 / EV-5rrh step 3: the supervisor owns the worker's process-group
|
|
17
|
+
# isolation, signal-safe registry, group-kill and reap. spawn passes
|
|
18
|
+
# isolate_in_child: false so the worker becomes its own group leader only
|
|
19
|
+
# parent-side, AFTER the supervisor has registered it -- preserving the
|
|
20
|
+
# EV-jwao register-before-isolate ordering (the trap can never see a leader
|
|
21
|
+
# missing from the registry). EV-cnx8 group-leadership (so #kill sweeps the
|
|
22
|
+
# whole subtree) is still established, now by the supervisor's parent-side
|
|
23
|
+
# setpgid.
|
|
24
|
+
def self.spawn(worker_index:, hooks:, supervisor: Evilution::ProcessSupervisor.new, &block)
|
|
15
25
|
cmd_read, cmd_write = IO.pipe
|
|
16
26
|
res_read, res_write = IO.pipe
|
|
17
27
|
[cmd_read, cmd_write, res_read, res_write].each(&:binmode)
|
|
18
28
|
|
|
19
|
-
|
|
29
|
+
handle = supervisor.spawn(isolate_in_child: false) do
|
|
20
30
|
cmd_write.close
|
|
21
31
|
res_read.close
|
|
32
|
+
install_child_signal_handlers
|
|
22
33
|
ENV["TEST_ENV_NUMBER"] = test_env_number_for(worker_index)
|
|
23
34
|
Evilution::ChildOutput.redirect!
|
|
24
35
|
Loop.run(cmd_read, res_write, hooks: hooks, &block)
|
|
@@ -26,7 +37,25 @@ class Evilution::Parallel::WorkQueue::Worker
|
|
|
26
37
|
|
|
27
38
|
cmd_read.close
|
|
28
39
|
res_write.close
|
|
29
|
-
new(
|
|
40
|
+
new(handle:, supervisor:, cmd_write:, res_read:, worker_index:)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# EV-7a91: a worker is the parent of the inner per-mutation Fork children it
|
|
44
|
+
# spawns, and those children are their own process-group leaders (EV-2sh8), so
|
|
45
|
+
# the Runner's group-kill of the worker never reaches them. On a terminal
|
|
46
|
+
# INT/TERM the worker must therefore tear down AND reap the inner children it
|
|
47
|
+
# owns before it dies, or they survive as zombies (their parent gone) until an
|
|
48
|
+
# ancestor exits. cleanup_all clears any per-mutation sandbox dirs the inner
|
|
49
|
+
# children registered in this worker's TempDirTracker.
|
|
50
|
+
def self.install_child_signal_handlers
|
|
51
|
+
%w[INT TERM].each do |sig|
|
|
52
|
+
Signal.trap(sig) do
|
|
53
|
+
Evilution::TempDirTracker.cleanup_all
|
|
54
|
+
Evilution::ProcessSupervisor.kill_and_reap_all
|
|
55
|
+
Signal.trap(sig, "DEFAULT")
|
|
56
|
+
Process.kill(sig, Process.pid)
|
|
57
|
+
end
|
|
58
|
+
end
|
|
30
59
|
end
|
|
31
60
|
|
|
32
61
|
# EV-kdns / GH #817: translate 0-based worker slot to parallel_tests'
|
|
@@ -37,8 +66,10 @@ class Evilution::Parallel::WorkQueue::Worker
|
|
|
37
66
|
worker_index.zero? ? "" : (worker_index + 1).to_s
|
|
38
67
|
end
|
|
39
68
|
|
|
40
|
-
def initialize(
|
|
41
|
-
@
|
|
69
|
+
def initialize(handle:, supervisor:, cmd_write:, res_read:, worker_index:)
|
|
70
|
+
@handle = handle
|
|
71
|
+
@supervisor = supervisor
|
|
72
|
+
@pid = handle.pid
|
|
42
73
|
@cmd_write = cmd_write
|
|
43
74
|
@res_read = res_read
|
|
44
75
|
@worker_index = worker_index
|
|
@@ -46,6 +77,8 @@ class Evilution::Parallel::WorkQueue::Worker
|
|
|
46
77
|
@pending = 0
|
|
47
78
|
@busy_time = 0.0
|
|
48
79
|
@wall_time = 0.0
|
|
80
|
+
@in_flight_indices = []
|
|
81
|
+
@deadline = nil
|
|
49
82
|
end
|
|
50
83
|
|
|
51
84
|
def res_io
|
|
@@ -55,6 +88,7 @@ class Evilution::Parallel::WorkQueue::Worker
|
|
|
55
88
|
def send_item(index, item)
|
|
56
89
|
Evilution::Parallel::WorkQueue::Channel.write(@cmd_write, [index, item])
|
|
57
90
|
@pending += 1
|
|
91
|
+
@in_flight_indices << index
|
|
58
92
|
end
|
|
59
93
|
|
|
60
94
|
def read_result
|
|
@@ -67,10 +101,11 @@ class Evilution::Parallel::WorkQueue::Worker
|
|
|
67
101
|
nil
|
|
68
102
|
end
|
|
69
103
|
|
|
104
|
+
# SIGKILL the worker's whole process group (negative pid), reaping any
|
|
105
|
+
# grandchildren it forked, with the bare pid as a fallback for the case where
|
|
106
|
+
# the group is gone (already reaped, or setpgid did not take).
|
|
70
107
|
def kill
|
|
71
|
-
|
|
72
|
-
rescue Errno::ESRCH
|
|
73
|
-
nil
|
|
108
|
+
@supervisor.signal_group("KILL", @handle)
|
|
74
109
|
end
|
|
75
110
|
|
|
76
111
|
def close_pipes
|
|
@@ -78,10 +113,11 @@ class Evilution::Parallel::WorkQueue::Worker
|
|
|
78
113
|
@res_read.close unless @res_read.closed?
|
|
79
114
|
end
|
|
80
115
|
|
|
116
|
+
# Reap the leader and drop it from the registry so the trap never signals a
|
|
117
|
+
# group whose pid the OS may have recycled. ECHILD-tolerant; unregister is a
|
|
118
|
+
# no-op if it was never registered.
|
|
81
119
|
def reap
|
|
82
|
-
|
|
83
|
-
rescue Errno::ECHILD
|
|
84
|
-
nil
|
|
120
|
+
@supervisor.reap(@handle)
|
|
85
121
|
end
|
|
86
122
|
|
|
87
123
|
def retire
|
|
@@ -9,6 +9,14 @@ class Evilution::Parallel::WorkQueue
|
|
|
9
9
|
|
|
10
10
|
TIMING_GRACE_PERIOD = 5
|
|
11
11
|
|
|
12
|
+
# Sentinel results for items whose worker never produced a value. The
|
|
13
|
+
# dispatcher writes these into the results array (instead of aborting the
|
|
14
|
+
# whole run) so a single stuck/dead worker only loses its own in-flight
|
|
15
|
+
# item(s). Mutation-aware callers translate the reason into a status.
|
|
16
|
+
Unfinished = Data.define(:reason)
|
|
17
|
+
TIMED_OUT = Unfinished.new(reason: :timeout)
|
|
18
|
+
DIED = Unfinished.new(reason: :died)
|
|
19
|
+
|
|
12
20
|
def initialize(size:, hooks: nil, prefetch: 1, item_timeout: nil, worker_max_items: nil)
|
|
13
21
|
Validators::PositiveInt.call!(:size, size)
|
|
14
22
|
Validators::PositiveInt.call!(:prefetch, prefetch)
|