evilution 0.33.0 → 0.34.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.beads/interactions.jsonl +16 -0
- data/.rubocop_todo.yml +1 -1
- data/CHANGELOG.md +14 -0
- data/README.md +11 -9
- data/docs/isolation.md +31 -2
- data/lib/evilution/cli/parser/options_builder.rb +17 -0
- data/lib/evilution/config/validators/example_targeting_strategy.rb +22 -0
- data/lib/evilution/config.rb +16 -2
- data/lib/evilution/coverage/digest.rb +16 -0
- data/lib/evilution/coverage/map.rb +64 -0
- data/lib/evilution/coverage/map_builder.rb +82 -0
- data/lib/evilution/coverage/map_store.rb +87 -0
- data/lib/evilution/coverage/recorder.rb +85 -0
- data/lib/evilution/coverage.rb +8 -0
- data/lib/evilution/coverage_example_filter.rb +41 -0
- data/lib/evilution/isolation/fork.rb +38 -76
- data/lib/evilution/parallel/work_queue/dispatcher/deadline_tracker.rb +63 -0
- data/lib/evilution/parallel/work_queue/dispatcher.rb +7 -34
- data/lib/evilution/parallel/work_queue/worker.rb +41 -51
- data/lib/evilution/process_supervisor.rb +259 -0
- data/lib/evilution/runner/baseline_runner.rb +52 -0
- data/lib/evilution/runner/isolation_resolver.rb +106 -12
- data/lib/evilution/runner.rb +3 -2
- data/lib/evilution/spec_resolver.rb +66 -0
- data/lib/evilution/spec_selector.rb +14 -4
- data/lib/evilution/version.rb +1 -1
- data/lib/evilution.rb +1 -0
- data/scripts/canary_manifest.yml +47 -0
- data/scripts/compare_targeting +277 -0
- data/scripts/compare_targeting.example.yml +24 -0
- metadata +15 -3
- data/lib/evilution/parallel/work_queue/worker_registry.rb +0 -47
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../coverage"
|
|
4
|
+
require_relative "map"
|
|
5
|
+
|
|
6
|
+
# Wraps each example with a before/after coverage diff and attributes the
|
|
7
|
+
# newly-executed lines (in target files only) to that example's location.
|
|
8
|
+
# coverage_source is injected for testability; in production it is
|
|
9
|
+
# -> { ::Coverage.peek_result }.
|
|
10
|
+
class Evilution::Coverage::Recorder
  # @param target_files [Enumerable<String>] files to attribute coverage for;
  #   each entry is used verbatim as a key into the coverage snapshots.
  # @param coverage_source [#call] returns the current per-file coverage hash
  #   (or nil, which is treated as an empty snapshot).
  def initialize(target_files:, coverage_source: -> { ::Coverage.peek_result })
    @target_files = target_files.to_a
    @coverage_source = coverage_source
    # file => line => [example_location, ...]
    @index = Hash.new { |h, file| h[file] = Hash.new { |g, line| g[line] = [] } }
    # file => { line => true }. An insertion-ordered set, so a line seen again
    # by a later example costs nothing extra; appending to an Array here would
    # grow with (examples x covered lines) and only be de-duplicated in to_map.
    @executed = Hash.new { |h, file| h[file] = {} }
  end

  # Runs the given block between two coverage snapshots and credits
  # example_location with the target-file lines whose count rose.
  #
  # If the block raises, the exception propagates and nothing is attributed
  # for this example (the after-snapshot is never taken).
  #
  # @param example_location [Object] identifier stored against each covered line
  # @return [Object] whatever the block returned
  def around_example(example_location)
    before = snapshot
    result = yield
    after = snapshot
    attribute(before, after, example_location)
    result
  end

  # Builds the Map from everything recorded so far.
  #
  # @param built_files passed through to the Map unchanged
  # @return [Evilution::Coverage::Map]
  def to_map(built_files:)
    Evilution::Coverage::Map.new(
      index: materialize(@index),
      built_files: built_files,
      # Keys come out in first-seen order, one entry per line.
      executed_lines: @executed.transform_values(&:keys)
    )
  end

  private

  # Current coverage snapshot; a nil result from the source becomes {}.
  def snapshot
    @coverage_source.call || {}
  end

  # For each target file present in the after-snapshot, record its executed
  # lines and the lines this example newly advanced. Files with no usable
  # line counts in the after-snapshot are skipped.
  def attribute(before, after, example_location)
    @target_files.each do |file|
      after_counts = line_counts(after[file])
      next unless after_counts

      record_executed(file, after_counts)
      record_increases(file, line_counts(before[file]) || [], after_counts, example_location)
    end
  end

  # Every line with a non-zero count in the after-snapshot has run at least once
  # by now -- including lines covered only at load (a `def` line is already > 0
  # in the first example's after-snapshot). Recording them lets the Map tell a
  # load-covered line from a line that never ran.
  def record_executed(file, after_counts)
    after_counts.each_with_index do |count, idx|
      next if count.nil? || count.zero?

      # Line numbers are 1-based; the counts array is 0-based.
      @executed[file][idx + 1] = true
    end
  end

  # Credit example_location with every line whose execution count rose between
  # the before/after snapshots (a newly-executed, executable line). A line
  # missing from the before-snapshot is treated as having count 0.
  def record_increases(file, before_counts, after_counts, example_location)
    after_counts.each_with_index do |count, idx|
      next if count.nil? || count.zero?
      next unless count > (before_counts[idx] || 0)

      @index[file][idx + 1] << example_location
    end
  end

  # Coverage.peek_result yields per-file line counts either as a bare array
  # (legacy Coverage.start) or as a { lines: [...] } hash (Coverage.start with
  # lines:/branches:/methods: modes). Normalize to the bare counts array.
  # Returns nil when the entry is nil or is a hash without :lines.
  def line_counts(entry)
    entry.is_a?(Hash) ? entry[:lines] : entry
  end

  # Copy the auto-vivifying index into plain hashes (no default procs), so a
  # lookup of an unknown file or line on the result does not create entries.
  def materialize(index)
    index.each_with_object({}) do |(file, lines), out|
      out[file] = lines.each_with_object({}) { |(line, locs), inner| inner[line] = locs }
    end
  end
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../evilution"
|
|
4
|
+
require_relative "coverage/map"
|
|
5
|
+
|
|
6
|
+
# Per-mutation example targeting backed by a real line-coverage Map (EV-ndjd).
|
|
7
|
+
# Honours the same contract as the lexical Evilution::ExampleFilter --
|
|
8
|
+
# call(mutation, spec_paths) -> Array[location] | spec_paths | nil -- so it drops
|
|
9
|
+
# straight into the existing ExampleFilter seam.
|
|
10
|
+
#
|
|
11
|
+
# Resolution order for the mutated source file F at line L:
|
|
12
|
+
# - F not fully built in the map (digest miss / partial build) -> delegate to
|
|
13
|
+
# the lexical filter (safe fallback) with the original spec_paths.
|
|
14
|
+
# - F built and L covered by examples -> run exactly those covering examples
|
|
15
|
+
# (a SUBSET of what the resolved spec runs, so a strict speedup that cannot
|
|
16
|
+
# lose a kill full-file would catch).
|
|
17
|
+
# - F built, but L attributed to no example -> defer to lexical/full-file.
|
|
18
|
+
#
|
|
19
|
+
# Accuracy-first: coverage ONLY narrows the example set when it positively knows
|
|
20
|
+
# the covering examples. It never marks a mutation :unresolved on "no coverage" --
|
|
21
|
+
# on real repos a line can be exercised indirectly (before(:all), load time, a
|
|
22
|
+
# spec the per-example diff did not attribute), and asserting a gap there loses
|
|
23
|
+
# kills (EV-7uui validation). When coverage has no answer, the proven lexical
|
|
24
|
+
# path decides.
|
|
25
|
+
class Evilution::CoverageExampleFilter
  # @param map coverage map answering built?(file) and examples_for(file, line)
  # @param lexical fallback filter responding to call(mutation, spec_paths)
  # @param project_root base directory that relative mutation paths are
  #   expanded against (stored as a String)
  def initialize(map:, lexical:, project_root: Evilution::PROJECT_ROOT)
    @map = map
    @lexical = lexical
    @project_root = project_root.to_s
  end

  # Returns the examples the map attributes to the mutated line, or whatever
  # the lexical filter decides when the map has no positive answer.
  #
  # @param mutation responds to file_path and line
  # @param spec_paths handed to the lexical filter untouched
  def call(mutation, spec_paths)
    source_file = File.expand_path(mutation.file_path, @project_root)

    # Only a fully built file with at least one covering example narrows the
    # set; every other outcome falls through to the lexical filter below.
    if @map.built?(source_file)
      covering = @map.examples_for(source_file, mutation.line)
      return covering unless covering.empty?
    end

    @lexical.call(mutation, spec_paths)
  end
end
|
|
@@ -5,7 +5,7 @@ require "tmpdir"
|
|
|
5
5
|
require_relative "../memory"
|
|
6
6
|
require_relative "../temp_dir_tracker"
|
|
7
7
|
require_relative "../child_output"
|
|
8
|
-
require_relative "../
|
|
8
|
+
require_relative "../process_supervisor"
|
|
9
9
|
|
|
10
10
|
require_relative "../isolation"
|
|
11
11
|
|
|
@@ -15,21 +15,25 @@ class Evilution::Isolation::Fork
|
|
|
15
15
|
|
|
16
16
|
def initialize(hooks: nil)
|
|
17
17
|
@hooks = hooks
|
|
18
|
+
# EV-3aw3 / EV-5rrh step 2: the supervisor owns this path's lifecycle --
|
|
19
|
+
# spawn + process-group isolation, the TERM/grace/KILL ladder, and reap +
|
|
20
|
+
# sandbox removal. fork.rb keeps only the marshal-pipe read protocol.
|
|
21
|
+
@supervisor = Evilution::ProcessSupervisor.new
|
|
18
22
|
end
|
|
19
23
|
|
|
20
24
|
def call(mutation:, test_command:, timeout:)
|
|
21
|
-
|
|
25
|
+
handle = nil
|
|
22
26
|
sandbox_dir = Dir.mktmpdir("evilution-run")
|
|
23
27
|
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
24
28
|
parent_rss = Evilution::Memory.rss_kb
|
|
25
29
|
read_io, write_io = binary_pipe
|
|
26
|
-
|
|
30
|
+
handle = spawn_child(read_io, write_io, sandbox_dir, mutation, test_command)
|
|
27
31
|
write_io.close
|
|
28
|
-
result = wait_for_result(
|
|
32
|
+
result = wait_for_result(handle, read_io, timeout)
|
|
29
33
|
duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
|
|
30
34
|
build_mutation_result(mutation, result, duration, parent_rss)
|
|
31
35
|
ensure
|
|
32
|
-
cleanup_resources(read_io, write_io,
|
|
36
|
+
cleanup_resources(read_io, write_io, handle, sandbox_dir)
|
|
33
37
|
end
|
|
34
38
|
|
|
35
39
|
private
|
|
@@ -46,14 +50,18 @@ class Evilution::Isolation::Fork
|
|
|
46
50
|
[read_io, write_io]
|
|
47
51
|
end
|
|
48
52
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
53
|
+
# Supervisor.spawn makes the child its own process-group leader (setpgid)
|
|
54
|
+
# before this block runs, so any grandchildren test_command forks inherit the
|
|
55
|
+
# group and the TERM/KILL ladder sweeps the whole subtree on timeout (EV-2sh8
|
|
56
|
+
# / GH #1330). The block keeps the marshal-pipe protocol: write a
|
|
57
|
+
# length-prefixed payload, then exit with the pass/fail code.
|
|
58
|
+
def spawn_child(read_io, write_io, sandbox_dir, mutation, test_command)
|
|
59
|
+
@supervisor.spawn(sandbox_dir: sandbox_dir) do
|
|
52
60
|
ENV["TMPDIR"] = sandbox_dir
|
|
53
61
|
# Path-relativizing mutations (e.g. File.join(dir, name) -> name) would
|
|
54
62
|
# otherwise write into the parent's CWD (typically the repo root) and
|
|
55
63
|
# leak past the run. chdir here keeps such writes inside sandbox_dir,
|
|
56
|
-
# which the
|
|
64
|
+
# which the supervisor removes on reap. The in_isolated_worker! flag
|
|
57
65
|
# signals the rest of evilution (SpecResolver/SpecSelector/SpecAstCache/
|
|
58
66
|
# MutationApplier/SourceEvaluator/Integration) to anchor project-relative
|
|
59
67
|
# paths to Evilution::PROJECT_ROOT instead of the sandbox CWD.
|
|
@@ -71,27 +79,20 @@ class Evilution::Isolation::Fork
|
|
|
71
79
|
end
|
|
72
80
|
end
|
|
73
81
|
|
|
74
|
-
#
|
|
75
|
-
#
|
|
76
|
-
#
|
|
77
|
-
#
|
|
78
|
-
#
|
|
79
|
-
|
|
80
|
-
# so EV-cnx8's outer process-group kill never sweeps it. Done child-side (not
|
|
81
|
-
# parent-side as in Worker) because the per-mutation timeout fires seconds
|
|
82
|
-
# later, long after this line has run, so no fork-before-setpgid race exists.
|
|
83
|
-
def isolate_into_own_process_group
|
|
84
|
-
::Process.setpgid(0, 0)
|
|
85
|
-
rescue SystemCallError
|
|
86
|
-
nil
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
def cleanup_resources(read_io, write_io, pid, sandbox_dir)
|
|
82
|
+
# The parent owns read_io/write_io (write_io is closed right after spawn so
|
|
83
|
+
# read_io can see EOF), so they are closed here rather than handed to the
|
|
84
|
+
# supervisor. The supervisor reaps the child and removes the sandbox dir; on
|
|
85
|
+
# the early-failure path (binary_pipe raised before spawn) handle is nil, so
|
|
86
|
+
# the orphaned sandbox is removed directly.
|
|
87
|
+
def cleanup_resources(read_io, write_io, handle, sandbox_dir)
|
|
90
88
|
read_io.close unless read_io.nil?
|
|
91
89
|
write_io.close unless write_io.nil?
|
|
92
|
-
|
|
90
|
+
if handle
|
|
91
|
+
@supervisor.terminate(handle, grace: GRACE_PERIOD)
|
|
92
|
+
elsif sandbox_dir
|
|
93
|
+
FileUtils.rm_rf(sandbox_dir)
|
|
94
|
+
end
|
|
93
95
|
restore_original_source
|
|
94
|
-
FileUtils.rm_rf(sandbox_dir) if sandbox_dir
|
|
95
96
|
end
|
|
96
97
|
|
|
97
98
|
def restore_original_source
|
|
@@ -125,21 +126,21 @@ class Evilution::Isolation::Fork
|
|
|
125
126
|
# never sees EOF and hangs forever. The length prefix makes payload reads
|
|
126
127
|
# bounded; the waitpid-WNOHANG check inside the poll loop lets us exit
|
|
127
128
|
# promptly when the child died without writing anything.
|
|
128
|
-
def wait_for_result(
|
|
129
|
+
def wait_for_result(handle, read_io, timeout)
|
|
129
130
|
deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
|
|
130
131
|
loop do
|
|
131
132
|
remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
132
|
-
return timeout_result(
|
|
133
|
+
return timeout_result(handle) if remaining <= 0
|
|
133
134
|
|
|
134
135
|
if read_io.wait_readable([remaining, 0.5].min)
|
|
135
136
|
payload = read_payload(read_io, deadline)
|
|
136
|
-
return reap_and_decode(
|
|
137
|
+
return reap_and_decode(handle, payload) if payload
|
|
137
138
|
end
|
|
138
139
|
|
|
139
|
-
next unless
|
|
140
|
+
next unless @supervisor.reap_nonblock(handle)
|
|
140
141
|
|
|
141
142
|
# Child exited. Drain any final payload that arrived between
|
|
142
|
-
# wait_readable timeout and
|
|
143
|
+
# wait_readable timeout and the reap (race) before declaring empty.
|
|
143
144
|
final = read_payload(read_io, Process.clock_gettime(Process::CLOCK_MONOTONIC) + 0.1)
|
|
144
145
|
return decode_payload(final) if final
|
|
145
146
|
|
|
@@ -153,13 +154,13 @@ class Evilution::Isolation::Fork
|
|
|
153
154
|
# in execute_in_child waiting on a subject grandchild the mutation broke.
|
|
154
155
|
# wait_for_result has already returned by this point, so the per-mutation
|
|
155
156
|
# timeout cannot fire. Bound the wait and fall back to the TERM/KILL ladder.
|
|
156
|
-
def reap_and_decode(
|
|
157
|
+
def reap_and_decode(handle, payload)
|
|
157
158
|
deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + REAP_DEADLINE
|
|
158
159
|
loop do
|
|
159
|
-
break if
|
|
160
|
+
break if @supervisor.reap_nonblock(handle)
|
|
160
161
|
|
|
161
162
|
if Process.clock_gettime(Process::CLOCK_MONOTONIC) >= deadline
|
|
162
|
-
|
|
163
|
+
@supervisor.terminate(handle, grace: GRACE_PERIOD)
|
|
163
164
|
break
|
|
164
165
|
end
|
|
165
166
|
sleep 0.05
|
|
@@ -210,50 +211,11 @@ class Evilution::Isolation::Fork
|
|
|
210
211
|
{ timeout: false, passed: false, error: "empty result from child" }
|
|
211
212
|
end
|
|
212
213
|
|
|
213
|
-
def timeout_result(
|
|
214
|
-
|
|
214
|
+
def timeout_result(handle)
|
|
215
|
+
@supervisor.terminate(handle, grace: GRACE_PERIOD)
|
|
215
216
|
{ timeout: true }
|
|
216
217
|
end
|
|
217
218
|
|
|
218
|
-
# Defensive reap: if normal control flow raised before wait_for_result
|
|
219
|
-
# reaped the child (e.g. Marshal.load on corrupt payload), the child becomes
|
|
220
|
-
# a zombie. Reuse terminate_child for the bounded TERM + GRACE_PERIOD + KILL
|
|
221
|
-
# ladder so this never hangs the ensure path; swallow SystemCallError so
|
|
222
|
-
# cleanup can't mask the primary failure.
|
|
223
|
-
def ensure_reaped(pid)
|
|
224
|
-
return unless pid
|
|
225
|
-
|
|
226
|
-
reaped = ::Process.waitpid(pid, ::Process::WNOHANG)
|
|
227
|
-
return if reaped
|
|
228
|
-
|
|
229
|
-
terminate_child(pid)
|
|
230
|
-
rescue SystemCallError
|
|
231
|
-
nil
|
|
232
|
-
end
|
|
233
|
-
|
|
234
|
-
def terminate_child(pid)
|
|
235
|
-
signal_tree("TERM", pid)
|
|
236
|
-
_, status = ::Process.waitpid2(pid, ::Process::WNOHANG)
|
|
237
|
-
return if status
|
|
238
|
-
|
|
239
|
-
sleep(GRACE_PERIOD)
|
|
240
|
-
_, status = ::Process.waitpid2(pid, ::Process::WNOHANG)
|
|
241
|
-
return if status
|
|
242
|
-
|
|
243
|
-
signal_tree("KILL", pid)
|
|
244
|
-
Evilution::ProcessCleanup.safe_wait(pid)
|
|
245
|
-
end
|
|
246
|
-
|
|
247
|
-
# Signal the child's whole process group (-pid) to sweep any grandchildren it
|
|
248
|
-
# forked, then the bare pid as a fallback for the case where setpgid failed
|
|
249
|
-
# (no group exists, so the group signal is a harmless Errno::ESRCH). Only the
|
|
250
|
-
# leader pid is reaped here -- group-killed grandchildren are not our direct
|
|
251
|
-
# children, so init reaps them once they die.
|
|
252
|
-
def signal_tree(sig, pid)
|
|
253
|
-
Evilution::ProcessCleanup.safe_kill(sig, -pid)
|
|
254
|
-
Evilution::ProcessCleanup.safe_kill(sig, pid)
|
|
255
|
-
end
|
|
256
|
-
|
|
257
219
|
def classify_status(result)
|
|
258
220
|
return :timeout if result[:timeout]
|
|
259
221
|
return :killed if result[:test_crashed]
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../dispatcher"
|
|
4
|
+
|
|
5
|
+
# Owns the per-worker item-timeout deadline clock for the Dispatcher: arming a
|
|
6
|
+
# worker's deadline when it goes busy, re-arming it on each result, surfacing the
|
|
7
|
+
# workers whose deadline has passed, and computing how long IO.select may block.
|
|
8
|
+
# Each worker carries its own deadline so a single stuck worker is reaped in
|
|
9
|
+
# isolation rather than aborting the whole pool (EV-gl1e). Pulling this cohesive
|
|
10
|
+
# timeout concern out of the Dispatcher keeps the dispatcher focused on the
|
|
11
|
+
# collect/recycle orchestration (EV-9mij).
|
|
12
|
+
#
|
|
13
|
+
# `workers` is the Dispatcher's live array (mutated in place as workers recycle),
|
|
14
|
+
# so the tracker always reads the current pool. `clock` is injectable for tests.
|
|
15
|
+
class Evilution::Parallel::WorkQueue::Dispatcher::DeadlineTracker
  # @param item_timeout [Numeric, nil] seconds allowed per item; nil disables
  #   all deadline tracking
  # @param workers [Array] worker objects exposing deadline, deadline= and pending
  # @param clock [#call] returns the current time in seconds
  def initialize(item_timeout:, workers:, clock: -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) })
    @item_timeout = item_timeout
    @workers = workers
    @clock = clock
  end

  # True whenever an item timeout was configured (anything other than nil).
  def enabled?
    !@item_timeout.nil?
  end

  # How long IO.select may block: the time left until the earliest armed
  # deadline, floored at zero. Falls back to the configured timeout itself
  # (nil when tracking is disabled) if no worker has a deadline set.
  def select_timeout
    return @item_timeout unless enabled?

    nearest = @workers.filter_map(&:deadline).min
    return @item_timeout if nearest.nil?

    [nearest - now, 0].max
  end

  # The workers whose deadline is at or before the current clock reading and
  # that still have pending work. Always empty when tracking is disabled.
  def overdue
    return [] unless enabled?

    cutoff = now
    @workers.select do |worker|
      due_at = worker.deadline
      due_at && due_at <= cutoff && worker.pending.positive?
    end
  end

  # Arms the worker's deadline unless one is already set, so calling this
  # again for a worker already on the clock leaves its deadline untouched.
  def start(worker)
    worker.deadline ||= now + @item_timeout if enabled?
  end

  # Called after a result: sets a fresh deadline when the worker still has
  # pending work, and clears it (nil) otherwise or when tracking is disabled.
  def refresh(worker)
    worker.deadline =
      if enabled? && worker.pending.positive?
        now + @item_timeout
      end
  end

  private

  # Current reading of the injected clock.
  def now
    @clock.call
  end
end
|
|
@@ -12,11 +12,11 @@ class Evilution::Parallel::WorkQueue::Dispatcher
|
|
|
12
12
|
@workers = workers
|
|
13
13
|
@items = items
|
|
14
14
|
@prefetch = prefetch
|
|
15
|
-
@item_timeout = item_timeout
|
|
16
15
|
@worker_max_items = worker_max_items
|
|
17
16
|
@recycle_factory = recycle_factory
|
|
18
17
|
@state = Evilution::Parallel::WorkQueue.send(:const_get, :CollectionState).new(items.length)
|
|
19
18
|
@retired = []
|
|
19
|
+
@deadlines = DeadlineTracker.new(item_timeout:, workers: @workers)
|
|
20
20
|
end
|
|
21
21
|
|
|
22
22
|
def run
|
|
@@ -48,7 +48,7 @@ class Evilution::Parallel::WorkQueue::Dispatcher
|
|
|
48
48
|
result_ios = io_to_worker.keys
|
|
49
49
|
|
|
50
50
|
while @state.in_flight.positive?
|
|
51
|
-
readable, = IO.select(result_ios, nil, nil, select_timeout)
|
|
51
|
+
readable, = IO.select(result_ios, nil, nil, @deadlines.select_timeout)
|
|
52
52
|
reap_timed_out(io_to_worker, result_ios)
|
|
53
53
|
next if readable.nil?
|
|
54
54
|
|
|
@@ -58,21 +58,8 @@ class Evilution::Parallel::WorkQueue::Dispatcher
|
|
|
58
58
|
end
|
|
59
59
|
end
|
|
60
60
|
|
|
61
|
-
def select_timeout
|
|
62
|
-
return @item_timeout unless @item_timeout
|
|
63
|
-
|
|
64
|
-
deadlines = @workers.filter_map(&:deadline)
|
|
65
|
-
return @item_timeout if deadlines.empty?
|
|
66
|
-
|
|
67
|
-
[deadlines.min - monotonic, 0].max
|
|
68
|
-
end
|
|
69
|
-
|
|
70
61
|
def reap_timed_out(io_to_worker, result_ios)
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
now = monotonic
|
|
74
|
-
stuck = @workers.select { |w| w.deadline && w.deadline <= now && w.pending.positive? }
|
|
75
|
-
stuck.each { |w| time_out_worker(w, io_to_worker, result_ios) }
|
|
62
|
+
@deadlines.overdue.each { |worker| time_out_worker(worker, io_to_worker, result_ios) }
|
|
76
63
|
end
|
|
77
64
|
|
|
78
65
|
def time_out_worker(worker, io_to_worker, result_ios)
|
|
@@ -106,7 +93,7 @@ class Evilution::Parallel::WorkQueue::Dispatcher
|
|
|
106
93
|
worker.pending -= 1
|
|
107
94
|
worker.items_completed += 1
|
|
108
95
|
worker.in_flight_indices.delete(index)
|
|
109
|
-
|
|
96
|
+
@deadlines.refresh(worker)
|
|
110
97
|
end
|
|
111
98
|
|
|
112
99
|
# A worker that exited without replying loses only its in-flight item(s)
|
|
@@ -187,26 +174,12 @@ class Evilution::Parallel::WorkQueue::Dispatcher
|
|
|
187
174
|
worker.send_item(@state.next_index, @items[@state.next_index])
|
|
188
175
|
@state.next_index += 1
|
|
189
176
|
@state.in_flight += 1
|
|
190
|
-
|
|
191
|
-
end
|
|
192
|
-
|
|
193
|
-
def start_deadline(worker)
|
|
194
|
-
return unless @item_timeout
|
|
195
|
-
|
|
196
|
-
worker.deadline ||= monotonic + @item_timeout
|
|
197
|
-
end
|
|
198
|
-
|
|
199
|
-
def next_deadline(worker)
|
|
200
|
-
return nil unless @item_timeout && worker.pending.positive?
|
|
201
|
-
|
|
202
|
-
monotonic + @item_timeout
|
|
177
|
+
@deadlines.start(worker)
|
|
203
178
|
end
|
|
204
179
|
|
|
205
180
|
def more_to_send?
|
|
206
181
|
@state.next_index < @items.length
|
|
207
182
|
end
|
|
208
|
-
|
|
209
|
-
def monotonic
|
|
210
|
-
Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
211
|
-
end
|
|
212
183
|
end
|
|
184
|
+
|
|
185
|
+
require_relative "dispatcher/deadline_tracker"
|
|
@@ -2,9 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "../work_queue"
|
|
4
4
|
require_relative "../../child_output"
|
|
5
|
+
require_relative "../../process_supervisor"
|
|
6
|
+
require_relative "../../temp_dir_tracker"
|
|
5
7
|
require_relative "channel"
|
|
6
8
|
require_relative "channel/frame"
|
|
7
|
-
require_relative "worker_registry"
|
|
8
9
|
|
|
9
10
|
class Evilution::Parallel::WorkQueue::Worker
|
|
10
11
|
Timing = Data.define(:busy, :wall)
|
|
@@ -12,14 +13,23 @@ class Evilution::Parallel::WorkQueue::Worker
|
|
|
12
13
|
attr_reader :pid, :worker_index, :in_flight_indices
|
|
13
14
|
attr_accessor :items_completed, :pending, :busy_time, :wall_time, :deadline
|
|
14
15
|
|
|
15
|
-
|
|
16
|
+
# EV-dg69 / EV-5rrh step 3: the supervisor owns the worker's process-group
|
|
17
|
+
# isolation, signal-safe registry, group-kill and reap. spawn passes
|
|
18
|
+
# isolate_in_child: false so the worker becomes its own group leader only
|
|
19
|
+
# parent-side, AFTER the supervisor has registered it -- preserving the
|
|
20
|
+
# EV-jwao register-before-isolate ordering (the trap can never see a leader
|
|
21
|
+
# missing from the registry). EV-cnx8 group-leadership (so #kill sweeps the
|
|
22
|
+
# whole subtree) is still established, now by the supervisor's parent-side
|
|
23
|
+
# setpgid.
|
|
24
|
+
def self.spawn(worker_index:, hooks:, supervisor: Evilution::ProcessSupervisor.new, &block)
|
|
16
25
|
cmd_read, cmd_write = IO.pipe
|
|
17
26
|
res_read, res_write = IO.pipe
|
|
18
27
|
[cmd_read, cmd_write, res_read, res_write].each(&:binmode)
|
|
19
28
|
|
|
20
|
-
|
|
29
|
+
handle = supervisor.spawn(isolate_in_child: false) do
|
|
21
30
|
cmd_write.close
|
|
22
31
|
res_read.close
|
|
32
|
+
install_child_signal_handlers
|
|
23
33
|
ENV["TEST_ENV_NUMBER"] = test_env_number_for(worker_index)
|
|
24
34
|
Evilution::ChildOutput.redirect!
|
|
25
35
|
Loop.run(cmd_read, res_write, hooks: hooks, &block)
|
|
@@ -27,36 +37,25 @@ class Evilution::Parallel::WorkQueue::Worker
|
|
|
27
37
|
|
|
28
38
|
cmd_read.close
|
|
29
39
|
res_write.close
|
|
30
|
-
|
|
31
|
-
# already its own group leader yet missing from the registry (EV-jwao race,
|
|
32
|
-
# GH #1333 review): the spawn runs on the same main thread the trap
|
|
33
|
-
# interrupts, so a signal arriving between setpgid and register would
|
|
34
|
-
# otherwise leak a leader the trap cannot reach. Ordering register first
|
|
35
|
-
# leaves only safe windows -- pre-setpgid the child still shares the parent
|
|
36
|
-
# group and receives the terminal signal directly; once it is its own
|
|
37
|
-
# leader the registry already lists it. Registering unconditionally is safe
|
|
38
|
-
# because signal_all's kill(-pid) is a no-op (Errno::ESRCH) for a pid that
|
|
39
|
-
# never became a group leader (setpgid failed).
|
|
40
|
-
Evilution::Parallel::WorkQueue::WorkerRegistry.register(pid)
|
|
41
|
-
isolate_process_group(pid)
|
|
42
|
-
new(pid:, cmd_write:, res_read:, worker_index:)
|
|
40
|
+
new(handle:, supervisor:, cmd_write:, res_read:, worker_index:)
|
|
43
41
|
end
|
|
44
42
|
|
|
45
|
-
# EV-
|
|
46
|
-
#
|
|
47
|
-
#
|
|
48
|
-
#
|
|
49
|
-
#
|
|
50
|
-
#
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
43
|
+
# EV-7a91: a worker is the parent of the inner per-mutation Fork children it
|
|
44
|
+
# spawns, and those children are their own process-group leaders (EV-2sh8), so
|
|
45
|
+
# the Runner's group-kill of the worker never reaches them. On a terminal
|
|
46
|
+
# INT/TERM the worker must therefore tear down AND reap the inner children it
|
|
47
|
+
# owns before it dies, or they survive as zombies (their parent gone) until an
|
|
48
|
+
# ancestor exits. cleanup_all clears any per-mutation sandbox dirs the inner
|
|
49
|
+
# children registered in this worker's TempDirTracker.
|
|
50
|
+
def self.install_child_signal_handlers
|
|
51
|
+
%w[INT TERM].each do |sig|
|
|
52
|
+
Signal.trap(sig) do
|
|
53
|
+
Evilution::TempDirTracker.cleanup_all
|
|
54
|
+
Evilution::ProcessSupervisor.kill_and_reap_all
|
|
55
|
+
Signal.trap(sig, "DEFAULT")
|
|
56
|
+
Process.kill(sig, Process.pid)
|
|
57
|
+
end
|
|
58
|
+
end
|
|
60
59
|
end
|
|
61
60
|
|
|
62
61
|
# EV-kdns / GH #817: translate 0-based worker slot to parallel_tests'
|
|
@@ -67,8 +66,10 @@ class Evilution::Parallel::WorkQueue::Worker
|
|
|
67
66
|
worker_index.zero? ? "" : (worker_index + 1).to_s
|
|
68
67
|
end
|
|
69
68
|
|
|
70
|
-
def initialize(
|
|
71
|
-
@
|
|
69
|
+
def initialize(handle:, supervisor:, cmd_write:, res_read:, worker_index:)
|
|
70
|
+
@handle = handle
|
|
71
|
+
@supervisor = supervisor
|
|
72
|
+
@pid = handle.pid
|
|
72
73
|
@cmd_write = cmd_write
|
|
73
74
|
@res_read = res_read
|
|
74
75
|
@worker_index = worker_index
|
|
@@ -101,18 +102,10 @@ class Evilution::Parallel::WorkQueue::Worker
|
|
|
101
102
|
end
|
|
102
103
|
|
|
103
104
|
# SIGKILL the worker's whole process group (negative pid), reaping any
|
|
104
|
-
# grandchildren it forked
|
|
105
|
-
#
|
|
105
|
+
# grandchildren it forked, with the bare pid as a fallback for the case where
|
|
106
|
+
# the group is gone (already reaped, or setpgid did not take).
|
|
106
107
|
def kill
|
|
107
|
-
|
|
108
|
-
rescue Errno::ESRCH
|
|
109
|
-
kill_pid
|
|
110
|
-
end
|
|
111
|
-
|
|
112
|
-
def kill_pid
|
|
113
|
-
Process.kill("KILL", @pid)
|
|
114
|
-
rescue Errno::ESRCH
|
|
115
|
-
nil
|
|
108
|
+
@supervisor.signal_group("KILL", @handle)
|
|
116
109
|
end
|
|
117
110
|
|
|
118
111
|
def close_pipes
|
|
@@ -120,14 +113,11 @@ class Evilution::Parallel::WorkQueue::Worker
|
|
|
120
113
|
@res_read.close unless @res_read.closed?
|
|
121
114
|
end
|
|
122
115
|
|
|
116
|
+
# Reap the leader and drop it from the registry so the trap never signals a
|
|
117
|
+
# group whose pid the OS may have recycled. ECHILD-tolerant; unregister is a
|
|
118
|
+
# no-op if it was never registered.
|
|
123
119
|
def reap
|
|
124
|
-
|
|
125
|
-
rescue Errno::ECHILD
|
|
126
|
-
nil
|
|
127
|
-
ensure
|
|
128
|
-
# Drop the pgid once the leader is reaped so the trap never signals a group
|
|
129
|
-
# whose pid the OS may have recycled. No-op if it was never registered.
|
|
130
|
-
Evilution::Parallel::WorkQueue::WorkerRegistry.unregister(@pid)
|
|
120
|
+
@supervisor.reap(@handle)
|
|
131
121
|
end
|
|
132
122
|
|
|
133
123
|
def retire
|