evilution 0.33.0 → 0.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../coverage"
4
+ require_relative "map"
5
+
6
# Wraps each example with a before/after coverage diff and attributes the
# newly-executed lines (in target files only) to that example's location.
# coverage_source is injected for testability; in production it is
# -> { ::Coverage.peek_result }.
class Evilution::Coverage::Recorder
  # target_files    - enumerable of file keys; each is looked up verbatim in
  #                   the coverage snapshot, so it must match the snapshot's keys.
  # coverage_source - callable returning the current coverage snapshot
  #                   (file => counts); a nil result is treated as empty.
  def initialize(target_files:, coverage_source: -> { ::Coverage.peek_result })
    @target_files = target_files.to_a
    @coverage_source = coverage_source
    # file => line number => [example locations], auto-vivified on write.
    @index = Hash.new { |h, file| h[file] = Hash.new { |g, line| g[line] = [] } }
    # file => { line number => true }. A Hash is used as an insertion-ordered
    # set so the per-file entry stays bounded by the file's line count instead
    # of growing by (executed lines) on every single example.
    @executed = Hash.new { |h, file| h[file] = {} }
  end

  # Runs the given block between two coverage snapshots and credits
  # example_location with the lines whose counts rose in between.
  # Returns the block's own result. If the block raises, nothing is
  # attributed and the exception propagates.
  def around_example(example_location)
    before = snapshot
    result = yield
    after = snapshot
    attribute(before, after, example_location)
    result
  end

  # Builds the Evilution::Coverage::Map from everything recorded so far.
  # built_files is passed through untouched; executed_lines maps each file to
  # its executed line numbers, unique, in first-seen order.
  def to_map(built_files:)
    Evilution::Coverage::Map.new(
      index: materialize(@index),
      built_files: built_files,
      executed_lines: @executed.transform_values(&:keys)
    )
  end

  private

  # Current coverage snapshot, or an empty hash when the source yields nil.
  def snapshot
    @coverage_source.call || {}
  end

  # For each target file present in the after-snapshot, record the lines that
  # have executed and the lines this example newly executed.
  def attribute(before, after, example_location)
    @target_files.each do |file|
      after_counts = line_counts(after[file])
      next unless after_counts

      record_executed(file, after_counts)
      record_increases(file, line_counts(before[file]) || [], after_counts, example_location)
    end
  end

  # Every line with a non-zero count in the after-snapshot has run at least once
  # by now -- including lines covered only at load (a `def` line is already > 0
  # in the first example's after-snapshot). Recording them lets the Map tell a
  # load-covered line from a line that never ran.
  def record_executed(file, after_counts)
    after_counts.each_with_index do |count, idx|
      next if count.nil? || count.zero?

      # Touch @executed[file] only when a line actually ran, so a file with no
      # executed lines never gets an (empty) entry. Re-marking is a no-op.
      @executed[file][idx + 1] = true
    end
  end

  # Credit example_location with every line whose execution count rose between
  # the before/after snapshots (a newly-executed, executable line). A line
  # missing from before_counts is treated as having a count of 0.
  def record_increases(file, before_counts, after_counts, example_location)
    after_counts.each_with_index do |count, idx|
      next if count.nil? || count.zero?
      next unless count > (before_counts[idx] || 0)

      @index[file][idx + 1] << example_location
    end
  end

  # Coverage.peek_result yields per-file line counts either as a bare array
  # (legacy Coverage.start) or as a { lines: [...] } hash (Coverage.start with
  # lines:/branches:/methods: modes). Normalize to the bare counts array.
  # Returns nil when the entry is nil or the hash has no :lines key.
  def line_counts(entry)
    entry.is_a?(Hash) ? entry[:lines] : entry
  end

  # Copy the auto-vivifying index into plain hashes (no default procs) so
  # lookups on the result cannot create entries as a side effect.
  def materialize(index)
    index.each_with_object({}) do |(file, lines), out|
      out[file] = lines.each_with_object({}) { |(line, locs), inner| inner[line] = locs }
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../evilution"
4
+
5
# Namespace for per-example line-coverage support: the classes under it build
# a `source file:line -> [examples]` map so mutation targeting can run exactly
# the examples that execute a given line.
module Evilution::Coverage; end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../evilution"
4
+ require_relative "coverage/map"
5
+
6
# Per-mutation example targeting backed by a real line-coverage Map (EV-ndjd).
# Honours the same contract as the lexical Evilution::ExampleFilter --
# call(mutation, spec_paths) -> Array[location] | spec_paths | nil -- so it drops
# straight into the existing ExampleFilter seam.
#
# Resolution order for the mutated source file F at line L:
#   1. F is not built in the map (digest miss / partial build) -> delegate to
#      the lexical filter (safe fallback) with the original spec_paths.
#   2. F is built and L is covered by examples -> return exactly those covering
#      examples (a SUBSET of what the resolved spec runs, so a strict speedup
#      that cannot lose a kill full-file would catch).
#   3. F is built but L is attributed to no example -> delegate to the lexical
#      filter, same as case 1.
#
# Accuracy-first: coverage ONLY narrows the example set when it positively knows
# the covering examples. It never marks a mutation :unresolved on "no coverage" --
# on real repos a line can be exercised indirectly (before(:all), load time, a
# spec the per-example diff did not attribute), and asserting a gap there loses
# kills (EV-7uui validation). When coverage has no answer, the proven lexical
# path decides.
class Evilution::CoverageExampleFilter
  # map          - coverage map; must respond to built?(file) and
  #                examples_for(file, line).
  # lexical      - fallback filter; must respond to call(mutation, spec_paths).
  # project_root - base directory used to absolutize mutation.file_path.
  def initialize(map:, lexical:, project_root: Evilution::PROJECT_ROOT)
    @map = map
    @lexical = lexical
    @project_root = project_root.to_s
  end

  # Returns the covering examples for the mutation's line when the map knows
  # them; otherwise whatever the lexical filter returns for the same arguments.
  def call(mutation, spec_paths)
    source_file = File.expand_path(mutation.file_path, @project_root)

    if @map.built?(source_file)
      covering = @map.examples_for(source_file, mutation.line)
      return covering unless covering.empty?
    end

    @lexical.call(mutation, spec_paths)
  end
end
@@ -5,7 +5,7 @@ require "tmpdir"
5
5
  require_relative "../memory"
6
6
  require_relative "../temp_dir_tracker"
7
7
  require_relative "../child_output"
8
- require_relative "../process_cleanup"
8
+ require_relative "../process_supervisor"
9
9
 
10
10
  require_relative "../isolation"
11
11
 
@@ -15,21 +15,25 @@ class Evilution::Isolation::Fork
15
15
 
16
16
  def initialize(hooks: nil)
17
17
  @hooks = hooks
18
+ # EV-3aw3 / EV-5rrh step 2: the supervisor owns this path's lifecycle --
19
+ # spawn + process-group isolation, the TERM/grace/KILL ladder, and reap +
20
+ # sandbox removal. fork.rb keeps only the marshal-pipe read protocol.
21
+ @supervisor = Evilution::ProcessSupervisor.new
18
22
  end
19
23
 
20
24
  def call(mutation:, test_command:, timeout:)
21
- pid = nil
25
+ handle = nil
22
26
  sandbox_dir = Dir.mktmpdir("evilution-run")
23
27
  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
24
28
  parent_rss = Evilution::Memory.rss_kb
25
29
  read_io, write_io = binary_pipe
26
- pid = fork_child(read_io, write_io, sandbox_dir, mutation, test_command)
30
+ handle = spawn_child(read_io, write_io, sandbox_dir, mutation, test_command)
27
31
  write_io.close
28
- result = wait_for_result(pid, read_io, timeout)
32
+ result = wait_for_result(handle, read_io, timeout)
29
33
  duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
30
34
  build_mutation_result(mutation, result, duration, parent_rss)
31
35
  ensure
32
- cleanup_resources(read_io, write_io, pid, sandbox_dir)
36
+ cleanup_resources(read_io, write_io, handle, sandbox_dir)
33
37
  end
34
38
 
35
39
  private
@@ -46,14 +50,18 @@ class Evilution::Isolation::Fork
46
50
  [read_io, write_io]
47
51
  end
48
52
 
49
- def fork_child(read_io, write_io, sandbox_dir, mutation, test_command)
50
- ::Process.fork do
51
- isolate_into_own_process_group
53
+ # Supervisor.spawn makes the child its own process-group leader (setpgid)
54
+ # before this block runs, so any grandchildren test_command forks inherit the
55
+ # group and the TERM/KILL ladder sweeps the whole subtree on timeout (EV-2sh8
56
+ # / GH #1330). The block keeps the marshal-pipe protocol: write a
57
+ # length-prefixed payload, then exit with the pass/fail code.
58
+ def spawn_child(read_io, write_io, sandbox_dir, mutation, test_command)
59
+ @supervisor.spawn(sandbox_dir: sandbox_dir) do
52
60
  ENV["TMPDIR"] = sandbox_dir
53
61
  # Path-relativizing mutations (e.g. File.join(dir, name) -> name) would
54
62
  # otherwise write into the parent's CWD (typically the repo root) and
55
63
  # leak past the run. chdir here keeps such writes inside sandbox_dir,
56
- # which the ensure block of #call removes. The in_isolated_worker! flag
64
+ # which the supervisor removes on reap. The in_isolated_worker! flag
57
65
  # signals the rest of evilution (SpecResolver/SpecSelector/SpecAstCache/
58
66
  # MutationApplier/SourceEvaluator/Integration) to anchor project-relative
59
67
  # paths to Evilution::PROJECT_ROOT instead of the sandbox CWD.
@@ -71,27 +79,20 @@ class Evilution::Isolation::Fork
71
79
  end
72
80
  end
73
81
 
74
- # EV-2sh8 / GH #1330: make the mutation child its own process-group leader as
75
- # its very first act, before it runs test_command (which may fork blocking
76
- # grandchildren -- e.g. connection_pool / ractor / thread subject specs).
77
- # Grandchildren then inherit this group, so terminate_child can group-kill the
78
- # whole subtree on timeout. Without it, a blocking grandchild orphans to init
79
- # and survives the rest of the run -- the inner path never SIGKILLs the worker,
80
- # so EV-cnx8's outer process-group kill never sweeps it. Done child-side (not
81
- # parent-side as in Worker) because the per-mutation timeout fires seconds
82
- # later, long after this line has run, so no fork-before-setpgid race exists.
83
- def isolate_into_own_process_group
84
- ::Process.setpgid(0, 0)
85
- rescue SystemCallError
86
- nil
87
- end
88
-
89
- def cleanup_resources(read_io, write_io, pid, sandbox_dir)
82
+ # The parent owns read_io/write_io (write_io is closed right after spawn so
83
+ # read_io can see EOF), so they are closed here rather than handed to the
84
+ # supervisor. The supervisor reaps the child and removes the sandbox dir; on
85
+ # the early-failure path (binary_pipe raised before spawn) handle is nil, so
86
+ # the orphaned sandbox is removed directly.
87
+ def cleanup_resources(read_io, write_io, handle, sandbox_dir)
90
88
  read_io.close unless read_io.nil?
91
89
  write_io.close unless write_io.nil?
92
- ensure_reaped(pid)
90
+ if handle
91
+ @supervisor.terminate(handle, grace: GRACE_PERIOD)
92
+ elsif sandbox_dir
93
+ FileUtils.rm_rf(sandbox_dir)
94
+ end
93
95
  restore_original_source
94
- FileUtils.rm_rf(sandbox_dir) if sandbox_dir
95
96
  end
96
97
 
97
98
  def restore_original_source
@@ -125,21 +126,21 @@ class Evilution::Isolation::Fork
125
126
  # never sees EOF and hangs forever. The length prefix makes payload reads
126
127
  # bounded; the waitpid-WNOHANG check inside the poll loop lets us exit
127
128
  # promptly when the child died without writing anything.
128
- def wait_for_result(pid, read_io, timeout)
129
+ def wait_for_result(handle, read_io, timeout)
129
130
  deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
130
131
  loop do
131
132
  remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
132
- return timeout_result(pid) if remaining <= 0
133
+ return timeout_result(handle) if remaining <= 0
133
134
 
134
135
  if read_io.wait_readable([remaining, 0.5].min)
135
136
  payload = read_payload(read_io, deadline)
136
- return reap_and_decode(pid, payload) if payload
137
+ return reap_and_decode(handle, payload) if payload
137
138
  end
138
139
 
139
- next unless ::Process.waitpid(pid, ::Process::WNOHANG)
140
+ next unless @supervisor.reap_nonblock(handle)
140
141
 
141
142
  # Child exited. Drain any final payload that arrived between
142
- # wait_readable timeout and waitpid (race) before declaring empty.
143
+ # wait_readable timeout and the reap (race) before declaring empty.
143
144
  final = read_payload(read_io, Process.clock_gettime(Process::CLOCK_MONOTONIC) + 0.1)
144
145
  return decode_payload(final) if final
145
146
 
@@ -153,13 +154,13 @@ class Evilution::Isolation::Fork
153
154
  # in execute_in_child waiting on a subject grandchild the mutation broke.
154
155
  # wait_for_result has already returned by this point, so the per-mutation
155
156
  # timeout cannot fire. Bound the wait and fall back to the TERM/KILL ladder.
156
- def reap_and_decode(pid, payload)
157
+ def reap_and_decode(handle, payload)
157
158
  deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + REAP_DEADLINE
158
159
  loop do
159
- break if ::Process.waitpid(pid, ::Process::WNOHANG)
160
+ break if @supervisor.reap_nonblock(handle)
160
161
 
161
162
  if Process.clock_gettime(Process::CLOCK_MONOTONIC) >= deadline
162
- terminate_child(pid)
163
+ @supervisor.terminate(handle, grace: GRACE_PERIOD)
163
164
  break
164
165
  end
165
166
  sleep 0.05
@@ -210,50 +211,11 @@ class Evilution::Isolation::Fork
210
211
  { timeout: false, passed: false, error: "empty result from child" }
211
212
  end
212
213
 
213
- def timeout_result(pid)
214
- terminate_child(pid)
214
+ def timeout_result(handle)
215
+ @supervisor.terminate(handle, grace: GRACE_PERIOD)
215
216
  { timeout: true }
216
217
  end
217
218
 
218
- # Defensive reap: if normal control flow raised before wait_for_result
219
- # reaped the child (e.g. Marshal.load on corrupt payload), the child becomes
220
- # a zombie. Reuse terminate_child for the bounded TERM + GRACE_PERIOD + KILL
221
- # ladder so this never hangs the ensure path; swallow SystemCallError so
222
- # cleanup can't mask the primary failure.
223
- def ensure_reaped(pid)
224
- return unless pid
225
-
226
- reaped = ::Process.waitpid(pid, ::Process::WNOHANG)
227
- return if reaped
228
-
229
- terminate_child(pid)
230
- rescue SystemCallError
231
- nil
232
- end
233
-
234
- def terminate_child(pid)
235
- signal_tree("TERM", pid)
236
- _, status = ::Process.waitpid2(pid, ::Process::WNOHANG)
237
- return if status
238
-
239
- sleep(GRACE_PERIOD)
240
- _, status = ::Process.waitpid2(pid, ::Process::WNOHANG)
241
- return if status
242
-
243
- signal_tree("KILL", pid)
244
- Evilution::ProcessCleanup.safe_wait(pid)
245
- end
246
-
247
- # Signal the child's whole process group (-pid) to sweep any grandchildren it
248
- # forked, then the bare pid as a fallback for the case where setpgid failed
249
- # (no group exists, so the group signal is a harmless Errno::ESRCH). Only the
250
- # leader pid is reaped here -- group-killed grandchildren are not our direct
251
- # children, so init reaps them once they die.
252
- def signal_tree(sig, pid)
253
- Evilution::ProcessCleanup.safe_kill(sig, -pid)
254
- Evilution::ProcessCleanup.safe_kill(sig, pid)
255
- end
256
-
257
219
  def classify_status(result)
258
220
  return :timeout if result[:timeout]
259
221
  return :killed if result[:test_crashed]
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../dispatcher"
4
+
5
+ # Owns the per-worker item-timeout deadline clock for the Dispatcher: arming a
6
+ # worker's deadline when it goes busy, re-arming it on each result, surfacing the
7
+ # workers whose deadline has passed, and computing how long IO.select may block.
8
+ # Each worker carries its own deadline so a single stuck worker is reaped in
9
+ # isolation rather than aborting the whole pool (EV-gl1e). Pulling this cohesive
10
+ # timeout concern out of the Dispatcher keeps the dispatcher focused on the
11
+ # collect/recycle orchestration (EV-9mij).
12
+ #
13
+ # `workers` is the Dispatcher's live array (mutated in place as workers recycle),
14
+ # so the tracker always reads the current pool. `clock` is injectable for tests.
15
+ class Evilution::Parallel::WorkQueue::Dispatcher::DeadlineTracker
16
+ def initialize(item_timeout:, workers:, clock: -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) })
17
+ @item_timeout = item_timeout
18
+ @workers = workers
19
+ @clock = clock
20
+ end
21
+
22
+ def enabled?
23
+ !@item_timeout.nil?
24
+ end
25
+
26
+ # Seconds IO.select may block: until the nearest worker deadline (never
27
+ # negative), or the raw timeout when no worker is currently on the clock.
28
+ def select_timeout
29
+ return @item_timeout unless enabled?
30
+
31
+ deadlines = @workers.filter_map(&:deadline)
32
+ return @item_timeout if deadlines.empty?
33
+
34
+ [deadlines.min - now, 0].max
35
+ end
36
+
37
+ # Workers whose deadline has passed while still holding in-flight work.
38
+ def overdue
39
+ return [] unless enabled?
40
+
41
+ moment = now
42
+ @workers.select { |worker| worker.deadline && worker.deadline <= moment && worker.pending.positive? }
43
+ end
44
+
45
+ # Arm a worker's clock when it first goes busy; idempotent for the in-flight
46
+ # item so a refresh does not extend an already-running deadline.
47
+ def start(worker)
48
+ return unless enabled?
49
+
50
+ worker.deadline ||= now + @item_timeout
51
+ end
52
+
53
+ # After a result: re-arm while work remains, otherwise stop the clock.
54
+ def refresh(worker)
55
+ worker.deadline = (now + @item_timeout if enabled? && worker.pending.positive?)
56
+ end
57
+
58
+ private
59
+
60
+ def now
61
+ @clock.call
62
+ end
63
+ end
@@ -12,11 +12,11 @@ class Evilution::Parallel::WorkQueue::Dispatcher
12
12
  @workers = workers
13
13
  @items = items
14
14
  @prefetch = prefetch
15
- @item_timeout = item_timeout
16
15
  @worker_max_items = worker_max_items
17
16
  @recycle_factory = recycle_factory
18
17
  @state = Evilution::Parallel::WorkQueue.send(:const_get, :CollectionState).new(items.length)
19
18
  @retired = []
19
+ @deadlines = DeadlineTracker.new(item_timeout:, workers: @workers)
20
20
  end
21
21
 
22
22
  def run
@@ -48,7 +48,7 @@ class Evilution::Parallel::WorkQueue::Dispatcher
48
48
  result_ios = io_to_worker.keys
49
49
 
50
50
  while @state.in_flight.positive?
51
- readable, = IO.select(result_ios, nil, nil, select_timeout)
51
+ readable, = IO.select(result_ios, nil, nil, @deadlines.select_timeout)
52
52
  reap_timed_out(io_to_worker, result_ios)
53
53
  next if readable.nil?
54
54
 
@@ -58,21 +58,8 @@ class Evilution::Parallel::WorkQueue::Dispatcher
58
58
  end
59
59
  end
60
60
 
61
- def select_timeout
62
- return @item_timeout unless @item_timeout
63
-
64
- deadlines = @workers.filter_map(&:deadline)
65
- return @item_timeout if deadlines.empty?
66
-
67
- [deadlines.min - monotonic, 0].max
68
- end
69
-
70
61
  def reap_timed_out(io_to_worker, result_ios)
71
- return unless @item_timeout
72
-
73
- now = monotonic
74
- stuck = @workers.select { |w| w.deadline && w.deadline <= now && w.pending.positive? }
75
- stuck.each { |w| time_out_worker(w, io_to_worker, result_ios) }
62
+ @deadlines.overdue.each { |worker| time_out_worker(worker, io_to_worker, result_ios) }
76
63
  end
77
64
 
78
65
  def time_out_worker(worker, io_to_worker, result_ios)
@@ -106,7 +93,7 @@ class Evilution::Parallel::WorkQueue::Dispatcher
106
93
  worker.pending -= 1
107
94
  worker.items_completed += 1
108
95
  worker.in_flight_indices.delete(index)
109
- worker.deadline = next_deadline(worker)
96
+ @deadlines.refresh(worker)
110
97
  end
111
98
 
112
99
  # A worker that exited without replying loses only its in-flight item(s)
@@ -187,26 +174,12 @@ class Evilution::Parallel::WorkQueue::Dispatcher
187
174
  worker.send_item(@state.next_index, @items[@state.next_index])
188
175
  @state.next_index += 1
189
176
  @state.in_flight += 1
190
- start_deadline(worker)
191
- end
192
-
193
- def start_deadline(worker)
194
- return unless @item_timeout
195
-
196
- worker.deadline ||= monotonic + @item_timeout
197
- end
198
-
199
- def next_deadline(worker)
200
- return nil unless @item_timeout && worker.pending.positive?
201
-
202
- monotonic + @item_timeout
177
+ @deadlines.start(worker)
203
178
  end
204
179
 
205
180
  def more_to_send?
206
181
  @state.next_index < @items.length
207
182
  end
208
-
209
- def monotonic
210
- Process.clock_gettime(Process::CLOCK_MONOTONIC)
211
- end
212
183
  end
184
+
185
+ require_relative "dispatcher/deadline_tracker"
@@ -2,9 +2,10 @@
2
2
 
3
3
  require_relative "../work_queue"
4
4
  require_relative "../../child_output"
5
+ require_relative "../../process_supervisor"
6
+ require_relative "../../temp_dir_tracker"
5
7
  require_relative "channel"
6
8
  require_relative "channel/frame"
7
- require_relative "worker_registry"
8
9
 
9
10
  class Evilution::Parallel::WorkQueue::Worker
10
11
  Timing = Data.define(:busy, :wall)
@@ -12,14 +13,23 @@ class Evilution::Parallel::WorkQueue::Worker
12
13
  attr_reader :pid, :worker_index, :in_flight_indices
13
14
  attr_accessor :items_completed, :pending, :busy_time, :wall_time, :deadline
14
15
 
15
- def self.spawn(worker_index:, hooks:, &block)
16
+ # EV-dg69 / EV-5rrh step 3: the supervisor owns the worker's process-group
17
+ # isolation, signal-safe registry, group-kill and reap. spawn passes
18
+ # isolate_in_child: false so the worker becomes its own group leader only
19
+ # parent-side, AFTER the supervisor has registered it -- preserving the
20
+ # EV-jwao register-before-isolate ordering (the trap can never see a leader
21
+ # missing from the registry). EV-cnx8 group-leadership (so #kill sweeps the
22
+ # whole subtree) is still established, now by the supervisor's parent-side
23
+ # setpgid.
24
+ def self.spawn(worker_index:, hooks:, supervisor: Evilution::ProcessSupervisor.new, &block)
16
25
  cmd_read, cmd_write = IO.pipe
17
26
  res_read, res_write = IO.pipe
18
27
  [cmd_read, cmd_write, res_read, res_write].each(&:binmode)
19
28
 
20
- pid = Process.fork do
29
+ handle = supervisor.spawn(isolate_in_child: false) do
21
30
  cmd_write.close
22
31
  res_read.close
32
+ install_child_signal_handlers
23
33
  ENV["TEST_ENV_NUMBER"] = test_env_number_for(worker_index)
24
34
  Evilution::ChildOutput.redirect!
25
35
  Loop.run(cmd_read, res_write, hooks: hooks, &block)
@@ -27,36 +37,25 @@ class Evilution::Parallel::WorkQueue::Worker
27
37
 
28
38
  cmd_read.close
29
39
  res_write.close
30
- # Register BEFORE isolating so the trap can never observe a worker that is
31
- # already its own group leader yet missing from the registry (EV-jwao race,
32
- # GH #1333 review): the spawn runs on the same main thread the trap
33
- # interrupts, so a signal arriving between setpgid and register would
34
- # otherwise leak a leader the trap cannot reach. Ordering register first
35
- # leaves only safe windows -- pre-setpgid the child still shares the parent
36
- # group and receives the terminal signal directly; once it is its own
37
- # leader the registry already lists it. Registering unconditionally is safe
38
- # because signal_all's kill(-pid) is a no-op (Errno::ESRCH) for a pid that
39
- # never became a group leader (setpgid failed).
40
- Evilution::Parallel::WorkQueue::WorkerRegistry.register(pid)
41
- isolate_process_group(pid)
42
- new(pid:, cmd_write:, res_read:, worker_index:)
40
+ new(handle:, supervisor:, cmd_write:, res_read:, worker_index:)
43
41
  end
44
42
 
45
- # EV-cnx8 / GH #1324: make the worker its own process-group leader so #kill
46
- # can signal the whole subtree. A mutation's spec may fork a grandchild that
47
- # blocks (e.g. ConditionVariable#wait); when the dispatcher SIGKILLs a stuck
48
- # worker, that grandchild must die with it rather than orphan to init holding
49
- # memory/fds/connections. Done parent-side (before the child forks anything)
50
- # so a failure is visible here instead of being swallowed in the child.
51
- def self.isolate_process_group(pid)
52
- Process.setpgid(pid, pid)
53
- rescue Errno::EACCES, Errno::ESRCH
54
- # EACCES: child already exec'd/changed group; ESRCH: child already exited.
55
- # Both are benign -- reaping handles the child either way.
56
- nil
57
- rescue SystemCallError => e
58
- warn "evilution: could not isolate worker #{pid} into its own process " \
59
- "group (#{e.class}: #{e.message}); grandchildren may survive a kill."
43
# EV-7a91: a worker is the parent of the inner per-mutation Fork children it
# spawns, and those children are their own process-group leaders (EV-2sh8), so
# the Runner's group-kill of the worker never reaches them. On a terminal
# INT/TERM the worker must therefore tear down AND reap the inner children it
# owns before it dies, or they survive as zombies (their parent gone) until an
# ancestor exits. cleanup_all clears any per-mutation sandbox dirs the inner
# children registered in this worker's TempDirTracker.
#
# Each handler, once its cleanup is done, restores the signal's default
# disposition and re-sends the same signal to this process.
def self.install_child_signal_handlers
  ["INT", "TERM"].each do |signal_name|
    handler = proc do
      Evilution::TempDirTracker.cleanup_all
      Evilution::ProcessSupervisor.kill_and_reap_all
      Signal.trap(signal_name, "DEFAULT")
      Process.kill(signal_name, Process.pid)
    end
    Signal.trap(signal_name, handler)
  end
end
61
60
 
62
61
  # EV-kdns / GH #817: translate 0-based worker slot to parallel_tests'
@@ -67,8 +66,10 @@ class Evilution::Parallel::WorkQueue::Worker
67
66
  worker_index.zero? ? "" : (worker_index + 1).to_s
68
67
  end
69
68
 
70
- def initialize(pid:, cmd_write:, res_read:, worker_index:)
71
- @pid = pid
69
+ def initialize(handle:, supervisor:, cmd_write:, res_read:, worker_index:)
70
+ @handle = handle
71
+ @supervisor = supervisor
72
+ @pid = handle.pid
72
73
  @cmd_write = cmd_write
73
74
  @res_read = res_read
74
75
  @worker_index = worker_index
@@ -101,18 +102,10 @@ class Evilution::Parallel::WorkQueue::Worker
101
102
  end
102
103
 
103
104
  # SIGKILL the worker's whole process group (negative pid), reaping any
104
- # grandchildren it forked. Falls back to the single pid if the group is gone
105
- # -- already reaped, or setpgid did not take in the child.
105
+ # grandchildren it forked, with the bare pid as a fallback for the case where
106
+ # the group is gone (already reaped, or setpgid did not take).
106
107
  def kill
107
- Process.kill("KILL", -@pid)
108
- rescue Errno::ESRCH
109
- kill_pid
110
- end
111
-
112
- def kill_pid
113
- Process.kill("KILL", @pid)
114
- rescue Errno::ESRCH
115
- nil
108
+ @supervisor.signal_group("KILL", @handle)
116
109
  end
117
110
 
118
111
  def close_pipes
@@ -120,14 +113,11 @@ class Evilution::Parallel::WorkQueue::Worker
120
113
  @res_read.close unless @res_read.closed?
121
114
  end
122
115
 
116
+ # Reap the leader and drop it from the registry so the trap never signals a
117
+ # group whose pid the OS may have recycled. ECHILD-tolerant; unregister is a
118
+ # no-op if it was never registered.
123
119
  def reap
124
- Process.wait(@pid)
125
- rescue Errno::ECHILD
126
- nil
127
- ensure
128
- # Drop the pgid once the leader is reaped so the trap never signals a group
129
- # whose pid the OS may have recycled. No-op if it was never registered.
130
- Evilution::Parallel::WorkQueue::WorkerRegistry.unregister(@pid)
120
+ @supervisor.reap(@handle)
131
121
  end
132
122
 
133
123
  def retire