henitai 0.1.10 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "date"
4
- require "digest"
5
4
  require "fileutils"
6
5
  require "json"
7
6
  require "sqlite3"
8
7
  require "time"
9
- require "unparser"
8
+ require_relative "mutant_identity"
10
9
 
11
10
  module Henitai
12
11
  # Persists mutant outcomes across runs in a lightweight SQLite database.
@@ -87,7 +86,7 @@ module Henitai
87
86
  with_database do |db|
88
87
  ensure_schema(db)
89
88
  db.transaction do
90
- insert_run(db, result, version, recorded_at)
89
+ insert_run(db, result, version, recorded_at) unless partial_rerun?(result)
91
90
  Array(result.mutants).each do |mutant|
92
91
  upsert_mutant(db, mutant, version, recorded_at)
93
92
  end
@@ -108,6 +107,10 @@ module Henitai
108
107
 
109
108
  private
110
109
 
110
# Tells whether +result+ reports itself as a partial rerun.
#
# Results that do not implement +partial_rerun?+ are treated as full runs.
#
# @param result [Object] run result; may or may not respond to +partial_rerun?+
# @return [Boolean, Object] false when unsupported, otherwise whatever
#   +result.partial_rerun?+ returns
def partial_rerun?(result)
  return false unless result.respond_to?(:partial_rerun?)

  result.partial_rerun?
end
113
+
111
114
  def with_database
112
115
  db = SQLite3::Database.new(path)
113
116
  db.results_as_hash = true
@@ -140,25 +143,7 @@ module Henitai
140
143
  end
141
144
 
142
145
  def stable_mutant_id(mutant)
143
- Digest::SHA256.hexdigest(
144
- [
145
- mutant.subject.expression,
146
- mutant.operator,
147
- mutant.description,
148
- mutant.location[:file],
149
- mutant.location[:start_line],
150
- mutant.location[:end_line],
151
- mutant.location[:start_col],
152
- mutant.location[:end_col],
153
- mutation_signature(mutant)
154
- ].join("\0")
155
- )
156
- end
157
-
158
- def mutation_signature(mutant)
159
- Unparser.unparse(mutant.mutated_node)
160
- rescue StandardError
161
- mutant.mutated_node.class.name
146
+ MutantIdentity.stable_id(mutant)
162
147
  end
163
148
 
164
149
  def mutation_history_entry(mutant, version, recorded_at)
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+ require "unparser"
5
+
6
module Henitai
  # Derives a SHA256 identity for a mutant that stays the same across runs.
  #
  # The digest covers the subject expression, the operator, the description,
  # the file and a signature of the mutated node. Line and column numbers are
  # not part of the input, so the identity does not change when code merely
  # moves within its file.
  module MutantIdentity
    class << self
      # @param mutant [#subject, #operator, #description, #location, #mutated_node]
      # @return [String] hex-encoded SHA256 digest of the identity components
      def stable_id(mutant)
        payload = identity_components(mutant).join("\0")
        Digest::SHA256.hexdigest(payload)
      end

      private

      # Ordered list of values that feed the digest.
      def identity_components(mutant)
        parts = []
        parts << mutant.subject.expression
        parts << mutant.operator
        parts << mutant.description
        parts << mutant.location[:file]
        parts << mutation_signature(mutant)
        parts
      end

      # Source text of the mutated node; falls back to the node's class name
      # when unparsing fails with a StandardError.
      def mutation_signature(mutant)
        Unparser.unparse(mutant.mutated_node)
      rescue StandardError
        mutant.mutated_node.class.name
      end
    end
  end
end
@@ -31,15 +31,26 @@ module Henitai
31
31
  start_parallel_stdin_watcher(context, stdin_pipe)
32
32
  parallel_workers(context, process_mutant).each(&:join)
33
33
  ensure
34
+ teardown_parallel_execution(context)
35
+ end
36
+
37
+ private
38
+
39
+ attr_reader :worker_count
40
+
41
# Cleanup step run after the parallel workers have finished (or failed).
#
# Stops the stdin watcher, restores signal traps, optionally prints the
# scheduler diagnostics, and finally re-raises whatever the shared state
# recorded: a stored error first, otherwise Interrupt when the run was
# flagged as stopping.
#
# @param context [#state, nil] parallel execution context; may be nil
# @raise [Exception] the error stored under +:error+ in the context state
# @raise [Interrupt] when the state has a truthy +:stopping+ entry
def teardown_parallel_execution(context)
  stop_parallel_stdin_watcher(context)
  restore_parallel_signal_traps(context)
  emit_scheduler_diagnostics if Integration::SchedulerDiagnostics.enabled?

  run_state = context&.state
  failure = run_state&.fetch(:error, nil)
  raise failure if failure
  raise Interrupt if run_state&.fetch(:stopping, false)
end
39
48
 
40
- private
41
-
42
- attr_reader :worker_count
49
# Prints the scheduler diagnostics summary via +warn+, one line for the peak
# number of concurrent children and one for the recorded child intervals.
def emit_scheduler_diagnostics
  stats = Integration::SchedulerDiagnostics.summary

  peak = stats[:max_concurrent]
  warn "[henitai-scheduler] max_concurrent_children=#{peak}"

  intervals = stats[:intervals]
  warn "[henitai-scheduler] child_intervals=#{intervals.inspect}"
end
43
54
 
44
55
  def build_parallel_queue(mutants)
45
56
  Queue.new.tap { |queue| mutants.each { |mutant| queue << mutant } }
@@ -84,20 +95,16 @@ module Henitai
84
95
  end
85
96
 
86
97
  def parallel_workers(context, process_mutant)
87
- Array.new(worker_count) { Thread.new { process_parallel_worker(context, process_mutant) } }
98
+ Array.new(worker_count) do
99
+ Thread.new { process_parallel_worker(context, process_mutant) }
100
+ end
88
101
  end
89
102
 
90
103
  def process_parallel_worker(context, process_mutant)
91
104
  loop do
92
105
  break if context.state[:stopping]
93
106
 
94
- process_mutant.call(
95
- context.queue.pop(true),
96
- context.integration,
97
- context.config,
98
- context.progress_reporter,
99
- context.mutex
100
- )
107
+ run_one_mutant(context, process_mutant)
101
108
  rescue ThreadError
102
109
  break
103
110
  rescue StandardError => e
@@ -106,6 +113,17 @@ module Henitai
106
113
  end
107
114
  end
108
115
 
116
# Takes the next mutant off the queue without blocking and hands it to
# +process_mutant+ together with the collaborators held by +context+.
#
# @param context [#queue, #integration, #config, #progress_reporter, #mutex]
# @param process_mutant [#call] receives (mutant, integration, config,
#   progress_reporter, mutex)
# @return [Object] whatever +process_mutant+ returns
# @raise [ThreadError] when the queue is empty (non-blocking pop)
def run_one_mutant(context, process_mutant)
  next_mutant = context.queue.pop(true)
  collaborators = [
    context.integration,
    context.config,
    context.progress_reporter,
    context.mutex
  ]
  process_mutant.call(next_mutant, *collaborators)
end
126
+
109
127
  def stop_parallel_stdin_watcher(context)
110
128
  context&.stdin_watcher&.kill
111
129
  end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
module Henitai
  # Wakeup pipe used to interrupt child-process wait loops when CHLD arrives.
  #
  # A signal handler writes one byte into a pipe; the wait loop selects on the
  # read end, so a signal (or an explicit #signal call) ends the wait early.
  #
  # #install and #close are idempotent: installing twice keeps the handler
  # that was active before the first install, and closing twice does not
  # re-trap the signal (which would clobber a handler installed in between).
  class ProcessWakeup
    # @param signal_name [String] signal whose arrival should wake the waiter
    def initialize(signal_name: "CHLD")
      @signal_name = signal_name
      @reader, @writer = IO.pipe
      @installed = false
    end

    # Installs the signal handler and remembers the handler it replaced.
    #
    # @return [ProcessWakeup] self, so the call can be chained after +new+
    def install
      return self if @installed

      @previous_handler = Signal.trap(signal_name) { signal }
      @installed = true
      self
    end

    # Waits until the pipe becomes readable or +timeout+ seconds elapse.
    #
    # @param timeout [Numeric, nil] seconds to wait; nil waits indefinitely
    # @return [Array, nil] the IO.select result, or nil on timeout / EINTR
    def wait(timeout)
      # rubocop:disable Lint/IncompatibleIoSelectWithFiberScheduler
      IO.select([reader], nil, nil, timeout)
      # rubocop:enable Lint/IncompatibleIoSelectWithFiberScheduler
    rescue Errno::EINTR
      nil
    end

    # Discards every pending wakeup byte so the next #wait blocks again.
    #
    # @return [nil]
    def drain
      loop do
        reader.read_nonblock(4096)
      end
    rescue IO::WaitReadable, EOFError
      nil
    end

    # Queues one wakeup byte. Never raises for a full or closed pipe, which
    # keeps it usable from inside a signal handler.
    def signal
      writer.write_nonblock(".")
    rescue IO::WaitWritable, IOError, Errno::EPIPE
      nil
    end

    # Restores the handler replaced by #install and closes both pipe ends.
    # Safe to call more than once and without a prior #install.
    def close
      restore_previous_handler
    ensure
      close_pipe
    end

    private

    attr_reader :previous_handler, :reader, :signal_name, :writer

    # Puts back the handler captured by #install, at most once per install.
    def restore_previous_handler
      return unless @installed

      handler = previous_handler
      @installed = false
      @previous_handler = nil
      Signal.trap(signal_name, handler) if handler
    end

    # Closes both ends; the writer is closed even if closing the reader raises.
    def close_pipe
      reader.close unless reader.closed?
    ensure
      writer.close unless writer.closed?
    end
  end
end
@@ -0,0 +1,434 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Henitai
4
+ # Flat, single-threaded process-slot scheduler for parallel mutation runs.
5
+ #
6
+ # Owns the process table: it is the sole caller of Process.wait* so there
7
+ # are no race conditions between threads reaping the same child.
8
+ class ProcessWorkerRunner # rubocop:disable Metrics/ClassLength
9
+ PROCESS_DRAIN_WINDOW = 0.2
10
+
11
+ # Default bridge to process and signal primitives used by the scheduler.
12
+ class Runtime
13
+ def clock_gettime(clock_id)
14
+ Process.clock_gettime(clock_id)
15
+ end
16
+
17
+ def wait2(pid, flags = nil)
18
+ Process.wait2(pid, flags)
19
+ end
20
+
21
+ def kill(signal, pid)
22
+ Process.kill(signal, pid)
23
+ end
24
+
25
+ def wait(pid)
26
+ Process.wait(pid)
27
+ end
28
+
29
+ def trap(signal, handler = nil, &block)
30
+ Kernel.trap(signal, handler || block)
31
+ end
32
+ end
33
+
34
+ # Tracks one in-flight mutant child process.
35
+ Slot = Struct.new(
36
+ :slot_id, :mutant, :pid, :started_at_monotonic, :timeout,
37
+ :log_paths, :retry_count, :draining, :term_sent_at_monotonic,
38
+ :forced_outcome
39
+ )
40
+
41
+ def initialize(worker_count:, runtime: Runtime.new, wakeup: nil)
42
+ @worker_count = worker_count
43
+ @runtime = runtime
44
+ @wakeup = wakeup
45
+ @shutdown_requested = false
46
+ end
47
+
48
+ # Trigger a graceful shutdown from outside the event loop.
49
+ # Safe to call from any thread. The loop observes the flag on its next tick.
50
+ def request_shutdown
51
+ @shutdown_requested = true
52
+ @wakeup&.signal
53
+ end
54
+
55
+ # Runs all mutants and returns an array of ScenarioExecutionResult.
56
+ #
57
+ # @param mutants [Array<Mutant>]
58
+ # @param integration [Integration::Base]
59
+ # @param config [Configuration]
60
+ # @param progress_reporter [#progress, nil]
61
+ # @param options [Hash]
62
+ # @return [Array<ScenarioExecutionResult>]
63
+ def run(mutants, integration, config, progress_reporter, options = {})
64
+ Integration::SchedulerDiagnostics.reset! if Integration::SchedulerDiagnostics.enabled?
65
+ prepare_run(mutants, integration, config, progress_reporter, options)
66
+
67
+ event_loop
68
+ @results
69
+ ensure
70
+ @wakeup&.close
71
+ @wakeup = nil
72
+ end
73
+
74
+ private
75
+
76
+ attr_reader :worker_count, :pending, :slots, :pid_to_slot, :results,
77
+ :integration, :config, :progress_reporter, :runtime
78
+
79
+ def event_loop
80
+ saved_traps = install_signal_traps
81
+ loop do
82
+ break if done?
83
+
84
+ break if process_cycle == :shutdown
85
+ end
86
+ ensure
87
+ restore_signal_traps(saved_traps)
88
+ raise Interrupt if @shutdown_requested
89
+ end
90
+
91
+ def process_cycle
92
+ fill_idle_slots unless @shutdown_requested
93
+ reap_all_completed_children
94
+ check_timeouts
95
+ fill_idle_slots unless @shutdown_requested
96
+ return handle_shutdown if @shutdown_requested
97
+
98
+ drain_draining_slots if draining_slots?
99
+ fill_idle_slots unless @shutdown_requested
100
+ return :done if done?
101
+
102
+ wait_for_next_event
103
+ nil
104
+ end
105
+
106
+ def handle_shutdown
107
+ interrupt_active_slots
108
+ drain_draining_slots
109
+ :shutdown
110
+ end
111
+
112
+ def done?
113
+ pending.empty? && slots.empty?
114
+ end
115
+
116
+ def fill_idle_slots
117
+ while slots.size < worker_count && !pending.empty?
118
+ mutant = pending.shift
119
+ spawn_into_slot(mutant)
120
+ end
121
+ end
122
+
123
+ def spawn_into_slot(mutant)
124
+ test_files = resolve_test_files(mutant)
125
+ mutant.covered_by = test_files if mutant.respond_to?(:covered_by=)
126
+ mutant.tests_completed = test_files.size if mutant.respond_to?(:tests_completed=)
127
+ handle = integration.spawn_mutant(mutant: mutant, test_files: test_files)
128
+ register_slot(handle, mutant)
129
+ rescue StandardError => e
130
+ record_spawn_failure(mutant, e)
131
+ end
132
+
133
+ def register_slot(handle, mutant)
134
+ slot_id = next_slot_id!
135
+ slot = build_slot(slot_id, mutant, handle)
136
+ slots[slot_id] = slot
137
+ pid_to_slot[handle.pid] = slot_id
138
+ Integration::SchedulerDiagnostics.child_started(handle.pid)
139
+ end
140
+
141
+ def build_slot(slot_id, mutant, handle)
142
+ Slot.new(
143
+ slot_id, mutant, handle.pid,
144
+ monotonic_time,
145
+ config.timeout, handle.log_paths, 0, false, nil, nil
146
+ )
147
+ end
148
+
149
+ def reap_all_completed_children
150
+ loop do
151
+ pid, status = runtime.wait2(-1, Process::WNOHANG)
152
+ break unless pid
153
+
154
+ complete_slot(pid, status)
155
+ end
156
+ rescue Errno::ECHILD
157
+ nil
158
+ end
159
+
160
+ def complete_slot(pid, wait_result)
161
+ slot_id = pid_to_slot.delete(pid)
162
+ return unless slot_id
163
+
164
+ slot = slots[slot_id]
165
+ return unless slot
166
+
167
+ Integration::SchedulerDiagnostics.child_ended(pid)
168
+ result = integration.build_result(wait_result, slot.log_paths)
169
+ dispatch_slot_result(slot, result)
170
+ end
171
+
172
+ def dispatch_slot_result(slot, result)
173
+ if should_retry?(slot, result)
174
+ retry_slot(slot)
175
+ else
176
+ slots.delete(slot.slot_id)
177
+ slot.mutant.status = result.status
178
+ results << result
179
+ progress_reporter&.progress(slot.mutant, scenario_result: result)
180
+ end
181
+ end
182
+
183
+ # Per-slot timeout check. Must be called after reap_all_completed_children
184
+ # so that naturally-exited processes are already removed from slots.
185
+ def check_timeouts
186
+ now = monotonic_time
187
+ slots.each_value do |slot|
188
+ next if slot.draining
189
+ next unless now >= slot.started_at_monotonic + slot.timeout
190
+
191
+ # Final targeted reap: if the child already exited, classify it normally.
192
+ pid, status = runtime.wait2(slot.pid, Process::WNOHANG)
193
+ if pid
194
+ complete_slot(pid, status)
195
+ else
196
+ slot.forced_outcome = :timeout
197
+ slot.draining = true
198
+ end
199
+ end
200
+ end
201
+
202
+ def draining_slots?
203
+ slots.any? { |_, slot| slot.draining }
204
+ end
205
+
206
+ # Two-phase broadcast cleanup for all slots that are in draining state.
207
+ #
208
+ # Precision rule: before signalling, do one final WNOHANG pass to catch
209
+ # processes that exited naturally in the window between check_timeouts and
210
+ # now. If SIGTERM gets ESRCH, the process is already gone — we must not
211
+ # force-label those as :timeout.
212
+ def drain_draining_slots
213
+ draining = draining_slots
214
+ return if draining.empty?
215
+
216
+ prune_raced_draining_slots(draining)
217
+
218
+ return if draining.empty?
219
+
220
+ broadcast_term(draining)
221
+ wait_for_drain_window
222
+ signal_draining_slots(draining)
223
+ reap_and_remove_draining(draining)
224
+ end
225
+
226
+ def draining_slots
227
+ slots.select { |_, slot| slot.draining }
228
+ end
229
+
230
+ def prune_raced_draining_slots(draining)
231
+ draining.reject! do |_, slot|
232
+ pid, status = wnohang_reap(slot.pid)
233
+ next false unless pid
234
+
235
+ complete_slot(pid, status)
236
+ true
237
+ end
238
+ end
239
+
240
+ def wait_for_drain_window
241
+ @wakeup&.wait(PROCESS_DRAIN_WINDOW)
242
+ @wakeup&.drain
243
+ end
244
+
245
+ def signal_draining_slots(draining)
246
+ draining.each_value { |slot| signal_process_group(slot.pid, :SIGKILL) }
247
+ end
248
+
249
+ def broadcast_term(draining)
250
+ now = monotonic_time
251
+ draining.each_value do |slot|
252
+ slot.term_sent_at_monotonic = now
253
+ signal_process_group(slot.pid, :SIGTERM)
254
+ end
255
+ end
256
+
257
+ # After SIGKILL window: blocking reap each slot, then build its result.
258
+ #
259
+ # Interrupted slots are cleaned up but produce no result — the scheduler
260
+ # is shutting down and does not emit verdicts for in-flight mutants.
261
+ #
262
+ # For timeout slots: a real exit status only wins if observed before any
263
+ # parent signal was sent. Once SIGTERM has been dispatched, the forced
264
+ # outcome is authoritative — a child handling SIGTERM and exiting 0 must
265
+ # not be misclassified as :survived.
266
+ def reap_and_remove_draining(draining) # rubocop:disable Metrics/AbcSize
267
+ draining.each_value do |slot|
268
+ # One last WNOHANG before blocking: catches processes that exited
269
+ # between SIGKILL and here.
270
+ _, final_status = wnohang_reap(slot.pid)
271
+ reap_pid(slot.pid) unless final_status
272
+
273
+ pid_to_slot.delete(slot.pid)
274
+ slots.delete(slot.slot_id)
275
+ Integration::SchedulerDiagnostics.child_ended(slot.pid)
276
+
277
+ next if slot.forced_outcome == :interrupted
278
+
279
+ result = build_drain_result(slot, final_status)
280
+ slot.mutant.status = result.status
281
+ results << result
282
+ progress_reporter&.progress(slot.mutant, scenario_result: result)
283
+ end
284
+ end
285
+
286
+ # Choose result: use real exit status only if observed before any parent
287
+ # signal was sent. After SIGTERM, the forced outcome is authoritative.
288
+ def build_drain_result(slot, final_status)
289
+ if final_status&.exited? && slot.term_sent_at_monotonic.nil?
290
+ integration.build_result(final_status, slot.log_paths)
291
+ else
292
+ integration.build_result(slot.forced_outcome || :timeout, slot.log_paths)
293
+ end
294
+ end
295
+
296
+ def install_signal_traps
297
+ saved = {}
298
+ %w[INT TERM HUP].each do |sig|
299
+ saved[sig] = runtime.trap(sig) { @shutdown_requested = true }
300
+ end
301
+ saved
302
+ end
303
+
304
+ def restore_signal_traps(saved)
305
+ saved&.each { |sig, handler| runtime.trap(sig, handler) }
306
+ end
307
+
308
+ def interrupt_active_slots
309
+ slots.each_value do |slot|
310
+ next if slot.draining
311
+
312
+ slot.forced_outcome = :interrupted
313
+ slot.draining = true
314
+ end
315
+ end
316
+
317
+ def should_retry?(slot, result)
318
+ !@shutdown_requested && result.survived? && slot.retry_count < config.max_flaky_retries.to_i
319
+ end
320
+
321
+ def prepare_run(mutants, integration, config, progress_reporter, options)
322
+ @pending = mutants.dup
323
+ @slots = {}
324
+ @pid_to_slot = {}
325
+ @results = []
326
+ @next_slot_id = 0
327
+ @integration = integration
328
+ @config = config
329
+ @progress_reporter = progress_reporter
330
+ @options = options
331
+ @wakeup = Henitai::ProcessWakeup.new.install if @wakeup.nil?
332
+ end
333
+
334
+ def next_event_timeout
335
+ now = monotonic_time
336
+ slot_timeouts = slots.each_value.filter_map do |slot|
337
+ remaining_slot_timeout(slot, now)
338
+ end
339
+
340
+ slot_timeouts.min
341
+ end
342
+
343
+ def remaining_slot_timeout(slot, now)
344
+ deadline =
345
+ if slot.draining
346
+ slot.term_sent_at_monotonic + PROCESS_DRAIN_WINDOW
347
+ else
348
+ slot.started_at_monotonic + slot.timeout
349
+ end
350
+ remaining = deadline - now
351
+ remaining.positive? ? remaining : 0.0
352
+ end
353
+
354
+ def wait_for_next_event
355
+ @wakeup&.wait(next_event_timeout)
356
+ @wakeup&.drain
357
+ end
358
+
359
+ def retry_slot(slot) # rubocop:disable Metrics/AbcSize
360
+ slot.retry_count += 1
361
+ test_files = resolve_test_files(slot.mutant)
362
+ handle = integration.spawn_mutant(mutant: slot.mutant, test_files: test_files)
363
+ slot.pid = handle.pid
364
+ slot.log_paths = handle.log_paths
365
+ slot.started_at_monotonic = monotonic_time
366
+ slot.draining = false
367
+ slot.term_sent_at_monotonic = nil
368
+ slot.forced_outcome = nil
369
+ pid_to_slot[handle.pid] = slot.slot_id
370
+ Integration::SchedulerDiagnostics.child_started(handle.pid)
371
+ rescue StandardError => e
372
+ slots.delete(slot.slot_id)
373
+ record_spawn_failure(slot.mutant, e)
374
+ end
375
+
376
+ def record_spawn_failure(mutant, error)
377
+ result = ScenarioExecutionResult.new(
378
+ status: :compile_error,
379
+ stdout: "",
380
+ stderr: "spawn failed: #{error.message}",
381
+ log_path: "/dev/null",
382
+ exit_status: nil
383
+ )
384
+ mutant.status = result.status
385
+ results << result
386
+ progress_reporter&.progress(mutant, scenario_result: result)
387
+ end
388
+
389
+ def wnohang_reap(pid)
390
+ runtime.wait2(pid, Process::WNOHANG)
391
+ rescue Errno::ECHILD, Errno::ESRCH
392
+ nil
393
+ end
394
+
395
+ def signal_process_group(pid, signal)
396
+ runtime.kill(signal, -pid)
397
+ rescue Errno::ESRCH
398
+ nil
399
+ rescue Errno::EPERM
400
+ # Process group not yet established; fall back to signalling the pid.
401
+ begin
402
+ runtime.kill(signal, pid)
403
+ rescue Errno::ESRCH
404
+ nil
405
+ end
406
+ end
407
+
408
+ def reap_pid(pid)
409
+ runtime.wait(pid)
410
+ rescue Errno::ECHILD, Errno::ESRCH
411
+ nil
412
+ end
413
+
414
+ def monotonic_time
415
+ runtime.clock_gettime(Process::CLOCK_MONOTONIC)
416
+ end
417
+
418
+ def resolve_test_files(mutant)
419
+ if @options.key?(:test_file_resolver)
420
+ @options[:test_file_resolver].call(mutant)
421
+ elsif @options.key?(:test_files)
422
+ @options[:test_files]
423
+ else
424
+ integration.select_tests(mutant.subject)
425
+ end
426
+ end
427
+
428
+ def next_slot_id!
429
+ id = @next_slot_id
430
+ @next_slot_id += 1
431
+ id
432
+ end
433
+ end
434
+ end