evilution 0.32.0 → 0.34.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.beads/interactions.jsonl +28 -0
- data/.rubocop_todo.yml +1 -0
- data/CHANGELOG.md +31 -0
- data/README.md +12 -10
- data/docs/integrations.md +15 -0
- data/docs/isolation.md +46 -2
- data/lib/evilution/baseline.rb +11 -4
- data/lib/evilution/cli/parser/options_builder.rb +17 -0
- data/lib/evilution/config/validators/example_targeting_strategy.rb +22 -0
- data/lib/evilution/config.rb +16 -2
- data/lib/evilution/coverage/digest.rb +16 -0
- data/lib/evilution/coverage/map.rb +64 -0
- data/lib/evilution/coverage/map_builder.rb +82 -0
- data/lib/evilution/coverage/map_store.rb +87 -0
- data/lib/evilution/coverage/recorder.rb +85 -0
- data/lib/evilution/coverage.rb +8 -0
- data/lib/evilution/coverage_example_filter.rb +41 -0
- data/lib/evilution/integration/loading/test_load_path.rb +76 -0
- data/lib/evilution/integration/minitest.rb +5 -1
- data/lib/evilution/integration/rspec/state_guard/configuration_state.rb +72 -0
- data/lib/evilution/integration/rspec/state_guard/configuration_streams.rb +45 -0
- data/lib/evilution/integration/rspec/state_guard.rb +3 -1
- data/lib/evilution/integration/test_unit.rb +12 -4
- data/lib/evilution/isolation/fork.rb +38 -50
- data/lib/evilution/parallel/work_queue/dispatcher/deadline_tracker.rb +63 -0
- data/lib/evilution/parallel/work_queue/dispatcher.rb +70 -25
- data/lib/evilution/parallel/work_queue/worker.rb +50 -14
- data/lib/evilution/parallel/work_queue.rb +8 -0
- data/lib/evilution/process_supervisor.rb +259 -0
- data/lib/evilution/reporter/cli/line_formatters/unresolved_rate_warning.rb +50 -0
- data/lib/evilution/reporter/cli/metrics_block.rb +2 -0
- data/lib/evilution/runner/baseline_runner.rb +52 -0
- data/lib/evilution/runner/isolation_resolver.rb +106 -12
- data/lib/evilution/runner/mutation_executor/strategy/parallel.rb +28 -1
- data/lib/evilution/runner.rb +7 -0
- data/lib/evilution/spec_resolver.rb +147 -9
- data/lib/evilution/spec_selector.rb +14 -4
- data/lib/evilution/version.rb +1 -1
- data/lib/evilution.rb +1 -0
- data/lib/tasks/stress.rake +15 -0
- data/scripts/canary_manifest.yml +47 -0
- data/scripts/compare_targeting +277 -0
- data/scripts/compare_targeting.example.yml +24 -0
- metadata +20 -2
|
@@ -3,6 +3,11 @@
|
|
|
3
3
|
class Evilution::SpecResolver
|
|
4
4
|
STRIPPABLE_PREFIXES = %w[lib/ app/].freeze
|
|
5
5
|
CONTROLLER_PREFIX = "controllers/"
|
|
6
|
+
# Conventional test subdirectories appended to @test_dir. Real-world gems
|
|
7
|
+
# frequently park specs under spec/unit or spec/lib (test/unit, test/lib)
|
|
8
|
+
# rather than mirroring the lib/ tree 1:1 (EV-z7f5 / GH #1325).
|
|
9
|
+
CONVENTIONAL_SUBDIRS = %w[unit lib].freeze
|
|
10
|
+
MINITEST_SUFFIX = "_test.rb"
|
|
6
11
|
|
|
7
12
|
def initialize(test_dir: "spec", test_suffix: "_spec.rb", request_dir: "requests")
|
|
8
13
|
@test_dir = test_dir
|
|
@@ -23,8 +28,54 @@ class Evilution::SpecResolver
|
|
|
23
28
|
Array(source_paths).filter_map { |path| call(path) }.uniq
|
|
24
29
|
end
|
|
25
30
|
|
|
31
|
+
# Like #call, but returns an ARRAY of test files and additionally covers the
|
|
32
|
+
# dir-grouped layout (a source file's tests live in a directory named after
|
|
33
|
+
# the source basename, e.g. lib/x/branch.rb -> test/unit/branch/*_test.rb,
|
|
34
|
+
# rather than a single mirror file). The deterministic file mirror from #call
|
|
35
|
+
# always wins; only when no mirror file exists is the first matching grouped
|
|
36
|
+
# directory expanded into its test files (EV-bi41). Returns nil when nothing
|
|
37
|
+
# resolves.
|
|
38
|
+
def resolve_specs(source_path, spec_pattern: nil)
|
|
39
|
+
return nil if source_path.nil? || source_path.empty?
|
|
40
|
+
|
|
41
|
+
file = call(source_path, spec_pattern: spec_pattern)
|
|
42
|
+
return [file] if file
|
|
43
|
+
|
|
44
|
+
resolve_grouped_dir(source_path, spec_pattern: spec_pattern)
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# Best-guess candidate for an unresolved source, found by basename glob
|
|
48
|
+
# rather than the deterministic path mirroring used by #call. Used only to
|
|
49
|
+
# enrich the "no matching test" hint (EV-z7f5 / GH #1325) — never to pick a
|
|
50
|
+
# test to run — so a fuzzy substring match is acceptable here. Returns the
|
|
51
|
+
# match with the shortest path string, or nil when nothing resembles the basename.
|
|
52
|
+
def suggest(source_path)
|
|
53
|
+
return nil if source_path.nil? || source_path.empty?
|
|
54
|
+
|
|
55
|
+
stem = File.basename(normalize_path(source_path), ".rb")
|
|
56
|
+
return nil if stem.empty?
|
|
57
|
+
|
|
58
|
+
suggestion_globs(stem).flat_map { |glob| glob_relative(glob) }.uniq.min_by(&:length)
|
|
59
|
+
end
|
|
60
|
+
|
|
26
61
|
private
|
|
27
62
|
|
|
63
|
+
# Glob for project-relative paths. Mirrors #call's project_relative_exists?
|
|
64
|
+
# contract: when run inside an isolated worker chdir'd into a sandbox, glob
|
|
65
|
+
# against PROJECT_ROOT so suggestions still find real project files. base:
|
|
66
|
+
# already yields paths relative to the root, matching the CWD-glob shape.
|
|
67
|
+
def glob_relative(glob)
|
|
68
|
+
return Dir.glob(glob) unless Evilution.in_isolated_worker?
|
|
69
|
+
|
|
70
|
+
Dir.glob(glob, base: Evilution::PROJECT_ROOT)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def suggestion_globs(stem)
|
|
74
|
+
globs = ["#{@test_dir}/**/*#{stem}*#{@test_suffix}"]
|
|
75
|
+
globs << "#{@test_dir}/**/test_#{stem}.rb" if @test_suffix == MINITEST_SUFFIX
|
|
76
|
+
globs
|
|
77
|
+
end
|
|
78
|
+
|
|
28
79
|
# Existence check that succeeds against the current CWD. When the caller
|
|
29
80
|
# is an isolated worker that chdir'd into a per-mutation sandbox (Evilution
|
|
30
81
|
# signals this via in_isolated_worker?), also try PROJECT_ROOT so the
|
|
@@ -40,6 +91,56 @@ class Evilution::SpecResolver
|
|
|
40
91
|
candidates.select { |path| File.fnmatch?(pattern, path, File::FNM_PATHNAME | File::FNM_EXTGLOB) }
|
|
41
92
|
end
|
|
42
93
|
|
|
94
|
+
# Dir-grouped resolution: find the first candidate directory that exists and
|
|
95
|
+
# expand it into its test files. Ranked below #call's file mirrors (callers
|
|
96
|
+
# try #call first), so a 1:1 spec always wins when present.
|
|
97
|
+
def resolve_grouped_dir(source_path, spec_pattern: nil)
|
|
98
|
+
dir = directory_candidates(normalize_path(source_path)).find { |c| project_relative_dir?(c) }
|
|
99
|
+
return nil unless dir
|
|
100
|
+
|
|
101
|
+
files = test_files_in(dir)
|
|
102
|
+
files = filter_by_pattern(files, spec_pattern) if spec_pattern
|
|
103
|
+
files.empty? ? nil : files
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
# The grouped-directory analogue of #mirror_candidates: cross every
|
|
107
|
+
# conventional root with each layout variant, using the source basename
|
|
108
|
+
# (suffix dropped) as a DIRECTORY name rather than a test FILE name.
|
|
109
|
+
def directory_candidates(source_path)
|
|
110
|
+
stripped = strip_source_prefix(source_path)
|
|
111
|
+
mirror_variants(stripped).flat_map { |variant| grouped_dir_candidates(variant) }.uniq
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
def grouped_dir_candidates(variant)
|
|
115
|
+
dir, _, file = variant.rpartition("/")
|
|
116
|
+
name = file.delete_suffix(@test_suffix)
|
|
117
|
+
return [] if name.empty?
|
|
118
|
+
|
|
119
|
+
relative = dir.empty? ? name : "#{dir}/#{name}"
|
|
120
|
+
roots.map { |root| "#{root}/#{relative}" }
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
def strip_source_prefix(source_path)
|
|
124
|
+
base = source_path.sub(/\.rb\z/, @test_suffix)
|
|
125
|
+
prefix = STRIPPABLE_PREFIXES.find { |p| source_path.start_with?(p) }
|
|
126
|
+
prefix ? base.delete_prefix(prefix) : base
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
# Every test file under a grouped directory: the mirrored suffix plus, for
|
|
130
|
+
# minitest/test-unit, the `test_` prefix convention. Sorted for determinism.
|
|
131
|
+
def test_files_in(dir)
|
|
132
|
+
globs = ["#{dir}/**/*#{@test_suffix}", ("#{dir}/**/test_*.rb" if @test_suffix == MINITEST_SUFFIX)]
|
|
133
|
+
globs.compact.flat_map { |glob| glob_relative(glob) }.uniq.sort
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
# Directory analogue of #project_relative_exists?.
|
|
137
|
+
def project_relative_dir?(path)
|
|
138
|
+
return true if File.directory?(path)
|
|
139
|
+
return false unless Evilution.in_isolated_worker?
|
|
140
|
+
|
|
141
|
+
File.directory?(File.expand_path(path, Evilution::PROJECT_ROOT))
|
|
142
|
+
end
|
|
143
|
+
|
|
43
144
|
def normalize_path(path)
|
|
44
145
|
path = path.delete_prefix("./")
|
|
45
146
|
if path.start_with?("/")
|
|
@@ -52,18 +153,55 @@ class Evilution::SpecResolver
|
|
|
52
153
|
def candidate_test_paths(source_path)
|
|
53
154
|
base = source_path.sub(/\.rb\z/, @test_suffix)
|
|
54
155
|
prefix = STRIPPABLE_PREFIXES.find { |p| source_path.start_with?(p) }
|
|
156
|
+
stripped = prefix ? base.delete_prefix(prefix) : base
|
|
157
|
+
|
|
158
|
+
primary = mirror_candidates(stripped)
|
|
159
|
+
primary.unshift(controller_to_request_test(stripped)) if prefix
|
|
160
|
+
primary.compact!
|
|
55
161
|
|
|
56
|
-
|
|
57
|
-
stripped = base.delete_prefix(prefix)
|
|
58
|
-
request_test = controller_to_request_test(stripped)
|
|
59
|
-
[request_test, "#{@test_dir}/#{stripped}", "#{@test_dir}/#{base}"].compact
|
|
60
|
-
else
|
|
61
|
-
["#{@test_dir}/#{base}"]
|
|
62
|
-
end
|
|
162
|
+
fallbacks = primary.flat_map { |c| parent_fallback_candidates(c) }
|
|
63
163
|
|
|
64
|
-
|
|
164
|
+
(primary + fallbacks + prefix_convention_candidates(stripped)).uniq
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
# Conventional roots that may hold tests: the mirrored root plus the common
|
|
168
|
+
# spec/unit, spec/lib (test/unit, test/lib) buckets.
|
|
169
|
+
def roots
|
|
170
|
+
[@test_dir, *CONVENTIONAL_SUBDIRS.map { |d| "#{@test_dir}/#{d}" }]
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
# Cross every conventional root with every layout variant of the stripped
|
|
174
|
+
# source path: the full mirror, the mirror with the leading gem-namespace
|
|
175
|
+
# dir dropped, and the bare basename. Full mirrors rank above dropped ones
|
|
176
|
+
# so a 1:1 layout always wins when present.
|
|
177
|
+
def mirror_candidates(stripped)
|
|
178
|
+
mirror_variants(stripped).flat_map do |variant|
|
|
179
|
+
roots.map { |root| "#{root}/#{variant}" }
|
|
180
|
+
end
|
|
181
|
+
end
|
|
65
182
|
|
|
66
|
-
|
|
183
|
+
def mirror_variants(stripped)
|
|
184
|
+
segments = stripped.split("/")
|
|
185
|
+
variants = [stripped]
|
|
186
|
+
variants << segments[1..].join("/") if segments.length > 1
|
|
187
|
+
variants << segments.last if segments.length > 2
|
|
188
|
+
variants.uniq
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
# Test::Unit / minitest gems frequently name files with a `test_` PREFIX
|
|
192
|
+
# (test/test_connection_pool.rb) instead of the mirrored `_test.rb` suffix.
|
|
193
|
+
# Only meaningful when resolving against the minitest suffix.
|
|
194
|
+
def prefix_convention_candidates(stripped)
|
|
195
|
+
return [] unless @test_suffix == MINITEST_SUFFIX
|
|
196
|
+
|
|
197
|
+
mirror_variants(stripped).flat_map do |variant|
|
|
198
|
+
dir, _, file = variant.rpartition("/")
|
|
199
|
+
name = file.delete_suffix(@test_suffix)
|
|
200
|
+
next [] if name.empty?
|
|
201
|
+
|
|
202
|
+
relative = dir.empty? ? "test_#{name}.rb" : "#{dir}/test_#{name}.rb"
|
|
203
|
+
roots.map { |root| "#{root}/#{relative}" }
|
|
204
|
+
end
|
|
67
205
|
end
|
|
68
206
|
|
|
69
207
|
def controller_to_request_test(stripped_path)
|
|
@@ -19,12 +19,23 @@ class Evilution::SpecSelector
|
|
|
19
19
|
return existing unless existing.empty?
|
|
20
20
|
end
|
|
21
21
|
|
|
22
|
-
resolved =
|
|
23
|
-
resolved ?
|
|
22
|
+
resolved = resolve_via_resolver(source_path)
|
|
23
|
+
resolved && !resolved.empty? ? resolved : nil
|
|
24
24
|
end
|
|
25
25
|
|
|
26
26
|
private
|
|
27
27
|
|
|
28
|
+
# Prefer the array-returning #resolve_specs, but fall back to the older single-file #call contract so a custom
|
|
29
|
+
# resolver that only implements #call keeps working.
|
|
30
|
+
def resolve_via_resolver(source_path)
|
|
31
|
+
if @spec_resolver.respond_to?(:resolve_specs)
|
|
32
|
+
@spec_resolver.resolve_specs(source_path, spec_pattern: @spec_pattern)
|
|
33
|
+
else
|
|
34
|
+
file = @spec_resolver.call(source_path, spec_pattern: @spec_pattern)
|
|
35
|
+
file ? [file] : nil
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
28
39
|
def mapping_for(source_path)
|
|
29
40
|
@spec_mappings[normalize(source_path)]
|
|
30
41
|
end
|
|
@@ -40,8 +51,7 @@ class Evilution::SpecSelector
|
|
|
40
51
|
normalized.delete_prefix("./")
|
|
41
52
|
end
|
|
42
53
|
|
|
43
|
-
# Same semantics as Evilution::SpecResolver#project_relative_exists?
|
|
44
|
-
# that method for the EV-wqxu / GH #1278 rationale.
|
|
54
|
+
# Same semantics as Evilution::SpecResolver#project_relative_exists?
|
|
45
55
|
def project_relative_exists?(path)
|
|
46
56
|
return true if File.exist?(path)
|
|
47
57
|
return false unless Evilution.in_isolated_worker?
|
data/lib/evilution/version.rb
CHANGED
data/lib/evilution.rb
CHANGED
|
@@ -100,6 +100,7 @@ require_relative "evilution/mutator/registry"
|
|
|
100
100
|
require_relative "evilution/equivalent"
|
|
101
101
|
require_relative "evilution/equivalent/heuristic"
|
|
102
102
|
require_relative "evilution/equivalent/detector"
|
|
103
|
+
require_relative "evilution/process_supervisor"
|
|
103
104
|
require_relative "evilution/isolation"
|
|
104
105
|
require_relative "evilution/isolation/fork"
|
|
105
106
|
require_relative "evilution/isolation/in_process"
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rspec/core/rake_task"
|
|
4
|
+
|
|
5
|
+
# RUN_STRESS lifts the default :stress exclusion in spec_helper. Set via a
|
|
6
|
+
# prerequisite so it runs before the RSpec task, without polluting other tasks.
|
|
7
|
+
task :stress_env do
|
|
8
|
+
ENV["RUN_STRESS"] = "1"
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
desc "Run parallel/isolation stress + load specs (tagged :stress, slow)"
|
|
12
|
+
RSpec::Core::RakeTask.new(stress: :stress_env) do |t|
|
|
13
|
+
t.pattern = "spec/evilution/parallel/stress_spec.rb"
|
|
14
|
+
t.rspec_opts = "--tag stress"
|
|
15
|
+
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Canary manifest for the EV-7ydn validation run of scripts/compare_targeting.
|
|
2
|
+
#
|
|
3
|
+
# scripts/compare_targeting scripts/canary_manifest.yml --out .artifacts/targeting_validation.md
|
|
4
|
+
#
|
|
5
|
+
# Each repo is run through evilution three times (full_file / lexical / coverage)
|
|
6
|
+
# over the SAME mutation set; the gate is total lost_kills == 0 (coverage must
|
|
7
|
+
# never lose a kill full-file caught) before the default flips to coverage.
|
|
8
|
+
#
|
|
9
|
+
# `dir:` assumes the EV-rxob canary checkout layout (/tmp/ev-canaries/<repo>,
|
|
10
|
+
# already `bundle install`ed). Adjust to your checkout. Per-repo `args` mirror
|
|
11
|
+
# the EV-rxob R2 findings: repos whose specs are NOT lib-mirrored need an
|
|
12
|
+
# explicit --spec (EV-z7f5 / GH #1325), or auto spec-resolution yields 0.0 and
|
|
13
|
+
# the comparison is meaningless.
|
|
14
|
+
#
|
|
15
|
+
# Start with the R2 infra=none tier that ran clean; extend with the DB-tier
|
|
16
|
+
# repos once their services are up.
|
|
17
|
+
|
|
18
|
+
repos:
|
|
19
|
+
# --- R2 PASS, lib-mirrored specs (auto-resolution works) ---
|
|
20
|
+
- name: thoughtbot/factory_bot
|
|
21
|
+
dir: /tmp/ev-canaries/factory_bot
|
|
22
|
+
args: ["lib", "--jobs", "4"]
|
|
23
|
+
|
|
24
|
+
- name: jnunemaker/httparty
|
|
25
|
+
dir: /tmp/ev-canaries/httparty
|
|
26
|
+
args: ["lib", "--jobs", "4"]
|
|
27
|
+
|
|
28
|
+
- name: rubocop/rubocop
|
|
29
|
+
dir: /tmp/ev-canaries/rubocop
|
|
30
|
+
args: ["lib", "--jobs", "4"]
|
|
31
|
+
|
|
32
|
+
- name: rack/rack
|
|
33
|
+
dir: /tmp/ev-canaries/rack
|
|
34
|
+
args: ["lib", "--jobs", "4"]
|
|
35
|
+
|
|
36
|
+
# --- Non-lib-mirrored specs: explicit --spec required (EV-z7f5 / GH #1325) ---
|
|
37
|
+
- name: bblimke/webmock
|
|
38
|
+
dir: /tmp/ev-canaries/webmock
|
|
39
|
+
args: ["lib", "--spec", "spec/unit", "--jobs", "4"]
|
|
40
|
+
|
|
41
|
+
- name: doorkeeper-gem/doorkeeper
|
|
42
|
+
dir: /tmp/ev-canaries/doorkeeper
|
|
43
|
+
args: ["lib", "--spec", "spec", "--jobs", "4"]
|
|
44
|
+
|
|
45
|
+
- name: ruby-concurrency/concurrent-ruby
|
|
46
|
+
dir: /tmp/ev-canaries/concurrent-ruby
|
|
47
|
+
args: ["lib", "--spec", "spec", "--jobs", "4"]
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Targeting-mode comparison harness.
|
|
5
|
+
#
|
|
6
|
+
# For each repo in a manifest, runs evilution over the SAME mutation set under
|
|
7
|
+
# three example-targeting modes -- full_file (baseline), lexical (current), and
|
|
8
|
+
# coverage (new) -- joins the per-mutation results, and emits a per-repo table:
|
|
9
|
+
# score_full / score_lexical / score_coverage,
|
|
10
|
+
# lost_kills (mutations full_file KILLED but coverage did NOT -> the gate),
|
|
11
|
+
# wall_ratio_lexical / wall_ratio_coverage (vs the full_file baseline).
|
|
12
|
+
#
|
|
13
|
+
# The accuracy gate is lost_kills == 0: coverage targeting must never lose a kill
|
|
14
|
+
# the full-file run would catch. Speed is reported, not gated.
|
|
15
|
+
#
|
|
16
|
+
# Usage:
|
|
17
|
+
# scripts/compare_targeting MANIFEST.yml [--out report.md]
|
|
18
|
+
#
|
|
19
|
+
# MANIFEST.yml:
|
|
20
|
+
# repos:
|
|
21
|
+
# - name: acme/foo
|
|
22
|
+
# dir: /checkouts/foo # already bundled
|
|
23
|
+
# args: ["lib/foo.rb", "--jobs", "4"]
|
|
24
|
+
#
|
|
25
|
+
# The actual canary execution (checkout + bundle over the EV-rxob manifest) is
|
|
26
|
+
# the validation run, EV-7ydn; this harness is the reusable comparison engine.
|
|
27
|
+
|
|
28
|
+
require "json"
|
|
29
|
+
require "yaml"
|
|
30
|
+
require "open3"
|
|
31
|
+
require "optparse"
|
|
32
|
+
require "digest"
|
|
33
|
+
|
|
34
|
+
module CompareTargeting
|
|
35
|
+
MODES = %w[full_file lexical coverage].freeze
|
|
36
|
+
KEY_FIELDS = %w[file line operator].freeze
|
|
37
|
+
|
|
38
|
+
class ConfigError < StandardError; end
|
|
39
|
+
|
|
40
|
+
module_function
|
|
41
|
+
|
|
42
|
+
# Stable per-mutation identity across modes: only TARGETING differs between
|
|
43
|
+
# runs, so the same mutation has the same file/line/operator (+ diff to
|
|
44
|
+
# separate distinct mutations sharing a line+operator).
|
|
45
|
+
def key_for(detail)
|
|
46
|
+
base = KEY_FIELDS.map { |field| detail[field] }.join(":")
|
|
47
|
+
digest = detail["diff"].to_s
|
|
48
|
+
digest.empty? ? base : "#{base}##{Digest::SHA256.hexdigest(digest)[0, 8]}"
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# The label shown in lost-kill output: file:line:operator (no diff hash).
|
|
52
|
+
def label_for(detail)
|
|
53
|
+
KEY_FIELDS.map { |field| detail[field] }.join(":")
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# One mode's run, indexed by mutation key.
|
|
57
|
+
class ModeResult
|
|
58
|
+
CATEGORIES = %w[killed survived neutral equivalent unresolved unparseable timed_out errors].freeze
|
|
59
|
+
|
|
60
|
+
def self.from_json(data)
|
|
61
|
+
by_key = {}
|
|
62
|
+
CATEGORIES.each do |category|
|
|
63
|
+
Array(data[category]).each do |detail|
|
|
64
|
+
key = CompareTargeting.key_for(detail)
|
|
65
|
+
by_key[key] ||= {
|
|
66
|
+
status: detail["status"],
|
|
67
|
+
duration: (detail["duration"] || 0.0).to_f,
|
|
68
|
+
label: CompareTargeting.label_for(detail)
|
|
69
|
+
}
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
new(by_key)
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def initialize(by_key)
|
|
76
|
+
@by_key = by_key
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
def keys
|
|
80
|
+
@by_key.keys
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
def status(key)
|
|
84
|
+
@by_key.dig(key, :status)
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
def label(key)
|
|
88
|
+
@by_key.dig(key, :label)
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
def killed_count
|
|
92
|
+
@by_key.count { |_, value| value[:status] == "killed" }
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
# Measurable = the run actually observed a verdict: killed or survived.
|
|
96
|
+
# Every other outcome (neutral, equivalent, unresolved, unparseable, timed_out, errors) is excluded from the score.
|
|
97
|
+
def measurable_count
|
|
98
|
+
@by_key.count { |_, value| %w[killed survived].include?(value[:status]) }
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
def total_duration
|
|
102
|
+
@by_key.values.sum { |value| value[:duration] }
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
# Joins the three modes for one repo and derives the comparison metrics.
|
|
107
|
+
class Comparison
|
|
108
|
+
def initialize(full_file:, lexical:, coverage:)
|
|
109
|
+
@modes = { "full_file" => full_file, "lexical" => lexical, "coverage" => coverage }
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
def score(mode)
|
|
113
|
+
result = @modes.fetch(mode)
|
|
114
|
+
return 0.0 if result.measurable_count.zero?
|
|
115
|
+
|
|
116
|
+
result.killed_count.to_f / result.measurable_count
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
def wall_ratio(mode)
|
|
120
|
+
baseline = @modes.fetch("full_file").total_duration
|
|
121
|
+
return 0.0 if baseline.zero?
|
|
122
|
+
|
|
123
|
+
@modes.fetch(mode).total_duration / baseline
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
# Mutation labels that full_file KILLED but `mode` failed to kill -- the
|
|
127
|
+
# lost kills that must be zero before coverage can become the default.
|
|
128
|
+
def lost_kills(mode = "coverage")
|
|
129
|
+
full = @modes.fetch("full_file")
|
|
130
|
+
other = @modes.fetch(mode)
|
|
131
|
+
full.keys.select { |key| full.status(key) == "killed" && other.status(key) != "killed" }
|
|
132
|
+
.map { |key| full.label(key) }
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
def to_row(repo)
|
|
136
|
+
{
|
|
137
|
+
repo: repo,
|
|
138
|
+
score_full: score("full_file"),
|
|
139
|
+
score_lexical: score("lexical"),
|
|
140
|
+
score_coverage: score("coverage"),
|
|
141
|
+
lost_kills: lost_kills("coverage").size,
|
|
142
|
+
wall_ratio_lexical: wall_ratio("lexical"),
|
|
143
|
+
wall_ratio_coverage: wall_ratio("coverage")
|
|
144
|
+
}
|
|
145
|
+
end
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
# Renders the per-repo rows as a markdown table with a PASS/FAIL gate line.
|
|
149
|
+
class TableReporter
|
|
150
|
+
COLUMNS = %w[repo score_full score_lexical score_coverage lost_kills
|
|
151
|
+
wall_ratio_lexical wall_ratio_coverage].freeze
|
|
152
|
+
|
|
153
|
+
def initialize(rows)
|
|
154
|
+
@rows = rows
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
def to_markdown
|
|
158
|
+
(table_lines + ["", gate_line]).join("\n")
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
private
|
|
162
|
+
|
|
163
|
+
def table_lines
|
|
164
|
+
[header_row, separator_row, *@rows.map { |row| data_row(row) }]
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
def header_row
|
|
168
|
+
"| #{COLUMNS.join(" | ")} |"
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
def separator_row
|
|
172
|
+
"| #{COLUMNS.map { "---" }.join(" | ")} |"
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
def data_row(row)
|
|
176
|
+
"| #{COLUMNS.map { |col| format_cell(row[col.to_sym]) }.join(" | ")} |"
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
def gate_line
|
|
180
|
+
total_lost = @rows.sum { |row| row[:lost_kills] }
|
|
181
|
+
gate = total_lost.zero? ? "PASS" : "FAIL"
|
|
182
|
+
"GATE (total lost_kills == 0): #{gate} (#{total_lost} lost kills across #{@rows.size} repos)"
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
def format_cell(value)
|
|
186
|
+
value.is_a?(Float) ? format("%.3f", value) : value.to_s
|
|
187
|
+
end
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
# Runs evilution for one (repo, mode) and parses the JSON into a ModeResult.
|
|
191
|
+
# command_runner is injected so the pure pipeline is testable without a real
|
|
192
|
+
# mutation run.
|
|
193
|
+
class ModeRunner
|
|
194
|
+
# Run the inner `bundle exec evilution` in the TARGET repo's bundler context.
|
|
195
|
+
# If this harness is itself launched under `bundle exec`, Bundler exports
|
|
196
|
+
# BUNDLE_GEMFILE/RUBYOPT into the child, which would make the inner bundle
|
|
197
|
+
# resolve against evilution's Gemfile instead of the target repo's. Strip
|
|
198
|
+
# that inherited bundler env first.
|
|
199
|
+
DEFAULT_RUNNER = lambda do |cmd, dir|
|
|
200
|
+
stdout, stderr, status =
|
|
201
|
+
if defined?(Bundler)
|
|
202
|
+
Bundler.with_unbundled_env { Open3.capture3(*cmd, chdir: dir) }
|
|
203
|
+
else
|
|
204
|
+
Open3.capture3(*cmd, chdir: dir)
|
|
205
|
+
end
|
|
206
|
+
raise ConfigError, "evilution failed in #{dir}: #{stderr}" unless status.success?
|
|
207
|
+
|
|
208
|
+
stdout
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
def initialize(command_runner: DEFAULT_RUNNER)
|
|
212
|
+
@command_runner = command_runner
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
# Runs evilution in +repo_dir+ under one example-targeting +mode+ and parses
# the JSON report it prints.
#
# @param repo_dir [String] directory handed to the command runner
# @param evilution_args [Array<String>] extra CLI arguments from the manifest
# @param mode [String] value passed to --example-targeting
# @return [ModeResult] the parsed per-mutation results for this mode
# @raise [ConfigError] when the command output is not parseable JSON, so the
#   CLI reports it as "Error: ..." instead of a JSON::ParserError backtrace
def run(repo_dir:, evilution_args:, mode:)
  cmd = ["bundle", "exec", "evilution", *evilution_args,
         "--example-targeting", mode, "--format", "json"]
  json = @command_runner.call(cmd, repo_dir)
  ModeResult.from_json(JSON.parse(json))
rescue JSON::ParserError => e
  raise ConfigError, "evilution (#{mode}) in #{repo_dir} did not emit valid JSON: #{e.message}"
end
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
# Drives the manifest: every repo through every mode, then the table.
|
|
224
|
+
class Harness
|
|
225
|
+
def initialize(mode_runner: ModeRunner.new)
|
|
226
|
+
@mode_runner = mode_runner
|
|
227
|
+
end
|
|
228
|
+
|
|
229
|
+
def call(repos)
|
|
230
|
+
rows = repos.map { |repo| compare_repo(repo) }
|
|
231
|
+
TableReporter.new(rows).to_markdown
|
|
232
|
+
end
|
|
233
|
+
|
|
234
|
+
def compare_repo(repo)
|
|
235
|
+
results = MODES.to_h do |mode|
|
|
236
|
+
[mode, @mode_runner.run(repo_dir: repo.fetch(:dir), evilution_args: repo.fetch(:args), mode: mode)]
|
|
237
|
+
end
|
|
238
|
+
Comparison.new(**results.transform_keys(&:to_sym)).to_row(repo.fetch(:name))
|
|
239
|
+
end
|
|
240
|
+
end
|
|
241
|
+
|
|
242
|
+
# Loads the YAML manifest at +path+ and returns its +repos:+ list.
#
# Every entry is checked for the +name+, +dir+ and +args+ keys up front,
# because Harness#compare_repo fetches exactly those keys; a malformed entry
# then becomes a ConfigError before any mutation run starts rather than a
# KeyError backtrace part-way through.
#
# @param path [String] manifest file path
# @return [Array<Hash>] repo entries with symbolized keys
# @raise [ConfigError] when the file is missing, cannot be loaded as safe
#   YAML, has no +repos:+ array, or contains a malformed entry
def load_manifest(path)
  data = YAML.safe_load_file(path, symbolize_names: true)
  raise ConfigError, "manifest must list repos:" unless data.is_a?(Hash) && data[:repos].is_a?(Array)

  data[:repos].each_with_index { |repo, index| validate_repo_entry!(repo, index) }
  data[:repos]
rescue Errno::ENOENT
  raise ConfigError, "manifest not found: #{path}"
rescue Psych::SyntaxError => e
  raise ConfigError, "manifest is not valid YAML: #{e.message}"
rescue Psych::Exception => e
  # safe_load rejections (disallowed classes, aliases) are not SyntaxErrors.
  raise ConfigError, "manifest could not be loaded safely: #{e.message}"
end

# Raises ConfigError unless +repo+ is a mapping carrying every key that
# Harness#compare_repo fetches. Only key presence is checked, mirroring fetch.
def validate_repo_entry!(repo, index)
  raise ConfigError, "manifest repos[#{index}] must be a mapping" unless repo.is_a?(Hash)

  missing = %i[name dir args].reject { |key| repo.key?(key) }
  return if missing.empty?

  raise ConfigError, "manifest repos[#{index}] is missing: #{missing.join(", ")}"
end
|
|
252
|
+
end
|
|
253
|
+
|
|
254
|
+
# CLI entry point: parse options, run the harness over the manifest, and emit
# the markdown report to --out PATH or stdout.
# Exit status: 0 on success, 2 on usage or configuration errors.
if __FILE__ == $PROGRAM_NAME
  out_path = nil
  parser = OptionParser.new do |opts|
    opts.banner = "Usage: scripts/compare_targeting MANIFEST.yml [--out report.md]"
    opts.on("--out PATH", "Write the markdown report to PATH (default: stdout)") { |p| out_path = p }
  end

  begin
    parser.parse!
  rescue OptionParser::ParseError => e
    # Unknown flag or --out without a value: report as a usage error rather
    # than letting the exception surface as a backtrace.
    warn "Error: #{e.message}"
    warn parser.banner
    exit 2
  end

  manifest_path = ARGV.first
  unless manifest_path
    warn parser.banner
    exit 2
  end

  begin
    repos = CompareTargeting.load_manifest(manifest_path)
    report = CompareTargeting::Harness.new.call(repos)
    out_path ? File.write(out_path, report) : puts(report)
    exit 0
  rescue CompareTargeting::ConfigError => e
    warn "Error: #{e.message}"
    exit 2
  end
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Example manifest for scripts/compare_targeting (EV-51d4).
|
|
2
|
+
#
|
|
3
|
+
# Each repo is run through evilution three times -- once per example-targeting
|
|
4
|
+
# mode (full_file, lexical, coverage) -- over the SAME mutation set, and the
|
|
5
|
+
# harness emits a per-repo table with the lost_kills gate and wall-time ratios.
|
|
6
|
+
#
|
|
7
|
+
# Repos must already be checked out and `bundle install`ed; the harness only
|
|
8
|
+
# runs evilution inside them. The real EV-rxob canary list is wired up in the
|
|
9
|
+
# validation run (EV-7ydn).
|
|
10
|
+
#
|
|
11
|
+
# scripts/compare_targeting scripts/compare_targeting.example.yml --out report.md
|
|
12
|
+
|
|
13
|
+
repos:
|
|
14
|
+
- name: thoughtbot/factory_bot
|
|
15
|
+
dir: /tmp/ev-canaries/factory_bot
|
|
16
|
+
args: ["lib", "--jobs", "4"]
|
|
17
|
+
|
|
18
|
+
- name: bblimke/webmock
|
|
19
|
+
dir: /tmp/ev-canaries/webmock
|
|
20
|
+
args: ["lib", "--jobs", "4"]
|
|
21
|
+
|
|
22
|
+
- name: rack/rack
|
|
23
|
+
dir: /tmp/ev-canaries/rack
|
|
24
|
+
args: ["lib", "--jobs", "4"]
|