ace-test-runner-e2e 0.29.8 → 0.40.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ace-defaults/e2e-runner/config.yml +14 -2
- data/CHANGELOG.md +233 -0
- data/README.md +2 -2
- data/exe/ace-test-e2e-sh +9 -4
- data/handbook/guides/e2e-testing.g.md +75 -9
- data/handbook/guides/scenario-yml-reference.g.md +21 -8
- data/handbook/guides/tc-authoring.g.md +23 -5
- data/handbook/skills/as-e2e-fix/SKILL.md +2 -2
- data/handbook/skills/as-e2e-review/SKILL.md +2 -2
- data/handbook/templates/ace-taskflow-fixture.template.md +17 -17
- data/handbook/templates/agent-experience-report.template.md +3 -2
- data/handbook/templates/scenario.yml.template.yml +7 -2
- data/handbook/templates/tc-file.template.md +16 -4
- data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +53 -6
- data/handbook/workflow-instructions/e2e/create.wf.md +128 -25
- data/handbook/workflow-instructions/e2e/execute.wf.md +11 -7
- data/handbook/workflow-instructions/e2e/fix.wf.md +84 -15
- data/handbook/workflow-instructions/e2e/plan-changes.wf.md +33 -1
- data/handbook/workflow-instructions/e2e/review.wf.md +40 -25
- data/handbook/workflow-instructions/e2e/rewrite.wf.md +22 -8
- data/handbook/workflow-instructions/e2e/run.wf.md +50 -26
- data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +4 -4
- data/lib/ace/test/end_to_end_runner/atoms/artifact_contract_validator.rb +138 -0
- data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +7 -5
- data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +73 -7
- data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +195 -5
- data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +58 -9
- data/lib/ace/test/end_to_end_runner/models/test_case.rb +8 -2
- data/lib/ace/test/end_to_end_runner/models/test_result.rb +9 -3
- data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +4 -2
- data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +7 -2
- data/lib/ace/test/end_to_end_runner/molecules/artifact_pruner.rb +61 -0
- data/lib/ace/test/end_to_end_runner/molecules/bwrap_sandbox_backend.rb +271 -0
- data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +28 -1
- data/lib/ace/test/end_to_end_runner/molecules/integration_runner.rb +122 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +235 -18
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +164 -13
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +91 -19
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +121 -18
- data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +15 -12
- data/lib/ace/test/end_to_end_runner/molecules/sandbox_runtime_builder.rb +374 -0
- data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +83 -5
- data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +121 -16
- data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +422 -97
- data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +38 -13
- data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +27 -5
- data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +98 -18
- data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +159 -19
- data/lib/ace/test/end_to_end_runner/version.rb +1 -1
- data/lib/ace/test/end_to_end_runner.rb +4 -0
- metadata +21 -2
|
@@ -6,16 +6,18 @@ module Ace
|
|
|
6
6
|
module Test
|
|
7
7
|
module EndToEndRunner
|
|
8
8
|
module Molecules
|
|
9
|
-
# Discovers
|
|
9
|
+
# Discovers deterministic preflight tests and agent E2E scenarios in packages
|
|
10
10
|
#
|
|
11
11
|
# Finds test scenarios in the TS-format directory structure:
|
|
12
|
+
# {package}/test/feat/**/*_test.rb
|
|
12
13
|
# {package}/test/e2e/TS-*/scenario.yml
|
|
13
14
|
#
|
|
14
15
|
# Note: This is a Molecule (not an Atom) because it performs filesystem
|
|
15
16
|
# I/O via Dir.glob.
|
|
16
17
|
class TestDiscoverer
|
|
17
|
-
|
|
18
|
+
TEST_DIRS = ["test/e2e"].freeze
|
|
18
19
|
SCENARIO_FILE = "scenario.yml"
|
|
20
|
+
DEFAULT_PREFLIGHT_GLOBS = ["test/feat/**/*_test.rb"].freeze
|
|
19
21
|
SCENARIO_DIR_PATTERN = "TS-*"
|
|
20
22
|
|
|
21
23
|
# Find E2E test scenario files matching criteria
|
|
@@ -47,6 +49,17 @@ module Ace
|
|
|
47
49
|
).map(&:file_path).sort
|
|
48
50
|
end
|
|
49
51
|
|
|
52
|
+
# Locate the deterministic preflight test files of a single package.
#
# Every glob returned by +preflight_globs+ is tried in order, relative to
# <base_dir>/<package>. The matches of the first glob that finds at least
# one file are returned; later globs are not consulted.
#
# @param package [String] Package directory name, relative to base_dir
# @param base_dir [String] Base directory to search from
# @return [Array<String>] Sorted list of matching deterministic preflight test files
#   (empty when no glob matches anything)
def find_integration_tests(package:, base_dir: Dir.pwd)
  package_root = File.join(base_dir, package)

  # lazy keeps the "stop at the first glob with matches" behaviour:
  # globs after the first hit are never expanded.
  first_hit = preflight_globs.lazy
    .map { |pattern| Dir.glob(File.join(package_root, pattern)).sort }
    .find { |matches| !matches.empty? }

  first_hit || []
end
|
|
62
|
+
|
|
50
63
|
# Find TS-format scenario directories and load them as TestScenario models
|
|
51
64
|
#
|
|
52
65
|
# @param package [String] Package name
|
|
@@ -56,9 +69,11 @@ module Ace
|
|
|
56
69
|
# @param base_dir [String] Base directory to search from
|
|
57
70
|
# @return [Array<Models::TestScenario>] Loaded scenario models with test_cases
|
|
58
71
|
def find_scenarios(package:, test_id: nil, tags: nil, exclude_tags: nil, base_dir: Dir.pwd)
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
72
|
+
patterns = TEST_DIRS.map do |test_dir_name|
|
|
73
|
+
test_dir = File.join(base_dir, package, test_dir_name)
|
|
74
|
+
File.join(test_dir, SCENARIO_DIR_PATTERN, SCENARIO_FILE)
|
|
75
|
+
end
|
|
76
|
+
scenario_files = Dir.glob(patterns).sort
|
|
62
77
|
|
|
63
78
|
loader = ScenarioLoader.new
|
|
64
79
|
scenarios = scenario_files.map do |yml_path|
|
|
@@ -82,11 +97,13 @@ module Ace
|
|
|
82
97
|
# @param base_dir [String] Base directory to search from
|
|
83
98
|
# @return [Array<String>] Sorted list of package names
|
|
84
99
|
def list_packages(base_dir: Dir.pwd)
|
|
85
|
-
|
|
100
|
+
patterns = TEST_DIRS.map do |test_dir_name|
|
|
101
|
+
File.join(base_dir, "*/#{test_dir_name}/#{SCENARIO_DIR_PATTERN}/#{SCENARIO_FILE}")
|
|
102
|
+
end
|
|
86
103
|
|
|
87
104
|
base = Pathname.new(base_dir)
|
|
88
105
|
|
|
89
|
-
Dir.glob(
|
|
106
|
+
Dir.glob(patterns)
|
|
90
107
|
.map { |f| Pathname.new(f).relative_path_from(base).each_filename.first }
|
|
91
108
|
.uniq
|
|
92
109
|
.sort
|
|
@@ -96,12 +113,14 @@ module Ace
|
|
|
96
113
|
|
|
97
114
|
# Build glob patterns for finding TS-format scenario.yml files
#
# @param package [String] Package directory name under base_dir
# @param test_id [String, nil] When given, only scenario directories whose
#   name contains this value are matched; otherwise SCENARIO_DIR_PATTERN is used
# @param base_dir [String] Base directory to search from
# @return [Array<String>] One glob pattern per entry in TEST_DIRS
def build_scenario_pattern(package, test_id, base_dir)
  scenario_dir_glob = test_id ? "*#{test_id}*" : SCENARIO_DIR_PATTERN

  TEST_DIRS.map do |test_dir_name|
    suite_root = File.join(base_dir, package, test_dir_name)
    File.join(suite_root, scenario_dir_glob, SCENARIO_FILE)
  end
end
|
|
107
126
|
|
|
@@ -129,6 +148,12 @@ module Ace
|
|
|
129
148
|
|
|
130
149
|
filtered
|
|
131
150
|
end
|
|
151
|
+
|
|
152
|
+
# Glob patterns used to discover deterministic preflight tests, in priority order.
#
# The value found under patterns.preflight in the configuration returned by
# Molecules::ConfigLoader.load comes first, followed by DEFAULT_PREFLIGHT_GLOBS.
# The configured value may be a single glob or a list of globs: a list is
# flattened so each entry becomes its own pattern rather than staying nested
# as one array element. Entries are converted with to_s so a non-string
# scalar cannot raise on the emptiness check.
#
# @return [Array<String>] Unique, non-empty glob strings
def preflight_globs
  configured = Molecules::ConfigLoader.load.dig("patterns", "preflight")

  [*Array(configured).flatten, *DEFAULT_PREFLIGHT_GLOBS]
    .compact
    .map(&:to_s)
    .reject(&:empty?)
    .uniq
end
|
|
132
157
|
end
|
|
133
158
|
end
|
|
134
159
|
end
|
|
@@ -16,12 +16,18 @@ module Ace
|
|
|
16
16
|
# @param provider [String] LLM provider:model string
|
|
17
17
|
# @param timeout [Integer] Request timeout in seconds
|
|
18
18
|
# @param config [Hash] Configuration hash (string keys) from ConfigLoader
|
|
19
|
-
def initialize(provider: nil, timeout: nil, config: nil)
|
|
19
|
+
# @param sandbox_backend_factory [#call, nil] Callable invoked as
#   factory.call(sandbox_path, source_root:) to build a sandbox backend;
#   when nil, a lambda that builds Molecules::BwrapSandboxBackend is used
def initialize(provider: nil, timeout: nil, config: nil, sandbox_backend_factory: nil)
  config ||= Molecules::ConfigLoader.load
  shared_provider = config.dig("execution", "provider")

  # Runner provider precedence: explicit argument, execution.runner_provider,
  # execution.provider, then the built-in default.
  @provider = provider || config.dig("execution", "runner_provider") || shared_provider || "claude:sonnet"
  # The verifier provider is read from config only and falls back to the runner provider.
  @verifier_provider = config.dig("execution", "verifier_provider") || shared_provider || @provider
  @timeout = timeout || config.dig("execution", "timeout") || 300

  @prompt_builder = Atoms::PromptBuilder.new
  @cli_provider_adapter = Atoms::CliProviderAdapter.new(config)
  @sandbox_backend_factory = sandbox_backend_factory || ->(sandbox_path, source_root: nil) {
    Molecules::BwrapSandboxBackend.new(sandbox_root: sandbox_path, source_root: source_root)
  }
end
|
|
26
32
|
|
|
27
33
|
# Execute a single test scenario via LLM
|
|
@@ -192,9 +198,10 @@ module Ace
|
|
|
192
198
|
# Execute TC via skill invocation for CLI providers
|
|
193
199
|
def execute_tc_via_skill(test_case, sandbox_path, scenario, cli_args: nil, run_id: nil, env_vars: nil)
|
|
194
200
|
with_tc_error_handling(scenario) do |started_at|
|
|
201
|
+
sandbox_backend, prepared_env = prepared_env_for(sandbox_path, env_vars)
|
|
195
202
|
prompt = @cli_provider_adapter.build_tc_skill_prompt(
|
|
196
203
|
test_case: test_case, scenario: scenario,
|
|
197
|
-
sandbox_path: sandbox_path, run_id: run_id, env_vars:
|
|
204
|
+
sandbox_path: sandbox_path, run_id: run_id, env_vars: prepared_env
|
|
198
205
|
)
|
|
199
206
|
|
|
200
207
|
response = Ace::LLM::QueryInterface.query(
|
|
@@ -202,7 +209,8 @@ module Ace
|
|
|
202
209
|
system: nil, cli_args: cli_args,
|
|
203
210
|
timeout: @timeout, fallback: false,
|
|
204
211
|
working_dir: sandbox_path,
|
|
205
|
-
subprocess_env:
|
|
212
|
+
subprocess_env: prepared_env,
|
|
213
|
+
subprocess_command_prefix: sandbox_backend.command_prefix(chdir: sandbox_path, env: prepared_env)
|
|
206
214
|
)
|
|
207
215
|
|
|
208
216
|
invocation_error = detect_skill_invocation_error(response[:text])
|
|
@@ -322,9 +330,23 @@ module Ace
|
|
|
322
330
|
@pipeline_executors ||= {}
|
|
323
331
|
@pipeline_executors[timeout] ||= Molecules::PipelineExecutor.new(
|
|
324
332
|
provider: @provider,
|
|
325
|
-
|
|
333
|
+
verifier_provider: @verifier_provider,
|
|
334
|
+
timeout: timeout,
|
|
335
|
+
sandbox_backend_factory: @sandbox_backend_factory
|
|
326
336
|
)
|
|
327
337
|
end
|
|
338
|
+
|
|
339
|
+
# Build the sandbox backend for a sandbox directory through the injected factory.
#
# @param sandbox_path [String] Passed to the factory as its positional argument
# @param env_vars [Hash, nil] Searched for ACE_E2E_SOURCE_ROOT, string key
#   first and symbol key second; nil yields a nil source root
# @return [Object] Whatever @sandbox_backend_factory returns
def build_sandbox_backend(sandbox_path, env_vars)
  source_root = env_vars&.dig("ACE_E2E_SOURCE_ROOT")
  source_root ||= env_vars&.dig(:ACE_E2E_SOURCE_ROOT)

  @sandbox_backend_factory.call(sandbox_path, source_root: source_root)
end
|
|
345
|
+
|
|
346
|
+
# Build a sandbox backend and the environment it prepares for a run.
#
# @param sandbox_path [String] Sandbox directory handed to build_sandbox_backend
# @param env_vars [Hash, nil] Environment variables; nil is treated as an empty hash
# @return [Array] Two elements: the sandbox backend, then the result of
#   calling prepared_env on it
def prepared_env_for(sandbox_path, env_vars)
  build_sandbox_backend(sandbox_path, env_vars || {}).then do |backend|
    [backend, backend.prepared_env(env_vars || {})]
  end
end
|
|
328
350
|
end
|
|
329
351
|
end
|
|
330
352
|
end
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
require "open3"
|
|
4
4
|
require "fileutils"
|
|
5
5
|
require "yaml"
|
|
6
|
+
require "set"
|
|
6
7
|
require "ace/b36ts"
|
|
7
8
|
|
|
8
9
|
module Ace
|
|
@@ -30,7 +31,8 @@ module Ace
|
|
|
30
31
|
# @param timestamp_generator Timestamp generator (injectable)
|
|
31
32
|
def initialize(max_parallel: 4, base_dir: nil, discoverer: nil, affected_detector: nil,
|
|
32
33
|
failure_finder: nil, output: $stdout, use_color: nil, progress: false,
|
|
33
|
-
suite_report_writer: nil, scenario_loader: nil, timestamp_generator: nil
|
|
34
|
+
suite_report_writer: nil, scenario_loader: nil, timestamp_generator: nil,
|
|
35
|
+
runtime_builder: nil, shared_runtime_cache_root: nil)
|
|
34
36
|
@max_parallel = max_parallel
|
|
35
37
|
@base_dir = base_dir || Dir.pwd
|
|
36
38
|
@discoverer = discoverer || Molecules::TestDiscoverer.new
|
|
@@ -43,6 +45,14 @@ module Ace
|
|
|
43
45
|
@suite_report_writer = suite_report_writer || Molecules::SuiteReportWriter.new(config: config)
|
|
44
46
|
@loader = scenario_loader || Molecules::ScenarioLoader.new
|
|
45
47
|
@timestamp_generator = timestamp_generator || method(:default_timestamp)
|
|
48
|
+
@runtime_builder = runtime_builder || Molecules::SandboxRuntimeBuilder.new(
|
|
49
|
+
source_root: @base_dir,
|
|
50
|
+
ruby_version: config.dig("sandbox", "ruby_version") || Molecules::ConfigLoader.default_sandbox_ruby_version
|
|
51
|
+
)
|
|
52
|
+
@shared_runtime_cache_root = File.expand_path(
|
|
53
|
+
shared_runtime_cache_root || File.join(@base_dir, Molecules::SandboxRuntimeBuilder::DEFAULT_SHARED_RUNTIME_CACHE_ROOT)
|
|
54
|
+
)
|
|
55
|
+
@shared_runtime_root = nil
|
|
46
56
|
end
|
|
47
57
|
|
|
48
58
|
# Run E2E tests across all packages
|
|
@@ -57,6 +67,7 @@ module Ace
|
|
|
57
67
|
# @option options [Integer] :timeout Timeout per test in seconds
|
|
58
68
|
# @return [Hash] Summary of results
|
|
59
69
|
def run(options = {})
|
|
70
|
+
pre_run_worktree = git_status_snapshot
|
|
60
71
|
packages = @discoverer.list_packages(base_dir: @base_dir)
|
|
61
72
|
|
|
62
73
|
if packages.empty?
|
|
@@ -122,6 +133,7 @@ module Ace
|
|
|
122
133
|
|
|
123
134
|
total_tests = package_tests.values.flatten.size
|
|
124
135
|
pkg_count = package_tests.keys.size
|
|
136
|
+
prepare_shared_runtime_cache if total_tests > 0
|
|
125
137
|
|
|
126
138
|
# Pre-compute column widths for aligned output
|
|
127
139
|
compute_column_widths(package_tests)
|
|
@@ -135,9 +147,9 @@ module Ace
|
|
|
135
147
|
|
|
136
148
|
# Execute tests
|
|
137
149
|
if options[:parallel]
|
|
138
|
-
run_parallel(package_tests, options)
|
|
150
|
+
run_parallel(package_tests, options, pre_run_worktree)
|
|
139
151
|
else
|
|
140
|
-
run_sequential(package_tests, options)
|
|
152
|
+
run_sequential(package_tests, options, pre_run_worktree)
|
|
141
153
|
end
|
|
142
154
|
end
|
|
143
155
|
|
|
@@ -210,7 +222,7 @@ module Ace
|
|
|
210
222
|
# @param package_tests [Hash] Package to tests mapping
|
|
211
223
|
# @param options [Hash] Execution options
|
|
212
224
|
# @return [Hash] Summary of results
|
|
213
|
-
def run_sequential(package_tests, options)
|
|
225
|
+
def run_sequential(package_tests, options, pre_run_worktree)
|
|
214
226
|
results = {total: 0, passed: 0, failed: 0, errors: 0, total_cases: 0, passed_cases: 0, packages: {}}
|
|
215
227
|
start_time = Time.now
|
|
216
228
|
|
|
@@ -265,7 +277,7 @@ module Ace
|
|
|
265
277
|
done = true
|
|
266
278
|
refresh_thread&.join
|
|
267
279
|
|
|
268
|
-
finalize_run(results, package_tests, start_time)
|
|
280
|
+
finalize_run(results, package_tests, start_time, pre_run_worktree)
|
|
269
281
|
end
|
|
270
282
|
|
|
271
283
|
# Run tests in parallel using subprocesses
|
|
@@ -273,7 +285,7 @@ module Ace
|
|
|
273
285
|
# @param package_tests [Hash] Package to tests mapping
|
|
274
286
|
# @param options [Hash] Execution options
|
|
275
287
|
# @return [Hash] Summary of results
|
|
276
|
-
def run_parallel(package_tests, options)
|
|
288
|
+
def run_parallel(package_tests, options, pre_run_worktree)
|
|
277
289
|
results = {total: 0, passed: 0, failed: 0, errors: 0, total_cases: 0, passed_cases: 0, packages: {}}
|
|
278
290
|
queue = build_test_queue(package_tests)
|
|
279
291
|
run_ids = generate_run_ids(queue.size)
|
|
@@ -297,7 +309,7 @@ module Ace
|
|
|
297
309
|
check_running_processes(running, results)
|
|
298
310
|
end
|
|
299
311
|
|
|
300
|
-
finalize_run(results, package_tests, start_time)
|
|
312
|
+
finalize_run(results, package_tests, start_time, pre_run_worktree)
|
|
301
313
|
end
|
|
302
314
|
|
|
303
315
|
# Build a flat queue of test items
|
|
@@ -328,7 +340,7 @@ module Ace
|
|
|
328
340
|
cmd_array = build_test_command(package, test_file, options, run_id: run_id)
|
|
329
341
|
|
|
330
342
|
# Spawn process with array form (no shell invocation)
|
|
331
|
-
stdin, stdout, stderr, thread = Open3.popen3(*cmd_array, chdir: @base_dir)
|
|
343
|
+
stdin, stdout, stderr, thread = Open3.popen3(suite_subprocess_env, *cmd_array, chdir: @base_dir)
|
|
332
344
|
|
|
333
345
|
{pid: thread.pid, thread: thread, stdout: stdout, stderr: stderr,
|
|
334
346
|
stdin: stdin, package: package, test_file: test_file, output: String.new}
|
|
@@ -393,6 +405,18 @@ module Ace
|
|
|
393
405
|
File.executable?(local) ? local : "ace-test-e2e"
|
|
394
406
|
end
|
|
395
407
|
|
|
408
|
+
# Ask the runtime builder to prepare the shared runtime under
# @shared_runtime_cache_root and remember the value it returns.
#
# @return [Object] Value returned by prepare_shared_runtime, also stored in
#   @shared_runtime_root
def prepare_shared_runtime_cache
  runtime_root = @runtime_builder.prepare_shared_runtime(cache_root: @shared_runtime_cache_root)
  @shared_runtime_root = runtime_root
end
|
|
413
|
+
|
|
414
|
+
# Extra environment variables handed to suite subprocesses.
#
# @return [Hash] Empty when @shared_runtime_root is nil or blank; otherwise a
#   single entry mapping SandboxRuntimeBuilder::SHARED_RUNTIME_ENV_KEY to it
def suite_subprocess_env
  if @shared_runtime_root.to_s.empty?
    {}
  else
    {Molecules::SandboxRuntimeBuilder::SHARED_RUNTIME_ENV_KEY => @shared_runtime_root}
  end
end
|
|
419
|
+
|
|
396
420
|
# Extract test ID from file path
|
|
397
421
|
#
|
|
398
422
|
# @param test_file [String] Path to scenario.yml file
|
|
@@ -497,6 +521,7 @@ module Ace
|
|
|
497
521
|
# @return [Hash] Parsed result with :passed_cases and :total_cases
|
|
498
522
|
def parse_subprocess_result(process)
|
|
499
523
|
result = parse_test_output(process[:output], process[:thread].value.exitstatus, extract_test_name(process[:test_file]))
|
|
524
|
+
result[:report_dir] = normalize_report_dir(result[:report_dir], result[:test_name])
|
|
500
525
|
result[:raw_output] = process[:output]
|
|
501
526
|
|
|
502
527
|
# For non-pass results, check agent-written metadata as authoritative source
|
|
@@ -510,6 +535,34 @@ module Ace
|
|
|
510
535
|
{status: "error", error: "Failed to parse result: #{e.message}"}
|
|
511
536
|
end
|
|
512
537
|
|
|
538
|
+
# Turn a reported path that points at a report file into that test's report directory.
#
# A report_dir that is nil, empty, or not a regular file (which includes an
# existing directory and a missing path) is returned unchanged. When it is a
# file, the directory recorded for the test is looked up in it via
# resolve_report_dir_from_suite_report. If the lookup finds nothing, or any
# StandardError is raised along the way, the original value is returned.
#
# @param report_dir [String, nil] Path reported by the subprocess
# @param test_name [String] Test name, reduced with canonical_test_id before lookup
# @return [String, nil] Resolved directory, or report_dir unchanged
def normalize_report_dir(report_dir, test_name)
  # A directory is never a regular file, so this single check also covers it.
  return report_dir if report_dir.nil? || report_dir.empty? || !File.file?(report_dir)

  resolve_report_dir_from_suite_report(report_dir, canonical_test_id(test_name)) || report_dir
rescue StandardError
  report_dir
end
|
|
548
|
+
|
|
549
|
+
# Look up the report directory recorded for a test in a Markdown suite report.
#
# The report is searched for a two-column table row of the form
# "| <test_id> | `<path>` |". The captured path is expanded relative to the
# directory that contains the report file.
#
# @param report_path [String] Path to the report; must end with ".md"
# @param test_id [String, nil] Test id to look for in the first column
# @return [String, nil] Absolute directory path, or nil when the path is not
#   a .md file, the id is nil or empty, or no matching row exists
def resolve_report_dir_from_suite_report(report_path, test_id)
  return nil if !report_path.end_with?(".md") || test_id.nil? || test_id.empty?

  row_pattern = /^\|\s*#{Regexp.escape(test_id)}\s*\|\s*`([^`]+)`\s*\|$/m
  recorded_dir = File.read(report_path)[row_pattern, 1]

  recorded_dir && File.expand_path(recorded_dir, File.dirname(report_path))
end
|
|
560
|
+
|
|
561
|
+
# Reduce a test name to its leading TS id, upper-cased.
#
# The id is matched case-insensitively at the start of the name as
# "TS-<letters/digits>-<digits><optional letters>".
#
# @param test_name [Object] Test name; matched against its to_s form
# @return [String, Object] The upper-cased id, or test_name itself (unchanged)
#   when it does not start with such an id
def canonical_test_id(test_name)
  leading_id = test_name.to_s[/\A(TS-[A-Z0-9]+-\d+[a-z]*)/i, 1]
  leading_id ? leading_id.upcase : test_name
end
|
|
565
|
+
|
|
513
566
|
# Override result from agent-written metadata.yml when subprocess exit code is misleading
|
|
514
567
|
#
|
|
515
568
|
# @param result [Hash] Parsed result with :report_dir
|
|
@@ -576,7 +629,9 @@ module Ace
|
|
|
576
629
|
error_msg ||= "Test execution returned ERROR status"
|
|
577
630
|
base.merge(status: "error", error: error_msg)
|
|
578
631
|
else
|
|
579
|
-
summary = output.
|
|
632
|
+
summary = output.lines.filter_map { |line| line[/^(Preflight failed: .+?)\s*$/, 1] }.last
|
|
633
|
+
summary ||= output.match(/(\d+)\/(\d+) passed/)&.captures&.join("/")
|
|
634
|
+
summary ||= "Test failed"
|
|
580
635
|
base.merge(status: "fail", summary: summary)
|
|
581
636
|
end
|
|
582
637
|
rescue => e
|
|
@@ -589,8 +644,9 @@ module Ace
|
|
|
589
644
|
# @param package_tests [Hash] Package to test files mapping
|
|
590
645
|
# @param start_time [Time] When the run started
|
|
591
646
|
# @return [Hash] Results with optional :report_path
|
|
592
|
-
def finalize_run(results, package_tests, start_time)
|
|
647
|
+
def finalize_run(results, package_tests, start_time, pre_run_worktree)
|
|
593
648
|
write_failure_stubs(results, package_tests)
|
|
649
|
+
results[:suite_diagnostics] = build_suite_diagnostics(pre_run_worktree)
|
|
594
650
|
|
|
595
651
|
@display.show_summary(results, Time.now - start_time)
|
|
596
652
|
warn_on_lingering_claude_processes
|
|
@@ -641,6 +697,7 @@ module Ace
|
|
|
641
697
|
"status" => result[:status]
|
|
642
698
|
}
|
|
643
699
|
File.write(File.join(stub_dir, "metadata.yml"), YAML.dump(stub_data))
|
|
700
|
+
result[:report_dir] = stub_dir
|
|
644
701
|
|
|
645
702
|
if result[:raw_output] && !result[:raw_output].empty?
|
|
646
703
|
File.write(File.join(stub_dir, "subprocess_output.log"), result[:raw_output])
|
|
@@ -709,7 +766,9 @@ module Ace
|
|
|
709
766
|
all_results, all_scenarios,
|
|
710
767
|
package: "suite",
|
|
711
768
|
timestamp: timestamp,
|
|
712
|
-
base_dir: @base_dir
|
|
769
|
+
base_dir: @base_dir,
|
|
770
|
+
report_kind: :suite,
|
|
771
|
+
diagnostics: results[:suite_diagnostics]
|
|
713
772
|
)
|
|
714
773
|
rescue => e
|
|
715
774
|
warn "Warning: Suite report generation failed (#{e.class}: #{e.message})"
|
|
@@ -726,19 +785,40 @@ module Ace
|
|
|
726
785
|
total = result_hash[:total_cases] || 0
|
|
727
786
|
failed = [total - passed, 0].max
|
|
728
787
|
|
|
729
|
-
test_cases = []
|
|
730
|
-
passed.times { |i| test_cases << {id: "TC-#{format("%03d", i + 1)}", description: "", status: "pass"} }
|
|
731
|
-
failed.times { |i| test_cases << {id: "TC-#{format("%03d", passed + i + 1)}", description: "", status: "fail"} }
|
|
732
|
-
|
|
733
788
|
Models::TestResult.new(
|
|
734
789
|
test_id: result_hash[:test_name] || "unknown",
|
|
735
790
|
status: result_hash[:status] || "error",
|
|
736
|
-
test_cases:
|
|
791
|
+
test_cases: [],
|
|
737
792
|
summary: result_hash[:summary] || result_hash[:error] || "",
|
|
738
|
-
report_dir: result_hash[:report_dir]
|
|
793
|
+
report_dir: result_hash[:report_dir],
|
|
794
|
+
metadata: {"tcs-passed" => passed, "tcs-total" => total, "tcs-failed" => failed}
|
|
739
795
|
)
|
|
740
796
|
end
|
|
741
797
|
|
|
798
|
+
# Capture the current git worktree status of @base_dir as a list of lines.
#
# Uses `git status --porcelain` rather than `--short`: the line format is the
# same, but porcelain output is not coloured or otherwise altered by user
# configuration, so prefix checks such as "?? " on the returned lines stay
# reliable. Porcelain paths are relative to the repository root.
#
# @return [Array<String>, nil] One right-stripped status line per entry, or
#   nil when git exits unsuccessfully or the command cannot be run
def git_status_snapshot
  stdout, _stderr, status = Open3.capture3("git", "status", "--porcelain", chdir: @base_dir)
  return nil unless status.success?

  stdout.lines.map(&:rstrip)
rescue StandardError
  # Best effort: a missing git binary or base directory must not abort the run.
  nil
end
|
|
806
|
+
|
|
807
|
+
# Compare the worktree status taken before the run with a fresh snapshot.
#
# Status lines present after the run but not before are collected, and
# untracked entries (lines starting with "?? ") are dropped.
#
# @param pre_run_worktree [Array<String>, nil] Snapshot taken before the run
# @return [Hash] Empty when either snapshot is unavailable or no new
#   non-untracked entry appeared; otherwise
#   {dirty_worktree: true, new_tracked_entries: [...]}
def build_suite_diagnostics(pre_run_worktree)
  post_run_worktree = git_status_snapshot
  return {} if !pre_run_worktree || !post_run_worktree

  appeared = post_run_worktree - pre_run_worktree
  tracked = appeared.reject { |entry| entry.start_with?("?? ") }

  tracked.empty? ? {} : {dirty_worktree: true, new_tracked_entries: tracked}
end
|
|
821
|
+
|
|
742
822
|
# Load a scenario from file into a Models::TestScenario, with fallback
|
|
743
823
|
#
|
|
744
824
|
# @param package [String] Package name
|
|
@@ -806,7 +886,7 @@ module Ace
|
|
|
806
886
|
# @return [Hash] Test result
|
|
807
887
|
def run_single_test(package, test_file, options, run_id: nil)
|
|
808
888
|
cmd_array = build_test_command(package, test_file, options, run_id: run_id)
|
|
809
|
-
output, stderr, status = Open3.capture3(*cmd_array, chdir: @base_dir)
|
|
889
|
+
output, stderr, status = Open3.capture3(suite_subprocess_env, *cmd_array, chdir: @base_dir)
|
|
810
890
|
|
|
811
891
|
# Combine stdout and stderr for parsing
|
|
812
892
|
combined_output = output + stderr
|