ace-test-runner-e2e 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.ace-defaults/e2e-runner/config.yml +70 -0
- data/.ace-defaults/nav/protocols/guide-sources/ace-test-runner-e2e.yml +11 -0
- data/.ace-defaults/nav/protocols/skill-sources/ace-test-runner-e2e.yml +19 -0
- data/.ace-defaults/nav/protocols/tmpl-sources/ace-test-runner-e2e.yml +12 -0
- data/.ace-defaults/nav/protocols/wfi-sources/ace-test-runner-e2e.yml +11 -0
- data/CHANGELOG.md +1166 -0
- data/LICENSE +21 -0
- data/README.md +42 -0
- data/Rakefile +15 -0
- data/exe/ace-test-e2e +15 -0
- data/exe/ace-test-e2e-sh +67 -0
- data/exe/ace-test-e2e-suite +13 -0
- data/handbook/guides/e2e-testing.g.md +124 -0
- data/handbook/guides/scenario-yml-reference.g.md +182 -0
- data/handbook/guides/tc-authoring.g.md +131 -0
- data/handbook/skills/as-e2e-create/SKILL.md +30 -0
- data/handbook/skills/as-e2e-fix/SKILL.md +35 -0
- data/handbook/skills/as-e2e-manage/SKILL.md +31 -0
- data/handbook/skills/as-e2e-plan-changes/SKILL.md +30 -0
- data/handbook/skills/as-e2e-review/SKILL.md +35 -0
- data/handbook/skills/as-e2e-rewrite/SKILL.md +31 -0
- data/handbook/skills/as-e2e-run/SKILL.md +48 -0
- data/handbook/skills/as-e2e-setup-sandbox/SKILL.md +34 -0
- data/handbook/templates/ace-taskflow-fixture.template.md +322 -0
- data/handbook/templates/agent-experience-report.template.md +89 -0
- data/handbook/templates/metadata.template.yml +49 -0
- data/handbook/templates/scenario.yml.template.yml +60 -0
- data/handbook/templates/tc-file.template.md +45 -0
- data/handbook/templates/test-report.template.md +94 -0
- data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +126 -0
- data/handbook/workflow-instructions/e2e/create.wf.md +395 -0
- data/handbook/workflow-instructions/e2e/execute.wf.md +253 -0
- data/handbook/workflow-instructions/e2e/fix.wf.md +166 -0
- data/handbook/workflow-instructions/e2e/manage.wf.md +179 -0
- data/handbook/workflow-instructions/e2e/plan-changes.wf.md +255 -0
- data/handbook/workflow-instructions/e2e/review.wf.md +286 -0
- data/handbook/workflow-instructions/e2e/rewrite.wf.md +281 -0
- data/handbook/workflow-instructions/e2e/run.wf.md +355 -0
- data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +461 -0
- data/lib/ace/test/end_to_end_runner/atoms/display_helpers.rb +234 -0
- data/lib/ace/test/end_to_end_runner/atoms/prompt_builder.rb +199 -0
- data/lib/ace/test/end_to_end_runner/atoms/result_parser.rb +166 -0
- data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +166 -0
- data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +244 -0
- data/lib/ace/test/end_to_end_runner/atoms/suite_report_prompt_builder.rb +103 -0
- data/lib/ace/test/end_to_end_runner/atoms/tc_fidelity_validator.rb +39 -0
- data/lib/ace/test/end_to_end_runner/atoms/test_case_parser.rb +108 -0
- data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +130 -0
- data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +156 -0
- data/lib/ace/test/end_to_end_runner/models/test_case.rb +47 -0
- data/lib/ace/test/end_to_end_runner/models/test_result.rb +115 -0
- data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +90 -0
- data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +92 -0
- data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +75 -0
- data/lib/ace/test/end_to_end_runner/molecules/failure_finder.rb +203 -0
- data/lib/ace/test/end_to_end_runner/molecules/fixture_copier.rb +35 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +121 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +182 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +321 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +131 -0
- data/lib/ace/test/end_to_end_runner/molecules/progress_display_manager.rb +172 -0
- data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +259 -0
- data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +254 -0
- data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +181 -0
- data/lib/ace/test/end_to_end_runner/molecules/simple_display_manager.rb +72 -0
- data/lib/ace/test/end_to_end_runner/molecules/suite_progress_display_manager.rb +223 -0
- data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +277 -0
- data/lib/ace/test/end_to_end_runner/molecules/suite_simple_display_manager.rb +116 -0
- data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +136 -0
- data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +332 -0
- data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +830 -0
- data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +442 -0
- data/lib/ace/test/end_to_end_runner/version.rb +9 -0
- data/lib/ace/test/end_to_end_runner.rb +71 -0
- metadata +220 -0
|
@@ -0,0 +1,442 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
require "date"
|
|
5
|
+
require "yaml"
|
|
6
|
+
require "ace/b36ts"
|
|
7
|
+
|
|
8
|
+
module Ace
|
|
9
|
+
module Test
|
|
10
|
+
module EndToEndRunner
|
|
11
|
+
module Organisms
|
|
12
|
+
# Orchestrates E2E test discovery, execution, and reporting
|
|
13
|
+
#
|
|
14
|
+
# Handles both single test and package-wide test execution flows.
|
|
15
|
+
# Coordinates between test discovery, scenario parsing, LLM execution,
|
|
16
|
+
# and report writing.
|
|
17
|
+
#
|
|
18
|
+
# For CLI providers: agents write their own reports via workflow/skill,
|
|
19
|
+
# so the orchestrator skips ReportWriter and looks for agent-written
|
|
20
|
+
# report directories on disk.
|
|
21
|
+
#
|
|
22
|
+
# For API providers: orchestrator writes reports as before.
|
|
23
|
+
class TestOrchestrator
|
|
24
|
+
# Build an orchestrator, filling unset options from the loaded configuration.
#
# Explicit arguments win over the "execution" section of the configuration
# returned by Molecules::ConfigLoader.load, which in turn wins over the
# built-in defaults ("claude:sonnet", 300, 3).
#
# @param provider [String, nil] LLM provider:model string
# @param timeout [Integer, nil] Request timeout per test in seconds
# @param parallel [Integer, nil] Number of tests to run in parallel
# @param base_dir [String, nil] Base directory for test discovery (defaults to Dir.pwd)
# @param timestamp_generator [#call, nil] Callable that returns a timestamp string
# @param executor [#execute, nil] Injectable test executor (for testing)
# @param progress [Boolean] Enable animated progress display
def initialize(provider: nil, timeout: nil, parallel: nil, base_dir: nil, timestamp_generator: nil, executor: nil, progress: false)
  settings = Molecules::ConfigLoader.load
  from_config = ->(key) { settings.dig("execution", key) }

  @provider = provider || from_config.call("provider") || "claude:sonnet"
  @timeout = timeout || from_config.call("timeout") || 300
  @parallel = parallel || from_config.call("parallel") || 3
  @base_dir = base_dir || Dir.pwd
  @timestamp_generator = timestamp_generator || method(:default_timestamp)
  @progress = progress

  # Collaborators; only the executor is injectable.
  @discoverer = Molecules::TestDiscoverer.new
  @loader = Molecules::ScenarioLoader.new
  @executor = executor || Molecules::TestExecutor.new(provider: @provider, timeout: @timeout, config: settings)
  @report_writer = Molecules::ReportWriter.new
  @suite_report_writer = Molecules::SuiteReportWriter.new(config: settings)
end
|
|
45
|
+
|
|
46
|
+
# Discover and run the E2E tests of a package.
#
# A single discovered test goes through the single-test flow; several go
# through the package flow, which also writes a suite report.
#
# @param package [String] Package name (e.g., "ace-lint")
# @param test_id [String, nil] Optional test ID to run specific test
# @param test_cases [Array<String>, nil] Optional normalized test case IDs to filter
# @param verify [Boolean] Forwarded to the executor when it accepts a :verify keyword
# @param tags [Array<String>, nil] Optional scenario tags for discovery filtering
# @param cli_args [String, nil] Extra args for CLI providers
# @param run_id [String, nil] Externally supplied run ID; replaces the generated timestamp
# @param report_dir [String, nil] Explicit report directory; only honoured when exactly one test is found
# @param output [IO] Output stream for progress messages (default: $stdout)
# @return [Array<Models::TestResult>] List of test results (empty when nothing was discovered)
def run(package:, test_id: nil, test_cases: nil, verify: false, tags: nil,
  cli_args: nil, run_id: nil, report_dir: nil, output: $stdout)
  discovered = @discoverer.find_tests(package: package, test_id: test_id, tags: tags, base_dir: @base_dir)

  if discovered.empty?
    id_hint = test_id ? " matching #{test_id}" : ""
    output.puts "No E2E tests found in #{package}#{id_hint}"
    return []
  end

  # One identifier for the whole run; an external run_id takes precedence.
  stamp = run_id || generate_timestamp

  unless discovered.size == 1
    return run_package_tests(discovered, package, stamp, cli_args, output, test_cases: test_cases, verify: verify)
  end

  run_single_test(
    discovered.first, stamp, cli_args, output,
    test_cases: test_cases, verify: verify, report_dir: report_dir
  )
end
|
|
88
|
+
|
|
89
|
+
private
|
|
90
|
+
|
|
91
|
+
# Whether the configured provider is classified as a CLI provider.
#
# Delegates the decision to Atoms::CliProviderAdapter.
# @return [Boolean]
def cli_provider?
  adapter = Atoms::CliProviderAdapter
  adapter.cli_provider?(@provider)
end
|
|
96
|
+
|
|
97
|
+
# Prepare the sandbox in Ruby before the LLM takes over.
#
# Only applies to CLI providers running a scenario that declares setup
# steps; in that case a SetupExecutor populates a sandbox under
# <base_dir>/.ace-local/test-e2e so the LLM only has to execute test cases.
#
# @param scenario [Models::TestScenario] The test scenario
# @param timestamp [String] Timestamp for sandbox directory naming
# @param output [IO] Output stream for progress messages
# @return [Array(String, Hash, SetupExecutor)] [sandbox_path, env_vars, setup_executor],
#   or [nil, nil, nil] when setup does not apply or failed
def setup_sandbox_if_ts(scenario, timestamp, output)
  return [nil, nil, nil] unless cli_provider? && scenario.setup_steps.any?

  sandbox_dir = File.join(@base_dir, ".ace-local", "test-e2e", scenario.dir_name(timestamp))
  setup_executor = Molecules::SetupExecutor.new
  outcome = setup_executor.execute(
    setup_steps: scenario.setup_steps,
    sandbox_dir: sandbox_dir,
    fixture_source: scenario.fixture_path,
    scenario_name: scenario.test_id,
    run_id: timestamp
  )

  if outcome[:success]
    env = outcome[:env]
    project_root = env["PROJECT_ROOT_PATH"]
    # A relative project root is resolved against the sandbox directory.
    if project_root && !project_root.start_with?("/")
      env["PROJECT_ROOT_PATH"] = File.expand_path(project_root, sandbox_dir)
    end

    [File.expand_path(sandbox_dir), env, setup_executor]
  else
    output.puts "Warning: sandbox setup failed: #{outcome[:error]}"
    setup_executor.teardown
    [nil, nil, nil]
  end
end
|
|
132
|
+
|
|
133
|
+
# Run one scenario end to end and report its result.
#
# Loads the scenario from the directory containing +file+, prepares a
# sandbox when needed, executes it, then resolves the final result from the
# agent-written report (CLI providers) or writes the report itself (API
# providers). Any sandbox created here is torn down on exit.
#
# @param file [String] Path of the discovered test file
# @param timestamp [String] Run identifier used for sandbox/report naming
# @param cli_args [String, nil] Extra args for CLI providers
# @param output [IO] Output stream for progress messages
# @param test_cases [Array<String>, nil] Optional test case IDs to filter
# @param verify [Boolean] Forwarded to the executor; when true the agent metadata is not re-read
# @param report_dir [String, nil] Explicit report directory path (overrides computed path)
# @return [Array<Models::TestResult>] Single-element result array
def run_single_test(file, timestamp, cli_args, output, test_cases: nil, verify: false, report_dir: nil)
  scenario = @loader.load(File.dirname(file))
  display = build_display_manager([scenario], output)
  setup_executor = nil

  output.puts "Running E2E test: #{scenario.test_id} (#{scenario.package})"
  output.puts "Filtering test cases: #{test_cases.join(", ")}" if test_cases
  output.puts "Executing via #{@provider}#{" (pipeline mode: runner+verifier)" if cli_provider?}..."

  run_id = cli_provider? ? timestamp : nil

  # With an explicit report_dir the sandbox path is that path minus its
  # "-reports" suffix; setup only runs when that directory is absent.
  sandbox_path = report_dir ? report_dir.sub(/-reports\z/, "") : nil
  env_vars = nil
  unless sandbox_path && Dir.exist?(sandbox_path)
    sandbox_path, env_vars, setup_executor = setup_sandbox_if_ts(scenario, timestamp, output)
  end

  result = execute_scenario(
    scenario,
    cli_args: cli_args,
    run_id: run_id,
    test_cases: test_cases,
    sandbox_path: sandbox_path,
    env_vars: env_vars,
    report_dir: report_dir,
    verify: verify
  )

  # Use explicit report_dir when provided, otherwise compute from scenario
  expected_dir = report_dir || report_dir_for(scenario, timestamp)

  result =
    if !cli_provider?
      # API providers: write reports via ReportWriter
      @report_writer.write(result, scenario, report_dir: expected_dir)
      result.with_report_dir(expected_dir)
    elsif !Dir.exist?(expected_dir)
      # CLI providers write reports at a deterministic path; never fall
      # back to report directories left by other runs.
      missing_agent_report_result(scenario, expected_dir, result)
    elsif verify
      result.with_report_dir(expected_dir)
    else
      read_agent_result(scenario, expected_dir, result)
    end

  display.show_single_result(result)
  output.puts "Report: #{result.report_dir}" if result.report_dir

  [result]
ensure
  setup_executor&.teardown
end
|
|
191
|
+
|
|
192
|
+
# Run all tests in a package on a pool of worker threads and write a suite report.
#
# Scenarios are queued and consumed by up to @parallel workers. Each worker
# prepares a sandbox when needed, executes the scenario, resolves the final
# result (agent-written report for CLI providers, ReportWriter for API
# providers) and stores it at the scenario's index, so the returned array
# keeps the order of +files+.
#
# @param files [Array<String>] Discovered test file paths
# @param package [String] Package name, passed to the suite report writer
# @param timestamp [String] Run identifier for the suite (and for API-provider reports)
# @param cli_args [String, nil] Extra args for CLI providers
# @param output [IO] Output stream for progress messages
# @param test_cases [Array<String>, nil] Optional test case IDs to filter
# @param verify [Boolean] Forwarded to the executor; when true the agent metadata is not re-read
# @return [Array<Models::TestResult>] Results for all tests
def run_package_tests(files, package, timestamp, cli_args, output, test_cases: nil, verify: false)
  # Load scenarios upfront for titles and report generation
  scenarios = files.map { |f| @loader.load(File.dirname(f)) }

  display = build_display_manager(scenarios, output)
  display.initialize_display

  # Generate unique timestamps per test for CLI providers (deterministic report paths)
  run_ids = cli_provider? ? generate_timestamps(scenarios.size) : Array.new(scenarios.size)

  queue = Queue.new
  scenarios.each_with_index { |scenario, index| queue << [index, scenario, run_ids[index]] }

  results = Array.new(files.size)
  mutex = Mutex.new
  completed = 0
  done = false

  # Always start at least one worker: a zero or negative parallel setting
  # would otherwise leave the queue untouched and every result nil.
  thread_count = [[@parallel, files.size].min, 1].max

  refresh_thread = if @progress
    Thread.new do
      until done
        sleep REFRESH_INTERVAL
        mutex.synchronize { display.refresh }
      end
    end
  end

  begin
    threads = Array.new(thread_count) do
      Thread.new do
        # Non-blocking pop raises ThreadError once the queue is drained.
        while (item = begin; queue.pop(true); rescue ThreadError; nil; end)
          index, scenario, run_id = item
          scenario_stamp = run_id || timestamp

          mutex.synchronize { display.test_started(scenario) }

          # Intersect test_cases with scenario's available IDs to avoid
          # workflow validation errors when filtering across multiple scenarios
          scenario_test_cases = if test_cases
            matching = test_cases & scenario.test_case_ids
            matching.empty? ? nil : matching
          end

          setup_executor = nil
          if test_cases && scenario_test_cases.nil?
            # Skip scenario entirely when filtering is active but no test cases match
            result = Models::TestResult.new(
              test_id: scenario.test_id,
              status: "skip",
              test_cases: [],
              summary: "Skipped: no matching test cases"
            )
          else
            begin
              sandbox_path, env_vars, setup_executor = setup_sandbox_if_ts(scenario, scenario_stamp, output)
              result = execute_scenario(
                scenario,
                cli_args: cli_args,
                run_id: run_id,
                test_cases: scenario_test_cases,
                sandbox_path: sandbox_path,
                env_vars: env_vars,
                verify: verify
              )
            ensure
              setup_executor&.teardown
            end
          end

          report_dir = report_dir_for(scenario, scenario_stamp)

          if cli_provider?
            # CLI providers write their own report at the deterministic path.
            result = if Dir.exist?(report_dir)
              verify ? result.with_report_dir(report_dir) : read_agent_result(scenario, report_dir, result)
            else
              missing_agent_report_result(scenario, report_dir, result)
            end
          else
            @report_writer.write(result, scenario, report_dir: report_dir)
            result = result.with_report_dir(report_dir)
          end

          mutex.synchronize do
            results[index] = result
            completed += 1
            display.test_completed(scenario, result, completed, files.size)
          end
        end
      end
    end

    threads.each(&:join)
  ensure
    # Stop the refresh loop even when a worker raised; otherwise the
    # refresh thread would keep running after this method unwinds.
    done = true
    refresh_thread&.join
  end

  # Write suite report
  report_path = @suite_report_writer.write(
    results, scenarios,
    package: package, timestamp: timestamp, base_dir: @base_dir
  )

  display.show_summary(results, report_path)

  results
end
|
|
304
|
+
|
|
305
|
+
# Pick the display manager matching the progress setting.
#
# The animated ProgressDisplayManager is used when progress display was
# requested at construction, the SimpleDisplayManager otherwise.
#
# @param scenarios [Array<Models::TestScenario>]
# @param output [IO]
# @return [Molecules::SimpleDisplayManager, Molecules::ProgressDisplayManager]
def build_display_manager(scenarios, output)
  manager_class = @progress ? Molecules::ProgressDisplayManager : Molecules::SimpleDisplayManager
  manager_class.new(scenarios, output: output, parallel: @parallel)
end
|
|
316
|
+
|
|
317
|
+
# Compute where the reports of a scenario run are stored.
#
# @param scenario [#dir_name] Scenario providing the run directory name
# @param timestamp [String] Run identifier passed to scenario.dir_name
# @return [String] <base_dir>/.ace-local/test-e2e/<dir_name>-reports
def report_dir_for(scenario, timestamp)
  reports_name = "#{scenario.dir_name(timestamp)}-reports"
  File.join(@base_dir, ".ace-local", "test-e2e", reports_name)
end
|
|
323
|
+
|
|
324
|
+
# Ask the injected generator for a fresh timestamp ID.
#
# @return [String] Whatever the generator returns (a B36ts ID with the default generator)
def generate_timestamp
  generator = @timestamp_generator
  generator.call
end
|
|
329
|
+
|
|
330
|
+
# Produce one timestamp ID per test of a batch run.
#
# Each ID encodes the current UTC time shifted by a further 50ms per
# position, using the Ace::B36ts "50ms" format, so IDs generated in the
# same instant still differ.
#
# @param count [Integer] Number of unique timestamps needed
# @return [Array<String>] Array of unique timestamp strings
def generate_timestamps(count)
  (0...count).map do |slot|
    shifted = Time.now.utc + (slot * 0.05) # 50ms offset per ID
    Ace::B36ts.encode(shifted, format: :"50ms")
  end
end
|
|
343
|
+
|
|
344
|
+
# Derive the authoritative result from the agent-written metadata.yml.
#
# CLI providers record the real pass/fail outcome in metadata.yml inside
# their report directory, which may disagree with the parsed response
# text; when the file is readable its status and counts are trusted.
# Counts are read from the flat "tcs-*" keys first, then from the nested
# "results" hash. Any error while reading falls back to +fallback_result+.
#
# @param scenario [Models::TestScenario] The test scenario
# @param agent_dir [String] Path to agent report directory
# @param fallback_result [Models::TestResult] Result to use if metadata unreadable
# @return [Models::TestResult] Result with authoritative status
def read_agent_result(scenario, agent_dir, fallback_result)
  metadata_path = File.join(agent_dir, "metadata.yml")
  return fallback_result.with_report_dir(agent_dir) unless File.exist?(metadata_path)

  metadata = YAML.safe_load_file(metadata_path, permitted_classes: [Date])
  count_for = ->(flat_key, nested_key) { metadata[flat_key] || metadata.dig("results", nested_key) || 0 }

  status = metadata["status"] || fallback_result.status
  passed = count_for.call("tcs-passed", "passed")
  failed = count_for.call("tcs-failed", "failed")
  total = count_for.call("tcs-total", "total")

  # Reconcile: if all cases passed, status should be "pass"
  status = "pass" if passed == total && total > 0

  # The metadata only carries counts, so per-case entries are synthesized:
  # passing cases are numbered first, failing cases continue the sequence.
  synthetic_case = ->(number, outcome) do
    {id: format("TC-%03d", number), description: "", status: outcome, actual: "", notes: ""}
  end
  test_cases = passed.times.map { |i| synthetic_case.call(i + 1, "pass") }
  test_cases += failed.times.map { |i| synthetic_case.call(passed + i + 1, "fail") }

  Models::TestResult.new(
    test_id: scenario.test_id,
    status: status,
    test_cases: test_cases,
    summary: "#{passed}/#{total} passed",
    started_at: fallback_result.started_at,
    completed_at: fallback_result.completed_at,
    report_dir: agent_dir
  )
rescue
  # Best effort: unreadable or malformed metadata keeps the parsed result.
  fallback_result.with_report_dir(agent_dir)
end
|
|
386
|
+
|
|
387
|
+
# Result to report when a CLI-provider run left no report directory.
#
# A skipped result is passed through (with the expected directory
# attached); anything else becomes an "error" result explaining that the
# expected report directory was never created.
#
# @param scenario [Models::TestScenario]
# @param expected_dir [String] Deterministic report directory path
# @param fallback_result [Models::TestResult]
# @return [Models::TestResult]
def missing_agent_report_result(scenario, expected_dir, fallback_result)
  if fallback_result.status == "skip"
    fallback_result.with_report_dir(expected_dir)
  else
    Models::TestResult.new(
      test_id: scenario.test_id,
      status: "error",
      test_cases: fallback_result.test_cases,
      summary: "Missing CLI report directory",
      error: "Expected report directory was not created: #{expected_dir}",
      started_at: fallback_result.started_at,
      completed_at: fallback_result.completed_at,
      report_dir: expected_dir
    )
  end
end
|
|
408
|
+
|
|
409
|
+
# Built-in timestamp generator: the current UTC time encoded by
# Ace::B36ts in its "50ms" format.
#
# @return [String] Encoded timestamp ID
def default_timestamp
  now_utc = Time.now.utc
  Ace::B36ts.encode(now_utc, format: :"50ms")
end
|
|
414
|
+
|
|
415
|
+
# Invoke the executor, passing only the optional keywords it understands.
#
# The :timeout and :verify keywords are newer than some executor doubles,
# so each is added only when the executor's #execute declares it (or takes
# **kwargs). The timeout is the scenario's own value when set, otherwise
# the orchestrator-wide timeout.
#
# @param scenario [Models::TestScenario] Scenario to execute
# @param cli_args [String, nil] Extra args for CLI providers
# @param run_id [String, nil] Run identifier
# @param test_cases [Array<String>, nil] Test case IDs to run
# @param sandbox_path [String, nil] Prepared sandbox directory
# @param env_vars [Hash, nil] Environment produced by sandbox setup
# @param report_dir [String, nil] Explicit report directory
# @param verify [Boolean] Forwarded as :verify when supported
# @return [Object] Whatever the executor's #execute returns
def execute_scenario(scenario, cli_args:, run_id:, test_cases:, sandbox_path:, env_vars:, report_dir: nil, verify: false)
  declared = @executor.method(:execute).parameters
  accepts = lambda do |keyword|
    declared.any? do |kind, name|
      kind == :keyrest || (%i[key keyreq].include?(kind) && name == keyword)
    end
  end

  options = {
    cli_args: cli_args,
    run_id: run_id,
    test_cases: test_cases,
    sandbox_path: sandbox_path,
    env_vars: env_vars,
    report_dir: report_dir
  }
  options[:timeout] = (scenario.timeout || @timeout) if accepts.call(:timeout)
  options[:verify] = verify if accepts.call(:verify)

  @executor.execute(scenario, **options)
end
|
|
438
|
+
end
|
|
439
|
+
end
|
|
440
|
+
end
|
|
441
|
+
end
|
|
442
|
+
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "end_to_end_runner/version"
|
|
4
|
+
|
|
5
|
+
# Models
|
|
6
|
+
require_relative "end_to_end_runner/models/test_case"
|
|
7
|
+
require_relative "end_to_end_runner/models/test_scenario"
|
|
8
|
+
require_relative "end_to_end_runner/models/test_result"
|
|
9
|
+
|
|
10
|
+
# Atoms
|
|
11
|
+
require_relative "end_to_end_runner/atoms/prompt_builder"
|
|
12
|
+
require_relative "end_to_end_runner/atoms/result_parser"
|
|
13
|
+
require_relative "end_to_end_runner/atoms/skill_prompt_builder"
|
|
14
|
+
require_relative "end_to_end_runner/atoms/skill_result_parser"
|
|
15
|
+
require_relative "end_to_end_runner/atoms/suite_report_prompt_builder"
|
|
16
|
+
require_relative "end_to_end_runner/atoms/test_case_parser"
|
|
17
|
+
require_relative "end_to_end_runner/atoms/tc_fidelity_validator"
|
|
18
|
+
require_relative "end_to_end_runner/atoms/display_helpers"
|
|
19
|
+
|
|
20
|
+
# Molecules
|
|
21
|
+
require_relative "end_to_end_runner/molecules/fixture_copier"
|
|
22
|
+
require_relative "end_to_end_runner/molecules/scenario_loader"
|
|
23
|
+
require_relative "end_to_end_runner/molecules/setup_executor"
|
|
24
|
+
require_relative "end_to_end_runner/molecules/config_loader"
|
|
25
|
+
require_relative "end_to_end_runner/molecules/test_discoverer"
|
|
26
|
+
require_relative "end_to_end_runner/molecules/test_executor"
|
|
27
|
+
require_relative "end_to_end_runner/molecules/pipeline_sandbox_builder"
|
|
28
|
+
require_relative "end_to_end_runner/molecules/pipeline_prompt_bundler"
|
|
29
|
+
require_relative "end_to_end_runner/molecules/pipeline_report_generator"
|
|
30
|
+
require_relative "end_to_end_runner/molecules/pipeline_executor"
|
|
31
|
+
require_relative "end_to_end_runner/molecules/report_writer"
|
|
32
|
+
require_relative "end_to_end_runner/molecules/suite_report_writer"
|
|
33
|
+
require_relative "end_to_end_runner/molecules/simple_display_manager"
|
|
34
|
+
require_relative "end_to_end_runner/molecules/progress_display_manager"
|
|
35
|
+
require_relative "end_to_end_runner/molecules/suite_simple_display_manager"
|
|
36
|
+
require_relative "end_to_end_runner/molecules/suite_progress_display_manager"
|
|
37
|
+
require_relative "end_to_end_runner/molecules/affected_detector"
|
|
38
|
+
require_relative "end_to_end_runner/molecules/failure_finder"
|
|
39
|
+
|
|
40
|
+
# Organisms
|
|
41
|
+
require_relative "end_to_end_runner/organisms/test_orchestrator"
|
|
42
|
+
require_relative "end_to_end_runner/organisms/suite_orchestrator"
|
|
43
|
+
|
|
44
|
+
# CLI
|
|
45
|
+
require_relative "end_to_end_runner/cli/commands/run_test"
|
|
46
|
+
require_relative "end_to_end_runner/cli/commands/run_suite"
|
|
47
|
+
|
|
48
|
+
module Ace
  module Test
    module EndToEndRunner
      # Entry point for the ace-test-runner-e2e gem.
      #
      # This gem provides infrastructure for agent-executed end-to-end tests:
      # - CLI command (ace-test-e2e) for running tests via LLM
      # - Workflows for test execution (handbook/workflow-instructions/e2e/run.wf.md)
      # - Templates for test scenarios (handbook/templates/scenario.yml.template.yml,
      #   handbook/templates/tc-file.template.md)
      # - Conventions for E2E testing (handbook/guides/e2e-testing.g.md)
      #
      # Tests can be executed by AI agents or via the CLI tool.
      # See handbook/ for workflows and guides.

      # Seconds the progress refresh loop sleeps between display refreshes.
      REFRESH_INTERVAL = 0.25

      # Module namespaces
      module Atoms; end

      module Molecules; end

      module Organisms; end

      module Models; end
    end
  end
end
|