ace-test-runner-e2e 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/.ace-defaults/e2e-runner/config.yml +70 -0
  3. data/.ace-defaults/nav/protocols/guide-sources/ace-test-runner-e2e.yml +11 -0
  4. data/.ace-defaults/nav/protocols/skill-sources/ace-test-runner-e2e.yml +19 -0
  5. data/.ace-defaults/nav/protocols/tmpl-sources/ace-test-runner-e2e.yml +12 -0
  6. data/.ace-defaults/nav/protocols/wfi-sources/ace-test-runner-e2e.yml +11 -0
  7. data/CHANGELOG.md +1166 -0
  8. data/LICENSE +21 -0
  9. data/README.md +42 -0
  10. data/Rakefile +15 -0
  11. data/exe/ace-test-e2e +15 -0
  12. data/exe/ace-test-e2e-sh +67 -0
  13. data/exe/ace-test-e2e-suite +13 -0
  14. data/handbook/guides/e2e-testing.g.md +124 -0
  15. data/handbook/guides/scenario-yml-reference.g.md +182 -0
  16. data/handbook/guides/tc-authoring.g.md +131 -0
  17. data/handbook/skills/as-e2e-create/SKILL.md +30 -0
  18. data/handbook/skills/as-e2e-fix/SKILL.md +35 -0
  19. data/handbook/skills/as-e2e-manage/SKILL.md +31 -0
  20. data/handbook/skills/as-e2e-plan-changes/SKILL.md +30 -0
  21. data/handbook/skills/as-e2e-review/SKILL.md +35 -0
  22. data/handbook/skills/as-e2e-rewrite/SKILL.md +31 -0
  23. data/handbook/skills/as-e2e-run/SKILL.md +48 -0
  24. data/handbook/skills/as-e2e-setup-sandbox/SKILL.md +34 -0
  25. data/handbook/templates/ace-taskflow-fixture.template.md +322 -0
  26. data/handbook/templates/agent-experience-report.template.md +89 -0
  27. data/handbook/templates/metadata.template.yml +49 -0
  28. data/handbook/templates/scenario.yml.template.yml +60 -0
  29. data/handbook/templates/tc-file.template.md +45 -0
  30. data/handbook/templates/test-report.template.md +94 -0
  31. data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +126 -0
  32. data/handbook/workflow-instructions/e2e/create.wf.md +395 -0
  33. data/handbook/workflow-instructions/e2e/execute.wf.md +253 -0
  34. data/handbook/workflow-instructions/e2e/fix.wf.md +166 -0
  35. data/handbook/workflow-instructions/e2e/manage.wf.md +179 -0
  36. data/handbook/workflow-instructions/e2e/plan-changes.wf.md +255 -0
  37. data/handbook/workflow-instructions/e2e/review.wf.md +286 -0
  38. data/handbook/workflow-instructions/e2e/rewrite.wf.md +281 -0
  39. data/handbook/workflow-instructions/e2e/run.wf.md +355 -0
  40. data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +461 -0
  41. data/lib/ace/test/end_to_end_runner/atoms/display_helpers.rb +234 -0
  42. data/lib/ace/test/end_to_end_runner/atoms/prompt_builder.rb +199 -0
  43. data/lib/ace/test/end_to_end_runner/atoms/result_parser.rb +166 -0
  44. data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +166 -0
  45. data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +244 -0
  46. data/lib/ace/test/end_to_end_runner/atoms/suite_report_prompt_builder.rb +103 -0
  47. data/lib/ace/test/end_to_end_runner/atoms/tc_fidelity_validator.rb +39 -0
  48. data/lib/ace/test/end_to_end_runner/atoms/test_case_parser.rb +108 -0
  49. data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +130 -0
  50. data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +156 -0
  51. data/lib/ace/test/end_to_end_runner/models/test_case.rb +47 -0
  52. data/lib/ace/test/end_to_end_runner/models/test_result.rb +115 -0
  53. data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +90 -0
  54. data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +92 -0
  55. data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +75 -0
  56. data/lib/ace/test/end_to_end_runner/molecules/failure_finder.rb +203 -0
  57. data/lib/ace/test/end_to_end_runner/molecules/fixture_copier.rb +35 -0
  58. data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +121 -0
  59. data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +182 -0
  60. data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +321 -0
  61. data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +131 -0
  62. data/lib/ace/test/end_to_end_runner/molecules/progress_display_manager.rb +172 -0
  63. data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +259 -0
  64. data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +254 -0
  65. data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +181 -0
  66. data/lib/ace/test/end_to_end_runner/molecules/simple_display_manager.rb +72 -0
  67. data/lib/ace/test/end_to_end_runner/molecules/suite_progress_display_manager.rb +223 -0
  68. data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +277 -0
  69. data/lib/ace/test/end_to_end_runner/molecules/suite_simple_display_manager.rb +116 -0
  70. data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +136 -0
  71. data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +332 -0
  72. data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +830 -0
  73. data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +442 -0
  74. data/lib/ace/test/end_to_end_runner/version.rb +9 -0
  75. data/lib/ace/test/end_to_end_runner.rb +71 -0
  76. metadata +220 -0
@@ -0,0 +1,442 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fileutils"
4
+ require "date"
5
+ require "yaml"
6
+ require "ace/b36ts"
7
+
8
+ module Ace
9
+ module Test
10
+ module EndToEndRunner
11
+ module Organisms
12
+ # Orchestrates E2E test discovery, execution, and reporting
13
+ #
14
+ # Handles both single test and package-wide test execution flows.
15
+ # Coordinates between test discovery, scenario parsing, LLM execution,
16
+ # and report writing.
17
+ #
18
+ # For CLI providers: agents write their own reports via workflow/skill,
19
+ # so the orchestrator skips ReportWriter and looks for agent-written
20
+ # report directories on disk.
21
+ #
22
+ # For API providers: orchestrator writes reports as before.
23
+ class TestOrchestrator
24
# Build an orchestrator. Each execution setting is resolved from the
# explicit argument first, then the loaded config's "execution" section,
# then a built-in default.
#
# @param provider [String, nil] LLM provider:model string (default "claude:sonnet")
# @param timeout [Integer, nil] Request timeout per test in seconds (default 300)
# @param parallel [Integer, nil] Number of tests to run in parallel (default 3)
# @param base_dir [String, nil] Base directory for test discovery (default Dir.pwd)
# @param timestamp_generator [#call, nil] Callable that returns a timestamp string
# @param executor [#execute, nil] Injectable test executor (for testing)
# @param progress [Boolean] Enable animated progress display
def initialize(provider: nil, timeout: nil, parallel: nil, base_dir: nil, timestamp_generator: nil, executor: nil, progress: false)
  config = Molecules::ConfigLoader.load
  # Lazy lookup so the config is only consulted when the argument is absent.
  configured = ->(key) { config.dig("execution", key) }

  @provider = provider || configured.call("provider") || "claude:sonnet"
  @timeout = timeout || configured.call("timeout") || 300
  @parallel = parallel || configured.call("parallel") || 3
  @base_dir = base_dir || Dir.pwd
  @timestamp_generator = timestamp_generator || method(:default_timestamp)
  @progress = progress

  # Collaborators
  @discoverer = Molecules::TestDiscoverer.new
  @loader = Molecules::ScenarioLoader.new
  @executor = executor || Molecules::TestExecutor.new(provider: @provider, timeout: @timeout, config: config)
  @report_writer = Molecules::ReportWriter.new
  @suite_report_writer = Molecules::SuiteReportWriter.new(config: config)
end
45
+
46
# Run E2E tests for a package, optionally filtering by test ID
#
# Discovers matching test files, then dispatches to the single-test flow
# (exactly one file) or the package-wide flow (several files).
#
# @param package [String] Package name (e.g., "ace-lint")
# @param test_id [String, nil] Optional test ID to run specific test
# @param test_cases [Array<String>, nil] Optional normalized test case IDs to filter
# @param verify [Boolean] Forwarded unchanged to the single/package run
# @param tags [Array<String>, nil] Optional scenario tags for discovery filtering
# @param cli_args [String, nil] Extra args for CLI providers
# @param run_id [String, nil] Externally supplied run timestamp; a fresh one
#   is generated when omitted
# @param report_dir [String, nil] Explicit report directory; only forwarded
#   when exactly one test file is discovered (the package flow does not take it)
# @param output [IO] Output stream for progress messages (default: $stdout)
# @return [Array<Models::TestResult>] List of test results (empty when no
#   tests were discovered)
def run(package:, test_id: nil, test_cases: nil, verify: false, tags: nil,
  cli_args: nil, run_id: nil, report_dir: nil, output: $stdout)
  files = @discoverer.find_tests(
    package: package,
    test_id: test_id,
    tags: tags,
    base_dir: @base_dir
  )

  if files.empty?
    match_suffix = test_id ? " matching #{test_id}" : ""
    output.puts "No E2E tests found in #{package}#{match_suffix}"
    return []
  end

  # Generate timestamp for this run (use external run_id when provided)
  timestamp = run_id || generate_timestamp

  if files.size == 1
    run_single_test(
      files.first,
      timestamp,
      cli_args,
      output,
      test_cases: test_cases,
      verify: verify,
      report_dir: report_dir
    )
  else
    run_package_tests(files, package, timestamp, cli_args, output, test_cases: test_cases, verify: verify)
  end
end
88
+
89
+ private
90
+
91
# Whether the configured provider is a CLI provider, as decided by
# Atoms::CliProviderAdapter.
#
# NOTE(review): Atoms::CliProviderAdapter is not among the files required
# by end_to_end_runner.rb in this package listing - confirm where it is
# defined/loaded.
#
# @return [Boolean]
def cli_provider?
  adapter = Atoms::CliProviderAdapter
  adapter.cli_provider?(@provider)
end
96
+
97
# Run deterministic setup in Ruby before handing off to LLM
#
# Only applies when the provider is a CLI provider and the scenario has
# setup steps; in that case a sandbox directory under
# <base_dir>/.ace-local/test-e2e is prepared via SetupExecutor so the LLM
# only does TC execution.
#
# On setup failure a warning is printed, the executor is torn down and the
# "nothing set up" triple is returned. A relative PROJECT_ROOT_PATH in the
# returned env is resolved against the sandbox directory. A successful
# result that carries no :env yields an empty env hash instead of raising.
#
# @param scenario [Models::TestScenario] The test scenario
# @param timestamp [String] Timestamp for sandbox directory naming
# @param output [IO] Output stream for progress messages
# @return [Array(String, Hash, SetupExecutor)] [sandbox_path, env_vars, setup_executor] or [nil, nil, nil]
def setup_sandbox_if_ts(scenario, timestamp, output)
  return [nil, nil, nil] unless cli_provider? && scenario.setup_steps.any?

  sandbox_dir = File.join(@base_dir, ".ace-local", "test-e2e", scenario.dir_name(timestamp))
  setup_executor = Molecules::SetupExecutor.new
  result = setup_executor.execute(
    setup_steps: scenario.setup_steps,
    sandbox_dir: sandbox_dir,
    fixture_source: scenario.fixture_path,
    scenario_name: scenario.test_id,
    run_id: timestamp
  )

  unless result[:success]
    output.puts "Warning: sandbox setup failed: #{result[:error]}"
    setup_executor.teardown
    return [nil, nil, nil]
  end

  # Guard against a result without :env so a successful setup is not
  # turned into a NoMethodError (which would also skip teardown upstream).
  env = result[:env] || {}
  project_root = env["PROJECT_ROOT_PATH"]
  if project_root && !project_root.start_with?("/")
    env["PROJECT_ROOT_PATH"] = File.expand_path(project_root, sandbox_dir)
  end

  [File.expand_path(sandbox_dir), env, setup_executor]
end
132
+
133
# Run one scenario end to end: load it, prepare (or reuse) a sandbox,
# execute it, resolve the final result/report location and display it.
# Any sandbox executor created here is torn down on the way out.
#
# @param file [String] Path to a file inside the scenario directory
# @param timestamp [String] Run timestamp used for sandbox/report naming
# @param cli_args [String, nil] Extra args for CLI providers
# @param output [IO] Output stream for progress messages
# @param test_cases [Array<String>, nil] Optional test case IDs to filter
# @param verify [Boolean] Forwarded to the executor; with a CLI provider the
#   executor's result is kept as-is instead of re-reading metadata.yml
# @param report_dir [String, nil] Explicit report directory path (overrides computed path)
# @return [Array<Models::TestResult>] Single-element result array
def run_single_test(file, timestamp, cli_args, output, test_cases: nil, verify: false, report_dir: nil)
  scenario = @loader.load(File.dirname(file))
  display = build_display_manager([scenario], output)
  setup_executor = nil

  output.puts "Running E2E test: #{scenario.test_id} (#{scenario.package})"
  output.puts "Filtering test cases: #{test_cases.join(", ")}" if test_cases
  pipeline_note = cli_provider? ? " (pipeline mode: runner+verifier)" : nil
  output.puts "Executing via #{@provider}#{pipeline_note}..."

  run_id = nil
  run_id = timestamp if cli_provider?

  # An explicit report_dir implies a sandbox next to it (same path without
  # the "-reports" suffix); reuse that sandbox when it already exists.
  derived_sandbox = report_dir ? report_dir.sub(/-reports\z/, "") : nil
  if derived_sandbox && Dir.exist?(derived_sandbox)
    sandbox_path = derived_sandbox
    env_vars = nil
  else
    sandbox_path, env_vars, setup_executor = setup_sandbox_if_ts(scenario, timestamp, output)
  end

  executed = execute_scenario(
    scenario,
    cli_args: cli_args,
    run_id: run_id,
    test_cases: test_cases,
    sandbox_path: sandbox_path,
    env_vars: env_vars,
    report_dir: report_dir,
    verify: verify
  )

  # Use explicit report_dir when provided, otherwise compute from scenario
  expected_dir = report_dir || report_dir_for(scenario, timestamp)

  result =
    if !cli_provider?
      # API providers: write reports via ReportWriter
      @report_writer.write(executed, scenario, report_dir: expected_dir)
      executed.with_report_dir(expected_dir)
    elsif !Dir.exist?(expected_dir)
      # CLI providers write reports via workflow at a deterministic path.
      # Do not fall back to older report directories from other runs.
      missing_agent_report_result(scenario, expected_dir, executed)
    elsif verify
      executed.with_report_dir(expected_dir)
    else
      read_agent_result(scenario, expected_dir, executed)
    end

  display.show_single_result(result)
  output.puts "Report: #{result.report_dir}" if result.report_dir

  [result]
ensure
  setup_executor&.teardown
end
191
+
192
# Run all tests in a package
#
# Scenarios are loaded upfront, queued, and processed by up to @parallel
# worker threads. Results are stored at the index of their scenario, so the
# returned array follows the order of +files+. When progress display is
# enabled, a background thread refreshes the display every REFRESH_INTERVAL
# seconds; it is always stopped, even when a worker raises and the
# exception propagates to the caller.
#
# @param files [Array<String>] Discovered test files (one per scenario)
# @param package [String] Package name, passed to the suite report writer
# @param timestamp [String] Run timestamp (suite report; also per-scenario
#   naming for non-CLI providers)
# @param cli_args [String, nil] Extra args for CLI providers
# @param output [IO] Output stream for progress messages
# @param test_cases [Array<String>, nil] Optional test case IDs to filter
# @param verify [Boolean] Forwarded to the executor
# @return [Array<Models::TestResult>] Results for all tests
def run_package_tests(files, package, timestamp, cli_args, output, test_cases: nil, verify: false)
  # Load scenarios upfront for titles and report generation
  scenarios = files.map { |f| @loader.load(File.dirname(f)) }

  display = build_display_manager(scenarios, output)
  display.initialize_display

  # Generate unique timestamps per test for CLI providers (deterministic report paths)
  run_ids = cli_provider? ? generate_timestamps(scenarios.size) : Array.new(scenarios.size)

  queue = Queue.new
  scenarios.each_with_index { |scenario, index| queue << [index, scenario, run_ids[index]] }

  results = Array.new(files.size)
  mutex = Mutex.new
  completed = 0
  done = false

  refresh_thread = if @progress
    Thread.new do
      until done
        sleep REFRESH_INTERVAL
        mutex.synchronize { display.refresh }
      end
    end
  end

  begin
    workers = Array.new([@parallel, files.size].min) do
      Thread.new do
        # Non-blocking pop: ThreadError signals an empty queue and ends the worker.
        while (item = begin; queue.pop(true); rescue ThreadError; nil; end)
          index, scenario, run_id = item

          mutex.synchronize { display.test_started(scenario) }

          result = run_queued_scenario(
            scenario, run_id, timestamp, cli_args, output,
            test_cases: test_cases, verify: verify
          )

          mutex.synchronize do
            results[index] = result
            completed += 1
            display.test_completed(scenario, result, completed, files.size)
          end
        end
      end
    end

    # Thread#join re-raises a worker's exception here.
    workers.each(&:join)
  ensure
    # Always stop the refresh loop; without this a failing worker would
    # leave the refresh thread spinning forever.
    done = true
    refresh_thread&.join
  end

  # Write suite report
  report_path = @suite_report_writer.write(
    results, scenarios,
    package: package, timestamp: timestamp, base_dir: @base_dir
  )

  display.show_summary(results, report_path)

  results
end

# Execute one queued scenario of a package run and resolve its final result.
#
# @param scenario [Models::TestScenario]
# @param run_id [String, nil] Per-scenario run ID (nil for non-CLI providers)
# @param timestamp [String] Package run timestamp, used when run_id is nil
# @param cli_args [String, nil] Extra args for CLI providers
# @param output [IO] Output stream for progress messages
# @param test_cases [Array<String>, nil] Requested test case IDs (unfiltered)
# @param verify [Boolean] Forwarded to the executor
# @return [Models::TestResult]
def run_queued_scenario(scenario, run_id, timestamp, cli_args, output, test_cases:, verify:)
  dir_timestamp = run_id || timestamp

  # Intersect test_cases with scenario's available IDs to avoid
  # workflow validation errors when filtering across multiple scenarios
  scenario_test_cases = nil
  if test_cases
    matching = test_cases & scenario.test_case_ids
    scenario_test_cases = matching unless matching.empty?
  end

  result =
    if test_cases && scenario_test_cases.nil?
      # Skip scenario entirely when filtering is active but no test cases match
      Models::TestResult.new(
        test_id: scenario.test_id,
        status: "skip",
        test_cases: [],
        summary: "Skipped: no matching test cases"
      )
    else
      setup_executor = nil
      begin
        sandbox_path, env_vars, setup_executor = setup_sandbox_if_ts(scenario, dir_timestamp, output)
        execute_scenario(
          scenario,
          cli_args: cli_args,
          run_id: run_id,
          test_cases: scenario_test_cases,
          sandbox_path: sandbox_path,
          env_vars: env_vars,
          verify: verify
        )
      ensure
        setup_executor&.teardown
      end
    end

  report_dir = report_dir_for(scenario, dir_timestamp)

  if !cli_provider?
    @report_writer.write(result, scenario, report_dir: report_dir)
    result.with_report_dir(report_dir)
  elsif !Dir.exist?(report_dir)
    missing_agent_report_result(scenario, report_dir, result)
  elsif verify
    result.with_report_dir(report_dir)
  else
    read_agent_result(scenario, report_dir, result)
  end
end
304
+
305
# Pick the display manager for this run: the progress variant when
# progress display was requested, the simple variant otherwise.
#
# @param scenarios [Array<Models::TestScenario>]
# @param output [IO]
# @return [Molecules::SimpleDisplayManager, Molecules::ProgressDisplayManager]
def build_display_manager(scenarios, output)
  manager_class = @progress ? Molecules::ProgressDisplayManager : Molecules::SimpleDisplayManager
  manager_class.new(scenarios, output: output, parallel: @parallel)
end
316
+
317
# Compute where a scenario's reports live for a given run:
# <base_dir>/.ace-local/test-e2e/<scenario dir name>-reports
#
# @param scenario [Models::TestScenario] Provides the timestamped dir name
# @param timestamp [String] Run timestamp
# @return [String] Path to reports directory
def report_dir_for(scenario, timestamp)
  reports_leaf = "#{scenario.dir_name(timestamp)}-reports"
  File.join(@base_dir, ".ace-local", "test-e2e", reports_leaf)
end
323
+
324
# Produce a run timestamp by invoking the generator injected at
# construction time.
#
# @return [String] Whatever the generator returns (a 7-char timestamp ID
#   with the default generator, per the original note)
def generate_timestamp
  generator = @timestamp_generator
  generator.call
end
329
+
330
# Generate N unique timestamps for batch test runs
#
# Uses Ace::B36ts library to encode IDs with 50ms precision. The clock is
# read once and each ID is offset by an exact multiple of 50ms (as a
# Rational, avoiding float rounding), so consecutive IDs are exactly one
# 50ms step apart regardless of how long the loop takes.
#
# @param count [Integer] Number of unique timestamps needed
# @return [Array<String>] Array of unique timestamp strings
def generate_timestamps(count)
  base_time = Time.now.utc
  step = Rational(1, 20) # 50ms

  count.times.map do |i|
    Ace::B36ts.encode(base_time + (i * step), format: :"50ms")
  end
end
343
+
344
# Read the agent-written metadata.yml to determine authoritative test status
#
# When CLI providers run tests, they write metadata.yml with the real
# pass/fail status. The orchestrator's parsed response text may not
# match, so we trust metadata.yml when present.
#
# Counts are read from "tcs-passed"/"tcs-failed"/"tcs-total", falling back
# to the nested "results" section, then 0. Numeric strings (e.g. "2") are
# accepted. If the file is missing, is not a YAML mapping, or cannot be
# read/interpreted, the fallback result is returned with only its report
# directory updated (deliberate best-effort).
#
# @param scenario [Models::TestScenario] The test scenario
# @param agent_dir [String] Path to agent report directory
# @param fallback_result [Models::TestResult] Result to use if metadata unreadable
# @return [Models::TestResult] Result with authoritative status
def read_agent_result(scenario, agent_dir, fallback_result)
  metadata_path = File.join(agent_dir, "metadata.yml")
  return fallback_result.with_report_dir(agent_dir) unless File.exist?(metadata_path)

  metadata = YAML.safe_load_file(metadata_path, permitted_classes: [Date])
  # Empty files and non-mapping documents carry no usable metadata.
  return fallback_result.with_report_dir(agent_dir) unless metadata.is_a?(Hash)

  # Integer() accepts integers and numeric strings; anything else raises
  # and lands in the rescue below.
  count = lambda do |flat_key, nested_key|
    Integer(metadata[flat_key] || metadata.dig("results", nested_key) || 0)
  end
  passed = count.call("tcs-passed", "passed")
  failed = count.call("tcs-failed", "failed")
  total = count.call("tcs-total", "total")

  status = metadata["status"] || fallback_result.status
  # Reconcile: if all cases passed, status should be "pass"
  status = "pass" if total.positive? && passed == total

  # Build synthetic test cases from counts: passes first, then failures,
  # numbered consecutively.
  statuses = Array.new([passed, 0].max, "pass") + Array.new([failed, 0].max, "fail")
  test_cases = statuses.each_with_index.map do |tc_status, i|
    {id: "TC-#{format("%03d", i + 1)}", description: "", status: tc_status, actual: "", notes: ""}
  end

  Models::TestResult.new(
    test_id: scenario.test_id,
    status: status,
    test_cases: test_cases,
    summary: "#{passed}/#{total} passed",
    started_at: fallback_result.started_at,
    completed_at: fallback_result.completed_at,
    report_dir: agent_dir
  )
rescue StandardError
  fallback_result.with_report_dir(agent_dir)
end
386
+
387
# Result to report when a CLI-provider run did not leave its report
# directory at the expected deterministic path. Skipped results pass
# through (with the directory attached); anything else becomes an
# infrastructure "error" result that keeps the fallback's test cases and
# timing.
#
# @param scenario [Models::TestScenario]
# @param expected_dir [String] Deterministic report directory path
# @param fallback_result [Models::TestResult]
# @return [Models::TestResult]
def missing_agent_report_result(scenario, expected_dir, fallback_result)
  if fallback_result.status == "skip"
    fallback_result.with_report_dir(expected_dir)
  else
    error_attributes = {
      test_id: scenario.test_id,
      status: "error",
      test_cases: fallback_result.test_cases,
      summary: "Missing CLI report directory",
      error: "Expected report directory was not created: #{expected_dir}",
      started_at: fallback_result.started_at,
      completed_at: fallback_result.completed_at,
      report_dir: expected_dir
    }
    Models::TestResult.new(**error_attributes)
  end
end
408
+
409
# Fallback timestamp generator: encodes the current UTC time with
# Ace::B36ts in its "50ms" format.
#
# @return [String] Timestamp ID (7 chars, per the original note)
def default_timestamp
  now = Time.now.utc
  Ace::B36ts.encode(now, format: :"50ms")
end
414
+
415
# Execute a scenario while preserving compatibility with legacy executor
# doubles that do not accept the newer :timeout / :verify keywords.
#
# The executor's #execute signature is inspected once; :timeout and
# :verify are only passed when the executor declares them explicitly or
# accepts arbitrary keywords (**kwargs).
#
# @param scenario [Models::TestScenario] Scenario to execute
# @param cli_args [String, nil] Extra args for CLI providers
# @param run_id [String, nil] Run ID forwarded to the executor
# @param test_cases [Array<String>, nil] Test case IDs to run
# @param sandbox_path [String, nil] Prepared sandbox directory, if any
# @param env_vars [Hash, nil] Environment for the sandbox, if any
# @param report_dir [String, nil] Explicit report directory
# @param verify [Boolean] Passed through when the executor supports it
# @return [Object] Whatever the executor's #execute returns
def execute_scenario(scenario, cli_args:, run_id:, test_cases:, sandbox_path:, env_vars:, report_dir: nil, verify: false)
  kwargs = {
    cli_args: cli_args,
    run_id: run_id,
    test_cases: test_cases,
    sandbox_path: sandbox_path,
    env_vars: env_vars,
    report_dir: report_dir
  }

  parameters = @executor.method(:execute).parameters
  accepts_any_keyword = parameters.any? { |type, _name| type == :keyrest }
  declared_keywords = parameters.filter_map { |type, name| name if %i[key keyreq].include?(type) }
  accepts = ->(keyword) { accepts_any_keyword || declared_keywords.include?(keyword) }

  # A scenario-level timeout overrides the orchestrator default.
  kwargs[:timeout] = (scenario.timeout || @timeout) if accepts.call(:timeout)
  kwargs[:verify] = verify if accepts.call(:verify)

  @executor.execute(scenario, **kwargs)
end
438
+ end
439
+ end
440
+ end
441
+ end
442
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
module Ace
  module Test
    module EndToEndRunner
      # Version string of this gem release.
      VERSION = "0.29.0"
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "end_to_end_runner/version"
4
+
5
+ # Models
6
+ require_relative "end_to_end_runner/models/test_case"
7
+ require_relative "end_to_end_runner/models/test_scenario"
8
+ require_relative "end_to_end_runner/models/test_result"
9
+
10
+ # Atoms
11
+ require_relative "end_to_end_runner/atoms/prompt_builder"
12
+ require_relative "end_to_end_runner/atoms/result_parser"
13
+ require_relative "end_to_end_runner/atoms/skill_prompt_builder"
14
+ require_relative "end_to_end_runner/atoms/skill_result_parser"
15
+ require_relative "end_to_end_runner/atoms/suite_report_prompt_builder"
16
+ require_relative "end_to_end_runner/atoms/test_case_parser"
17
+ require_relative "end_to_end_runner/atoms/tc_fidelity_validator"
18
+ require_relative "end_to_end_runner/atoms/display_helpers"
19
+
20
+ # Molecules
21
+ require_relative "end_to_end_runner/molecules/fixture_copier"
22
+ require_relative "end_to_end_runner/molecules/scenario_loader"
23
+ require_relative "end_to_end_runner/molecules/setup_executor"
24
+ require_relative "end_to_end_runner/molecules/config_loader"
25
+ require_relative "end_to_end_runner/molecules/test_discoverer"
26
+ require_relative "end_to_end_runner/molecules/test_executor"
27
+ require_relative "end_to_end_runner/molecules/pipeline_sandbox_builder"
28
+ require_relative "end_to_end_runner/molecules/pipeline_prompt_bundler"
29
+ require_relative "end_to_end_runner/molecules/pipeline_report_generator"
30
+ require_relative "end_to_end_runner/molecules/pipeline_executor"
31
+ require_relative "end_to_end_runner/molecules/report_writer"
32
+ require_relative "end_to_end_runner/molecules/suite_report_writer"
33
+ require_relative "end_to_end_runner/molecules/simple_display_manager"
34
+ require_relative "end_to_end_runner/molecules/progress_display_manager"
35
+ require_relative "end_to_end_runner/molecules/suite_simple_display_manager"
36
+ require_relative "end_to_end_runner/molecules/suite_progress_display_manager"
37
+ require_relative "end_to_end_runner/molecules/affected_detector"
38
+ require_relative "end_to_end_runner/molecules/failure_finder"
39
+
40
+ # Organisms
41
+ require_relative "end_to_end_runner/organisms/test_orchestrator"
42
+ require_relative "end_to_end_runner/organisms/suite_orchestrator"
43
+
44
+ # CLI
45
+ require_relative "end_to_end_runner/cli/commands/run_test"
46
+ require_relative "end_to_end_runner/cli/commands/run_suite"
47
+
48
module Ace
  module Test
    module EndToEndRunner
      # Entry point for gem
      #
      # This gem provides infrastructure for agent-executed end-to-end tests:
      # - CLI command (ace-test-e2e) for running tests via LLM
      # - Workflows for test execution (handbook/workflow-instructions/e2e/run.wf.md)
      # - Templates for test scenarios (handbook/templates/scenario.yml.template.yml,
      #   tc-file.template.md)
      # - Conventions for E2E testing (handbook/guides/e2e-testing.g.md)
      #
      # Tests can be executed by AI agents or via the CLI tool.
      # See handbook/ for workflows and guides.

      # Seconds the orchestrator's progress-refresh thread sleeps between
      # display refreshes.
      REFRESH_INTERVAL = 0.25

      # Module namespaces
      # (Empty declarations; presumably the files required above already
      # open these modules, so these only guarantee the namespaces exist.)
      module Atoms; end
      module Molecules; end
      module Organisms; end
      module Models; end
    end
  end
end