ace-test-runner-e2e 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/.ace-defaults/e2e-runner/config.yml +70 -0
  3. data/.ace-defaults/nav/protocols/guide-sources/ace-test-runner-e2e.yml +11 -0
  4. data/.ace-defaults/nav/protocols/skill-sources/ace-test-runner-e2e.yml +19 -0
  5. data/.ace-defaults/nav/protocols/tmpl-sources/ace-test-runner-e2e.yml +12 -0
  6. data/.ace-defaults/nav/protocols/wfi-sources/ace-test-runner-e2e.yml +11 -0
  7. data/CHANGELOG.md +1166 -0
  8. data/LICENSE +21 -0
  9. data/README.md +42 -0
  10. data/Rakefile +15 -0
  11. data/exe/ace-test-e2e +15 -0
  12. data/exe/ace-test-e2e-sh +67 -0
  13. data/exe/ace-test-e2e-suite +13 -0
  14. data/handbook/guides/e2e-testing.g.md +124 -0
  15. data/handbook/guides/scenario-yml-reference.g.md +182 -0
  16. data/handbook/guides/tc-authoring.g.md +131 -0
  17. data/handbook/skills/as-e2e-create/SKILL.md +30 -0
  18. data/handbook/skills/as-e2e-fix/SKILL.md +35 -0
  19. data/handbook/skills/as-e2e-manage/SKILL.md +31 -0
  20. data/handbook/skills/as-e2e-plan-changes/SKILL.md +30 -0
  21. data/handbook/skills/as-e2e-review/SKILL.md +35 -0
  22. data/handbook/skills/as-e2e-rewrite/SKILL.md +31 -0
  23. data/handbook/skills/as-e2e-run/SKILL.md +48 -0
  24. data/handbook/skills/as-e2e-setup-sandbox/SKILL.md +34 -0
  25. data/handbook/templates/ace-taskflow-fixture.template.md +322 -0
  26. data/handbook/templates/agent-experience-report.template.md +89 -0
  27. data/handbook/templates/metadata.template.yml +49 -0
  28. data/handbook/templates/scenario.yml.template.yml +60 -0
  29. data/handbook/templates/tc-file.template.md +45 -0
  30. data/handbook/templates/test-report.template.md +94 -0
  31. data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +126 -0
  32. data/handbook/workflow-instructions/e2e/create.wf.md +395 -0
  33. data/handbook/workflow-instructions/e2e/execute.wf.md +253 -0
  34. data/handbook/workflow-instructions/e2e/fix.wf.md +166 -0
  35. data/handbook/workflow-instructions/e2e/manage.wf.md +179 -0
  36. data/handbook/workflow-instructions/e2e/plan-changes.wf.md +255 -0
  37. data/handbook/workflow-instructions/e2e/review.wf.md +286 -0
  38. data/handbook/workflow-instructions/e2e/rewrite.wf.md +281 -0
  39. data/handbook/workflow-instructions/e2e/run.wf.md +355 -0
  40. data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +461 -0
  41. data/lib/ace/test/end_to_end_runner/atoms/display_helpers.rb +234 -0
  42. data/lib/ace/test/end_to_end_runner/atoms/prompt_builder.rb +199 -0
  43. data/lib/ace/test/end_to_end_runner/atoms/result_parser.rb +166 -0
  44. data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +166 -0
  45. data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +244 -0
  46. data/lib/ace/test/end_to_end_runner/atoms/suite_report_prompt_builder.rb +103 -0
  47. data/lib/ace/test/end_to_end_runner/atoms/tc_fidelity_validator.rb +39 -0
  48. data/lib/ace/test/end_to_end_runner/atoms/test_case_parser.rb +108 -0
  49. data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +130 -0
  50. data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +156 -0
  51. data/lib/ace/test/end_to_end_runner/models/test_case.rb +47 -0
  52. data/lib/ace/test/end_to_end_runner/models/test_result.rb +115 -0
  53. data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +90 -0
  54. data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +92 -0
  55. data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +75 -0
  56. data/lib/ace/test/end_to_end_runner/molecules/failure_finder.rb +203 -0
  57. data/lib/ace/test/end_to_end_runner/molecules/fixture_copier.rb +35 -0
  58. data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +121 -0
  59. data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +182 -0
  60. data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +321 -0
  61. data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +131 -0
  62. data/lib/ace/test/end_to_end_runner/molecules/progress_display_manager.rb +172 -0
  63. data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +259 -0
  64. data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +254 -0
  65. data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +181 -0
  66. data/lib/ace/test/end_to_end_runner/molecules/simple_display_manager.rb +72 -0
  67. data/lib/ace/test/end_to_end_runner/molecules/suite_progress_display_manager.rb +223 -0
  68. data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +277 -0
  69. data/lib/ace/test/end_to_end_runner/molecules/suite_simple_display_manager.rb +116 -0
  70. data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +136 -0
  71. data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +332 -0
  72. data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +830 -0
  73. data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +442 -0
  74. data/lib/ace/test/end_to_end_runner/version.rb +9 -0
  75. data/lib/ace/test/end_to_end_runner.rb +71 -0
  76. metadata +220 -0
@@ -0,0 +1,830 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "open3"
4
+ require "fileutils"
5
+ require "yaml"
6
+ require "ace/b36ts"
7
+
8
+ module Ace
9
+ module Test
10
+ module EndToEndRunner
11
+ module Organisms
12
+ # Orchestrates E2E test execution across multiple packages
13
+ #
14
+ # Discovers all E2E tests across the monorepo and executes them
15
+ # either sequentially or in parallel using subprocess isolation.
16
+ # Supports filtering to affected packages based on git diff.
17
+ class SuiteOrchestrator
18
+ attr_reader :max_parallel, :base_dir
19
+
20
# Build a suite orchestrator with injectable collaborators.
#
# All collaborators default to concrete Molecules implementations; tests
# can inject doubles. Color output auto-detects TTY when +use_color+ is nil.
#
# @param max_parallel [Integer] Number of parallel workers
# @param base_dir [String] Base directory for test discovery (defaults to Dir.pwd)
# @param discoverer [#find_tests, #list_packages] Test discoverer (injectable)
# @param affected_detector [#detect] Affected package detector (injectable)
# @param failure_finder [#find_failures_by_scenario] Failure finder (injectable)
# @param output [IO] Output stream for progress messages
# @param use_color [Boolean, nil] Enable ANSI color output (nil = auto-detect TTY)
# @param progress [Boolean] Enable animated progress display
# @param suite_report_writer [#write] Suite report writer (injectable)
# @param scenario_loader [#load] Scenario loader (injectable)
# @param timestamp_generator [#call] Timestamp generator (injectable);
#   defaults to the private default_timestamp method (defined elsewhere in this class)
def initialize(max_parallel: 4, base_dir: nil, discoverer: nil, affected_detector: nil,
  failure_finder: nil, output: $stdout, use_color: nil, progress: false,
  suite_report_writer: nil, scenario_loader: nil, timestamp_generator: nil)
  @max_parallel = max_parallel
  @base_dir = base_dir || Dir.pwd
  @discoverer = discoverer || Molecules::TestDiscoverer.new
  @affected_detector = affected_detector || Molecules::AffectedDetector.new
  @failure_finder = failure_finder || Molecules::FailureFinder.new
  @output = output
  # nil means "auto": color only when the output stream is an interactive TTY
  @use_color = use_color.nil? ? output.respond_to?(:tty?) && output.tty? : use_color
  @progress = progress
  # Config is loaded eagerly so the report writer gets a consistent snapshot
  config = Molecules::ConfigLoader.load
  @suite_report_writer = suite_report_writer || Molecules::SuiteReportWriter.new(config: config)
  @loader = scenario_loader || Molecules::ScenarioLoader.new
  @timestamp_generator = timestamp_generator || method(:default_timestamp)
end
47
+
48
# Run E2E tests across all packages
#
# Pipeline: list packages -> apply --packages / --affected / --only-failures
# filters (each may short-circuit with an empty summary) -> discover tests ->
# build display -> execute sequentially or in parallel.
#
# @param options [Hash] Execution options
# @option options [Boolean] :parallel Enable parallel execution
# @option options [Boolean] :affected Only test affected packages
# @option options [Boolean] :only_failures Re-run only failed test cases
# @option options [String] :packages Comma-separated package names to filter
# @option options [String] :cli_args Extra args for CLI providers
# @option options [String] :provider LLM provider:model
# @option options [Integer] :timeout Timeout per test in seconds
# @option options :tags Tag filter forwarded to the discoverer (shape defined by TestDiscoverer — TODO confirm)
# @option options :exclude_tags Exclusion tag filter forwarded to the discoverer
# @return [Hash] Summary of results (:total, :passed, :failed, :errors, :packages)
def run(options = {})
  packages = @discoverer.list_packages(base_dir: @base_dir)

  if packages.empty?
    @output.puts "No packages with E2E tests found"
    return {total: 0, passed: 0, failed: 0, errors: 0, packages: {}}
  end

  # Filter to specific packages if requested (set intersection keeps discoverer order)
  if options[:packages]
    requested = options[:packages].split(",").map(&:strip)
    packages &= requested

    if packages.empty?
      @output.puts "No matching packages with E2E tests found"
      return {total: 0, passed: 0, failed: 0, errors: 0, packages: {}}
    end
  end

  # Filter to affected packages (per git diff) if requested
  if options[:affected]
    affected = @affected_detector.detect(base_dir: @base_dir)
    packages &= affected

    if packages.empty?
      @output.puts "No affected packages with E2E tests"
      return {total: 0, passed: 0, failed: 0, errors: 0, packages: {}}
    end

    @output.puts "Affected packages: #{packages.join(", ")}"
  end

  # Collect failures by scenario if --only-failures requested
  scenario_failures = nil
  if options[:only_failures]
    scenario_failures = @failure_finder.find_failures_by_scenario(
      packages: packages, base_dir: @base_dir
    )

    if scenario_failures.empty?
      @output.puts "No failed test scenarios found in cache"
      return {total: 0, passed: 0, failed: 0, errors: 0, packages: {}}
    end

    # Filter packages to only those with failures
    packages &= scenario_failures.keys
    @output.puts "Packages with failed scenarios: #{packages.join(", ")}"
    packages.each do |pkg|
      scenario_failures[pkg].each_key do |test_id|
        @output.puts " #{pkg}/#{test_id}"
      end
    end
  end

  # Stash filter state in ivars; discover_package_tests reads both
  @scenario_failures = scenario_failures
  @discovery_filters = {
    tags: options[:tags],
    exclude_tags: options[:exclude_tags]
  }

  # Discover tests in each package
  package_tests = discover_package_tests(packages)

  total_tests = package_tests.values.flatten.size
  pkg_count = package_tests.keys.size

  # Pre-compute column widths for aligned output (sets @pkg_width/@name_width)
  compute_column_widths(package_tests)

  # Build display manager
  test_queue = build_test_queue(package_tests)
  @display = build_display_manager(test_queue)

  # Print suite header
  @display.show_header(total_tests, pkg_count)

  # Execute tests; both branches return the results summary hash
  if options[:parallel]
    run_parallel(package_tests, options)
  else
    run_sequential(package_tests, options)
  end
end
143
+
144
+ private
145
+
146
# Pick and construct the display manager matching the progress flag.
#
# Both managers share the same constructor signature, so only the class
# differs: animated progress display vs. simple line-per-test output.
#
# @param test_queue [Array<Hash>] flat list of test items
# @return [Molecules::SuiteSimpleDisplayManager, Molecules::SuiteProgressDisplayManager]
def build_display_manager(test_queue)
  manager_class =
    @progress ? Molecules::SuiteProgressDisplayManager : Molecules::SuiteSimpleDisplayManager
  manager_class.new(
    test_queue, output: @output, use_color: @use_color,
    pkg_width: @pkg_width, name_width: @name_width
  )
end
163
+
164
# Discover the E2E test files of each package.
#
# When @scenario_failures is present (--only-failures mode), each package's
# file list is narrowed to files whose directory matches a failed test-id.
# Packages that end up with no tests are omitted from the result.
#
# @param packages [Array<String>] List of package names
# @return [Hash{String => Array<String>}] Package name to list of test files
def discover_package_tests(packages)
  packages.each_with_object({}) do |package, acc|
    tests = @discoverer.find_tests(
      package: package,
      base_dir: @base_dir,
      tags: @discovery_filters[:tags],
      exclude_tags: @discovery_filters[:exclude_tags]
    )

    # --only-failures: keep only files matching a recorded failing test-id
    failures = @scenario_failures && @scenario_failures[package]
    if failures
      failing_ids = failures.keys
      tests = tests.select { |file| failing_ids.any? { |tid| file_matches_test_id?(file, tid) } }
    end

    acc[package] = tests unless tests.empty?
  end
end
191
+
192
# Pre-compute column widths so progress output lines up.
#
# Falls back to 10/20 character minimums when there are no packages/tests.
# Sets @pkg_width and @name_width as side effects.
#
# @param package_tests [Hash] Package to tests mapping
def compute_column_widths(package_tests)
  package_widths = package_tests.keys.map(&:length)
  @pkg_width = package_widths.max || 10

  name_widths = package_tests.values.flatten.map { |file| extract_test_name(file).length }
  @name_width = name_widths.max || 20
end
199
+
200
# Extract a human-readable test name from a scenario file path.
#
# The name is the scenario's directory name, e.g.
# "pkg/e2e/TS-LINT-001-ruby-validator-fallback/scenario.yml"
# yields "TS-LINT-001-ruby-validator-fallback".
#
# @param test_file [String] Path to scenario.yml file
# @return [String] the enclosing directory's basename
def extract_test_name(test_file)
  scenario_dir = File.dirname(test_file)
  File.basename(scenario_dir)
end
207
+
208
# Run tests one at a time, in package order.
#
# Even in sequential mode a background refresh thread may run (when
# @progress is set), so all @display calls are guarded by a mutex.
# run_single_test and generate_run_ids are siblings defined elsewhere
# in this class.
#
# @param package_tests [Hash] Package to tests mapping
# @param options [Hash] Execution options
# @return [Hash] Summary of results
def run_sequential(package_tests, options)
  results = {total: 0, passed: 0, failed: 0, errors: 0, total_cases: 0, passed_cases: 0, packages: {}}
  start_time = Time.now

  # Pre-generate unique run IDs for all tests so report paths are deterministic
  all_tests = package_tests.flat_map { |pkg, tests| tests.map { |t| [pkg, t] } }
  run_ids = generate_run_ids(all_tests.size)
  run_id_map = all_tests.zip(run_ids).to_h

  display_mutex = Mutex.new
  done = false
  # Background repaint loop, only for the animated progress display.
  # REFRESH_INTERVAL is a class-level constant (defined outside this view).
  refresh_thread = if @progress
    Thread.new do
      until done
        sleep REFRESH_INTERVAL
        display_mutex.synchronize { @display.refresh }
      end
    end
  end

  package_tests.each do |package, tests|
    package_results = []

    tests.each do |test_file|
      display_mutex.synchronize { @display.test_started(package, test_file) }

      test_start = Time.now
      run_id = run_id_map[[package, test_file]]
      result = run_single_test(package, test_file, options, run_id: run_id)
      elapsed = Time.now - test_start
      package_results << result

      # Update totals; case counts may be absent on error results
      results[:total] += 1
      results[:total_cases] += result[:total_cases] || 0
      results[:passed_cases] += result[:passed_cases] || 0
      case result[:status]
      when "pass"
        results[:passed] += 1
      when "fail", "partial"
        results[:failed] += 1
      when "error"
        results[:errors] += 1
      end

      # Show columnar progress line
      display_mutex.synchronize { @display.test_completed(result, package, test_file, elapsed) }
    end

    results[:packages][package] = package_results
  end

  # Signal the refresh thread to stop, then wait for its current tick
  done = true
  refresh_thread&.join

  finalize_run(results, package_tests, start_time)
end
270
+
271
# Run tests in parallel using subprocesses.
#
# Maintains a pool of at most @max_parallel child processes; the main loop
# tops up the pool from the queue, sleeps briefly, repaints the display,
# and reaps finished children via check_running_processes.
#
# @param package_tests [Hash] Package to tests mapping
# @param options [Hash] Execution options
# @return [Hash] Summary of results
def run_parallel(package_tests, options)
  results = {total: 0, passed: 0, failed: 0, errors: 0, total_cases: 0, passed_cases: 0, packages: {}}
  queue = build_test_queue(package_tests)
  # Pre-assign run IDs so sandbox/report paths are known up front
  run_ids = generate_run_ids(queue.size)
  queue.each_with_index { |item, i| item[:run_id] = run_ids[i] }
  start_time = Time.now

  running = {}  # pid => process info hash

  while !queue.empty? || !running.empty?
    # Start new processes up to max_parallel
    while running.size < @max_parallel && !queue.empty?
      test_item = queue.shift
      @display.test_started(test_item[:package], test_item[:test_file])
      process = spawn_test_process(test_item, options)
      running[process[:pid]] = process.merge(started_at: Time.now)
    end

    # Poll interval: drain output and reap completed processes
    sleep 0.1
    @display.refresh
    check_running_processes(running, results)
  end

  finalize_run(results, package_tests, start_time)
end
302
+
303
# Flatten the package-to-tests mapping into a single work queue.
#
# Order is preserved: packages in hash order, tests in discovery order.
#
# @param package_tests [Hash] Package to tests mapping
# @return [Array<Hash>] Queue of {package:, test_file:} items
def build_test_queue(package_tests)
  package_tests.flat_map do |package, tests|
    tests.map { |test_file| {package: package, test_file: test_file} }
  end
end
316
+
317
# Spawn a child process for a single test.
#
# The command is built as an argv array and passed to Open3.popen3 without
# a shell, avoiding injection via file names or options. The returned hash
# carries the streams so check_running_processes can drain and close them.
#
# @param test_item [Hash] Test item with :package, :test_file, :run_id
# @param options [Hash] Execution options
# @return [Hash] Process info (:pid, :thread, :stdout, :stderr, :stdin, :package, :test_file, :output)
def spawn_test_process(test_item, options)
  package = test_item[:package]
  test_file = test_item[:test_file]

  argv = build_test_command(package, test_file, options, run_id: test_item[:run_id])

  # Array form = direct exec, no shell interpretation
  stdin, stdout, stderr, wait_thread = Open3.popen3(*argv, chdir: @base_dir)

  {
    pid: wait_thread.pid,
    thread: wait_thread,
    stdout: stdout,
    stderr: stderr,
    stdin: stdin,
    package: package,
    test_file: test_file,
    output: String.new
  }
end
336
+
337
# Build the argv array to run a single test via the ace-test-e2e executable.
#
# The scenario is parsed lazily (at most once) — only when --timeout or a
# run_id requires scenario data.
#
# @param package [String] Package name
# @param test_file [String] Path to test file
# @param options [Hash] Execution options (:provider, :timeout, :cli_args, :verify)
# @param run_id [String, nil] Pre-generated run ID for deterministic report paths
# @return [Array<String>] Command array for safe (shell-free) execution
def build_test_command(package, test_file, options, run_id: nil)
  test_id = extract_test_id(test_file)
  scenario = nil

  cmd = [e2e_executable_path, package, test_id]

  cmd << "--provider" << options[:provider] if options[:provider]

  if options[:timeout]
    scenario ||= parse_scenario(package, test_file)
    # A per-scenario timeout wins over the suite-level option
    effective_timeout = scenario.timeout || options[:timeout]
    cmd << "--timeout" << effective_timeout.to_s if effective_timeout
  end

  # CLI args are forwarded as a single opaque string argument
  cmd << "--cli-args" << options[:cli_args] if options[:cli_args]

  if run_id
    cmd << "--run-id" << run_id

    # Pass explicit report directory so the agent doesn't compute it independently
    scenario ||= parse_scenario(package, test_file)
    report_dir = File.join(@base_dir, ".ace-local", "test-e2e", "#{scenario.dir_name(run_id)}-reports")
    cmd << "--report-dir" << report_dir
  end

  cmd << "--verify" if options[:verify]

  # Force parallel=1 inside the subprocess: isolation is handled at this level
  cmd << "--parallel" << "1"

  cmd
end
384
+
385
# Resolve the ace-test-e2e executable used by suite subprocesses.
#
# Prefer the workspace wrapper (bin/ace-test-e2e) to avoid PATH drift
# against older globally-installed binaries; fall back to bare PATH lookup.
#
# @return [String]
def e2e_executable_path
  wrapper = File.join(@base_dir, "bin", "ace-test-e2e")
  return wrapper if File.executable?(wrapper)

  "ace-test-e2e"
end
395
+
396
# Extract the short test ID from a scenario file path.
#
# Matches the "TS-AREA-NNN" pattern (optional trailing variant letter)
# inside the scenario directory name; returns the whole directory name
# when no pattern is found.
#
# @param test_file [String] Path to scenario.yml file
# @return [String] Test ID (e.g., "TS-LINT-001")
def extract_test_id(test_file)
  dir_name = File.basename(File.dirname(test_file))
  match = dir_name.match(/(TS-[A-Z]+-\d+[a-z]?)/)
  match ? match[1] : dir_name
end
404
+
405
# Check whether a scenario file belongs to a metadata test-id.
#
# Directory names may carry a descriptive suffix (e.g. TS-COMMIT-002-specific-file)
# while metadata stores only the short id (TS-COMMIT-002), so both an exact
# match and a "#{test_id}-" prefix match count.
#
# @param test_file [String] Path to scenario.yml file
# @param test_id [String] Metadata test-id to match against
# @return [Boolean]
def file_matches_test_id?(test_file, test_id)
  dir_name = File.basename(File.dirname(test_file))
  return true if dir_name == test_id

  dir_name.start_with?("#{test_id}-")
end
418
+
419
# Poll running subprocesses: drain pending output, reap completed ones.
#
# Iterates over a dup of +running+ so entries can be deleted mid-loop.
# For live processes only the currently-readable data is drained (up to
# 4KiB per stream per tick); on completion the remaining output is read,
# all three streams are closed, the result is parsed and accumulated
# into +results+, and the entry is removed from +running+.
#
# @param running [Hash] pid => process info (mutated: completed entries removed)
# @param results [Hash] Results accumulator (mutated)
def check_running_processes(running, results)
  running.dup.each do |pid, process|
    thread = process[:thread]

    # Drain whatever is readable right now from stdout/stderr (non-blocking,
    # zero-second select). NOTE(review): an EOFError on the first readable
    # stream skips the second one for this tick; it is picked up on the next
    # tick or by the final drain below.
    begin
      readable, = IO.select([process[:stdout], process[:stderr]], nil, nil, 0)
      readable&.each do |stream|
        chunk = stream.read_nonblock(4096)
        process[:output] << chunk
      end
    rescue IO::WaitReadable, EOFError
      # No data or stream closed
    end

    # Check if completed (wait thread dead => child exited)
    unless thread.alive?
      # Get remaining output from both streams
      [process[:stdout], process[:stderr]].each do |stream|
        process[:output] << stream.read
      rescue IOError
        # Stream already closed
      end

      # Close streams; each close is best-effort
      begin
        process[:stdout].close
      rescue
        nil
      end
      begin
        process[:stderr].close
      rescue
        nil
      end
      begin
        process[:stdin].close
      rescue
        nil
      end

      # Parse result and persist the raw subprocess log next to the report
      result = parse_subprocess_result(process)
      save_subprocess_output(result)

      # Update results; case counts may be absent on error results
      results[:total] += 1
      results[:total_cases] += result[:total_cases] || 0
      results[:passed_cases] += result[:passed_cases] || 0
      results[:packages][process[:package]] ||= []
      results[:packages][process[:package]] << result

      case result[:status]
      when "pass"
        results[:passed] += 1
      when "fail", "partial"
        results[:failed] += 1
      when "error"
        results[:errors] += 1
      end

      # Show columnar progress line
      elapsed = Time.now - process[:started_at]
      @display.test_completed(result, process[:package], process[:test_file], elapsed)

      # Remove from running
      running.delete(pid)
    end
  end
end
493
+
494
# Parse result from subprocess output.
#
# For non-pass results that have a report directory, the agent-written
# metadata.yml is treated as authoritative (mirrors
# TestOrchestrator#read_agent_result behavior).
#
# Fix: the rescue fallback previously returned only {status:, error:},
# dropping :test_name and :raw_output. Downstream, write_failure_stubs
# looks results up by :test_name, so a parse error silently produced no
# failure stub and the test could not be re-run via --only-failures.
#
# @param process [Hash] Process info with :output, :thread, :test_file
# @return [Hash] Parsed result with :passed_cases and :total_cases
def parse_subprocess_result(process)
  result = parse_test_output(process[:output], process[:thread].value.exitstatus, extract_test_name(process[:test_file]))
  result[:raw_output] = process[:output]

  # For non-pass results, check agent-written metadata as authoritative source
  if result[:status] != "pass" && result[:report_dir]
    result = override_from_metadata(result)
  end

  result
rescue => e
  # Preserve test_name/raw_output so failure stubs can still be written
  {status: "error", error: "Failed to parse result: #{e.message}",
   test_name: extract_test_name(process[:test_file]), raw_output: process[:output]}
end
512
+
513
# Override result from agent-written metadata.yml when the subprocess exit
# code is misleading.
#
# Reads metadata.yml from result[:report_dir]; returns the original result
# unchanged when the file is missing, has no "status" key, or fails to load.
# Counts fall through: flat "tcs-*" keys, then nested "results" keys, then
# the already-parsed result, then 0. Date is permitted for YAML because
# metadata may carry date-typed fields (psych requires "date" itself).
#
# @param result [Hash] Parsed result with :report_dir
# @return [Hash] Result with status/counts from metadata.yml, or original on failure
def override_from_metadata(result)
  metadata_path = File.join(result[:report_dir], "metadata.yml")
  return result unless File.exist?(metadata_path)

  metadata = YAML.safe_load_file(metadata_path, permitted_classes: [Date])
  status = metadata["status"]
  return result unless status

  passed = metadata["tcs-passed"] || metadata.dig("results", "passed") || result[:passed_cases] || 0
  total = metadata["tcs-total"] || metadata.dig("results", "total") || result[:total_cases] || 0

  # Reconcile: if all cases passed, status should be "pass"
  if passed == total && total > 0 && status != "pass"
    status = "pass"
  end

  # Errors keep the most descriptive message available; otherwise show counts
  summary = if status == "error"
    metadata["summary"] || result[:error] || result[:summary] || "Test errored"
  else
    "#{passed}/#{total} passed"
  end

  result.merge(
    status: status,
    passed_cases: passed,
    total_cases: total,
    summary: summary
  )
rescue => e
  warn "Warning: Failed to override from metadata: #{e.message}" if ENV["DEBUG"]
  result
end
549
+
550
# Shared helper to parse test output from combined stdout/stderr.
#
# Extracts the report directory ("Report: <path>" line), case counts
# ("N/M cases"), then classifies: exit 0 is a pass unless fewer cases
# passed than ran; a non-zero exit with "ERROR"/"Error:" in the output is
# an error; anything else is a fail.
#
# Fixes: (1) the fail-branch summary previously joined regex captures with
# "/" producing e.g. "2/4" instead of "2/4 passed", inconsistent with the
# exit-0 branch; now the full matched phrase is kept. (2) the rescue
# fallback now preserves :test_name for downstream lookups.
#
# @param output [String] Combined test output
# @param exit_status [Integer] Process exit status
# @param test_name [String] Test name for result
# @return [Hash] Parsed result with :status, :passed_cases, :total_cases, etc.
def parse_test_output(output, exit_status, test_name)
  # Last "Report: <path>" line wins
  report_dir = output.lines.filter_map { |line| line[/^Report:\s+(.+)\s*$/, 1] }.last

  # Extract test case counts from "Result: ... N/M cases" line
  cases_match = output.match(%r{(\d+)/(\d+) cases})
  passed_cases = cases_match && cases_match[1].to_i
  total_cases = cases_match && cases_match[2].to_i

  base = {report_dir: report_dir, passed_cases: passed_cases, total_cases: total_cases,
          test_name: test_name}

  if exit_status == 0
    if passed_cases && total_cases && passed_cases < total_cases
      base.merge(status: "fail", summary: "#{passed_cases}/#{total_cases} passed")
    else
      base.merge(status: "pass", summary: "Test passed")
    end
  elsif output.include?("ERROR") || output.include?("Error:")
    error_msg = output.lines.filter_map { |line| line[/^Error:\s+(.+?)\s*$/, 1] }.last
    error_msg ||= "Test execution returned ERROR status"
    base.merge(status: "error", error: error_msg)
  else
    # Keep the whole "N/M passed" phrase from the output when present
    summary = output[%r{\d+/\d+ passed}] || "Test failed"
    base.merge(status: "fail", summary: summary)
  end
rescue => e
  {status: "error", error: "Failed to parse result: #{e.message}", test_name: test_name}
end
585
+
586
# Finalize a run: backfill failure stubs, show the summary, generate the
# suite report, and return the results hash (with :report_path when a
# report was written).
#
# @param results [Hash] Accumulated results
# @param package_tests [Hash] Package to test files mapping
# @param start_time [Time] When the run started
# @return [Hash] Results with optional :report_path
def finalize_run(results, package_tests, start_time)
  write_failure_stubs(results, package_tests)

  elapsed = Time.now - start_time
  @display.show_summary(results, elapsed)
  warn_on_lingering_claude_processes

  if (report_path = generate_suite_report(results, package_tests))
    @output.puts "Report: #{report_path}"
    results[:report_path] = report_path
  end

  results
end
606
+
607
# Write stub metadata.yml for failed/errored tests that have no metadata on disk.
#
# When a test subprocess errors (provider unavailable, timeout, etc.), no
# metadata.yml is written to cache. This method backfills stubs so that
# FailureFinder can pick them up on subsequent --only-failures runs.
#
# Contract: extract_test_name returns the scenario directory name; the
# result[:test_name] values from parse_test_output must match this format
# for the file_by_name lookup below.
#
# @param results [Hash] Accumulated results with :packages hash
# @param package_tests [Hash] Package to test files mapping
def write_failure_stubs(results, package_tests)
  cache_dir = File.join(@base_dir, ".ace-local", "test-e2e")

  results[:packages].each do |package, pkg_results|
    test_files = package_tests[package] || []
    # Index scenario files by directory name for O(1) lookup per result
    file_by_name = test_files.each_with_object({}) { |f, h| h[extract_test_name(f)] = f }

    pkg_results.each do |result|
      next if result[:status] == "pass"
      # Real metadata already on disk — nothing to backfill
      next if metadata_exists?(result[:report_dir])

      test_file = file_by_name[result[:test_name]]
      next unless test_file

      scenario = parse_scenario(package, test_file)
      timestamp = @timestamp_generator.call
      stub_dir = File.join(cache_dir, "#{scenario.dir_name(timestamp)}-reports")
      FileUtils.mkdir_p(stub_dir)

      # Minimal stub: just enough for FailureFinder to recognize the failure
      stub_data = {
        "test-id" => scenario.test_id,
        "package" => package,
        "status" => result[:status]
      }
      File.write(File.join(stub_dir, "metadata.yml"), YAML.dump(stub_data))

      # Keep the raw subprocess output alongside the stub for debugging
      if result[:raw_output] && !result[:raw_output].empty?
        File.write(File.join(stub_dir, "subprocess_output.log"), result[:raw_output])
      end
    end
  end
rescue => e
  # Best-effort: stub writing must never fail the suite run itself
  warn "Warning: Failed to write failure stubs (#{e.class}: #{e.message})"
  warn e.backtrace.first(3).join("\n") if ENV["DEBUG"]
end
654
+
655
# Save the raw subprocess output log into the report directory.
#
# Skips silently when there is no report dir or no output. Since
# report_dir coming from parse_test_output may actually be a file path,
# the parent directory is used in that case. Failures are best-effort
# (warned only under DEBUG).
#
# @param result [Hash] Parsed result with :report_dir and :raw_output
def save_subprocess_output(result)
  dir = result[:report_dir]
  raw = result[:raw_output]
  return if dir.nil? || raw.nil? || raw.empty?

  dir = File.dirname(dir) unless File.directory?(dir)
  FileUtils.mkdir_p(dir)
  File.write(File.join(dir, "subprocess_output.log"), raw)
rescue => e
  warn "Warning: Failed to save subprocess output: #{e.message}" if ENV["DEBUG"]
end
669
+
670
# Check if a metadata.yml file exists in the given report directory.
#
# Fix: the previous `report_dir && File.exist?(...)` returned nil (not
# false) for a nil report_dir, contradicting the documented Boolean
# return. Callers only tested truthiness, so this is backward-compatible.
#
# @param report_dir [String, nil] Path to the report directory
# @return [Boolean] true if metadata.yml exists
def metadata_exists?(report_dir)
  return false if report_dir.nil?

  File.exist?(File.join(report_dir, "metadata.yml"))
end
677
+
678
# Generate a suite-level final report from results.
#
# Pairs each discovered test file with its result (matched by test name),
# builds TestResult/TestScenario lists, and delegates to the suite report
# writer. Returns nil (with a warning) when no results match, or when the
# writer raises — report generation must never fail the run.
#
# @param results [Hash] Suite results with :packages hash
# @param package_tests [Hash] Package to test files mapping
# @return [String, nil] Path to the report file, or nil on failure
def generate_suite_report(results, package_tests)
  timestamp = @timestamp_generator.call

  all_results = []
  all_scenarios = []

  package_tests.each do |package, test_files|
    pkg_results = results[:packages][package] || []
    # Index results by test name so file order drives report order
    results_by_name = pkg_results.each_with_object({}) { |r, h| h[r[:test_name]] = r }

    test_files.each do |test_file|
      test_name = extract_test_name(test_file)
      result_hash = results_by_name[test_name]
      next unless result_hash

      all_results << build_test_result(result_hash)
      all_scenarios << parse_scenario(package, test_file)
    end
  end

  if all_results.empty?
    warn "Warning: Suite report skipped — no results matched test files" if ENV["DEBUG"]
    return nil
  end

  @suite_report_writer.write(
    all_results, all_scenarios,
    package: "suite",
    timestamp: timestamp,
    base_dir: @base_dir
  )
rescue => e
  warn "Warning: Suite report generation failed (#{e.class}: #{e.message})"
  warn e.backtrace.first(5).join("\n") if ENV["DEBUG"]
  nil
end
719
+
720
# Convert a raw subprocess result hash into a Models::TestResult
#
# Synthesizes placeholder test-case entries from the pass/total counts:
# passing cases are numbered first, failing cases continue the sequence.
#
# @param result_hash [Hash] Raw result hash with :status, :passed_cases, etc.
# @return [Models::TestResult]
def build_test_result(result_hash)
  pass_count = result_hash[:passed_cases] || 0
  # Clamp at zero so an inconsistent total can never yield negative failures.
  fail_count = [(result_hash[:total_cases] || 0) - pass_count, 0].max

  statuses = (["pass"] * pass_count) + (["fail"] * fail_count)
  cases = statuses.each_with_index.map do |state, idx|
    {id: "TC-#{format("%03d", idx + 1)}", description: "", status: state}
  end

  Models::TestResult.new(
    test_id: result_hash[:test_name] || "unknown",
    status: result_hash[:status] || "error",
    test_cases: cases,
    summary: result_hash[:summary] || result_hash[:error] || "",
    report_dir: result_hash[:report_dir]
  )
end
741
+
742
# Load a scenario from disk as a Models::TestScenario, with a fallback
# stub derived from the file path when loading fails for any reason.
#
# @param package [String] Package name
# @param test_file [String] Path to the scenario.yml file
# @return [Models::TestScenario]
def parse_scenario(package, test_file)
  begin
    return @loader.load(File.dirname(test_file))
  rescue => _ignored
    # Fall through to the synthesized stub below.
  end

  fallback_attrs = {
    test_id: extract_test_id(test_file),
    title: extract_test_name(test_file),
    area: package.sub(/\Aace-/, ""),
    package: package,
    file_path: test_file,
    content: ""
  }
  Models::TestScenario.new(**fallback_attrs)
end
759
+
760
# Generate N unique run IDs for batch test runs
#
# Uses the Ace::B36ts library to encode unique IDs with 50ms precision,
# ensuring distinct timestamps for coordinated sandbox/report paths.
#
# A 0.1s (100ms) per-index offset — double the 50ms encoding granularity —
# guarantees consecutive IDs land in different encoding buckets even at
# high throughput.
#
# @param count [Integer] Number of unique run IDs needed
# @return [Array<String>] Array of unique run ID strings
def generate_run_ids(count)
  Array.new(count) do |idx|
    stamped_at = Time.now.utc + (idx * 0.1)
    Ace::B36ts.encode(stamped_at, format: :"50ms")
  end
end
776
+
777
# Generate a timestamp identifier used for report naming.
#
# @return [String] Compact Base36 timestamp ID at 50ms precision
def default_timestamp
  now = Time.now.utc
  Ace::B36ts.encode(now, format: :"50ms")
end
782
+
783
# Emit diagnostics for lingering Claude one-shot processes.
#
# Debug-only visibility (enabled via ACE_LLM_DEBUG_SUBPROCESS=1); never
# fails the suite, and any scan error is itself only reported under DEBUG.
def warn_on_lingering_claude_processes
  return unless ENV["ACE_LLM_DEBUG_SUBPROCESS"] == "1"

  scan_output, scan_status = Open3.capture2("pgrep", "-af", "claude .* -p")
  # pgrep exits non-zero when nothing matches — nothing to report.
  return unless scan_status.success?

  matches = scan_output.lines
    .map(&:strip)
    .reject(&:empty?)
    .reject { |entry| entry.include?("pgrep -af") } # drop our own pgrep invocation
  return if matches.empty?

  @output.puts "Warning: Detected lingering claude -p processes (#{matches.size})"
  matches.each { |entry| @output.puts " #{entry}" }
rescue => e
  @output.puts "Warning: Failed to scan lingering Claude processes: #{e.message}" if ENV["DEBUG"]
end
800
+
801
# Run a single test (sequential mode)
#
# Spawns the test subprocess, parses its combined stdout+stderr, applies
# metadata overrides for non-passing results, and persists the raw output.
# Any exception collapses to an error-status result hash.
#
# @param package [String] Package name
# @param test_file [String] Path to test file
# @param options [Hash] Execution options
# @param run_id [String, nil] Optional pre-allocated run identifier
# @return [Hash] Test result
def run_single_test(package, test_file, options, run_id: nil)
  command = build_test_command(package, test_file, options, run_id: run_id)
  stdout_str, stderr_str, proc_status = Open3.capture3(*command, chdir: @base_dir)

  # Parsing operates on the merged streams; keep the raw text for later saving.
  combined = stdout_str + stderr_str
  result = parse_test_output(combined, proc_status.exitstatus, extract_test_name(test_file))
  result[:raw_output] = combined

  # Non-pass results may carry richer status in their report metadata.
  needs_override = result[:status] != "pass" && result[:report_dir]
  result = override_from_metadata(result) if needs_override

  save_subprocess_output(result)
  result
rescue => e
  {status: "error", error: e.message}
end
826
+ end
827
+ end
828
+ end
829
+ end
830
+ end