ace-test-runner-e2e 0.29.8 → 0.40.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/.ace-defaults/e2e-runner/config.yml +14 -2
  3. data/CHANGELOG.md +233 -0
  4. data/README.md +2 -2
  5. data/exe/ace-test-e2e-sh +9 -4
  6. data/handbook/guides/e2e-testing.g.md +75 -9
  7. data/handbook/guides/scenario-yml-reference.g.md +21 -8
  8. data/handbook/guides/tc-authoring.g.md +23 -5
  9. data/handbook/skills/as-e2e-fix/SKILL.md +2 -2
  10. data/handbook/skills/as-e2e-review/SKILL.md +2 -2
  11. data/handbook/templates/ace-taskflow-fixture.template.md +17 -17
  12. data/handbook/templates/agent-experience-report.template.md +3 -2
  13. data/handbook/templates/scenario.yml.template.yml +7 -2
  14. data/handbook/templates/tc-file.template.md +16 -4
  15. data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +53 -6
  16. data/handbook/workflow-instructions/e2e/create.wf.md +128 -25
  17. data/handbook/workflow-instructions/e2e/execute.wf.md +11 -7
  18. data/handbook/workflow-instructions/e2e/fix.wf.md +84 -15
  19. data/handbook/workflow-instructions/e2e/plan-changes.wf.md +33 -1
  20. data/handbook/workflow-instructions/e2e/review.wf.md +40 -25
  21. data/handbook/workflow-instructions/e2e/rewrite.wf.md +22 -8
  22. data/handbook/workflow-instructions/e2e/run.wf.md +50 -26
  23. data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +4 -4
  24. data/lib/ace/test/end_to_end_runner/atoms/artifact_contract_validator.rb +138 -0
  25. data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +7 -5
  26. data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +73 -7
  27. data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +195 -5
  28. data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +58 -9
  29. data/lib/ace/test/end_to_end_runner/models/test_case.rb +8 -2
  30. data/lib/ace/test/end_to_end_runner/models/test_result.rb +9 -3
  31. data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +4 -2
  32. data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +7 -2
  33. data/lib/ace/test/end_to_end_runner/molecules/artifact_pruner.rb +61 -0
  34. data/lib/ace/test/end_to_end_runner/molecules/bwrap_sandbox_backend.rb +271 -0
  35. data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +28 -1
  36. data/lib/ace/test/end_to_end_runner/molecules/integration_runner.rb +122 -0
  37. data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +235 -18
  38. data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +164 -13
  39. data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +91 -19
  40. data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +121 -18
  41. data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +15 -12
  42. data/lib/ace/test/end_to_end_runner/molecules/sandbox_runtime_builder.rb +374 -0
  43. data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +83 -5
  44. data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +121 -16
  45. data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +422 -97
  46. data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +38 -13
  47. data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +27 -5
  48. data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +98 -18
  49. data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +159 -19
  50. data/lib/ace/test/end_to_end_runner/version.rb +1 -1
  51. data/lib/ace/test/end_to_end_runner.rb +4 -0
  52. metadata +21 -2
@@ -35,7 +35,7 @@ module Ace
35
35
  #
36
36
  # Resolves role: references to their concrete provider before checking.
37
37
  #
38
- # @param provider_string [String] Provider:model string (e.g., "claude:sonnet", "role:e2e-executor")
38
+ # @param provider_string [String] Provider:model string (e.g., "claude:sonnet", "role:e2e-runner")
39
39
  # @return [Boolean]
40
40
  def cli_provider?(provider_string)
41
41
  resolved = resolve_provider_name(provider_string)
@@ -44,9 +44,9 @@ module Ace
44
44
 
45
45
  def build_execution_prompt(command:, tc_mode:)
46
46
  return_contract = if tc_mode
47
- "- **Test ID**: ...\n- **TC ID**: ...\n- **Status**: pass | fail\n- **Report Paths**: ...\n- **Issues**: ..."
47
+ "- **Test ID**: ...\n- **TC ID**: ...\n- **Status**: pass | fail\n- **Report Paths**: ...\n- **Observations**: ...\n- **Issues**: ... (optional legacy alias)"
48
48
  else
49
- "- **Test ID**: ...\n- **Status**: pass | fail | partial\n- **Passed**: ...\n- **Failed**: ...\n- **Total**: ...\n- **Report Paths**: ...\n- **Issues**: ..."
49
+ "- **Test ID**: ...\n- **Status**: pass | fail | partial\n- **Passed**: ...\n- **Failed**: ...\n- **Total**: ...\n- **Report Paths**: ...\n- **Observations**: ...\n- **Issues**: ... (optional legacy alias)"
50
50
  end
51
51
 
52
52
  <<~PROMPT.strip
@@ -55,8 +55,9 @@ module Ace
55
55
 
56
56
  Execution requirements:
57
57
  - Do not run `/ace-...` inside a shell command.
58
- - If slash commands are unavailable, stop and report that limitation in `Issues`.
58
+ - If slash commands are unavailable, stop and report that limitation in `Observations`.
59
59
  - Write reports under `.ace-local/test-e2e/*-reports/`.
60
+ - `Observations` is required and must be a concise factual summary of actions, outcomes, and blockers without verdict language.
60
61
  - Return only this structured summary:
61
62
  #{return_contract}
62
63
  PROMPT
@@ -122,6 +123,7 @@ module Ace
122
123
 
123
124
  Verification requirements:
124
125
  - Inspect sandbox artifacts and scenario files directly.
126
+ - Judge from sandbox state first, then runner observations, then raw debug captures only when needed.
125
127
  - Evaluate each test case using `TC-*.verify.md` criteria when present.
126
128
  - Classify each failed test case with one category:
127
129
  `test-spec-error`, `tool-bug`, `runner-error`, or `infrastructure-error`.
@@ -145,7 +147,7 @@ module Ace
145
147
 
146
148
  # Resolve the bare provider name from a provider string.
147
149
  # For role: references, resolves via ProviderModelParser to find the
148
- # concrete provider (e.g. "role:e2e-executor" → "claude").
150
+ # concrete provider (e.g. "role:e2e-runner" → "claude").
149
151
  def resolve_provider_name(provider_string)
150
152
  name = self.class.provider_name(provider_string)
151
153
  return name unless name == "role"
@@ -13,6 +13,7 @@ module Ace
13
13
  # - **Failed**: 0
14
14
  # - **Total**: 8
15
15
  # - **Report Paths**: 8p5jo2-lint-ts001-reports/*
16
+ # - **Observations**: None
16
17
  # - **Issues**: None
17
18
  #
18
19
  # Falls back to ResultParser.parse() for JSON responses.
@@ -45,6 +46,7 @@ module Ace
45
46
  fields[:failed] = extract_field(text, "Failed")
46
47
  fields[:total] = extract_field(text, "Total")
47
48
  fields[:report_paths] = extract_field(text, "Report Paths")
49
+ fields[:observations] = extract_field(text, "Observations")
48
50
  fields[:issues] = extract_field(text, "Issues")
49
51
 
50
52
  # Need at least test_id and status for a valid parse
@@ -69,8 +71,7 @@ module Ace
69
71
  passed.times { |i| test_cases << {id: "TC-#{format("%03d", i + 1)}", description: "", status: "pass", actual: "", notes: ""} }
70
72
  failed.times { |i| test_cases << {id: "TC-#{format("%03d", passed + i + 1)}", description: "", status: "fail", actual: "", notes: ""} }
71
73
 
72
- issues = parsed[:issues]
73
- observations = (issues && issues.downcase != "none") ? issues : ""
74
+ observations = normalize_observations(parsed[:observations], parsed[:issues])
74
75
 
75
76
  {
76
77
  test_id: parsed[:test_id],
@@ -131,8 +132,8 @@ module Ace
131
132
  fields[:failed_tcs] = extract_field(text, "Failed TCs")
132
133
  fields[:issues] = extract_field(text, "Issues")
133
134
 
134
- return parse(text) unless fields[:test_id] && fields[:status] &&
135
- fields[:tcs_passed] && fields[:tcs_failed] && fields[:tcs_total]
135
+ return parse_minimal_verifier(text) unless fields[:test_id] && fields[:status]
136
+ return parse(text) unless fields[:tcs_passed] && fields[:tcs_failed] && fields[:tcs_total]
136
137
 
137
138
  passed = fields[:tcs_passed].to_i
138
139
  failed = fields[:tcs_failed].to_i
@@ -180,6 +181,58 @@ module Ace
180
181
  }
181
182
  end
182
183
 
184
+ def self.parse_minimal_verifier(text)
185
+ compact = text.to_s.strip
186
+ results_match = compact.match(/Results:\s*(\d+)\s*\/\s*(\d+)\s*passed/i)
187
+ if results_match
188
+ passed = results_match[1].to_i
189
+ total = results_match[2].to_i
190
+ status = if total.zero?
191
+ "fail"
192
+ elsif passed == total
193
+ "pass"
194
+ elsif passed.zero?
195
+ "fail"
196
+ else
197
+ "partial"
198
+ end
199
+ failed = [total - passed, 0].max
200
+ test_cases = []
201
+ passed.times { |i| test_cases << {id: "TC-#{format("%03d", i + 1)}", description: "", status: "pass", actual: "", notes: ""} }
202
+ failed.times { |i| test_cases << {id: "TC-#{format("%03d", passed + i + 1)}", description: "", status: "fail", actual: "", notes: "", category: "unknown"} }
203
+
204
+ return {
205
+ test_id: "",
206
+ status: status,
207
+ test_cases: test_cases,
208
+ summary: "#{passed}/#{total} passed",
209
+ observations: compact
210
+ }
211
+ end
212
+
213
+ status_match = compact.match(/\b(PASS|FAIL|PARTIAL|ERROR)\b/i)
214
+ return parse(text) unless status_match
215
+
216
+ status = normalize_status(status_match[1])
217
+ evidence = compact.sub(/^.*?\b#{Regexp.escape(status_match[1])}\b[:\-\s]*/i, "").strip
218
+ tc_status = (status == "pass") ? "pass" : "fail"
219
+
220
+ {
221
+ test_id: "",
222
+ status: status,
223
+ test_cases: [{
224
+ id: "TC-001",
225
+ description: "",
226
+ status: tc_status,
227
+ actual: "",
228
+ notes: evidence,
229
+ category: ((tc_status == "fail") ? "unknown" : nil)
230
+ }],
231
+ summary: evidence.empty? ? status : evidence,
232
+ observations: evidence
233
+ }
234
+ end
235
+
183
236
  # Parse TC-level markdown return contract
184
237
  def self.parse_tc_markdown(text)
185
238
  fields = {}
@@ -188,6 +241,7 @@ module Ace
188
241
  fields[:tc_id] = extract_field(text, "TC ID")
189
242
  fields[:status] = extract_field(text, "Status")
190
243
  fields[:report_paths] = extract_field(text, "Report Paths")
244
+ fields[:observations] = extract_field(text, "Observations")
191
245
  fields[:issues] = extract_field(text, "Issues")
192
246
 
193
247
  # Need test_id, tc_id, and status for a valid TC parse
@@ -200,8 +254,7 @@ module Ace
200
254
  def self.to_tc_normalized(parsed)
201
255
  parsed[:status] = normalize_status(parsed[:status])
202
256
 
203
- issues = parsed[:issues]
204
- observations = (issues && issues.downcase != "none") ? issues : ""
257
+ observations = normalize_observations(parsed[:observations], parsed[:issues])
205
258
 
206
259
  {
207
260
  test_id: parsed[:test_id],
@@ -234,9 +287,22 @@ module Ace
234
287
  end
235
288
  end
236
289
 
290
+ def self.normalize_observations(primary, fallback = nil)
291
+ [primary, fallback].each do |value|
292
+ next if value.nil?
293
+
294
+ normalized = value.to_s.strip
295
+ next if normalized.empty? || normalized.casecmp("none").zero?
296
+
297
+ return normalized
298
+ end
299
+
300
+ ""
301
+ end
302
+
237
303
  private_class_method :parse_markdown, :to_normalized, :extract_field,
238
304
  :parse_tc_markdown, :to_tc_normalized, :normalize_status,
239
- :parse_failed_tcs
305
+ :parse_failed_tcs, :parse_minimal_verifier, :normalize_observations
240
306
  end
241
307
  end
242
308
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "ace/b36ts"
3
4
  require "ace/support/cli"
4
5
  require "stringio"
5
6
  require "ace/support/cli"
@@ -23,6 +24,7 @@ module Ace
23
24
  in the monorepo. Tests run sequentially by default or in parallel
24
25
  with --parallel flag. Use --affected to only test changed packages.
25
26
  Use --only-failures to re-run only previously failed scenarios.
27
+ Full unfiltered suite runs retry failed scenarios once by default.
26
28
  Optionally filter to specific packages with a comma-separated list.
27
29
 
28
30
  Output:
@@ -40,6 +42,8 @@ module Ace
40
42
  "--affected --parallel 8 # Parallel affected tests only",
41
43
  "--only-failures # Re-run failed scenarios from cache",
42
44
  "--affected --only-failures # Re-run failed scenarios in affected packages",
45
+ "--no-retry-failures-once # Disable default retry for a full suite run",
46
+ "--prune-artifacts # Remove stale .ace-local/test-e2e artifacts before running",
43
47
  "--tags smoke,happy-path # Include scenarios by tag",
44
48
  "--exclude-tags deep # Exclude scenarios by tag",
45
49
  "--cli-args dangerously-skip-permissions # Pass args to provider"
@@ -50,6 +54,8 @@ module Ace
50
54
  option :affected, type: :boolean, desc: "Only test affected packages"
51
55
  option :only_failures, type: :boolean,
52
56
  desc: "Re-run only previously failed scenarios"
57
+ option :retry_failures_once, type: :boolean,
58
+ desc: "Retry failed scenarios once after a full unfiltered suite run"
53
59
  option :cli_args, type: :string,
54
60
  desc: "Extra args for CLI-based LLM providers"
55
61
  option :provider, type: :string, default: Molecules::ConfigLoader.default_provider,
@@ -61,6 +67,8 @@ module Ace
61
67
  option :progress, type: :boolean, desc: "Enable live animated display"
62
68
  option :verify, type: :boolean,
63
69
  desc: "Run independent verifier pass for each scenario"
70
+ option :prune_artifacts, type: :boolean,
71
+ desc: "Remove stale .ace-local/test-e2e artifacts before running (preserves suite reports and runtime-cache)"
64
72
  option :quiet, type: :boolean, aliases: %w[-q], desc: "Suppress non-essential output"
65
73
  option :verbose, type: :boolean, aliases: %w[-v], desc: "Show verbose output"
66
74
  option :debug, type: :boolean, aliases: %w[-d], desc: "Show debug output"
@@ -69,21 +77,36 @@ module Ace
69
77
  options = coerce_types(options, parallel: :integer, timeout: :integer)
70
78
 
71
79
  parallel = options[:parallel]
72
- affected = options[:affected]
73
- only_failures = options[:only_failures]
80
+ affected = !!options[:affected]
81
+ only_failures = !!options[:only_failures]
82
+ prune_artifacts = !!options[:prune_artifacts]
74
83
  tags = parse_csv_list(options[:tags])
75
84
  exclude_tags = parse_csv_list(options[:exclude_tags])
85
+ if only_failures && prune_artifacts
86
+ raise Ace::Support::Cli::Error.new(
87
+ "--prune-artifacts cannot be used with --only-failures"
88
+ )
89
+ end
90
+ retry_failures_once = resolve_retry_failures_once(
91
+ requested: options[:retry_failures_once],
92
+ packages: packages,
93
+ affected: affected,
94
+ only_failures: only_failures,
95
+ tags: tags,
96
+ exclude_tags: exclude_tags
97
+ )
76
98
 
77
99
  output = quiet?(options) ? StringIO.new : $stdout
78
100
  progress = options[:progress] && !quiet?(options)
101
+ prune_artifacts_if_requested(output: output, prune_artifacts: prune_artifacts, quiet: quiet?(options))
79
102
 
80
- orchestrator = Organisms::SuiteOrchestrator.new(
103
+ orchestrator = build_orchestrator(
81
104
  max_parallel: [parallel, 1].max,
82
105
  output: output,
83
106
  progress: progress
84
107
  )
85
108
 
86
- results = orchestrator.run(
109
+ run_options = {
87
110
  parallel: parallel > 0,
88
111
  affected: affected,
89
112
  only_failures: only_failures,
@@ -94,6 +117,13 @@ module Ace
94
117
  tags: tags,
95
118
  exclude_tags: exclude_tags,
96
119
  verify: options[:verify]
120
+ }
121
+
122
+ results = run_suite_with_retry(
123
+ orchestrator,
124
+ run_options: run_options,
125
+ output: output,
126
+ retry_failures_once: retry_failures_once
97
127
  )
98
128
 
99
129
  if results[:total].zero?
@@ -110,13 +140,173 @@ module Ace
110
140
  if results[:failed] > 0 || results[:errors] > 0
111
141
  failed_count = results[:failed] + results[:errors]
112
142
  raise Ace::Support::Cli::Error.new(
113
- "#{failed_count} test(s) failed or errored"
143
+ results[:retry_attempted] ? "#{failed_count} test(s) failed or errored after retry" : "#{failed_count} test(s) failed or errored"
114
144
  )
115
145
  end
146
+
147
+ results
116
148
  end
117
149
 
118
150
  private
119
151
 
152
+ def build_orchestrator(max_parallel:, output:, progress:)
153
+ Organisms::SuiteOrchestrator.new(
154
+ max_parallel: max_parallel,
155
+ output: output,
156
+ progress: progress
157
+ )
158
+ end
159
+
160
+ def build_retry_report_writer
161
+ Molecules::SuiteReportWriter.new(config: Molecules::ConfigLoader.load)
162
+ end
163
+
164
+ def build_artifact_pruner
165
+ Molecules::ArtifactPruner.new
166
+ end
167
+
168
+ def prune_artifacts_if_requested(output:, prune_artifacts:, quiet:)
169
+ return unless prune_artifacts
170
+
171
+ result = build_artifact_pruner.prune(base_dir: Dir.pwd)
172
+ return if quiet
173
+
174
+ output.puts(
175
+ "Pruned #{result[:deleted_count]} artifact(s) from #{result[:root_display]} (preserved suite reports and runtime-cache)"
176
+ )
177
+ end
178
+
179
+ def run_suite_with_retry(orchestrator, run_options:, output:, retry_failures_once:)
180
+ initial_results = orchestrator.run(run_options)
181
+ annotated = annotate_results(
182
+ initial_results,
183
+ retry_attempted: false,
184
+ attempts: 1,
185
+ flaky_scenarios: [],
186
+ remaining_failures: failure_scenarios(initial_results),
187
+ initial_report_path: initial_results[:report_path],
188
+ retry_report_path: nil,
189
+ report_path: initial_results[:report_path]
190
+ )
191
+ return annotated unless retry_failures_once && suite_failed?(initial_results)
192
+
193
+ output.puts "Retrying failed scenarios once..."
194
+ retry_results = orchestrator.run(run_options.merge(only_failures: true))
195
+ if retry_results[:total].zero?
196
+ raise Ace::Support::Cli::Error.new(
197
+ "Retry pass found no failed test scenarios from attempt 1; aborting instead of silently passing"
198
+ )
199
+ end
200
+
201
+ flaky_scenarios = recovered_flaky_scenarios(initial_results, retry_results)
202
+ remaining_failures = failure_scenarios(retry_results)
203
+ final_report_path = write_retry_summary_report(initial_results, retry_results)
204
+ output.puts "Final Report: #{final_report_path}" if final_report_path
205
+
206
+ if remaining_failures.empty?
207
+ output.puts "#{flaky_scenarios.length} scenario(s) recovered on retry and were marked flaky"
208
+ else
209
+ output.puts "#{remaining_failures.length} scenario(s) still failing after retry"
210
+ end
211
+
212
+ annotate_results(
213
+ retry_results,
214
+ retry_attempted: true,
215
+ attempts: 2,
216
+ flaky_scenarios: flaky_scenarios,
217
+ remaining_failures: remaining_failures,
218
+ initial_report_path: initial_results[:report_path],
219
+ retry_report_path: retry_results[:report_path],
220
+ report_path: final_report_path || retry_results[:report_path]
221
+ )
222
+ end
223
+
224
+ def write_retry_summary_report(initial_results, retry_results)
225
+ build_retry_report_writer.write_retry_summary(
226
+ initial_results: initial_results,
227
+ retry_results: retry_results,
228
+ timestamp: Ace::B36ts.encode(Time.now.utc, format: :"50ms"),
229
+ base_dir: Dir.pwd
230
+ )
231
+ rescue => e
232
+ warn "Warning: Failed to write retry summary report: #{e.message}" if ENV["DEBUG"]
233
+ nil
234
+ end
235
+
236
+ def annotate_results(results, **extra)
237
+ results.merge(extra)
238
+ end
239
+
240
+ def suite_failed?(results)
241
+ results[:failed].to_i > 0 || results[:errors].to_i > 0
242
+ end
243
+
244
+ def failure_scenarios(results)
245
+ scenario_result_index(results)
246
+ .values
247
+ .select { |result| result[:status] != "pass" }
248
+ .map { |result| result[:test_id] }
249
+ .sort
250
+ end
251
+
252
+ def recovered_flaky_scenarios(initial_results, retry_results)
253
+ initial_by_test = scenario_result_index(initial_results)
254
+ retry_by_test = scenario_result_index(retry_results)
255
+
256
+ initial_by_test.each_with_object([]) do |(test_id, initial), flaky|
257
+ next if initial[:status] == "pass"
258
+
259
+ retry_result = retry_by_test[test_id]
260
+ next unless retry_result && retry_result[:status] == "pass"
261
+
262
+ flaky << {
263
+ "test_id" => test_id,
264
+ "initial_status" => initial[:status],
265
+ "retry_status" => retry_result[:status]
266
+ }
267
+ end.sort_by { |entry| entry["test_id"] }
268
+ end
269
+
270
+ def scenario_result_index(results)
271
+ results.fetch(:packages, {}).values.flatten.each_with_object({}) do |result, index|
272
+ test_name = result[:test_name] || result[:test_id] || ""
273
+ test_id = test_name[/\A(TS-[A-Z0-9]+-\d+[a-z]*)/i, 1]&.upcase || test_name
274
+ next if test_id.empty?
275
+
276
+ index[test_id] = {
277
+ test_id: test_id,
278
+ status: result[:status],
279
+ summary: result[:summary],
280
+ error: result[:error]
281
+ }
282
+ end
283
+ end
284
+
285
+ def resolve_retry_failures_once(requested:, packages:, affected:, only_failures:, tags:, exclude_tags:)
286
+ scoped = scoped_suite_run?(
287
+ packages: packages,
288
+ affected: affected,
289
+ only_failures: only_failures,
290
+ tags: tags,
291
+ exclude_tags: exclude_tags
292
+ )
293
+ if requested == true && scoped
294
+ raise Ace::Support::Cli::Error.new(
295
+ "--retry-failures-once is only supported for full unfiltered suite runs"
296
+ )
297
+ end
298
+
299
+ return requested unless requested.nil?
300
+
301
+ !scoped
302
+ end
303
+
304
+ def scoped_suite_run?(packages:, affected:, only_failures:, tags:, exclude_tags:)
305
+ [packages, affected, only_failures].any? ||
306
+ !tags.empty? ||
307
+ !exclude_tags.empty?
308
+ end
309
+
120
310
  def parse_csv_list(raw)
121
311
  return [] if raw.nil? || raw.strip.empty?
122
312
 
@@ -20,9 +20,9 @@ module Ace
20
20
  desc <<~DESC.strip
21
21
  Run E2E tests via LLM execution
22
22
 
23
- Discovers and executes TS-* test scenarios in a package's test/e2e/ directory.
24
- Tests are sent to an LLM provider which executes the test steps and returns
25
- structured results.
23
+ Discovers and executes deterministic preflight tests from test/feat
24
+ before TS-* agent scenarios from test/e2e. Tests are sent to an LLM
25
+ provider which executes the scenario steps and returns structured results.
26
26
 
27
27
  Output:
28
28
  Exit codes: 0 (all pass), 1 (any fail/error)
@@ -35,7 +35,8 @@ module Ace
35
35
  "ace-lint --provider gemini:flash # Use specific provider",
36
36
  "ace-lint --provider glite # Use API provider (predict mode)",
37
37
  "ace-lint --tags smoke # Run only smoke-tagged scenarios",
38
- "ace-lint TS-LINT-003 --dry-run # Preview scenarios that would run"
38
+ "ace-lint --prune-artifacts # Remove stale .ace-local/test-e2e artifacts before running",
39
+ "ace-lint TS-LINT-003 --dry-run # Preview preflight and scenario phases"
39
40
  ]
40
41
 
41
42
  argument :package, required: true, desc: "Package name (e.g., ace-lint)"
@@ -55,11 +56,13 @@ module Ace
55
56
  option :report_dir, type: :string,
56
57
  desc: "Explicit report directory path (overrides computed path)"
57
58
  option :dry_run, type: :boolean,
58
- desc: "Preview which scenarios would run without executing"
59
+ desc: "Preview which preflight tests and scenarios would run without executing"
59
60
  option :tags, type: :string,
60
61
  desc: "Comma-separated scenario tags to include"
61
62
  option :verify, type: :boolean,
62
63
  desc: "Run independent verifier pass after runner execution"
64
+ option :prune_artifacts, type: :boolean,
65
+ desc: "Remove stale .ace-local/test-e2e artifacts before running (preserves final reports and runtime-cache)"
63
66
  option :quiet, type: :boolean, aliases: %w[-q], desc: "Suppress non-essential output"
64
67
  option :verbose, type: :boolean, aliases: %w[-v], desc: "Show verbose output"
65
68
  option :debug, type: :boolean, aliases: %w[-d], desc: "Show debug output"
@@ -67,13 +70,22 @@ module Ace
67
70
  def call(package:, test_id: nil, **options)
68
71
  options = coerce_types(options, timeout: :integer, parallel: :integer)
69
72
  output = quiet?(options) ? StringIO.new : $stdout
73
+ prune_artifacts = !!options[:prune_artifacts]
74
+
75
+ if options[:dry_run] && prune_artifacts
76
+ raise Ace::Support::Cli::Error.new(
77
+ "--prune-artifacts cannot be used with --dry-run"
78
+ )
79
+ end
80
+
81
+ prune_artifacts_if_requested(output: output, prune_artifacts: prune_artifacts, quiet: quiet?(options))
70
82
 
71
83
  # Handle dry-run mode
72
84
  if options[:dry_run]
73
85
  return handle_dry_run(package, test_id, output, tags: parse_tags(options[:tags]))
74
86
  end
75
87
 
76
- orchestrator = Organisms::TestOrchestrator.new(
88
+ orchestrator = build_orchestrator(
77
89
  provider: options[:provider],
78
90
  timeout: options[:timeout],
79
91
  parallel: options[:parallel],
@@ -110,7 +122,31 @@ module Ace
110
122
 
111
123
  private
112
124
 
113
- # Handle dry-run mode: preview which scenarios would run
125
+ def build_orchestrator(provider:, timeout:, parallel:, progress:)
126
+ Organisms::TestOrchestrator.new(
127
+ provider: provider,
128
+ timeout: timeout,
129
+ parallel: parallel,
130
+ progress: progress
131
+ )
132
+ end
133
+
134
+ def build_artifact_pruner
135
+ Molecules::ArtifactPruner.new
136
+ end
137
+
138
+ def prune_artifacts_if_requested(output:, prune_artifacts:, quiet:)
139
+ return unless prune_artifacts
140
+
141
+ result = build_artifact_pruner.prune(base_dir: Dir.pwd)
142
+ return if quiet
143
+
144
+ output.puts(
145
+ "Pruned #{result[:deleted_count]} artifact(s) from #{result[:root_display]} (preserved final reports and runtime-cache)"
146
+ )
147
+ end
148
+
149
+ # Handle dry-run mode: preview which preflight tests and scenarios would run
114
150
  #
115
151
  # @param package [String] Package name
116
152
  # @param test_id [String, nil] Test ID
@@ -125,15 +161,28 @@ module Ace
125
161
  tags: tags,
126
162
  base_dir: Dir.pwd
127
163
  )
128
- if files.empty?
164
+ preflight_files = discoverer.find_integration_tests(package: package, base_dir: Dir.pwd)
165
+ if files.empty? && preflight_files.empty?
129
166
  raise Ace::Support::Cli::Error.new(
130
167
  "No tests found for package '#{package}'" +
131
168
  (test_id ? " with ID '#{test_id}'" : "")
132
169
  )
133
170
  end
134
171
 
135
- output.puts "Dry run: preview of scenarios to execute"
172
+ output.puts "Dry run: preview of execution phases"
173
+ output.puts ""
174
+ output.puts "Phase 1: deterministic preflight"
175
+ if preflight_files.empty?
176
+ output.puts " (none)"
177
+ else
178
+ preflight_files.each do |file|
179
+ output.puts " [preflight] #{file}"
180
+ end
181
+ end
136
182
  output.puts ""
183
+ output.puts "Phase 2: scenarios"
184
+ output.puts " (none)" if files.empty?
185
+ output.puts "" unless files.empty?
137
186
 
138
187
  files.each do |file|
139
188
  scenario = loader.load(File.dirname(file))
@@ -9,7 +9,8 @@ module Ace
9
9
  # Contains parsed frontmatter metadata and the full markdown body
10
10
  # from an independent test case file within a scenario directory.
11
11
  class TestCase
12
- attr_reader :tc_id, :title, :content, :file_path, :pending, :goal_format
12
+ attr_reader :tc_id, :title, :content, :file_path, :pending, :goal_format,
13
+ :declared_artifacts, :optional_artifacts
13
14
 
14
15
  # @param tc_id [String] Test case identifier (e.g., "TC-001")
15
16
  # @param title [String] Test case title from frontmatter
@@ -17,13 +18,18 @@ module Ace
17
18
  # @param file_path [String] Absolute path to the source test file
18
19
  # @param pending [String, nil] Pending reason (presence = pending, value = reason)
19
20
  # @param goal_format [String, nil] Test case source format ("standalone")
20
- def initialize(tc_id:, title:, content:, file_path:, pending: nil, goal_format: nil)
21
+ # @param declared_artifacts [Array<String>] Required artifact paths under results/tc/*
22
+ # @param optional_artifacts [Array<String>] Optional artifact paths under results/tc/*
23
+ def initialize(tc_id:, title:, content:, file_path:, pending: nil, goal_format: nil,
24
+ declared_artifacts: [], optional_artifacts: [])
21
25
  @tc_id = tc_id
22
26
  @title = title
23
27
  @content = content
24
28
  @file_path = file_path
25
29
  @pending = pending
26
30
  @goal_format = goal_format
31
+ @declared_artifacts = declared_artifacts
32
+ @optional_artifacts = optional_artifacts
27
33
  end
28
34
 
29
35
  # Whether this test case is pending (should be skipped)
@@ -10,7 +10,7 @@ module Ace
10
10
  # from executing a test scenario via LLM.
11
11
  class TestResult
12
12
  attr_reader :test_id, :status, :test_cases, :summary,
13
- :started_at, :completed_at, :report_dir, :error
13
+ :started_at, :completed_at, :report_dir, :error, :metadata, :observations
14
14
 
15
15
  # @param test_id [String] Test identifier
16
16
  # @param status [String] Overall status: "pass", "fail", "partial", "error"
@@ -20,8 +20,10 @@ module Ace
20
20
  # @param completed_at [Time] When execution completed
21
21
  # @param report_dir [String, nil] Path to the reports directory
22
22
  # @param error [String, nil] Error message if execution failed
23
+ # @param observations [String] Runner/verifier observations for report context
24
+ # @param metadata [Hash] Additional structured phase/report metadata
23
25
  def initialize(test_id:, status:, test_cases: [], summary: "",
24
- started_at: nil, completed_at: nil, report_dir: nil, error: nil)
26
+ started_at: nil, completed_at: nil, report_dir: nil, error: nil, observations: "", metadata: {})
25
27
  @test_id = test_id
26
28
  @status = status
27
29
  @test_cases = test_cases
@@ -30,6 +32,8 @@ module Ace
30
32
  @completed_at = completed_at || Time.now
31
33
  @report_dir = report_dir
32
34
  @error = error
35
+ @observations = observations.to_s
36
+ @metadata = metadata
33
37
  end
34
38
 
35
39
  # Check if the test passed
@@ -94,7 +98,9 @@ module Ace
94
98
  started_at: started_at,
95
99
  completed_at: completed_at,
96
100
  report_dir: dir,
97
- error: error
101
+ error: error,
102
+ observations: observations,
103
+ metadata: metadata
98
104
  )
99
105
  end
100
106