ace-test-runner-e2e 0.38.11 → 0.40.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +55 -0
  3. data/handbook/guides/e2e-testing.g.md +35 -3
  4. data/handbook/guides/scenario-yml-reference.g.md +8 -3
  5. data/handbook/guides/tc-authoring.g.md +15 -4
  6. data/handbook/templates/tc-file.template.md +4 -2
  7. data/handbook/workflow-instructions/e2e/create.wf.md +13 -3
  8. data/handbook/workflow-instructions/e2e/fix.wf.md +19 -0
  9. data/handbook/workflow-instructions/e2e/plan-changes.wf.md +16 -0
  10. data/handbook/workflow-instructions/e2e/review.wf.md +14 -10
  11. data/handbook/workflow-instructions/e2e/rewrite.wf.md +10 -3
  12. data/lib/ace/test/end_to_end_runner/atoms/artifact_contract_validator.rb +138 -0
  13. data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +195 -5
  14. data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +37 -1
  15. data/lib/ace/test/end_to_end_runner/molecules/artifact_pruner.rb +61 -0
  16. data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +90 -14
  17. data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +43 -5
  18. data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +7 -5
  19. data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +2 -0
  20. data/lib/ace/test/end_to_end_runner/molecules/sandbox_runtime_builder.rb +101 -9
  21. data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +28 -30
  22. data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +24 -1
  23. data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +182 -1
  24. data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +25 -3
  25. data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +44 -5
  26. data/lib/ace/test/end_to_end_runner/version.rb +1 -1
  27. data/lib/ace/test/end_to_end_runner.rb +2 -0
  28. metadata +4 -2
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "ace/b36ts"
3
4
  require "ace/support/cli"
4
5
  require "stringio"
5
6
  require "ace/support/cli"
@@ -23,6 +24,7 @@ module Ace
23
24
  in the monorepo. Tests run sequentially by default or in parallel
24
25
  with --parallel flag. Use --affected to only test changed packages.
25
26
  Use --only-failures to re-run only previously failed scenarios.
27
+ Full unfiltered suite runs retry failed scenarios once by default.
26
28
  Optionally filter to specific packages with a comma-separated list.
27
29
 
28
30
  Output:
@@ -40,6 +42,8 @@ module Ace
40
42
  "--affected --parallel 8 # Parallel affected tests only",
41
43
  "--only-failures # Re-run failed scenarios from cache",
42
44
  "--affected --only-failures # Re-run failed scenarios in affected packages",
45
+ "--no-retry-failures-once # Disable default retry for a full suite run",
46
+ "--prune-artifacts # Remove stale .ace-local/test-e2e artifacts before running",
43
47
  "--tags smoke,happy-path # Include scenarios by tag",
44
48
  "--exclude-tags deep # Exclude scenarios by tag",
45
49
  "--cli-args dangerously-skip-permissions # Pass args to provider"
@@ -50,6 +54,8 @@ module Ace
50
54
  option :affected, type: :boolean, desc: "Only test affected packages"
51
55
  option :only_failures, type: :boolean,
52
56
  desc: "Re-run only previously failed scenarios"
57
+ option :retry_failures_once, type: :boolean,
58
+ desc: "Retry failed scenarios once after a full unfiltered suite run"
53
59
  option :cli_args, type: :string,
54
60
  desc: "Extra args for CLI-based LLM providers"
55
61
  option :provider, type: :string, default: Molecules::ConfigLoader.default_provider,
@@ -61,6 +67,8 @@ module Ace
61
67
  option :progress, type: :boolean, desc: "Enable live animated display"
62
68
  option :verify, type: :boolean,
63
69
  desc: "Run independent verifier pass for each scenario"
70
+ option :prune_artifacts, type: :boolean,
71
+ desc: "Remove stale .ace-local/test-e2e artifacts before running (preserves suite reports and runtime-cache)"
64
72
  option :quiet, type: :boolean, aliases: %w[-q], desc: "Suppress non-essential output"
65
73
  option :verbose, type: :boolean, aliases: %w[-v], desc: "Show verbose output"
66
74
  option :debug, type: :boolean, aliases: %w[-d], desc: "Show debug output"
@@ -69,21 +77,36 @@ module Ace
69
77
  options = coerce_types(options, parallel: :integer, timeout: :integer)
70
78
 
71
79
  parallel = options[:parallel]
72
- affected = options[:affected]
73
- only_failures = options[:only_failures]
80
+ affected = !!options[:affected]
81
+ only_failures = !!options[:only_failures]
82
+ prune_artifacts = !!options[:prune_artifacts]
74
83
  tags = parse_csv_list(options[:tags])
75
84
  exclude_tags = parse_csv_list(options[:exclude_tags])
85
+ if only_failures && prune_artifacts
86
+ raise Ace::Support::Cli::Error.new(
87
+ "--prune-artifacts cannot be used with --only-failures"
88
+ )
89
+ end
90
+ retry_failures_once = resolve_retry_failures_once(
91
+ requested: options[:retry_failures_once],
92
+ packages: packages,
93
+ affected: affected,
94
+ only_failures: only_failures,
95
+ tags: tags,
96
+ exclude_tags: exclude_tags
97
+ )
76
98
 
77
99
  output = quiet?(options) ? StringIO.new : $stdout
78
100
  progress = options[:progress] && !quiet?(options)
101
+ prune_artifacts_if_requested(output: output, prune_artifacts: prune_artifacts, quiet: quiet?(options))
79
102
 
80
- orchestrator = Organisms::SuiteOrchestrator.new(
103
+ orchestrator = build_orchestrator(
81
104
  max_parallel: [parallel, 1].max,
82
105
  output: output,
83
106
  progress: progress
84
107
  )
85
108
 
86
- results = orchestrator.run(
109
+ run_options = {
87
110
  parallel: parallel > 0,
88
111
  affected: affected,
89
112
  only_failures: only_failures,
@@ -94,6 +117,13 @@ module Ace
94
117
  tags: tags,
95
118
  exclude_tags: exclude_tags,
96
119
  verify: options[:verify]
120
+ }
121
+
122
+ results = run_suite_with_retry(
123
+ orchestrator,
124
+ run_options: run_options,
125
+ output: output,
126
+ retry_failures_once: retry_failures_once
97
127
  )
98
128
 
99
129
  if results[:total].zero?
@@ -110,13 +140,173 @@ module Ace
110
140
  if results[:failed] > 0 || results[:errors] > 0
111
141
  failed_count = results[:failed] + results[:errors]
112
142
  raise Ace::Support::Cli::Error.new(
113
- "#{failed_count} test(s) failed or errored"
143
+ results[:retry_attempted] ? "#{failed_count} test(s) failed or errored after retry" : "#{failed_count} test(s) failed or errored"
114
144
  )
115
145
  end
146
+
147
+ results
116
148
  end
117
149
 
118
150
  private
119
151
 
152
# Builds the suite orchestrator for this invocation.
#
# Construction is isolated in its own method so `call` only deals with
# already-built collaborators.
#
# @param max_parallel [Integer] forwarded as `max_parallel:`
# @param output [IO, StringIO] forwarded as `output:`
# @param progress [Boolean, nil] forwarded as `progress:`
# @return [Organisms::SuiteOrchestrator]
def build_orchestrator(max_parallel:, output:, progress:)
  orchestrator_options = {max_parallel: max_parallel, output: output, progress: progress}
  Organisms::SuiteOrchestrator.new(**orchestrator_options)
end
159
+
160
# Builds the report writer used for the retry summary.
#
# @return [Molecules::SuiteReportWriter] writer constructed with the result
#   of `Molecules::ConfigLoader.load` as its `config:`
def build_retry_report_writer
  loaded_config = Molecules::ConfigLoader.load
  Molecules::SuiteReportWriter.new(config: loaded_config)
end
163
+
164
# Factory for the artifact pruner used by `prune_artifacts_if_requested`.
#
# @return [Molecules::ArtifactPruner] a new pruner instance
def build_artifact_pruner
  Molecules::ArtifactPruner.new
end
167
+
168
# Prunes artifacts under the current working directory when the flag is set,
# then reports the outcome unless running quietly.
#
# @param output [#puts] destination for the one-line summary
# @param prune_artifacts [Boolean] whether pruning was requested
# @param quiet [Boolean] when true, pruning still happens but nothing is printed
# @return [nil]
def prune_artifacts_if_requested(output:, prune_artifacts:, quiet:)
  return unless prune_artifacts

  pruned = build_artifact_pruner.prune(base_dir: Dir.pwd)
  unless quiet
    output.puts(
      "Pruned #{pruned[:deleted_count]} artifact(s) from #{pruned[:root_display]} (preserved suite reports and runtime-cache)"
    )
  end
end
178
+
179
# Runs the suite and, when enabled and the first pass failed, re-runs only the
# failed scenarios once.
#
# The returned hash is the result of the last pass merged with retry metadata
# (`retry_attempted`, `attempts`, `flaky_scenarios`, `remaining_failures`,
# `initial_report_path`, `retry_report_path`, `report_path`).
#
# NOTE(review): `run_options` is handed to `orchestrator.run` as a positional
# Hash. If `run` declares keyword parameters this raises on Ruby 3 — confirm
# against SuiteOrchestrator#run.
#
# @param orchestrator [#run] suite orchestrator
# @param run_options [Hash] options for the first pass
# @param output [#puts] destination for progress messages
# @param retry_failures_once [Boolean] whether a retry pass is allowed
# @return [Hash] annotated results
# @raise [Ace::Support::Cli::Error] when the retry pass selects zero scenarios
def run_suite_with_retry(orchestrator, run_options:, output:, retry_failures_once:)
  first_pass = orchestrator.run(run_options)
  first_report = first_pass[:report_path]
  single_pass_outcome = annotate_results(
    first_pass,
    retry_attempted: false,
    attempts: 1,
    flaky_scenarios: [],
    remaining_failures: failure_scenarios(first_pass),
    initial_report_path: first_report,
    retry_report_path: nil,
    report_path: first_report
  )
  retry_needed = retry_failures_once && suite_failed?(first_pass)
  return single_pass_outcome unless retry_needed

  output.puts "Retrying failed scenarios once..."
  second_pass = orchestrator.run(run_options.merge(only_failures: true))
  # An empty retry pass would look like success; treat it as an error instead.
  if second_pass[:total].zero?
    raise Ace::Support::Cli::Error.new(
      "Retry pass found no failed test scenarios from attempt 1; aborting instead of silently passing"
    )
  end

  recovered = recovered_flaky_scenarios(first_pass, second_pass)
  still_failing = failure_scenarios(second_pass)
  summary_path = write_retry_summary_report(first_pass, second_pass)
  output.puts "Final Report: #{summary_path}" if summary_path

  outcome_line =
    if still_failing.empty?
      "#{recovered.length} scenario(s) recovered on retry and were marked flaky"
    else
      "#{still_failing.length} scenario(s) still failing after retry"
    end
  output.puts outcome_line

  annotate_results(
    second_pass,
    retry_attempted: true,
    attempts: 2,
    flaky_scenarios: recovered,
    remaining_failures: still_failing,
    initial_report_path: first_report,
    retry_report_path: second_pass[:report_path],
    # Fall back to the retry pass report when the summary could not be written.
    report_path: summary_path || second_pass[:report_path]
  )
end
223
+
224
# Writes the combined retry summary report on a best-effort basis.
#
# Any StandardError is swallowed so a reporting failure never fails the run;
# the warning is only printed when the DEBUG environment variable is set.
#
# @param initial_results [Hash] results of the first pass
# @param retry_results [Hash] results of the retry pass
# @return [Object, nil] whatever `write_retry_summary` returns, or nil on error
def write_retry_summary_report(initial_results, retry_results)
  writer = build_retry_report_writer
  stamp = Ace::B36ts.encode(Time.now.utc, format: :"50ms")
  writer.write_retry_summary(
    initial_results: initial_results,
    retry_results: retry_results,
    timestamp: stamp,
    base_dir: Dir.pwd
  )
rescue => error
  warn "Warning: Failed to write retry summary report: #{error.message}" if ENV["DEBUG"]
  nil
end
235
+
236
# Returns a copy of `results` with the given keyword pairs added.
#
# The input hash is not modified; keys in `extra` win on conflict.
#
# @param results [Hash] orchestrator results
# @param extra [Hash] additional keys to attach
# @return [Hash]
def annotate_results(results, **extra)
  annotated = results.dup
  annotated.update(extra)
end
239
+
240
# Tells whether a results hash reports any failed or errored scenario.
#
# Missing counters are treated as zero (`nil.to_i`).
#
# @param results [Hash] results with optional `:failed` / `:errors` counters
# @return [Boolean]
def suite_failed?(results)
  %i[failed errors].any? { |counter| results[counter].to_i > 0 }
end
243
+
244
# Lists the IDs of every scenario whose status is anything other than "pass".
#
# @param results [Hash] orchestrator results
# @return [Array<String>] sorted scenario IDs
def failure_scenarios(results)
  non_passing = scenario_result_index(results).values.reject { |entry| entry[:status] == "pass" }
  non_passing.map { |entry| entry[:test_id] }.sort
end
251
+
252
# Finds scenarios that did not pass in the first run but passed on retry.
#
# @param initial_results [Hash] results of the first pass
# @param retry_results [Hash] results of the retry pass
# @return [Array<Hash>] entries with "test_id", "initial_status" and
#   "retry_status" keys, sorted by "test_id"
def recovered_flaky_scenarios(initial_results, retry_results)
  first_pass = scenario_result_index(initial_results)
  second_pass = scenario_result_index(retry_results)

  recovered = first_pass.select do |test_id, before|
    # A scenario absent from the retry pass is not considered recovered.
    before[:status] != "pass" && second_pass.dig(test_id, :status) == "pass"
  end

  recovered
    .map do |test_id, before|
      {
        "test_id" => test_id,
        "initial_status" => before[:status],
        "retry_status" => second_pass[test_id][:status]
      }
    end
    .sort_by { |entry| entry["test_id"] }
end
269
+
270
# Indexes per-scenario results by scenario ID.
#
# The ID is the leading `TS-<AREA>-<NNN>[suffix]` token of the scenario name
# (upcased); when the name does not start with such a token the raw name is
# used as the key. Later entries overwrite earlier ones with the same ID.
#
# The name is taken from `:test_name`, falling back to `:test_id` when the
# name is nil *or blank*. An empty String is truthy in Ruby, so a plain `||`
# chain would never reach `:test_id` for results carrying `test_name: ""`.
# Nil package lists and nil result entries are ignored instead of raising.
#
# @param results [Hash] orchestrator results; `:packages` maps a package name
#   to an Array of per-scenario result hashes
# @return [Hash{String => Hash}] each value has :test_id, :status, :summary
#   and :error keys
def scenario_result_index(results)
  scenario_results = (results[:packages] || {}).values.flatten.compact

  scenario_results.each_with_object({}) do |result, index|
    label = [result[:test_name], result[:test_id]]
      .map(&:to_s)
      .find { |candidate| !candidate.strip.empty? }
    next unless label

    test_id = label[/\A(TS-[A-Z0-9]+-\d+[a-z]*)/i, 1]&.upcase || label

    index[test_id] = {
      test_id: test_id,
      status: result[:status],
      summary: result[:summary],
      error: result[:error]
    }
  end
end
284
+
285
# Decides whether the retry-once behaviour applies to this invocation.
#
# * explicit `true` on a scoped (filtered) run is rejected;
# * any other explicit value is returned as given;
# * with no explicit value, retry defaults to on for unscoped runs only.
#
# @param requested [Boolean, nil] value of the CLI option, nil when not given
# @param packages [Object, nil] package filter
# @param affected [Boolean]
# @param only_failures [Boolean]
# @param tags [Array<String>]
# @param exclude_tags [Array<String>]
# @return [Boolean]
# @raise [Ace::Support::Cli::Error] when retry is requested for a scoped run
def resolve_retry_failures_once(requested:, packages:, affected:, only_failures:, tags:, exclude_tags:)
  filtered = scoped_suite_run?(
    packages: packages,
    affected: affected,
    only_failures: only_failures,
    tags: tags,
    exclude_tags: exclude_tags
  )

  case requested
  when true
    if filtered
      raise Ace::Support::Cli::Error.new(
        "--retry-failures-once is only supported for full unfiltered suite runs"
      )
    end
    requested
  when nil
    !filtered
  else
    requested
  end
end
303
+
304
# Tells whether the run is narrowed by any filter.
#
# A run is scoped when `packages`, `affected` or `only_failures` is truthy,
# or when either tag list is non-empty.
#
# @return [Boolean]
def scoped_suite_run?(packages:, affected:, only_failures:, tags:, exclude_tags:)
  return true if packages || affected || only_failures

  !(tags.empty? && exclude_tags.empty?)
end
309
+
120
310
  def parse_csv_list(raw)
121
311
  return [] if raw.nil? || raw.strip.empty?
122
312
 
@@ -35,6 +35,7 @@ module Ace
35
35
  "ace-lint --provider gemini:flash # Use specific provider",
36
36
  "ace-lint --provider glite # Use API provider (predict mode)",
37
37
  "ace-lint --tags smoke # Run only smoke-tagged scenarios",
38
+ "ace-lint --prune-artifacts # Remove stale .ace-local/test-e2e artifacts before running",
38
39
  "ace-lint TS-LINT-003 --dry-run # Preview preflight and scenario phases"
39
40
  ]
40
41
 
@@ -60,6 +61,8 @@ module Ace
60
61
  desc: "Comma-separated scenario tags to include"
61
62
  option :verify, type: :boolean,
62
63
  desc: "Run independent verifier pass after runner execution"
64
+ option :prune_artifacts, type: :boolean,
65
+ desc: "Remove stale .ace-local/test-e2e artifacts before running (preserves final reports and runtime-cache)"
63
66
  option :quiet, type: :boolean, aliases: %w[-q], desc: "Suppress non-essential output"
64
67
  option :verbose, type: :boolean, aliases: %w[-v], desc: "Show verbose output"
65
68
  option :debug, type: :boolean, aliases: %w[-d], desc: "Show debug output"
@@ -67,13 +70,22 @@ module Ace
67
70
  def call(package:, test_id: nil, **options)
68
71
  options = coerce_types(options, timeout: :integer, parallel: :integer)
69
72
  output = quiet?(options) ? StringIO.new : $stdout
73
+ prune_artifacts = !!options[:prune_artifacts]
74
+
75
+ if options[:dry_run] && prune_artifacts
76
+ raise Ace::Support::Cli::Error.new(
77
+ "--prune-artifacts cannot be used with --dry-run"
78
+ )
79
+ end
80
+
81
+ prune_artifacts_if_requested(output: output, prune_artifacts: prune_artifacts, quiet: quiet?(options))
70
82
 
71
83
  # Handle dry-run mode
72
84
  if options[:dry_run]
73
85
  return handle_dry_run(package, test_id, output, tags: parse_tags(options[:tags]))
74
86
  end
75
87
 
76
- orchestrator = Organisms::TestOrchestrator.new(
88
+ orchestrator = build_orchestrator(
77
89
  provider: options[:provider],
78
90
  timeout: options[:timeout],
79
91
  parallel: options[:parallel],
@@ -110,6 +122,30 @@ module Ace
110
122
 
111
123
  private
112
124
 
125
# Builds the single-package test orchestrator for this invocation.
#
# @param provider [String, nil] forwarded as `provider:`
# @param timeout [Integer, nil] forwarded as `timeout:`
# @param parallel [Integer, nil] forwarded as `parallel:`
# @param progress [Boolean, nil] forwarded as `progress:`
# @return [Organisms::TestOrchestrator]
def build_orchestrator(provider:, timeout:, parallel:, progress:)
  orchestrator_options = {
    provider: provider,
    timeout: timeout,
    parallel: parallel,
    progress: progress
  }
  Organisms::TestOrchestrator.new(**orchestrator_options)
end
133
+
134
# Factory for the artifact pruner used by `prune_artifacts_if_requested`.
#
# @return [Molecules::ArtifactPruner] a new pruner instance
def build_artifact_pruner
  Molecules::ArtifactPruner.new
end
137
+
138
# Prunes artifacts under the current working directory when the flag is set,
# then reports the outcome unless running quietly.
#
# @param output [#puts] destination for the one-line summary
# @param prune_artifacts [Boolean] whether pruning was requested
# @param quiet [Boolean] when true, pruning still happens but nothing is printed
# @return [nil]
def prune_artifacts_if_requested(output:, prune_artifacts:, quiet:)
  return unless prune_artifacts

  pruned = build_artifact_pruner.prune(base_dir: Dir.pwd)
  unless quiet
    output.puts(
      "Pruned #{pruned[:deleted_count]} artifact(s) from #{pruned[:root_display]} (preserved final reports and runtime-cache)"
    )
  end
end
148
+
113
149
  # Handle dry-run mode: preview which preflight tests and scenarios would run
114
150
  #
115
151
  # @param package [String] Package name
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fileutils"
4
+
5
module Ace
  module Test
    module EndToEndRunner
      module Molecules
        # Prunes stale E2E run artifacts while preserving suite reports and runtime cache.
        #
        # Only the direct children of `<base_dir>/.ace-local/test-e2e` are
        # considered. A child is kept when it is a directory named in
        # PRESERVED_DIRECTORY_NAMES or a regular file whose name matches one of
        # PRESERVED_FILE_PATTERNS; everything else is deleted recursively.
        class ArtifactPruner
          ROOT_RELATIVE_PATH = File.join(".ace-local", "test-e2e")
          PRESERVED_DIRECTORY_NAMES = %w[runtime-cache].freeze
          PRESERVED_FILE_PATTERNS = [
            /-suite-report\.md\z/,
            /-suite-final-report\.md\z/
          ].freeze

          # Deletes every non-preserved entry under the artifact root.
          #
          # @param base_dir [String] directory containing `.ace-local/test-e2e`
          # @return [Hash] summary with :root, :root_display, :removed_paths,
          #   :preserved_paths, :failed_paths, :deleted_count, :preserved_count.
          #   Returns an all-empty summary when the root does not exist.
          def prune(base_dir: Dir.pwd)
            root = File.join(File.expand_path(base_dir), ROOT_RELATIVE_PATH)
            return summary(root, [], []) unless Dir.exist?(root)

            removed_paths = []
            preserved_paths = []
            failed_paths = []

            Dir.children(root).sort.each do |entry|
              path = File.join(root, entry)
              if preserve_entry?(entry, path)
                preserved_paths << path
                next
              end

              # rm_rf ignores StandardError, so verify the entry is really gone
              # before counting it as deleted.
              FileUtils.rm_rf(path)
              (still_present?(path) ? failed_paths : removed_paths) << path
            end

            summary(root, removed_paths, preserved_paths, failed_paths)
          end

          private

          # @return [Boolean] true when the entry must survive pruning
          def preserve_entry?(entry, path)
            return true if File.directory?(path) && PRESERVED_DIRECTORY_NAMES.include?(entry)
            return false unless File.file?(path)

            PRESERVED_FILE_PATTERNS.any? { |pattern| pattern.match?(entry) }
          end

          # File.exist? follows symlinks, so also check for a dangling link.
          def still_present?(path)
            File.exist?(path) || File.symlink?(path)
          end

          # Builds the result hash returned by #prune.
          def summary(root, removed_paths, preserved_paths, failed_paths = [])
            {
              root: root,
              root_display: ROOT_RELATIVE_PATH,
              removed_paths: removed_paths,
              preserved_paths: preserved_paths,
              failed_paths: failed_paths,
              deleted_count: removed_paths.length,
              preserved_count: preserved_paths.length
            }
          end
        end
      end
    end
  end
end
@@ -69,7 +69,8 @@ module Ace
69
69
  runner = @prompt_bundler.prepare_runner(
70
70
  scenario: scenario,
71
71
  sandbox_path: sandbox_path,
72
- test_cases: test_cases
72
+ test_cases: test_cases,
73
+ artifact_contract: declared_artifact_contract(scenario, test_cases: test_cases)
73
74
  )
74
75
  runner_response = run_llm(
75
76
  prompt_path: runner[:prompt_path],
@@ -78,10 +79,44 @@ module Ace
78
79
  cli_args: cli_args,
79
80
  env_vars: merged_env,
80
81
  subprocess_command_prefix: sandbox_backend.command_prefix(chdir: sandbox_path, env: merged_env),
81
- provider: @provider
82
+ provider: @provider,
83
+ fallback: false
82
84
  )
83
85
  runner_observations = extract_runner_observations(runner_response[:text])
84
- artifact_contract = snapshot_artifacts(report_dir, sandbox_path, scenario, test_cases: test_cases)
86
+ initial_artifact_contract = snapshot_artifacts(
87
+ report_dir,
88
+ sandbox_path,
89
+ scenario,
90
+ test_cases: test_cases,
91
+ snapshot_name: "artifact-snapshot.initial.json"
92
+ )
93
+ artifact_contract = initial_artifact_contract
94
+
95
+ if missing_required_artifacts?(artifact_contract)
96
+ write_command_record(report_dir, "runner-repair", provider: @provider, cli_args: cli_args)
97
+ repair_runner = @prompt_bundler.prepare_runner(
98
+ scenario: scenario,
99
+ sandbox_path: sandbox_path,
100
+ test_cases: test_cases,
101
+ artifact_contract: artifact_contract,
102
+ repair_mode: true
103
+ )
104
+ repair_response = run_llm(
105
+ prompt_path: repair_runner[:prompt_path],
106
+ system_path: repair_runner[:system_path],
107
+ output_path: repair_runner[:output_path],
108
+ cli_args: cli_args,
109
+ env_vars: merged_env,
110
+ subprocess_command_prefix: sandbox_backend.command_prefix(chdir: sandbox_path, env: merged_env),
111
+ provider: @provider,
112
+ fallback: false
113
+ )
114
+ repair_observations = extract_runner_observations(repair_response[:text])
115
+ runner_observations = merge_runner_observations(runner_observations, repair_observations)
116
+ artifact_contract = snapshot_artifacts(report_dir, sandbox_path, scenario, test_cases: test_cases)
117
+ else
118
+ write_artifact_snapshot(report_dir, "artifact-snapshot.json", artifact_contract)
119
+ end
85
120
 
86
121
  verifier = @prompt_bundler.prepare_verifier(
87
122
  scenario: scenario,
@@ -98,7 +133,8 @@ module Ace
98
133
  cli_args: cli_args,
99
134
  env_vars: merged_env,
100
135
  subprocess_command_prefix: sandbox_backend.command_prefix(chdir: sandbox_path, env: merged_env),
101
- provider: @verifier_provider
136
+ provider: @verifier_provider,
137
+ fallback: query_fallback_for(@verifier_provider)
102
138
  )
103
139
 
104
140
  @report_generator.generate(
@@ -111,7 +147,8 @@ module Ace
111
147
  metadata: base_metadata(
112
148
  report_dir,
113
149
  runner_observations: runner_observations,
114
- artifact_contract: artifact_contract
150
+ artifact_contract: artifact_contract,
151
+ initial_artifact_contract: initial_artifact_contract
115
152
  )
116
153
  )
117
154
  rescue => e
@@ -140,7 +177,7 @@ module Ace
140
177
 
141
178
  private
142
179
 
143
- def run_llm(prompt_path:, system_path:, output_path:, cli_args:, env_vars:, subprocess_command_prefix:, provider:)
180
+ def run_llm(prompt_path:, system_path:, output_path:, cli_args:, env_vars:, subprocess_command_prefix:, provider:, fallback:)
144
181
  prompt = File.read(prompt_path)
145
182
  system = File.read(system_path)
146
183
  sandbox_dir = env_vars["PROJECT_ROOT_PATH"] || env_vars[:PROJECT_ROOT_PATH]
@@ -151,7 +188,7 @@ module Ace
151
188
  system: system,
152
189
  cli_args: cli_args,
153
190
  timeout: @timeout,
154
- fallback: false,
191
+ fallback: fallback,
155
192
  output: output_path,
156
193
  subprocess_env: env_vars,
157
194
  subprocess_command_prefix: subprocess_command_prefix,
@@ -159,6 +196,10 @@ module Ace
159
196
  )
160
197
  end
161
198
 
199
# Decides the `fallback:` value passed to `run_llm` for a provider.
#
# @param provider [#to_s] provider identifier
# @return [Boolean] true only when the identifier begins with "role:"
def query_fallback_for(provider)
  provider.to_s.match?(/\Arole:/)
end
202
+
162
203
  def write_tc_manifests(report_dir, scenario, test_cases:)
163
204
  selected = select_test_cases(scenario, test_cases)
164
205
  selected.each do |test_case|
@@ -190,12 +231,18 @@ module Ace
190
231
  )
191
232
  end
192
233
 
193
- def snapshot_artifacts(report_dir, sandbox_path, scenario, test_cases:)
194
- snapshot = select_test_cases(scenario, test_cases).to_h do |test_case|
234
# Computes the artifact contract against the sandbox and persists it as JSON
# in the report directory.
#
# @param report_dir [String] directory receiving the snapshot file
# @param sandbox_path [String] sandbox root used to check artifact presence
# @param scenario [Object] scenario passed through to `declared_artifact_contract`
# @param test_cases [Array<String>, nil] test-case selection
# @param snapshot_name [String] file name of the snapshot inside `report_dir`
# @return [Hash] the contract that was written
def snapshot_artifacts(report_dir, sandbox_path, scenario, test_cases:, snapshot_name: "artifact-snapshot.json")
  declared_artifact_contract(scenario, test_cases: test_cases, sandbox_path: sandbox_path).tap do |contract|
    write_artifact_snapshot(report_dir, snapshot_name, contract)
  end
end
239
+
240
+ def declared_artifact_contract(scenario, test_cases:, sandbox_path: nil)
241
+ select_test_cases(scenario, test_cases).to_h do |test_case|
195
242
  required = Array(test_case.declared_artifacts).sort
196
243
  optional = Array(test_case.optional_artifacts).sort
197
- present_required = required.select { |path| File.exist?(File.join(sandbox_path, path)) }
198
- present_optional = optional.select { |path| File.exist?(File.join(sandbox_path, path)) }
244
+ present_required = present_artifacts(required, sandbox_path)
245
+ present_optional = present_artifacts(optional, sandbox_path)
199
246
  missing_required = required - present_required
200
247
 
201
248
  [test_case.tc_id, {
@@ -207,8 +254,31 @@ module Ace
207
254
  "present_optional_artifacts" => present_optional
208
255
  }]
209
256
  end
210
- File.write(File.join(report_dir, "artifact-snapshot.json"), JSON.pretty_generate(snapshot))
211
- snapshot
257
+ end
258
+
259
# Serialises a snapshot as pretty-printed JSON into the report directory.
#
# @param report_dir [String] target directory
# @param snapshot_name [String] file name inside `report_dir`
# @param snapshot [Hash] data to serialise
# @return [Integer] number of bytes written (result of `File.write`)
def write_artifact_snapshot(report_dir, snapshot_name, snapshot)
  destination = File.join(report_dir, snapshot_name)
  payload = JSON.pretty_generate(snapshot)
  File.write(destination, payload)
end
262
+
263
# Filters artifact paths down to those that exist inside the sandbox.
#
# @param paths [Array<String>, String, nil] sandbox-relative paths
# @param sandbox_path [String, nil] sandbox root; when nil nothing is
#   considered present
# @return [Array<String>] the subset of `paths` found on disk, in input order
def present_artifacts(paths, sandbox_path)
  if sandbox_path
    Array(paths).select do |relative_path|
      File.exist?(File.join(sandbox_path, relative_path))
    end
  else
    []
  end
end
268
+
269
# Tells whether any test case in the contract still lacks a required artifact.
#
# @param artifact_contract [Hash{String => Hash}] per-test-case entries with a
#   "missing_required_artifacts" list
# @return [Boolean]
def missing_required_artifacts?(artifact_contract)
  outstanding = artifact_contract.values.flat_map do |entry|
    Array(entry["missing_required_artifacts"])
  end
  outstanding.any?
end
274
+
275
# Combines the observations of the initial runner pass and the repair pass.
#
# Both inputs are stringified and stripped. When only one side has content it
# is returned alone; otherwise the repair text is appended under a
# "Repair pass:" heading.
#
# @param initial_observations [#to_s, nil]
# @param repair_observations [#to_s, nil]
# @return [String]
def merge_runner_observations(initial_observations, repair_observations)
  first, second = [initial_observations, repair_observations].map { |text| text.to_s.strip }

  if second.empty?
    first
  elsif first.empty?
    second
  else
    "#{first}\n\nRepair pass:\n#{second}"
  end
end
213
283
 
214
284
  def select_test_cases(scenario, test_cases)
@@ -218,7 +288,7 @@ module Ace
218
288
  Array(scenario.test_cases).select { |tc| wanted.include?(tc.tc_id.to_s.upcase) }
219
289
  end
220
290
 
221
- def base_metadata(report_dir, runner_observations: nil, artifact_contract: nil)
291
+ def base_metadata(report_dir, runner_observations: nil, artifact_contract: nil, initial_artifact_contract: nil)
222
292
  metadata = {
223
293
  "runner_provider" => @provider,
224
294
  "verifier_provider" => @verifier_provider,
@@ -232,6 +302,12 @@ module Ace
232
302
  Array(entry["missing_required_artifacts"])
233
303
  end.reject { |_tc_id, paths| paths.empty? }
234
304
  end
305
+ if initial_artifact_contract
306
+ metadata["initial_missing_required_artifacts"] = initial_artifact_contract.to_h.transform_values do |entry|
307
+ Array(entry["missing_required_artifacts"])
308
+ end.reject { |_tc_id, paths| paths.empty? }
309
+ metadata["artifact_repair_attempted"] = true if missing_required_artifacts?(initial_artifact_contract)
310
+ end
235
311
  metadata
236
312
  end
237
313
 
@@ -53,21 +53,23 @@ module Ace
53
53
  # @param sandbox_path [String]
54
54
  # @param test_cases [Array<String>, nil]
55
55
  # @return [Hash]
56
- def prepare_runner(scenario:, sandbox_path:, test_cases: nil)
56
+ def prepare_runner(scenario:, sandbox_path:, test_cases: nil, artifact_contract: nil, repair_mode: false)
57
57
  cache_dir = ensure_cache_dir(sandbox_path)
58
- system_path = File.join(cache_dir, "runner-system.md")
59
- prompt_path = File.join(cache_dir, "runner-prompt.md")
58
+ file_prefix = repair_mode ? "runner-repair" : "runner"
59
+ system_path = File.join(cache_dir, "#{file_prefix}-system.md")
60
+ prompt_path = File.join(cache_dir, "#{file_prefix}-prompt.md")
60
61
 
61
62
  File.write(system_path, RUNNER_SYSTEM_PROMPT)
62
63
 
63
64
  bundled = bundle_markdown_file(File.join(scenario.dir_path, "runner.yml.md"), test_cases: test_cases)
64
65
  bundled = bundled.gsub("Workspace root: (current directory)", "Workspace root: #{File.expand_path(sandbox_path)}")
65
- File.write(prompt_path, bundled)
66
+ contract = build_runner_artifact_contract_section(artifact_contract, repair_mode: repair_mode)
67
+ File.write(prompt_path, [bundled, contract].reject(&:empty?).join("\n\n---\n\n"))
66
68
 
67
69
  {
68
70
  system_path: system_path,
69
71
  prompt_path: prompt_path,
70
- output_path: File.join(cache_dir, "runner-output.md")
72
+ output_path: File.join(cache_dir, "#{file_prefix}-output.md")
71
73
  }
72
74
  end
73
75
 
@@ -251,6 +253,42 @@ module Ace
251
253
  MARKDOWN
252
254
  end
253
255
 
256
# Renders the "Artifact Contract" markdown section appended to the runner prompt.
#
# The section opens with guidance that depends on `repair_mode`, followed by
# one `## <tc_id>` subsection per contract entry (sorted by ID) listing the
# required artifacts and, when non-empty, the missing and optional ones.
#
# @param artifact_contract [Hash{String => Hash}, nil] per-test-case contract
# @param repair_mode [Boolean] selects the repair-pass guidance
# @return [String] markdown, or "" when the contract is nil or empty
def build_runner_artifact_contract_section(artifact_contract, repair_mode:)
  return "" if artifact_contract.nil? || artifact_contract.empty?

  guidance =
    if repair_mode
      [
        "This is a bounded repair pass.",
        "- Do not rerun goals whose required artifacts are already complete.",
        "- For each goal with missing required artifacts, produce only the missing files.",
        "- Prefer the minimal real public command needed to create the missing capture set.",
        "- If the missing file is supporting evidence copied from an already-generated real artifact, copy that real artifact into `results/`.",
        "- Do not invent content, fabricate captures, or rewrite unrelated artifacts."
      ]
    else
      [
        "A goal is not complete unless every required artifact for that goal exists on disk under `results/`.",
        "- After finishing each goal, self-check the required artifact list below.",
        "- If a required artifact is missing, fix it before moving on."
      ]
    end

  lines = ["# Artifact Contract", "", *guidance, ""]

  artifact_contract.sort.each do |tc_id, entry|
    lines << "## #{tc_id}" << ""
    lines << "- Required artifacts: #{format_artifact_list(entry["required_artifacts"])}"

    still_missing = Array(entry["missing_required_artifacts"])
    lines << "- Missing required artifacts: #{format_artifact_list(still_missing)}" unless still_missing.empty?

    optional_paths = Array(entry["optional_artifacts"])
    lines << "- Optional artifacts: #{format_artifact_list(optional_paths)}" unless optional_paths.empty?

    lines << ""
  end

  # Drop the trailing blank line left by the last subsection.
  lines.join("\n").rstrip
end
291
+
254
292
  def build_artifact_contract_section(artifact_contract)
255
293
  return "# Artifact Contract\n\n(no snapshot provided)" if artifact_contract.nil? || artifact_contract.empty?
256
294