ace-test-runner-e2e 0.29.8 → 0.38.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.ace-defaults/e2e-runner/config.yml +14 -2
  3. data/CHANGELOG.md +178 -0
  4. data/README.md +2 -2
  5. data/exe/ace-test-e2e-sh +9 -4
  6. data/handbook/guides/e2e-testing.g.md +43 -9
  7. data/handbook/guides/scenario-yml-reference.g.md +16 -8
  8. data/handbook/guides/tc-authoring.g.md +12 -5
  9. data/handbook/skills/as-e2e-fix/SKILL.md +2 -2
  10. data/handbook/skills/as-e2e-review/SKILL.md +2 -2
  11. data/handbook/templates/ace-taskflow-fixture.template.md +17 -17
  12. data/handbook/templates/agent-experience-report.template.md +3 -2
  13. data/handbook/templates/scenario.yml.template.yml +7 -2
  14. data/handbook/templates/tc-file.template.md +14 -4
  15. data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +53 -6
  16. data/handbook/workflow-instructions/e2e/create.wf.md +118 -25
  17. data/handbook/workflow-instructions/e2e/execute.wf.md +11 -7
  18. data/handbook/workflow-instructions/e2e/fix.wf.md +65 -15
  19. data/handbook/workflow-instructions/e2e/plan-changes.wf.md +17 -1
  20. data/handbook/workflow-instructions/e2e/review.wf.md +36 -25
  21. data/handbook/workflow-instructions/e2e/rewrite.wf.md +15 -8
  22. data/handbook/workflow-instructions/e2e/run.wf.md +50 -26
  23. data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +4 -4
  24. data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +7 -5
  25. data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +73 -7
  26. data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +21 -8
  27. data/lib/ace/test/end_to_end_runner/models/test_case.rb +8 -2
  28. data/lib/ace/test/end_to_end_runner/models/test_result.rb +9 -3
  29. data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +4 -2
  30. data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +7 -2
  31. data/lib/ace/test/end_to_end_runner/molecules/bwrap_sandbox_backend.rb +271 -0
  32. data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +28 -1
  33. data/lib/ace/test/end_to_end_runner/molecules/integration_runner.rb +122 -0
  34. data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +157 -16
  35. data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +121 -8
  36. data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +91 -19
  37. data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +119 -18
  38. data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +13 -12
  39. data/lib/ace/test/end_to_end_runner/molecules/sandbox_runtime_builder.rb +282 -0
  40. data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +85 -5
  41. data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +98 -16
  42. data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +241 -97
  43. data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +38 -13
  44. data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +27 -5
  45. data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +73 -15
  46. data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +120 -19
  47. data/lib/ace/test/end_to_end_runner/version.rb +1 -1
  48. data/lib/ace/test/end_to_end_runner.rb +2 -0
  49. metadata +19 -2
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fileutils"
4
+ require "open3"
5
+ require "shellwords"
6
+ require "ace/test_support/sandbox_package_copy"
7
+
8
module Ace
  module Test
    module EndToEndRunner
      module Molecules
        # Runs deterministic preflight tests inside a sandboxed package copy.
        #
        # Every requested file is executed with the `ace-test` command from the
        # package root inside a freshly created sandbox directory. The per-file
        # outcomes are folded into one `Models::TestResult` whose test id is
        # "PREFLIGHT".
        class IntegrationRunner
          # Executable used to run each preflight file.
          PREFLIGHT_COMMAND = "ace-test"
          private_constant :PREFLIGHT_COMMAND

          # @param base_dir [String] project root; expanded to an absolute path
          # @param package_copy [#prepare, nil] collaborator that populates the
          #   sandbox; defaults to `Ace::TestSupport::SandboxPackageCopy`
          def initialize(base_dir: Dir.pwd, package_copy: nil)
            @base_dir = File.expand_path(base_dir)
            @package_copy = package_copy || Ace::TestSupport::SandboxPackageCopy.new(source_root: @base_dir)
          end

          # Runs every file in +files+ and aggregates the outcome.
          #
          # @param package [String] package name (also used in the sandbox dir name)
          # @param files [Array<String, #to_s>, nil] test files to run
          # @param timestamp [String] run identifier used in the sandbox dir name
          # @param output [#puts] receives one progress line per file
          # @return [Models::TestResult, nil] nil when there is nothing to run
          def run(package:, files:, timestamp:, output: $stdout)
            return nil if files.nil? || files.empty?

            started_at = Time.now
            sandbox_root = File.join(@base_dir, ".ace-local", "test-e2e", "#{timestamp}-#{package}-preflight")
            FileUtils.mkdir_p(sandbox_root)

            package_copy_result = @package_copy.prepare(package_name: package, sandbox_root: sandbox_root)
            package_root = resolve_package_root(sandbox_root, package)
            env = package_copy_result[:env] || {}

            test_cases = files.map { |file| run_file(package_root, file, env, output) }
            status = overall_status(test_cases)

            Models::TestResult.new(
              test_id: "PREFLIGHT",
              status: status,
              test_cases: test_cases,
              summary: preflight_summary(status, test_cases),
              started_at: started_at,
              completed_at: Time.now,
              metadata: {
                phase: "preflight",
                package: package,
                sandbox_root: sandbox_root
              }
            )
          end

          private

          # Executes one preflight file and returns its result hash.
          #
          # Any StandardError (including a missing `ace-test` executable or a
          # blank path) is converted into an "error" record instead of being
          # raised, so one bad file does not abort the remaining files.
          def run_file(package_root, file, env, output)
            # Derived before anything that can raise, so the rescue branch
            # below always has a String label to report.
            package_relative = package_relative_path(file)
            command = [PREFLIGHT_COMMAND, package_relative]
            raise ArgumentError, "preflight file path is empty" if package_relative.empty?

            stdout, stderr, status = Open3.capture3(env, *command, chdir: package_root)
            outcome = status.success? ? "pass" : "fail"

            output.puts "Preflight: #{package_relative} (#{outcome})"

            {
              id: package_relative,
              description: package_relative,
              status: outcome,
              actual: stdout,
              notes: stderr,
              metadata: {
                phase: "preflight",
                exit_status: status.exitstatus,
                command: Shellwords.join(command)
              }
            }
          rescue StandardError => e
            output.puts "Preflight: #{package_relative} (error)"

            {
              id: package_relative,
              description: package_relative,
              status: "error",
              actual: "",
              notes: e.message,
              metadata: {
                phase: "preflight",
                command: Shellwords.join([PREFLIGHT_COMMAND, package_relative])
              }
            }
          end

          # Converts +file+ into a path relative to the package directory by
          # stripping the base dir prefix and then the first path segment.
          # `to_s` makes Pathname (and nil) inputs behave like Strings.
          def package_relative_path(file)
            relative = file.to_s.sub(%r{\A#{Regexp.escape(@base_dir)}/?}, "")
            relative.sub(%r{\A[^/]+/}, "")
          end

          # "error" outranks "fail", which outranks "pass".
          def overall_status(test_cases)
            statuses = test_cases.map { |tc| tc[:status] }
            return "error" if statuses.include?("error")
            return "fail" if statuses.include?("fail")

            "pass"
          end

          # Prefers `<sandbox_root>/<package>` when that directory exists and
          # falls back to the sandbox root itself otherwise.
          def resolve_package_root(sandbox_root, package)
            candidate = File.join(sandbox_root, package)
            return candidate if Dir.exist?(candidate)

            sandbox_root
          end

          # Builds the one-line human readable summary for the aggregate result.
          def preflight_summary(status, test_cases)
            passed = test_cases.count { |tc| tc[:status] == "pass" }
            total = test_cases.size
            prefix =
              case status
              when "pass" then "Preflight passed"
              when "fail" then "Preflight failed"
              else "Preflight errored"
              end
            "#{prefix}: #{passed}/#{total} files passed"
          end
        end
      end
    end
  end
end
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "fileutils"
4
+ require "json"
5
+ require "time"
3
6
  require "ace/llm"
4
7
  require "ace/llm/query_interface"
5
8
 
@@ -9,17 +12,25 @@ module Ace
9
12
  module Molecules
10
13
  # Executes standalone scenarios using the deterministic pipeline.
11
14
  class PipelineExecutor
15
+ AMBIENT_TMUX_ENV_VARS = %w[TMUX TMUX_PANE].freeze
16
+
12
17
  # @param provider [String]
18
+ # @param verifier_provider [String, nil]
13
19
  # @param timeout [Integer]
14
20
  # @param sandbox_builder [Molecules::PipelineSandboxBuilder]
15
21
  # @param prompt_bundler [Molecules::PipelinePromptBundler]
16
22
  # @param report_generator [Molecules::PipelineReportGenerator]
17
- def initialize(provider:, timeout:, sandbox_builder: nil, prompt_bundler: nil, report_generator: nil)
23
+ def initialize(provider:, verifier_provider: nil, timeout:, sandbox_builder: nil, prompt_bundler: nil,
24
+ report_generator: nil, sandbox_backend_factory: nil)
18
25
  @provider = provider
26
+ @verifier_provider = verifier_provider || provider
19
27
  @timeout = timeout
20
28
  @sandbox_builder = sandbox_builder || PipelineSandboxBuilder.new
21
29
  @prompt_bundler = prompt_bundler || PipelinePromptBundler.new
22
30
  @report_generator = report_generator || PipelineReportGenerator.new
31
+ @sandbox_backend_factory = sandbox_backend_factory || lambda { |sandbox_path, source_root: nil|
32
+ Molecules::BwrapSandboxBackend.new(sandbox_root: sandbox_path, source_root: source_root)
33
+ }
23
34
  end
24
35
 
25
36
  # @param scenario [Models::TestScenario]
@@ -31,57 +42,89 @@ module Ace
31
42
  # @return [Models::TestResult]
32
43
  def execute(scenario:, cli_args:, sandbox_path:, report_dir:, env_vars: nil, test_cases: nil)
33
44
  started_at = Time.now
45
+ FileUtils.mkdir_p(report_dir)
46
+ write_command_record(report_dir, "runner", provider: @provider, cli_args: cli_args)
47
+ write_tc_manifests(report_dir, scenario, test_cases: test_cases)
34
48
 
35
- build_env = @sandbox_builder.build(
36
- scenario: scenario,
37
- sandbox_path: sandbox_path,
38
- test_cases: test_cases
49
+ build_env = if prepared_sandbox?(sandbox_path, env_vars)
50
+ @sandbox_builder.prepare_existing_sandbox(
51
+ scenario: scenario,
52
+ sandbox_path: sandbox_path,
53
+ test_cases: test_cases
54
+ )
55
+ else
56
+ @sandbox_builder.build(
57
+ scenario: scenario,
58
+ sandbox_path: sandbox_path,
59
+ test_cases: test_cases
60
+ )
61
+ end
62
+ merged_env = sanitize_subprocess_env((env_vars || {}).merge(build_env))
63
+ sandbox_backend = @sandbox_backend_factory.call(
64
+ sandbox_path,
65
+ source_root: merged_env["ACE_E2E_SOURCE_ROOT"] || merged_env[:ACE_E2E_SOURCE_ROOT]
39
66
  )
40
- merged_env = (env_vars || {}).merge(build_env)
67
+ merged_env = sandbox_backend.prepared_env(merged_env)
41
68
 
42
69
  runner = @prompt_bundler.prepare_runner(
43
70
  scenario: scenario,
44
71
  sandbox_path: sandbox_path,
45
72
  test_cases: test_cases
46
73
  )
47
- run_llm(
74
+ runner_response = run_llm(
48
75
  prompt_path: runner[:prompt_path],
49
76
  system_path: runner[:system_path],
50
77
  output_path: runner[:output_path],
51
78
  cli_args: cli_args,
52
- env_vars: merged_env
79
+ env_vars: merged_env,
80
+ subprocess_command_prefix: sandbox_backend.command_prefix(chdir: sandbox_path, env: merged_env),
81
+ provider: @provider
53
82
  )
83
+ runner_observations = extract_runner_observations(runner_response[:text])
84
+ artifact_contract = snapshot_artifacts(report_dir, sandbox_path, scenario, test_cases: test_cases)
54
85
 
55
86
  verifier = @prompt_bundler.prepare_verifier(
56
87
  scenario: scenario,
57
88
  sandbox_path: sandbox_path,
58
- test_cases: test_cases
89
+ test_cases: test_cases,
90
+ runner_observations: runner_observations,
91
+ artifact_contract: artifact_contract
59
92
  )
93
+ write_command_record(report_dir, "verifier", provider: @verifier_provider, cli_args: cli_args)
60
94
  verifier_response = run_llm(
61
95
  prompt_path: verifier[:prompt_path],
62
96
  system_path: verifier[:system_path],
63
97
  output_path: verifier[:output_path],
64
98
  cli_args: cli_args,
65
- env_vars: merged_env
99
+ env_vars: merged_env,
100
+ subprocess_command_prefix: sandbox_backend.command_prefix(chdir: sandbox_path, env: merged_env),
101
+ provider: @verifier_provider
66
102
  )
67
103
 
68
104
  @report_generator.generate(
69
105
  scenario: scenario,
70
106
  verifier_output: verifier_response[:text],
71
107
  report_dir: report_dir,
72
- provider: @provider,
108
+ provider: @verifier_provider,
73
109
  started_at: started_at,
74
- completed_at: Time.now
110
+ completed_at: Time.now,
111
+ metadata: base_metadata(
112
+ report_dir,
113
+ runner_observations: runner_observations,
114
+ artifact_contract: artifact_contract
115
+ )
75
116
  )
76
117
  rescue => e
77
118
  begin
78
119
  @report_generator.write_failure_report(
79
120
  scenario: scenario,
80
121
  report_dir: report_dir,
81
- provider: @provider,
122
+ provider: @verifier_provider,
82
123
  started_at: started_at || Time.now,
83
124
  completed_at: Time.now,
84
- error_message: "#{e.class}: #{e.message}"
125
+ error_message: "#{e.class}: #{e.message}",
126
+ failure_category: "runner-error",
127
+ metadata: base_metadata(report_dir)
85
128
  )
86
129
  rescue => write_error
87
130
  Models::TestResult.new(
@@ -97,13 +140,13 @@ module Ace
97
140
 
98
141
  private
99
142
 
100
- def run_llm(prompt_path:, system_path:, output_path:, cli_args:, env_vars:)
143
+ def run_llm(prompt_path:, system_path:, output_path:, cli_args:, env_vars:, subprocess_command_prefix:, provider:)
101
144
  prompt = File.read(prompt_path)
102
145
  system = File.read(system_path)
103
146
  sandbox_dir = env_vars["PROJECT_ROOT_PATH"] || env_vars[:PROJECT_ROOT_PATH]
104
147
 
105
148
  Ace::LLM::QueryInterface.query(
106
- @provider,
149
+ provider,
107
150
  prompt,
108
151
  system: system,
109
152
  cli_args: cli_args,
@@ -111,9 +154,107 @@ module Ace
111
154
  fallback: false,
112
155
  output: output_path,
113
156
  subprocess_env: env_vars,
157
+ subprocess_command_prefix: subprocess_command_prefix,
114
158
  working_dir: sandbox_dir
115
159
  )
116
160
  end
161
+
162
# Writes one `<short_id>.manifest.json` file into +report_dir+ for every
# selected test case, describing its id, title, artifacts and goal format.
#
# @param report_dir [String]
# @param scenario [Models::TestScenario]
# @param test_cases [Array<String>, nil] optional filter of test case ids
def write_tc_manifests(report_dir, scenario, test_cases:)
  select_test_cases(scenario, test_cases).each do |tc|
    target = File.join(report_dir, "#{tc.short_id}.manifest.json")
    payload = {
      tc_id: tc.tc_id,
      title: tc.title,
      declared_artifacts: Array(tc.declared_artifacts),
      optional_artifacts: Array(tc.optional_artifacts),
      goal_format: tc.goal_format
    }
    File.write(target, JSON.pretty_generate(payload))
  end
end
178
+
179
# Persists `<phase>.command.json` in +report_dir+ recording the provider,
# CLI arguments, configured timeout and a UTC ISO-8601 timestamp.
#
# @param report_dir [String]
# @param phase [String] used both in the record and in the file name
# @param provider [String]
# @param cli_args [Object] stored as given
def write_command_record(report_dir, phase, provider:, cli_args:)
  target = File.join(report_dir, "#{phase}.command.json")
  payload = {
    phase: phase,
    provider: provider,
    cli_args: cli_args,
    timeout: @timeout,
    recorded_at: Time.now.utc.iso8601
  }
  File.write(target, JSON.pretty_generate(payload))
end
192
+
193
# Records, per selected test case, which declared and optional artifacts
# currently exist under +sandbox_path+, writes the result to
# `artifact-snapshot.json` in +report_dir+ and returns it.
#
# @param report_dir [String]
# @param sandbox_path [String]
# @param scenario [Models::TestScenario]
# @param test_cases [Array<String>, nil] optional filter of test case ids
# @return [Hash{Object => Hash{String => Array<String>}}] keyed by tc_id
def snapshot_artifacts(report_dir, sandbox_path, scenario, test_cases:)
  on_disk = ->(relative) { File.exist?(File.join(sandbox_path, relative)) }
  contract = {}

  select_test_cases(scenario, test_cases).each do |tc|
    required = Array(tc.declared_artifacts).sort
    optional = Array(tc.optional_artifacts).sort
    found_required, absent_required = required.partition(&on_disk)
    found_optional = optional.select(&on_disk)

    contract[tc.tc_id] = {
      "present_artifacts" => (found_required + found_optional).sort,
      "required_artifacts" => required,
      "present_required_artifacts" => found_required,
      "missing_required_artifacts" => absent_required,
      "optional_artifacts" => optional,
      "present_optional_artifacts" => found_optional
    }
  end

  File.write(File.join(report_dir, "artifact-snapshot.json"), JSON.pretty_generate(contract))
  contract
end
213
+
214
# Returns the scenario's test cases, optionally narrowed to the ids listed in
# +test_cases+. Id matching is case-insensitive and string-based.
#
# @param scenario [Models::TestScenario]
# @param test_cases [Array<#to_s>, nil] nil or empty means "all test cases"
# @return [Array]
def select_test_cases(scenario, test_cases)
  candidates = Array(scenario.test_cases)
  return candidates if test_cases.nil? || test_cases.empty?

  requested_ids = test_cases.map { |id| id.to_s.upcase }
  candidates.select { |candidate| requested_ids.include?(candidate.tc_id.to_s.upcase) }
end
220
+
221
# Builds the metadata hash attached to generated reports.
#
# Always contains the runner provider, verifier provider and report dir.
# Runner observations are added only when non-empty. When an artifact
# contract is given, a "missing_required_artifacts" map is added that keeps
# only the test cases that actually miss something.
#
# @param report_dir [String]
# @param runner_observations [String, nil]
# @param artifact_contract [#to_h, nil]
# @return [Hash{String => Object}]
def base_metadata(report_dir, runner_observations: nil, artifact_contract: nil)
  details = {
    "runner_provider" => @provider,
    "verifier_provider" => @verifier_provider,
    "report_dir" => report_dir
  }

  has_observations = runner_observations && !runner_observations.empty?
  details["runner_observations"] = runner_observations if has_observations
  return details unless artifact_contract

  missing = {}
  artifact_contract.to_h.each do |tc_id, entry|
    paths = Array(entry["missing_required_artifacts"])
    missing[tc_id] = paths unless paths.empty?
  end
  details["missing_required_artifacts"] = missing
  details
end
237
+
238
# Returns a copy of +env_vars+ without any entry whose key (compared as a
# String) is listed in AMBIENT_TMUX_ENV_VARS, then sets each of those names
# to nil under its String key. The input hash is not modified.
#
# @param env_vars [Hash]
# @return [Hash]
def sanitize_subprocess_env(env_vars)
  cleaned = {}
  env_vars.each do |name, value|
    cleaned[name] = value unless AMBIENT_TMUX_ENV_VARS.include?(name.to_s)
  end
  AMBIENT_TMUX_ENV_VARS.each { |name| cleaned[name] = nil }
  cleaned
end
243
+
244
# Tells whether +env_vars+ already points at +sandbox_path+ through its
# PROJECT_ROOT_PATH entry (String or Symbol key). Paths are compared after
# `File.expand_path`.
#
# @param sandbox_path [String]
# @param env_vars [Hash, nil]
# @return [Boolean]
def prepared_sandbox?(sandbox_path, env_vars)
  return false unless env_vars.is_a?(Hash)
  return false if env_vars.empty?

  declared_root = env_vars["PROJECT_ROOT_PATH"] || env_vars[:PROJECT_ROOT_PATH]
  if declared_root.to_s.strip.empty?
    false
  else
    File.expand_path(declared_root) == File.expand_path(sandbox_path)
  end
end
252
+
253
# Pulls the `:observations` entry out of the runner's response text via
# `Atoms::SkillResultParser` and returns it as a String. A
# `Atoms::ResultParser::ParseError` is treated as "no observations".
#
# @param text [String] raw runner response
# @return [String]
def extract_runner_observations(text)
  parsed = Atoms::SkillResultParser.parse(text)
  parsed[:observations].to_s
rescue Atoms::ResultParser::ParseError
  ""
end
117
258
  end
118
259
  end
119
260
  end
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "date"
4
4
  require "fileutils"
5
+ require "time"
5
6
  require "yaml"
6
7
 
7
8
  module Ace
@@ -15,21 +16,35 @@ module Ace
15
16
 
16
17
  Rules:
17
18
  - Execute each goal in order
18
- - Save all artifacts to results/tc/{NN}/ directories as specified
19
- - Treat the initial working directory as SANDBOX_ROOT; if a goal needs commands in a created worktree, cd there for execution but keep artifact writes under SANDBOX_ROOT/results
19
+ - Treat the initial working directory as SANDBOX_ROOT; if a goal needs commands in a created worktree, cd there for execution but keep any declared outcome artifacts under SANDBOX_ROOT/results
20
+ - Preserve the sandbox runtime environment; do not reset PATH, HOME, or other provided env vars
21
+ - If `ACE_E2E_SANDBOX_RUNTIME_ROOT` is set, make sure command execution uses `$ACE_E2E_SANDBOX_RUNTIME_ROOT/bin` on PATH in the shell where you run scenario commands
22
+ - Run `ace-*` commands directly; do not wrap them with `timeout`, `env -i`, or other execution wrappers that can change behavior or hide diagnostics
23
+ - Do not bypass the public CLI with repo-local executables such as `./exe/ace-*`, `bin/ace-*`, or `ruby .../exe/ace-*`
20
24
  - Do not fabricate output - all artifacts must come from real tool execution
25
+ - Never background commands or start dependent verification captures before the command they verify has completed
26
+ - When a goal requires command captures, keep stdout and stderr separate; do not merge streams and do not use `2>&1`
27
+ - A command capture set is incomplete unless the matching `.stdout`, `.stderr`, and `.exit` files all exist
28
+ - Persist each command's `.stdout`, `.stderr`, and `.exit` files immediately after that command finishes, before starting the next command
29
+ - For commands that establish state, write that command's `.exit` file before running any list/status/fs-check/tmux verification for the same goal
30
+ - When a successful command prints a filesystem path to a generated artifact, copy that artifact into `results/` if the goal asks for supporting evidence from the generated file
21
31
  - If a goal fails, note the failure and continue to the next goal
22
- - After all goals, output a brief summary of what you produced for each goal
32
+ - Do not create synthetic helper reports or temp input files under results/ unless the scenario explicitly treats them as product outcomes
33
+ - After all goals, return concise runner observations describing what you did and what happened
23
34
  PROMPT
24
35
 
25
36
  VERIFIER_SYSTEM_PROMPT = <<~PROMPT
26
37
  You are an E2E test verifier. You inspect artifacts and render PASS/FAIL verdicts.
27
38
 
28
39
  Rules:
29
- - Evaluate each goal independently based solely on the artifacts provided
30
- - Do not speculate about what the runner did - only judge what exists
40
+ - Evaluate each goal independently based on sandbox state first, then runner observations, then raw debug captures only when needed
41
+ - Treat declared artifacts and helper filenames as hints, not as the source of truth
42
+ - If a helper file is missing or stale, inspect the sandbox directly before failing the goal
43
+ - Use artifact mtimes to detect runner ordering mistakes; if postcondition captures are older than the primary command's stdout/stderr/exit, classify the goal as `runner-error` unless direct sandbox state proves a product failure after the command completed
44
+ - Use read-only commands in the sandbox when they materially improve confidence (for example: git log/status/show, ls/find/cat)
45
+ - Do not speculate beyond the provided sandbox evidence and runner observations
31
46
  - For each failed goal, include a category:
32
- test-spec-error | tool-bug | runner-error | infrastructure-error
47
+ test-spec-error | tool-bug | runner-error | infrastructure-error | missing-artifact
33
48
  - For each goal, cite specific evidence (filenames, content snippets)
34
49
  - Follow the output format exactly
35
50
  PROMPT
@@ -60,16 +75,20 @@ module Ace
60
75
  # @param sandbox_path [String]
61
76
  # @param test_cases [Array<String>, nil]
62
77
  # @return [Hash]
63
- def prepare_verifier(scenario:, sandbox_path:, test_cases: nil)
78
+ def prepare_verifier(scenario:, sandbox_path:, test_cases: nil, runner_observations: nil, artifact_contract: nil)
64
79
  cache_dir = ensure_cache_dir(sandbox_path)
65
80
  system_path = File.join(cache_dir, "verifier-system.md")
66
81
  prompt_path = File.join(cache_dir, "verifier-prompt.md")
67
82
 
68
83
  File.write(system_path, VERIFIER_SYSTEM_PROMPT)
69
84
 
85
+ project_context = build_project_context_section(scenario)
86
+ sandbox_context = build_sandbox_context_section(sandbox_path)
70
87
  artifacts = build_artifact_section(sandbox_path)
88
+ contract = build_artifact_contract_section(artifact_contract)
89
+ observations = build_runner_observation_section(runner_observations)
71
90
  criteria = bundle_markdown_file(File.join(scenario.dir_path, "verifier.yml.md"), test_cases: test_cases)
72
- File.write(prompt_path, [artifacts, criteria].join("\n\n---\n\n"))
91
+ File.write(prompt_path, [project_context, sandbox_context, artifacts, contract, observations, criteria].join("\n\n---\n\n"))
73
92
 
74
93
  {
75
94
  system_path: system_path,
@@ -154,6 +173,13 @@ module Ace
154
173
  parts.concat(tree_entries)
155
174
  parts << "```"
156
175
  parts << ""
176
+ parts << "## File metadata"
177
+ parts << "```"
178
+ files.each do |file|
179
+ parts << "#{relative_path(file, sandbox_path)}\tmtime=#{File.mtime(file).utc.iso8601}"
180
+ end
181
+ parts << "```"
182
+ parts << ""
157
183
  parts << "## File contents"
158
184
  parts << ""
159
185
 
@@ -168,6 +194,93 @@ module Ace
168
194
  parts.join("\n").rstrip
169
195
  end
170
196
 
197
# Renders the "# Project Context" markdown section for the verifier prompt.
#
# The package root is taken to be three directories above the scenario
# directory and the source root one directory above that. Up to three of the
# candidate docs (package README.md, docs/usage.md, docs/getting-started.md,
# source-root CLAUDE.md) that exist as files are embedded in fenced blocks.
#
# @param scenario [Models::TestScenario]
# @return [String]
def build_project_context_section(scenario)
  package_root = File.expand_path("../../..", scenario.dir_path)
  source_root = File.expand_path("..", package_root)

  candidates = [
    File.join(package_root, "README.md"),
    File.join(package_root, "docs", "usage.md"),
    File.join(package_root, "docs", "getting-started.md"),
    File.join(source_root, "CLAUDE.md")
  ]
  documents = candidates.select { |candidate| File.file?(candidate) }.first(3)

  header = [
    "# Project Context",
    "",
    "- Package: `#{scenario.package}`",
    "- Test ID: `#{scenario.test_id}`",
    "- Sandbox profile: `#{scenario.sandbox_profile}`",
    ""
  ]
  embedded = documents.flat_map do |document|
    ["## `#{File.basename(document)}`", "```", safe_read(document), "```", ""]
  end

  (header + embedded).join("\n").rstrip
end
225
+
226
# Renders the "# Sandbox Context" markdown section: the absolute sandbox
# root plus a sorted listing of its top-level entries (dotfiles included,
# "." and ".." excluded).
#
# @param sandbox_path [String]
# @return [String]
def build_sandbox_context_section(sandbox_path)
  root = File.expand_path(sandbox_path)
  top_level = Dir.glob(File.join(root, "*"), File::FNM_DOTMATCH)
  top_level = top_level.reject { |entry| %w[. ..].include?(File.basename(entry)) }.sort
  listing = top_level.map { |entry| relative_path(entry, root) }

  lines = [
    "# Sandbox Context",
    "",
    "- Sandbox root: `#{root}`",
    "- Inspect the sandbox directly when verifying source-of-truth state.",
    "",
    "## Top-level entries",
    "```",
    *listing,
    "```"
  ]
  lines.join("\n").rstrip
end
245
+
246
# Renders the "# Runner Observations" markdown section. Blank or nil
# observations are replaced by the placeholder "(none provided)".
#
# @param runner_observations [#to_s, nil]
# @return [String]
def build_runner_observation_section(runner_observations)
  body = runner_observations.to_s.strip
  body = "(none provided)" if body.empty?
  "# Runner Observations\n\n#{body}"
end
253
+
254
# Renders the "# Artifact Contract" markdown section from the artifact
# snapshot: one sub-section per test case (sorted) listing required, present,
# missing and optional artifacts. A nil or empty contract yields a short
# placeholder section.
#
# @param artifact_contract [Hash, nil] keyed by test case id
# @return [String]
def build_artifact_contract_section(artifact_contract)
  if artifact_contract.nil? || artifact_contract.empty?
    return "# Artifact Contract\n\n(no snapshot provided)"
  end

  intro = [
    "# Artifact Contract",
    "",
    "Use this only as supporting context. Missing helper artifacts may be acceptable when sandbox state still proves the goal.",
    ""
  ]
  per_case = artifact_contract.sort.flat_map do |tc_id, entry|
    [
      "## #{tc_id}",
      "",
      "- Required artifacts: #{format_artifact_list(entry["required_artifacts"])}",
      "- Present required artifacts: #{format_artifact_list(entry["present_required_artifacts"])}",
      "- Missing required artifacts: #{format_artifact_list(entry["missing_required_artifacts"])}",
      "- Optional artifacts: #{format_artifact_list(entry["optional_artifacts"])}",
      "- Present optional artifacts: #{format_artifact_list(entry["present_optional_artifacts"])}",
      ""
    ]
  end

  (intro + per_case).join("\n").rstrip
end
276
+
277
# Formats artifact paths as a comma-separated list of backtick-quoted items,
# or "(none)" when there is nothing to list.
#
# @param paths [Array<String>, String, nil]
# @return [String]
def format_artifact_list(paths)
  quoted = Array(paths).map { |entry| "`#{entry}`" }
  quoted.empty? ? "(none)" : quoted.join(", ")
end
283
+
171
284
  def relative_path(path, root)
172
285
  File.expand_path(path).sub("#{File.expand_path(root)}/", "")
173
286
  end