ace-test-runner-e2e 0.38.11 → 0.40.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +55 -0
  3. data/handbook/guides/e2e-testing.g.md +35 -3
  4. data/handbook/guides/scenario-yml-reference.g.md +8 -3
  5. data/handbook/guides/tc-authoring.g.md +15 -4
  6. data/handbook/templates/tc-file.template.md +4 -2
  7. data/handbook/workflow-instructions/e2e/create.wf.md +13 -3
  8. data/handbook/workflow-instructions/e2e/fix.wf.md +19 -0
  9. data/handbook/workflow-instructions/e2e/plan-changes.wf.md +16 -0
  10. data/handbook/workflow-instructions/e2e/review.wf.md +14 -10
  11. data/handbook/workflow-instructions/e2e/rewrite.wf.md +10 -3
  12. data/lib/ace/test/end_to_end_runner/atoms/artifact_contract_validator.rb +138 -0
  13. data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +195 -5
  14. data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +37 -1
  15. data/lib/ace/test/end_to_end_runner/molecules/artifact_pruner.rb +61 -0
  16. data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +90 -14
  17. data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +43 -5
  18. data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +7 -5
  19. data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +2 -0
  20. data/lib/ace/test/end_to_end_runner/molecules/sandbox_runtime_builder.rb +101 -9
  21. data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +28 -30
  22. data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +24 -1
  23. data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +182 -1
  24. data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +25 -3
  25. data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +44 -5
  26. data/lib/ace/test/end_to_end_runner/version.rb +1 -1
  27. data/lib/ace/test/end_to_end_runner.rb +2 -0
  28. metadata +4 -2
@@ -77,7 +77,7 @@ module Ace
77
77
  # non-mutating.
78
78
  #
79
79
  # @param sandbox_path [String]
80
- # @return [void]
80
+ # @return [Array<String>]
81
81
  def sync_protocol_sources_into(sandbox_path)
82
82
  sync_protocol_sources(File.expand_path(sandbox_path))
83
83
  end
@@ -108,12 +108,13 @@ module Ace
108
108
  end
109
109
 
110
110
  def sync_protocol_sources(sandbox_path)
111
- %w[skill wfi].each do |protocol|
111
+ packages = %w[skill wfi].flat_map do |protocol|
112
112
  Dir.glob(File.join(@config_root, "*", ".ace-defaults", "nav", "protocols",
113
- "#{protocol}-sources", "*.yml")).sort.each do |manifest_path|
113
+ "#{protocol}-sources", "*.yml")).sort.filter_map do |manifest_path|
114
114
  sync_protocol_source_manifest(protocol, manifest_path, sandbox_path)
115
115
  end
116
116
  end
117
+ packages.compact.uniq.sort
117
118
  end
118
119
 
119
120
  def sync_protocol_source_manifest(protocol, manifest_path, sandbox_path)
@@ -137,11 +138,12 @@ module Ace
137
138
 
138
139
  FileUtils.mkdir_p(File.dirname(target_manifest_path))
139
140
  FileUtils.cp(manifest_path, target_manifest_path) unless File.exist?(target_manifest_path)
140
- return unless File.directory?(source_dir)
141
+ return package_name unless File.directory?(source_dir)
141
142
  return if File.exist?(target_dir)
142
143
 
143
144
  FileUtils.mkdir_p(File.dirname(target_dir))
144
145
  FileUtils.cp_r(source_dir, target_dir)
146
+ package_name
145
147
  rescue Psych::SyntaxError
146
148
  nil
147
149
  end
@@ -214,7 +216,7 @@ module Ace
214
216
 
215
217
  stdout, stderr, status = Open3.capture3(
216
218
  env,
217
- "bash", "--noprofile", "--norc", "-c", "ace-config init && ace-handbook sync",
219
+ "bash", "--noprofile", "--norc", "-c", "ace-config sync ace-llm-providers-cli && ace-handbook sync",
218
220
  chdir: sandbox_path
219
221
  )
220
222
  return if status.success?
@@ -217,6 +217,8 @@ module Ace
217
217
  metadata["runner_observations"] = result.metadata["runner_observations"] if result.metadata.key?("runner_observations")
218
218
  metadata["verifier_observations"] = result.observations unless result.observations.to_s.empty?
219
219
  metadata["missing_required_artifacts"] = result.metadata["missing_required_artifacts"] if result.metadata.key?("missing_required_artifacts")
220
+ metadata["initial_missing_required_artifacts"] = result.metadata["initial_missing_required_artifacts"] if result.metadata.key?("initial_missing_required_artifacts")
221
+ metadata["artifact_repair_attempted"] = result.metadata["artifact_repair_attempted"] if result.metadata.key?("artifact_repair_attempted")
220
222
  metadata["works_for_end_user"] = result.metadata["works_for_end_user"] if result.metadata.key?("works_for_end_user")
221
223
  metadata["user_friction"] = result.metadata["user_friction"] if result.metadata.key?("user_friction")
222
224
  metadata["user_feedback"] = result.metadata["user_feedback"] if result.metadata.key?("user_feedback")
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "fileutils"
4
4
  require "open3"
5
+ require "digest"
5
6
 
6
7
  module Ace
7
8
  module Test
@@ -10,6 +11,9 @@ module Ace
10
11
  # Builds a sandbox-local Ruby/Bundler runtime for E2E execution.
11
12
  class SandboxRuntimeBuilder
12
13
  DEFAULT_RUBY_VERSION = "3.4.9"
14
+ DEFAULT_SHARED_RUNTIME_CACHE_ROOT = ".ace-local/test-e2e/runtime-cache"
15
+ SHARED_RUNTIME_ENV_KEY = "ACE_E2E_SHARED_RUNTIME_ROOT"
16
+ RUNTIME_CACHE_LAYOUT_VERSION = 1
13
17
  RESERVED_ENV_KEYS = %w[
14
18
  PROJECT_ROOT_PATH
15
19
  ACE_E2E_SOURCE_ROOT
@@ -37,14 +41,22 @@ module Ace
37
41
 
38
42
  def prepare(sandbox_root:, env: {}, tool_names: nil)
39
43
  sandbox_root = File.expand_path(sandbox_root)
40
- runtime_root = File.join(sandbox_root, ".ace-local", "e2e-runtime")
41
- FileUtils.mkdir_p(runtime_root)
42
-
43
- runtime_env = build_runtime_env(sandbox_root, runtime_root, env)
44
+ local_runtime_root = File.join(sandbox_root, ".ace-local", "e2e-runtime")
45
+ runtime_root = resolve_runtime_root(local_runtime_root, env)
46
+ FileUtils.mkdir_p(runtime_root) unless shared_runtime_root?(env)
47
+
48
+ runtime_env = build_runtime_env(
49
+ sandbox_root,
50
+ runtime_root,
51
+ env,
52
+ mutable_runtime_root: local_runtime_root
53
+ )
44
54
  ensure_runtime_dirs(runtime_env)
45
- write_runtime_gemfile(runtime_root)
46
- write_command_shims(runtime_root, tool_names)
47
- install_runtime!(runtime_root, runtime_env)
55
+ if shared_runtime_root?(env)
56
+ ensure_shared_runtime!(runtime_root, tool_names)
57
+ else
58
+ prepare_runtime_root!(runtime_root, runtime_env, tool_names)
59
+ end
48
60
 
49
61
  {
50
62
  runtime_root: runtime_root,
@@ -52,15 +64,23 @@ module Ace
52
64
  }
53
65
  end
54
66
 
67
+ def prepare_shared_runtime(cache_root: nil, tool_names: nil)
68
+ runtime_root = shared_runtime_root(cache_root: cache_root)
69
+ runtime_env = build_shared_runtime_env(runtime_root)
70
+ ensure_runtime_dirs(runtime_env)
71
+ prepare_runtime_root!(runtime_root, runtime_env, tool_names)
72
+ runtime_root
73
+ end
74
+
55
75
  private
56
76
 
57
77
  def capture3(env, *cmd, chdir:)
58
78
  Open3.capture3(env, *cmd, chdir: chdir, unsetenv_others: true)
59
79
  end
60
80
 
61
- def build_runtime_env(sandbox_root, runtime_root, env)
81
+ def build_runtime_env(sandbox_root, runtime_root, env, mutable_runtime_root: runtime_root)
62
82
  merged = stringify_keys(env).reject { |key, _value| RESERVED_ENV_KEYS.include?(key) }
63
- bundler_root = File.join(runtime_root, "bundler")
83
+ bundler_root = File.join(mutable_runtime_root, "bundler")
64
84
  gem_root = File.join(runtime_root, "gems")
65
85
  bin_root = File.join(runtime_root, "bin")
66
86
  path = merged["PATH"].to_s
@@ -87,6 +107,10 @@ module Ace
87
107
  )
88
108
  end
89
109
 
110
+ def build_shared_runtime_env(runtime_root)
111
+ build_runtime_env(@source_root, runtime_root, {}, mutable_runtime_root: runtime_root)
112
+ end
113
+
90
114
  def ensure_runtime_dirs(env)
91
115
  [
92
116
  env.fetch("ACE_CONFIG_PATH"),
@@ -169,6 +193,74 @@ module Ace
169
193
  .uniq
170
194
  end
171
195
 
196
+ def resolve_runtime_root(local_runtime_root, env)
197
+ shared_root = shared_runtime_root_from_env(env)
198
+ return shared_root if shared_root
199
+
200
+ local_runtime_root
201
+ end
202
+
203
+ def shared_runtime_root?(env)
204
+ !shared_runtime_root_from_env(env).nil?
205
+ end
206
+
207
+ def shared_runtime_root_from_env(env)
208
+ merged = stringify_keys(ENV.to_h).merge(stringify_keys(env))
209
+ raw = merged[SHARED_RUNTIME_ENV_KEY].to_s.strip
210
+ return nil if raw.empty?
211
+
212
+ File.expand_path(raw)
213
+ end
214
+
215
+ def shared_runtime_root(cache_root: nil)
216
+ base = if cache_root
217
+ File.expand_path(cache_root)
218
+ else
219
+ File.join(@source_root, DEFAULT_SHARED_RUNTIME_CACHE_ROOT)
220
+ end
221
+
222
+ File.join(base, runtime_cache_key)
223
+ end
224
+
225
+ def runtime_cache_key
226
+ @runtime_cache_key ||= begin
227
+ digest = Digest::SHA256.new
228
+ digest.update("layout:#{RUNTIME_CACHE_LAYOUT_VERSION}\n")
229
+ digest.update("ruby:#{@ruby_version}\n")
230
+ digest.update(File.read(File.join(@source_root, "Gemfile")))
231
+ lockfile_path = File.join(@source_root, "Gemfile.lock")
232
+ digest.update(File.read(lockfile_path)) if File.file?(lockfile_path)
233
+ digest.hexdigest[0, 16]
234
+ end
235
+ end
236
+
237
+ def ensure_shared_runtime!(runtime_root, tool_names)
238
+ runtime_env = build_shared_runtime_env(runtime_root)
239
+ ensure_runtime_dirs(runtime_env)
240
+ prepare_runtime_root!(runtime_root, runtime_env, tool_names)
241
+ end
242
+
243
+ def prepare_runtime_root!(runtime_root, env, tool_names)
244
+ with_runtime_lock(runtime_root) do
245
+ marker_path = File.join(runtime_root, ".bootstrapped")
246
+ return if File.exist?(marker_path)
247
+
248
+ FileUtils.mkdir_p(runtime_root)
249
+ write_runtime_gemfile(runtime_root)
250
+ write_command_shims(runtime_root, tool_names)
251
+ install_runtime!(runtime_root, env)
252
+ end
253
+ end
254
+
255
+ def with_runtime_lock(runtime_root)
256
+ lock_path = "#{runtime_root}.lock"
257
+ FileUtils.mkdir_p(File.dirname(lock_path))
258
+ File.open(lock_path, File::RDWR | File::CREAT, 0o644) do |lock_file|
259
+ lock_file.flock(File::LOCK_EX)
260
+ yield
261
+ end
262
+ end
263
+
172
264
  def install_runtime!(runtime_root, env)
173
265
  marker_path = File.join(runtime_root, ".bootstrapped")
174
266
  return if File.exist?(marker_path)
@@ -256,27 +256,39 @@ module Ace
256
256
 
257
257
  def declared_artifacts_for(scenario_dir, tc_id, runner_content, verify_content)
258
258
  scenario_frontmatter = parse_scenario_yml(File.join(scenario_dir, "scenario.yml"))
259
- required_artifacts = []
260
- optional_artifacts = []
261
-
262
- required_artifacts.concat(
259
+ scenario_references = Atoms::ArtifactContractValidator.references_from_paths(
263
260
  Array((scenario_frontmatter["sandbox-layout"] || {}).keys).select do |path|
264
261
  declared_artifact_matches_tc?(path, tc_id)
265
- end
262
+ end,
263
+ source: File.join(scenario_dir, "scenario.yml")
264
+ )
265
+ runner_references = Atoms::ArtifactContractValidator.extract(
266
+ runner_content,
267
+ source: File.join(scenario_dir, "#{tc_id}.runner.md")
268
+ )
269
+ verifier_references = Atoms::ArtifactContractValidator.extract(
270
+ verify_content,
271
+ source: File.join(scenario_dir, "#{tc_id}.verify.md")
266
272
  )
267
273
 
268
- [runner_content, verify_content].each do |content|
269
- extract_declared_artifacts(content).each do |entry|
270
- if entry[:optional]
271
- optional_artifacts << entry[:path]
272
- else
273
- required_artifacts << entry[:path]
274
- end
275
- end
276
- end
274
+ Atoms::ArtifactContractValidator.validate!(
275
+ tc_id: tc_id,
276
+ scenario_dir: scenario_dir,
277
+ runner_references: runner_references,
278
+ verifier_references: verifier_references,
279
+ scenario_references: scenario_references
280
+ )
277
281
 
278
- required = required_artifacts.map { |path| normalize_declared_artifact(path) }.compact.uniq
279
- optional = optional_artifacts.map { |path| normalize_declared_artifact(path) }.compact.uniq
282
+ required = (scenario_references + runner_references)
283
+ .reject(&:optional)
284
+ .map(&:path)
285
+ .compact
286
+ .uniq
287
+ optional = runner_references
288
+ .select(&:optional)
289
+ .map(&:path)
290
+ .compact
291
+ .uniq
280
292
  optional = optional.reject { |path| required.include?(path) }
281
293
 
282
294
  [required.sort, optional.sort]
@@ -300,20 +312,6 @@ module Ace
300
312
  match ? match[1].to_i : nil
301
313
  end
302
314
 
303
- def extract_declared_artifacts(markdown)
304
- markdown.to_s.scan(%r{(?:`|"|')?(results/tc/\d{2}/[^\s`)"']+|results/tc/\d{2}/)(?:`|"|')?(\s*\(optional\))?}i).map do |match|
305
- path, optional = match
306
- {path: path, optional: !optional.to_s.empty?}
307
- end
308
- end
309
-
310
- def normalize_declared_artifact(path)
311
- value = path.to_s.strip
312
- return nil unless value.start_with?("results/tc/")
313
-
314
- value.sub(%r{/+\z}, "")
315
- end
316
-
317
315
  # Infer package name from scenario directory path
318
316
  #
319
317
  # @param scenario_dir [String] Path to scenario directory
@@ -39,13 +39,15 @@ module Ace
39
39
  # @param scenario_name [String, nil] Test ID for tmux session naming (e.g., "TS-OVERSEER-001")
40
40
  # @param run_id [String, nil] Unique run ID for deterministic tmux session naming
41
41
  # @return [Hash] Result with :success, :steps_completed, :error, :env, :tmux_session keys
42
- def execute(setup_steps:, sandbox_dir:, fixture_source: nil, scenario_name: nil, run_id: nil, initial_env: {})
42
+ def execute(setup_steps:, sandbox_dir:, fixture_source: nil, scenario_name: nil, run_id: nil, initial_env: {},
43
+ git_excludes: [])
43
44
  FileUtils.mkdir_p(sandbox_dir)
44
45
  env = if @sandbox_backend
45
46
  @sandbox_backend.prepared_env(initial_env.dup)
46
47
  else
47
48
  initial_env.dup
48
49
  end
50
+ @git_excludes = normalize_git_excludes(git_excludes)
49
51
  steps_completed = 0
50
52
  @tmux_session = nil
51
53
  @scenario_name = scenario_name
@@ -162,6 +164,7 @@ module Ace
162
164
  run_command("git", "init", "-b", "main", chdir: sandbox_dir, env: env)
163
165
  run_command("git", "config", "user.name", "Test User", chdir: sandbox_dir, env: env)
164
166
  run_command("git", "config", "user.email", "test@example.com", chdir: sandbox_dir, env: env)
167
+ seed_git_excludes(sandbox_dir)
165
168
  end
166
169
 
167
170
  # Copy fixture files into sandbox
@@ -245,6 +248,26 @@ module Ace
245
248
  Open3.capture3(*args, **kwargs)
246
249
  end
247
250
 
251
+ def seed_git_excludes(sandbox_dir)
252
+ patterns = (default_git_excludes + @git_excludes).uniq
253
+ return if patterns.empty?
254
+
255
+ exclude_path = File.join(sandbox_dir, ".git", "info", "exclude")
256
+ existing = File.exist?(exclude_path) ? File.readlines(exclude_path, chomp: true) : []
257
+ additions = patterns.reject { |pattern| existing.include?(pattern) }
258
+ return if additions.empty?
259
+
260
+ File.write(exclude_path, (existing + additions).join("\n") + "\n")
261
+ end
262
+
263
+ def normalize_git_excludes(git_excludes)
264
+ Array(git_excludes).map(&:to_s).map(&:strip).reject(&:empty?).uniq
265
+ end
266
+
267
+ def default_git_excludes
268
+ [".ace-local/", "reports/", "results/"]
269
+ end
270
+
248
271
  def sanitized_process_environment
249
272
  ENV.to_h.each_with_object({}) do |(key, value), env|
250
273
  if AMBIENT_TMUX_ENV_VARS.include?(key) || STRIPPED_ENV_KEYS.include?(key) ||
@@ -27,7 +27,8 @@ module Ace
27
27
 
28
28
  REPORT_KINDS = {
29
29
  package: ->(timestamp, package) { "#{timestamp}-#{package}-report.md" },
30
- suite: ->(timestamp, _package) { "#{timestamp}-suite-report.md" }
30
+ suite: ->(timestamp, _package) { "#{timestamp}-suite-report.md" },
31
+ suite_final: ->(timestamp, _package) { "#{timestamp}-suite-final-report.md" }
31
32
  }.freeze
32
33
 
33
34
  # Write an aggregated report
@@ -68,6 +69,44 @@ module Ace
68
69
  report_path
69
70
  end
70
71
 
72
+ # Write a deterministic wrapper report for a two-attempt suite run.
73
+ #
74
+ # Preserves first-pass failure evidence while reflecting the final retry outcome.
75
+ def write_retry_summary(initial_results:, retry_results:, timestamp:, base_dir:, package: "suite")
76
+ cache_dir = File.join(base_dir, ".ace-local", "test-e2e")
77
+ FileUtils.mkdir_p(cache_dir)
78
+
79
+ report_path = File.join(cache_dir, report_filename(:suite_final, timestamp, package))
80
+ initial_entries = flatten_attempt_results(initial_results, base_dir: base_dir)
81
+ retry_entries = flatten_attempt_results(retry_results, base_dir: base_dir)
82
+ retry_by_test = retry_entries.each_with_object({}) { |entry, memo| memo[entry[:test_id]] = entry }
83
+
84
+ flaky_entries = initial_entries.filter_map do |entry|
85
+ next if entry[:status] == "pass"
86
+
87
+ retry_entry = retry_by_test[entry[:test_id]]
88
+ next unless retry_entry && retry_entry[:status] == "pass"
89
+
90
+ entry.merge(retry_entry: retry_entry)
91
+ end.sort_by { |entry| entry[:test_id] }
92
+ remaining_entries = retry_entries.reject { |entry| entry[:status] == "pass" }.sort_by { |entry| entry[:test_id] }
93
+ final_status = compute_retry_summary_status(retry_entries)
94
+
95
+ content = build_retry_summary_content(
96
+ timestamp: timestamp,
97
+ initial_results: initial_results,
98
+ retry_results: retry_results,
99
+ initial_entries: initial_entries,
100
+ flaky_entries: flaky_entries,
101
+ remaining_entries: remaining_entries,
102
+ final_status: final_status,
103
+ base_dir: base_dir
104
+ )
105
+
106
+ File.write(report_path, content)
107
+ report_path
108
+ end
109
+
71
110
  private
72
111
 
73
112
  def report_filename(report_kind, timestamp, package)
@@ -77,6 +116,148 @@ module Ace
77
116
  builder.call(timestamp, package)
78
117
  end
79
118
 
119
+ def flatten_attempt_results(results, base_dir:)
120
+ results.fetch(:packages, {}).values.flatten.map do |result|
121
+ report_dir = result[:report_dir]
122
+ metadata = read_retry_metadata(report_dir)
123
+ report_frontmatter = read_report_frontmatter(report_dir)
124
+ test_name = result[:test_name] || result[:test_id] || ""
125
+ test_id = metadata["test-id"] || canonical_retry_test_id(test_name)
126
+ failed_entries = Array(metadata["failed"]).filter_map do |entry|
127
+ next unless entry.is_a?(Hash)
128
+
129
+ {
130
+ tc: entry["tc"] || entry[:tc],
131
+ category: entry["category"] || entry[:category] || "runner-error",
132
+ evidence: entry["evidence"] || entry[:evidence] || "See attempt report for details"
133
+ }
134
+ end
135
+ if failed_entries.empty? && result[:status] != "pass"
136
+ failed_entries << {
137
+ tc: nil,
138
+ category: result[:status] || "runner-error",
139
+ evidence: result[:summary] || result[:error] || "See attempt report for details"
140
+ }
141
+ end
142
+
143
+ {
144
+ test_id: test_id,
145
+ title: report_frontmatter["title"] || test_id,
146
+ status: result[:status],
147
+ report_dir: report_dir,
148
+ report_dir_display: display_path(report_dir, base_dir),
149
+ report_dir_name: report_dir ? File.basename(report_dir) : nil,
150
+ failed_entries: failed_entries,
151
+ passed_cases: result[:passed_cases] || metadata["tcs-passed"] || metadata.dig("results", "passed") || 0,
152
+ total_cases: result[:total_cases] || metadata["tcs-total"] || metadata.dig("results", "total") || 0
153
+ }
154
+ end
155
+ end
156
+
157
+ def read_retry_metadata(report_dir)
158
+ return {} unless report_dir
159
+
160
+ path = File.join(report_dir, "metadata.yml")
161
+ return {} unless File.exist?(path)
162
+
163
+ YAML.safe_load_file(path, permitted_classes: [Time, Date]) || {}
164
+ rescue
165
+ {}
166
+ end
167
+
168
+ def canonical_retry_test_id(test_name)
169
+ match = test_name.to_s.match(/\A(TS-[A-Z0-9]+-\d+[a-z]*)/i)
170
+ match ? match[1].upcase : test_name
171
+ end
172
+
173
+ def display_path(path, base_dir)
174
+ return nil if path.nil?
175
+
176
+ path.start_with?(base_dir) ? path.delete_prefix("#{base_dir}/") : path
177
+ end
178
+
179
+ def compute_retry_summary_status(entries)
180
+ executed = entries.reject { |entry| entry[:status] == "skip" }
181
+ return "skip" if executed.empty?
182
+ return "pass" if executed.all? { |entry| entry[:status] == "pass" }
183
+ return "partial" if executed.any? { |entry| entry[:status] == "pass" }
184
+
185
+ "fail"
186
+ end
187
+
188
+ def build_retry_summary_content(timestamp:, initial_results:, retry_results:, initial_entries:, flaky_entries:, remaining_entries:, final_status:, base_dir:)
189
+ total_initial_failures = initial_entries.count { |entry| entry[:status] != "pass" }
190
+ lines = []
191
+ lines << "---"
192
+ lines << "suite-id: #{timestamp}"
193
+ lines << "package: suite"
194
+ lines << "status: #{final_status}"
195
+ lines << "retry-attempted: true"
196
+ lines << "flaky-scenarios: #{flaky_entries.length}"
197
+ lines << "remaining-failures: #{remaining_entries.length}"
198
+ lines << "attempt-1-report: #{display_path(initial_results[:report_path], base_dir)}"
199
+ lines << "attempt-2-report: #{display_path(retry_results[:report_path], base_dir)}"
200
+ lines << "---"
201
+ lines << ""
202
+ lines << "# E2E Final Suite Report: `suite`"
203
+ lines << ""
204
+ lines << "## Attempt Summary"
205
+ lines << ""
206
+ lines << "| Attempt | Report | Status | Scenarios | Failures |"
207
+ lines << "|---|---|---:|---:|---:|"
208
+ lines << "| 1 | `#{display_path(initial_results[:report_path], base_dir)}` | #{initial_results[:failed].to_i > 0 || initial_results[:errors].to_i > 0 ? "Fail" : "Pass"} | #{initial_results[:total]} | #{initial_results[:failed].to_i + initial_results[:errors].to_i} |"
209
+ lines << "| 2 | `#{display_path(retry_results[:report_path], base_dir)}` | #{retry_results[:failed].to_i > 0 || retry_results[:errors].to_i > 0 ? "Fail" : "Pass"} | #{retry_results[:total]} | #{retry_results[:failed].to_i + retry_results[:errors].to_i} |"
210
+ lines << ""
211
+ lines << "First-pass failing scenarios: #{total_initial_failures}"
212
+ lines << "Recovered on retry (flaky): #{flaky_entries.length}"
213
+ lines << "Remaining failures after retry: #{remaining_entries.length}"
214
+ lines << ""
215
+ lines << "## Flaky Recoveries"
216
+ lines << ""
217
+ if flaky_entries.empty?
218
+ lines << "None."
219
+ else
220
+ flaky_entries.each do |entry|
221
+ lines << "### #{entry[:test_id]}"
222
+ lines << ""
223
+ lines << "- Title: #{entry[:title]}"
224
+ lines << "- Attempt 1 status: `#{entry[:status]}`"
225
+ lines << "- Attempt 1 report directory: `#{entry[:report_dir_display]}`"
226
+ lines << "- Attempt 2 report directory: `#{entry[:retry_entry][:report_dir_display]}`"
227
+ entry[:failed_entries].each do |failure|
228
+ lines << "- #{format_failure_entry(failure)}"
229
+ end
230
+ lines << ""
231
+ end
232
+ end
233
+ lines << "## Remaining Failures"
234
+ lines << ""
235
+ if remaining_entries.empty?
236
+ lines << "None."
237
+ else
238
+ remaining_entries.each do |entry|
239
+ lines << "### #{entry[:test_id]}"
240
+ lines << ""
241
+ lines << "- Title: #{entry[:title]}"
242
+ lines << "- Attempt 2 status: `#{entry[:status]}`"
243
+ lines << "- Attempt 2 report directory: `#{entry[:report_dir_display]}`"
244
+ entry[:failed_entries].each do |failure|
245
+ lines << "- #{format_failure_entry(failure)}"
246
+ end
247
+ lines << ""
248
+ end
249
+ end
250
+
251
+ lines.join("\n")
252
+ end
253
+
254
+ def format_failure_entry(failure)
255
+ tc = failure[:tc] || failure["tc"]
256
+ category = failure[:category] || failure["category"] || "runner-error"
257
+ evidence = failure[:evidence] || failure["evidence"] || "See attempt report for details"
258
+ tc ? "`#{tc}` (`#{category}`) - #{evidence}" : "`#{category}` - #{evidence}"
259
+ end
260
+
80
261
  # Attempt LLM synthesis for narrative sections only, falling back to
81
262
  # deterministic defaults when the model is unavailable or malformed.
82
263
  def synthesize_narrative_sections(results_data, package:, timestamp:, overall_status:, executed_at:)
@@ -31,7 +31,8 @@ module Ace
31
31
  # @param timestamp_generator Timestamp generator (injectable)
32
32
  def initialize(max_parallel: 4, base_dir: nil, discoverer: nil, affected_detector: nil,
33
33
  failure_finder: nil, output: $stdout, use_color: nil, progress: false,
34
- suite_report_writer: nil, scenario_loader: nil, timestamp_generator: nil)
34
+ suite_report_writer: nil, scenario_loader: nil, timestamp_generator: nil,
35
+ runtime_builder: nil, shared_runtime_cache_root: nil)
35
36
  @max_parallel = max_parallel
36
37
  @base_dir = base_dir || Dir.pwd
37
38
  @discoverer = discoverer || Molecules::TestDiscoverer.new
@@ -44,6 +45,14 @@ module Ace
44
45
  @suite_report_writer = suite_report_writer || Molecules::SuiteReportWriter.new(config: config)
45
46
  @loader = scenario_loader || Molecules::ScenarioLoader.new
46
47
  @timestamp_generator = timestamp_generator || method(:default_timestamp)
48
+ @runtime_builder = runtime_builder || Molecules::SandboxRuntimeBuilder.new(
49
+ source_root: @base_dir,
50
+ ruby_version: config.dig("sandbox", "ruby_version") || Molecules::ConfigLoader.default_sandbox_ruby_version
51
+ )
52
+ @shared_runtime_cache_root = File.expand_path(
53
+ shared_runtime_cache_root || File.join(@base_dir, Molecules::SandboxRuntimeBuilder::DEFAULT_SHARED_RUNTIME_CACHE_ROOT)
54
+ )
55
+ @shared_runtime_root = nil
47
56
  end
48
57
 
49
58
  # Run E2E tests across all packages
@@ -124,6 +133,7 @@ module Ace
124
133
 
125
134
  total_tests = package_tests.values.flatten.size
126
135
  pkg_count = package_tests.keys.size
136
+ prepare_shared_runtime_cache if total_tests > 0
127
137
 
128
138
  # Pre-compute column widths for aligned output
129
139
  compute_column_widths(package_tests)
@@ -330,7 +340,7 @@ module Ace
330
340
  cmd_array = build_test_command(package, test_file, options, run_id: run_id)
331
341
 
332
342
  # Spawn process with array form (no shell invocation)
333
- stdin, stdout, stderr, thread = Open3.popen3(*cmd_array, chdir: @base_dir)
343
+ stdin, stdout, stderr, thread = Open3.popen3(suite_subprocess_env, *cmd_array, chdir: @base_dir)
334
344
 
335
345
  {pid: thread.pid, thread: thread, stdout: stdout, stderr: stderr,
336
346
  stdin: stdin, package: package, test_file: test_file, output: String.new}
@@ -395,6 +405,18 @@ module Ace
395
405
  File.executable?(local) ? local : "ace-test-e2e"
396
406
  end
397
407
 
408
+ def prepare_shared_runtime_cache
409
+ @shared_runtime_root = @runtime_builder.prepare_shared_runtime(
410
+ cache_root: @shared_runtime_cache_root
411
+ )
412
+ end
413
+
414
+ def suite_subprocess_env
415
+ return {} if @shared_runtime_root.to_s.empty?
416
+
417
+ {Molecules::SandboxRuntimeBuilder::SHARED_RUNTIME_ENV_KEY => @shared_runtime_root}
418
+ end
419
+
398
420
  # Extract test ID from file path
399
421
  #
400
422
  # @param test_file [String] Path to scenario.yml file
@@ -864,7 +886,7 @@ module Ace
864
886
  # @return [Hash] Test result
865
887
  def run_single_test(package, test_file, options, run_id: nil)
866
888
  cmd_array = build_test_command(package, test_file, options, run_id: run_id)
867
- output, stderr, status = Open3.capture3(*cmd_array, chdir: @base_dir)
889
+ output, stderr, status = Open3.capture3(suite_subprocess_env, *cmd_array, chdir: @base_dir)
868
890
 
869
891
  # Combine stdout and stderr for parsing
870
892
  combined_output = output + stderr