ace-test-runner-e2e 0.38.11 → 0.40.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +55 -0
- data/handbook/guides/e2e-testing.g.md +35 -3
- data/handbook/guides/scenario-yml-reference.g.md +8 -3
- data/handbook/guides/tc-authoring.g.md +15 -4
- data/handbook/templates/tc-file.template.md +4 -2
- data/handbook/workflow-instructions/e2e/create.wf.md +13 -3
- data/handbook/workflow-instructions/e2e/fix.wf.md +19 -0
- data/handbook/workflow-instructions/e2e/plan-changes.wf.md +16 -0
- data/handbook/workflow-instructions/e2e/review.wf.md +14 -10
- data/handbook/workflow-instructions/e2e/rewrite.wf.md +10 -3
- data/lib/ace/test/end_to_end_runner/atoms/artifact_contract_validator.rb +138 -0
- data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +195 -5
- data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +37 -1
- data/lib/ace/test/end_to_end_runner/molecules/artifact_pruner.rb +61 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +90 -14
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +43 -5
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +7 -5
- data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +2 -0
- data/lib/ace/test/end_to_end_runner/molecules/sandbox_runtime_builder.rb +101 -9
- data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +28 -30
- data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +24 -1
- data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +182 -1
- data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +25 -3
- data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +44 -5
- data/lib/ace/test/end_to_end_runner/version.rb +1 -1
- data/lib/ace/test/end_to_end_runner.rb +2 -0
- metadata +4 -2
|
@@ -77,7 +77,7 @@ module Ace
|
|
|
77
77
|
# non-mutating.
|
|
78
78
|
#
|
|
79
79
|
# @param sandbox_path [String]
|
|
80
|
-
# @return [
|
|
80
|
+
# @return [Array<String>]
|
|
81
81
|
def sync_protocol_sources_into(sandbox_path)
|
|
82
82
|
sync_protocol_sources(File.expand_path(sandbox_path))
|
|
83
83
|
end
|
|
@@ -108,12 +108,13 @@ module Ace
|
|
|
108
108
|
end
|
|
109
109
|
|
|
110
110
|
def sync_protocol_sources(sandbox_path)
|
|
111
|
-
%w[skill wfi].
|
|
111
|
+
packages = %w[skill wfi].flat_map do |protocol|
|
|
112
112
|
Dir.glob(File.join(@config_root, "*", ".ace-defaults", "nav", "protocols",
|
|
113
|
-
"#{protocol}-sources", "*.yml")).sort.
|
|
113
|
+
"#{protocol}-sources", "*.yml")).sort.filter_map do |manifest_path|
|
|
114
114
|
sync_protocol_source_manifest(protocol, manifest_path, sandbox_path)
|
|
115
115
|
end
|
|
116
116
|
end
|
|
117
|
+
packages.compact.uniq.sort
|
|
117
118
|
end
|
|
118
119
|
|
|
119
120
|
def sync_protocol_source_manifest(protocol, manifest_path, sandbox_path)
|
|
@@ -137,11 +138,12 @@ module Ace
|
|
|
137
138
|
|
|
138
139
|
FileUtils.mkdir_p(File.dirname(target_manifest_path))
|
|
139
140
|
FileUtils.cp(manifest_path, target_manifest_path) unless File.exist?(target_manifest_path)
|
|
140
|
-
return unless File.directory?(source_dir)
|
|
141
|
+
return package_name unless File.directory?(source_dir)
|
|
141
142
|
return if File.exist?(target_dir)
|
|
142
143
|
|
|
143
144
|
FileUtils.mkdir_p(File.dirname(target_dir))
|
|
144
145
|
FileUtils.cp_r(source_dir, target_dir)
|
|
146
|
+
package_name
|
|
145
147
|
rescue Psych::SyntaxError
|
|
146
148
|
nil
|
|
147
149
|
end
|
|
@@ -214,7 +216,7 @@ module Ace
|
|
|
214
216
|
|
|
215
217
|
stdout, stderr, status = Open3.capture3(
|
|
216
218
|
env,
|
|
217
|
-
"bash", "--noprofile", "--norc", "-c", "ace-config
|
|
219
|
+
"bash", "--noprofile", "--norc", "-c", "ace-config sync ace-llm-providers-cli && ace-handbook sync",
|
|
218
220
|
chdir: sandbox_path
|
|
219
221
|
)
|
|
220
222
|
return if status.success?
|
|
@@ -217,6 +217,8 @@ module Ace
|
|
|
217
217
|
metadata["runner_observations"] = result.metadata["runner_observations"] if result.metadata.key?("runner_observations")
|
|
218
218
|
metadata["verifier_observations"] = result.observations unless result.observations.to_s.empty?
|
|
219
219
|
metadata["missing_required_artifacts"] = result.metadata["missing_required_artifacts"] if result.metadata.key?("missing_required_artifacts")
|
|
220
|
+
metadata["initial_missing_required_artifacts"] = result.metadata["initial_missing_required_artifacts"] if result.metadata.key?("initial_missing_required_artifacts")
|
|
221
|
+
metadata["artifact_repair_attempted"] = result.metadata["artifact_repair_attempted"] if result.metadata.key?("artifact_repair_attempted")
|
|
220
222
|
metadata["works_for_end_user"] = result.metadata["works_for_end_user"] if result.metadata.key?("works_for_end_user")
|
|
221
223
|
metadata["user_friction"] = result.metadata["user_friction"] if result.metadata.key?("user_friction")
|
|
222
224
|
metadata["user_feedback"] = result.metadata["user_feedback"] if result.metadata.key?("user_feedback")
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require "fileutils"
|
|
4
4
|
require "open3"
|
|
5
|
+
require "digest"
|
|
5
6
|
|
|
6
7
|
module Ace
|
|
7
8
|
module Test
|
|
@@ -10,6 +11,9 @@ module Ace
|
|
|
10
11
|
# Builds a sandbox-local Ruby/Bundler runtime for E2E execution.
|
|
11
12
|
class SandboxRuntimeBuilder
|
|
12
13
|
DEFAULT_RUBY_VERSION = "3.4.9"
|
|
14
|
+
DEFAULT_SHARED_RUNTIME_CACHE_ROOT = ".ace-local/test-e2e/runtime-cache"
|
|
15
|
+
SHARED_RUNTIME_ENV_KEY = "ACE_E2E_SHARED_RUNTIME_ROOT"
|
|
16
|
+
RUNTIME_CACHE_LAYOUT_VERSION = 1
|
|
13
17
|
RESERVED_ENV_KEYS = %w[
|
|
14
18
|
PROJECT_ROOT_PATH
|
|
15
19
|
ACE_E2E_SOURCE_ROOT
|
|
@@ -37,14 +41,22 @@ module Ace
|
|
|
37
41
|
|
|
38
42
|
def prepare(sandbox_root:, env: {}, tool_names: nil)
|
|
39
43
|
sandbox_root = File.expand_path(sandbox_root)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
+
local_runtime_root = File.join(sandbox_root, ".ace-local", "e2e-runtime")
|
|
45
|
+
runtime_root = resolve_runtime_root(local_runtime_root, env)
|
|
46
|
+
FileUtils.mkdir_p(runtime_root) unless shared_runtime_root?(env)
|
|
47
|
+
|
|
48
|
+
runtime_env = build_runtime_env(
|
|
49
|
+
sandbox_root,
|
|
50
|
+
runtime_root,
|
|
51
|
+
env,
|
|
52
|
+
mutable_runtime_root: local_runtime_root
|
|
53
|
+
)
|
|
44
54
|
ensure_runtime_dirs(runtime_env)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
55
|
+
if shared_runtime_root?(env)
|
|
56
|
+
ensure_shared_runtime!(runtime_root, tool_names)
|
|
57
|
+
else
|
|
58
|
+
prepare_runtime_root!(runtime_root, runtime_env, tool_names)
|
|
59
|
+
end
|
|
48
60
|
|
|
49
61
|
{
|
|
50
62
|
runtime_root: runtime_root,
|
|
@@ -52,15 +64,23 @@ module Ace
|
|
|
52
64
|
}
|
|
53
65
|
end
|
|
54
66
|
|
|
67
|
+
def prepare_shared_runtime(cache_root: nil, tool_names: nil)
|
|
68
|
+
runtime_root = shared_runtime_root(cache_root: cache_root)
|
|
69
|
+
runtime_env = build_shared_runtime_env(runtime_root)
|
|
70
|
+
ensure_runtime_dirs(runtime_env)
|
|
71
|
+
prepare_runtime_root!(runtime_root, runtime_env, tool_names)
|
|
72
|
+
runtime_root
|
|
73
|
+
end
|
|
74
|
+
|
|
55
75
|
private
|
|
56
76
|
|
|
57
77
|
def capture3(env, *cmd, chdir:)
|
|
58
78
|
Open3.capture3(env, *cmd, chdir: chdir, unsetenv_others: true)
|
|
59
79
|
end
|
|
60
80
|
|
|
61
|
-
def build_runtime_env(sandbox_root, runtime_root, env)
|
|
81
|
+
def build_runtime_env(sandbox_root, runtime_root, env, mutable_runtime_root: runtime_root)
|
|
62
82
|
merged = stringify_keys(env).reject { |key, _value| RESERVED_ENV_KEYS.include?(key) }
|
|
63
|
-
bundler_root = File.join(
|
|
83
|
+
bundler_root = File.join(mutable_runtime_root, "bundler")
|
|
64
84
|
gem_root = File.join(runtime_root, "gems")
|
|
65
85
|
bin_root = File.join(runtime_root, "bin")
|
|
66
86
|
path = merged["PATH"].to_s
|
|
@@ -87,6 +107,10 @@ module Ace
|
|
|
87
107
|
)
|
|
88
108
|
end
|
|
89
109
|
|
|
110
|
+
def build_shared_runtime_env(runtime_root)
|
|
111
|
+
build_runtime_env(@source_root, runtime_root, {}, mutable_runtime_root: runtime_root)
|
|
112
|
+
end
|
|
113
|
+
|
|
90
114
|
def ensure_runtime_dirs(env)
|
|
91
115
|
[
|
|
92
116
|
env.fetch("ACE_CONFIG_PATH"),
|
|
@@ -169,6 +193,74 @@ module Ace
|
|
|
169
193
|
.uniq
|
|
170
194
|
end
|
|
171
195
|
|
|
196
|
+
def resolve_runtime_root(local_runtime_root, env)
|
|
197
|
+
shared_root = shared_runtime_root_from_env(env)
|
|
198
|
+
return shared_root if shared_root
|
|
199
|
+
|
|
200
|
+
local_runtime_root
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
def shared_runtime_root?(env)
|
|
204
|
+
!shared_runtime_root_from_env(env).nil?
|
|
205
|
+
end
|
|
206
|
+
|
|
207
|
+
def shared_runtime_root_from_env(env)
|
|
208
|
+
merged = stringify_keys(ENV.to_h).merge(stringify_keys(env))
|
|
209
|
+
raw = merged[SHARED_RUNTIME_ENV_KEY].to_s.strip
|
|
210
|
+
return nil if raw.empty?
|
|
211
|
+
|
|
212
|
+
File.expand_path(raw)
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
def shared_runtime_root(cache_root: nil)
|
|
216
|
+
base = if cache_root
|
|
217
|
+
File.expand_path(cache_root)
|
|
218
|
+
else
|
|
219
|
+
File.join(@source_root, DEFAULT_SHARED_RUNTIME_CACHE_ROOT)
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
File.join(base, runtime_cache_key)
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
def runtime_cache_key
|
|
226
|
+
@runtime_cache_key ||= begin
|
|
227
|
+
digest = Digest::SHA256.new
|
|
228
|
+
digest.update("layout:#{RUNTIME_CACHE_LAYOUT_VERSION}\n")
|
|
229
|
+
digest.update("ruby:#{@ruby_version}\n")
|
|
230
|
+
digest.update(File.read(File.join(@source_root, "Gemfile")))
|
|
231
|
+
lockfile_path = File.join(@source_root, "Gemfile.lock")
|
|
232
|
+
digest.update(File.read(lockfile_path)) if File.file?(lockfile_path)
|
|
233
|
+
digest.hexdigest[0, 16]
|
|
234
|
+
end
|
|
235
|
+
end
|
|
236
|
+
|
|
237
|
+
def ensure_shared_runtime!(runtime_root, tool_names)
|
|
238
|
+
runtime_env = build_shared_runtime_env(runtime_root)
|
|
239
|
+
ensure_runtime_dirs(runtime_env)
|
|
240
|
+
prepare_runtime_root!(runtime_root, runtime_env, tool_names)
|
|
241
|
+
end
|
|
242
|
+
|
|
243
|
+
def prepare_runtime_root!(runtime_root, env, tool_names)
|
|
244
|
+
with_runtime_lock(runtime_root) do
|
|
245
|
+
marker_path = File.join(runtime_root, ".bootstrapped")
|
|
246
|
+
return if File.exist?(marker_path)
|
|
247
|
+
|
|
248
|
+
FileUtils.mkdir_p(runtime_root)
|
|
249
|
+
write_runtime_gemfile(runtime_root)
|
|
250
|
+
write_command_shims(runtime_root, tool_names)
|
|
251
|
+
install_runtime!(runtime_root, env)
|
|
252
|
+
end
|
|
253
|
+
end
|
|
254
|
+
|
|
255
|
+
def with_runtime_lock(runtime_root)
|
|
256
|
+
lock_path = "#{runtime_root}.lock"
|
|
257
|
+
FileUtils.mkdir_p(File.dirname(lock_path))
|
|
258
|
+
File.open(lock_path, File::RDWR | File::CREAT, 0o644) do |lock_file|
|
|
259
|
+
lock_file.flock(File::LOCK_EX)
|
|
260
|
+
yield
|
|
261
|
+
end
|
|
262
|
+
end
|
|
263
|
+
|
|
172
264
|
def install_runtime!(runtime_root, env)
|
|
173
265
|
marker_path = File.join(runtime_root, ".bootstrapped")
|
|
174
266
|
return if File.exist?(marker_path)
|
|
@@ -256,27 +256,39 @@ module Ace
|
|
|
256
256
|
|
|
257
257
|
def declared_artifacts_for(scenario_dir, tc_id, runner_content, verify_content)
|
|
258
258
|
scenario_frontmatter = parse_scenario_yml(File.join(scenario_dir, "scenario.yml"))
|
|
259
|
-
|
|
260
|
-
optional_artifacts = []
|
|
261
|
-
|
|
262
|
-
required_artifacts.concat(
|
|
259
|
+
scenario_references = Atoms::ArtifactContractValidator.references_from_paths(
|
|
263
260
|
Array((scenario_frontmatter["sandbox-layout"] || {}).keys).select do |path|
|
|
264
261
|
declared_artifact_matches_tc?(path, tc_id)
|
|
265
|
-
end
|
|
262
|
+
end,
|
|
263
|
+
source: File.join(scenario_dir, "scenario.yml")
|
|
264
|
+
)
|
|
265
|
+
runner_references = Atoms::ArtifactContractValidator.extract(
|
|
266
|
+
runner_content,
|
|
267
|
+
source: File.join(scenario_dir, "#{tc_id}.runner.md")
|
|
268
|
+
)
|
|
269
|
+
verifier_references = Atoms::ArtifactContractValidator.extract(
|
|
270
|
+
verify_content,
|
|
271
|
+
source: File.join(scenario_dir, "#{tc_id}.verify.md")
|
|
266
272
|
)
|
|
267
273
|
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
end
|
|
276
|
-
end
|
|
274
|
+
Atoms::ArtifactContractValidator.validate!(
|
|
275
|
+
tc_id: tc_id,
|
|
276
|
+
scenario_dir: scenario_dir,
|
|
277
|
+
runner_references: runner_references,
|
|
278
|
+
verifier_references: verifier_references,
|
|
279
|
+
scenario_references: scenario_references
|
|
280
|
+
)
|
|
277
281
|
|
|
278
|
-
required =
|
|
279
|
-
|
|
282
|
+
required = (scenario_references + runner_references)
|
|
283
|
+
.reject(&:optional)
|
|
284
|
+
.map(&:path)
|
|
285
|
+
.compact
|
|
286
|
+
.uniq
|
|
287
|
+
optional = runner_references
|
|
288
|
+
.select(&:optional)
|
|
289
|
+
.map(&:path)
|
|
290
|
+
.compact
|
|
291
|
+
.uniq
|
|
280
292
|
optional = optional.reject { |path| required.include?(path) }
|
|
281
293
|
|
|
282
294
|
[required.sort, optional.sort]
|
|
@@ -300,20 +312,6 @@ module Ace
|
|
|
300
312
|
match ? match[1].to_i : nil
|
|
301
313
|
end
|
|
302
314
|
|
|
303
|
-
def extract_declared_artifacts(markdown)
|
|
304
|
-
markdown.to_s.scan(%r{(?:`|"|')?(results/tc/\d{2}/[^\s`)"']+|results/tc/\d{2}/)(?:`|"|')?(\s*\(optional\))?}i).map do |match|
|
|
305
|
-
path, optional = match
|
|
306
|
-
{path: path, optional: !optional.to_s.empty?}
|
|
307
|
-
end
|
|
308
|
-
end
|
|
309
|
-
|
|
310
|
-
def normalize_declared_artifact(path)
|
|
311
|
-
value = path.to_s.strip
|
|
312
|
-
return nil unless value.start_with?("results/tc/")
|
|
313
|
-
|
|
314
|
-
value.sub(%r{/+\z}, "")
|
|
315
|
-
end
|
|
316
|
-
|
|
317
315
|
# Infer package name from scenario directory path
|
|
318
316
|
#
|
|
319
317
|
# @param scenario_dir [String] Path to scenario directory
|
|
@@ -39,13 +39,15 @@ module Ace
|
|
|
39
39
|
# @param scenario_name [String, nil] Test ID for tmux session naming (e.g., "TS-OVERSEER-001")
|
|
40
40
|
# @param run_id [String, nil] Unique run ID for deterministic tmux session naming
|
|
41
41
|
# @return [Hash] Result with :success, :steps_completed, :error, :env, :tmux_session keys
|
|
42
|
-
def execute(setup_steps:, sandbox_dir:, fixture_source: nil, scenario_name: nil, run_id: nil, initial_env: {}
|
|
42
|
+
def execute(setup_steps:, sandbox_dir:, fixture_source: nil, scenario_name: nil, run_id: nil, initial_env: {},
|
|
43
|
+
git_excludes: [])
|
|
43
44
|
FileUtils.mkdir_p(sandbox_dir)
|
|
44
45
|
env = if @sandbox_backend
|
|
45
46
|
@sandbox_backend.prepared_env(initial_env.dup)
|
|
46
47
|
else
|
|
47
48
|
initial_env.dup
|
|
48
49
|
end
|
|
50
|
+
@git_excludes = normalize_git_excludes(git_excludes)
|
|
49
51
|
steps_completed = 0
|
|
50
52
|
@tmux_session = nil
|
|
51
53
|
@scenario_name = scenario_name
|
|
@@ -162,6 +164,7 @@ module Ace
|
|
|
162
164
|
run_command("git", "init", "-b", "main", chdir: sandbox_dir, env: env)
|
|
163
165
|
run_command("git", "config", "user.name", "Test User", chdir: sandbox_dir, env: env)
|
|
164
166
|
run_command("git", "config", "user.email", "test@example.com", chdir: sandbox_dir, env: env)
|
|
167
|
+
seed_git_excludes(sandbox_dir)
|
|
165
168
|
end
|
|
166
169
|
|
|
167
170
|
# Copy fixture files into sandbox
|
|
@@ -245,6 +248,26 @@ module Ace
|
|
|
245
248
|
Open3.capture3(*args, **kwargs)
|
|
246
249
|
end
|
|
247
250
|
|
|
251
|
+
def seed_git_excludes(sandbox_dir)
|
|
252
|
+
patterns = (default_git_excludes + @git_excludes).uniq
|
|
253
|
+
return if patterns.empty?
|
|
254
|
+
|
|
255
|
+
exclude_path = File.join(sandbox_dir, ".git", "info", "exclude")
|
|
256
|
+
existing = File.exist?(exclude_path) ? File.readlines(exclude_path, chomp: true) : []
|
|
257
|
+
additions = patterns.reject { |pattern| existing.include?(pattern) }
|
|
258
|
+
return if additions.empty?
|
|
259
|
+
|
|
260
|
+
File.write(exclude_path, (existing + additions).join("\n") + "\n")
|
|
261
|
+
end
|
|
262
|
+
|
|
263
|
+
def normalize_git_excludes(git_excludes)
|
|
264
|
+
Array(git_excludes).map(&:to_s).map(&:strip).reject(&:empty?).uniq
|
|
265
|
+
end
|
|
266
|
+
|
|
267
|
+
def default_git_excludes
|
|
268
|
+
[".ace-local/", "reports/", "results/"]
|
|
269
|
+
end
|
|
270
|
+
|
|
248
271
|
def sanitized_process_environment
|
|
249
272
|
ENV.to_h.each_with_object({}) do |(key, value), env|
|
|
250
273
|
if AMBIENT_TMUX_ENV_VARS.include?(key) || STRIPPED_ENV_KEYS.include?(key) ||
|
|
@@ -27,7 +27,8 @@ module Ace
|
|
|
27
27
|
|
|
28
28
|
REPORT_KINDS = {
|
|
29
29
|
package: ->(timestamp, package) { "#{timestamp}-#{package}-report.md" },
|
|
30
|
-
suite: ->(timestamp, _package) { "#{timestamp}-suite-report.md" }
|
|
30
|
+
suite: ->(timestamp, _package) { "#{timestamp}-suite-report.md" },
|
|
31
|
+
suite_final: ->(timestamp, _package) { "#{timestamp}-suite-final-report.md" }
|
|
31
32
|
}.freeze
|
|
32
33
|
|
|
33
34
|
# Write an aggregated report
|
|
@@ -68,6 +69,44 @@ module Ace
|
|
|
68
69
|
report_path
|
|
69
70
|
end
|
|
70
71
|
|
|
72
|
+
# Write a deterministic wrapper report for a two-attempt suite run.
|
|
73
|
+
#
|
|
74
|
+
# Preserves first-pass failure evidence while reflecting the final retry outcome.
|
|
75
|
+
def write_retry_summary(initial_results:, retry_results:, timestamp:, base_dir:, package: "suite")
|
|
76
|
+
cache_dir = File.join(base_dir, ".ace-local", "test-e2e")
|
|
77
|
+
FileUtils.mkdir_p(cache_dir)
|
|
78
|
+
|
|
79
|
+
report_path = File.join(cache_dir, report_filename(:suite_final, timestamp, package))
|
|
80
|
+
initial_entries = flatten_attempt_results(initial_results, base_dir: base_dir)
|
|
81
|
+
retry_entries = flatten_attempt_results(retry_results, base_dir: base_dir)
|
|
82
|
+
retry_by_test = retry_entries.each_with_object({}) { |entry, memo| memo[entry[:test_id]] = entry }
|
|
83
|
+
|
|
84
|
+
flaky_entries = initial_entries.filter_map do |entry|
|
|
85
|
+
next if entry[:status] == "pass"
|
|
86
|
+
|
|
87
|
+
retry_entry = retry_by_test[entry[:test_id]]
|
|
88
|
+
next unless retry_entry && retry_entry[:status] == "pass"
|
|
89
|
+
|
|
90
|
+
entry.merge(retry_entry: retry_entry)
|
|
91
|
+
end.sort_by { |entry| entry[:test_id] }
|
|
92
|
+
remaining_entries = retry_entries.reject { |entry| entry[:status] == "pass" }.sort_by { |entry| entry[:test_id] }
|
|
93
|
+
final_status = compute_retry_summary_status(retry_entries)
|
|
94
|
+
|
|
95
|
+
content = build_retry_summary_content(
|
|
96
|
+
timestamp: timestamp,
|
|
97
|
+
initial_results: initial_results,
|
|
98
|
+
retry_results: retry_results,
|
|
99
|
+
initial_entries: initial_entries,
|
|
100
|
+
flaky_entries: flaky_entries,
|
|
101
|
+
remaining_entries: remaining_entries,
|
|
102
|
+
final_status: final_status,
|
|
103
|
+
base_dir: base_dir
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
File.write(report_path, content)
|
|
107
|
+
report_path
|
|
108
|
+
end
|
|
109
|
+
|
|
71
110
|
private
|
|
72
111
|
|
|
73
112
|
def report_filename(report_kind, timestamp, package)
|
|
@@ -77,6 +116,148 @@ module Ace
|
|
|
77
116
|
builder.call(timestamp, package)
|
|
78
117
|
end
|
|
79
118
|
|
|
119
|
+
def flatten_attempt_results(results, base_dir:)
|
|
120
|
+
results.fetch(:packages, {}).values.flatten.map do |result|
|
|
121
|
+
report_dir = result[:report_dir]
|
|
122
|
+
metadata = read_retry_metadata(report_dir)
|
|
123
|
+
report_frontmatter = read_report_frontmatter(report_dir)
|
|
124
|
+
test_name = result[:test_name] || result[:test_id] || ""
|
|
125
|
+
test_id = metadata["test-id"] || canonical_retry_test_id(test_name)
|
|
126
|
+
failed_entries = Array(metadata["failed"]).filter_map do |entry|
|
|
127
|
+
next unless entry.is_a?(Hash)
|
|
128
|
+
|
|
129
|
+
{
|
|
130
|
+
tc: entry["tc"] || entry[:tc],
|
|
131
|
+
category: entry["category"] || entry[:category] || "runner-error",
|
|
132
|
+
evidence: entry["evidence"] || entry[:evidence] || "See attempt report for details"
|
|
133
|
+
}
|
|
134
|
+
end
|
|
135
|
+
if failed_entries.empty? && result[:status] != "pass"
|
|
136
|
+
failed_entries << {
|
|
137
|
+
tc: nil,
|
|
138
|
+
category: result[:status] || "runner-error",
|
|
139
|
+
evidence: result[:summary] || result[:error] || "See attempt report for details"
|
|
140
|
+
}
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
{
|
|
144
|
+
test_id: test_id,
|
|
145
|
+
title: report_frontmatter["title"] || test_id,
|
|
146
|
+
status: result[:status],
|
|
147
|
+
report_dir: report_dir,
|
|
148
|
+
report_dir_display: display_path(report_dir, base_dir),
|
|
149
|
+
report_dir_name: report_dir ? File.basename(report_dir) : nil,
|
|
150
|
+
failed_entries: failed_entries,
|
|
151
|
+
passed_cases: result[:passed_cases] || metadata["tcs-passed"] || metadata.dig("results", "passed") || 0,
|
|
152
|
+
total_cases: result[:total_cases] || metadata["tcs-total"] || metadata.dig("results", "total") || 0
|
|
153
|
+
}
|
|
154
|
+
end
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
def read_retry_metadata(report_dir)
|
|
158
|
+
return {} unless report_dir
|
|
159
|
+
|
|
160
|
+
path = File.join(report_dir, "metadata.yml")
|
|
161
|
+
return {} unless File.exist?(path)
|
|
162
|
+
|
|
163
|
+
YAML.safe_load_file(path, permitted_classes: [Time, Date]) || {}
|
|
164
|
+
rescue
|
|
165
|
+
{}
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
def canonical_retry_test_id(test_name)
|
|
169
|
+
match = test_name.to_s.match(/\A(TS-[A-Z0-9]+-\d+[a-z]*)/i)
|
|
170
|
+
match ? match[1].upcase : test_name
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
def display_path(path, base_dir)
|
|
174
|
+
return nil if path.nil?
|
|
175
|
+
|
|
176
|
+
path.start_with?(base_dir) ? path.delete_prefix("#{base_dir}/") : path
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
def compute_retry_summary_status(entries)
|
|
180
|
+
executed = entries.reject { |entry| entry[:status] == "skip" }
|
|
181
|
+
return "skip" if executed.empty?
|
|
182
|
+
return "pass" if executed.all? { |entry| entry[:status] == "pass" }
|
|
183
|
+
return "partial" if executed.any? { |entry| entry[:status] == "pass" }
|
|
184
|
+
|
|
185
|
+
"fail"
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
def build_retry_summary_content(timestamp:, initial_results:, retry_results:, initial_entries:, flaky_entries:, remaining_entries:, final_status:, base_dir:)
|
|
189
|
+
total_initial_failures = initial_entries.count { |entry| entry[:status] != "pass" }
|
|
190
|
+
lines = []
|
|
191
|
+
lines << "---"
|
|
192
|
+
lines << "suite-id: #{timestamp}"
|
|
193
|
+
lines << "package: suite"
|
|
194
|
+
lines << "status: #{final_status}"
|
|
195
|
+
lines << "retry-attempted: true"
|
|
196
|
+
lines << "flaky-scenarios: #{flaky_entries.length}"
|
|
197
|
+
lines << "remaining-failures: #{remaining_entries.length}"
|
|
198
|
+
lines << "attempt-1-report: #{display_path(initial_results[:report_path], base_dir)}"
|
|
199
|
+
lines << "attempt-2-report: #{display_path(retry_results[:report_path], base_dir)}"
|
|
200
|
+
lines << "---"
|
|
201
|
+
lines << ""
|
|
202
|
+
lines << "# E2E Final Suite Report: `suite`"
|
|
203
|
+
lines << ""
|
|
204
|
+
lines << "## Attempt Summary"
|
|
205
|
+
lines << ""
|
|
206
|
+
lines << "| Attempt | Report | Status | Scenarios | Failures |"
|
|
207
|
+
lines << "|---|---|---:|---:|---:|"
|
|
208
|
+
lines << "| 1 | `#{display_path(initial_results[:report_path], base_dir)}` | #{initial_results[:failed].to_i > 0 || initial_results[:errors].to_i > 0 ? "Fail" : "Pass"} | #{initial_results[:total]} | #{initial_results[:failed].to_i + initial_results[:errors].to_i} |"
|
|
209
|
+
lines << "| 2 | `#{display_path(retry_results[:report_path], base_dir)}` | #{retry_results[:failed].to_i > 0 || retry_results[:errors].to_i > 0 ? "Fail" : "Pass"} | #{retry_results[:total]} | #{retry_results[:failed].to_i + retry_results[:errors].to_i} |"
|
|
210
|
+
lines << ""
|
|
211
|
+
lines << "First-pass failing scenarios: #{total_initial_failures}"
|
|
212
|
+
lines << "Recovered on retry (flaky): #{flaky_entries.length}"
|
|
213
|
+
lines << "Remaining failures after retry: #{remaining_entries.length}"
|
|
214
|
+
lines << ""
|
|
215
|
+
lines << "## Flaky Recoveries"
|
|
216
|
+
lines << ""
|
|
217
|
+
if flaky_entries.empty?
|
|
218
|
+
lines << "None."
|
|
219
|
+
else
|
|
220
|
+
flaky_entries.each do |entry|
|
|
221
|
+
lines << "### #{entry[:test_id]}"
|
|
222
|
+
lines << ""
|
|
223
|
+
lines << "- Title: #{entry[:title]}"
|
|
224
|
+
lines << "- Attempt 1 status: `#{entry[:status]}`"
|
|
225
|
+
lines << "- Attempt 1 report directory: `#{entry[:report_dir_display]}`"
|
|
226
|
+
lines << "- Attempt 2 report directory: `#{entry[:retry_entry][:report_dir_display]}`"
|
|
227
|
+
entry[:failed_entries].each do |failure|
|
|
228
|
+
lines << "- #{format_failure_entry(failure)}"
|
|
229
|
+
end
|
|
230
|
+
lines << ""
|
|
231
|
+
end
|
|
232
|
+
end
|
|
233
|
+
lines << "## Remaining Failures"
|
|
234
|
+
lines << ""
|
|
235
|
+
if remaining_entries.empty?
|
|
236
|
+
lines << "None."
|
|
237
|
+
else
|
|
238
|
+
remaining_entries.each do |entry|
|
|
239
|
+
lines << "### #{entry[:test_id]}"
|
|
240
|
+
lines << ""
|
|
241
|
+
lines << "- Title: #{entry[:title]}"
|
|
242
|
+
lines << "- Attempt 2 status: `#{entry[:status]}`"
|
|
243
|
+
lines << "- Attempt 2 report directory: `#{entry[:report_dir_display]}`"
|
|
244
|
+
entry[:failed_entries].each do |failure|
|
|
245
|
+
lines << "- #{format_failure_entry(failure)}"
|
|
246
|
+
end
|
|
247
|
+
lines << ""
|
|
248
|
+
end
|
|
249
|
+
end
|
|
250
|
+
|
|
251
|
+
lines.join("\n")
|
|
252
|
+
end
|
|
253
|
+
|
|
254
|
+
def format_failure_entry(failure)
|
|
255
|
+
tc = failure[:tc] || failure["tc"]
|
|
256
|
+
category = failure[:category] || failure["category"] || "runner-error"
|
|
257
|
+
evidence = failure[:evidence] || failure["evidence"] || "See attempt report for details"
|
|
258
|
+
tc ? "`#{tc}` (`#{category}`) - #{evidence}" : "`#{category}` - #{evidence}"
|
|
259
|
+
end
|
|
260
|
+
|
|
80
261
|
# Attempt LLM synthesis for narrative sections only, falling back to
|
|
81
262
|
# deterministic defaults when the model is unavailable or malformed.
|
|
82
263
|
def synthesize_narrative_sections(results_data, package:, timestamp:, overall_status:, executed_at:)
|
|
@@ -31,7 +31,8 @@ module Ace
|
|
|
31
31
|
# @param timestamp_generator Timestamp generator (injectable)
|
|
32
32
|
def initialize(max_parallel: 4, base_dir: nil, discoverer: nil, affected_detector: nil,
|
|
33
33
|
failure_finder: nil, output: $stdout, use_color: nil, progress: false,
|
|
34
|
-
suite_report_writer: nil, scenario_loader: nil, timestamp_generator: nil
|
|
34
|
+
suite_report_writer: nil, scenario_loader: nil, timestamp_generator: nil,
|
|
35
|
+
runtime_builder: nil, shared_runtime_cache_root: nil)
|
|
35
36
|
@max_parallel = max_parallel
|
|
36
37
|
@base_dir = base_dir || Dir.pwd
|
|
37
38
|
@discoverer = discoverer || Molecules::TestDiscoverer.new
|
|
@@ -44,6 +45,14 @@ module Ace
|
|
|
44
45
|
@suite_report_writer = suite_report_writer || Molecules::SuiteReportWriter.new(config: config)
|
|
45
46
|
@loader = scenario_loader || Molecules::ScenarioLoader.new
|
|
46
47
|
@timestamp_generator = timestamp_generator || method(:default_timestamp)
|
|
48
|
+
@runtime_builder = runtime_builder || Molecules::SandboxRuntimeBuilder.new(
|
|
49
|
+
source_root: @base_dir,
|
|
50
|
+
ruby_version: config.dig("sandbox", "ruby_version") || Molecules::ConfigLoader.default_sandbox_ruby_version
|
|
51
|
+
)
|
|
52
|
+
@shared_runtime_cache_root = File.expand_path(
|
|
53
|
+
shared_runtime_cache_root || File.join(@base_dir, Molecules::SandboxRuntimeBuilder::DEFAULT_SHARED_RUNTIME_CACHE_ROOT)
|
|
54
|
+
)
|
|
55
|
+
@shared_runtime_root = nil
|
|
47
56
|
end
|
|
48
57
|
|
|
49
58
|
# Run E2E tests across all packages
|
|
@@ -124,6 +133,7 @@ module Ace
|
|
|
124
133
|
|
|
125
134
|
total_tests = package_tests.values.flatten.size
|
|
126
135
|
pkg_count = package_tests.keys.size
|
|
136
|
+
prepare_shared_runtime_cache if total_tests > 0
|
|
127
137
|
|
|
128
138
|
# Pre-compute column widths for aligned output
|
|
129
139
|
compute_column_widths(package_tests)
|
|
@@ -330,7 +340,7 @@ module Ace
|
|
|
330
340
|
cmd_array = build_test_command(package, test_file, options, run_id: run_id)
|
|
331
341
|
|
|
332
342
|
# Spawn process with array form (no shell invocation)
|
|
333
|
-
stdin, stdout, stderr, thread = Open3.popen3(*cmd_array, chdir: @base_dir)
|
|
343
|
+
stdin, stdout, stderr, thread = Open3.popen3(suite_subprocess_env, *cmd_array, chdir: @base_dir)
|
|
334
344
|
|
|
335
345
|
{pid: thread.pid, thread: thread, stdout: stdout, stderr: stderr,
|
|
336
346
|
stdin: stdin, package: package, test_file: test_file, output: String.new}
|
|
@@ -395,6 +405,18 @@ module Ace
|
|
|
395
405
|
File.executable?(local) ? local : "ace-test-e2e"
|
|
396
406
|
end
|
|
397
407
|
|
|
408
|
+
def prepare_shared_runtime_cache
|
|
409
|
+
@shared_runtime_root = @runtime_builder.prepare_shared_runtime(
|
|
410
|
+
cache_root: @shared_runtime_cache_root
|
|
411
|
+
)
|
|
412
|
+
end
|
|
413
|
+
|
|
414
|
+
def suite_subprocess_env
|
|
415
|
+
return {} if @shared_runtime_root.to_s.empty?
|
|
416
|
+
|
|
417
|
+
{Molecules::SandboxRuntimeBuilder::SHARED_RUNTIME_ENV_KEY => @shared_runtime_root}
|
|
418
|
+
end
|
|
419
|
+
|
|
398
420
|
# Extract test ID from file path
|
|
399
421
|
#
|
|
400
422
|
# @param test_file [String] Path to scenario.yml file
|
|
@@ -864,7 +886,7 @@ module Ace
|
|
|
864
886
|
# @return [Hash] Test result
|
|
865
887
|
def run_single_test(package, test_file, options, run_id: nil)
|
|
866
888
|
cmd_array = build_test_command(package, test_file, options, run_id: run_id)
|
|
867
|
-
output, stderr, status = Open3.capture3(*cmd_array, chdir: @base_dir)
|
|
889
|
+
output, stderr, status = Open3.capture3(suite_subprocess_env, *cmd_array, chdir: @base_dir)
|
|
868
890
|
|
|
869
891
|
# Combine stdout and stderr for parsing
|
|
870
892
|
combined_output = output + stderr
|