ace-test-runner-e2e 0.29.8 → 0.40.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ace-defaults/e2e-runner/config.yml +14 -2
- data/CHANGELOG.md +233 -0
- data/README.md +2 -2
- data/exe/ace-test-e2e-sh +9 -4
- data/handbook/guides/e2e-testing.g.md +75 -9
- data/handbook/guides/scenario-yml-reference.g.md +21 -8
- data/handbook/guides/tc-authoring.g.md +23 -5
- data/handbook/skills/as-e2e-fix/SKILL.md +2 -2
- data/handbook/skills/as-e2e-review/SKILL.md +2 -2
- data/handbook/templates/ace-taskflow-fixture.template.md +17 -17
- data/handbook/templates/agent-experience-report.template.md +3 -2
- data/handbook/templates/scenario.yml.template.yml +7 -2
- data/handbook/templates/tc-file.template.md +16 -4
- data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +53 -6
- data/handbook/workflow-instructions/e2e/create.wf.md +128 -25
- data/handbook/workflow-instructions/e2e/execute.wf.md +11 -7
- data/handbook/workflow-instructions/e2e/fix.wf.md +84 -15
- data/handbook/workflow-instructions/e2e/plan-changes.wf.md +33 -1
- data/handbook/workflow-instructions/e2e/review.wf.md +40 -25
- data/handbook/workflow-instructions/e2e/rewrite.wf.md +22 -8
- data/handbook/workflow-instructions/e2e/run.wf.md +50 -26
- data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +4 -4
- data/lib/ace/test/end_to_end_runner/atoms/artifact_contract_validator.rb +138 -0
- data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +7 -5
- data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +73 -7
- data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +195 -5
- data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +58 -9
- data/lib/ace/test/end_to_end_runner/models/test_case.rb +8 -2
- data/lib/ace/test/end_to_end_runner/models/test_result.rb +9 -3
- data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +4 -2
- data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +7 -2
- data/lib/ace/test/end_to_end_runner/molecules/artifact_pruner.rb +61 -0
- data/lib/ace/test/end_to_end_runner/molecules/bwrap_sandbox_backend.rb +271 -0
- data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +28 -1
- data/lib/ace/test/end_to_end_runner/molecules/integration_runner.rb +122 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +235 -18
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +164 -13
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +91 -19
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +121 -18
- data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +15 -12
- data/lib/ace/test/end_to_end_runner/molecules/sandbox_runtime_builder.rb +374 -0
- data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +83 -5
- data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +121 -16
- data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +422 -97
- data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +38 -13
- data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +27 -5
- data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +98 -18
- data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +159 -19
- data/lib/ace/test/end_to_end_runner/version.rb +1 -1
- data/lib/ace/test/end_to_end_runner.rb +4 -0
- metadata +21 -2
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "fileutils"
|
|
4
|
+
require "ostruct"
|
|
4
5
|
require "yaml"
|
|
6
|
+
require "set"
|
|
7
|
+
require "date"
|
|
5
8
|
require "ace/llm"
|
|
6
9
|
require "ace/llm/query_interface"
|
|
7
10
|
|
|
@@ -9,7 +12,7 @@ module Ace
|
|
|
9
12
|
module Test
|
|
10
13
|
module EndToEndRunner
|
|
11
14
|
module Molecules
|
|
12
|
-
# Writes
|
|
15
|
+
# Writes an aggregated package or suite report
|
|
13
16
|
#
|
|
14
17
|
# Uses LLM synthesis to generate rich reports with root cause analysis,
|
|
15
18
|
# friction insights, and improvement suggestions. Falls back to a static
|
|
@@ -22,7 +25,13 @@ module Ace
|
|
|
22
25
|
@timeout = reporting["timeout"] || 60
|
|
23
26
|
end
|
|
24
27
|
|
|
25
|
-
|
|
28
|
+
# File-name builders keyed by report kind. Each builder is called with the
# run timestamp and the package name; only the :package kind uses the
# package name in the resulting file name.
REPORT_KINDS = {
  package: lambda { |timestamp, package| "#{timestamp}-#{package}-report.md" },
  suite: lambda { |timestamp, _package| "#{timestamp}-suite-report.md" },
  suite_final: lambda { |timestamp, _package| "#{timestamp}-suite-final-report.md" }
}.freeze
|
|
33
|
+
|
|
34
|
+
# Write an aggregated report
|
|
26
35
|
#
|
|
27
36
|
# @param results [Array<Models::TestResult>] Test results (ordered)
|
|
28
37
|
# @param scenarios [Array<Models::TestScenario>] Corresponding scenarios
|
|
@@ -30,22 +39,69 @@ module Ace
|
|
|
30
39
|
# @param timestamp [String] Timestamp ID for this run
|
|
31
40
|
# @param base_dir [String] Base directory for cache output
|
|
32
41
|
# @return [String] Path to the written report file
|
|
33
|
-
def write(results, scenarios, package:, timestamp:, base_dir:)
|
|
42
|
+
def write(results, scenarios, package:, timestamp:, base_dir:, report_kind: :package, diagnostics: nil)
|
|
34
43
|
cache_dir = File.join(base_dir, ".ace-local", "test-e2e")
|
|
35
44
|
FileUtils.mkdir_p(cache_dir)
|
|
36
45
|
|
|
37
|
-
report_path = File.join(cache_dir,
|
|
46
|
+
report_path = File.join(cache_dir, report_filename(report_kind, timestamp, package))
|
|
38
47
|
|
|
39
48
|
overall_status = compute_status(results)
|
|
40
49
|
executed_at = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
50
|
+
results_data = build_results_data(results, scenarios)
|
|
51
|
+
narrative_sections = synthesize_narrative_sections(
|
|
52
|
+
results_data,
|
|
44
53
|
package: package,
|
|
45
54
|
timestamp: timestamp,
|
|
46
55
|
overall_status: overall_status,
|
|
47
56
|
executed_at: executed_at
|
|
48
57
|
)
|
|
58
|
+
content = build_report(
|
|
59
|
+
results_data,
|
|
60
|
+
package: package,
|
|
61
|
+
timestamp: timestamp,
|
|
62
|
+
overall_status: overall_status,
|
|
63
|
+
executed_at: executed_at,
|
|
64
|
+
narrative_sections: narrative_sections,
|
|
65
|
+
diagnostics: diagnostics
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
File.write(report_path, content)
|
|
69
|
+
report_path
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Write a deterministic wrapper report for a two-attempt suite run.
|
|
73
|
+
#
|
|
74
|
+
# Preserves first-pass failure evidence while reflecting the final retry outcome.
|
|
75
|
+
def write_retry_summary(initial_results:, retry_results:, timestamp:, base_dir:, package: "suite")
|
|
76
|
+
cache_dir = File.join(base_dir, ".ace-local", "test-e2e")
|
|
77
|
+
FileUtils.mkdir_p(cache_dir)
|
|
78
|
+
|
|
79
|
+
report_path = File.join(cache_dir, report_filename(:suite_final, timestamp, package))
|
|
80
|
+
initial_entries = flatten_attempt_results(initial_results, base_dir: base_dir)
|
|
81
|
+
retry_entries = flatten_attempt_results(retry_results, base_dir: base_dir)
|
|
82
|
+
retry_by_test = retry_entries.each_with_object({}) { |entry, memo| memo[entry[:test_id]] = entry }
|
|
83
|
+
|
|
84
|
+
flaky_entries = initial_entries.filter_map do |entry|
|
|
85
|
+
next if entry[:status] == "pass"
|
|
86
|
+
|
|
87
|
+
retry_entry = retry_by_test[entry[:test_id]]
|
|
88
|
+
next unless retry_entry && retry_entry[:status] == "pass"
|
|
89
|
+
|
|
90
|
+
entry.merge(retry_entry: retry_entry)
|
|
91
|
+
end.sort_by { |entry| entry[:test_id] }
|
|
92
|
+
remaining_entries = retry_entries.reject { |entry| entry[:status] == "pass" }.sort_by { |entry| entry[:test_id] }
|
|
93
|
+
final_status = compute_retry_summary_status(retry_entries)
|
|
94
|
+
|
|
95
|
+
content = build_retry_summary_content(
|
|
96
|
+
timestamp: timestamp,
|
|
97
|
+
initial_results: initial_results,
|
|
98
|
+
retry_results: retry_results,
|
|
99
|
+
initial_entries: initial_entries,
|
|
100
|
+
flaky_entries: flaky_entries,
|
|
101
|
+
remaining_entries: remaining_entries,
|
|
102
|
+
final_status: final_status,
|
|
103
|
+
base_dir: base_dir
|
|
104
|
+
)
|
|
49
105
|
|
|
50
106
|
File.write(report_path, content)
|
|
51
107
|
report_path
|
|
@@ -53,10 +109,158 @@ module Ace
|
|
|
53
109
|
|
|
54
110
|
private
|
|
55
111
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
112
|
+
# Resolve the report file name for the given report kind.
#
# @param report_kind [Symbol, String] Key into REPORT_KINDS (converted with to_sym)
# @param timestamp [String] Timestamp ID for this run
# @param package [String] Package name passed through to the name builder
# @return [String] File name produced by the matching REPORT_KINDS builder
# @raise [ArgumentError] When report_kind has no entry in REPORT_KINDS
def report_filename(report_kind, timestamp, package)
  name_builder = REPORT_KINDS.fetch(report_kind.to_sym) do
    raise ArgumentError, "Unknown report kind: #{report_kind}"
  end

  name_builder.call(timestamp, package)
end
|
|
118
|
+
|
|
119
|
+
# Flatten one attempt's per-package results into a single list of
# per-scenario entries used by the retry summary.
#
# Each entry combines the in-memory result hash with what can be read from
# the scenario's report directory (metadata.yml and the report.md front
# matter). In-memory values win over file values for the case counts.
#
# @param results [Hash] Attempt results; scenario hashes are read from the
#   values of results[:packages] (missing key is treated as no packages)
# @param base_dir [String] Base directory used to shorten report paths for display
# @return [Array<Hash>] One hash per scenario with :test_id, :title, :status,
#   :report_dir, :report_dir_display, :report_dir_name, :failed_entries,
#   :passed_cases and :total_cases
def flatten_attempt_results(results, base_dir:)
  scenario_results = results.fetch(:packages, {}).values.flatten

  scenario_results.map do |result|
    report_dir = result[:report_dir]
    metadata = read_retry_metadata(report_dir)
    frontmatter = read_report_frontmatter(report_dir)

    fallback_name = result[:test_name] || result[:test_id] || ""
    test_id = metadata["test-id"] || canonical_retry_test_id(fallback_name)

    # Keep only hash-shaped failure records; accept string or symbol keys.
    failures = Array(metadata["failed"]).select { |entry| entry.is_a?(Hash) }.map do |entry|
      {
        tc: entry["tc"] || entry[:tc],
        category: entry["category"] || entry[:category] || "runner-error",
        evidence: entry["evidence"] || entry[:evidence] || "See attempt report for details"
      }
    end

    # A non-passing scenario without recorded failures still gets one
    # synthetic entry so the summary always has something to show.
    if failures.empty? && result[:status] != "pass"
      failures << {
        tc: nil,
        category: result[:status] || "runner-error",
        evidence: result[:summary] || result[:error] || "See attempt report for details"
      }
    end

    passed_cases = result[:passed_cases] || metadata["tcs-passed"] || metadata.dig("results", "passed") || 0
    total_cases = result[:total_cases] || metadata["tcs-total"] || metadata.dig("results", "total") || 0

    {
      test_id: test_id,
      title: frontmatter["title"] || test_id,
      status: result[:status],
      report_dir: report_dir,
      report_dir_display: display_path(report_dir, base_dir),
      report_dir_name: report_dir ? File.basename(report_dir) : nil,
      failed_entries: failures,
      passed_cases: passed_cases,
      total_cases: total_cases
    }
  end
end
|
|
156
|
+
|
|
157
|
+
# Load metadata.yml from a scenario report directory.
#
# Reading is best-effort: a missing directory or file, unparsable YAML, or a
# disallowed class all yield an empty hash. A document whose root is not a
# mapping (for example a list or a bare scalar) also yields an empty hash, so
# callers can always index the result with string keys.
#
# @param report_dir [String, nil] Scenario report directory
# @return [Hash] Parsed metadata, or {} when nothing usable could be read
def read_retry_metadata(report_dir)
  return {} unless report_dir

  path = File.join(report_dir, "metadata.yml")
  return {} unless File.exist?(path)

  parsed = YAML.safe_load_file(path, permitted_classes: [Time, Date])
  # Guard against list/scalar documents: callers use Hash-only lookups.
  parsed.is_a?(Hash) ? parsed : {}
rescue
  {}
end
|
|
167
|
+
|
|
168
|
+
# Derive the canonical scenario ID from a test name.
#
# When the name starts with a "TS-<AREA>-<digits><letters>" prefix (matched
# case-insensitively), that prefix is returned upper-cased. Otherwise the
# name is returned unchanged.
#
# @param test_name [String, nil] Test name or ID reported by the runner
# @return [String, nil] Upper-cased ID prefix, or test_name itself when no prefix matches
def canonical_retry_test_id(test_name)
  id_prefix = test_name.to_s[/\A(TS-[A-Z0-9]+-\d+[a-z]*)/i, 1]
  return test_name unless id_prefix

  id_prefix.upcase
end
|
|
172
|
+
|
|
173
|
+
# Shorten a path for display by making it relative to base_dir.
#
# The prefix is removed only when the path lies inside base_dir, i.e. starts
# with base_dir followed by a "/" separator. A trailing "/" on base_dir is
# ignored, and a nil or empty base_dir leaves the path untouched instead of
# raising or stripping the leading "/" of an absolute path.
#
# @param path [String, #to_s, nil] Path to display
# @param base_dir [String, #to_s, nil] Directory the path should be shown relative to
# @return [String, nil] Relative path when inside base_dir, the path itself
#   otherwise, or nil when path is nil
def display_path(path, base_dir)
  return nil if path.nil?

  shown = path.to_s
  root = base_dir.to_s.chomp("/")
  return shown if root.empty?

  # delete_prefix is a no-op unless the path really starts with "<root>/",
  # so sibling directories such as "<root>-other/" are left alone.
  shown.delete_prefix("#{root}/")
end
|
|
178
|
+
|
|
179
|
+
# Compute the final status for the retry summary from the retry entries.
#
# Skipped entries are ignored. The result is "skip" when nothing was
# executed, "pass" when every executed entry passed, "partial" when only
# some passed, and "fail" when none passed.
#
# @param entries [Array<Hash>] Entries carrying a :status value
# @return [String] One of "skip", "pass", "partial" or "fail"
def compute_retry_summary_status(entries)
  statuses = entries.map { |entry| entry[:status] }.reject { |status| status == "skip" }
  return "skip" if statuses.empty?

  pass_count = statuses.count { |status| status == "pass" }
  if pass_count == statuses.size
    "pass"
  elsif pass_count.positive?
    "partial"
  else
    "fail"
  end
end
|
|
187
|
+
|
|
188
|
+
# Render the markdown body of the final (two-attempt) suite report.
#
# The document consists of YAML front matter, an attempt summary table,
# the recovery counters, a "Flaky Recoveries" section and a "Remaining
# Failures" section. Sections without entries show "None.".
#
# @param timestamp [String] Timestamp ID written as suite-id
# @param initial_results [Hash] First attempt; reads :report_path, :failed, :errors, :total
# @param retry_results [Hash] Second attempt; reads the same keys
# @param initial_entries [Array<Hash>] Flattened first-attempt entries
# @param flaky_entries [Array<Hash>] Entries that failed first and passed on retry
#   (each carries :retry_entry)
# @param remaining_entries [Array<Hash>] Retry entries that still did not pass
# @param final_status [String] Status written to the front matter
# @param base_dir [String] Base directory used to shorten report paths
# @return [String] Report content, lines joined with "\n"
def build_retry_summary_content(timestamp:, initial_results:, retry_results:, initial_entries:, flaky_entries:, remaining_entries:, final_status:, base_dir:)
  first_pass_failures = initial_entries.count { |entry| entry[:status] != "pass" }
  attempt_1_report = display_path(initial_results[:report_path], base_dir)
  attempt_2_report = display_path(retry_results[:report_path], base_dir)

  # One row of the attempt table; failures and errors are counted together.
  attempt_row = lambda do |number, report, attempt|
    failed = attempt[:failed].to_i
    errors = attempt[:errors].to_i
    verdict = (failed > 0 || errors > 0) ? "Fail" : "Pass"
    "| #{number} | `#{report}` | #{verdict} | #{attempt[:total]} | #{failed + errors} |"
  end

  failure_bullets = lambda do |entry|
    entry[:failed_entries].map { |failure| "- #{format_failure_entry(failure)}" }
  end

  lines = [
    "---",
    "suite-id: #{timestamp}",
    "package: suite",
    "status: #{final_status}",
    "retry-attempted: true",
    "flaky-scenarios: #{flaky_entries.length}",
    "remaining-failures: #{remaining_entries.length}",
    "attempt-1-report: #{attempt_1_report}",
    "attempt-2-report: #{attempt_2_report}",
    "---",
    "",
    "# E2E Final Suite Report: `suite`",
    "",
    "## Attempt Summary",
    "",
    "| Attempt | Report | Status | Scenarios | Failures |",
    "|---|---|---:|---:|---:|",
    attempt_row.call(1, attempt_1_report, initial_results),
    attempt_row.call(2, attempt_2_report, retry_results),
    "",
    "First-pass failing scenarios: #{first_pass_failures}",
    "Recovered on retry (flaky): #{flaky_entries.length}",
    "Remaining failures after retry: #{remaining_entries.length}",
    "",
    "## Flaky Recoveries",
    ""
  ]

  if flaky_entries.empty?
    lines << "None."
  else
    flaky_entries.each do |entry|
      lines.concat([
        "### #{entry[:test_id]}",
        "",
        "- Title: #{entry[:title]}",
        "- Attempt 1 status: `#{entry[:status]}`",
        "- Attempt 1 report directory: `#{entry[:report_dir_display]}`",
        "- Attempt 2 report directory: `#{entry[:retry_entry][:report_dir_display]}`"
      ])
      lines.concat(failure_bullets.call(entry))
      lines << ""
    end
  end

  lines << "## Remaining Failures"
  lines << ""

  if remaining_entries.empty?
    lines << "None."
  else
    remaining_entries.each do |entry|
      lines.concat([
        "### #{entry[:test_id]}",
        "",
        "- Title: #{entry[:title]}",
        "- Attempt 2 status: `#{entry[:status]}`",
        "- Attempt 2 report directory: `#{entry[:report_dir_display]}`"
      ])
      lines.concat(failure_bullets.call(entry))
      lines << ""
    end
  end

  lines.join("\n")
end
|
|
59
253
|
|
|
254
|
+
# Format one failure record as a single markdown fragment.
#
# Keys may be symbols or strings (symbols are checked first). A missing
# category falls back to "runner-error" and missing evidence to a generic
# pointer at the attempt report.
#
# @param failure [Hash] Failure record with tc, category and evidence
# @return [String] "`tc` (`category`) - evidence", or "`category` - evidence"
#   when no test case ID is present
def format_failure_entry(failure)
  case_id = failure[:tc] || failure["tc"]
  kind = failure[:category] || failure["category"] || "runner-error"
  detail = failure[:evidence] || failure["evidence"] || "See attempt report for details"

  return "`#{kind}` - #{detail}" unless case_id

  "`#{case_id}` (`#{kind}`) - #{detail}"
end
|
|
260
|
+
|
|
261
|
+
# Attempt LLM synthesis for narrative sections only, falling back to
|
|
262
|
+
# deterministic defaults when the model is unavailable or malformed.
|
|
263
|
+
def synthesize_narrative_sections(results_data, package:, timestamp:, overall_status:, executed_at:)
|
|
60
264
|
prompt_builder = Atoms::SuiteReportPromptBuilder.new
|
|
61
265
|
user_prompt = prompt_builder.build(
|
|
62
266
|
results_data,
|
|
@@ -73,51 +277,35 @@ module Ace
|
|
|
73
277
|
timeout: @timeout,
|
|
74
278
|
temperature: 0.3
|
|
75
279
|
)
|
|
76
|
-
|
|
77
|
-
total_passed = results.sum(&:passed_count)
|
|
78
|
-
total_tc = results.sum(&:total_count)
|
|
79
|
-
validate_overall_line(response[:text], total_passed, total_tc)
|
|
280
|
+
extract_narrative_sections(response[:text])
|
|
80
281
|
rescue => e
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
executed_date = Time.now.utc.strftime("%Y-%m-%d")
|
|
84
|
-
total_passed = results.sum(&:passed_count)
|
|
85
|
-
total_failed = results.sum(&:failed_count)
|
|
86
|
-
total_tc = results.sum(&:total_count)
|
|
87
|
-
|
|
88
|
-
build_static_report(
|
|
89
|
-
results, scenarios,
|
|
90
|
-
package: package,
|
|
91
|
-
timestamp: timestamp,
|
|
92
|
-
overall_status: overall_status,
|
|
93
|
-
executed_at: executed_at,
|
|
94
|
-
executed_date: executed_date,
|
|
95
|
-
total_passed: total_passed,
|
|
96
|
-
total_failed: total_failed,
|
|
97
|
-
total_tc: total_tc
|
|
98
|
-
)
|
|
282
|
+
warn "Warning: LLM synthesis failed (#{e.class}: #{e.message}), using deterministic narrative" if ENV["DEBUG"]
|
|
283
|
+
fallback_narrative_sections(results_data)
|
|
99
284
|
end
|
|
100
285
|
|
|
101
286
|
# Read summary and experience report content from each result's report dir
|
|
102
287
|
def build_results_data(results, scenarios)
|
|
103
288
|
results.each_with_index.map do |result, i|
|
|
104
|
-
scenario = scenarios
|
|
289
|
+
scenario = scenario_for_result(result, scenarios, i)
|
|
105
290
|
report_dir = result.report_dir
|
|
106
291
|
|
|
107
292
|
summary_content = read_report_file(report_dir, "summary.r.md")
|
|
108
293
|
experience_content = read_report_file(report_dir, "experience.r.md")
|
|
109
294
|
|
|
295
|
+
report_metadata = read_report_frontmatter(report_dir)
|
|
296
|
+
|
|
110
297
|
{
|
|
111
298
|
test_id: result.test_id,
|
|
112
299
|
title: scenario.title,
|
|
113
300
|
status: result.status,
|
|
114
|
-
passed: result
|
|
115
|
-
failed: result
|
|
116
|
-
total: result
|
|
117
|
-
test_cases: result
|
|
301
|
+
passed: reported_count(report_metadata, result, "passed"),
|
|
302
|
+
failed: reported_count(report_metadata, result, "failed"),
|
|
303
|
+
total: reported_count(report_metadata, result, "total"),
|
|
304
|
+
test_cases: canonical_test_cases(report_metadata, result),
|
|
118
305
|
report_dir_name: report_dir ? File.basename(report_dir) : nil,
|
|
119
306
|
summary_content: summary_content,
|
|
120
|
-
experience_content: experience_content
|
|
307
|
+
experience_content: experience_content,
|
|
308
|
+
canonical_tc_source: !report_metadata.empty?
|
|
121
309
|
}
|
|
122
310
|
end
|
|
123
311
|
end
|
|
@@ -132,21 +320,71 @@ module Ace
|
|
|
132
320
|
File.read(path)
|
|
133
321
|
end
|
|
134
322
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
def validate_overall_line(report_text, expected_passed, expected_total)
|
|
138
|
-
expected_pct = (expected_total > 0) ? (expected_passed * 100.0 / expected_total).round(0) : 0
|
|
139
|
-
correct_line = "**Overall:** #{expected_passed}/#{expected_total} test cases passed (#{expected_pct}%)"
|
|
323
|
+
# Read the YAML front matter of report.md in a scenario report directory.
#
# Reading is best-effort: a missing directory or file, a report without a
# leading "---" front matter block, or unparsable YAML all yield an empty
# hash. Front matter whose root is not a mapping (a list or a bare scalar)
# also yields an empty hash, so callers can rely on Hash lookups and on
# `empty?` meaning "no canonical metadata".
#
# @param report_dir [String, nil] Scenario report directory
# @return [Hash] Parsed front matter, or {} when nothing usable could be read
def read_report_frontmatter(report_dir)
  return {} unless report_dir

  path = File.join(report_dir, "report.md")
  return {} unless File.exist?(path)

  # Front matter must open the file and be closed by its own "---" line.
  match = File.read(path).match(/\A---\s*\n(.*?)\n---\s*\n/m)
  return {} unless match

  parsed = YAML.safe_load(match[1], permitted_classes: [Time, Date])
  # Guard against list/scalar documents: callers use Hash-only lookups.
  parsed.is_a?(Hash) ? parsed : {}
rescue
  {}
end
|
|
337
|
+
|
|
338
|
+
# Pick a test-case count, preferring the value recorded in report metadata.
#
# The metadata key is "tcs-<kind>". When it is absent (or nil/false) the
# count comes from the result object: passed_count for "passed",
# failed_count for "failed", and total_count for any other kind.
#
# @param report_metadata [Hash] Front matter read from the scenario report
# @param result [#passed_count, #failed_count, #total_count] Runner result
# @param kind [String] "passed", "failed" or "total"
# @return [Object] Metadata value when present, otherwise the result's count
def reported_count(report_metadata, result, kind)
  runner_count =
    if kind == "passed"
      result.passed_count
    elsif kind == "failed"
      result.failed_count
    else
      result.total_count
    end

  recorded = report_metadata["tcs-#{kind}"]
  recorded || runner_count
end
|
|
348
|
+
|
|
349
|
+
# Build the per-test-case list from the scenario report's front matter.
#
# Falls back to result.test_cases when there is no metadata, or when the
# metadata yields no test cases at all. Otherwise the list is assembled from:
#   * "failed"               - hashes with tc/category/evidence (string or symbol keys)
#   * "canonical-failed-tcs" - bare IDs; added as failures when not already listed
#   * "passed"               - bare IDs; added as passes
#
# A test case is reported exactly once. An ID that appears as failed
# (through either failure list) is never also reported as passed, and
# repeated IDs in "canonical-failed-tcs" are added only once.
#
# @param report_metadata [Hash] Front matter read from the scenario report
# @param result [#test_cases] Runner result used as the fallback source
# @return [Array<Hash>] Passed entries followed by failed entries, or
#   result.test_cases when no canonical entries exist
def canonical_test_cases(report_metadata, result)
  return result.test_cases if report_metadata.empty?

  failed_entries = Array(report_metadata["failed"]).filter_map do |entry|
    next unless entry.is_a?(Hash)

    id = entry["tc"] || entry[:tc]
    next unless id

    {
      id: id,
      description: "",
      status: "fail",
      notes: entry["evidence"] || entry[:evidence] || "See scenario report for details",
      category: entry["category"] || entry[:category] || "runner-error"
    }
  end

  failed_ids = failed_entries.map { |entry| entry[:id] }.to_set
  Array(report_metadata["canonical-failed-tcs"]).each do |tc_id|
    # Set#add? returns nil for an ID already known as failed; recording the
    # new ID here keeps the pass filter below and later duplicates in sync.
    next unless failed_ids.add?(tc_id)

    failed_entries << {
      id: tc_id,
      description: "",
      status: "fail",
      notes: "See scenario report for details",
      category: "runner-error"
    }
  end

  passed_entries = Array(report_metadata["passed"]).filter_map do |tc_id|
    next if failed_ids.include?(tc_id)

    {id: tc_id, description: "", status: "pass", notes: ""}
  end

  canonical = passed_entries + failed_entries
  canonical.empty? ? result.test_cases : canonical
end
|
|
151
389
|
|
|
152
390
|
def compute_status(results)
|
|
@@ -163,22 +401,26 @@ module Ace
|
|
|
163
401
|
end
|
|
164
402
|
end
|
|
165
403
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
404
|
+
def build_report(results_data, package:, timestamp:, overall_status:, executed_at:, narrative_sections:, diagnostics:)
|
|
405
|
+
total_skipped = results_data.count { |r| r[:status] == "skip" }
|
|
406
|
+
total_passed = results_data.sum { |r| r[:passed] }
|
|
407
|
+
total_tc = results_data.sum { |r| r[:total] }
|
|
170
408
|
|
|
171
409
|
parts = []
|
|
172
410
|
parts << build_frontmatter(
|
|
173
411
|
timestamp: timestamp, package: package, overall_status: overall_status,
|
|
174
|
-
tests_run:
|
|
412
|
+
tests_run: results_data.size, executed_at: executed_at, skipped: total_skipped
|
|
175
413
|
)
|
|
176
|
-
parts << build_header(package: package
|
|
177
|
-
parts << build_summary_table(
|
|
414
|
+
parts << build_header(package: package)
|
|
415
|
+
parts << build_summary_table(results_data)
|
|
178
416
|
parts << build_overall_line(total_passed: total_passed, total_tc: total_tc)
|
|
179
|
-
parts << build_failed_section(
|
|
180
|
-
parts <<
|
|
181
|
-
parts
|
|
417
|
+
parts << build_failed_section(results_data) if results_data.any? { |r| r[:failed].positive? }
|
|
418
|
+
parts << build_runner_diagnostics_section(diagnostics)
|
|
419
|
+
parts << build_narrative_section("Friction Analysis", narrative_sections[:friction])
|
|
420
|
+
parts << build_narrative_section("Improvement Suggestions", narrative_sections[:improvements])
|
|
421
|
+
parts << build_narrative_section("Positive Observations", narrative_sections[:positive])
|
|
422
|
+
parts << build_reports_section(results_data)
|
|
423
|
+
parts.compact.join("\n")
|
|
182
424
|
end
|
|
183
425
|
|
|
184
426
|
def build_frontmatter(timestamp:, package:, overall_status:, tests_run:, executed_at:, skipped: 0)
|
|
@@ -194,82 +436,165 @@ module Ace
|
|
|
194
436
|
FRONTMATTER
|
|
195
437
|
end
|
|
196
438
|
|
|
197
|
-
def build_header(package
|
|
198
|
-
skipped_info = (skipped > 0) ? " (#{skipped} skipped)" : ""
|
|
439
|
+
def build_header(package:)
|
|
199
440
|
<<~HEADER
|
|
200
|
-
# E2E
|
|
201
|
-
|
|
202
|
-
**Package:** #{package}
|
|
203
|
-
**Tests:** #{tests_run}#{skipped_info}
|
|
204
|
-
**Executed:** #{executed_date}
|
|
441
|
+
# E2E Suite Report: `#{package}`
|
|
205
442
|
HEADER
|
|
206
443
|
end
|
|
207
444
|
|
|
208
|
-
def build_summary_table(
|
|
209
|
-
rows =
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
"| #{result.test_id} | #{scenario.title} | #{status_label} | #{passed} | #{failed} | #{total} |"
|
|
445
|
+
def build_summary_table(results_data)
|
|
446
|
+
rows = results_data.map do |result|
|
|
447
|
+
status_label = result[:status].capitalize
|
|
448
|
+
passed = (result[:status] == "skip") ? "-" : result[:passed].to_s
|
|
449
|
+
failed = (result[:status] == "skip") ? "-" : result[:failed].to_s
|
|
450
|
+
total = (result[:status] == "skip") ? "-" : result[:total].to_s
|
|
451
|
+
"| #{result[:test_id]} | #{result[:title]} | #{status_label} | #{passed} | #{failed} | #{total} |"
|
|
216
452
|
end
|
|
217
453
|
|
|
218
454
|
<<~TABLE
|
|
219
|
-
## Summary
|
|
455
|
+
## Summary Table
|
|
220
456
|
|
|
221
457
|
| Test ID | Title | Status | Passed | Failed | Total |
|
|
222
|
-
|
|
458
|
+
|---|---|---:|---:|---:|---:|
|
|
223
459
|
#{rows.join("\n")}
|
|
224
460
|
TABLE
|
|
225
461
|
end
|
|
226
462
|
|
|
227
463
|
def build_overall_line(total_passed:, total_tc:)
|
|
228
|
-
pct = (total_tc > 0) ? (total_passed * 100.0 / total_tc).round(
|
|
229
|
-
|
|
464
|
+
pct = (total_tc > 0) ? (total_passed * 100.0 / total_tc).round(1) : 0.0
|
|
465
|
+
formatted_pct = (pct % 1).zero? ? pct.to_i.to_s : format("%.1f", pct)
|
|
466
|
+
<<~OVERALL
|
|
467
|
+
## Overall Line
|
|
468
|
+
|
|
469
|
+
**Overall:** #{total_passed}/#{total_tc} test cases passed (#{formatted_pct}%)
|
|
470
|
+
OVERALL
|
|
230
471
|
end
|
|
231
472
|
|
|
232
|
-
def build_failed_section(
|
|
473
|
+
def build_failed_section(results_data)
|
|
233
474
|
parts = ["\n## Failed Tests\n"]
|
|
234
475
|
|
|
235
|
-
|
|
236
|
-
next
|
|
476
|
+
results_data.each do |result|
|
|
477
|
+
next unless result[:failed].positive?
|
|
237
478
|
|
|
238
|
-
|
|
239
|
-
parts << "
|
|
479
|
+
parts << "### #{result[:test_id]}"
|
|
480
|
+
parts << ""
|
|
481
|
+
parts << "**Failed test case details**"
|
|
240
482
|
|
|
241
|
-
failed_tcs = result
|
|
483
|
+
failed_tcs = result[:test_cases].select { |tc| tc[:status] == "fail" }
|
|
242
484
|
if failed_tcs.any?
|
|
243
|
-
parts << "**Failed Test Cases:**"
|
|
244
485
|
failed_tcs.each do |tc|
|
|
245
|
-
|
|
486
|
+
category = tc[:category] || "runner-error"
|
|
487
|
+
details = tc[:notes].to_s.strip
|
|
488
|
+
details = tc[:description].to_s if details.empty?
|
|
489
|
+
parts << "- `#{tc[:id]}` (#{category}) — #{details}"
|
|
246
490
|
end
|
|
247
|
-
|
|
491
|
+
else
|
|
492
|
+
parts << "- Exact failed TC mapping unavailable in aggregate view — see scenario report for canonical details."
|
|
248
493
|
end
|
|
249
494
|
|
|
250
|
-
if result
|
|
251
|
-
parts << "
|
|
495
|
+
if result[:report_dir_name]
|
|
496
|
+
parts << ""
|
|
497
|
+
parts << "**Report directory:** `#{result[:report_dir_name]}`"
|
|
252
498
|
end
|
|
499
|
+
parts << ""
|
|
253
500
|
end
|
|
254
501
|
|
|
255
502
|
parts.join("\n")
|
|
256
503
|
end
|
|
257
504
|
|
|
258
|
-
def
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
505
|
+
# Render the "Runner Diagnostics" section for a dirty working tree.
#
# The section is produced only when diagnostics is a Hash with a truthy
# :dirty_worktree value. Each item of :new_tracked_entries becomes a
# bullet; when there are none a placeholder bullet is shown instead.
#
# @param diagnostics [Hash, nil] Runner diagnostics
# @return [String, nil] Markdown section ending in a newline, or nil when
#   there is nothing to report
def build_runner_diagnostics_section(diagnostics)
  dirty = diagnostics.is_a?(Hash) && diagnostics[:dirty_worktree]
  return nil unless dirty

  bullets = Array(diagnostics[:new_tracked_entries]).map { |line| "- `#{line}`" }
  listing = bullets.empty? ? "- No specific entries captured." : bullets.join("\n")

  [
    "## Runner Diagnostics",
    "",
    "Suite execution introduced new tracked working-tree changes relative to the pre-run snapshot.",
    "",
    listing
  ].join("\n") + "\n"
end
|
|
519
|
+
|
|
520
|
+
# Render one narrative section as a level-2 markdown heading plus body.
#
# @param title [String] Section heading text
# @param content [String, nil] Section body; surrounding whitespace is removed
# @return [String, nil] Markdown section ending in a newline, or nil when
#   the body is blank
def build_narrative_section(title, content)
  body = content.to_s.strip
  return nil if body.empty?

  "## #{title}\n\n#{body}\n"
end
|
|
529
|
+
|
|
530
|
+
def build_reports_section(results_data)
|
|
531
|
+
rows = results_data.map do |result|
|
|
532
|
+
dir = result[:report_dir_name] || "N/A"
|
|
533
|
+
"| #{result[:test_id]} | `#{dir}` |"
|
|
262
534
|
end
|
|
263
535
|
|
|
264
536
|
<<~SECTION
|
|
265
537
|
|
|
266
|
-
## Reports
|
|
538
|
+
## Reports Table
|
|
267
539
|
|
|
268
|
-
| Test ID |
|
|
269
|
-
|
|
540
|
+
| Test ID | Report Directory |
|
|
541
|
+
|---|---|
|
|
270
542
|
#{rows.join("\n")}
|
|
271
543
|
SECTION
|
|
272
544
|
end
|
|
545
|
+
|
|
546
|
+
# Pull the three narrative sections out of LLM-generated report text.
#
# Each section is looked up by its heading. When none of them yields any
# content and the text contains no level-2/3 markdown headings at all, the
# remaining free-form prose (text with canonical parts stripped) is used as
# the :positive section so unstructured output is not lost.
#
# @param report_text [String, nil] Raw text returned by the model
# @return [Hash{Symbol=>String}] Sections keyed :friction, :improvements, :positive
def extract_narrative_sections(report_text)
  text = report_text.to_s
  headings = {
    friction: "Friction Analysis",
    improvements: "Improvement Suggestions",
    positive: "Positive Observations"
  }
  sections = headings.transform_values { |heading| extract_markdown_section(text, heading) }

  leftover = strip_canonical_sections(text)
  return sections unless sections.each_value.all? { |value| value.to_s.strip.empty? }
  return sections if leftover.empty? || text.match?(/^\#{2,3}\s+/)

  sections[:positive] = leftover
  sections
end
|
|
560
|
+
|
|
561
|
+
# Extract the body of a markdown section identified by its heading.
#
# The heading must be level 2 or 3 and is matched case-insensitively. The
# body runs until the next level 1-3 heading or the end of the text.
#
# @param text [String] Markdown text to search
# @param heading [String] Heading text (matched literally)
# @return [String] Section body with surrounding whitespace removed, or ""
#   when the heading is not found
def extract_markdown_section(text, heading)
  section_pattern = /^\#{2,3}\s+#{Regexp.escape(heading)}\s*$\n?(.*?)(?=^\#{1,3}\s|\z)/mi
  text[section_pattern, 1].to_s.strip
end
|
|
567
|
+
|
|
568
|
+
# Reduce report text to its free-form prose.
#
# Removes a leading front matter block, level 1-3 markdown headings, table
# rows and the "**Overall:**" line, then drops blank lines and trailing
# whitespace on each remaining line.
#
# @param text [String, nil] Report text
# @return [String] Remaining lines joined with "\n" (may be empty)
def strip_canonical_sections(text)
  prose = text.to_s
    .sub(/\A---.*?^---\s*/m, "")
    .gsub(/^\#{1,3}\s+.*$/, "")
    .gsub(/^\|.*\|\s*$/, "")
    .gsub(/^\*\*Overall:\*\*.*$/, "")

  kept_lines = prose.each_line.map(&:rstrip).reject(&:empty?)
  kept_lines.join("\n").strip
end
|
|
576
|
+
|
|
577
|
+
# Build deterministic narrative sections from the results data.
#
# Scenarios with failing test cases get one friction bullet and one
# improvement bullet each; scenarios with zero failures get one positive
# bullet each. A section with no bullets is an empty string.
#
# @param results_data [Array<Hash>] Entries with :test_id, :failed, :passed,
#   :total and :report_dir_name
# @return [Hash{Symbol=>String}] Sections keyed :friction, :improvements, :positive
def fallback_narrative_sections(results_data)
  failing = results_data.select { |result| result[:failed].positive? }
  clean = results_data.select { |result| result[:failed].zero? }

  friction_bullets = failing.map do |result|
    "- #{result[:test_id]} had #{result[:failed]} failing test case(s); inspect `#{result[:report_dir_name]}` for scenario details."
  end
  improvement_bullets = failing.map do |result|
    "- Re-run #{result[:test_id]} after the targeted fix and confirm the failing test case set is empty."
  end
  positive_bullets = clean.map do |result|
    "- #{result[:test_id]} passed #{result[:passed]}/#{result[:total]} test cases."
  end

  # Joining an empty list already yields "", so no separate empty check is needed.
  {
    friction: friction_bullets.join("\n"),
    improvements: improvement_bullets.join("\n"),
    positive: positive_bullets.join("\n")
  }
end
|
|
592
|
+
|
|
593
|
+
# Find the scenario that belongs to a result, or build a stand-in.
#
# The scenario at the same index is used when present. Otherwise a minimal
# object responding to #title is returned: "Preflight" when the result's
# metadata marks the phase as "preflight" (symbol or string key), else the
# result's test ID.
#
# @param result [#metadata, #test_id] Runner result
# @param scenarios [Array] Scenarios aligned with the results by index
# @param index [Integer] Position of the result
# @return [Object] Scenario at index, or an OpenStruct with a title
def scenario_for_result(result, scenarios, index)
  matching = scenarios[index]
  return matching if matching

  preflight = result.metadata[:phase] == "preflight" || result.metadata["phase"] == "preflight"
  placeholder_title = preflight ? "Preflight" : result.test_id
  OpenStruct.new(title: placeholder_title)
end
|
|
273
598
|
end
|
|
274
599
|
end
|
|
275
600
|
end
|