ace-test-runner-e2e 0.29.8 → 0.40.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.ace-defaults/e2e-runner/config.yml +14 -2
  3. data/CHANGELOG.md +233 -0
  4. data/README.md +2 -2
  5. data/exe/ace-test-e2e-sh +9 -4
  6. data/handbook/guides/e2e-testing.g.md +75 -9
  7. data/handbook/guides/scenario-yml-reference.g.md +21 -8
  8. data/handbook/guides/tc-authoring.g.md +23 -5
  9. data/handbook/skills/as-e2e-fix/SKILL.md +2 -2
  10. data/handbook/skills/as-e2e-review/SKILL.md +2 -2
  11. data/handbook/templates/ace-taskflow-fixture.template.md +17 -17
  12. data/handbook/templates/agent-experience-report.template.md +3 -2
  13. data/handbook/templates/scenario.yml.template.yml +7 -2
  14. data/handbook/templates/tc-file.template.md +16 -4
  15. data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +53 -6
  16. data/handbook/workflow-instructions/e2e/create.wf.md +128 -25
  17. data/handbook/workflow-instructions/e2e/execute.wf.md +11 -7
  18. data/handbook/workflow-instructions/e2e/fix.wf.md +84 -15
  19. data/handbook/workflow-instructions/e2e/plan-changes.wf.md +33 -1
  20. data/handbook/workflow-instructions/e2e/review.wf.md +40 -25
  21. data/handbook/workflow-instructions/e2e/rewrite.wf.md +22 -8
  22. data/handbook/workflow-instructions/e2e/run.wf.md +50 -26
  23. data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +4 -4
  24. data/lib/ace/test/end_to_end_runner/atoms/artifact_contract_validator.rb +138 -0
  25. data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +7 -5
  26. data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +73 -7
  27. data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +195 -5
  28. data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +58 -9
  29. data/lib/ace/test/end_to_end_runner/models/test_case.rb +8 -2
  30. data/lib/ace/test/end_to_end_runner/models/test_result.rb +9 -3
  31. data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +4 -2
  32. data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +7 -2
  33. data/lib/ace/test/end_to_end_runner/molecules/artifact_pruner.rb +61 -0
  34. data/lib/ace/test/end_to_end_runner/molecules/bwrap_sandbox_backend.rb +271 -0
  35. data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +28 -1
  36. data/lib/ace/test/end_to_end_runner/molecules/integration_runner.rb +122 -0
  37. data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +235 -18
  38. data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +164 -13
  39. data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +91 -19
  40. data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +121 -18
  41. data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +15 -12
  42. data/lib/ace/test/end_to_end_runner/molecules/sandbox_runtime_builder.rb +374 -0
  43. data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +83 -5
  44. data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +121 -16
  45. data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +422 -97
  46. data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +38 -13
  47. data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +27 -5
  48. data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +98 -18
  49. data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +159 -19
  50. data/lib/ace/test/end_to_end_runner/version.rb +1 -1
  51. data/lib/ace/test/end_to_end_runner.rb +4 -0
  52. metadata +21 -2
@@ -1,7 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "fileutils"
4
+ require "ostruct"
4
5
  require "yaml"
6
+ require "set"
7
+ require "date"
5
8
  require "ace/llm"
6
9
  require "ace/llm/query_interface"
7
10
 
@@ -9,7 +12,7 @@ module Ace
9
12
  module Test
10
13
  module EndToEndRunner
11
14
  module Molecules
12
- # Writes a suite-level final report aggregating all test results
15
+ # Writes an aggregated package or suite report
13
16
  #
14
17
  # Uses LLM synthesis to generate rich reports with root cause analysis,
15
18
  # friction insights, and improvement suggestions. Falls back to a static
@@ -22,7 +25,13 @@ module Ace
22
25
  @timeout = reporting["timeout"] || 60
23
26
  end
24
27
 
25
- # Write a suite-level final report
28
+ REPORT_KINDS = {
29
+ package: ->(timestamp, package) { "#{timestamp}-#{package}-report.md" },
30
+ suite: ->(timestamp, _package) { "#{timestamp}-suite-report.md" },
31
+ suite_final: ->(timestamp, _package) { "#{timestamp}-suite-final-report.md" }
32
+ }.freeze
33
+
34
+ # Write an aggregated report
26
35
  #
27
36
  # @param results [Array<Models::TestResult>] Test results (ordered)
28
37
  # @param scenarios [Array<Models::TestScenario>] Corresponding scenarios
@@ -30,22 +39,69 @@ module Ace
30
39
  # @param timestamp [String] Timestamp ID for this run
31
40
  # @param base_dir [String] Base directory for cache output
32
41
  # @return [String] Path to the written report file
33
- def write(results, scenarios, package:, timestamp:, base_dir:)
42
+ def write(results, scenarios, package:, timestamp:, base_dir:, report_kind: :package, diagnostics: nil)
34
43
  cache_dir = File.join(base_dir, ".ace-local", "test-e2e")
35
44
  FileUtils.mkdir_p(cache_dir)
36
45
 
37
- report_path = File.join(cache_dir, "#{timestamp}-final-report.md")
46
+ report_path = File.join(cache_dir, report_filename(report_kind, timestamp, package))
38
47
 
39
48
  overall_status = compute_status(results)
40
49
  executed_at = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ")
41
-
42
- content = synthesize_report(
43
- results, scenarios,
50
+ results_data = build_results_data(results, scenarios)
51
+ narrative_sections = synthesize_narrative_sections(
52
+ results_data,
44
53
  package: package,
45
54
  timestamp: timestamp,
46
55
  overall_status: overall_status,
47
56
  executed_at: executed_at
48
57
  )
58
+ content = build_report(
59
+ results_data,
60
+ package: package,
61
+ timestamp: timestamp,
62
+ overall_status: overall_status,
63
+ executed_at: executed_at,
64
+ narrative_sections: narrative_sections,
65
+ diagnostics: diagnostics
66
+ )
67
+
68
+ File.write(report_path, content)
69
+ report_path
70
+ end
71
+
72
+ # Write a deterministic wrapper report for a two-attempt suite run.
73
+ #
74
+ # Preserves first-pass failure evidence while reflecting the final retry outcome.
75
+ def write_retry_summary(initial_results:, retry_results:, timestamp:, base_dir:, package: "suite")
76
+ cache_dir = File.join(base_dir, ".ace-local", "test-e2e")
77
+ FileUtils.mkdir_p(cache_dir)
78
+
79
+ report_path = File.join(cache_dir, report_filename(:suite_final, timestamp, package))
80
+ initial_entries = flatten_attempt_results(initial_results, base_dir: base_dir)
81
+ retry_entries = flatten_attempt_results(retry_results, base_dir: base_dir)
82
+ retry_by_test = retry_entries.each_with_object({}) { |entry, memo| memo[entry[:test_id]] = entry }
83
+
84
+ flaky_entries = initial_entries.filter_map do |entry|
85
+ next if entry[:status] == "pass"
86
+
87
+ retry_entry = retry_by_test[entry[:test_id]]
88
+ next unless retry_entry && retry_entry[:status] == "pass"
89
+
90
+ entry.merge(retry_entry: retry_entry)
91
+ end.sort_by { |entry| entry[:test_id] }
92
+ remaining_entries = retry_entries.reject { |entry| entry[:status] == "pass" }.sort_by { |entry| entry[:test_id] }
93
+ final_status = compute_retry_summary_status(retry_entries)
94
+
95
+ content = build_retry_summary_content(
96
+ timestamp: timestamp,
97
+ initial_results: initial_results,
98
+ retry_results: retry_results,
99
+ initial_entries: initial_entries,
100
+ flaky_entries: flaky_entries,
101
+ remaining_entries: remaining_entries,
102
+ final_status: final_status,
103
+ base_dir: base_dir
104
+ )
49
105
 
50
106
  File.write(report_path, content)
51
107
  report_path
@@ -53,10 +109,158 @@ module Ace
53
109
 
54
110
  private
55
111
 
56
- # Attempt LLM synthesis, falling back to static template
57
- def synthesize_report(results, scenarios, package:, timestamp:, overall_status:, executed_at:)
58
- results_data = build_results_data(results, scenarios)
112
# Resolve the report file name for a given report kind.
#
# @param report_kind [Symbol, String] one of the REPORT_KINDS keys
# @param timestamp [String] timestamp ID embedded in the file name
# @param package [String] package name (only used by kinds that embed it)
# @return [String] file name produced by the matching REPORT_KINDS builder
# @raise [ArgumentError] when report_kind is nil, not symbolizable, or unknown
def report_filename(report_kind, timestamp, package)
  # Guard the conversion so nil (or any object without #to_sym) surfaces as
  # the documented ArgumentError instead of a NoMethodError.
  kind = report_kind.respond_to?(:to_sym) ? report_kind.to_sym : nil
  builder = REPORT_KINDS[kind]
  raise ArgumentError, "Unknown report kind: #{report_kind}" unless builder

  builder.call(timestamp, package)
end
118
+
119
+ def flatten_attempt_results(results, base_dir:)
120
+ results.fetch(:packages, {}).values.flatten.map do |result|
121
+ report_dir = result[:report_dir]
122
+ metadata = read_retry_metadata(report_dir)
123
+ report_frontmatter = read_report_frontmatter(report_dir)
124
+ test_name = result[:test_name] || result[:test_id] || ""
125
+ test_id = metadata["test-id"] || canonical_retry_test_id(test_name)
126
+ failed_entries = Array(metadata["failed"]).filter_map do |entry|
127
+ next unless entry.is_a?(Hash)
128
+
129
+ {
130
+ tc: entry["tc"] || entry[:tc],
131
+ category: entry["category"] || entry[:category] || "runner-error",
132
+ evidence: entry["evidence"] || entry[:evidence] || "See attempt report for details"
133
+ }
134
+ end
135
+ if failed_entries.empty? && result[:status] != "pass"
136
+ failed_entries << {
137
+ tc: nil,
138
+ category: result[:status] || "runner-error",
139
+ evidence: result[:summary] || result[:error] || "See attempt report for details"
140
+ }
141
+ end
142
+
143
+ {
144
+ test_id: test_id,
145
+ title: report_frontmatter["title"] || test_id,
146
+ status: result[:status],
147
+ report_dir: report_dir,
148
+ report_dir_display: display_path(report_dir, base_dir),
149
+ report_dir_name: report_dir ? File.basename(report_dir) : nil,
150
+ failed_entries: failed_entries,
151
+ passed_cases: result[:passed_cases] || metadata["tcs-passed"] || metadata.dig("results", "passed") || 0,
152
+ total_cases: result[:total_cases] || metadata["tcs-total"] || metadata.dig("results", "total") || 0
153
+ }
154
+ end
155
+ end
156
+
157
# Load <report_dir>/metadata.yml as a Hash on a best-effort basis.
#
# @param report_dir [String, nil] directory of a single attempt report
# @return [Hash] parsed metadata; empty when the directory is nil, the file
#   is missing, the YAML cannot be loaded, or its top level is not a mapping
def read_retry_metadata(report_dir)
  return {} unless report_dir

  path = File.join(report_dir, "metadata.yml")
  return {} unless File.file?(path)

  loaded = YAML.safe_load_file(path, permitted_classes: [Time, Date])
  # Callers index the result with string keys and #dig, so anything other
  # than a mapping (list, scalar, empty document) is treated as "no metadata".
  loaded.is_a?(Hash) ? loaded : {}
rescue StandardError
  # Unreadable or malformed metadata must not abort report generation.
  {}
end
167
+
168
+ def canonical_retry_test_id(test_name)
169
+ match = test_name.to_s.match(/\A(TS-[A-Z0-9]+-\d+[a-z]*)/i)
170
+ match ? match[1].upcase : test_name
171
+ end
172
+
173
# Shorten a path for display by stripping the base directory prefix.
#
# @param path [String, nil] path to display
# @param base_dir [String, nil] directory the path should be shown relative to
# @return [String, nil] nil when path is nil; the path without its
#   "<base_dir>/" prefix when it lies inside base_dir; the path unchanged otherwise
def display_path(path, base_dir)
  return nil if path.nil?

  shown = path.to_s
  # Normalise a trailing slash so "/repo/" and "/repo" strip the same prefix.
  root = base_dir.to_s.chomp("/")
  # A blank base has nothing to strip; without this guard an absolute path
  # would lose its leading "/".
  return shown if root.empty?

  # delete_prefix is a no-op when the prefix is absent, and including the
  # separator keeps sibling directories such as "/repo-old" intact.
  shown.delete_prefix("#{root}/")
end
178
+
179
+ def compute_retry_summary_status(entries)
180
+ executed = entries.reject { |entry| entry[:status] == "skip" }
181
+ return "skip" if executed.empty?
182
+ return "pass" if executed.all? { |entry| entry[:status] == "pass" }
183
+ return "partial" if executed.any? { |entry| entry[:status] == "pass" }
184
+
185
+ "fail"
186
+ end
187
+
188
+ def build_retry_summary_content(timestamp:, initial_results:, retry_results:, initial_entries:, flaky_entries:, remaining_entries:, final_status:, base_dir:)
189
+ total_initial_failures = initial_entries.count { |entry| entry[:status] != "pass" }
190
+ lines = []
191
+ lines << "---"
192
+ lines << "suite-id: #{timestamp}"
193
+ lines << "package: suite"
194
+ lines << "status: #{final_status}"
195
+ lines << "retry-attempted: true"
196
+ lines << "flaky-scenarios: #{flaky_entries.length}"
197
+ lines << "remaining-failures: #{remaining_entries.length}"
198
+ lines << "attempt-1-report: #{display_path(initial_results[:report_path], base_dir)}"
199
+ lines << "attempt-2-report: #{display_path(retry_results[:report_path], base_dir)}"
200
+ lines << "---"
201
+ lines << ""
202
+ lines << "# E2E Final Suite Report: `suite`"
203
+ lines << ""
204
+ lines << "## Attempt Summary"
205
+ lines << ""
206
+ lines << "| Attempt | Report | Status | Scenarios | Failures |"
207
+ lines << "|---|---|---:|---:|---:|"
208
+ lines << "| 1 | `#{display_path(initial_results[:report_path], base_dir)}` | #{initial_results[:failed].to_i > 0 || initial_results[:errors].to_i > 0 ? "Fail" : "Pass"} | #{initial_results[:total]} | #{initial_results[:failed].to_i + initial_results[:errors].to_i} |"
209
+ lines << "| 2 | `#{display_path(retry_results[:report_path], base_dir)}` | #{retry_results[:failed].to_i > 0 || retry_results[:errors].to_i > 0 ? "Fail" : "Pass"} | #{retry_results[:total]} | #{retry_results[:failed].to_i + retry_results[:errors].to_i} |"
210
+ lines << ""
211
+ lines << "First-pass failing scenarios: #{total_initial_failures}"
212
+ lines << "Recovered on retry (flaky): #{flaky_entries.length}"
213
+ lines << "Remaining failures after retry: #{remaining_entries.length}"
214
+ lines << ""
215
+ lines << "## Flaky Recoveries"
216
+ lines << ""
217
+ if flaky_entries.empty?
218
+ lines << "None."
219
+ else
220
+ flaky_entries.each do |entry|
221
+ lines << "### #{entry[:test_id]}"
222
+ lines << ""
223
+ lines << "- Title: #{entry[:title]}"
224
+ lines << "- Attempt 1 status: `#{entry[:status]}`"
225
+ lines << "- Attempt 1 report directory: `#{entry[:report_dir_display]}`"
226
+ lines << "- Attempt 2 report directory: `#{entry[:retry_entry][:report_dir_display]}`"
227
+ entry[:failed_entries].each do |failure|
228
+ lines << "- #{format_failure_entry(failure)}"
229
+ end
230
+ lines << ""
231
+ end
232
+ end
233
+ lines << "## Remaining Failures"
234
+ lines << ""
235
+ if remaining_entries.empty?
236
+ lines << "None."
237
+ else
238
+ remaining_entries.each do |entry|
239
+ lines << "### #{entry[:test_id]}"
240
+ lines << ""
241
+ lines << "- Title: #{entry[:title]}"
242
+ lines << "- Attempt 2 status: `#{entry[:status]}`"
243
+ lines << "- Attempt 2 report directory: `#{entry[:report_dir_display]}`"
244
+ entry[:failed_entries].each do |failure|
245
+ lines << "- #{format_failure_entry(failure)}"
246
+ end
247
+ lines << ""
248
+ end
249
+ end
250
+
251
+ lines.join("\n")
252
+ end
59
253
 
254
+ def format_failure_entry(failure)
255
+ tc = failure[:tc] || failure["tc"]
256
+ category = failure[:category] || failure["category"] || "runner-error"
257
+ evidence = failure[:evidence] || failure["evidence"] || "See attempt report for details"
258
+ tc ? "`#{tc}` (`#{category}`) - #{evidence}" : "`#{category}` - #{evidence}"
259
+ end
260
+
261
+ # Attempt LLM synthesis for narrative sections only, falling back to
262
+ # deterministic defaults when the model is unavailable or malformed.
263
+ def synthesize_narrative_sections(results_data, package:, timestamp:, overall_status:, executed_at:)
60
264
  prompt_builder = Atoms::SuiteReportPromptBuilder.new
61
265
  user_prompt = prompt_builder.build(
62
266
  results_data,
@@ -73,51 +277,35 @@ module Ace
73
277
  timeout: @timeout,
74
278
  temperature: 0.3
75
279
  )
76
-
77
- total_passed = results.sum(&:passed_count)
78
- total_tc = results.sum(&:total_count)
79
- validate_overall_line(response[:text], total_passed, total_tc)
280
+ extract_narrative_sections(response[:text])
80
281
  rescue => e
81
- # LLM failed fall back to static report
82
- warn "Warning: LLM synthesis failed (#{e.class}: #{e.message}), using static report" if ENV["DEBUG"]
83
- executed_date = Time.now.utc.strftime("%Y-%m-%d")
84
- total_passed = results.sum(&:passed_count)
85
- total_failed = results.sum(&:failed_count)
86
- total_tc = results.sum(&:total_count)
87
-
88
- build_static_report(
89
- results, scenarios,
90
- package: package,
91
- timestamp: timestamp,
92
- overall_status: overall_status,
93
- executed_at: executed_at,
94
- executed_date: executed_date,
95
- total_passed: total_passed,
96
- total_failed: total_failed,
97
- total_tc: total_tc
98
- )
282
+ warn "Warning: LLM synthesis failed (#{e.class}: #{e.message}), using deterministic narrative" if ENV["DEBUG"]
283
+ fallback_narrative_sections(results_data)
99
284
  end
100
285
 
101
286
  # Read summary and experience report content from each result's report dir
102
287
  def build_results_data(results, scenarios)
103
288
  results.each_with_index.map do |result, i|
104
- scenario = scenarios[i]
289
+ scenario = scenario_for_result(result, scenarios, i)
105
290
  report_dir = result.report_dir
106
291
 
107
292
  summary_content = read_report_file(report_dir, "summary.r.md")
108
293
  experience_content = read_report_file(report_dir, "experience.r.md")
109
294
 
295
+ report_metadata = read_report_frontmatter(report_dir)
296
+
110
297
  {
111
298
  test_id: result.test_id,
112
299
  title: scenario.title,
113
300
  status: result.status,
114
- passed: result.passed_count,
115
- failed: result.failed_count,
116
- total: result.total_count,
117
- test_cases: result.test_cases,
301
+ passed: reported_count(report_metadata, result, "passed"),
302
+ failed: reported_count(report_metadata, result, "failed"),
303
+ total: reported_count(report_metadata, result, "total"),
304
+ test_cases: canonical_test_cases(report_metadata, result),
118
305
  report_dir_name: report_dir ? File.basename(report_dir) : nil,
119
306
  summary_content: summary_content,
120
- experience_content: experience_content
307
+ experience_content: experience_content,
308
+ canonical_tc_source: !report_metadata.empty?
121
309
  }
122
310
  end
123
311
  end
@@ -132,21 +320,71 @@ module Ace
132
320
  File.read(path)
133
321
  end
134
322
 
135
- # Validate the LLM-generated Overall line against deterministic totals.
136
- # If the LLM hallucinated wrong numbers, replace the line with correct values.
137
- def validate_overall_line(report_text, expected_passed, expected_total)
138
- expected_pct = (expected_total > 0) ? (expected_passed * 100.0 / expected_total).round(0) : 0
139
- correct_line = "**Overall:** #{expected_passed}/#{expected_total} test cases passed (#{expected_pct}%)"
323
+ def read_report_frontmatter(report_dir)
324
+ return {} unless report_dir
140
325
 
141
- # Match patterns like "**Overall:** X/Y test cases passed (Z%)"
142
- overall_pattern = /\*\*Overall:\*\*\s*\d+\/\d+\s+test cases passed\s*\(\d+%\)/
326
+ path = File.join(report_dir, "report.md")
327
+ return {} unless File.exist?(path)
143
328
 
144
- if report_text.match?(overall_pattern)
145
- report_text.gsub(overall_pattern, correct_line)
146
- else
147
- # No Overall line found — append the correct one after the summary table
148
- "#{report_text.rstrip}\n\n#{correct_line}\n"
329
+ content = File.read(path)
330
+ match = content.match(/\A---\s*\n(.*?)\n---\s*\n/m)
331
+ return {} unless match
332
+
333
+ YAML.safe_load(match[1], permitted_classes: [Time, Date]) || {}
334
+ rescue
335
+ {}
336
+ end
337
+
338
# Pick a test-case count for a result, preferring the report front matter.
#
# @param report_metadata [Hash] front matter read from the scenario report
# @param result [#passed_count, #failed_count, #total_count] runner result
# @param kind [String] "passed", "failed", or anything else for the total
# @return [Integer] the "tcs-<kind>" front-matter value when it is numeric,
#   otherwise the corresponding count from result
def reported_count(report_metadata, result, kind)
  reported = report_metadata["tcs-#{kind}"]
  return reported.to_i if reported.is_a?(Numeric)
  # Counts are summed and compared with #positive? later, so a quoted number
  # ("3") is coerced and any other non-numeric value is ignored.
  return reported.to_i if reported.is_a?(String) && reported.match?(/\A\d+\z/)

  case kind
  when "passed" then result.passed_count
  when "failed" then result.failed_count
  else result.total_count
  end
end
348
+
349
# Build the per-test-case list from report front matter, falling back to the
# runner's own test cases when the front matter carries no usable entries.
#
# Reads three front-matter keys: "failed" (hashes with tc/category/evidence),
# "canonical-failed-tcs" (bare ids) and "passed" (bare ids). A test case id
# that is reported as failed by either failed source is never also emitted as
# passed, and ids from "canonical-failed-tcs" are emitted at most once.
#
# @param report_metadata [Hash] front matter read from the scenario report
# @param result [#test_cases] runner result used as the fallback source
# @return [Array<Hash>] passed entries followed by failed entries, or
#   result.test_cases when nothing canonical is available
def canonical_test_cases(report_metadata, result)
  return result.test_cases if report_metadata.empty?

  failed_ids = Set.new
  failed_entries = []

  Array(report_metadata["failed"]).each do |entry|
    next unless entry.is_a?(Hash)

    id = entry["tc"] || entry[:tc]
    next unless id

    failed_ids << id
    failed_entries << {
      id: id,
      description: "",
      status: "fail",
      notes: entry["evidence"] || entry[:evidence] || "See scenario report for details",
      category: entry["category"] || entry[:category] || "runner-error"
    }
  end

  Array(report_metadata["canonical-failed-tcs"]).each do |tc_id|
    # Set#add? returns nil for an id that is already recorded, which skips
    # ids covered by a detailed "failed" entry as well as repeated ids.
    next unless failed_ids.add?(tc_id)

    failed_entries << {
      id: tc_id,
      description: "",
      status: "fail",
      notes: "See scenario report for details",
      category: "runner-error"
    }
  end

  # Filter against the complete failed set so a failure always wins over a
  # conflicting "passed" listing.
  passed_entries = Array(report_metadata["passed"]).filter_map do |tc_id|
    next if failed_ids.include?(tc_id)

    {id: tc_id, description: "", status: "pass", notes: ""}
  end

  canonical = passed_entries + failed_entries
  canonical.empty? ? result.test_cases : canonical
end
151
389
 
152
390
  def compute_status(results)
@@ -163,22 +401,26 @@ module Ace
163
401
  end
164
402
  end
165
403
 
166
- # Static fallback report (original template-based approach)
167
- def build_static_report(results, scenarios, package:, timestamp:, overall_status:,
168
- executed_at:, executed_date:, total_passed:, total_failed:, total_tc:)
169
- total_skipped = results.count(&:skipped?)
404
+ def build_report(results_data, package:, timestamp:, overall_status:, executed_at:, narrative_sections:, diagnostics:)
405
+ total_skipped = results_data.count { |r| r[:status] == "skip" }
406
+ total_passed = results_data.sum { |r| r[:passed] }
407
+ total_tc = results_data.sum { |r| r[:total] }
170
408
 
171
409
  parts = []
172
410
  parts << build_frontmatter(
173
411
  timestamp: timestamp, package: package, overall_status: overall_status,
174
- tests_run: results.size, executed_at: executed_at, skipped: total_skipped
412
+ tests_run: results_data.size, executed_at: executed_at, skipped: total_skipped
175
413
  )
176
- parts << build_header(package: package, tests_run: results.size, executed_date: executed_date, skipped: total_skipped)
177
- parts << build_summary_table(results, scenarios)
414
+ parts << build_header(package: package)
415
+ parts << build_summary_table(results_data)
178
416
  parts << build_overall_line(total_passed: total_passed, total_tc: total_tc)
179
- parts << build_failed_section(results, scenarios) if results.any?(&:failed?)
180
- parts << build_reports_section(results, scenarios)
181
- parts.join("\n")
417
+ parts << build_failed_section(results_data) if results_data.any? { |r| r[:failed].positive? }
418
+ parts << build_runner_diagnostics_section(diagnostics)
419
+ parts << build_narrative_section("Friction Analysis", narrative_sections[:friction])
420
+ parts << build_narrative_section("Improvement Suggestions", narrative_sections[:improvements])
421
+ parts << build_narrative_section("Positive Observations", narrative_sections[:positive])
422
+ parts << build_reports_section(results_data)
423
+ parts.compact.join("\n")
182
424
  end
183
425
 
184
426
  def build_frontmatter(timestamp:, package:, overall_status:, tests_run:, executed_at:, skipped: 0)
@@ -194,82 +436,165 @@ module Ace
194
436
  FRONTMATTER
195
437
  end
196
438
 
197
- def build_header(package:, tests_run:, executed_date:, skipped: 0)
198
- skipped_info = (skipped > 0) ? " (#{skipped} skipped)" : ""
439
+ def build_header(package:)
199
440
  <<~HEADER
200
- # E2E Test Suite Report
201
-
202
- **Package:** #{package}
203
- **Tests:** #{tests_run}#{skipped_info}
204
- **Executed:** #{executed_date}
441
+ # E2E Suite Report: `#{package}`
205
442
  HEADER
206
443
  end
207
444
 
208
- def build_summary_table(results, scenarios)
209
- rows = results.each_with_index.map do |result, i|
210
- scenario = scenarios[i]
211
- status_label = result.status.capitalize
212
- passed = result.skipped? ? "-" : result.passed_count.to_s
213
- failed = result.skipped? ? "-" : result.failed_count.to_s
214
- total = result.skipped? ? "-" : result.total_count.to_s
215
- "| #{result.test_id} | #{scenario.title} | #{status_label} | #{passed} | #{failed} | #{total} |"
445
+ def build_summary_table(results_data)
446
+ rows = results_data.map do |result|
447
+ status_label = result[:status].capitalize
448
+ passed = (result[:status] == "skip") ? "-" : result[:passed].to_s
449
+ failed = (result[:status] == "skip") ? "-" : result[:failed].to_s
450
+ total = (result[:status] == "skip") ? "-" : result[:total].to_s
451
+ "| #{result[:test_id]} | #{result[:title]} | #{status_label} | #{passed} | #{failed} | #{total} |"
216
452
  end
217
453
 
218
454
  <<~TABLE
219
- ## Summary
455
+ ## Summary Table
220
456
 
221
457
  | Test ID | Title | Status | Passed | Failed | Total |
222
- |---------|-------|--------|--------|--------|-------|
458
+ |---|---|---:|---:|---:|---:|
223
459
  #{rows.join("\n")}
224
460
  TABLE
225
461
  end
226
462
 
227
463
  def build_overall_line(total_passed:, total_tc:)
228
- pct = (total_tc > 0) ? (total_passed * 100.0 / total_tc).round(0) : 0
229
- "**Overall:** #{total_passed}/#{total_tc} test cases passed (#{pct}%)\n"
464
+ pct = (total_tc > 0) ? (total_passed * 100.0 / total_tc).round(1) : 0.0
465
+ formatted_pct = (pct % 1).zero? ? pct.to_i.to_s : format("%.1f", pct)
466
+ <<~OVERALL
467
+ ## Overall Line
468
+
469
+ **Overall:** #{total_passed}/#{total_tc} test cases passed (#{formatted_pct}%)
470
+ OVERALL
230
471
  end
231
472
 
232
- def build_failed_section(results, scenarios)
473
+ def build_failed_section(results_data)
233
474
  parts = ["\n## Failed Tests\n"]
234
475
 
235
- results.each_with_index do |result, i|
236
- next if result.success? || result.skipped?
476
+ results_data.each do |result|
477
+ next unless result[:failed].positive?
237
478
 
238
- scenario = scenarios[i]
239
- parts << "### #{result.test_id}: #{scenario.title} (#{result.passed_count}/#{result.total_count})\n"
479
+ parts << "### #{result[:test_id]}"
480
+ parts << ""
481
+ parts << "**Failed test case details**"
240
482
 
241
- failed_tcs = result.test_cases.select { |tc| tc[:status] == "fail" }
483
+ failed_tcs = result[:test_cases].select { |tc| tc[:status] == "fail" }
242
484
  if failed_tcs.any?
243
- parts << "**Failed Test Cases:**"
244
485
  failed_tcs.each do |tc|
245
- parts << "- #{tc[:id]}: #{tc[:description]}"
486
+ category = tc[:category] || "runner-error"
487
+ details = tc[:notes].to_s.strip
488
+ details = tc[:description].to_s if details.empty?
489
+ parts << "- `#{tc[:id]}` (#{category}) — #{details}"
246
490
  end
247
- parts << ""
491
+ else
492
+ parts << "- Exact failed TC mapping unavailable in aggregate view — see scenario report for canonical details."
248
493
  end
249
494
 
250
- if result.report_dir
251
- parts << "**Report:** #{result.report_dir}\n"
495
+ if result[:report_dir_name]
496
+ parts << ""
497
+ parts << "**Report directory:** `#{result[:report_dir_name]}`"
252
498
  end
499
+ parts << ""
253
500
  end
254
501
 
255
502
  parts.join("\n")
256
503
  end
257
504
 
258
- def build_reports_section(results, scenarios)
259
- rows = results.each_with_index.map do |result, i|
260
- dir = result.report_dir ? File.basename(result.report_dir) : "N/A"
261
- "| #{result.test_id} | #{dir} |"
505
+ def build_runner_diagnostics_section(diagnostics)
506
+ return nil unless diagnostics.is_a?(Hash) && diagnostics[:dirty_worktree]
507
+
508
+ entries = Array(diagnostics[:new_tracked_entries]).map { |line| "- `#{line}`" }.join("\n")
509
+ entries = "- No specific entries captured." if entries.empty?
510
+
511
+ <<~SECTION
512
+ ## Runner Diagnostics
513
+
514
+ Suite execution introduced new tracked working-tree changes relative to the pre-run snapshot.
515
+
516
+ #{entries}
517
+ SECTION
518
+ end
519
+
520
+ def build_narrative_section(title, content)
521
+ return nil if content.to_s.strip.empty?
522
+
523
+ <<~SECTION
524
+ ## #{title}
525
+
526
+ #{content.to_s.strip}
527
+ SECTION
528
+ end
529
+
530
+ def build_reports_section(results_data)
531
+ rows = results_data.map do |result|
532
+ dir = result[:report_dir_name] || "N/A"
533
+ "| #{result[:test_id]} | `#{dir}` |"
262
534
  end
263
535
 
264
536
  <<~SECTION
265
537
 
266
- ## Reports
538
+ ## Reports Table
267
539
 
268
- | Test ID | Reports Folder |
269
- |---------|----------------|
540
+ | Test ID | Report Directory |
541
+ |---|---|
270
542
  #{rows.join("\n")}
271
543
  SECTION
272
544
  end
545
+
546
+ def extract_narrative_sections(report_text)
547
+ text = report_text.to_s
548
+ sections = {
549
+ friction: extract_markdown_section(text, "Friction Analysis"),
550
+ improvements: extract_markdown_section(text, "Improvement Suggestions"),
551
+ positive: extract_markdown_section(text, "Positive Observations")
552
+ }
553
+
554
+ fallback = strip_canonical_sections(text)
555
+ has_markdown_sections = text.match?(/^\#{2,3}\s+/)
556
+ sections[:positive] = fallback if sections.values.all? { |value| value.to_s.strip.empty? } &&
557
+ !fallback.empty? && !has_markdown_sections
558
+ sections
559
+ end
560
+
561
+ def extract_markdown_section(text, heading)
562
+ match = text.match(/^\#{2,3}\s+#{Regexp.escape(heading)}\s*$\n?(.*?)(?=^\#{1,3}\s|\z)/mi)
563
+ return "" unless match
564
+
565
+ match[1].to_s.strip
566
+ end
567
+
568
+ def strip_canonical_sections(text)
569
+ body = text.to_s.dup
570
+ body.sub!(/\A---.*?^---\s*/m, "")
571
+ body.gsub!(/^\#{1,3}\s+.*$/, "")
572
+ body.gsub!(/^\|.*\|\s*$/, "")
573
+ body.gsub!(/^\*\*Overall:\*\*.*$/, "")
574
+ body.lines.map(&:rstrip).reject(&:empty?).join("\n").strip
575
+ end
576
+
577
# Deterministic narrative used when LLM synthesis is unavailable.
#
# @param results_data [Array<Hash>] per-scenario data with :test_id, :status,
#   :passed, :failed, :total and :report_dir_name
# @return [Hash{Symbol=>String}] :friction, :improvements and :positive
#   markdown bullet lists; each is an empty string when it has no entries
def fallback_narrative_sections(results_data)
  failing = results_data.select { |result| result[:failed].positive? }
  # Only scenarios that actually passed count as positive; a skipped or
  # errored scenario also has zero failures but would read "passed 0/0".
  clean_passes = results_data.select do |result|
    result[:status].to_s == "pass" && result[:failed].zero?
  end

  {
    friction: failing.map { |result|
      "- #{result[:test_id]} had #{result[:failed]} failing test case(s); inspect `#{result[:report_dir_name]}` for scenario details."
    }.join("\n"),
    improvements: failing.map { |result|
      "- Re-run #{result[:test_id]} after the targeted fix and confirm the failing test case set is empty."
    }.join("\n"),
    positive: clean_passes.map { |result|
      "- #{result[:test_id]} passed #{result[:passed]}/#{result[:total]} test cases."
    }.join("\n")
  }
end
592
+
593
# Find the scenario paired with a result, or synthesize a stand-in.
#
# @param result [#test_id, #metadata] result being reported
# @param scenarios [Array, nil] scenarios in the same order as the results
# @param index [Integer] position of the result
# @return [Object] scenarios[index] when present; otherwise an OpenStruct
#   whose title is "Preflight" for results whose metadata phase is
#   "preflight" (symbol or string key) and the result's test_id for all others
def scenario_for_result(result, scenarios, index)
  scenario = scenarios[index] if scenarios
  return scenario if scenario

  # Results without metadata (nil, or no such reader) fall back to the test id
  # instead of raising while the report is being assembled.
  metadata = (result.metadata if result.respond_to?(:metadata)) || {}
  preflight = metadata[:phase] == "preflight" || metadata["phase"] == "preflight"

  OpenStruct.new(title: preflight ? "Preflight" : result.test_id)
end
273
598
  end
274
599
  end
275
600
  end