ace-test-runner-e2e 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.ace-defaults/e2e-runner/config.yml +70 -0
- data/.ace-defaults/nav/protocols/guide-sources/ace-test-runner-e2e.yml +11 -0
- data/.ace-defaults/nav/protocols/skill-sources/ace-test-runner-e2e.yml +19 -0
- data/.ace-defaults/nav/protocols/tmpl-sources/ace-test-runner-e2e.yml +12 -0
- data/.ace-defaults/nav/protocols/wfi-sources/ace-test-runner-e2e.yml +11 -0
- data/CHANGELOG.md +1166 -0
- data/LICENSE +21 -0
- data/README.md +42 -0
- data/Rakefile +15 -0
- data/exe/ace-test-e2e +15 -0
- data/exe/ace-test-e2e-sh +67 -0
- data/exe/ace-test-e2e-suite +13 -0
- data/handbook/guides/e2e-testing.g.md +124 -0
- data/handbook/guides/scenario-yml-reference.g.md +182 -0
- data/handbook/guides/tc-authoring.g.md +131 -0
- data/handbook/skills/as-e2e-create/SKILL.md +30 -0
- data/handbook/skills/as-e2e-fix/SKILL.md +35 -0
- data/handbook/skills/as-e2e-manage/SKILL.md +31 -0
- data/handbook/skills/as-e2e-plan-changes/SKILL.md +30 -0
- data/handbook/skills/as-e2e-review/SKILL.md +35 -0
- data/handbook/skills/as-e2e-rewrite/SKILL.md +31 -0
- data/handbook/skills/as-e2e-run/SKILL.md +48 -0
- data/handbook/skills/as-e2e-setup-sandbox/SKILL.md +34 -0
- data/handbook/templates/ace-taskflow-fixture.template.md +322 -0
- data/handbook/templates/agent-experience-report.template.md +89 -0
- data/handbook/templates/metadata.template.yml +49 -0
- data/handbook/templates/scenario.yml.template.yml +60 -0
- data/handbook/templates/tc-file.template.md +45 -0
- data/handbook/templates/test-report.template.md +94 -0
- data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +126 -0
- data/handbook/workflow-instructions/e2e/create.wf.md +395 -0
- data/handbook/workflow-instructions/e2e/execute.wf.md +253 -0
- data/handbook/workflow-instructions/e2e/fix.wf.md +166 -0
- data/handbook/workflow-instructions/e2e/manage.wf.md +179 -0
- data/handbook/workflow-instructions/e2e/plan-changes.wf.md +255 -0
- data/handbook/workflow-instructions/e2e/review.wf.md +286 -0
- data/handbook/workflow-instructions/e2e/rewrite.wf.md +281 -0
- data/handbook/workflow-instructions/e2e/run.wf.md +355 -0
- data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +461 -0
- data/lib/ace/test/end_to_end_runner/atoms/display_helpers.rb +234 -0
- data/lib/ace/test/end_to_end_runner/atoms/prompt_builder.rb +199 -0
- data/lib/ace/test/end_to_end_runner/atoms/result_parser.rb +166 -0
- data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +166 -0
- data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +244 -0
- data/lib/ace/test/end_to_end_runner/atoms/suite_report_prompt_builder.rb +103 -0
- data/lib/ace/test/end_to_end_runner/atoms/tc_fidelity_validator.rb +39 -0
- data/lib/ace/test/end_to_end_runner/atoms/test_case_parser.rb +108 -0
- data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +130 -0
- data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +156 -0
- data/lib/ace/test/end_to_end_runner/models/test_case.rb +47 -0
- data/lib/ace/test/end_to_end_runner/models/test_result.rb +115 -0
- data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +90 -0
- data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +92 -0
- data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +75 -0
- data/lib/ace/test/end_to_end_runner/molecules/failure_finder.rb +203 -0
- data/lib/ace/test/end_to_end_runner/molecules/fixture_copier.rb +35 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +121 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +182 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +321 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +131 -0
- data/lib/ace/test/end_to_end_runner/molecules/progress_display_manager.rb +172 -0
- data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +259 -0
- data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +254 -0
- data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +181 -0
- data/lib/ace/test/end_to_end_runner/molecules/simple_display_manager.rb +72 -0
- data/lib/ace/test/end_to_end_runner/molecules/suite_progress_display_manager.rb +223 -0
- data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +277 -0
- data/lib/ace/test/end_to_end_runner/molecules/suite_simple_display_manager.rb +116 -0
- data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +136 -0
- data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +332 -0
- data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +830 -0
- data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +442 -0
- data/lib/ace/test/end_to_end_runner/version.rb +9 -0
- data/lib/ace/test/end_to_end_runner.rb +71 -0
- metadata +220 -0
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ace
|
|
4
|
+
module Test
|
|
5
|
+
module EndToEndRunner
|
|
6
|
+
module Atoms
|
|
7
|
+
# Parses structured markdown results from CLI-provider skill/workflow execution.
#
# CLI providers return results in the subagent return contract format:
#   - **Test ID**: TS-LINT-001
#   - **Status**: pass
#   - **Passed**: 8
#   - **Failed**: 0
#   - **Total**: 8
#   - **Report Paths**: 8p5jo2-lint-ts001-reports/*
#   - **Issues**: None
#
# Falls back to ResultParser.parse() for JSON responses.
class SkillResultParser
  # Bold-key labels of the suite-level return contract, keyed by result field.
  SUITE_FIELDS = {
    test_id: "Test ID",
    status: "Status",
    passed: "Passed",
    failed: "Failed",
    total: "Total",
    report_paths: "Report Paths",
    issues: "Issues"
  }.freeze

  # Bold-key labels of the single-TC return contract.
  TC_FIELDS = {
    test_id: "Test ID",
    tc_id: "TC ID",
    status: "Status",
    report_paths: "Report Paths",
    issues: "Issues"
  }.freeze

  # Bold-key labels of the verifier-mode return contract.
  VERIFIER_FIELDS = {
    test_id: "Test ID",
    status: "Status",
    tcs_passed: "TCs Passed",
    tcs_failed: "TCs Failed",
    tcs_total: "TCs Total",
    score: "Score",
    verdict: "Verdict",
    failed_tcs: "Failed TCs",
    issues: "Issues"
  }.freeze

  # Verifier fields that must be present, otherwise parse_verifier defers to parse.
  VERIFIER_REQUIRED = %i[test_id status tcs_passed tcs_failed tcs_total].freeze

  private_constant :SUITE_FIELDS, :TC_FIELDS, :VERIFIER_FIELDS, :VERIFIER_REQUIRED

  # Parse response text from a CLI provider
  #
  # @param text [String] Raw response text
  # @return [Hash] Parsed result with :test_id, :status, :test_cases, :summary, :observations
  # @raise [ResultParser::ParseError] If neither markdown nor JSON can be parsed
  def self.parse(text)
    ensure_present!(text)

    parsed = parse_markdown(text)
    return to_normalized(parsed) if parsed

    # Fall back to JSON parsing via ResultParser
    ResultParser.parse(text)
  end

  # Parse TC-level response text from a CLI provider
  #
  # Handles TC-level markdown with **TC ID** field. Falls back to
  # parse() if the response has the multi-TC format.
  #
  # @param text [String] Raw response text
  # @return [Hash] Parsed result with single-entry :test_cases array
  # @raise [ResultParser::ParseError] If neither format can be parsed
  def self.parse_tc(text)
    ensure_present!(text)

    parsed = parse_tc_markdown(text)
    return to_tc_normalized(parsed) if parsed

    # Fall back to standard parse (handles both markdown and JSON)
    parse(text)
  end

  # Parse verifier-mode markdown return contract.
  #
  # Passing test cases get synthetic TC-NNN IDs that skip any ID listed in
  # **Failed TCs**; failed test cases use the listed IDs when present and
  # synthetic IDs (numbered after the passing ones) otherwise.
  #
  # @param text [String] Raw response text
  # @return [Hash] Normalized test result payload
  # @raise [ResultParser::ParseError] If the text is empty or no format can be parsed
  def self.parse_verifier(text)
    ensure_present!(text)

    fields = extract_fields(text, VERIFIER_FIELDS)
    return parse(text) unless VERIFIER_REQUIRED.all? { |key| fields[key] }

    passed = fields[:tcs_passed].to_i
    failed = fields[:tcs_failed].to_i
    total = fields[:tcs_total].to_i
    status = normalize_status(fields[:status])

    failed_entries = parse_failed_tcs(fields[:failed_tcs])
    # Plain array lookup: the list is tiny and this avoids depending on Set,
    # which is only autoloaded from Ruby 3.2 onwards.
    failed_ids = failed_entries.map { |entry| entry[:tc] }

    test_cases = []
    number = 0
    passed.times do
      number += 1
      number += 1 while failed_ids.include?(tc_label(number))
      test_cases << synthetic_case(tc_label(number), "pass")
    end

    if failed_entries.empty?
      failed.times { |i| test_cases << synthetic_case(tc_label(passed + i + 1), "fail") }
    else
      failed_entries.each do |entry|
        test_cases << synthetic_case(entry[:tc], "fail", notes: entry[:category])
          .merge(category: entry[:category])
      end
    end

    verdict = fields[:verdict] || status

    {
      test_id: fields[:test_id],
      status: status,
      test_cases: test_cases,
      summary: total.positive? ? "#{passed}/#{total} passed (#{verdict})" : verdict,
      observations: observations_from(fields[:issues])
    }
  end

  # Raise when the provider returned nothing usable.
  #
  # @param text [String, nil] Raw response text
  # @raise [ResultParser::ParseError] If text is nil or whitespace-only
  def self.ensure_present!(text)
    raise ResultParser::ParseError, "Empty response from CLI provider" if text.nil? || text.strip.empty?
  end

  # Parse the markdown return contract format
  #
  # @param text [String] Response text
  # @return [Hash, nil] Parsed fields or nil if format not matched
  def self.parse_markdown(text)
    fields = extract_fields(text, SUITE_FIELDS)

    # Need at least test_id and status for a valid parse
    fields if fields[:test_id] && fields[:status]
  end

  # Parse TC-level markdown return contract
  #
  # @param text [String] Response text
  # @return [Hash, nil] Parsed fields or nil if format not matched
  def self.parse_tc_markdown(text)
    fields = extract_fields(text, TC_FIELDS)

    # Need test_id, tc_id, and status for a valid TC parse
    fields if fields[:test_id] && fields[:tc_id] && fields[:status]
  end

  # Convert parsed markdown fields to normalized result format
  #
  # @param parsed [Hash] Parsed markdown fields (not modified)
  # @return [Hash] Normalized result matching ResultParser output format
  def self.to_normalized(parsed)
    passed = parsed[:passed].to_i
    failed = parsed[:failed].to_i
    total = parsed[:total].to_i

    # Build synthetic test cases from counts: passes first, then failures
    test_cases = Array.new(passed) { |i| synthetic_case(tc_label(i + 1), "pass") }
    failed.times { |i| test_cases << synthetic_case(tc_label(passed + i + 1), "fail") }

    {
      test_id: parsed[:test_id],
      status: normalize_status(parsed[:status]),
      test_cases: test_cases,
      summary: "#{passed}/#{total} passed",
      observations: observations_from(parsed[:issues])
    }
  end

  # Convert parsed TC markdown to normalized result format
  #
  # @param parsed [Hash] Parsed TC markdown fields (not modified)
  # @return [Hash] Normalized result with a single-entry :test_cases array
  def self.to_tc_normalized(parsed)
    status = normalize_status(parsed[:status])
    observations = observations_from(parsed[:issues])

    {
      test_id: parsed[:test_id],
      status: status,
      test_cases: [synthetic_case(parsed[:tc_id], status, notes: observations)],
      summary: "#{parsed[:tc_id]} #{status}",
      observations: observations
    }
  end

  # Extract every labelled field of a contract in one pass.
  #
  # @param text [String] Text to search
  # @param labels [Hash{Symbol => String}] Result key to bold-key label
  # @return [Hash{Symbol => String, nil}] Extracted values in label order
  def self.extract_fields(text, labels)
    labels.transform_values { |label| extract_field(text, label) }
  end

  # Extract a field value from markdown bold-key format
  #
  # @param text [String] Text to search
  # @param field_name [String] Field name (e.g., "Test ID")
  # @return [String, nil] Extracted value or nil
  def self.extract_field(text, field_name)
    # Match "- **Field Name**: value" or "**Field Name**: value".
    # Only spaces/tabs may follow the colon: \s would also consume the
    # newline and make an empty field swallow the following line.
    match = text.match(/\*\*#{Regexp.escape(field_name)}\*\*:[ \t]*(.*)$/i)
    return nil unless match

    value = match[1].strip
    value.empty? ? nil : value
  end

  # Normalize a status value: take first word, default to "unknown"
  #
  # @param value [String, nil] Raw status text
  # @return [String] Lowercased first word, or "unknown"
  def self.normalize_status(value)
    (value.to_s.strip.split(/\s+/).first || "unknown").downcase
  end

  # Turn an **Issues** value into observations text ("None" means no issues).
  #
  # @param issues [String, nil] Extracted issues value
  # @return [String] The issues text, or "" when absent or "none"
  def self.observations_from(issues)
    text = issues.to_s
    text.strip.casecmp("none").zero? ? "" : text
  end

  # Parse a "TC-001:category, TC-002" list into failed test case entries.
  #
  # @param value [String, nil] Extracted **Failed TCs** value
  # @return [Array<Hash>] Entries with :tc (upcased) and :category ("unknown" if missing)
  def self.parse_failed_tcs(value)
    return [] if value.nil? || value.strip.empty? || value.strip.casecmp("none").zero?

    value.split(",").filter_map do |entry|
      tc, category = entry.split(":", 2).map(&:strip)
      # "".split returns [], so tc is nil for blank entries such as "a, ,b"
      next if tc.nil? || tc.empty?

      {tc: tc.upcase, category: (category.nil? || category.empty? ? "unknown" : category)}
    end
  end

  # Format a synthetic test case ID, zero-padded to three digits.
  #
  # @param number [Integer] 1-based test case number
  # @return [String] e.g. "TC-001"
  def self.tc_label(number)
    "TC-#{format("%03d", number)}"
  end

  # Build a test case entry in the shape ResultParser produces.
  #
  # @param id [String] Test case ID
  # @param status [String] "pass" or "fail" (or any normalized status)
  # @param notes [String] Free-form notes
  # @return [Hash] Test case entry
  def self.synthetic_case(id, status, notes: "")
    {id: id, description: "", status: status, actual: "", notes: notes}
  end

  private_class_method :parse_markdown, :to_normalized, :extract_field,
    :parse_tc_markdown, :to_tc_normalized, :normalize_status,
    :parse_failed_tcs, :ensure_present!, :extract_fields,
    :observations_from, :tc_label, :synthetic_case
end
|
|
241
|
+
end
|
|
242
|
+
end
|
|
243
|
+
end
|
|
244
|
+
end
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ace
|
|
4
|
+
module Test
|
|
5
|
+
module EndToEndRunner
|
|
6
|
+
module Atoms
|
|
7
|
+
# Builds LLM prompts for suite-level final report synthesis
|
|
8
|
+
#
|
|
9
|
+
# Pure atom (no I/O). Constructs system and user prompts from
|
|
10
|
+
# pre-read test result data for LLM-based report generation.
|
|
11
|
+
class SuiteReportPromptBuilder
|
|
12
|
+
SYSTEM_PROMPT = <<~PROMPT
|
|
13
|
+
You are a senior QA engineer writing an E2E test suite report.
|
|
14
|
+
|
|
15
|
+
Generate a structured markdown report with YAML frontmatter. The report should provide actionable insights, not just raw data.
|
|
16
|
+
|
|
17
|
+
## Required Sections
|
|
18
|
+
|
|
19
|
+
1. **YAML Frontmatter** — suite-id, package, status, tests-run, executed timestamp
|
|
20
|
+
2. **Summary Table** — Test ID, Title, Status, Passed, Failed, Total columns
|
|
21
|
+
3. **Overall Line** — "X/Y test cases passed (Z%)"
|
|
22
|
+
4. **Failed Tests** (if any) — For each failed test: root cause analysis, failed test case details
|
|
23
|
+
5. **Friction Analysis** — Developer experience issues, tooling pain points, environment problems observed across tests
|
|
24
|
+
6. **Improvement Suggestions** — Concrete, actionable recommendations based on the failures and friction observed
|
|
25
|
+
7. **Positive Observations** — What worked well, reliable patterns, strengths
|
|
26
|
+
8. **Reports Table** — Test ID mapped to report directory names
|
|
27
|
+
|
|
28
|
+
## Formatting Rules
|
|
29
|
+
|
|
30
|
+
- Use GitHub-flavored markdown
|
|
31
|
+
- Frontmatter must be valid YAML between --- fences
|
|
32
|
+
- Keep root cause analysis concise but specific
|
|
33
|
+
- Friction analysis should focus on patterns across tests, not individual failures
|
|
34
|
+
- Suggestions should be actionable (not vague like "improve testing")
|
|
35
|
+
- If all tests pass, skip Failed Tests section and focus on positive observations and any friction
|
|
36
|
+
PROMPT
|
|
37
|
+
|
|
38
|
+
# Build user prompt from pre-read test result data
#
# Emits a header block (package, suite ID, status, execution time, test
# count, overall pass ratio) followed by one "###" section per result.
# Missing :passed / :total counts are treated as 0 in the overall ratio.
#
# @param results_data [Array<Hash>] Pre-read result data, each with:
#   :test_id, :title, :status, :passed, :failed, :total,
#   :test_cases, :report_dir_name, :summary_content, :experience_content
# @param package [String] Package name
# @param timestamp [String] Suite timestamp ID
# @param overall_status [String] "pass", "partial", or "fail"
# @param executed_at [String] ISO 8601 execution timestamp
# @return [String] User prompt for LLM
def build(results_data, package:, timestamp:, overall_status:, executed_at:)
  # Only the passed/total ratio is reported; the failed total was never used.
  total_passed = results_data.sum { |result| result[:passed].to_i }
  total_cases = results_data.sum { |result| result[:total].to_i }

  lines = [
    "# Suite Report Request",
    "",
    "**Package:** #{package}",
    "**Suite ID:** #{timestamp}",
    "**Status:** #{overall_status}",
    "**Executed:** #{executed_at}",
    "**Tests Run:** #{results_data.size}",
    "",
    "**Overall:** #{total_passed}/#{total_cases} test cases passed",
    "",
    "## Test Results",
    ""
  ]

  results_data.each do |result|
    lines << "### #{result[:test_id]}: #{result[:title]}"
    lines << "- **Status:** #{result[:status]}"
    lines << "- **Passed:** #{result[:passed]}/#{result[:total]}"
    lines << "- **Report Dir:** #{result[:report_dir_name]}" if result[:report_dir_name]

    if result[:test_cases]&.any?
      lines << "" << "**Test Cases:**"
      result[:test_cases].each do |tc|
        lines << "- #{tc[:id]}: #{tc[:description]} — #{tc[:status]}"
      end
    end

    # Optional free-form report bodies, each under its own bold heading
    {
      "**Summary Report:**" => result[:summary_content],
      "**Experience Report:**" => result[:experience_content]
    }.each do |heading, content|
      lines << "" << heading << content if content
    end

    lines << ""
  end

  lines.join("\n")
end
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ace
|
|
4
|
+
module Test
|
|
5
|
+
module EndToEndRunner
|
|
6
|
+
module Atoms
|
|
7
|
+
# Checks that the number of test cases an agent reported equals the number
# of test cases the scenario (or the requested subset) defines.
#
# A count mismatch indicates the agent did not execute exactly the defined
# standalone TC files; in that case an error description hash is returned.
class TcFidelityValidator
  # Validate parsed result against expected test case count
  #
  # @param parsed [Hash] Parsed result from SkillResultParser (:test_cases, :status, etc.)
  # @param scenario [Models::TestScenario] The scenario with expected TCs
  # @param filtered_tc_ids [Array<String>, nil] TC IDs filter (when subset was requested)
  # @return [Hash, nil] Error info hash if validation fails, nil if valid
  def self.validate(parsed, scenario, filtered_tc_ids: nil)
    wanted = filtered_tc_ids || scenario.test_case_ids
    # Nothing to compare against when no TCs are expected
    return nil if wanted.empty?

    cases = parsed[:test_cases]
    actual_size = cases.nil? ? 0 : cases.size
    wanted_size = wanted.size
    return nil if actual_size == wanted_size

    message = "TC fidelity mismatch: agent reported #{actual_size} test cases " +
      "but scenario has #{wanted_size} (#{wanted.join(", ")})"

    {
      error: message,
      expected_count: wanted_size,
      reported_count: actual_size,
      expected_ids: wanted
    }
  end
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ace
|
|
4
|
+
module Test
|
|
5
|
+
module EndToEndRunner
|
|
6
|
+
module Atoms
|
|
7
|
+
# Parses and normalizes test case IDs from markdown content
#
# Provides pure utility methods for:
# - Extracting TC-NNN headers from markdown test scenarios
# - Normalizing various test case ID formats to TC-NNN
# - Filtering test cases by ID list
#
# Normalization rules (consistent with workflow bash logic):
# - "TC-001" -> "TC-001" (already normalized)
# - "tc-001" -> "TC-001" (uppercased)
# - "001"    -> "TC-001" (prefix added)
# - "1"      -> "TC-001" (zero-padded and prefixed)
# - "TC-1"   -> "TC-001" (zero-padded)
#
# Note: normalize_identifier lowercases an alpha suffix ("TC-001a") while
# extract_from_content uppercases the whole ID ("TC-001A");
# validate_against_available compares case-insensitively, so both forms match.
class TestCaseParser
  # Pattern matching TC-NNN headers in markdown
  # Matches: ### TC-001: Description
  # The ID must be followed by whitespace, a colon, or the end of the content,
  # so a bare header on the last line (no trailing newline) is still found.
  TC_HEADER_PATTERN = /^###\s+(TC-\d+[a-z]?)(?:[\s:]|\z)/i

  # Normalize a single test case identifier to TC-NNN format
  #
  # @param id [String] Raw test case ID in any accepted format
  # @return [String] Normalized TC-NNN format
  # @raise [ArgumentError] If the ID cannot be normalized
  def self.normalize_identifier(id)
    raw = id.to_s.strip
    raise ArgumentError, "Empty test case ID" if raw.empty?

    # Drop an optional TC- prefix (any case), then require digits plus an
    # optional single-letter suffix and nothing else.
    match = raw.sub(/\Atc-/i, "").match(/\A(\d+)([a-z]?)\z/i)
    raise ArgumentError, "Invalid test case ID: '#{id}'" unless match

    # Zero-pad to 3 digits minimum
    "TC-#{format("%03d", match[1].to_i)}#{match[2].downcase}"
  end

  # Normalize multiple test case identifiers
  #
  # @param ids [Array<String>] Raw test case IDs
  # @return [Array<String>] Normalized TC-NNN format IDs
  # @raise [ArgumentError] If any ID cannot be normalized
  def self.normalize_identifiers(ids)
    ids.map { |id| normalize_identifier(id) }
  end

  # Parse a comma-separated string of test case IDs
  #
  # @param input [String] Comma-separated test case IDs (e.g., "tc-001,002,TC-3")
  # @return [Array<String>] Normalized TC-NNN format IDs
  # @raise [ArgumentError] If input is empty or contains invalid IDs
  def self.parse(input)
    raw = input.to_s.strip
    raise ArgumentError, "Empty test cases input" if raw.empty?

    ids = raw.split(",").map(&:strip).reject(&:empty?)
    # Reached for inputs made only of separators, e.g. ",,"
    raise ArgumentError, "No valid test case IDs found in: '#{input}'" if ids.empty?

    normalize_identifiers(ids)
  end

  # Extract available test case IDs from markdown content
  #
  # Scans for ### TC-NNN: headers in the test scenario markdown.
  #
  # @param content [String, nil] Markdown content of a test scenario
  # @return [Array<String>] Upcased test case IDs found (e.g., ["TC-001", "TC-002"]);
  #   empty when content is nil or has no TC headers
  def self.extract_from_content(content)
    content.to_s.scan(TC_HEADER_PATTERN).map { |(tc_id)| tc_id.upcase }
  end

  # Filter test case content by ID list
  #
  # Given a list of desired test case IDs and the available IDs in content,
  # validates that all requested IDs exist and returns the validated set.
  # IDs are compared case-insensitively.
  #
  # @param requested_ids [Array<String>] Normalized test case IDs to filter
  # @param available_ids [Array<String>] Test case IDs available in the scenario
  # @return [Array<String>] Validated test case IDs (requested_ids, unchanged)
  # @raise [ArgumentError] If any requested IDs are not found in the scenario
  def self.validate_against_available(requested_ids, available_ids)
    known = available_ids.map(&:upcase)
    missing = requested_ids.reject { |id| known.include?(id.upcase) }
    return requested_ids if missing.empty?

    raise ArgumentError,
      "Test case(s) not found: #{missing.join(", ")}. " \
      "Available: #{available_ids.join(", ")}"
  end
end
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
end
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ace/support/cli"
|
|
4
|
+
require "stringio"
|
|
5
|
+
require "ace/support/cli"
|
|
6
|
+
|
|
7
|
+
module Ace
|
|
8
|
+
module Test
|
|
9
|
+
module EndToEndRunner
|
|
10
|
+
module CLI
|
|
11
|
+
module Commands
|
|
12
|
+
# CLI command for running E2E test suite across all packages
|
|
13
|
+
#
|
|
14
|
+
# Discovers all E2E tests in the monorepo and executes them
|
|
15
|
+
# with optional parallel execution and affected package filtering.
|
|
16
|
+
class RunSuite < Ace::Support::Cli::Command
|
|
17
|
+
include Ace::Support::Cli::Base
|
|
18
|
+
|
|
19
|
+
desc <<~DESC.strip
|
|
20
|
+
Run E2E test suite across all packages
|
|
21
|
+
|
|
22
|
+
Discovers and executes TS-* test scenarios from all packages
|
|
23
|
+
in the monorepo. Tests run sequentially by default or in parallel
|
|
24
|
+
with --parallel flag. Use --affected to only test changed packages.
|
|
25
|
+
Use --only-failures to re-run only previously failed scenarios.
|
|
26
|
+
Optionally filter to specific packages with a comma-separated list.
|
|
27
|
+
|
|
28
|
+
Output:
|
|
29
|
+
Exit codes: 0 (all pass), 1 (any fail/error)
|
|
30
|
+
DESC
|
|
31
|
+
|
|
32
|
+
argument :packages, required: false,
|
|
33
|
+
desc: "Comma-separated package names (e.g., ace-bundle,ace-lint)"
|
|
34
|
+
|
|
35
|
+
example [
|
|
36
|
+
" # Run all tests sequentially",
|
|
37
|
+
"ace-bundle,ace-lint # Run only specified packages",
|
|
38
|
+
"--parallel 4 # Run with 4 parallel workers",
|
|
39
|
+
"--affected # Only test changed packages",
|
|
40
|
+
"--affected --parallel 8 # Parallel affected tests only",
|
|
41
|
+
"--only-failures # Re-run failed scenarios from cache",
|
|
42
|
+
"--affected --only-failures # Re-run failed scenarios in affected packages",
|
|
43
|
+
"--tags smoke,happy-path # Include scenarios by tag",
|
|
44
|
+
"--exclude-tags deep # Exclude scenarios by tag",
|
|
45
|
+
"--cli-args dangerously-skip-permissions # Pass args to provider"
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
option :parallel, type: :string, default: Molecules::ConfigLoader.default_parallel.to_s,
|
|
49
|
+
desc: "Number of parallel workers (0 = sequential)"
|
|
50
|
+
option :affected, type: :boolean, desc: "Only test affected packages"
|
|
51
|
+
option :only_failures, type: :boolean,
|
|
52
|
+
desc: "Re-run only previously failed scenarios"
|
|
53
|
+
option :cli_args, type: :string,
|
|
54
|
+
desc: "Extra args for CLI-based LLM providers"
|
|
55
|
+
option :provider, type: :string, default: Molecules::ConfigLoader.default_provider,
|
|
56
|
+
desc: "LLM provider:model (e.g., claude:sonnet, gemini:flash)"
|
|
57
|
+
option :timeout, type: :string, default: Molecules::ConfigLoader.default_timeout.to_s,
|
|
58
|
+
desc: "Timeout per test in seconds"
|
|
59
|
+
option :tags, type: :string, desc: "Comma-separated scenario tags to include"
|
|
60
|
+
option :exclude_tags, type: :string, desc: "Comma-separated scenario tags to exclude"
|
|
61
|
+
option :progress, type: :boolean, desc: "Enable live animated display"
|
|
62
|
+
option :verify, type: :boolean,
|
|
63
|
+
desc: "Run independent verifier pass for each scenario"
|
|
64
|
+
option :quiet, type: :boolean, aliases: %w[-q], desc: "Suppress non-essential output"
|
|
65
|
+
option :verbose, type: :boolean, aliases: %w[-v], desc: "Show verbose output"
|
|
66
|
+
option :debug, type: :boolean, aliases: %w[-d], desc: "Show debug output"
|
|
67
|
+
|
|
68
|
+
# Run the suite and translate the aggregate result into the CLI outcome.
#
# Coerces the :parallel and :timeout options to integers, builds a suite
# orchestrator (at least one worker; output discarded into a StringIO when
# quiet), runs it with the selected filters, and raises a CLI error when
# nothing ran or when any scenario failed or errored.
#
# @param packages [String, nil] Comma-separated package names to restrict the run
# @param options [Hash] Parsed CLI options
# @return [nil] When every executed test passed
# @raise [Ace::Support::Cli::Error] If no tests were found, or any test failed/errored
def call(packages: nil, **options)
  options = coerce_types(options, parallel: :integer, timeout: :integer)

  workers = options[:parallel]
  failures_only = options[:only_failures]
  include_tags = parse_csv_list(options[:tags])
  skip_tags = parse_csv_list(options[:exclude_tags])

  # Quiet mode swallows orchestrator output and disables the live display
  sink = quiet?(options) ? StringIO.new : $stdout
  live_display = options[:progress] && !quiet?(options)

  suite = Organisms::SuiteOrchestrator.new(
    max_parallel: [workers, 1].max,
    output: sink,
    progress: live_display
  )

  outcome = suite.run(
    parallel: workers > 0,
    affected: options[:affected],
    only_failures: failures_only,
    packages: packages,
    cli_args: options[:cli_args],
    provider: options[:provider],
    timeout: options[:timeout],
    tags: include_tags,
    exclude_tags: skip_tags,
    verify: options[:verify]
  )

  if outcome[:total].zero?
    reason = failures_only ? "No failed test scenarios found in cache" : "No tests found to run"
    raise Ace::Support::Cli::Error.new(reason)
  end

  # Exit with error if any test failed
  return nil unless outcome[:failed] > 0 || outcome[:errors] > 0

  broken = outcome[:failed] + outcome[:errors]
  raise Ace::Support::Cli::Error.new("#{broken} test(s) failed or errored")
end
|
|
117
|
+
|
|
118
|
+
private
|
|
119
|
+
|
|
120
|
+
# Split a comma-separated option value into lowercase, trimmed tokens.
#
# @param raw [String, nil] Raw option value (e.g., "smoke, Happy-Path")
# @return [Array<String>] Lowercased non-empty tokens; [] for nil or blank input
def parse_csv_list(raw)
  return [] if raw.nil? || raw.strip.empty?

  raw.split(",").filter_map do |piece|
    token = piece.strip
    token.downcase unless token.empty?
  end
end
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
end
|