ace-test-runner-e2e 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/.ace-defaults/e2e-runner/config.yml +70 -0
  3. data/.ace-defaults/nav/protocols/guide-sources/ace-test-runner-e2e.yml +11 -0
  4. data/.ace-defaults/nav/protocols/skill-sources/ace-test-runner-e2e.yml +19 -0
  5. data/.ace-defaults/nav/protocols/tmpl-sources/ace-test-runner-e2e.yml +12 -0
  6. data/.ace-defaults/nav/protocols/wfi-sources/ace-test-runner-e2e.yml +11 -0
  7. data/CHANGELOG.md +1166 -0
  8. data/LICENSE +21 -0
  9. data/README.md +42 -0
  10. data/Rakefile +15 -0
  11. data/exe/ace-test-e2e +15 -0
  12. data/exe/ace-test-e2e-sh +67 -0
  13. data/exe/ace-test-e2e-suite +13 -0
  14. data/handbook/guides/e2e-testing.g.md +124 -0
  15. data/handbook/guides/scenario-yml-reference.g.md +182 -0
  16. data/handbook/guides/tc-authoring.g.md +131 -0
  17. data/handbook/skills/as-e2e-create/SKILL.md +30 -0
  18. data/handbook/skills/as-e2e-fix/SKILL.md +35 -0
  19. data/handbook/skills/as-e2e-manage/SKILL.md +31 -0
  20. data/handbook/skills/as-e2e-plan-changes/SKILL.md +30 -0
  21. data/handbook/skills/as-e2e-review/SKILL.md +35 -0
  22. data/handbook/skills/as-e2e-rewrite/SKILL.md +31 -0
  23. data/handbook/skills/as-e2e-run/SKILL.md +48 -0
  24. data/handbook/skills/as-e2e-setup-sandbox/SKILL.md +34 -0
  25. data/handbook/templates/ace-taskflow-fixture.template.md +322 -0
  26. data/handbook/templates/agent-experience-report.template.md +89 -0
  27. data/handbook/templates/metadata.template.yml +49 -0
  28. data/handbook/templates/scenario.yml.template.yml +60 -0
  29. data/handbook/templates/tc-file.template.md +45 -0
  30. data/handbook/templates/test-report.template.md +94 -0
  31. data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +126 -0
  32. data/handbook/workflow-instructions/e2e/create.wf.md +395 -0
  33. data/handbook/workflow-instructions/e2e/execute.wf.md +253 -0
  34. data/handbook/workflow-instructions/e2e/fix.wf.md +166 -0
  35. data/handbook/workflow-instructions/e2e/manage.wf.md +179 -0
  36. data/handbook/workflow-instructions/e2e/plan-changes.wf.md +255 -0
  37. data/handbook/workflow-instructions/e2e/review.wf.md +286 -0
  38. data/handbook/workflow-instructions/e2e/rewrite.wf.md +281 -0
  39. data/handbook/workflow-instructions/e2e/run.wf.md +355 -0
  40. data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +461 -0
  41. data/lib/ace/test/end_to_end_runner/atoms/display_helpers.rb +234 -0
  42. data/lib/ace/test/end_to_end_runner/atoms/prompt_builder.rb +199 -0
  43. data/lib/ace/test/end_to_end_runner/atoms/result_parser.rb +166 -0
  44. data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +166 -0
  45. data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +244 -0
  46. data/lib/ace/test/end_to_end_runner/atoms/suite_report_prompt_builder.rb +103 -0
  47. data/lib/ace/test/end_to_end_runner/atoms/tc_fidelity_validator.rb +39 -0
  48. data/lib/ace/test/end_to_end_runner/atoms/test_case_parser.rb +108 -0
  49. data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +130 -0
  50. data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +156 -0
  51. data/lib/ace/test/end_to_end_runner/models/test_case.rb +47 -0
  52. data/lib/ace/test/end_to_end_runner/models/test_result.rb +115 -0
  53. data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +90 -0
  54. data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +92 -0
  55. data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +75 -0
  56. data/lib/ace/test/end_to_end_runner/molecules/failure_finder.rb +203 -0
  57. data/lib/ace/test/end_to_end_runner/molecules/fixture_copier.rb +35 -0
  58. data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +121 -0
  59. data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +182 -0
  60. data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +321 -0
  61. data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +131 -0
  62. data/lib/ace/test/end_to_end_runner/molecules/progress_display_manager.rb +172 -0
  63. data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +259 -0
  64. data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +254 -0
  65. data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +181 -0
  66. data/lib/ace/test/end_to_end_runner/molecules/simple_display_manager.rb +72 -0
  67. data/lib/ace/test/end_to_end_runner/molecules/suite_progress_display_manager.rb +223 -0
  68. data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +277 -0
  69. data/lib/ace/test/end_to_end_runner/molecules/suite_simple_display_manager.rb +116 -0
  70. data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +136 -0
  71. data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +332 -0
  72. data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +830 -0
  73. data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +442 -0
  74. data/lib/ace/test/end_to_end_runner/version.rb +9 -0
  75. data/lib/ace/test/end_to_end_runner.rb +71 -0
  76. metadata +220 -0
@@ -0,0 +1,244 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ace
4
+ module Test
5
+ module EndToEndRunner
6
+ module Atoms
7
# Parses structured markdown results from CLI-provider skill/workflow execution
#
# CLI providers return results in the subagent return contract format:
#   - **Test ID**: TS-LINT-001
#   - **Status**: pass
#   - **Passed**: 8
#   - **Failed**: 0
#   - **Total**: 8
#   - **Report Paths**: 8p5jo2-lint-ts001-reports/*
#   - **Issues**: None
#
# Falls back to ResultParser.parse() for JSON responses.
class SkillResultParser
  # Fields that must all be present for a response to be treated as a
  # verifier-mode contract; otherwise parse_verifier defers to parse.
  VERIFIER_REQUIRED_FIELDS = %i[test_id status tcs_passed tcs_failed tcs_total].freeze

  # Parse response text from a CLI provider
  #
  # @param text [String] Raw response text
  # @return [Hash] Parsed result with :test_id, :status, :test_cases, :summary, :observations
  # @raise [ResultParser::ParseError] If neither markdown nor JSON can be parsed
  def self.parse(text)
    ensure_response!(text)

    parsed = parse_markdown(text)
    return to_normalized(parsed) if parsed

    # Fall back to JSON parsing via ResultParser
    ResultParser.parse(text)
  end

  # Parse TC-level response text from a CLI provider
  #
  # Handles TC-level markdown with **TC ID** field. Falls back to
  # parse() if the response has the multi-TC format.
  #
  # @param text [String] Raw response text
  # @return [Hash] Parsed result with single-entry :test_cases array
  # @raise [ResultParser::ParseError] If neither format can be parsed
  def self.parse_tc(text)
    ensure_response!(text)

    parsed = parse_tc_markdown(text)
    return to_tc_normalized(parsed) if parsed

    # Fall back to standard parse (handles both markdown and JSON)
    parse(text)
  end

  # Parse verifier-mode markdown return contract.
  #
  # Passed test cases receive synthetic sequential IDs that skip any ID
  # listed in "Failed TCs"; failed test cases use the listed IDs, or
  # synthetic IDs after the passed ones when no list is given.
  #
  # @param text [String] Raw response text
  # @return [Hash] Normalized test result payload
  # @raise [ResultParser::ParseError] If the response is empty
  def self.parse_verifier(text)
    ensure_response!(text)

    fields = {
      test_id: extract_field(text, "Test ID"),
      status: extract_field(text, "Status"),
      tcs_passed: extract_field(text, "TCs Passed"),
      tcs_failed: extract_field(text, "TCs Failed"),
      tcs_total: extract_field(text, "TCs Total"),
      verdict: extract_field(text, "Verdict"),
      failed_tcs: extract_field(text, "Failed TCs"),
      issues: extract_field(text, "Issues")
    }
    return parse(text) unless VERIFIER_REQUIRED_FIELDS.all? { |key| fields[key] }

    passed = fields[:tcs_passed].to_i
    failed = fields[:tcs_failed].to_i
    total = fields[:tcs_total].to_i
    status = normalize_status(fields[:status])

    failed_entries = parse_failed_tcs(fields[:failed_tcs])
    # A plain array keeps this free of the Set dependency; the list is tiny.
    failed_ids = failed_entries.map { |entry| entry[:tc] }

    test_cases = []
    next_number = 0
    passed.times do
      next_number += 1
      # Never hand a passing case an ID that was reported as failed.
      next_number += 1 while failed_ids.include?(tc_id(next_number))
      test_cases << synthetic_case(tc_id(next_number), "pass")
    end

    if failed_entries.empty?
      failed.times { |i| test_cases << synthetic_case(tc_id(passed + i + 1), "fail") }
    else
      failed_entries.each do |entry|
        failed_case = synthetic_case(entry[:tc], "fail", notes: entry[:category])
        test_cases << failed_case.merge(category: entry[:category])
      end
    end

    verdict = fields[:verdict] || status
    summary = total.positive? ? "#{passed}/#{total} passed (#{verdict})" : verdict

    {
      test_id: fields[:test_id],
      status: status,
      test_cases: test_cases,
      summary: summary,
      observations: observations_from(fields[:issues])
    }
  end

  # Raise when the provider returned nothing usable.
  #
  # @param text [String, nil] Raw response text
  # @raise [ResultParser::ParseError] If text is nil or whitespace-only
  def self.ensure_response!(text)
    raise ResultParser::ParseError, "Empty response from CLI provider" if text.nil? || text.strip.empty?
  end

  # Parse the markdown return contract format
  #
  # @param text [String] Response text
  # @return [Hash, nil] Parsed fields or nil if format not matched
  def self.parse_markdown(text)
    fields = {
      test_id: extract_field(text, "Test ID"),
      status: extract_field(text, "Status"),
      passed: extract_field(text, "Passed"),
      failed: extract_field(text, "Failed"),
      total: extract_field(text, "Total"),
      report_paths: extract_field(text, "Report Paths"),
      issues: extract_field(text, "Issues")
    }

    # Need at least test_id and status for a valid parse
    fields if fields[:test_id] && fields[:status]
  end

  # Convert parsed markdown fields to normalized result format
  #
  # Does not modify the hash it is given.
  #
  # @param parsed [Hash] Parsed markdown fields
  # @return [Hash] Normalized result matching ResultParser output format
  def self.to_normalized(parsed)
    passed = parsed[:passed].to_i
    failed = parsed[:failed].to_i
    # When the Total field is absent, derive it instead of reporting "N/0 passed".
    total = parsed[:total] ? parsed[:total].to_i : passed + failed

    # Build synthetic test cases from counts: passes first, then failures.
    test_cases = passed.times.map { |i| synthetic_case(tc_id(i + 1), "pass") } +
      failed.times.map { |i| synthetic_case(tc_id(passed + i + 1), "fail") }

    {
      test_id: parsed[:test_id],
      status: normalize_status(parsed[:status]),
      test_cases: test_cases,
      summary: "#{passed}/#{total} passed",
      observations: observations_from(parsed[:issues])
    }
  end

  # Extract a field value from markdown bold-key format
  #
  # Matches "- **Field Name**: value" or "**Field Name**: value". The value
  # must be on the same line as the key: only spaces and tabs are skipped
  # after the colon, so a blank field yields nil rather than the next line.
  #
  # @param text [String] Text to search
  # @param field_name [String] Field name (e.g., "Test ID")
  # @return [String, nil] Extracted value or nil
  def self.extract_field(text, field_name)
    match = text.match(/\*\*#{Regexp.escape(field_name)}\*\*:[ \t]*(.+?)$/i)
    return nil unless match

    value = match[1].strip
    value.empty? ? nil : value
  end

  # Parse TC-level markdown return contract
  #
  # @param text [String] Response text
  # @return [Hash, nil] Parsed fields or nil if format not matched
  def self.parse_tc_markdown(text)
    fields = {
      test_id: extract_field(text, "Test ID"),
      tc_id: extract_field(text, "TC ID"),
      status: extract_field(text, "Status"),
      report_paths: extract_field(text, "Report Paths"),
      issues: extract_field(text, "Issues")
    }

    # Need test_id, tc_id, and status for a valid TC parse
    fields if fields[:test_id] && fields[:tc_id] && fields[:status]
  end

  # Convert parsed TC markdown to normalized result format
  #
  # Does not modify the hash it is given.
  #
  # @param parsed [Hash] Parsed TC markdown fields
  # @return [Hash] Normalized result with a single test case entry
  def self.to_tc_normalized(parsed)
    status = normalize_status(parsed[:status])
    observations = observations_from(parsed[:issues])

    {
      test_id: parsed[:test_id],
      status: status,
      test_cases: [synthetic_case(parsed[:tc_id], status, notes: observations)],
      summary: "#{parsed[:tc_id]} #{status}",
      observations: observations
    }
  end

  # Normalize a status value: take first word, default to "unknown"
  #
  # @param value [String, nil] Raw status text
  # @return [String] Lowercased first word, or "unknown"
  def self.normalize_status(value)
    (value.to_s.strip.split(/\s+/).first || "unknown").downcase
  end

  # Parse the "Failed TCs" field ("TC-002:category, TC-005").
  #
  # @param value [String, nil] Raw field value
  # @return [Array<Hash>] Entries with :tc (upcased) and :category ("unknown" when absent)
  def self.parse_failed_tcs(value)
    return [] if value.nil? || value.strip.empty? || value.strip.casecmp("none").zero?

    value.split(",").map(&:strip).filter_map do |entry|
      tc, category = entry.split(":", 2).map { |part| part.to_s.strip }
      next if tc.empty?

      {tc: tc.upcase, category: (category.to_s.empty? ? "unknown" : category)}
    end
  end

  # Turn the Issues field into observations text; a missing field or the
  # literal "none" (any case) means there is nothing to report.
  #
  # @param issues [String, nil] Raw Issues value
  # @return [String] Observations text, possibly empty
  def self.observations_from(issues)
    text = issues.to_s.strip
    text.casecmp("none").zero? ? "" : text
  end

  # Format a synthetic test case ID, zero-padded to three digits.
  #
  # @param number [Integer] 1-based test case number
  # @return [String] e.g. "TC-007"
  def self.tc_id(number)
    "TC-#{format("%03d", number)}"
  end

  # Build one test case entry in the normalized result shape.
  #
  # @param id [String] Test case ID
  # @param status [String] "pass", "fail", ...
  # @param notes [String] Optional notes
  # @return [Hash]
  def self.synthetic_case(id, status, notes: "")
    {id: id, description: "", status: status, actual: "", notes: notes}
  end

  private_class_method :parse_markdown, :to_normalized, :extract_field,
    :parse_tc_markdown, :to_tc_normalized, :normalize_status,
    :parse_failed_tcs, :ensure_response!, :observations_from,
    :tc_id, :synthetic_case
end
241
+ end
242
+ end
243
+ end
244
+ end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ace
4
+ module Test
5
+ module EndToEndRunner
6
+ module Atoms
7
# Builds LLM prompts for suite-level final report synthesis
#
# Pure atom (no I/O). Constructs system and user prompts from
# pre-read test result data for LLM-based report generation.
class SuiteReportPromptBuilder
  SYSTEM_PROMPT = <<~PROMPT
    You are a senior QA engineer writing an E2E test suite report.

    Generate a structured markdown report with YAML frontmatter. The report should provide actionable insights, not just raw data.

    ## Required Sections

    1. **YAML Frontmatter** — suite-id, package, status, tests-run, executed timestamp
    2. **Summary Table** — Test ID, Title, Status, Passed, Failed, Total columns
    3. **Overall Line** — "X/Y test cases passed (Z%)"
    4. **Failed Tests** (if any) — For each failed test: root cause analysis, failed test case details
    5. **Friction Analysis** — Developer experience issues, tooling pain points, environment problems observed across tests
    6. **Improvement Suggestions** — Concrete, actionable recommendations based on the failures and friction observed
    7. **Positive Observations** — What worked well, reliable patterns, strengths
    8. **Reports Table** — Test ID mapped to report directory names

    ## Formatting Rules

    - Use GitHub-flavored markdown
    - Frontmatter must be valid YAML between --- fences
    - Keep root cause analysis concise but specific
    - Friction analysis should focus on patterns across tests, not individual failures
    - Suggestions should be actionable (not vague like "improve testing")
    - If all tests pass, skip Failed Tests section and focus on positive observations and any friction
  PROMPT

  # Build user prompt from pre-read test result data
  #
  # @param results_data [Array<Hash>] Pre-read result data, each with:
  #   :test_id, :title, :status, :passed, :failed, :total,
  #   :test_cases, :report_dir_name, :summary_content, :experience_content
  # @param package [String] Package name
  # @param timestamp [String] Suite timestamp ID
  # @param overall_status [String] "pass", "partial", or "fail"
  # @param executed_at [String] ISO 8601 execution timestamp
  # @return [String] User prompt for LLM
  def build(results_data, package:, timestamp:, overall_status:, executed_at:)
    # Only the passed and total counts feed the overall line. Missing
    # counts are treated as zero so one incomplete result cannot abort
    # the whole prompt.
    total_passed = results_data.sum { |result| result[:passed].to_i }
    total_tc = results_data.sum { |result| result[:total].to_i }

    parts = [
      "# Suite Report Request",
      "",
      "**Package:** #{package}",
      "**Suite ID:** #{timestamp}",
      "**Status:** #{overall_status}",
      "**Executed:** #{executed_at}",
      "**Tests Run:** #{results_data.size}",
      "",
      "**Overall:** #{total_passed}/#{total_tc} test cases passed",
      "",
      "## Test Results",
      ""
    ]

    results_data.each { |result| parts.concat(result_section(result)) }

    parts.join("\n")
  end

  private

  # Render the lines describing a single test result, ending with a blank
  # separator line.
  #
  # @param result [Hash] One entry of results_data
  # @return [Array<String>] Prompt lines for that result
  def result_section(result)
    lines = [
      "### #{result[:test_id]}: #{result[:title]}",
      "- **Status:** #{result[:status]}",
      "- **Passed:** #{result[:passed]}/#{result[:total]}"
    ]
    lines << "- **Report Dir:** #{result[:report_dir_name]}" if result[:report_dir_name]

    if result[:test_cases]&.any?
      lines << "" << "**Test Cases:**"
      result[:test_cases].each do |tc|
        lines << "- #{tc[:id]}: #{tc[:description]} — #{tc[:status]}"
      end
    end

    lines << "" << "**Summary Report:**" << result[:summary_content] if result[:summary_content]
    lines << "" << "**Experience Report:**" << result[:experience_content] if result[:experience_content]

    lines << ""
  end
end
100
+ end
101
+ end
102
+ end
103
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ace
4
+ module Test
5
+ module EndToEndRunner
6
+ module Atoms
7
# Checks that the test cases an agent reports line up with the test cases
# the scenario defines.
#
# Guards against agents that make up their own test cases rather than
# running the standalone TC files. A failed check is reported as an error
# info hash.
class TcFidelityValidator
  # Compare the number of reported test cases with the number expected.
  #
  # @param parsed [Hash] Parsed result from SkillResultParser (:test_cases, :status, etc.)
  # @param scenario [Models::TestScenario] The scenario with expected TCs
  # @param filtered_tc_ids [Array<String>, nil] TC IDs filter (when subset was requested)
  # @return [Hash, nil] Error info hash if validation fails, nil if valid
  def self.validate(parsed, scenario, filtered_tc_ids: nil)
    wanted_ids = filtered_tc_ids || scenario.test_case_ids
    # Nothing expected means there is nothing to compare against.
    return if wanted_ids.empty?

    wanted_total = wanted_ids.size
    actual_total = parsed[:test_cases]&.size || 0
    return if actual_total == wanted_total

    message = "TC fidelity mismatch: agent reported #{actual_total} test cases " \
      "but scenario has #{wanted_total} (#{wanted_ids.join(", ")})"

    {
      error: message,
      expected_count: wanted_total,
      reported_count: actual_total,
      expected_ids: wanted_ids
    }
  end
end
36
+ end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,108 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ace
4
+ module Test
5
+ module EndToEndRunner
6
+ module Atoms
7
# Utilities for reading and canonicalizing test case IDs.
#
# Stateless helpers that:
# - pull TC-NNN headers out of markdown test scenarios
# - bring differently written test case IDs into the TC-NNN form
# - check a requested ID list against the IDs a scenario offers
#
# Canonical form (consistent with workflow bash logic):
# - "TC-001" -> "TC-001" (already normalized)
# - "tc-001" -> "TC-001" (uppercased)
# - "001"    -> "TC-001" (prefix added)
# - "1"      -> "TC-001" (zero-padded and prefixed)
# - "TC-1"   -> "TC-001" (zero-padded)
class TestCaseParser
  # Recognizes markdown headers of the form "### TC-001: Description".
  TC_HEADER_PATTERN = /^###\s+(TC-\d+[a-z]?)[\s:]/i

  # Bring one test case identifier into TC-NNN form.
  #
  # @param id [String] Raw test case ID in any accepted format
  # @return [String] Normalized TC-NNN format
  # @raise [ArgumentError] If the ID cannot be normalized
  def self.normalize_identifier(id)
    text = id.to_s.strip
    raise ArgumentError, "Empty test case ID" if text.empty?

    # Drop an optional leading "TC-" (any case), then expect digits plus
    # at most one trailing letter.
    parts = text.sub(/\Atc-/i, "").match(/\A(\d+)([a-z]?)\z/i)
    raise ArgumentError, "Invalid test case ID: '#{id}'" unless parts

    digits, letter = parts.captures

    # At least three digits, left-padded with zeros.
    "TC-#{digits.to_i.to_s.rjust(3, "0")}#{letter.downcase}"
  end

  # Bring a list of test case identifiers into TC-NNN form.
  #
  # @param ids [Array<String>] Raw test case IDs
  # @return [Array<String>] Normalized TC-NNN format IDs
  def self.normalize_identifiers(ids)
    ids.map { |raw_id| normalize_identifier(raw_id) }
  end

  # Split a comma-separated ID string and normalize every entry.
  #
  # @param input [String] Comma-separated test case IDs (e.g., "tc-001,002,TC-3")
  # @return [Array<String>] Normalized TC-NNN format IDs
  # @raise [ArgumentError] If input is empty or contains invalid IDs
  def self.parse(input)
    text = input.to_s.strip
    raise ArgumentError, "Empty test cases input" if text.empty?

    tokens = text.split(",").filter_map do |piece|
      trimmed = piece.strip
      trimmed unless trimmed.empty?
    end
    raise ArgumentError, "No valid test case IDs found in: '#{input}'" if tokens.empty?

    normalize_identifiers(tokens)
  end

  # Collect the test case IDs that appear as ### TC-NNN: headers.
  #
  # @param content [String] Markdown content of a test scenario
  # @return [Array<String>] List of test case IDs found (e.g., ["TC-001", "TC-002"])
  def self.extract_from_content(content)
    content.scan(TC_HEADER_PATTERN).map { |(header_id)| header_id.upcase }
  end

  # Confirm that every requested ID is offered by the scenario.
  #
  # The comparison ignores letter case. On success the requested list is
  # handed back unchanged.
  #
  # @param requested_ids [Array<String>] Normalized test case IDs to filter
  # @param available_ids [Array<String>] Test case IDs available in the scenario
  # @return [Array<String>] Validated test case IDs
  # @raise [ArgumentError] If any requested IDs are not found in the scenario
  def self.validate_against_available(requested_ids, available_ids)
    known = available_ids.map(&:upcase)
    unknown = requested_ids.select { |id| !known.include?(id.upcase) }
    return requested_ids if unknown.empty?

    raise ArgumentError,
      "Test case(s) not found: #{unknown.join(", ")}. Available: #{available_ids.join(", ")}"
  end
end
105
+ end
106
+ end
107
+ end
108
+ end
@@ -0,0 +1,130 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ace/support/cli"
4
+ require "stringio"
5
+ require "ace/support/cli"
6
+
7
+ module Ace
8
+ module Test
9
+ module EndToEndRunner
10
+ module CLI
11
+ module Commands
12
# CLI command that runs the E2E test suite for every package
#
# Finds the E2E tests in the monorepo and hands them to the suite
# orchestrator, optionally in parallel and optionally restricted to
# affected packages, previously failed scenarios, or tagged scenarios.
class RunSuite < Ace::Support::Cli::Command
  include Ace::Support::Cli::Base

  desc <<~DESC.strip
    Run E2E test suite across all packages

    Discovers and executes TS-* test scenarios from all packages
    in the monorepo. Tests run sequentially by default or in parallel
    with --parallel flag. Use --affected to only test changed packages.
    Use --only-failures to re-run only previously failed scenarios.
    Optionally filter to specific packages with a comma-separated list.

    Output:
    Exit codes: 0 (all pass), 1 (any fail/error)
  DESC

  argument :packages, required: false,
    desc: "Comma-separated package names (e.g., ace-bundle,ace-lint)"

  example [
    " # Run all tests sequentially",
    "ace-bundle,ace-lint # Run only specified packages",
    "--parallel 4 # Run with 4 parallel workers",
    "--affected # Only test changed packages",
    "--affected --parallel 8 # Parallel affected tests only",
    "--only-failures # Re-run failed scenarios from cache",
    "--affected --only-failures # Re-run failed scenarios in affected packages",
    "--tags smoke,happy-path # Include scenarios by tag",
    "--exclude-tags deep # Exclude scenarios by tag",
    "--cli-args dangerously-skip-permissions # Pass args to provider"
  ]

  option :parallel, type: :string, default: Molecules::ConfigLoader.default_parallel.to_s,
    desc: "Number of parallel workers (0 = sequential)"
  option :affected, type: :boolean, desc: "Only test affected packages"
  option :only_failures, type: :boolean,
    desc: "Re-run only previously failed scenarios"
  option :cli_args, type: :string,
    desc: "Extra args for CLI-based LLM providers"
  option :provider, type: :string, default: Molecules::ConfigLoader.default_provider,
    desc: "LLM provider:model (e.g., claude:sonnet, gemini:flash)"
  option :timeout, type: :string, default: Molecules::ConfigLoader.default_timeout.to_s,
    desc: "Timeout per test in seconds"
  option :tags, type: :string, desc: "Comma-separated scenario tags to include"
  option :exclude_tags, type: :string, desc: "Comma-separated scenario tags to exclude"
  option :progress, type: :boolean, desc: "Enable live animated display"
  option :verify, type: :boolean,
    desc: "Run independent verifier pass for each scenario"
  option :quiet, type: :boolean, aliases: %w[-q], desc: "Suppress non-essential output"
  option :verbose, type: :boolean, aliases: %w[-v], desc: "Show verbose output"
  option :debug, type: :boolean, aliases: %w[-d], desc: "Show debug output"

  # Run the suite and raise a CLI error when nothing ran or anything failed.
  #
  # @param packages [String, nil] Optional comma-separated package filter
  # @param options [Hash] Parsed command-line options
  # @raise [Ace::Support::Cli::Error] When no tests were found, or when at
  #   least one test failed or errored
  def call(packages: nil, **options)
    options = coerce_types(options, parallel: :integer, timeout: :integer)

    worker_count = options[:parallel]
    rerun_failures = options[:only_failures]

    run_settings = {
      parallel: worker_count > 0,
      affected: options[:affected],
      only_failures: rerun_failures,
      packages: packages,
      cli_args: options[:cli_args],
      provider: options[:provider],
      timeout: options[:timeout],
      tags: parse_csv_list(options[:tags]),
      exclude_tags: parse_csv_list(options[:exclude_tags]),
      verify: options[:verify]
    }

    # Quiet mode writes to a throwaway buffer and turns the live display off.
    sink = quiet?(options) ? StringIO.new : $stdout
    live_display = options[:progress] && !quiet?(options)

    suite = Organisms::SuiteOrchestrator.new(
      # The orchestrator always gets at least one worker slot.
      max_parallel: [worker_count, 1].max,
      output: sink,
      progress: live_display
    )
    results = suite.run(**run_settings)

    if results[:total].zero?
      nothing_ran = rerun_failures ? "No failed test scenarios found in cache" : "No tests found to run"
      raise Ace::Support::Cli::Error.new(nothing_ran)
    end

    # Failures and errors both produce an error exit.
    return unless results[:failed] > 0 || results[:errors] > 0

    unsuccessful = results[:failed] + results[:errors]
    raise Ace::Support::Cli::Error.new("#{unsuccessful} test(s) failed or errored")
  end

  private

  # Split a comma-separated option value into lowercase, non-empty entries.
  #
  # @param raw [String, nil] Raw option value
  # @return [Array<String>] Cleaned entries, empty when nothing was given
  def parse_csv_list(raw)
    return [] if raw.nil? || raw.strip.empty?

    raw.split(",").filter_map do |piece|
      entry = piece.strip
      entry.downcase unless entry.empty?
    end
  end
end
126
+ end
127
+ end
128
+ end
129
+ end
130
+ end