ace-test-runner-e2e 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.ace-defaults/e2e-runner/config.yml +70 -0
- data/.ace-defaults/nav/protocols/guide-sources/ace-test-runner-e2e.yml +11 -0
- data/.ace-defaults/nav/protocols/skill-sources/ace-test-runner-e2e.yml +19 -0
- data/.ace-defaults/nav/protocols/tmpl-sources/ace-test-runner-e2e.yml +12 -0
- data/.ace-defaults/nav/protocols/wfi-sources/ace-test-runner-e2e.yml +11 -0
- data/CHANGELOG.md +1166 -0
- data/LICENSE +21 -0
- data/README.md +42 -0
- data/Rakefile +15 -0
- data/exe/ace-test-e2e +15 -0
- data/exe/ace-test-e2e-sh +67 -0
- data/exe/ace-test-e2e-suite +13 -0
- data/handbook/guides/e2e-testing.g.md +124 -0
- data/handbook/guides/scenario-yml-reference.g.md +182 -0
- data/handbook/guides/tc-authoring.g.md +131 -0
- data/handbook/skills/as-e2e-create/SKILL.md +30 -0
- data/handbook/skills/as-e2e-fix/SKILL.md +35 -0
- data/handbook/skills/as-e2e-manage/SKILL.md +31 -0
- data/handbook/skills/as-e2e-plan-changes/SKILL.md +30 -0
- data/handbook/skills/as-e2e-review/SKILL.md +35 -0
- data/handbook/skills/as-e2e-rewrite/SKILL.md +31 -0
- data/handbook/skills/as-e2e-run/SKILL.md +48 -0
- data/handbook/skills/as-e2e-setup-sandbox/SKILL.md +34 -0
- data/handbook/templates/ace-taskflow-fixture.template.md +322 -0
- data/handbook/templates/agent-experience-report.template.md +89 -0
- data/handbook/templates/metadata.template.yml +49 -0
- data/handbook/templates/scenario.yml.template.yml +60 -0
- data/handbook/templates/tc-file.template.md +45 -0
- data/handbook/templates/test-report.template.md +94 -0
- data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +126 -0
- data/handbook/workflow-instructions/e2e/create.wf.md +395 -0
- data/handbook/workflow-instructions/e2e/execute.wf.md +253 -0
- data/handbook/workflow-instructions/e2e/fix.wf.md +166 -0
- data/handbook/workflow-instructions/e2e/manage.wf.md +179 -0
- data/handbook/workflow-instructions/e2e/plan-changes.wf.md +255 -0
- data/handbook/workflow-instructions/e2e/review.wf.md +286 -0
- data/handbook/workflow-instructions/e2e/rewrite.wf.md +281 -0
- data/handbook/workflow-instructions/e2e/run.wf.md +355 -0
- data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +461 -0
- data/lib/ace/test/end_to_end_runner/atoms/display_helpers.rb +234 -0
- data/lib/ace/test/end_to_end_runner/atoms/prompt_builder.rb +199 -0
- data/lib/ace/test/end_to_end_runner/atoms/result_parser.rb +166 -0
- data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +166 -0
- data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +244 -0
- data/lib/ace/test/end_to_end_runner/atoms/suite_report_prompt_builder.rb +103 -0
- data/lib/ace/test/end_to_end_runner/atoms/tc_fidelity_validator.rb +39 -0
- data/lib/ace/test/end_to_end_runner/atoms/test_case_parser.rb +108 -0
- data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +130 -0
- data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +156 -0
- data/lib/ace/test/end_to_end_runner/models/test_case.rb +47 -0
- data/lib/ace/test/end_to_end_runner/models/test_result.rb +115 -0
- data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +90 -0
- data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +92 -0
- data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +75 -0
- data/lib/ace/test/end_to_end_runner/molecules/failure_finder.rb +203 -0
- data/lib/ace/test/end_to_end_runner/molecules/fixture_copier.rb +35 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +121 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +182 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +321 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +131 -0
- data/lib/ace/test/end_to_end_runner/molecules/progress_display_manager.rb +172 -0
- data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +259 -0
- data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +254 -0
- data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +181 -0
- data/lib/ace/test/end_to_end_runner/molecules/simple_display_manager.rb +72 -0
- data/lib/ace/test/end_to_end_runner/molecules/suite_progress_display_manager.rb +223 -0
- data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +277 -0
- data/lib/ace/test/end_to_end_runner/molecules/suite_simple_display_manager.rb +116 -0
- data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +136 -0
- data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +332 -0
- data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +830 -0
- data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +442 -0
- data/lib/ace/test/end_to_end_runner/version.rb +9 -0
- data/lib/ace/test/end_to_end_runner.rb +71 -0
- metadata +220 -0
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
require "yaml"
|
|
5
|
+
|
|
6
|
+
module Ace
|
|
7
|
+
module Test
|
|
8
|
+
module EndToEndRunner
|
|
9
|
+
module Molecules
|
|
10
|
+
# Writes E2E test reports to disk
|
|
11
|
+
#
|
|
12
|
+
# Generates summary, experience, and metadata reports following
|
|
13
|
+
# the standard report path contract.
|
|
14
|
+
class ReportWriter
|
|
15
|
+
# Write all reports for a test result
|
|
16
|
+
#
|
|
17
|
+
# @param result [Models::TestResult] The test result
|
|
18
|
+
# @param scenario [Models::TestScenario] The test scenario
|
|
19
|
+
# @param report_dir [String] Directory to write reports to
|
|
20
|
+
# @param test_case [Models::TestCase, nil] Optional single test case for TC-level reports
|
|
21
|
+
# @return [Hash] Paths to written report files
|
|
22
|
+
def write(result, scenario, report_dir:, test_case: nil)
  # Create the destination (and any missing parents) before writing reports.
  FileUtils.mkdir_p(report_dir)

  # Reports are written in this order; each writer returns the path it wrote.
  {
    summary: write_summary(result, scenario, report_dir, test_case),
    experience: write_experience(result, scenario, report_dir, test_case),
    metadata: write_metadata(result, scenario, report_dir, test_case)
  }
end
|
|
35
|
+
|
|
36
|
+
private
|
|
37
|
+
|
|
38
|
+
# Write summary report
|
|
39
|
+
# @return [String] Path to written file
|
|
40
|
+
def write_summary(result, scenario, report_dir, test_case = nil)
  path = File.join(report_dir, "summary.r.md")
  executed_at = result.completed_at.utc.strftime("%Y-%m-%dT%H:%M:%SZ")

  # One markdown table row per test case. `to_s` keeps an entry without a
  # status from aborting the whole report with NoMethodError.
  tc_rows = result.test_cases.map do |tc|
    "| #{tc[:id]} | #{tc[:description]} | #{tc[:status].to_s.capitalize} |"
  end.join("\n")

  goal_criteria_sections = build_goal_criteria_sections(result.test_cases)

  # Structured list of failures for the frontmatter.
  failed_entries = result.test_cases
    .select { |tc| tc[:status] == "fail" }
    .map do |tc|
      {
        "tc" => tc[:id],
        "category" => tc[:category] || "runner-error",
        "evidence" => tc[:notes].to_s
      }
    end

  # An "error" status always yields a "fail" verdict, so this report agrees
  # with the verdict that write_metadata stores for the same result.
  verdict = if result.status == "error"
    "fail"
  elsif result.failed_count.zero?
    "pass"
  elsif result.passed_count.zero?
    "fail"
  else
    "partial"
  end

  # Fraction of passed test cases, 0.0 when nothing ran.
  score = result.total_count.zero? ? 0.0 : (result.passed_count.to_f / result.total_count).round(3)

  frontmatter_hash = {
    "test-id" => result.test_id
  }
  if test_case
    frontmatter_hash["tc-id"] = test_case.tc_id
    frontmatter_hash["scenario-id"] = scenario.test_id
  end
  frontmatter_hash.merge!(
    "package" => scenario.package,
    "agent" => "ace-test-e2e",
    "executed" => executed_at,
    "status" => result.status,
    "tcs-passed" => result.passed_count,
    "tcs-failed" => result.failed_count,
    "tcs-total" => result.total_count,
    "score" => score,
    "verdict" => verdict,
    "failed" => failed_entries
  )
  # YAML.dump emits its own "---" header (and possibly a "..." trailer);
  # strip both because the report template supplies the delimiters.
  frontmatter_yaml = YAML.dump(frontmatter_hash).sub(/\A---\s*\n/, "").sub(/\.\.\.\s*\n\z/, "")

  # Extra table rows shown only for TC-level reports.
  tc_info_rows = if test_case
    "| TC ID | #{test_case.tc_id} |\n| TC Title | #{test_case.title} |\n"
  else
    ""
  end

  content = <<~REPORT
    ---
    #{frontmatter_yaml.rstrip}
    ---

    # E2E Test Report: #{result.test_id}

    ## Test Information

    | Field | Value |
    |-------|-------|
    | Test ID | #{result.test_id} |
    #{tc_info_rows}| Title | #{scenario.title} |
    | Package | #{scenario.package} |
    | Agent | ace-test-e2e |
    | Executed | #{executed_at} |
    | Duration | #{result.duration_display} |

    ## Results Summary

    | Test Case | Description | Status |
    |-----------|-------------|--------|
    #{tc_rows}

    ## Overall Status: #{result.status.upcase}

    #{goal_criteria_sections}

    #{result.summary}
    #{"## Error\n\n#{result.error}" if result.error}
  REPORT

  File.write(path, content)
  path
end
|
|
128
|
+
|
|
129
|
+
# Write experience report
|
|
130
|
+
# @return [String] Path to written file
|
|
131
|
+
def write_experience(result, scenario, report_dir, test_case = nil)
  path = File.join(report_dir, "experience.r.md")
  passed = result.status == "pass"

  tc_title_suffix = test_case ? " / #{test_case.tc_id}" : ""

  frontmatter_hash = {"test-id" => result.test_id}
  frontmatter_hash["tc-id"] = test_case.tc_id if test_case
  frontmatter_hash.merge!(
    "test-title" => scenario.title,
    "package" => scenario.package,
    "agent" => "ace-test-e2e",
    "executed" => result.completed_at.utc.strftime("%Y-%m-%dT%H:%M:%SZ"),
    "status" => passed ? "complete" : "incomplete"
  )
  # Serialise with YAML.dump rather than string interpolation so values that
  # contain YAML syntax (e.g. a title with ": " or " #") are quoted and the
  # frontmatter stays parseable. The "---"/"..." document markers are removed
  # because the template below supplies its own delimiters.
  frontmatter_yaml = YAML.dump(frontmatter_hash).sub(/\A---\s*\n/, "").sub(/\.\.\.\s*\n\z/, "")

  content = <<~REPORT
    ---
    #{frontmatter_yaml.rstrip}
    ---

    # Agent Experience Report: #{result.test_id}#{tc_title_suffix}

    ## Summary

    Executed via ace-test-e2e CLI using LLM provider.
    #{passed ? "No significant friction encountered." : "Test execution completed with issues noted below."}

    ## Friction Points

    ### Documentation Gaps

    - Automated execution via LLM - no documentation gaps observed

    ### Tool Behavior Issues

    - #{result.error || "None observed"}

    ## Positive Observations

    - Automated test execution completed successfully via LLM
  REPORT

  File.write(path, content)
  path
end
|
|
178
|
+
|
|
179
|
+
# Write metadata file
|
|
180
|
+
# @return [String] Path to written file
|
|
181
|
+
def write_metadata(result, scenario, report_dir, test_case = nil)
  path = File.join(report_dir, "metadata.yml")
  iso_utc = ->(time) { time.utc.strftime("%Y-%m-%dT%H:%M:%SZ") }

  passed = result.passed_count
  failed = result.failed_count
  total = result.total_count

  # Fraction of passed test cases, 0.0 when nothing ran.
  score = total.zero? ? 0.0 : (passed.to_f / total).round(3)

  # "error" always fails; otherwise the verdict follows the pass/fail counts.
  verdict =
    if result.status == "error"
      "fail"
    elsif failed.zero?
      "pass"
    elsif passed.zero?
      "fail"
    else
      "partial"
    end

  failure_details = result.test_cases.filter_map do |tc|
    next unless tc[:status] == "fail"

    {
      "tc" => tc[:id],
      "category" => tc[:category] || "runner-error",
      "evidence" => tc[:notes].to_s
    }
  end

  metadata = {
    # The run id is the report directory name without its "-reports" suffix.
    "run-id" => File.basename(report_dir).sub(/-reports\z/, ""),
    "test-id" => result.test_id,
    "package" => scenario.package,
    "agent" => "ace-test-e2e",
    "started" => iso_utc.call(result.started_at),
    "completed" => iso_utc.call(result.completed_at),
    "duration" => "#{result.duration.round(0)}s",
    "status" => result.status,
    "score" => score,
    "verdict" => verdict,
    "tcs-passed" => passed,
    "tcs-failed" => failed,
    "tcs-total" => total,
    "results" => {
      "passed" => passed,
      "failed" => failed,
      "total" => total
    },
    "failed" => failure_details,
    "failed_test_cases" => result.failed_test_case_ids
  }

  # TC-level reports additionally record which scenario and test case they cover.
  metadata.merge!("scenario-id" => scenario.test_id, "tc-id" => test_case.tc_id) if test_case

  File.write(path, YAML.dump(metadata))
  path
end
|
|
231
|
+
|
|
232
|
+
# Render the "Goal Evaluation" part of the summary report.
#
# Test cases without criteria are skipped. Each remaining test case becomes
# a "### Goal Criteria" heading followed by a three-column markdown table.
#
# @param test_cases [Array<Hash>] Test case hashes with :id and optional :criteria
# @return [String] Markdown section, or "" when no test case has criteria
def build_goal_criteria_sections(test_cases)
  with_criteria = test_cases.reject { |tc| tc[:criteria].nil? || tc[:criteria].empty? }
  return "" if with_criteria.empty?

  blocks = with_criteria.map do |tc|
    table_rows = tc[:criteria].map do |item|
      # Fall back to the criterion id when no description was provided.
      label = item[:description].to_s.empty? ? item[:id] : item[:description]
      "| #{label} | #{item[:status].to_s.upcase} | #{item[:evidence]} |"
    end

    [
      "### Goal Criteria: #{tc[:id]}",
      "",
      "| Criterion | Status | Evidence |",
      "|-----------|--------|----------|",
      table_rows.join("\n")
    ].join("\n") + "\n"
  end

  "## Goal Evaluation\n\n" + blocks.join("\n")
end
|
|
255
|
+
end
|
|
256
|
+
end
|
|
257
|
+
end
|
|
258
|
+
end
|
|
259
|
+
end
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "date"
|
|
4
|
+
require "yaml"
|
|
5
|
+
|
|
6
|
+
module Ace
|
|
7
|
+
module Test
|
|
8
|
+
module EndToEndRunner
|
|
9
|
+
module Molecules
|
|
10
|
+
# Loads a TS-format scenario directory into TestScenario + TestCase models.
|
|
11
|
+
#
|
|
12
|
+
# Supported test case format is standalone TC pairs only:
|
|
13
|
+
# - `TC-*.runner.md`
|
|
14
|
+
# - `TC-*.verify.md`
|
|
15
|
+
class ScenarioLoader
|
|
16
|
+
LEGACY_FIELDS = %w[mode execution-model].freeze
|
|
17
|
+
|
|
18
|
+
# Load a scenario directory
|
|
19
|
+
#
|
|
20
|
+
# @param scenario_dir [String] Path to the TS-* scenario directory
|
|
21
|
+
# @return [Models::TestScenario] Populated test scenario
|
|
22
|
+
# @raise [ArgumentError] If scenario.yml is missing, invalid, or has missing required fields
|
|
23
|
+
def load(scenario_dir)
  manifest_path = File.join(scenario_dir, "scenario.yml")
  unless File.exist?(manifest_path)
    raise ArgumentError, "scenario.yml not found: #{manifest_path}"
  end

  meta = parse_scenario_yml(manifest_path)
  validate_scenario!(meta, manifest_path)

  # Test cases and fixtures are discovered before the model attributes are
  # assembled, so their errors surface first.
  cases = discover_test_cases(scenario_dir)
  fixtures = detect_fixture_path(scenario_dir)

  # Optional fields fall back to defaults when absent from scenario.yml.
  attributes = {
    test_id: meta["test-id"],
    title: meta["title"],
    area: meta["area"],
    package: meta["package"] || infer_package(scenario_dir),
    priority: meta["priority"] || "medium",
    duration: meta["duration"] || "~5min",
    timeout: parse_timeout(meta["timeout"], manifest_path),
    requires: meta["requires"] || {},
    file_path: File.expand_path(manifest_path),
    content: File.read(manifest_path),
    setup_steps: meta["setup"] || [],
    dir_path: File.expand_path(scenario_dir),
    fixture_path: fixtures,
    test_cases: cases,
    tags: parse_tags(meta["tags"]),
    tool_under_test: meta["tool-under-test"],
    sandbox_layout: meta["sandbox-layout"] || {}
  }

  Models::TestScenario.new(**attributes)
end
|
|
53
|
+
|
|
54
|
+
private
|
|
55
|
+
|
|
56
|
+
# Parse an optional per-scenario timeout in seconds.
|
|
57
|
+
#
|
|
58
|
+
# @param raw_timeout [Object] Raw YAML timeout value
|
|
59
|
+
# @param source_path [String] Source file path for errors
|
|
60
|
+
# @return [Integer, nil] Timeout in seconds
|
|
61
|
+
# @raise [ArgumentError] If timeout is present and invalid
|
|
62
|
+
def parse_timeout(raw_timeout, source_path)
  return nil if raw_timeout.nil?

  seconds =
    case raw_timeout
    when Integer
      raw_timeout
    when Numeric
      # Non-integer numbers (e.g. 90.5) are truncated toward zero.
      raw_timeout.to_i
    when String
      text = raw_timeout.strip
      # A blank string is treated like an absent timeout.
      return nil if text.empty?

      # Accept digits only, anchored to the whole string. The pattern uses
      # single backslashes: a doubled "\\A" would match a literal backslash
      # and reject every numeric string.
      unless text.match?(/\A\d+\z/)
        raise ArgumentError, "Invalid timeout in #{source_path}: #{raw_timeout.inspect}"
      end

      text.to_i
    else
      raise ArgumentError, "Invalid timeout in #{source_path}: #{raw_timeout.inspect}"
    end

  raise ArgumentError, "Invalid timeout in #{source_path}: must be greater than 0" if seconds <= 0

  seconds
end
|
|
83
|
+
|
|
84
|
+
# Parse scenario.yml with safe YAML loading
|
|
85
|
+
#
|
|
86
|
+
# @param path [String] Path to scenario.yml
|
|
87
|
+
# @return [Hash] Parsed YAML frontmatter
|
|
88
|
+
# @raise [ArgumentError] If YAML is invalid or empty
|
|
89
|
+
def parse_scenario_yml(path)
  content = File.read(path)
  # Safe loading: only core YAML types plus Date may be instantiated.
  result = YAML.safe_load(content, permitted_classes: [Date])
  raise ArgumentError, "Empty or invalid YAML in #{path}" if result.nil?

  # Field lookups on the returned value use Hash methods, so any other root
  # (list, scalar) is rejected here with the documented error type instead of
  # surfacing later as a NoMethodError.
  unless result.is_a?(Hash)
    raise ArgumentError,
      "Invalid YAML in #{path}: expected a mapping at the top level, got #{result.class}"
  end

  result
rescue Psych::Exception => e
  # Psych::Exception covers syntax errors as well as the DisallowedClass and
  # BadAlias errors that safe_load raises for non-permitted content.
  raise ArgumentError, "Invalid YAML in #{path}: #{e.message}"
end
|
|
98
|
+
|
|
99
|
+
# Validate required fields in scenario frontmatter
|
|
100
|
+
#
|
|
101
|
+
# @param frontmatter [Hash] Parsed scenario.yml
|
|
102
|
+
# @param path [String] File path for error messages
|
|
103
|
+
# @raise [ArgumentError] If required fields are missing
|
|
104
|
+
def validate_scenario!(frontmatter, path)
  # Required keys only need to be present; their values are not inspected.
  absent = %w[test-id title area].select { |name| !frontmatter&.key?(name) }
  if absent.any?
    raise ArgumentError, "Missing required fields in #{path}: #{absent.join(", ")}"
  end

  # Fields from the retired scenario format are rejected outright.
  unsupported = LEGACY_FIELDS.select { |name| frontmatter.key?(name) }
  unless unsupported.empty?
    raise ArgumentError,
      "Legacy field(s) not supported in #{path}: #{unsupported.join(", ")}. " \
      "Remove these fields; standalone runner/verify scenarios are the only supported format."
  end

  nil
end
|
|
118
|
+
|
|
119
|
+
# Discover and parse standalone TC files in the scenario directory.
|
|
120
|
+
#
|
|
121
|
+
# @param scenario_dir [String] Path to the scenario directory
|
|
122
|
+
# @return [Array<Models::TestCase>] Parsed test case models, sorted by TC ID
|
|
123
|
+
def discover_test_cases(scenario_dir)
  sorted_glob = ->(pattern) { Dir.glob(File.join(scenario_dir, pattern)).sort }
  runners = sorted_glob.call("TC-*.runner.md")
  verifiers = sorted_glob.call("TC-*.verify.md")

  # No standalone files at all: the scenario has no test cases, unless it
  # still carries retired inline TC files, which is an error.
  if runners.empty? && verifiers.empty?
    reject_inline_tc_files!(scenario_dir)
    return []
  end

  validate_standalone_files!(scenario_dir, runners, verifiers)

  # Index each file list by TC ID; a later file with the same ID replaces an
  # earlier one.
  index_by_id = lambda do |files|
    files.each_with_object({}) do |file, by_id|
      by_id[extract_tc_id_from_standalone_name(file)] = file
    end
  end
  runner_for = index_by_id.call(runners)
  verify_for = index_by_id.call(verifiers)

  runner_for.keys.sort.map do |tc_id|
    parse_standalone_test_case(tc_id, runner_for.fetch(tc_id), verify_for.fetch(tc_id))
  end
end
|
|
143
|
+
|
|
144
|
+
# Fail when the scenario directory still contains retired inline TC files.
#
# @param scenario_dir [String] Path to the scenario directory
# @return [nil] When no `TC-*.tc.md` file is present
# @raise [ArgumentError] Listing the offending file names
def reject_inline_tc_files!(scenario_dir)
  inline_names = Dir.glob(File.join(scenario_dir, "TC-*.tc.md")).sort.map { |file| File.basename(file) }
  return if inline_names.empty?

  raise ArgumentError,
    "Inline TC files are no longer supported in #{scenario_dir}. " \
    "Replace #{inline_names.join(", ")} with standalone " \
    "TC-*.runner.md and TC-*.verify.md pairs."
end
|
|
153
|
+
|
|
154
|
+
# Build a TestCase model from a runner/verify file pair.
#
# @param tc_id [String] Test case ID shared by both files
# @param runner_file [String] Path to the `TC-*.runner.md` file
# @param verify_file [String] Path to the `TC-*.verify.md` file
# @return [Models::TestCase] Test case whose content combines both files
def parse_standalone_test_case(tc_id, runner_file, verify_file)
  runner_md, verify_md = [runner_file, verify_file].map { |file| File.read(file) }

  # The title comes from the runner file's first heading; the TC ID is used
  # when the runner has none.
  heading = extract_title_from_markdown(runner_md)

  Models::TestCase.new(
    tc_id: tc_id,
    title: heading || tc_id,
    content: build_standalone_content(runner_md, verify_md),
    file_path: File.expand_path(runner_file),
    pending: nil,
    goal_format: "standalone"
  )
end
|
|
167
|
+
|
|
168
|
+
# Derive the upper-cased TC ID from a standalone runner/verify file name.
#
# Accepts names such as `TC-001.runner.md` or `TC-001-some-slug.verify.md`
# (case-insensitive); the optional slug is not part of the ID.
#
# @param file_path [String] Path or file name of a standalone TC file
# @return [String] Upper-cased TC ID, e.g. "TC-001"
# @raise [ArgumentError] If the file name does not follow the pattern
def extract_tc_id_from_standalone_name(file_path)
  name = File.basename(file_path)
  tc_id = name[/\A(TC-\d+[a-z]*)(?:-[^.]+)?\.(?:runner|verify)\.md\z/i, 1]
  raise ArgumentError, "Invalid standalone test case filename: #{name}" unless tc_id

  tc_id.upcase
end
|
|
175
|
+
|
|
176
|
+
# Extract the text of the first markdown heading in a document.
#
# A heading is any line whose first non-blank character is "#". Surrounding
# whitespace and the leading "#" markers are removed from the result.
#
# @param markdown [String] Markdown source
# @return [String, nil] Heading text, or nil when the document has no heading
#   or its first heading carries no text
def extract_title_from_markdown(markdown)
  # Strip each line before matching AND before removing the markers, so an
  # indented heading ("  ## Title") does not keep its "#" characters.
  heading = markdown.each_line.lazy.map(&:strip).find { |text| text.start_with?("#") }
  return nil unless heading

  title = heading.sub(/\A#+\s*/, "")
  # An empty title is reported as nil so callers can apply their own fallback.
  title.empty? ? nil : title
end
|
|
182
|
+
|
|
183
|
+
# Combine runner and verifier markdown into one document.
#
# Trailing whitespace of each part is removed; the result ends with a single
# newline.
#
# @param runner_content [String] Contents of the runner file
# @param verify_content [String] Contents of the verify file
# @return [String] Markdown with "## Runner" and "## Verifier" sections
def build_standalone_content(runner_content, verify_content)
  [
    "## Runner",
    "",
    runner_content.rstrip,
    "",
    "## Verifier",
    "",
    verify_content.rstrip
  ].join("\n") + "\n"
end
|
|
194
|
+
|
|
195
|
+
# Check that the standalone TC files of a scenario are complete.
#
# @param scenario_dir [String] Path to the scenario directory
# @param runner_files [Array<String>] Paths of `TC-*.runner.md` files
# @param verify_files [Array<String>] Paths of `TC-*.verify.md` files
# @return [nil]
# @raise [ArgumentError] If a TC ID lacks its runner or verify counterpart,
#   or if runner.yml.md / verifier.yml.md is missing from the directory
def validate_standalone_files!(scenario_dir, runner_files, verify_files)
  ids_of = ->(files) { files.map { |file| extract_tc_id_from_standalone_name(file) }.uniq }
  runner_ids = ids_of.call(runner_files)
  verify_ids = ids_of.call(verify_files)

  # Verify files whose TC ID has no runner file.
  without_runner = verify_ids - runner_ids
  if without_runner.any?
    raise ArgumentError,
      "Missing standalone runner file(s) for: #{without_runner.join(", ")} in #{scenario_dir}"
  end

  # Runner files whose TC ID has no verify file.
  without_verify = runner_ids - verify_ids
  if without_verify.any?
    raise ArgumentError,
      "Missing standalone verify file(s) for: #{without_verify.join(", ")} in #{scenario_dir}"
  end

  # Both scenario-level companion files must exist alongside the TC pairs.
  %w[runner.yml.md verifier.yml.md].each do |name|
    companion = File.join(scenario_dir, name)
    raise ArgumentError, "Missing standalone file: #{companion}" unless File.exist?(companion)
  end

  nil
end
|
|
218
|
+
|
|
219
|
+
# Normalise the `tags` field of scenario.yml.
#
# @param raw_tags [Array, Object, nil] A list of tags, a single tag, or nothing
# @return [Array<String>] Stripped, lower-cased tags with blank entries removed
def parse_tags(raw_tags)
  return [] unless raw_tags

  candidates = raw_tags.is_a?(Array) ? raw_tags : [raw_tags]
  candidates.filter_map do |tag|
    cleaned = tag.to_s.strip
    cleaned.downcase unless cleaned.empty?
  end
end
|
|
225
|
+
|
|
226
|
+
# Detect fixtures directory if it exists
|
|
227
|
+
#
|
|
228
|
+
# @param scenario_dir [String] Path to the scenario directory
|
|
229
|
+
# @return [String, nil] Absolute path to fixtures/ or nil
|
|
230
|
+
def detect_fixture_path(scenario_dir)
  candidate = File.join(scenario_dir, "fixtures")
  # Only an existing directory counts; anything else means "no fixtures".
  return nil unless Dir.exist?(candidate)

  File.expand_path(candidate)
end
|
|
234
|
+
|
|
235
|
+
# Infer package name from scenario directory path
|
|
236
|
+
#
|
|
237
|
+
# @param scenario_dir [String] Path to scenario directory
|
|
238
|
+
# @return [String] Inferred package name
|
|
239
|
+
def infer_package(scenario_dir)
  # Expected path: {package}/test/e2e/TS-{AREA}-{NNN}-{slug}/
  segments = File.expand_path(scenario_dir).split("/")

  # Slide a three-segment window over the path; the package is the segment
  # directly before the first "test/e2e" pair.
  segments.each_cons(3) do |package, first, second|
    return package if first == "test" && second == "e2e"
  end

  "unknown"
end
|
|
250
|
+
end
|
|
251
|
+
end
|
|
252
|
+
end
|
|
253
|
+
end
|
|
254
|
+
end
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
require "open3"
|
|
5
|
+
require "shellwords"
|
|
6
|
+
|
|
7
|
+
module Ace
|
|
8
|
+
module Test
|
|
9
|
+
module EndToEndRunner
|
|
10
|
+
module Molecules
|
|
11
|
+
# Executes setup steps deterministically to create a populated sandbox
|
|
12
|
+
#
|
|
13
|
+
# Processes the setup array from scenario.yml, running each action
|
|
14
|
+
# via Ruby system calls (no LLM involved). Supports: git-init,
|
|
15
|
+
# copy-fixtures, run, write-file, agent-env, and tmux-session actions.
|
|
16
|
+
#
|
|
17
|
+
# Note: This is a Molecule because it performs filesystem I/O and
|
|
18
|
+
# system calls via Open3 and FileUtils.
|
|
19
|
+
class SetupExecutor
|
|
20
|
+
# Execute all setup steps in a sandbox directory
|
|
21
|
+
#
|
|
22
|
+
# @param setup_steps [Array] Setup steps from scenario.yml
|
|
23
|
+
# @param sandbox_dir [String] Path to the sandbox directory
|
|
24
|
+
# @param fixture_source [String, nil] Path to the fixtures/ directory
|
|
25
|
+
# @param scenario_name [String, nil] Test ID for tmux session naming (e.g., "TS-OVERSEER-001")
|
|
26
|
+
# @param run_id [String, nil] Unique run ID for deterministic tmux session naming
|
|
27
|
+
# @return [Hash] Result with :success, :steps_completed, :error, :env, :tmux_session keys
|
|
28
|
+
def execute(setup_steps:, sandbox_dir:, fixture_source: nil, scenario_name: nil, run_id: nil)
  # Initialise all state before touching the filesystem, so the rescue below
  # always reports a well-formed result (integer step count, Hash env, no
  # stale tmux session from a previous run) even if creating the sandbox
  # directory fails.
  env = {}
  steps_completed = 0
  @tmux_session = nil
  @scenario_name = scenario_name
  @run_id = run_id

  FileUtils.mkdir_p(sandbox_dir)

  # Steps run in order; the first failure aborts the remaining ones.
  setup_steps.each do |step|
    execute_step(step, sandbox_dir, env, fixture_source)
    steps_completed += 1
  end

  {success: true, steps_completed: steps_completed, error: nil, env: env, tmux_session: @tmux_session}
rescue => e
  # Failures are reported through the result hash rather than raised.
  {success: false, steps_completed: steps_completed, error: e.message, env: env, tmux_session: @tmux_session}
end
|
|
45
|
+
|
|
46
|
+
# Clean up resources created during setup (e.g. tmux session)
|
|
47
|
+
def teardown
  session = @tmux_session
  return unless session

  # The kill command's output is discarded and its exit status is not checked.
  system("tmux", "kill-session", "-t", session, out: File::NULL, err: File::NULL)
  @tmux_session = nil
end
|
|
53
|
+
|
|
54
|
+
private
|
|
55
|
+
|
|
56
|
+
# Dispatch a single step to the appropriate handler
|
|
57
|
+
#
|
|
58
|
+
# @param step [String, Hash] Step definition
|
|
59
|
+
# @param sandbox_dir [String] Sandbox path
|
|
60
|
+
# @param env [Hash] Environment variables
|
|
61
|
+
# @param fixture_source [String, nil] Fixtures path
|
|
62
|
+
def execute_step(step, sandbox_dir, env, fixture_source)
  # Hash steps carry arguments and are dispatched separately; plain strings
  # name argument-less actions.
  if step.is_a?(Hash)
    execute_hash_step(step, sandbox_dir, env)
  elsif step == "git-init"
    handle_git_init(sandbox_dir, env)
  elsif step == "copy-fixtures"
    handle_copy_fixtures(sandbox_dir, fixture_source)
  elsif step == "tmux-session"
    handle_tmux_session(env)
  else
    raise ArgumentError, "Unknown setup step: #{step.inspect}"
  end
end
|
|
76
|
+
|
|
77
|
+
# Dispatch hash-based steps
|
|
78
|
+
def execute_hash_step(step, sandbox_dir, env)
  # Only the first key/value pair of the step hash is used.
  action, payload = step.first

  case action
  when "run" then handle_run(payload, sandbox_dir, env)
  when "write-file" then handle_write_file(payload["path"], payload["content"], sandbox_dir)
  when "agent-env" then handle_env(payload, env)
  when "tmux-session" then handle_tmux_session(env, payload)
  else raise ArgumentError, "Unknown setup step type: #{action.inspect}"
  end
end
|
|
95
|
+
|
|
96
|
+
# Create an isolated detached tmux session and store its name in env
|
|
97
|
+
def handle_tmux_session(env, config = nil)
  # The run ID names the session only when the step asks for it via
  # `name-source: run-id` and a non-empty run ID was supplied.
  wants_run_id = config.is_a?(Hash) && config["name-source"] == "run-id"

  session_name =
    if wants_run_id && @run_id && !@run_id.to_s.empty?
      @run_id
    elsif @scenario_name
      "#{@scenario_name}-e2e"
    else
      # No scenario name available: fall back to a timestamp-based name.
      "ace-e2e-#{Time.now.to_i}"
    end

  _out, err_output, result = Open3.capture3("tmux", "new-session", "-d", "-s", session_name)
  unless result.success?
    raise "Failed to create tmux session '#{session_name}': #{err_output.strip}"
  end

  # Remember the session on the instance and expose it to later steps via env.
  @tmux_session = session_name
  env["ACE_TMUX_SESSION"] = session_name
end
|
|
110
|
+
|
|
111
|
+
# Initialize a git repo with test user config
|
|
112
|
+
def handle_git_init(sandbox_dir, env)
  # Create the repository on branch "main", then set a fixed test identity
  # in its configuration.
  [
    %w[git init -b main],
    ["git", "config", "user.name", "Test User"],
    %w[git config user.email test@example.com]
  ].each do |argv|
    run_command(*argv, chdir: sandbox_dir, env: env)
  end

  nil
end
|
|
117
|
+
|
|
118
|
+
# Copy fixture files into sandbox
|
|
119
|
+
def handle_copy_fixtures(sandbox_dir, fixture_source)
  if fixture_source.nil?
    raise ArgumentError, "No fixture source provided for copy-fixtures step"
  end

  # Copying itself is delegated to FixtureCopier.
  copier = FixtureCopier.new
  copier.copy(source_dir: fixture_source, target_dir: sandbox_dir)
end
|
|
124
|
+
|
|
125
|
+
# Execute a shell command in the sandbox
|
|
126
|
+
# NOTE: Uses shell invocation (bash -lc) intentionally to support
|
|
127
|
+
# shell operators (&&, |, >) in scenario.yml setup steps. Commands originate from
|
|
128
|
+
# committed scenario.yml files, not user input, so shell injection risk is mitigated.
|
|
129
|
+
def handle_run(command, sandbox_dir, env)
  process_env = merged_environment(env)

  # Variables to re-export inside the shell, after profile sourcing: the
  # step's own env plus PROJECT_ROOT_PATH / ACE_TASKFLOW_PATH taken from the
  # current process when the step env does not set them.
  exported = env.dup
  %w[PROJECT_ROOT_PATH ACE_TASKFLOW_PATH].each do |name|
    inherited = ENV[name]
    exported[name] ||= inherited if inherited
  end

  # Values are shell-escaped; the command itself is passed through verbatim
  # so it can use shell operators.
  export_statements = exported.map do |name, value|
    "export #{name}=#{Shellwords.shellescape(value.to_s)}"
  end
  script = export_statements.empty? ? command : "#{export_statements.join("; ")}; #{command}"

  output, err_output, result = Open3.capture3(process_env, "bash", "-lc", script, chdir: sandbox_dir)
  return output if result.success?

  raise "Setup step 'run' failed (exit #{result.exitstatus}): #{command}\n#{err_output}"
end
|
|
147
|
+
|
|
148
|
+
# Write inline content to a file in the sandbox
|
|
149
|
+
def handle_write_file(path, content, sandbox_dir)
  target = File.join(sandbox_dir, path)
  # Create any missing parent directories of the target first.
  FileUtils.mkdir_p(File.dirname(target))
  File.open(target, "w") { |io| io.write(content) }
end
|
|
154
|
+
|
|
155
|
+
# Merge environment variables for subsequent steps
|
|
156
|
+
def handle_env(vars, env)
  # Names and values are stored as strings; existing entries are overwritten.
  vars.each do |name, value|
    env.store(name.to_s, value.to_s)
  end
end
|
|
159
|
+
|
|
160
|
+
# Merge custom env vars with the process environment
|
|
161
|
+
#
|
|
162
|
+
# @param env [Hash] Custom environment variables
|
|
163
|
+
# @return [Hash] Merged environment
|
|
164
|
+
def merged_environment(env)
  # Start from a copy of the process environment and overlay the custom
  # variables, converting their names to strings.
  env.each_with_object(ENV.to_h) do |(name, value), combined|
    combined[name.to_s] = value
  end
end
|
|
168
|
+
|
|
169
|
+
# Run a command and raise on failure
|
|
170
|
+
def run_command(*args, chdir:, env: {})
  _out, err_output, result = Open3.capture3(merged_environment(env), *args, chdir: chdir)
  return if result.success?

  # Include the exit status, the command line and stderr in the failure.
  raise "Command failed (exit #{result.exitstatus}): #{args.join(" ")}\n#{err_output}"
end
|
|
177
|
+
end
|
|
178
|
+
end
|
|
179
|
+
end
|
|
180
|
+
end
|
|
181
|
+
end
|