ace-test-runner-e2e 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.ace-defaults/e2e-runner/config.yml +70 -0
- data/.ace-defaults/nav/protocols/guide-sources/ace-test-runner-e2e.yml +11 -0
- data/.ace-defaults/nav/protocols/skill-sources/ace-test-runner-e2e.yml +19 -0
- data/.ace-defaults/nav/protocols/tmpl-sources/ace-test-runner-e2e.yml +12 -0
- data/.ace-defaults/nav/protocols/wfi-sources/ace-test-runner-e2e.yml +11 -0
- data/CHANGELOG.md +1166 -0
- data/LICENSE +21 -0
- data/README.md +42 -0
- data/Rakefile +15 -0
- data/exe/ace-test-e2e +15 -0
- data/exe/ace-test-e2e-sh +67 -0
- data/exe/ace-test-e2e-suite +13 -0
- data/handbook/guides/e2e-testing.g.md +124 -0
- data/handbook/guides/scenario-yml-reference.g.md +182 -0
- data/handbook/guides/tc-authoring.g.md +131 -0
- data/handbook/skills/as-e2e-create/SKILL.md +30 -0
- data/handbook/skills/as-e2e-fix/SKILL.md +35 -0
- data/handbook/skills/as-e2e-manage/SKILL.md +31 -0
- data/handbook/skills/as-e2e-plan-changes/SKILL.md +30 -0
- data/handbook/skills/as-e2e-review/SKILL.md +35 -0
- data/handbook/skills/as-e2e-rewrite/SKILL.md +31 -0
- data/handbook/skills/as-e2e-run/SKILL.md +48 -0
- data/handbook/skills/as-e2e-setup-sandbox/SKILL.md +34 -0
- data/handbook/templates/ace-taskflow-fixture.template.md +322 -0
- data/handbook/templates/agent-experience-report.template.md +89 -0
- data/handbook/templates/metadata.template.yml +49 -0
- data/handbook/templates/scenario.yml.template.yml +60 -0
- data/handbook/templates/tc-file.template.md +45 -0
- data/handbook/templates/test-report.template.md +94 -0
- data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +126 -0
- data/handbook/workflow-instructions/e2e/create.wf.md +395 -0
- data/handbook/workflow-instructions/e2e/execute.wf.md +253 -0
- data/handbook/workflow-instructions/e2e/fix.wf.md +166 -0
- data/handbook/workflow-instructions/e2e/manage.wf.md +179 -0
- data/handbook/workflow-instructions/e2e/plan-changes.wf.md +255 -0
- data/handbook/workflow-instructions/e2e/review.wf.md +286 -0
- data/handbook/workflow-instructions/e2e/rewrite.wf.md +281 -0
- data/handbook/workflow-instructions/e2e/run.wf.md +355 -0
- data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +461 -0
- data/lib/ace/test/end_to_end_runner/atoms/display_helpers.rb +234 -0
- data/lib/ace/test/end_to_end_runner/atoms/prompt_builder.rb +199 -0
- data/lib/ace/test/end_to_end_runner/atoms/result_parser.rb +166 -0
- data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +166 -0
- data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +244 -0
- data/lib/ace/test/end_to_end_runner/atoms/suite_report_prompt_builder.rb +103 -0
- data/lib/ace/test/end_to_end_runner/atoms/tc_fidelity_validator.rb +39 -0
- data/lib/ace/test/end_to_end_runner/atoms/test_case_parser.rb +108 -0
- data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +130 -0
- data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +156 -0
- data/lib/ace/test/end_to_end_runner/models/test_case.rb +47 -0
- data/lib/ace/test/end_to_end_runner/models/test_result.rb +115 -0
- data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +90 -0
- data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +92 -0
- data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +75 -0
- data/lib/ace/test/end_to_end_runner/molecules/failure_finder.rb +203 -0
- data/lib/ace/test/end_to_end_runner/molecules/fixture_copier.rb +35 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +121 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +182 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +321 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +131 -0
- data/lib/ace/test/end_to_end_runner/molecules/progress_display_manager.rb +172 -0
- data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +259 -0
- data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +254 -0
- data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +181 -0
- data/lib/ace/test/end_to_end_runner/molecules/simple_display_manager.rb +72 -0
- data/lib/ace/test/end_to_end_runner/molecules/suite_progress_display_manager.rb +223 -0
- data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +277 -0
- data/lib/ace/test/end_to_end_runner/molecules/suite_simple_display_manager.rb +116 -0
- data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +136 -0
- data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +332 -0
- data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +830 -0
- data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +442 -0
- data/lib/ace/test/end_to_end_runner/version.rb +9 -0
- data/lib/ace/test/end_to_end_runner.rb +71 -0
- metadata +220 -0
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
# frozen_string_literal: true

require "fileutils"
require "yaml"

module Ace
  module Test
    module EndToEndRunner
      module Molecules
        # Generates TC-first reports from standalone verifier output.
        #
        # Parses the verifier's markdown output (either per-goal sections or the
        # structured SkillResultParser format), builds a Models::TestResult, and
        # persists both the machine-readable report (via ReportWriter) and a
        # human-readable goal report (report.md) into the report directory.
        class PipelineReportGenerator
          # Recognized failure classification tokens for failed goals.
          FAILURE_CATEGORIES = %w[test-spec-error tool-bug runner-error infrastructure-error].freeze

          # @param report_writer [Molecules::ReportWriter] injectable writer; a
          #   default instance is created when omitted.
          def initialize(report_writer: nil)
            @report_writer = report_writer || Molecules::ReportWriter.new
          end

          # Parse verifier output and write all reports for a completed run.
          #
          # @param scenario [Models::TestScenario]
          # @param verifier_output [String] raw verifier markdown text
          # @param report_dir [String] directory to write reports into (created if absent)
          # @param provider [String] provider label recorded in report frontmatter
          # @param started_at [Time]
          # @param completed_at [Time]
          # @return [Models::TestResult] result tagged with its report directory
          def generate(scenario:, verifier_output:, report_dir:, provider:, started_at:, completed_at:)
            parsed = parse_verifier_output(verifier_output, scenario)

            result = Models::TestResult.new(
              test_id: scenario.test_id,
              status: parsed[:status],
              test_cases: parsed[:test_cases],
              summary: parsed[:summary],
              error: parsed[:error],
              started_at: started_at,
              completed_at: completed_at
            )

            persist_reports(result: result, scenario: scenario, report_dir: report_dir, provider: provider)
          end

          # Write deterministic error reports when pipeline execution fails before
          # normal verifier parsing/report generation can complete.
          #
          # @param scenario [Models::TestScenario]
          # @param report_dir [String]
          # @param provider [String]
          # @param started_at [Time]
          # @param completed_at [Time]
          # @param error_message [String]
          # @return [Models::TestResult]
          def write_failure_report(scenario:, report_dir:, provider:, started_at:, completed_at:, error_message:)
            result = Models::TestResult.new(
              test_id: scenario.test_id,
              status: "error",
              test_cases: [],
              summary: "Execution pipeline failed",
              error: error_message,
              started_at: started_at,
              completed_at: completed_at
            )

            persist_reports(result: result, scenario: scenario, report_dir: report_dir, provider: provider)
          end

          private

          # Shared persistence path for both success and failure flows: ensures the
          # report directory exists, writes the machine report and the goal report,
          # and returns the result tagged with its report directory. Extracted to
          # remove the duplicated sequence in #generate and #write_failure_report.
          def persist_reports(result:, scenario:, report_dir:, provider:)
            FileUtils.mkdir_p(report_dir)
            @report_writer.write(result, scenario, report_dir: report_dir)
            write_goal_report(
              path: File.join(report_dir, "report.md"),
              scenario: scenario,
              provider: provider,
              result: result
            )
            result.with_report_dir(report_dir)
          end

          # Prefer per-goal sections; fall back to the structured verifier format.
          # Unstructured output is reported as an "error" result rather than raised.
          def parse_verifier_output(text, scenario)
            goals = parse_goal_sections(text, scenario)
            return build_result_from_goals(goals) unless goals.empty?

            parsed = Atoms::SkillResultParser.parse_verifier(text)
            {
              status: parsed[:status],
              test_cases: parsed[:test_cases],
              summary: parsed[:summary],
              error: parsed[:observations]
            }
          rescue Atoms::ResultParser::ParseError => e
            issue = summarize_unstructured_verifier_output(text)
            {
              status: "error",
              test_cases: [],
              summary: "Verifier returned unstructured output",
              error: issue || e.message
            }
          end

          # Split the verifier text into "## Goal N — Title" sections and turn each
          # into a test-case hash. Goals without a recognizable verdict are skipped.
          def parse_goal_sections(text, scenario)
            lines = text.to_s.lines
            headers = []
            lines.each_with_index do |line, idx|
              match = line.match(/^\#{2,3}\s+Goal\s+(\d+)\s*[—-]\s*(.+?)\s*$/i)
              headers << [idx, match[1].to_i, match[2].strip] if match
            end
            return [] if headers.empty?

            scenario_test_cases = scenario.test_cases || []

            headers.each_with_index.map do |(start_idx, goal_number, title), index|
              end_idx = (index + 1 < headers.size) ? headers[index + 1][0] : lines.size
              block = lines[start_idx...end_idx].join

              verdict = normalize_verdict(extract_field_token(block, %w[Verdict Status]))
              evidence = extract_evidence(block)
              next if verdict.nil?

              # Goal N maps to the Nth scenario TC; synthesize an id when absent.
              tc_id = scenario_test_cases[goal_number - 1]&.tc_id || format("TC-%03d", goal_number)
              category = extract_category(block, evidence)

              {
                id: tc_id,
                description: title,
                status: (verdict == "PASS") ? "pass" : "fail",
                notes: evidence,
                category: ((verdict == "FAIL") ? category : nil)
              }
            end.compact
          end

          # Extract the value of a "**Field**: value" line (optionally bulleted).
          def extract_value(block, field)
            match = block.match(/^\s*[-*]?\s*\*\*#{Regexp.escape(field)}\*\*:\s*(.+?)\s*$/im)
            return nil unless match

            match[1].strip
          end

          # Collect the Evidence field, including continuation lines up to the next
          # field marker, goal header, results banner, or horizontal rule.
          def extract_evidence(block)
            lines = block.to_s.lines
            marker_index = nil
            inline_value = nil

            lines.each_with_index do |line, idx|
              match = line.match(/^\s*[-*]?\s*\*\*Evidence(?:\s+of\s+failure)?\*\*:\s*(.*)$/i)
              next unless match

              marker_index = idx
              inline_value = match[1].to_s.strip
              break
            end

            # No Evidence marker at all: inline_value is nil here.
            return inline_value unless marker_index

            collected = []
            collected << inline_value unless inline_value.empty?

            lines[(marker_index + 1)..]&.each do |line|
              break if line.match?(/^\s*[-*]?\s*\*\*(Category|Verdict)\*\*:/i)
              break if line.match?(/^\#{2,3}\s+Goal\s+\d+/i)
              break if line.match?(/^\s*\*\*Results/i)
              break if line.strip == "---"

              text = line.rstrip
              next if text.strip.empty?

              text = text.sub(/^\s*[-*]\s+/, "")
              collected << text.strip
            end

            collected.join(" ").strip
          end

          # Resolve the failure category: explicit field, inline `code` token,
          # parenthesized token, then keyword scan over block + evidence.
          def extract_category(block, evidence)
            explicit = extract_field_token(block, %w[Category])
            return normalize_category(explicit) if explicit

            inline = block.to_s.match(/`(test-spec-error|tool-bug|runner-error|infrastructure-error)`/i)
            return normalize_category(inline[1]) if inline

            paren = block.to_s.match(/\((test-spec-error|tool-bug|runner-error|infrastructure-error)\)/i)
            return normalize_category(paren[1]) if paren

            normalize_category("#{block}\n#{evidence}")
          end

          # Map free text onto a known category token; default to "runner-error".
          def normalize_category(value)
            category = value.to_s.strip.downcase
            match = category.match(/\b(test-spec-error|tool-bug|runner-error|infrastructure-error)\b/)
            return match[1] if match

            "runner-error"
          end

          # Normalize a verdict string to "PASS"/"FAIL", ignoring markdown emphasis
          # characters; nil when no recognizable token is present.
          def normalize_verdict(value)
            raw = value.to_s.strip
            return nil if raw.empty?

            token = raw.gsub(/[*_`]/, "").upcase.match(/\b(PASS|FAIL)\b/)
            return token[1] if token

            nil
          end

          # Try several field syntaxes ("**Field**: v", "**Field: v**", "Field: v")
          # for each candidate field name; first non-empty hit wins.
          def extract_field_token(block, fields)
            fields.each do |field|
              direct = extract_value(block, field)
              return direct if direct && !direct.empty?

              bold_inline = block.match(/\*\*#{Regexp.escape(field)}\s*:\s*([^*\n]+)\*\*/i)
              return bold_inline[1].strip if bold_inline

              plain = block.match(/^\s*(?:[-*]\s+)?#{Regexp.escape(field)}\s*:\s*(.+?)\s*$/im)
              return plain[1].strip if plain
            end

            nil
          end

          # Aggregate parsed goals into an overall status hash
          # (all pass => "pass", none pass => "fail", otherwise "partial").
          def build_result_from_goals(goals)
            passed = goals.count { |goal| goal[:status] == "pass" }
            total = goals.size
            status = if passed == total
                       "pass"
                     elsif passed.zero?
                       "fail"
                     else
                       "partial"
                     end

            {
              status: status,
              test_cases: goals,
              summary: "#{passed}/#{total} passed"
            }
          end

          # First three non-empty lines of unstructured output, truncated to 240 chars.
          def summarize_unstructured_verifier_output(text)
            summary = text.to_s.lines.map(&:strip).reject(&:empty?).first(3).join(" ")
            return nil if summary.empty?

            (summary.length > 240) ? "#{summary[0, 237]}..." : summary
          end

          # Render the human-readable report.md with YAML frontmatter plus goal and
          # summary tables.
          def write_goal_report(path:, scenario:, provider:, result:)
            passed = result.passed_count
            failed = result.failed_count
            total = result.total_count
            score = total.zero? ? 0.0 : (passed.to_f / total).round(3)
            verdict = if result.status == "error"
                        "fail"
                      elsif failed.zero?
                        "pass"
                      elsif passed.zero?
                        "fail"
                      else
                        "partial"
                      end

            frontmatter = {
              "test-id" => scenario.test_id,
              "title" => scenario.title,
              "package" => scenario.package,
              "runner-provider" => provider,
              "verifier-provider" => provider,
              "timestamp" => result.completed_at.utc.strftime("%Y-%m-%dT%H:%M:%SZ"),
              "tcs-passed" => passed,
              "tcs-failed" => failed,
              "tcs-total" => total,
              "score" => score,
              "verdict" => verdict,
              "passed" => result.test_cases.select { |tc| tc[:status] == "pass" }.map { |tc| tc[:id] },
              "failed" => result.test_cases.select { |tc| tc[:status] == "fail" }.map do |tc|
                {
                  "tc" => tc[:id],
                  "category" => tc[:category] || "runner-error",
                  "evidence" => tc[:notes].to_s
                }
              end
            }
            # Strip YAML document delimiters; the report supplies its own "---" fences.
            frontmatter_yaml = YAML.dump(frontmatter).sub(/\A---\s*\n/, "").sub(/\.\.\.\s*\n\z/, "")

            rows = result.test_cases.map do |tc|
              "| #{tc[:id]} | #{tc[:status].upcase} | #{tc[:notes]} |"
            end.join("\n")

            content = <<~REPORT
              ---
              #{frontmatter_yaml.rstrip}
              ---

              # E2E Report: #{scenario.title}

              ## Goal Results

              | Goal | Verdict | Evidence |
              |------|---------|----------|
              #{rows}

              ## Summary

              | Metric | Value |
              |--------|-------|
              | Passed | #{passed} |
              | Failed | #{failed} |
              | Total | #{total} |
              | Score | #{(score * 100).round(1)}% |
            REPORT

            File.write(path, content)
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# frozen_string_literal: true

require "fileutils"
require "open3"

module Ace
  module Test
    module EndToEndRunner
      module Molecules
        # Builds deterministic sandbox state for standalone execution.
        #
        # Prepares a sandbox directory (git repo, package copy, provider-config
        # symlink, per-TC result directories) and returns the environment variables
        # a subprocess needs to run inside it.
        class PipelineSandboxBuilder
          # @param config_root [String] Project root used for provider symlink/bin path
          def initialize(config_root: Dir.pwd)
            @config_root = File.expand_path(config_root)
          end

          # @param scenario [Models::TestScenario]
          # @param sandbox_path [String]
          # @param test_cases [Array<String>, nil] Optional TC filter
          # @return [Hash] Environment variables for subprocess execution
          def build(scenario:, sandbox_path:, test_cases: nil)
            root = File.expand_path(sandbox_path)
            [root, File.join(root, ".ace-local", "e2e"), File.join(root, "reports")].each do |dir|
              FileUtils.mkdir_p(dir)
            end

            initialize_git_repo(root)
            ensure_package_available(scenario.package, root)
            link_provider_configs(root)
            create_result_directories(scenario, root, test_cases: test_cases)
            verify_tool_access(scenario, root)

            { "PROJECT_ROOT_PATH" => root }
          end

          private

          # Copy the scenario package from the project root unless it is already
          # present in the sandbox. Raises when the source package is missing.
          def ensure_package_available(package_name, sandbox_path)
            name = package_name.to_s.strip
            return if name.empty?

            source = File.join(@config_root, name)
            destination = File.join(sandbox_path, name)
            return if File.exist?(destination)

            raise "Scenario package not found: #{name} (expected #{source})" unless File.directory?(source)

            FileUtils.cp_r(source, destination)
          end

          # Initialize a fresh git repository on branch "main" when none exists yet.
          def initialize_git_repo(sandbox_path)
            return if Dir.exist?(File.join(sandbox_path, ".git"))

            _out, err, status = Open3.capture3("git", "init", "-b", "main", chdir: sandbox_path)
            raise "Sandbox git init failed: #{err}".strip unless status.success?
          end

          # Symlink provider configs from the project root into the sandbox;
          # fall back to an empty directory when no source configs exist.
          def link_provider_configs(sandbox_path)
            providers_rel = File.join(".ace", "llm", "providers")
            source = File.join(@config_root, providers_rel)
            target = File.join(sandbox_path, providers_rel)
            FileUtils.mkdir_p(File.dirname(target))

            # Remove any stale link or directory before re-linking.
            FileUtils.rm_f(target) if File.symlink?(target)
            FileUtils.rm_rf(target) if File.directory?(target)

            File.directory?(source) ? File.symlink(source, target) : FileUtils.mkdir_p(target)
          end

          # Create the per-TC result directories inside the sandbox.
          def create_result_directories(scenario, sandbox_path, test_cases:)
            resolve_result_dirs(scenario, test_cases: test_cases).each do |rel|
              FileUtils.mkdir_p(File.join(sandbox_path, rel))
            end
          end

          # Determine which result directories to create, honoring an optional TC
          # filter and any explicit sandbox layout declared by the scenario.
          def resolve_result_dirs(scenario, test_cases:)
            all_cases = scenario.test_cases || []
            positions = {}
            all_cases.each_with_index { |tc, idx| positions[tc.tc_id.to_s.upcase] = idx + 1 }

            wanted = if test_cases && !test_cases.empty?
                       test_cases.filter_map { |tc_id| positions[tc_id.to_s.upcase] }.uniq.sort
                     else
                       positions.values.sort
                     end

            # A filter that matched nothing falls back to every position.
            wanted = (1..all_cases.size).to_a if wanted.empty? && !all_cases.empty?

            layout_keys = (scenario.sandbox_layout || {}).keys
            if layout_keys.any?
              from_layout = layout_keys.select do |key|
                idx = extract_result_dir_index(key)
                idx.nil? || wanted.include?(idx)
              end
              return from_layout unless from_layout.empty?
            end

            wanted.map { |idx| "results/tc/#{format("%02d", idx)}" }
          end

          # Parse the numeric TC index out of a "results/tc/NN" path; nil otherwise.
          def extract_result_dir_index(path)
            digits = path.to_s[%r{results/tc/(\d{1,3})/?}, 1]
            digits&.to_i
          end

          # Smoke-check that the tool under test responds to --help in the sandbox.
          def verify_tool_access(scenario, sandbox_path)
            tool = scenario.tool_under_test.to_s.strip
            return if tool.empty?

            _out, err, status = Open3.capture3(tool, "--help", chdir: sandbox_path)
            raise "Sandbox tool check failed for #{tool}: #{err}".strip unless status.success?
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Ace
  module Test
    module EndToEndRunner
      module Molecules
        # Animated ANSI table display manager for E2E test output (--progress mode).
        # Updates test rows in place using cursor movement escape codes.
        # Modeled on ace-test-runner's DisplayManager for visual consistency.
        class ProgressDisplayManager
          # Minimum seconds between full redraws (~4Hz, matching the throttle note
          # on #refresh). BUG FIX: this constant was referenced by #refresh but
          # never defined, so every call raised NameError.
          REFRESH_INTERVAL = 0.25

          # @param scenarios [Array<Models::TestScenario>] tests to run
          # @param output [IO] output stream
          # @param parallel [Integer] parallelism level
          def initialize(scenarios, output:, parallel:)
            @scenarios = scenarios
            @output = output
            @parallel = parallel
            # Color only when writing to a real terminal.
            @use_color = output.respond_to?(:tty?) && output.tty?
            @start_time = Time.now
            @last_refresh = Time.at(0)
            @lines = {} # scenario.test_id => line number
            @states = {} # scenario.test_id => :waiting | :running | :completed
            @results = {} # scenario.test_id => Models::TestResult
            @started_at = {} # scenario.test_id => Time
            @title_width = calculate_title_width
          end

          # Print header and initial table with all tests in waiting state.
          def initialize_display
            h = Atoms::DisplayHelpers
            package = @scenarios.first&.package || "unknown"

            # Clear screen (preserves scrollback)
            @output.print "\033[H\033[J"

            @output.puts h.separator
            @output.puts " E2E Tests: #{package} (#{@scenarios.size} tests)"
            @output.puts h.separator
            @output.puts

            @scenarios.each_with_index do |scenario, index|
              line = index + 5 # account for header lines
              @lines[scenario.test_id] = line
              @states[scenario.test_id] = :waiting
              print_row(scenario)
            end

            @output.puts
            @output.puts
            # Guard against an empty scenario list: @lines.values.max would be nil
            # and nil + 3 raises; fall back to the header height (4 lines).
            @footer_line = (@lines.values.max || 4) + 3
            update_footer
          end

          # Update row when a test begins.
          # @param scenario [Models::TestScenario]
          def test_started(scenario)
            @states[scenario.test_id] = :running
            @started_at[scenario.test_id] = Time.now
            print_row(scenario)
            update_footer
          end

          # Update row when a test completes.
          # @param scenario [Models::TestScenario]
          # @param result [Models::TestResult]
          # @param completed [Integer]
          # @param total [Integer]
          def test_completed(scenario, result, completed, total)
            @states[scenario.test_id] = :completed
            @results[scenario.test_id] = result
            print_row(scenario)
            update_footer
          end

          # Refresh running test rows to update elapsed timers.
          # Throttled to ~4Hz — redraws are expensive with ANSI cursor movement.
          def refresh
            now = Time.now
            return if now - @last_refresh < REFRESH_INTERVAL

            @last_refresh = now

            @states.each do |test_id, state|
              next unless state == :running

              scenario = @scenarios.find { |s| s.test_id == test_id }
              print_row(scenario) if scenario
            end
            update_footer
          end

          # Print a single-test result line (for run-single-test mode).
          # @param result [Models::TestResult]
          def show_single_result(result)
            @output.puts Atoms::DisplayHelpers.format_single_result(result, use_color: @use_color)
          end

          # Print structured summary block below the live table.
          # @param results [Array<Models::TestResult>]
          # @param report_path [String]
          def show_summary(results, report_path)
            # Move cursor past the display area
            move_to_line(@footer_line + 1)
            @output.puts

            lines = Atoms::DisplayHelpers.format_summary_lines(
              results, Time.now - @start_time, report_path, use_color: @use_color
            )
            lines.each { |line| @output.puts line }
          end

          private

          # Redraw one scenario row in place according to its current state.
          def print_row(scenario)
            h = Atoms::DisplayHelpers
            line = @lines[scenario.test_id]
            state = @states[scenario.test_id]

            move_to_line(line)
            @output.print "\033[K" # clear line

            title = scenario.title.ljust(@title_width)

            case state
            when :waiting
              icon = h.color("\u00b7", :gray, use_color: @use_color)
              elapsed = " 0.0s"
              status = "waiting"
              @output.print "#{icon} #{elapsed} #{scenario.test_id} #{title} #{status}"

            when :running
              icon = h.color("\u22ef", :cyan, use_color: @use_color)
              secs = Time.now - (@started_at[scenario.test_id] || Time.now)
              elapsed = h.format_elapsed(secs)
              status = "running"
              @output.print "#{icon} #{elapsed} #{scenario.test_id} #{title} #{status}"

            when :completed
              result = @results[scenario.test_id]
              success = result.success?
              icon = h.color(h.status_icon(success), success ? :green : :red, use_color: @use_color)
              elapsed = h.format_elapsed(result.duration)
              tc = h.tc_count_display(result)
              status_text = result.status.upcase
              @output.print "#{icon} #{elapsed} #{scenario.test_id} #{title} #{status_text}#{tc}"
            end
          end

          # Redraw the Active/Completed/Waiting footer line.
          def update_footer
            move_to_line(@footer_line)
            @output.print "\033[K"

            active = @states.count { |_, s| s == :running }
            completed = @states.count { |_, s| s == :completed }
            waiting = @states.count { |_, s| s == :waiting }

            @output.print "Active: #{active} | Completed: #{completed} | Waiting: #{waiting}"
          end

          # Position the cursor at column 1 of the given screen line.
          def move_to_line(line)
            @output.print "\033[#{line};1H"
          end

          # Width of the title column: longest scenario title, minimum 20.
          def calculate_title_width
            max = @scenarios.map { |s| s.title.length }.max || 0
            [max, 20].max
          end
        end
      end
    end
  end
end
|