ace-test-runner-e2e 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.ace-defaults/e2e-runner/config.yml +70 -0
- data/.ace-defaults/nav/protocols/guide-sources/ace-test-runner-e2e.yml +11 -0
- data/.ace-defaults/nav/protocols/skill-sources/ace-test-runner-e2e.yml +19 -0
- data/.ace-defaults/nav/protocols/tmpl-sources/ace-test-runner-e2e.yml +12 -0
- data/.ace-defaults/nav/protocols/wfi-sources/ace-test-runner-e2e.yml +11 -0
- data/CHANGELOG.md +1166 -0
- data/LICENSE +21 -0
- data/README.md +42 -0
- data/Rakefile +15 -0
- data/exe/ace-test-e2e +15 -0
- data/exe/ace-test-e2e-sh +67 -0
- data/exe/ace-test-e2e-suite +13 -0
- data/handbook/guides/e2e-testing.g.md +124 -0
- data/handbook/guides/scenario-yml-reference.g.md +182 -0
- data/handbook/guides/tc-authoring.g.md +131 -0
- data/handbook/skills/as-e2e-create/SKILL.md +30 -0
- data/handbook/skills/as-e2e-fix/SKILL.md +35 -0
- data/handbook/skills/as-e2e-manage/SKILL.md +31 -0
- data/handbook/skills/as-e2e-plan-changes/SKILL.md +30 -0
- data/handbook/skills/as-e2e-review/SKILL.md +35 -0
- data/handbook/skills/as-e2e-rewrite/SKILL.md +31 -0
- data/handbook/skills/as-e2e-run/SKILL.md +48 -0
- data/handbook/skills/as-e2e-setup-sandbox/SKILL.md +34 -0
- data/handbook/templates/ace-taskflow-fixture.template.md +322 -0
- data/handbook/templates/agent-experience-report.template.md +89 -0
- data/handbook/templates/metadata.template.yml +49 -0
- data/handbook/templates/scenario.yml.template.yml +60 -0
- data/handbook/templates/tc-file.template.md +45 -0
- data/handbook/templates/test-report.template.md +94 -0
- data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +126 -0
- data/handbook/workflow-instructions/e2e/create.wf.md +395 -0
- data/handbook/workflow-instructions/e2e/execute.wf.md +253 -0
- data/handbook/workflow-instructions/e2e/fix.wf.md +166 -0
- data/handbook/workflow-instructions/e2e/manage.wf.md +179 -0
- data/handbook/workflow-instructions/e2e/plan-changes.wf.md +255 -0
- data/handbook/workflow-instructions/e2e/review.wf.md +286 -0
- data/handbook/workflow-instructions/e2e/rewrite.wf.md +281 -0
- data/handbook/workflow-instructions/e2e/run.wf.md +355 -0
- data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +461 -0
- data/lib/ace/test/end_to_end_runner/atoms/display_helpers.rb +234 -0
- data/lib/ace/test/end_to_end_runner/atoms/prompt_builder.rb +199 -0
- data/lib/ace/test/end_to_end_runner/atoms/result_parser.rb +166 -0
- data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +166 -0
- data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +244 -0
- data/lib/ace/test/end_to_end_runner/atoms/suite_report_prompt_builder.rb +103 -0
- data/lib/ace/test/end_to_end_runner/atoms/tc_fidelity_validator.rb +39 -0
- data/lib/ace/test/end_to_end_runner/atoms/test_case_parser.rb +108 -0
- data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +130 -0
- data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +156 -0
- data/lib/ace/test/end_to_end_runner/models/test_case.rb +47 -0
- data/lib/ace/test/end_to_end_runner/models/test_result.rb +115 -0
- data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +90 -0
- data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +92 -0
- data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +75 -0
- data/lib/ace/test/end_to_end_runner/molecules/failure_finder.rb +203 -0
- data/lib/ace/test/end_to_end_runner/molecules/fixture_copier.rb +35 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +121 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +182 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +321 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +131 -0
- data/lib/ace/test/end_to_end_runner/molecules/progress_display_manager.rb +172 -0
- data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +259 -0
- data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +254 -0
- data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +181 -0
- data/lib/ace/test/end_to_end_runner/molecules/simple_display_manager.rb +72 -0
- data/lib/ace/test/end_to_end_runner/molecules/suite_progress_display_manager.rb +223 -0
- data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +277 -0
- data/lib/ace/test/end_to_end_runner/molecules/suite_simple_display_manager.rb +116 -0
- data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +136 -0
- data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +332 -0
- data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +830 -0
- data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +442 -0
- data/lib/ace/test/end_to_end_runner/version.rb +9 -0
- data/lib/ace/test/end_to_end_runner.rb +71 -0
- metadata +220 -0
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 ACE Team
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
<h1> ACE - Test Runner E2E </h1>
|
|
3
|
+
|
|
4
|
+
Agent-executed end-to-end tests with reproducible sandboxes and structured reporting.
|
|
5
|
+
|
|
6
|
+
<img src="https://raw.githubusercontent.com/cs3b/ace/main/docs/brand/AgenticCodingEnvironment.Logo.XS.jpg" alt="ACE Logo" width="480">
|
|
7
|
+
<br><br>
|
|
8
|
+
|
|
9
|
+
<a href="https://rubygems.org/gems/ace-test-runner-e2e"><img alt="Gem Version" src="https://img.shields.io/gem/v/ace-test-runner-e2e.svg" /></a>
|
|
10
|
+
<a href="https://www.ruby-lang.org"><img alt="Ruby" src="https://img.shields.io/badge/Ruby-3.2+-CC342D?logo=ruby" /></a>
|
|
11
|
+
<a href="https://opensource.org/licenses/MIT"><img alt="License: MIT" src="https://img.shields.io/badge/License-MIT-blue.svg" /></a>
|
|
12
|
+
|
|
13
|
+
</div>
|
|
14
|
+
|
|
15
|
+
> Works with: Claude Code, Codex CLI, OpenCode, Gemini CLI, pi-agent, and more.
|
|
16
|
+
|
|
17
|
+
[Getting Started](docs/getting-started.md) | [Usage Guide](docs/usage.md) | [Handbook - Skills, Agents, Templates](docs/handbook.md)
|
|
18
|
+
|
|
19
|
+

|
|
20
|
+
|
|
21
|
+
`ace-test-runner-e2e` runs realistic workflow scenarios through coding agents so teams can validate behavior beyond unit and integration coverage while keeping execution reproducible and isolated from the working tree.
|
|
22
|
+
|
|
23
|
+
## How It Works
|
|
24
|
+
|
|
25
|
+
1. Discover E2E scenario definitions from package-local `test/e2e/` suites with metadata, tags, and command flows.
|
|
26
|
+
2. Execute scenarios inside reproducible sandboxes that isolate agent runs from the working tree.
|
|
27
|
+
3. Produce structured reports that are easy to inspect, compare across runs, and feed back into triage workflows.
|
|
28
|
+
|
|
29
|
+
## Use Cases
|
|
30
|
+
|
|
31
|
+
**Validate real developer workflows end-to-end** - use `/as-e2e-run` or run `ace-test-e2e` to confirm that instructions, tooling, and outputs behave correctly under agent execution for any package.
|
|
32
|
+
|
|
33
|
+
**Run broad regression sweeps across packages** - use `ace-test-e2e-suite` for cross-package scenario orchestration with filtering by package, tags, and prior failures.
|
|
34
|
+
|
|
35
|
+
**Keep execution deterministic and reviewable** - execute in sandboxes with structured outputs so results are reproducible and easy to compare across runs, complementing fast loops from [ace-test-runner](../ace-test-runner). Use `ace-test-e2e-sh` to run ad-hoc commands inside a test sandbox.
|
|
36
|
+
|
|
37
|
+
**Create and maintain E2E scenarios** - use `/as-e2e-create` to scaffold new scenarios and `/as-e2e-rewrite` or `/as-e2e-fix` to keep existing ones current as workflows evolve.
|
|
38
|
+
|
|
39
|
+
**Plan E2E coverage for new features** - use `/as-e2e-plan-changes` to map which scenarios need updates when instructions or tooling change, and `/as-e2e-review` to audit scenario quality.
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
[Getting Started](docs/getting-started.md) | [Usage Guide](docs/usage.md) | [Handbook - Skills, Agents, Templates](docs/handbook.md) | Part of [ACE](https://github.com/cs3b/ace)
|
data/Rakefile
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "bundler/gem_tasks"
|
|
4
|
+
require "rake/testtask"
|
|
5
|
+
|
|
6
|
+
Rake::TestTask.new(:test) do |t|
|
|
7
|
+
t.libs << "test"
|
|
8
|
+
t.libs << "lib"
|
|
9
|
+
t.test_files = FileList["test/**/*_test.rb"]
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
# Alias for CI compatibility
|
|
13
|
+
task spec: :test
|
|
14
|
+
|
|
15
|
+
task default: :test
|
data/exe/ace-test-e2e
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require_relative "../lib/ace/test/end_to_end_runner"
|
|
5
|
+
require "ace/support/cli"
|
|
6
|
+
|
|
7
|
+
# Start ace-support-cli single-command entrypoint with exception-based exit code handling (per ADR-023)
|
|
8
|
+
args = ARGV.empty? ? ["--help"] : ARGV
|
|
9
|
+
|
|
10
|
+
begin
|
|
11
|
+
Ace::Support::Cli::Runner.new(Ace::Test::EndToEndRunner::CLI::Commands::RunTest).call(args: args)
|
|
12
|
+
rescue Ace::Support::Cli::Error => e
|
|
13
|
+
warn e.message
|
|
14
|
+
exit(e.exit_code)
|
|
15
|
+
end
|
data/exe/ace-test-e2e-sh
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# ace-test-e2e-sh - Execute commands within E2E test sandbox
|
|
5
|
+
#
|
|
6
|
+
# Usage:
|
|
7
|
+
# ace-test-e2e-sh <test-dir> <command...>
|
|
8
|
+
# ace-test-e2e-sh <test-dir> bash <<'SANDBOX'
|
|
9
|
+
# git add . && git commit -m "test"
|
|
10
|
+
# SANDBOX
|
|
11
|
+
|
|
12
|
+
# Handle --help/-h before shifting arguments
|
|
13
|
+
if ARGV.first && %w[--help -h].include?(ARGV.first)
|
|
14
|
+
puts <<~HELP
|
|
15
|
+
NAME
|
|
16
|
+
ace-test-e2e-sh - Execute commands within E2E test sandbox
|
|
17
|
+
|
|
18
|
+
USAGE
|
|
19
|
+
ace-test-e2e-sh <test-dir> [command...]
|
|
20
|
+
|
|
21
|
+
ARGUMENTS
|
|
22
|
+
test-dir Sandbox directory path (must be under .ace-local/test-e2e/)
|
|
23
|
+
command Command to execute inside the sandbox
|
|
24
|
+
|
|
25
|
+
OPTIONS
|
|
26
|
+
--help, -h Show this help
|
|
27
|
+
|
|
28
|
+
EXAMPLES
|
|
29
|
+
$ ace-test-e2e-sh /path/to/sandbox bash
|
|
30
|
+
$ ace-test-e2e-sh /path/to/sandbox bash -c 'git add . && git commit -m "test"'
|
|
31
|
+
HELP
|
|
32
|
+
exit 0
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
if ARGV.first == "--version"
|
|
36
|
+
require_relative "../lib/ace/test/end_to_end_runner/version"
|
|
37
|
+
puts "ace-test-e2e-sh #{Ace::Test::EndToEndRunner::VERSION}"
|
|
38
|
+
exit 0
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
test_dir = ARGV.shift
|
|
42
|
+
|
|
43
|
+
unless test_dir
|
|
44
|
+
warn "Usage: ace-test-e2e-sh <test-dir> [command...]"
|
|
45
|
+
exit 1
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
test_dir = File.expand_path(test_dir)
|
|
49
|
+
|
|
50
|
+
unless test_dir.include?("/.ace-local/test-e2e/")
|
|
51
|
+
warn "FATAL: Not a sandbox path: #{test_dir}"
|
|
52
|
+
exit 1
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
unless Dir.exist?(test_dir)
|
|
56
|
+
warn "FATAL: Sandbox directory not found: #{test_dir}"
|
|
57
|
+
exit 1
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
Dir.chdir(test_dir)
|
|
61
|
+
ENV["PROJECT_ROOT_PATH"] = test_dir
|
|
62
|
+
|
|
63
|
+
if ARGV.empty?
|
|
64
|
+
exec "bash"
|
|
65
|
+
else
|
|
66
|
+
exec(*ARGV)
|
|
67
|
+
end
|
data/exe/ace-test-e2e-suite
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require_relative "../lib/ace/test/end_to_end_runner"
|
|
5
|
+
require "ace/support/cli"
|
|
6
|
+
|
|
7
|
+
# Start ace-support-cli single-command entrypoint with exception-based exit code handling (per ADR-023)
|
|
8
|
+
begin
|
|
9
|
+
Ace::Support::Cli::Runner.new(Ace::Test::EndToEndRunner::CLI::Commands::RunSuite).call(args: ARGV)
|
|
10
|
+
rescue Ace::Support::Cli::Error => e
|
|
11
|
+
warn e.message
|
|
12
|
+
exit(e.exit_code)
|
|
13
|
+
end
|
data/handbook/guides/e2e-testing.g.md
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
---
|
|
2
|
+
doc-type: guide
|
|
3
|
+
title: E2E Testing Guide
|
|
4
|
+
purpose: Conventions and best practices for agent-executed end-to-end tests
|
|
5
|
+
ace-docs:
|
|
6
|
+
last-updated: 2026-03-12
|
|
7
|
+
last-checked: 2026-03-21
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# E2E Testing Guide
|
|
11
|
+
|
|
12
|
+
## Overview
|
|
13
|
+
|
|
14
|
+
E2E tests are executed by an AI agent and reserved for behaviors that require real CLI execution, real tools, and real filesystem side effects.
|
|
15
|
+
|
|
16
|
+
## Canonical Conventions
|
|
17
|
+
|
|
18
|
+
- CLI split:
|
|
19
|
+
- `ace-test-e2e` runs tests for a single package
|
|
20
|
+
- `ace-test-e2e-suite` runs suite-level execution
|
|
21
|
+
- Scenario IDs follow `TS-<PACKAGE_SHORT>-<NNN>[-slug]`
|
|
22
|
+
- Test format is standalone pair only:
|
|
23
|
+
- `TC-*.runner.md`
|
|
24
|
+
- `TC-*.verify.md`
|
|
25
|
+
- `runner.yml.md`
|
|
26
|
+
- `verifier.yml.md`
|
|
27
|
+
- TC artifacts use `results/tc/{NN}/`
|
|
28
|
+
- Summary reports use `tcs-passed`, `tcs-failed`, `tcs-total`, and `failed[].tc`
|
|
29
|
+
- Scenarios declare `tags` for discovery-time filtering via `--tags`/`--exclude-tags`
|
|
30
|
+
|
|
31
|
+
## Runner vs Verifier Contract
|
|
32
|
+
|
|
33
|
+
- Runner is **execution-only**:
|
|
34
|
+
- perform user-like CLI actions in sandbox
|
|
35
|
+
- produce evidence files under `results/tc/{NN}/`
|
|
36
|
+
- do not issue PASS/FAIL verdicts
|
|
37
|
+
- do not perform verifier-style assertion/classification
|
|
38
|
+
- Verifier is **verification-only**:
|
|
39
|
+
- evaluate TC outcome from sandbox evidence
|
|
40
|
+
- apply an **impact-first** evidence order:
|
|
41
|
+
1. sandbox/project state impact
|
|
42
|
+
2. explicit TC artifacts
|
|
43
|
+
3. debug captures (`stdout`, `stderr`, `*.exit`, metadata) only as fallback
|
|
44
|
+
- Setup ownership:
|
|
45
|
+
- sandbox preparation belongs to `scenario.yml` `setup:` + `fixtures/`
|
|
46
|
+
- TC runner files must not define independent environment setup procedures
|
|
47
|
+
|
|
48
|
+
## E2E Value Gate
|
|
49
|
+
|
|
50
|
+
Before adding a TC, confirm the behavior needs:
|
|
51
|
+
- full CLI binary execution
|
|
52
|
+
- real external tools/processes
|
|
53
|
+
- real filesystem I/O and environment state
|
|
54
|
+
|
|
55
|
+
If not, keep coverage in unit/integration tests.
|
|
56
|
+
|
|
57
|
+
## Cost and Scope
|
|
58
|
+
|
|
59
|
+
- Keep scenarios small and coherent.
|
|
60
|
+
- Typical scenario size: 2-5 TCs.
|
|
61
|
+
- Consolidate assertions that share the same command/setup into one TC.
|
|
62
|
+
- Use `cost-tier` to stage execution (`smoke` → `happy-path` → `deep`).
|
|
63
|
+
|
|
64
|
+
## Execution Pipeline
|
|
65
|
+
|
|
66
|
+
CLI providers (`ace-test-e2e`, `ace-test-e2e-suite`) use a deterministic 6-phase pipeline:
|
|
67
|
+
|
|
68
|
+
1. **Setup** — `SetupExecutor` creates sandbox (git init, mise.toml, .ace symlinks, results/tc/{NN}/ dirs)
|
|
69
|
+
2. **Runner prompt** — `SkillPromptBuilder` assembles context from `runner.yml.md` and `TC-*.runner.md`
|
|
70
|
+
3. **Runner LLM** — Agent executes TC steps in sandbox, produces artifacts
|
|
71
|
+
4. **Verifier prompt** — `SkillPromptBuilder` assembles context from `verifier.yml.md` and `TC-*.verify.md`
|
|
72
|
+
5. **Verifier LLM** — Independent agent evaluates artifacts against expectations
|
|
73
|
+
6. **Report** — `PipelineReportGenerator` produces deterministic summary from verifier output
|
|
74
|
+
|
|
75
|
+
API providers use a single-prompt approach (runner and verifier in one pass).
|
|
76
|
+
|
|
77
|
+
The verifier is always-on for standalone goal-mode TCs in the CLI pipeline. For procedural runs guided by `ace-bundle wfi://e2e/run`, the verifier is opt-in via `--verify`.
|
|
78
|
+
|
|
79
|
+
## Scenario Layout
|
|
80
|
+
|
|
81
|
+
```text
|
|
82
|
+
{package}/test/e2e/TS-{AREA}-{NNN}-{slug}/
|
|
83
|
+
scenario.yml
|
|
84
|
+
runner.yml.md
|
|
85
|
+
verifier.yml.md
|
|
86
|
+
TC-001-{slug}.runner.md
|
|
87
|
+
TC-001-{slug}.verify.md
|
|
88
|
+
fixtures/
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Required Scenario Evidence
|
|
92
|
+
|
|
93
|
+
In `scenario.yml`, record:
|
|
94
|
+
- `tags` (cost-tier tag + use-case tags)
|
|
95
|
+
- `e2e-justification`
|
|
96
|
+
- `unit-coverage-reviewed`
|
|
97
|
+
- `cost-tier`
|
|
98
|
+
|
|
99
|
+
This prevents duplicate assertions across test layers.
|
|
100
|
+
|
|
101
|
+
## Authoring Rules
|
|
102
|
+
|
|
103
|
+
- Keep runner goals outcome-oriented and deterministic.
|
|
104
|
+
- Keep verifier expectations impact-first, then artifacts, then debug fallback.
|
|
105
|
+
- Preserve strict TC pairing (`runner` + `verify`).
|
|
106
|
+
- Keep outputs inside `results/tc/{NN}/`.
|
|
107
|
+
- Avoid hidden dependencies between TCs unless explicitly intended.
|
|
108
|
+
|
|
109
|
+
## Execution Artifacts
|
|
110
|
+
|
|
111
|
+
Reports are written under `.ace-local/test-e2e/`:
|
|
112
|
+
- `{run-id}-{pkg}-{scenario}-reports/summary.r.md`
|
|
113
|
+
- `{run-id}-{pkg}-{scenario}-reports/experience.r.md`
|
|
114
|
+
- `{run-id}-{pkg}-{scenario}-reports/metadata.yml`
|
|
115
|
+
|
|
116
|
+
## Review Checklist
|
|
117
|
+
|
|
118
|
+
Before approving new/updated E2E tests:
|
|
119
|
+
- [ ] Scenario uses standalone pair format only
|
|
120
|
+
- [ ] `scenario.yml` omits legacy `mode` and `execution-model`
|
|
121
|
+
- [ ] `runner.yml.md` and `verifier.yml.md` exist
|
|
122
|
+
- [ ] Every TC has both `.runner.md` and `.verify.md`
|
|
123
|
+
- [ ] Artifacts are scoped to `results/tc/{NN}/`
|
|
124
|
+
- [ ] Value-gate metadata is present (`e2e-justification`, `unit-coverage-reviewed`, `cost-tier`)
|
data/handbook/guides/scenario-yml-reference.g.md
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
---
|
|
2
|
+
doc-type: guide
|
|
3
|
+
title: scenario.yml Reference
|
|
4
|
+
purpose: Complete schema reference for TS-format scenario configuration files
|
|
5
|
+
ace-docs:
|
|
6
|
+
last-updated: 2026-03-18
|
|
7
|
+
last-checked: 2026-03-21
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# scenario.yml Reference
|
|
11
|
+
|
|
12
|
+
## Overview
|
|
13
|
+
|
|
14
|
+
The `scenario.yml` file configures a TS-format E2E scenario.
|
|
15
|
+
|
|
16
|
+
Supported test definition format is standalone pairs only:
|
|
17
|
+
- `TC-*.runner.md`
|
|
18
|
+
- `TC-*.verify.md`
|
|
19
|
+
- `runner.yml.md`
|
|
20
|
+
- `verifier.yml.md`
|
|
21
|
+
|
|
22
|
+
Legacy fields `mode` and `execution-model` are not supported.
|
|
23
|
+
|
|
24
|
+
## Location
|
|
25
|
+
|
|
26
|
+
```text
|
|
27
|
+
{package}/test/e2e/TS-{AREA}-{NNN}-{slug}/scenario.yml
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Example: `ace-lint/test/e2e/TS-LINT-001-lint-pipeline/scenario.yml`
|
|
31
|
+
|
|
32
|
+
## Schema
|
|
33
|
+
|
|
34
|
+
### Required Fields
|
|
35
|
+
|
|
36
|
+
| Field | Type | Description |
|
|
37
|
+
|-------|------|-------------|
|
|
38
|
+
| `test-id` | string | Unique test identifier in format `TS-{AREA}-{NNN}` |
|
|
39
|
+
| `title` | string | Human-readable scenario title |
|
|
40
|
+
| `area` | string | Functional area code |
|
|
41
|
+
| `package` | string | Package name |
|
|
42
|
+
|
|
43
|
+
### Optional Fields
|
|
44
|
+
|
|
45
|
+
| Field | Type | Default | Description |
|
|
46
|
+
|-------|------|---------|-------------|
|
|
47
|
+
| `priority` | string | `medium` | Test priority: `high`, `medium`, `low` |
|
|
48
|
+
| `tool-under-test` | string | — | Primary command/tool validated |
|
|
49
|
+
| `sandbox-layout` | object | `{}` | Declared artifact paths and expected outputs |
|
|
50
|
+
| `duration` | string | — | Estimated duration (e.g., `~15min`) |
|
|
51
|
+
| `timeout` | integer | — | Optional per-scenario execution timeout in seconds |
|
|
52
|
+
| `automation-candidate` | boolean | `false` | Whether test is automatable |
|
|
53
|
+
| `tags` | array | `[]` | Scenario tags for filtering with `--tags`/`--exclude-tags` (OR semantics) |
|
|
54
|
+
| `cost-tier` | string | `smoke` | Run profile: `smoke`, `happy-path`, `deep` |
|
|
55
|
+
| `e2e-justification` | string | — | Why E2E is needed |
|
|
56
|
+
| `unit-coverage-reviewed` | array | `[]` | Unit/integration files reviewed |
|
|
57
|
+
| `requires` | object | — | Test prerequisites |
|
|
58
|
+
| `setup` | array | `[]` | Setup directives before execution |
|
|
59
|
+
| `last-verified` | string | — | Last successful verification date |
|
|
60
|
+
| `verified-by` | string | — | Agent that last verified |
|
|
61
|
+
|
|
62
|
+
## Standalone File Conventions
|
|
63
|
+
|
|
64
|
+
Scenario directory must contain:
|
|
65
|
+
- `runner.yml.md`
|
|
66
|
+
- `verifier.yml.md`
|
|
67
|
+
- paired `TC-*.runner.md` and `TC-*.verify.md`
|
|
68
|
+
|
|
69
|
+
Pairing rule:
|
|
70
|
+
- every `TC-XXX.runner.md` must have a matching `TC-XXX.verify.md`
|
|
71
|
+
- every `TC-XXX.verify.md` must have a matching `TC-XXX.runner.md`
|
|
72
|
+
|
|
73
|
+
Artifact layout conventions:
|
|
74
|
+
- canonical: `results/tc/{NN}/`
|
|
75
|
+
- avoid non-TC-scoped result folders
|
|
76
|
+
|
|
77
|
+
Canonical summary report fields:
|
|
78
|
+
- `tcs-passed`
|
|
79
|
+
- `tcs-failed`
|
|
80
|
+
- `tcs-total`
|
|
81
|
+
- `failed[].tc`
|
|
82
|
+
|
|
83
|
+
Role contract:
|
|
84
|
+
- `runner.yml.md` + `TC-*.runner.md` are execution-only.
|
|
85
|
+
- `verifier.yml.md` + `TC-*.verify.md` are verification-only with impact-first checks.
|
|
86
|
+
|
|
87
|
+
## `requires` Object
|
|
88
|
+
|
|
89
|
+
```yaml
|
|
90
|
+
requires:
|
|
91
|
+
tools: [ace-lint, jq]
|
|
92
|
+
ruby: ">= 3.0"
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## `setup` Directives
|
|
96
|
+
|
|
97
|
+
Available directives:
|
|
98
|
+
- `git-init` — Initialize git repository in sandbox
|
|
99
|
+
- `run:` — Execute a shell command (bash -lc; env vars are re-exported to protect against mise clobbering)
|
|
100
|
+
- `copy-fixtures` — Copy fixtures/ directory into sandbox
|
|
101
|
+
- `write-file:` — Write inline content to a file (`path:` + `content:`)
|
|
102
|
+
- `agent-env:` — Environment variables passed to the runner/verifier agent subprocess (not setup commands)
|
|
103
|
+
- `tmux-session` — Create a detached tmux session
|
|
104
|
+
- String form: `tmux-session` (uses scenario-based naming)
|
|
105
|
+
- Hash form: `tmux-session: { name-source: run-id }` (uses unique E2E run ID as session name)
|
|
106
|
+
- Runner teardown removes the created session after test execution
|
|
107
|
+
|
|
108
|
+
Example:
|
|
109
|
+
|
|
110
|
+
```yaml
|
|
111
|
+
setup:
|
|
112
|
+
- git-init
|
|
113
|
+
- tmux-session:
|
|
114
|
+
name-source: run-id
|
|
115
|
+
- run: "cp $PROJECT_ROOT_PATH/mise.toml mise.toml && mise trust mise.toml"
|
|
116
|
+
- copy-fixtures
|
|
117
|
+
- run: git add -A && git commit -m "initial" --quiet
|
|
118
|
+
- agent-env:
|
|
119
|
+
PROJECT_ROOT_PATH: "."
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Setup rules:
|
|
123
|
+
- Setup is fail-fast. Do not hide setup failures with `|| true`.
|
|
124
|
+
- Setup belongs in `scenario.yml` and fixtures, not in TC runner instructions.
|
|
125
|
+
- If setup fails (for example, missing `mise trust` support), stop scenario execution and report infrastructure failure.
|
|
126
|
+
|
|
127
|
+
## Complete Example
|
|
128
|
+
|
|
129
|
+
```yaml
|
|
130
|
+
test-id: TS-LINT-001
|
|
131
|
+
title: Core Lint Pipeline
|
|
132
|
+
area: lint
|
|
133
|
+
package: ace-lint
|
|
134
|
+
priority: high
|
|
135
|
+
duration: ~10min
|
|
136
|
+
cost-tier: smoke
|
|
137
|
+
tags: [smoke, "use-case:lint"]
|
|
138
|
+
e2e-justification: "Validates real subprocess behavior and report file generation"
|
|
139
|
+
unit-coverage-reviewed:
|
|
140
|
+
- test/molecules/lint_runner_test.rb
|
|
141
|
+
- test/organisms/lint_orchestrator_test.rb
|
|
142
|
+
tool-under-test: ace-lint
|
|
143
|
+
sandbox-layout:
|
|
144
|
+
results/tc/01/: "help artifacts"
|
|
145
|
+
requires:
|
|
146
|
+
tools: [ace-lint, standardrb, jq]
|
|
147
|
+
ruby: ">= 3.0"
|
|
148
|
+
setup:
|
|
149
|
+
- git-init
|
|
150
|
+
- run: "cp $PROJECT_ROOT_PATH/mise.toml mise.toml && mise trust mise.toml"
|
|
151
|
+
- copy-fixtures
|
|
152
|
+
- agent-env:
|
|
153
|
+
PROJECT_ROOT_PATH: "."
|
|
154
|
+
last-verified: "2026-02-24"
|
|
155
|
+
verified-by: claude-opus-4
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## Tags
|
|
159
|
+
|
|
160
|
+
The `tags` field enables discovery-time filtering with `--tags` and `--exclude-tags`.
|
|
161
|
+
|
|
162
|
+
**Naming conventions:**
|
|
163
|
+
- Cost tier is auto-included: `smoke`, `happy-path`, `deep`
|
|
164
|
+
- Use-case tags use the `use-case:{area}` pattern (e.g., `use-case:lint`, `use-case:config`)
|
|
165
|
+
- Custom tags are lowercase kebab-case
|
|
166
|
+
|
|
167
|
+
**Filtering semantics:**
|
|
168
|
+
- `--tags` uses OR: scenario matches if it has **any** of the specified tags
|
|
169
|
+
- `--exclude-tags` uses OR: scenario is excluded if it has **any** of the specified tags
|
|
170
|
+
- Both filters can be combined; exclude is applied after include
|
|
171
|
+
|
|
172
|
+
## Directory Structure
|
|
173
|
+
|
|
174
|
+
```text
|
|
175
|
+
test/e2e/TS-LINT-001-lint-pipeline/
|
|
176
|
+
├── scenario.yml
|
|
177
|
+
├── runner.yml.md
|
|
178
|
+
├── verifier.yml.md
|
|
179
|
+
├── TC-001-help-survey.runner.md
|
|
180
|
+
├── TC-001-help-survey.verify.md
|
|
181
|
+
└── fixtures/
|
|
182
|
+
```
|
data/handbook/guides/tc-authoring.g.md
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
---
|
|
2
|
+
doc-type: guide
|
|
3
|
+
title: Test Case Authoring Guide
|
|
4
|
+
purpose: Guide for writing standalone TC runner/verifier files for TS-format E2E scenarios
|
|
5
|
+
ace-docs:
|
|
6
|
+
last-updated: 2026-02-25
|
|
7
|
+
last-checked: 2026-03-21
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Test Case Authoring Guide
|
|
11
|
+
|
|
12
|
+
## Overview
|
|
13
|
+
|
|
14
|
+
TC (Test Case) files are the individual execution units in TS-format E2E scenarios.
|
|
15
|
+
|
|
16
|
+
Only one authoring format is supported:
|
|
17
|
+
- Standalone runner/verifier pairs:
|
|
18
|
+
- `TC-*.runner.md`
|
|
19
|
+
- `TC-*.verify.md`
|
|
20
|
+
|
|
21
|
+
Inline `.tc.md` and frontmatter `mode` values are no longer supported.
|
|
22
|
+
|
|
23
|
+
## Canonical Conventions
|
|
24
|
+
|
|
25
|
+
- Scenario IDs: `TS-<PACKAGE_SHORT>-<NNN>[-slug]`
|
|
26
|
+
- Standalone pair files:
|
|
27
|
+
- `TC-{NNN}-{slug}.runner.md`
|
|
28
|
+
- `TC-{NNN}-{slug}.verify.md`
|
|
29
|
+
- Scenario-level config files:
|
|
30
|
+
- `runner.yml.md`
|
|
31
|
+
- `verifier.yml.md`
|
|
32
|
+
- TC artifacts write to `results/tc/{NN}/`
|
|
33
|
+
- Summary counters use `tcs-passed`, `tcs-failed`, and `tcs-total`
|
|
34
|
+
|
|
35
|
+
## File Naming
|
|
36
|
+
|
|
37
|
+
- `TC-{NNN}` — test case number (e.g., TC-001)
|
|
38
|
+
- `{slug}` — descriptive kebab-case identifier
|
|
39
|
+
|
|
40
|
+
Examples:
|
|
41
|
+
- `TC-001-help-survey.runner.md`
|
|
42
|
+
- `TC-001-help-survey.verify.md`
|
|
43
|
+
- `TC-002-error-handling.runner.md`
|
|
44
|
+
- `TC-002-error-handling.verify.md`
|
|
45
|
+
|
|
46
|
+
## Location
|
|
47
|
+
|
|
48
|
+
TC files are placed in the scenario directory alongside `scenario.yml`:
|
|
49
|
+
|
|
50
|
+
```text
|
|
51
|
+
{package}/test/e2e/TS-{AREA}-{NNN}-{slug}/
|
|
52
|
+
├── scenario.yml
|
|
53
|
+
├── runner.yml.md
|
|
54
|
+
├── verifier.yml.md
|
|
55
|
+
├── TC-001-{slug}.runner.md
|
|
56
|
+
├── TC-001-{slug}.verify.md
|
|
57
|
+
└── fixtures/
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Runner File Structure (`TC-*.runner.md`)
|
|
61
|
+
|
|
62
|
+
Required sections:
|
|
63
|
+
- `# Goal N - Title`
|
|
64
|
+
- `## Goal`
|
|
65
|
+
- `## Workspace`
|
|
66
|
+
- `## Constraints`
|
|
67
|
+
|
|
68
|
+
Example:
|
|
69
|
+
|
|
70
|
+
```markdown
|
|
71
|
+
# Goal 1 - Generate report
|
|
72
|
+
|
|
73
|
+
## Goal
|
|
74
|
+
|
|
75
|
+
Run `ace-lint` and produce report artifacts for a valid file.
|
|
76
|
+
|
|
77
|
+
## Workspace
|
|
78
|
+
|
|
79
|
+
- Root: sandbox directory
|
|
80
|
+
- Output: `results/tc/01/`
|
|
81
|
+
|
|
82
|
+
## Constraints
|
|
83
|
+
|
|
84
|
+
- Use only sandbox paths
|
|
85
|
+
- Keep evidence under `results/tc/01/`
|
|
86
|
+
- Execute actions only; do not assign PASS/FAIL or final verdicts
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Verifier File Structure (`TC-*.verify.md`)
|
|
90
|
+
|
|
91
|
+
Required sections:
|
|
92
|
+
- `# Goal N - Title`
|
|
93
|
+
- `## Expectations`
|
|
94
|
+
- `## Verdict`
|
|
95
|
+
|
|
96
|
+
Example:
|
|
97
|
+
|
|
98
|
+
```markdown
|
|
99
|
+
# Goal 1 - Generate report
|
|
100
|
+
|
|
101
|
+
## Expectations
|
|
102
|
+
|
|
103
|
+
- **Impact Checks**: target sandbox/project state changed as expected
|
|
104
|
+
- **Artifact Checks**: `results/tc/01/report.json` exists and is valid
|
|
105
|
+
- **Debug Fallback**: inspect `stdout`/`stderr`/`*.exit` only when primary checks are inconclusive
|
|
106
|
+
|
|
107
|
+
## Verdict
|
|
108
|
+
|
|
109
|
+
Pass only when all expectations are satisfied by on-disk evidence.
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Scenario-Level Configuration
|
|
113
|
+
|
|
114
|
+
- `tags` in `scenario.yml` control discovery-time filtering via `--tags`/`--exclude-tags`. Tags are not set per-TC; they apply to the entire scenario.
|
|
115
|
+
- `runner.yml.md` bundles execution context for the runner agent (tool paths, sandbox conventions, environment setup).
|
|
116
|
+
- `verifier.yml.md` bundles validation criteria for the independent verifier agent (artifact expectations, verdict rules).
|
|
117
|
+
- TCs execute sequentially within a scenario. The runner agent accumulates context across TCs (e.g., files created in TC-001 are available in TC-002).
|
|
118
|
+
|
|
119
|
+
## Best Practices
|
|
120
|
+
|
|
121
|
+
- Keep each TC focused on one coherent behavior path.
|
|
122
|
+
- Ensure goal numbers and TC numbers remain aligned (`TC-001` -> Goal 1).
|
|
123
|
+
- Keep runner files execution-only and verifier files verification-only.
|
|
124
|
+
- Make verifier expectations deterministic with impact-first ordering.
|
|
125
|
+
- Keep all artifacts under `results/tc/{NN}/` to avoid cross-goal contamination.
|
|
126
|
+
- Record why each scenario remains E2E via `e2e-justification` and `unit-coverage-reviewed` in `scenario.yml`.
|
|
127
|
+
|
|
128
|
+
## Related
|
|
129
|
+
|
|
130
|
+
- [scenario.yml Reference](scenario-yml-reference.g.md)
|
|
131
|
+
- [E2E Testing Guide](e2e-testing.g.md)
|
data/handbook/skills/as-e2e-create/SKILL.md
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: as-e2e-create
|
|
3
|
+
description: Create a new E2E test scenario from template
|
|
4
|
+
# bundle: wfi://e2e/create
|
|
5
|
+
# agent: general-purpose
|
|
6
|
+
user-invocable: true
|
|
7
|
+
allowed-tools:
|
|
8
|
+
- Bash(ace-bundle:*)
|
|
9
|
+
- Read
|
|
10
|
+
- Write
|
|
11
|
+
- Glob
|
|
12
|
+
- Grep
|
|
13
|
+
argument-hint: <package> <area> [--context <description>]
|
|
14
|
+
last_modified: 2026-01-19
|
|
15
|
+
source: ace-test-runner-e2e
|
|
16
|
+
integration:
|
|
17
|
+
targets:
|
|
18
|
+
- claude
|
|
19
|
+
- codex
|
|
20
|
+
- gemini
|
|
21
|
+
- opencode
|
|
22
|
+
- pi
|
|
23
|
+
providers: {}
|
|
24
|
+
skill:
|
|
25
|
+
kind: workflow
|
|
26
|
+
execution:
|
|
27
|
+
workflow: wfi://e2e/create
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
Load and run `ace-bundle wfi://e2e/create` in the current project, then follow the loaded workflow as the source of truth and execute it end-to-end instead of only summarizing it.
|
data/handbook/skills/as-e2e-fix/SKILL.md
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: as-e2e-fix
|
|
3
|
+
description: Diagnose, fix, and rerun failing E2E tests systematically
|
|
4
|
+
# context: no-fork
|
|
5
|
+
# agent: general-purpose
|
|
6
|
+
user-invocable: true
|
|
7
|
+
allowed-tools:
|
|
8
|
+
- Bash(ace-task:*)
|
|
9
|
+
- Bash(ace-bundle:*)
|
|
10
|
+
- Bash(ace-test:*)
|
|
11
|
+
- Bash(ace-test-suite:*)
|
|
12
|
+
- Bash(git:*)
|
|
13
|
+
- Read
|
|
14
|
+
- Write
|
|
15
|
+
- Edit
|
|
16
|
+
- Grep
|
|
17
|
+
- Glob
|
|
18
|
+
argument-hint: '[package] [test-id]'
|
|
19
|
+
last_modified: 2026-03-13
|
|
20
|
+
source: ace-test-runner-e2e
|
|
21
|
+
integration:
|
|
22
|
+
targets:
|
|
23
|
+
- claude
|
|
24
|
+
- codex
|
|
25
|
+
- gemini
|
|
26
|
+
- opencode
|
|
27
|
+
- pi
|
|
28
|
+
providers: {}
|
|
29
|
+
skill:
|
|
30
|
+
kind: workflow
|
|
31
|
+
execution:
|
|
32
|
+
workflow: wfi://e2e/fix
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
Load and run `ace-bundle wfi://e2e/fix` in the current project, then follow the loaded workflow as the source of truth and execute it end-to-end instead of only summarizing it.
|