ace-test-runner-e2e 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/.ace-defaults/e2e-runner/config.yml +70 -0
  3. data/.ace-defaults/nav/protocols/guide-sources/ace-test-runner-e2e.yml +11 -0
  4. data/.ace-defaults/nav/protocols/skill-sources/ace-test-runner-e2e.yml +19 -0
  5. data/.ace-defaults/nav/protocols/tmpl-sources/ace-test-runner-e2e.yml +12 -0
  6. data/.ace-defaults/nav/protocols/wfi-sources/ace-test-runner-e2e.yml +11 -0
  7. data/CHANGELOG.md +1166 -0
  8. data/LICENSE +21 -0
  9. data/README.md +42 -0
  10. data/Rakefile +15 -0
  11. data/exe/ace-test-e2e +15 -0
  12. data/exe/ace-test-e2e-sh +67 -0
  13. data/exe/ace-test-e2e-suite +13 -0
  14. data/handbook/guides/e2e-testing.g.md +124 -0
  15. data/handbook/guides/scenario-yml-reference.g.md +182 -0
  16. data/handbook/guides/tc-authoring.g.md +131 -0
  17. data/handbook/skills/as-e2e-create/SKILL.md +30 -0
  18. data/handbook/skills/as-e2e-fix/SKILL.md +35 -0
  19. data/handbook/skills/as-e2e-manage/SKILL.md +31 -0
  20. data/handbook/skills/as-e2e-plan-changes/SKILL.md +30 -0
  21. data/handbook/skills/as-e2e-review/SKILL.md +35 -0
  22. data/handbook/skills/as-e2e-rewrite/SKILL.md +31 -0
  23. data/handbook/skills/as-e2e-run/SKILL.md +48 -0
  24. data/handbook/skills/as-e2e-setup-sandbox/SKILL.md +34 -0
  25. data/handbook/templates/ace-taskflow-fixture.template.md +322 -0
  26. data/handbook/templates/agent-experience-report.template.md +89 -0
  27. data/handbook/templates/metadata.template.yml +49 -0
  28. data/handbook/templates/scenario.yml.template.yml +60 -0
  29. data/handbook/templates/tc-file.template.md +45 -0
  30. data/handbook/templates/test-report.template.md +94 -0
  31. data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +126 -0
  32. data/handbook/workflow-instructions/e2e/create.wf.md +395 -0
  33. data/handbook/workflow-instructions/e2e/execute.wf.md +253 -0
  34. data/handbook/workflow-instructions/e2e/fix.wf.md +166 -0
  35. data/handbook/workflow-instructions/e2e/manage.wf.md +179 -0
  36. data/handbook/workflow-instructions/e2e/plan-changes.wf.md +255 -0
  37. data/handbook/workflow-instructions/e2e/review.wf.md +286 -0
  38. data/handbook/workflow-instructions/e2e/rewrite.wf.md +281 -0
  39. data/handbook/workflow-instructions/e2e/run.wf.md +355 -0
  40. data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +461 -0
  41. data/lib/ace/test/end_to_end_runner/atoms/display_helpers.rb +234 -0
  42. data/lib/ace/test/end_to_end_runner/atoms/prompt_builder.rb +199 -0
  43. data/lib/ace/test/end_to_end_runner/atoms/result_parser.rb +166 -0
  44. data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +166 -0
  45. data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +244 -0
  46. data/lib/ace/test/end_to_end_runner/atoms/suite_report_prompt_builder.rb +103 -0
  47. data/lib/ace/test/end_to_end_runner/atoms/tc_fidelity_validator.rb +39 -0
  48. data/lib/ace/test/end_to_end_runner/atoms/test_case_parser.rb +108 -0
  49. data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +130 -0
  50. data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +156 -0
  51. data/lib/ace/test/end_to_end_runner/models/test_case.rb +47 -0
  52. data/lib/ace/test/end_to_end_runner/models/test_result.rb +115 -0
  53. data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +90 -0
  54. data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +92 -0
  55. data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +75 -0
  56. data/lib/ace/test/end_to_end_runner/molecules/failure_finder.rb +203 -0
  57. data/lib/ace/test/end_to_end_runner/molecules/fixture_copier.rb +35 -0
  58. data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +121 -0
  59. data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +182 -0
  60. data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +321 -0
  61. data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +131 -0
  62. data/lib/ace/test/end_to_end_runner/molecules/progress_display_manager.rb +172 -0
  63. data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +259 -0
  64. data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +254 -0
  65. data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +181 -0
  66. data/lib/ace/test/end_to_end_runner/molecules/simple_display_manager.rb +72 -0
  67. data/lib/ace/test/end_to_end_runner/molecules/suite_progress_display_manager.rb +223 -0
  68. data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +277 -0
  69. data/lib/ace/test/end_to_end_runner/molecules/suite_simple_display_manager.rb +116 -0
  70. data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +136 -0
  71. data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +332 -0
  72. data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +830 -0
  73. data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +442 -0
  74. data/lib/ace/test/end_to_end_runner/version.rb +9 -0
  75. data/lib/ace/test/end_to_end_runner.rb +71 -0
  76. metadata +220 -0
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2025 ACE Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,42 @@
1
+ <div align="center">
2
+ <h1> ACE - Test Runner E2E </h1>
3
+
4
+ Agent-executed end-to-end tests with reproducible sandboxes and structured reporting.
5
+
6
+ <img src="https://raw.githubusercontent.com/cs3b/ace/main/docs/brand/AgenticCodingEnvironment.Logo.XS.jpg" alt="ACE Logo" width="480">
7
+ <br><br>
8
+
9
+ <a href="https://rubygems.org/gems/ace-test-runner-e2e"><img alt="Gem Version" src="https://img.shields.io/gem/v/ace-test-runner-e2e.svg" /></a>
10
+ <a href="https://www.ruby-lang.org"><img alt="Ruby" src="https://img.shields.io/badge/Ruby-3.2+-CC342D?logo=ruby" /></a>
11
+ <a href="https://opensource.org/licenses/MIT"><img alt="License: MIT" src="https://img.shields.io/badge/License-MIT-blue.svg" /></a>
12
+
13
+ </div>
14
+
15
+ > Works with: Claude Code, Codex CLI, OpenCode, Gemini CLI, pi-agent, and more.
16
+
17
+ [Getting Started](docs/getting-started.md) | [Usage Guide](docs/usage.md) | [Handbook - Skills, Agents, Templates](docs/handbook.md)
18
+
19
+ ![ace-test-runner-e2e demo](docs/demo/ace-test-runner-e2e-getting-started.gif)
20
+
21
+ `ace-test-runner-e2e` runs realistic workflow scenarios through coding agents so teams can validate behavior beyond unit and integration coverage while keeping execution reproducible and isolated from the working tree.
22
+
23
+ ## How It Works
24
+
25
+ 1. Discover E2E scenario definitions from package-local `test/e2e/` suites with metadata, tags, and command flows.
26
+ 2. Execute scenarios inside reproducible sandboxes that isolate agent runs from the working tree.
27
+ 3. Produce structured reports that are easy to inspect, compare across runs, and feed back into triage workflows.
28
+
29
+ ## Use Cases
30
+
31
+ **Validate real developer workflows end-to-end** - use `/as-e2e-run` or run `ace-test-e2e` to confirm that instructions, tooling, and outputs behave correctly under agent execution for any package.
32
+
33
+ **Run broad regression sweeps across packages** - use `ace-test-e2e-suite` for cross-package scenario orchestration with filtering by package, tags, and prior failures.
34
+
35
+ **Keep execution deterministic and reviewable** - execute in sandboxes with structured outputs so results are reproducible and easy to compare across runs, complementing fast loops from [ace-test-runner](../ace-test-runner). Use `ace-test-e2e-sh` to run ad-hoc commands inside a test sandbox.
36
+
37
+ **Create and maintain E2E scenarios** - use `/as-e2e-create` to scaffold new scenarios and `/as-e2e-rewrite` or `/as-e2e-fix` to keep existing ones current as workflows evolve.
38
+
39
+ **Plan E2E coverage for new features** - use `/as-e2e-plan-changes` to map which scenarios need updates when instructions or tooling change, and `/as-e2e-review` to audit scenario quality.
40
+
41
+ ---
42
+ [Getting Started](docs/getting-started.md) | [Usage Guide](docs/usage.md) | [Handbook - Skills, Agents, Templates](docs/handbook.md) | Part of [ACE](https://github.com/cs3b/ace)
data/Rakefile ADDED
@@ -0,0 +1,15 @@
# frozen_string_literal: true

require "bundler/gem_tasks"
require "rake/testtask"

# `rake test`: runs every *_test.rb file under test/, with test/ and lib/
# appended to the load path.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push("test", "lib")
  test_task.test_files = FileList["test/**/*_test.rb"]
end

# `rake spec` is an alias of `rake test`, kept for CI compatibility.
task spec: [:test]

# Bare `rake` runs the test task.
task default: [:test]
data/exe/ace-test-e2e ADDED
@@ -0,0 +1,15 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

require_relative "../lib/ace/test/end_to_end_runner"
require "ace/support/cli"

# ace-support-cli single-command entrypoint for the RunTest command.
# Exit codes are exception-based (per ADR-023): an Ace::Support::Cli::Error
# carries the message to print and the status to exit with.

# When invoked with no arguments at all, fall back to `--help`.
cli_args = ARGV
cli_args = ["--help"] if cli_args.empty?

begin
  runner = Ace::Support::Cli::Runner.new(Ace::Test::EndToEndRunner::CLI::Commands::RunTest)
  runner.call(args: cli_args)
rescue Ace::Support::Cli::Error => cli_error
  warn cli_error.message
  exit(cli_error.exit_code)
end
data/exe/ace-test-e2e-sh ADDED
@@ -0,0 +1,67 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# ace-test-e2e-sh - Execute commands within E2E test sandbox
#
# Usage:
#   ace-test-e2e-sh <test-dir> [command...]
#   ace-test-e2e-sh <test-dir> bash <<'SANDBOX'
#   git add . && git commit -m "test"
#   SANDBOX
#
# The script changes into <test-dir>, sets PROJECT_ROOT_PATH to it, and then
# replaces itself (exec) with the given command, or with `bash` when no
# command is given.
#
# Exit status: 0 for --help/--version; 1 when <test-dir> is missing, is not a
# sandbox path, or does not exist; otherwise whatever the exec'd command returns.

# Handle --help/-h before shifting arguments
if ARGV.first && %w[--help -h].include?(ARGV.first)
  puts <<~HELP
    NAME
      ace-test-e2e-sh - Execute commands within E2E test sandbox

    USAGE
      ace-test-e2e-sh <test-dir> [command...]

    ARGUMENTS
      test-dir    Sandbox directory path (must be under .ace-local/test-e2e/)
      command     Command to execute inside the sandbox (default: bash)

    OPTIONS
      --help, -h  Show this help
      --version   Show version

    EXAMPLES
      $ ace-test-e2e-sh /path/to/sandbox bash
      $ ace-test-e2e-sh /path/to/sandbox bash -c 'git add . && git commit -m "test"'
  HELP
  exit 0
end

# --version only needs the version constant, so load just that file.
if ARGV.first == "--version"
  require_relative "../lib/ace/test/end_to_end_runner/version"
  puts "ace-test-e2e-sh #{Ace::Test::EndToEndRunner::VERSION}"
  exit 0
end

# First positional argument is the sandbox directory; the rest is the command.
test_dir = ARGV.shift

unless test_dir
  warn "Usage: ace-test-e2e-sh <test-dir> [command...]"
  exit 1
end

# Normalize to an absolute path (resolves "." and ".." segments) before checking it.
test_dir = File.expand_path(test_dir)

# Safety guard: refuse to run anywhere that is not below an
# .ace-local/test-e2e/ directory.
unless test_dir.include?("/.ace-local/test-e2e/")
  warn "FATAL: Not a sandbox path: #{test_dir}"
  exit 1
end

unless Dir.exist?(test_dir)
  warn "FATAL: Sandbox directory not found: #{test_dir}"
  exit 1
end

# Run from inside the sandbox and point PROJECT_ROOT_PATH at it for the child.
Dir.chdir(test_dir)
ENV["PROJECT_ROOT_PATH"] = test_dir

# exec replaces this process, so the command's exit status becomes ours.
if ARGV.empty?
  exec "bash"
else
  exec(*ARGV)
end
data/exe/ace-test-e2e-suite ADDED
@@ -0,0 +1,13 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

require_relative "../lib/ace/test/end_to_end_runner"
require "ace/support/cli"

# ace-support-cli single-command entrypoint for the RunSuite command.
# Exit codes are exception-based (per ADR-023): an Ace::Support::Cli::Error
# carries the message to print and the status to exit with.
# ARGV is passed through unchanged.
begin
  runner = Ace::Support::Cli::Runner.new(Ace::Test::EndToEndRunner::CLI::Commands::RunSuite)
  runner.call(args: ARGV)
rescue Ace::Support::Cli::Error => cli_error
  warn cli_error.message
  exit(cli_error.exit_code)
end
data/handbook/guides/e2e-testing.g.md ADDED
@@ -0,0 +1,124 @@
1
+ ---
2
+ doc-type: guide
3
+ title: E2E Testing Guide
4
+ purpose: Conventions and best practices for agent-executed end-to-end tests
5
+ ace-docs:
6
+ last-updated: 2026-03-12
7
+ last-checked: 2026-03-21
8
+ ---
9
+
10
+ # E2E Testing Guide
11
+
12
+ ## Overview
13
+
14
+ E2E tests are executed by an AI agent and are reserved for behaviors that require real CLI execution, real tools, and real filesystem side effects.
15
+
16
+ ## Canonical Conventions
17
+
18
+ - CLI split:
19
+ - `ace-test-e2e` runs tests for a single package
20
+ - `ace-test-e2e-suite` runs suite-level execution
21
+ - Scenario IDs follow `TS-<PACKAGE_SHORT>-<NNN>[-slug]`
22
+ - Test format is standalone pair only:
23
+ - `TC-*.runner.md`
24
+ - `TC-*.verify.md`
25
+ - `runner.yml.md`
26
+ - `verifier.yml.md`
27
+ - TC artifacts use `results/tc/{NN}/`
28
+ - Summary reports use `tcs-passed`, `tcs-failed`, `tcs-total`, and `failed[].tc`
29
+ - Scenarios declare `tags` for discovery-time filtering via `--tags`/`--exclude-tags`
30
+
31
+ ## Runner vs Verifier Contract
32
+
33
+ - Runner is **execution-only**:
34
+ - perform user-like CLI actions in sandbox
35
+ - produce evidence files under `results/tc/{NN}/`
36
+ - do not issue PASS/FAIL verdicts
37
+ - do not perform verifier-style assertion/classification
38
+ - Verifier is **verification-only**:
39
+ - evaluate TC outcome from sandbox evidence
40
+ - apply an **impact-first** evidence order:
41
+ 1. sandbox/project state impact
42
+ 2. explicit TC artifacts
43
+ 3. debug captures (`stdout`, `stderr`, `*.exit`, metadata) only as fallback
44
+ - Setup ownership:
45
+ - sandbox preparation belongs to `scenario.yml` `setup:` + `fixtures/`
46
+ - TC runner files must not define independent environment setup procedures
47
+
48
+ ## E2E Value Gate
49
+
50
+ Before adding a TC, confirm the behavior needs:
51
+ - full CLI binary execution
52
+ - real external tools/processes
53
+ - real filesystem I/O and environment state
54
+
55
+ If not, keep coverage in unit/integration tests.
56
+
57
+ ## Cost and Scope
58
+
59
+ - Keep scenarios small and coherent.
60
+ - Typical scenario size: 2-5 TCs.
61
+ - Consolidate assertions that share the same command/setup into one TC.
62
+ - Use `cost-tier` to stage execution (`smoke` → `happy-path` → `deep`).
63
+
64
+ ## Execution Pipeline
65
+
66
+ CLI providers (`ace-test-e2e`, `ace-test-e2e-suite`) use a deterministic 6-phase pipeline:
67
+
68
+ 1. **Setup** — `SetupExecutor` creates sandbox (git init, mise.toml, .ace symlinks, results/tc/{NN}/ dirs)
69
+ 2. **Runner prompt** — `SkillPromptBuilder` assembles context from `runner.yml.md` and `TC-*.runner.md`
70
+ 3. **Runner LLM** — Agent executes TC steps in sandbox, produces artifacts
71
+ 4. **Verifier prompt** — `SkillPromptBuilder` assembles context from `verifier.yml.md` and `TC-*.verify.md`
72
+ 5. **Verifier LLM** — Independent agent evaluates artifacts against expectations
73
+ 6. **Report** — `PipelineReportGenerator` produces deterministic summary from verifier output
74
+
75
+ API providers use a single-prompt approach (runner and verifier in one pass).
76
+
77
+ The verifier is always-on for standalone goal-mode TCs in the CLI pipeline. For procedural runs guided by `ace-bundle wfi://e2e/run`, the verifier is opt-in via `--verify`.
78
+
79
+ ## Scenario Layout
80
+
81
+ ```text
82
+ {package}/test/e2e/TS-{AREA}-{NNN}-{slug}/
83
+ scenario.yml
84
+ runner.yml.md
85
+ verifier.yml.md
86
+ TC-001-{slug}.runner.md
87
+ TC-001-{slug}.verify.md
88
+ fixtures/
89
+ ```
90
+
91
+ ## Required Scenario Evidence
92
+
93
+ In `scenario.yml`, record:
94
+ - `tags` (cost-tier tag + use-case tags)
95
+ - `e2e-justification`
96
+ - `unit-coverage-reviewed`
97
+ - `cost-tier`
98
+
99
+ This prevents duplicate assertions across test layers.
100
+
101
+ ## Authoring Rules
102
+
103
+ - Keep runner goals outcome-oriented and deterministic.
104
+ - Keep verifier expectations impact-first, then artifacts, then debug fallback.
105
+ - Preserve strict TC pairing (`runner` + `verify`).
106
+ - Keep outputs inside `results/tc/{NN}/`.
107
+ - Avoid hidden dependencies between TCs unless explicitly intended.
108
+
109
+ ## Execution Artifacts
110
+
111
+ Reports are written under `.ace-local/test-e2e/`:
112
+ - `{run-id}-{pkg}-{scenario}-reports/summary.r.md`
113
+ - `{run-id}-{pkg}-{scenario}-reports/experience.r.md`
114
+ - `{run-id}-{pkg}-{scenario}-reports/metadata.yml`
115
+
116
+ ## Review Checklist
117
+
118
+ Before approving new/updated E2E tests:
119
+ - [ ] Scenario uses standalone pair format only
120
+ - [ ] `scenario.yml` omits legacy `mode` and `execution-model`
121
+ - [ ] `runner.yml.md` and `verifier.yml.md` exist
122
+ - [ ] Every TC has both `.runner.md` and `.verify.md`
123
+ - [ ] Artifacts are scoped to `results/tc/{NN}/`
124
+ - [ ] Value-gate metadata is present (`e2e-justification`, `unit-coverage-reviewed`, `cost-tier`)
data/handbook/guides/scenario-yml-reference.g.md ADDED
@@ -0,0 +1,182 @@
1
+ ---
2
+ doc-type: guide
3
+ title: scenario.yml Reference
4
+ purpose: Complete schema reference for TS-format scenario configuration files
5
+ ace-docs:
6
+ last-updated: 2026-03-18
7
+ last-checked: 2026-03-21
8
+ ---
9
+
10
+ # scenario.yml Reference
11
+
12
+ ## Overview
13
+
14
+ The `scenario.yml` file configures a TS-format E2E scenario.
15
+
16
+ Supported test definition format is standalone pairs only:
17
+ - `TC-*.runner.md`
18
+ - `TC-*.verify.md`
19
+ - `runner.yml.md`
20
+ - `verifier.yml.md`
21
+
22
+ Legacy fields `mode` and `execution-model` are not supported.
23
+
24
+ ## Location
25
+
26
+ ```text
27
+ {package}/test/e2e/TS-{AREA}-{NNN}-{slug}/scenario.yml
28
+ ```
29
+
30
+ Example: `ace-lint/test/e2e/TS-LINT-001-lint-pipeline/scenario.yml`
31
+
32
+ ## Schema
33
+
34
+ ### Required Fields
35
+
36
+ | Field | Type | Description |
37
+ |-------|------|-------------|
38
+ | `test-id` | string | Unique test identifier in format `TS-{AREA}-{NNN}` |
39
+ | `title` | string | Human-readable scenario title |
40
+ | `area` | string | Functional area code |
41
+ | `package` | string | Package name |
42
+
43
+ ### Optional Fields
44
+
45
+ | Field | Type | Default | Description |
46
+ |-------|------|---------|-------------|
47
+ | `priority` | string | `medium` | Test priority: `high`, `medium`, `low` |
48
+ | `tool-under-test` | string | — | Primary command/tool validated |
49
+ | `sandbox-layout` | object | `{}` | Declared artifact paths and expected outputs |
50
+ | `duration` | string | — | Estimated duration (e.g., `~15min`) |
51
+ | `timeout` | integer | — | Optional per-scenario execution timeout in seconds |
52
+ | `automation-candidate` | boolean | `false` | Whether test is automatable |
53
+ | `tags` | array | `[]` | Scenario tags for filtering with `--tags`/`--exclude-tags` (OR semantics) |
54
+ | `cost-tier` | string | `smoke` | Run profile: `smoke`, `happy-path`, `deep` |
55
+ | `e2e-justification` | string | — | Why E2E is needed |
56
+ | `unit-coverage-reviewed` | array | `[]` | Unit/integration files reviewed |
57
+ | `requires` | object | — | Test prerequisites |
58
+ | `setup` | array | `[]` | Setup directives before execution |
59
+ | `last-verified` | string | — | Last successful verification date |
60
+ | `verified-by` | string | — | Agent that last verified |
61
+
62
+ ## Standalone File Conventions
63
+
64
+ Scenario directory must contain:
65
+ - `runner.yml.md`
66
+ - `verifier.yml.md`
67
+ - paired `TC-*.runner.md` and `TC-*.verify.md`
68
+
69
+ Pairing rule:
70
+ - every `TC-XXX.runner.md` must have a matching `TC-XXX.verify.md`
71
+ - every `TC-XXX.verify.md` must have a matching `TC-XXX.runner.md`
72
+
73
+ Artifact layout conventions:
74
+ - canonical: `results/tc/{NN}/`
75
+ - avoid non-TC-scoped result folders
76
+
77
+ Canonical summary report fields:
78
+ - `tcs-passed`
79
+ - `tcs-failed`
80
+ - `tcs-total`
81
+ - `failed[].tc`
82
+
83
+ Role contract:
84
+ - `runner.yml.md` + `TC-*.runner.md` are execution-only.
85
+ - `verifier.yml.md` + `TC-*.verify.md` are verification-only with impact-first checks.
86
+
87
+ ## `requires` Object
88
+
89
+ ```yaml
90
+ requires:
91
+ tools: [ace-lint, jq]
92
+ ruby: ">= 3.0"
93
+ ```
94
+
95
+ ## `setup` Directives
96
+
97
+ Available directives:
98
+ - `git-init` — Initialize git repository in sandbox
99
+ - `run:` — Execute a shell command (bash -lc; env vars are re-exported to protect against mise clobbering)
100
+ - `copy-fixtures` — Copy fixtures/ directory into sandbox
101
+ - `write-file:` — Write inline content to a file (`path:` + `content:`)
102
+ - `agent-env:` — Environment variables passed to the runner/verifier agent subprocess (not setup commands)
103
+ - `tmux-session` — Create a detached tmux session
104
+ - String form: `tmux-session` (uses scenario-based naming)
105
+ - Hash form: `tmux-session: { name-source: run-id }` (uses unique E2E run ID as session name)
106
+ - Runner teardown removes the created session after test execution
107
+
108
+ Example:
109
+
110
+ ```yaml
111
+ setup:
112
+ - git-init
113
+ - tmux-session:
114
+ name-source: run-id
115
+ - run: "cp $PROJECT_ROOT_PATH/mise.toml mise.toml && mise trust mise.toml"
116
+ - copy-fixtures
117
+ - run: git add -A && git commit -m "initial" --quiet
118
+ - agent-env:
119
+ PROJECT_ROOT_PATH: "."
120
+ ```
121
+
122
+ Setup rules:
123
+ - Setup is fail-fast. Do not hide setup failures with `|| true`.
124
+ - Setup belongs in `scenario.yml` and fixtures, not in TC runner instructions.
125
+ - If setup fails (for example, missing `mise trust` support), stop scenario execution and report infrastructure failure.
126
+
127
+ ## Complete Example
128
+
129
+ ```yaml
130
+ test-id: TS-LINT-001
131
+ title: Core Lint Pipeline
132
+ area: lint
133
+ package: ace-lint
134
+ priority: high
135
+ duration: ~10min
136
+ cost-tier: smoke
137
+ tags: [smoke, "use-case:lint"]
138
+ e2e-justification: "Validates real subprocess behavior and report file generation"
139
+ unit-coverage-reviewed:
140
+ - test/molecules/lint_runner_test.rb
141
+ - test/organisms/lint_orchestrator_test.rb
142
+ tool-under-test: ace-lint
143
+ sandbox-layout:
144
+ results/tc/01/: "help artifacts"
145
+ requires:
146
+ tools: [ace-lint, standardrb, jq]
147
+ ruby: ">= 3.0"
148
+ setup:
149
+ - git-init
150
+ - run: "cp $PROJECT_ROOT_PATH/mise.toml mise.toml && mise trust mise.toml"
151
+ - copy-fixtures
152
+ - agent-env:
153
+ PROJECT_ROOT_PATH: "."
154
+ last-verified: 2026-02-24
155
+ verified-by: claude-opus-4
156
+ ```
157
+
158
+ ## Tags
159
+
160
+ The `tags` field enables discovery-time filtering with `--tags` and `--exclude-tags`.
161
+
162
+ **Naming conventions:**
163
+ - Cost tier is auto-included: `smoke`, `happy-path`, `deep`
164
+ - Use-case tags use the `use-case:{area}` pattern (e.g., `use-case:lint`, `use-case:config`)
165
+ - Custom tags are lowercase kebab-case
166
+
167
+ **Filtering semantics:**
168
+ - `--tags` uses OR: scenario matches if it has **any** of the specified tags
169
+ - `--exclude-tags` uses OR: scenario is excluded if it has **any** of the specified tags
170
+ - Both filters can be combined; exclude is applied after include
171
+
172
+ ## Directory Structure
173
+
174
+ ```text
175
+ test/e2e/TS-LINT-001-lint-pipeline/
176
+ ├── scenario.yml
177
+ ├── runner.yml.md
178
+ ├── verifier.yml.md
179
+ ├── TC-001-help-survey.runner.md
180
+ ├── TC-001-help-survey.verify.md
181
+ └── fixtures/
182
+ ```
data/handbook/guides/tc-authoring.g.md ADDED
@@ -0,0 +1,131 @@
1
+ ---
2
+ doc-type: guide
3
+ title: Test Case Authoring Guide
4
+ purpose: Guide for writing standalone TC runner/verifier files for TS-format E2E scenarios
5
+ ace-docs:
6
+ last-updated: 2026-02-25
7
+ last-checked: 2026-03-21
8
+ ---
9
+
10
+ # Test Case Authoring Guide
11
+
12
+ ## Overview
13
+
14
+ TC (Test Case) files are the individual execution units in TS-format E2E scenarios.
15
+
16
+ Only one authoring format is supported:
17
+ - Standalone runner/verifier pairs:
18
+ - `TC-*.runner.md`
19
+ - `TC-*.verify.md`
20
+
21
+ Inline `.tc.md` and frontmatter `mode` values are no longer supported.
22
+
23
+ ## Canonical Conventions
24
+
25
+ - Scenario IDs: `TS-<PACKAGE_SHORT>-<NNN>[-slug]`
26
+ - Standalone pair files:
27
+ - `TC-{NNN}-{slug}.runner.md`
28
+ - `TC-{NNN}-{slug}.verify.md`
29
+ - Scenario-level config files:
30
+ - `runner.yml.md`
31
+ - `verifier.yml.md`
32
+ - TC artifacts write to `results/tc/{NN}/`
33
+ - Summary counters use `tcs-passed`, `tcs-failed`, and `tcs-total`
34
+
35
+ ## File Naming
36
+
37
+ - `TC-{NNN}` — test case number (e.g., TC-001)
38
+ - `{slug}` — descriptive kebab-case identifier
39
+
40
+ Examples:
41
+ - `TC-001-help-survey.runner.md`
42
+ - `TC-001-help-survey.verify.md`
43
+ - `TC-002-error-handling.runner.md`
44
+ - `TC-002-error-handling.verify.md`
45
+
46
+ ## Location
47
+
48
+ TC files are placed in the scenario directory alongside `scenario.yml`:
49
+
50
+ ```text
51
+ {package}/test/e2e/TS-{AREA}-{NNN}-{slug}/
52
+ ├── scenario.yml
53
+ ├── runner.yml.md
54
+ ├── verifier.yml.md
55
+ ├── TC-001-{slug}.runner.md
56
+ ├── TC-001-{slug}.verify.md
57
+ └── fixtures/
58
+ ```
59
+
60
+ ## Runner File Structure (`TC-*.runner.md`)
61
+
62
+ Required sections:
63
+ - `# Goal N - Title`
64
+ - `## Goal`
65
+ - `## Workspace`
66
+ - `## Constraints`
67
+
68
+ Example:
69
+
70
+ ```markdown
71
+ # Goal 1 - Generate report
72
+
73
+ ## Goal
74
+
75
+ Run `ace-lint` and produce report artifacts for a valid file.
76
+
77
+ ## Workspace
78
+
79
+ - Root: sandbox directory
80
+ - Output: `results/tc/01/`
81
+
82
+ ## Constraints
83
+
84
+ - Use only sandbox paths
85
+ - Keep evidence under `results/tc/01/`
86
+ - Execute actions only; do not assign PASS/FAIL or final verdicts
87
+ ```
88
+
89
+ ## Verifier File Structure (`TC-*.verify.md`)
90
+
91
+ Required sections:
92
+ - `# Goal N - Title`
93
+ - `## Expectations`
94
+ - `## Verdict`
95
+
96
+ Example:
97
+
98
+ ```markdown
99
+ # Goal 1 - Generate report
100
+
101
+ ## Expectations
102
+
103
+ - **Impact Checks**: target sandbox/project state changed as expected
104
+ - **Artifact Checks**: `results/tc/01/report.json` exists and is valid
105
+ - **Debug Fallback**: inspect `stdout`/`stderr`/`*.exit` only when primary checks are inconclusive
106
+
107
+ ## Verdict
108
+
109
+ Pass only when all expectations are satisfied by on-disk evidence.
110
+ ```
111
+
112
+ ## Scenario-Level Configuration
113
+
114
+ - `tags` in `scenario.yml` control discovery-time filtering via `--tags`/`--exclude-tags`. Tags are not set per-TC; they apply to the entire scenario.
115
+ - `runner.yml.md` bundles execution context for the runner agent (tool paths, sandbox conventions, environment setup).
116
+ - `verifier.yml.md` bundles validation criteria for the independent verifier agent (artifact expectations, verdict rules).
117
+ - TCs execute sequentially within a scenario. The runner agent accumulates context across TCs (e.g., files created in TC-001 are available in TC-002).
118
+
119
+ ## Best Practices
120
+
121
+ - Keep each TC focused on one coherent behavior path.
122
+ - Ensure goal numbers and TC numbers remain aligned (`TC-001` -> Goal 1).
123
+ - Keep runner files execution-only and verifier files verification-only.
124
+ - Make verifier expectations deterministic with impact-first ordering.
125
+ - Keep all artifacts under `results/tc/{NN}/` to avoid cross-goal contamination.
126
+ - Record why each scenario remains E2E via `e2e-justification` and `unit-coverage-reviewed` in `scenario.yml`.
127
+
128
+ ## Related
129
+
130
+ - [scenario.yml Reference](scenario-yml-reference.g.md)
131
+ - [E2E Testing Guide](e2e-testing.g.md)
data/handbook/skills/as-e2e-create/SKILL.md ADDED
@@ -0,0 +1,30 @@
1
+ ---
2
+ name: as-e2e-create
3
+ description: Create a new E2E test scenario from template
4
+ # bundle: wfi://e2e/create
5
+ # agent: general-purpose
6
+ user-invocable: true
7
+ allowed-tools:
8
+ - Bash(ace-bundle:*)
9
+ - Read
10
+ - Write
11
+ - Glob
12
+ - Grep
13
+ argument-hint: <package> <area> [--context <description>]
14
+ last_modified: 2026-01-19
15
+ source: ace-test-runner-e2e
16
+ integration:
17
+ targets:
18
+ - claude
19
+ - codex
20
+ - gemini
21
+ - opencode
22
+ - pi
23
+ providers: {}
24
+ skill:
25
+ kind: workflow
26
+ execution:
27
+ workflow: wfi://e2e/create
28
+ ---
29
+
30
+ Load and run `ace-bundle wfi://e2e/create` in the current project, then follow the loaded workflow as the source of truth and execute it end-to-end instead of only summarizing it.
data/handbook/skills/as-e2e-fix/SKILL.md ADDED
@@ -0,0 +1,35 @@
1
+ ---
2
+ name: as-e2e-fix
3
+ description: Diagnose, fix, and rerun failing E2E tests systematically
4
+ # context: no-fork
5
+ # agent: general-purpose
6
+ user-invocable: true
7
+ allowed-tools:
8
+ - Bash(ace-task:*)
9
+ - Bash(ace-bundle:*)
10
+ - Bash(ace-test:*)
11
+ - Bash(ace-test-suite:*)
12
+ - Bash(git:*)
13
+ - Read
14
+ - Write
15
+ - Edit
16
+ - Grep
17
+ - Glob
18
+ argument-hint: '[package] [test-id]'
19
+ last_modified: 2026-03-13
20
+ source: ace-test-runner-e2e
21
+ integration:
22
+ targets:
23
+ - claude
24
+ - codex
25
+ - gemini
26
+ - opencode
27
+ - pi
28
+ providers: {}
29
+ skill:
30
+ kind: workflow
31
+ execution:
32
+ workflow: wfi://e2e/fix
33
+ ---
34
+
35
+ Load and run `ace-bundle wfi://e2e/fix` in the current project, then follow the loaded workflow as the source of truth and execute it end-to-end instead of only summarizing it.