ace-test-runner-e2e 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/.ace-defaults/e2e-runner/config.yml +70 -0
  3. data/.ace-defaults/nav/protocols/guide-sources/ace-test-runner-e2e.yml +11 -0
  4. data/.ace-defaults/nav/protocols/skill-sources/ace-test-runner-e2e.yml +19 -0
  5. data/.ace-defaults/nav/protocols/tmpl-sources/ace-test-runner-e2e.yml +12 -0
  6. data/.ace-defaults/nav/protocols/wfi-sources/ace-test-runner-e2e.yml +11 -0
  7. data/CHANGELOG.md +1166 -0
  8. data/LICENSE +21 -0
  9. data/README.md +42 -0
  10. data/Rakefile +15 -0
  11. data/exe/ace-test-e2e +15 -0
  12. data/exe/ace-test-e2e-sh +67 -0
  13. data/exe/ace-test-e2e-suite +13 -0
  14. data/handbook/guides/e2e-testing.g.md +124 -0
  15. data/handbook/guides/scenario-yml-reference.g.md +182 -0
  16. data/handbook/guides/tc-authoring.g.md +131 -0
  17. data/handbook/skills/as-e2e-create/SKILL.md +30 -0
  18. data/handbook/skills/as-e2e-fix/SKILL.md +35 -0
  19. data/handbook/skills/as-e2e-manage/SKILL.md +31 -0
  20. data/handbook/skills/as-e2e-plan-changes/SKILL.md +30 -0
  21. data/handbook/skills/as-e2e-review/SKILL.md +35 -0
  22. data/handbook/skills/as-e2e-rewrite/SKILL.md +31 -0
  23. data/handbook/skills/as-e2e-run/SKILL.md +48 -0
  24. data/handbook/skills/as-e2e-setup-sandbox/SKILL.md +34 -0
  25. data/handbook/templates/ace-taskflow-fixture.template.md +322 -0
  26. data/handbook/templates/agent-experience-report.template.md +89 -0
  27. data/handbook/templates/metadata.template.yml +49 -0
  28. data/handbook/templates/scenario.yml.template.yml +60 -0
  29. data/handbook/templates/tc-file.template.md +45 -0
  30. data/handbook/templates/test-report.template.md +94 -0
  31. data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +126 -0
  32. data/handbook/workflow-instructions/e2e/create.wf.md +395 -0
  33. data/handbook/workflow-instructions/e2e/execute.wf.md +253 -0
  34. data/handbook/workflow-instructions/e2e/fix.wf.md +166 -0
  35. data/handbook/workflow-instructions/e2e/manage.wf.md +179 -0
  36. data/handbook/workflow-instructions/e2e/plan-changes.wf.md +255 -0
  37. data/handbook/workflow-instructions/e2e/review.wf.md +286 -0
  38. data/handbook/workflow-instructions/e2e/rewrite.wf.md +281 -0
  39. data/handbook/workflow-instructions/e2e/run.wf.md +355 -0
  40. data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +461 -0
  41. data/lib/ace/test/end_to_end_runner/atoms/display_helpers.rb +234 -0
  42. data/lib/ace/test/end_to_end_runner/atoms/prompt_builder.rb +199 -0
  43. data/lib/ace/test/end_to_end_runner/atoms/result_parser.rb +166 -0
  44. data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +166 -0
  45. data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +244 -0
  46. data/lib/ace/test/end_to_end_runner/atoms/suite_report_prompt_builder.rb +103 -0
  47. data/lib/ace/test/end_to_end_runner/atoms/tc_fidelity_validator.rb +39 -0
  48. data/lib/ace/test/end_to_end_runner/atoms/test_case_parser.rb +108 -0
  49. data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +130 -0
  50. data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +156 -0
  51. data/lib/ace/test/end_to_end_runner/models/test_case.rb +47 -0
  52. data/lib/ace/test/end_to_end_runner/models/test_result.rb +115 -0
  53. data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +90 -0
  54. data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +92 -0
  55. data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +75 -0
  56. data/lib/ace/test/end_to_end_runner/molecules/failure_finder.rb +203 -0
  57. data/lib/ace/test/end_to_end_runner/molecules/fixture_copier.rb +35 -0
  58. data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +121 -0
  59. data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +182 -0
  60. data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +321 -0
  61. data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +131 -0
  62. data/lib/ace/test/end_to_end_runner/molecules/progress_display_manager.rb +172 -0
  63. data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +259 -0
  64. data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +254 -0
  65. data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +181 -0
  66. data/lib/ace/test/end_to_end_runner/molecules/simple_display_manager.rb +72 -0
  67. data/lib/ace/test/end_to_end_runner/molecules/suite_progress_display_manager.rb +223 -0
  68. data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +277 -0
  69. data/lib/ace/test/end_to_end_runner/molecules/suite_simple_display_manager.rb +116 -0
  70. data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +136 -0
  71. data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +332 -0
  72. data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +830 -0
  73. data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +442 -0
  74. data/lib/ace/test/end_to_end_runner/version.rb +9 -0
  75. data/lib/ace/test/end_to_end_runner.rb +71 -0
  76. metadata +220 -0
@@ -0,0 +1,166 @@
1
+ ---
2
+ doc-type: workflow
3
+ title: Fix E2E Tests Workflow
4
+ purpose: Apply targeted fixes for failing E2E scenarios based on an existing failure analysis report
5
+ ace-docs:
6
+ last-updated: 2026-03-13
7
+ last-checked: 2026-03-21
8
+ ---
9
+
10
+ # Fix E2E Tests Workflow
11
+
12
+ ## Goal
13
+
14
+ Apply targeted fixes for failing E2E scenarios based on an existing E2E failure analysis report.
15
+
16
+ This workflow is execution-only. Root cause classification is handled by `wfi://e2e/analyze-failures`.
17
+
18
+ ## Hard Gate (Required Before Any Fix)
19
+
20
+ Do not apply any fix until an analysis report exists with:
21
+ - scenario / TC identifier
22
+ - category (`code-issue`, `test-issue`, `runner-infrastructure-issue`)
23
+ - evidence from reports/artifacts
24
+ - fix target
25
+ - fix target layer
26
+ - primary candidate files
27
+ - do-not-touch boundaries
28
+ - rerun scope recommendation
29
+
30
+ If the analysis is missing, or is incomplete in a way that cannot be filled in from local evidence (see Autonomy Rule), stop and run:
31
+ ```bash
32
+ ace-bundle wfi://e2e/analyze-failures
33
+ ```
34
+
35
+ ## Required Input
36
+
37
+ Use the following output sections from `e2e/analyze-failures`:
38
+ - `## E2E Failure Analysis Report`
39
+ - `## Fix Decisions`
40
+ - `### Execution Plan Input`
41
+
42
+ ## Autonomy Rule
43
+
44
+ - Do not ask the user to choose fix target, category, or rerun scope.
45
+ - If analysis is incomplete, auto-complete missing decision fields via local evidence (reports, artifacts, scenario files, implementation), then proceed.
46
+ - Only stop for hard blockers (missing files/tools/permissions).
47
+
48
+ ## Execution Environment Guardrail
49
+
50
+ - Do **not** run E2E commands autonomously in constrained/sandboxed agent environments.
51
+ - Treat `ace-test-e2e` as **user-executed verification** by default.
52
+ - Provide exact rerun commands for the user instead of executing them when environment fidelity is uncertain (missing `mise`, restricted HOME/state dirs, missing provider credentials, restricted tmux/socket access).
53
+ - Run E2E commands directly only when the user explicitly requests execution in the current environment and confirms it is properly configured.
54
+
55
+ ## Priority Order
56
+
57
+ Apply fixes in this order:
58
+ 1. `runner-infrastructure-issue` (can unblock many scenarios)
59
+ 2. `code-issue`
60
+ 3. `test-issue`
61
+
62
+ ## Fix Procedure
63
+
64
+ 1. Pick the first prioritized item from analysis
65
+ - Use the selected "First item to fix"
66
+ - Confirm category, fix target, and rerun scope
67
+ - Apply the "Chosen fix decision" and primary candidate files directly
68
+
69
+ 2. Apply category-specific fix
70
+
71
+ ### Category: runner-infrastructure-issue
72
+ - Fix runner/sandbox/provider/reporting/orchestration behavior
73
+ - Verify with runner tests when applicable: `ace-test ace-test-runner-e2e`
74
+
75
+ ### Category: code-issue
76
+ - Fix package/tool behavior in implementation code
77
+ - Add/update unit tests if needed
78
+
79
+ ### Category: test-issue
80
+ - Fix scenario definition, runner/verifier criteria, fixtures, or setup steps
81
+ - Preserve role split: runner is execution-only, verifier is impact-first verdict
82
+ - Keep implementation unchanged unless analysis is revised
83
+
84
+ 3. Rerun the selected failing scope after each fix
85
+
86
+ After every implemented fix, rerun the analysis-selected failing scope before moving to the next item or recommending release.
87
+
88
+ ```text
89
+ # scenario scope (default)
90
+ # user executes locally
91
+ ace-test-e2e {package} {test-id}
92
+
93
+ # package scope (only if analysis recommended)
94
+ # user executes locally
95
+ ace-test-e2e {package}
96
+ ```
97
+
98
+ Rules:
99
+ - Scenario rerun is the default after each fix iteration.
100
+ - Use package rerun only when analysis explicitly selected package scope.
101
+ - For multiple failing scenarios, rerun each scenario explicitly.
102
+ ```text
103
+ ace-test-e2e ace-assign TS-ASSIGN-001
104
+ ace-test-e2e ace-assign TS-ASSIGN-002
105
+ ace-test-e2e ace-bundle TS-BUNDLE-001
106
+ ```
107
+ - Record the rerun command and result in the execution summary for every fix item.
108
+
109
+ 4. Re-check classification when evidence conflicts
110
+ - If outcome contradicts analysis, return to `e2e/analyze-failures`
111
+ - Update analysis report and re-select a new autonomous chosen fix decision before continuing
112
+
113
+ 5. Iterate until all targeted failures are resolved
114
+ - Keep one active scenario/TC at a time
115
+ - Preserve cost-conscious rerun discipline
116
+
117
+ 6. Run a final explicit failing-scenario checkpoint before concluding the fix session
118
+
119
+ After the currently targeted failures are addressed, require one final:
120
+
121
+ ```bash
122
+ # user executes locally
123
+ ace-test-e2e {package} {test-id}
124
+ ```
125
+
126
+ Use one explicit command per previously failing scenario to confirm no targeted failure remains in the active set before ending the fix session or recommending release.
127
+
128
+ ## Cost-Conscious Rules
129
+
130
+ - Do not run suite reruns by default
131
+ - Prefer scenario reruns while iterating
132
+ - Use package reruns only when analysis explicitly recommends broader scope
133
+
134
+ ## Required Output
135
+
136
+ ```markdown
137
+ ## E2E Fix Execution Summary
138
+
139
+ | Scenario / TC | Category | Change Applied | Verification Command | Result |
140
+ |---|---|---|---|---|
141
+ | ... | ... | ... | ... | pass/fail |
142
+ ```
143
+
144
+ Include one final row for the batch checkpoint:
145
+ - Verification Command: one explicit rerun command per previously failing scenario (`ace-test-e2e {package} {test-id}`)
146
+ - Result: `pass` or remaining failing scenarios
147
+ - If failures remain, continue the fix loop instead of treating the session as complete
148
+
149
+ If unresolved:
150
+
151
+ ```markdown
152
+ ## Blockers
153
+ - Scenario / TC: ...
154
+ - Why unresolved: ...
155
+ - New evidence: ...
156
+ - Re-analysis required: yes/no
157
+ ```
158
+
159
+ ## Success Criteria
160
+
161
+ - Fixes are traceable to analyzed failures
162
+ - Verification scope matches analysis recommendation, including mandatory reruns after each fix
163
+ - Cost-conscious rerun strategy was followed
164
+ - Final explicit per-scenario rerun checkpoint for all targeted failures was completed before concluding the fix session
165
+ - No user clarification was required for fix targeting/scope in normal flow
166
+ - Targeted failures pass, or blockers are explicitly documented
@@ -0,0 +1,179 @@
1
+ ---
2
+ doc-type: workflow
3
+ title: Manage E2E Tests Workflow
4
+ purpose: Orchestrate the 3-stage E2E test lifecycle pipeline (review → plan → rewrite)
5
+ ace-docs:
6
+ last-updated: 2026-03-12
7
+ last-checked: 2026-03-21
8
+ ---
9
+
10
+ # Manage E2E Tests Workflow
11
+
12
+ This workflow is a lightweight orchestrator that chains the 3-stage E2E test pipeline. It delegates all logic to the specialized stage workflows.
13
+
14
+ ```text
15
+ ace-bundle wfi://e2e/manage
16
+
17
+ ├─► Stage 1: ace-bundle wfi://e2e/review (explore)
18
+ │ └─► Coverage matrix
19
+
20
+ ├─► Stage 2: ace-bundle wfi://e2e/plan-changes (decide)
21
+ │ └─► Change plan
22
+
23
+ ├─► User confirmation gate
24
+
25
+ ├─► Stage 3: ace-bundle wfi://e2e/rewrite (execute)
26
+ │ └─► Updated test files
27
+
28
+ └─► (optional) `ace-test-e2e` verification (verify)
29
+ ```
30
+
31
+ ## Arguments
32
+
33
+ - `PACKAGE` (required) - The package to manage (e.g., `ace-lint`)
34
+ - `--dry-run` (optional) - Stop after presenting the change plan (skip execution)
35
+ - `--run-tests` (optional) - Run all E2E tests after rewriting
36
+ - `--tags TAG,...` (optional) - Include only scenarios matching any specified tag (OR semantics)
37
+ - `--exclude-tags TAG,...` (optional) - Exclude scenarios matching any specified tag (OR semantics)
38
+
39
+ ## Guardrail
40
+
41
+ - Do not execute `ace-test-e2e` / `ace-test-e2e-suite` autonomously in constrained or uncertain environments.
42
+ - When verification is requested, provide exact commands for user execution unless the user explicitly requests local execution and confirms environment fidelity.
43
+
44
+ ## Workflow Steps
45
+
46
+ ### 1. Invoke Stage 1: Review
47
+
48
+ Run the exploration stage to produce a coverage matrix:
49
+
50
+ ```bash
51
+ ace-bundle wfi://e2e/review
52
+ ```
53
+
54
+ Capture the full review report output including coverage matrix, overlap analysis, gap analysis, and health status.
55
+
56
+ If the review finds no E2E tests and no features worth testing, report this and stop.
57
+
58
+ ### 2. Invoke Stage 2: Plan
59
+
60
+ Run the decision stage to produce a concrete change plan:
61
+
62
+ ```bash
63
+ ace-bundle wfi://e2e/plan-changes
64
+ ```
65
+
66
+ Capture the change plan with its REMOVE / KEEP / MODIFY / CONSOLIDATE / ADD classifications and the proposed scenario structure.
67
+
68
+ ### 3. Present Plan for Confirmation
69
+
70
+ Display the change plan from Stage 2 to the user. The plan includes:
71
+ - Impact summary (current vs proposed scenarios/TCs/cost)
72
+ - Classified actions for each TC
73
+ - Proposed scenario structure
74
+
75
+ **If `--dry-run`:** Stop here after presenting the plan. Do not execute.
76
+
77
+ **If not dry-run:** Wait for user confirmation before proceeding to Stage 3.
78
+
79
+ If the user requests modifications, re-run Stage 2 with their feedback incorporated.
80
+
81
+ ### 4. Invoke Stage 3: Rewrite
82
+
83
+ After user confirms the plan, execute it:
84
+
85
+ ```bash
86
+ ace-bundle wfi://e2e/rewrite
87
+ ```
88
+
89
+ Capture the execution summary with files created, modified, and deleted.
90
+
91
+ ### 5. Run Tests (if --run-tests)
92
+
93
+ If `--run-tests` flag is provided, verify the rewritten tests:
94
+
95
+ ```bash
96
+ ace-test-e2e {PACKAGE} {TEST_ID_1}
97
+ ace-test-e2e {PACKAGE} {TEST_ID_2}
98
+ ```
99
+
100
+ Issue one explicit command per scenario you want to verify, as shown above.
101
+
102
+ Capture test results for the final summary.
103
+
104
+ ### 6. Report Final Summary
105
+
106
+ Produce a combined summary of the full pipeline run:
107
+
108
+ ```markdown
109
+ ## E2E Management Summary: {package}
110
+
111
+ **Executed:** {timestamp}
112
+ **Pipeline:** review → plan → rewrite {→ test}
113
+
114
+ ### Pipeline Results
115
+
116
+ | Stage | Status | Key Output |
117
+ |-------|--------|------------|
118
+ | Review | Done | {n} features, {n} unit tests, {n} E2E TCs mapped |
119
+ | Plan | Done | {n} REMOVE, {n} KEEP, {n} MODIFY, {n} CONSOLIDATE, {n} ADD |
120
+ | Rewrite | Done | {n} files created, {n} modified, {n} deleted |
121
+ | Test | {Done/Skipped} | {n}/{n} passed or "not run" |
122
+
123
+ ### Final State
124
+
125
+ | Metric | Before | After |
126
+ |--------|--------|-------|
127
+ | Scenarios | {n} | {n} |
128
+ | Test Cases | {n} | {n} |
129
+
130
+ ### Next Steps
131
+
132
+ 1. {If tests not run: run `ace-test-e2e {PACKAGE} {TEST_ID}` to verify}
133
+ 2. {If tests failed: Investigate failures}
134
+ 3. Commit changes with `ace-git-commit`
135
+ ```
136
+
137
+ ## Example Invocations
138
+
139
+ **Full lifecycle (review → plan → confirm → rewrite):**
140
+ ```bash
141
+ ace-bundle wfi://e2e/manage
142
+ ```
143
+
144
+ **Dry-run (review → plan → stop) — workflow arguments `ace-lint --dry-run`:**
145
+ ```bash
146
+ ace-bundle wfi://e2e/manage
147
+ ```
148
+
149
+ **Full lifecycle with test verification — workflow arguments `ace-lint --run-tests`:**
150
+ ```bash
151
+ ace-bundle wfi://e2e/manage
152
+ ```
153
+
154
+ ## Error Handling
155
+
156
+ ### No Tests Found
157
+
158
+ If Stage 1 finds no E2E tests:
159
+ ```
160
+ No E2E tests found for {package}.
161
+
162
+ Use `ace-bundle wfi://e2e/create` to create the first test,
163
+ or load `ace-bundle wfi://e2e/review` to see the unit test coverage.
164
+ ```
165
+
166
+ ### User Cancellation
167
+
168
+ If the user declines the plan at step 3:
169
+ 1. Ask what they want to change
170
+ 2. Re-invoke Stage 2 with their feedback
171
+ 3. Present the updated plan
172
+ 4. Or exit if the user wants to stop entirely
173
+
174
+ ### Stage Failure
175
+
176
+ If any stage fails:
177
+ 1. Report which stage failed and why
178
+ 2. Show any partial output from the failed stage
179
+ 3. Suggest running the failed stage individually for debugging
@@ -0,0 +1,255 @@
1
+ ---
2
+ doc-type: workflow
3
+ title: Plan E2E Changes Workflow
4
+ purpose: Analyze coverage matrix and produce a concrete E2E test change plan
5
+ ace-docs:
6
+ last-updated: 2026-03-12
7
+ last-checked: 2026-03-21
8
+ ---
9
+
10
+ # Plan E2E Changes Workflow
11
+
12
+ This workflow takes the review output (coverage matrix) from Stage 1 and produces a concrete change plan with classified actions for each existing and proposed TC.
13
+
14
+ **Pipeline position:** Stage 2 of 3 (Decide)
15
+
16
+ ```text
17
+ ace-bundle wfi://e2e/review → ace-bundle wfi://e2e/plan-changes → ace-bundle wfi://e2e/rewrite
18
+ (explore) ▶ (decide) ◀ (execute)
19
+ ```
20
+
21
+ ## Arguments
22
+
23
+ - `PACKAGE` (required) - The package to plan changes for (e.g., `ace-lint`)
24
+ - `--review-report <path>` (optional) - Path to review report from Stage 1. If omitted, load `ace-bundle wfi://e2e/review` first.
25
+ - `--scope <scenario-id>` (optional) - Limit planning to a single scenario (e.g., `TS-LINT-001`)
26
+
27
+ ## Workflow Steps
28
+
29
+ ### 1. Load Review Output
30
+
31
+ **If `--review-report` provided:**
32
+ Read the file at the given path. Verify it contains a coverage matrix with the expected structure (features × unit tests × E2E columns).
33
+
34
+ **If no review report:**
35
+ Load `ace-bundle wfi://e2e/review` and capture the full output including coverage matrix, overlap analysis, gap analysis, and health status.
36
+
37
+ If `--scope` is provided, filter the review data to only the specified scenario and its related features.
38
+
39
+ ### 2. Analyze Recent Changes
40
+
41
+ Determine what has changed in the package since each TC was last verified:
42
+
43
+ ```bash
44
+ # Get last-verified dates from review output, then check changes since then
45
+ git log --oneline --since="{oldest-last-verified}" -- {PACKAGE}/lib/ {PACKAGE}/bin/
46
+ ```
47
+
48
+ ```bash
49
+ # Files changed over the last 20 commits (adjust the range to reach back to the oldest last-verified date)
50
+ git diff --name-only HEAD~20 -- {PACKAGE}/lib/ {PACKAGE}/bin/
51
+ ```
52
+
53
+ Build a change inventory:
54
+ - **New features** — files/modules added since last verification
55
+ - **Modified features** — existing code with changes since last verification
56
+ - **Removed features** — deleted files or deprecated modules
57
+ - **Unchanged features** — stable code with no recent modifications
58
+
59
+ ### 3. Classify Each Existing TC
60
+
61
+ For each TC listed in the coverage matrix, assign exactly one classification:
62
+
63
+ **REMOVE** — The TC should be deleted. Criteria (any one is sufficient):
64
+ - Full overlap with unit tests AND the TC does not test real binary/subprocess/filesystem
65
+ - The TC tests behavior that has been removed from the package
66
+ - The TC is a duplicate of another TC (same CLI invocation + same assertions)
67
+
68
+ For REMOVE due to overlap, replacement evidence is mandatory:
69
+ - Reference existing unit test file(s) and assertions that cover the removed behavior, OR
70
+ - Add a follow-up unit test action to the plan (file + behavior) before removal is considered complete.
71
+
72
+ **KEEP** — The TC has genuine E2E value and needs no changes. Criteria (all must be true):
73
+ - TC passes the E2E Value Gate (tests real CLI binary + external tools + filesystem I/O)
74
+ - Related source code has no changes since `last-verified`
75
+ - TC structure is valid and assertions are current
76
+
77
+ **MODIFY** — The TC has E2E value but needs updates. Criteria (any one is sufficient):
78
+ - Related source code changed since `last-verified` (assertions may be outdated)
79
+ - TC scope is too broad (should be narrowed to only E2E-exclusive aspects)
80
+ - TC scope is too narrow (missing assertions for related behavior in same CLI invocation)
81
+ - TC has structure issues flagged in the review
82
+
83
+ **CONSOLIDATE** — The TC should merge with another TC. Criteria (any one is sufficient):
84
+ - Multiple TCs share the same CLI invocation and could be a single TC with multiple assertions
85
+ - A scenario has more than 5 TCs (merge related TCs to reduce count)
86
+ - Separate TCs each check one assertion after the same setup
87
+
88
+ For each classification, document:
89
+ - The TC identifier
90
+ - The classification reason (specific, not generic)
91
+ - For REMOVE (overlap): replacement evidence (`existing unit tests` or `planned unit backfill`)
92
+ - For MODIFY: what specifically needs to change
93
+ - For CONSOLIDATE: the target TC and which assertions merge
94
+
95
+ ### 4. Identify New TCs Needed
96
+
97
+ Review the coverage matrix for gaps that warrant new E2E tests:
98
+
99
+ **Candidates for new TCs:**
100
+ - Features with no E2E coverage that pass the E2E Value Gate
101
+ - New features from git changes (step 2) without any test coverage
102
+ - Error paths not covered by any existing TC
103
+
104
+ **For each proposed new TC, document:**
105
+ - Proposed title and objective
106
+ - What CLI command it exercises
107
+ - What it verifies that unit tests cannot
108
+ - Which scenario it belongs to (existing or new)
109
+
110
+ **Filter through Value Gate:**
111
+ For each candidate, answer: "Does this require the full CLI binary + real external tools + real filesystem I/O?"
112
+ - If NO: skip — unit tests cover this (or add explicit unit test action if coverage is missing)
113
+ - If YES: include in the plan
114
+
115
+ ### 5. Propose Scenario Structure
116
+
117
+ Group all planned TCs (KEEP + MODIFY + CONSOLIDATE targets + ADD) into scenarios:
118
+
119
+ **Grouping rules:**
120
+ - 2-5 TCs per scenario (shared setup context)
121
+ - Group by CLI command or feature area
122
+ - Each scenario has a clear theme (e.g., "config validation", "report generation")
123
+
124
+ **For each scenario:**
125
+ - Name: `TS-{AREA}-{NNN}-{slug}`
126
+ - Tags: `[{cost-tier}, "use-case:{area}"]`
127
+ - List of TCs with ordering (errors first, happy path, structure verification, lifecycle)
128
+ - Shared setup requirements
129
+ - Fixtures needed
130
+
131
+ **Cost estimation:**
132
+ - Each TC ≈ 1 LLM invocation when run
133
+ - Estimate total scenarios × avg TCs × cost per invocation
134
+
135
+ ### 6. Present Plan to User
136
+
137
+ Format the complete change plan:
138
+
139
+ ```markdown
140
+ ## E2E Change Plan: {package}
141
+
142
+ **Generated:** {timestamp}
143
+ **Based on:** {review-report path or "inline review"}
144
+ **Scope:** {package-wide or scenario-id}
145
+
146
+ ### Impact Summary
147
+
148
+ | Metric | Current | Proposed | Change |
149
+ |--------|---------|----------|--------|
150
+ | Scenarios | {n} | {n} | {±n} |
151
+ | Test Cases | {n} | {n} | {±n} |
152
+ | Est. cost/run | ~${x} | ~${x} | {-n%} |
153
+
154
+ ### REMOVE ({n} TCs)
155
+
156
+ | TC | Reason | Replacement Evidence |
157
+ |----|--------|----------------------|
158
+ | {tc-id} | Unit tests in {file} cover this fully | Existing: {test-file}:{test-name} |
159
+ | {tc-id} | Duplicate of {other-tc-id} | Covered by retained TC: {other-tc-id} |
160
+
161
+ ### Unit Coverage Backfill ({n} actions)
162
+
163
+ | Action | File | Behavior |
164
+ |--------|------|----------|
165
+ | Add unit test | {test-file} | {behavior replacing removed E2E assertion} |
166
+
167
+ ### KEEP ({n} TCs)
168
+
169
+ | TC | Notes |
170
+ |----|-------|
171
+ | {tc-id} | Genuine E2E value, no changes needed |
172
+
173
+ ### MODIFY ({n} TCs)
174
+
175
+ | TC | Change Needed |
176
+ |----|---------------|
177
+ | {tc-id} | Update assertions — {feature} behavior changed in {commit} |
178
+ | {tc-id} | Narrow scope — remove assertions covered by unit tests |
179
+
180
+ ### CONSOLIDATE ({n} TCs → {n} TCs)
181
+
182
+ | Source TCs | Target TC | Merged Assertions |
183
+ |------------|-----------|-------------------|
184
+ | {tc-a}, {tc-b} | {tc-a} | Combine {n} assertions into single TC |
185
+
186
+ ### ADD ({n} new TCs)
187
+
188
+ | Proposed TC | Scenario | Verifies |
189
+ |-------------|----------|----------|
190
+ | {title} | {scenario-id} | {what it tests that units cannot} |
191
+
192
+ ### Proposed Scenario Structure
193
+
194
+ ~~~
195
+ TS-{AREA}-001-{slug}/ ({n} TCs)
196
+ TC-001: {title}
197
+ TC-002: {title}
198
+
199
+ TS-{AREA}-002-{slug}/ ({n} TCs)
200
+ TC-001: {title}
201
+ TC-002: {title}
202
+ TC-003: {title}
203
+ ~~~
204
+
205
+ ### Next Steps
206
+
207
+ - Review and approve this plan
208
+ - Run `ace-bundle wfi://e2e/rewrite` to execute
209
+ - Or modify the plan and re-run
210
+ ```
211
+
212
+ **If any classifications are uncertain**, flag them with a `?` and ask the user to confirm before proceeding.
213
+
214
+ ## Example Invocations
215
+
216
+ **Plan changes with a prior review report — workflow arguments `ace-lint --review-report <path>`:**
217
+ ```bash
218
+ ace-bundle wfi://e2e/plan-changes
219
+ ```
220
+
221
+ **Plan changes (runs review automatically):**
222
+ ```bash
223
+ ace-bundle wfi://e2e/plan-changes
224
+ ```
225
+
226
+ **Plan changes for a single scenario — workflow arguments `ace-lint --scope TS-LINT-001`:**
227
+ ```bash
228
+ ace-bundle wfi://e2e/plan-changes
229
+ ```
230
+
231
+ ## Error Handling
232
+
233
+ ### No Review Data
234
+
235
+ If no `--review-report` is provided and `ace-bundle wfi://e2e/review` finds no tests:
236
+ ```
237
+ No E2E tests found for {package}. Nothing to plan changes for.
238
+
239
+ To create the first E2E test: `ace-bundle wfi://e2e/create`
240
+ ```
241
+
242
+ ### Empty Coverage Matrix
243
+
244
+ If the review shows no features or tests:
245
+ ```
246
+ Coverage matrix is empty for {package}. Ensure the package has both
247
+ implementation code and at least one E2E test before planning changes.
248
+ ```
249
+
250
+ ### User Rejection
251
+
252
+ If the user rejects the plan:
253
+ 1. Ask which classifications they disagree with
254
+ 2. Adjust the plan based on feedback
255
+ 3. Re-present the updated plan