codebyplan 1.11.1 → 1.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/dist/cli.js +56 -5
  2. package/package.json +1 -1
  3. package/templates/README.md +1 -1
  4. package/templates/agents/cbp-cc-executor.md +1 -1
  5. package/templates/agents/cbp-e2e-maestro.md +202 -0
  6. package/templates/agents/cbp-e2e-playwright.md +229 -0
  7. package/templates/agents/cbp-e2e-tauri.md +184 -0
  8. package/templates/agents/cbp-e2e-vscode.md +203 -0
  9. package/templates/agents/cbp-e2e-xcuitest.md +224 -0
  10. package/templates/agents/cbp-improve-claude.md +1 -1
  11. package/templates/agents/cbp-round-executor.md +11 -11
  12. package/templates/agents/cbp-task-check.md +1 -1
  13. package/templates/agents/cbp-task-planner.md +2 -0
  14. package/templates/agents/cbp-testing-qa-agent.md +9 -9
  15. package/templates/context/testing/e2e.md +303 -0
  16. package/templates/hooks/validate-structure-lengths.sh +2 -0
  17. package/templates/hooks/validate-structure-smoke.sh +2 -1
  18. package/templates/hooks/validate-structure-templates.sh +1 -0
  19. package/templates/rules/context-file-loading.md +4 -1
  20. package/templates/rules/e2e-mandatory.md +70 -0
  21. package/templates/skills/cbp-build-cc-agent/SKILL.md +16 -14
  22. package/templates/skills/cbp-build-cc-agent/reference/cbp-quality.md +4 -4
  23. package/templates/skills/cbp-build-cc-agent/scripts/validate-agent.sh +8 -6
  24. package/templates/skills/cbp-build-cc-mode/SKILL.md +4 -4
  25. package/templates/skills/cbp-checkpoint-check/SKILL.md +12 -8
  26. package/templates/skills/cbp-checkpoint-plan/SKILL.md +2 -2
  27. package/templates/skills/cbp-checkpoint-plan/reference/e2e-discovery-probe.md +5 -5
  28. package/templates/skills/cbp-e2e-setup/SKILL.md +254 -0
  29. package/templates/skills/cbp-e2e-setup/reference/maestro.md +200 -0
  30. package/templates/skills/cbp-e2e-setup/reference/playwright.md +212 -0
  31. package/templates/skills/cbp-e2e-setup/reference/tauri.md +147 -0
  32. package/templates/skills/cbp-e2e-setup/reference/vscode.md +154 -0
  33. package/templates/skills/cbp-e2e-setup/reference/xcuitest.md +185 -0
  34. package/templates/skills/cbp-frontend-ui/SKILL.md +6 -6
  35. package/templates/skills/cbp-frontend-ux/SKILL.md +1 -1
  36. package/templates/skills/cbp-round-execute/SKILL.md +30 -17
  37. package/templates/skills/cbp-task-check/SKILL.md +2 -2
  38. package/templates/agents/cbp-test-e2e-agent.md +0 -363
@@ -0,0 +1,185 @@
1
+ # XCUITest Reference
2
+
3
+ Full walkthrough for iOS native E2E testing with XCUITest via the Expo `withXCUITests`
4
+ plugin. Source: Apple XCUITest docs + Expo prebuild docs.
5
+
6
+ ## When to use XCUITest vs Maestro
7
+
8
+ | Scenario | Use |
9
+ | --- | --- |
10
+ | Standard UI flows (login, navigation, forms) | Maestro — simpler, cross-platform |
11
+ | Apple Watch companion app testing | XCUITest — Maestro can't target watchOS |
12
+ | HealthKit permission dialogs | XCUITest — system dialogs not reachable by Maestro |
13
+ | iOS system sheet interactions (share sheet, notification permissions) | XCUITest |
14
+ | Face ID / Touch ID prompts | XCUITest |
15
+ | Camera/microphone permission dialogs | XCUITest |
16
+
17
+ Choose Maestro first; escalate to XCUITest only when Maestro genuinely cannot reach
18
+ the target UI.
19
+
20
+ ## Prerequisites
21
+
22
+ - macOS with Xcode 15+ installed
23
+ - An active Apple Developer account (free tier sufficient for Simulator testing)
24
+ - Expo managed workflow with prebuild enabled
25
+ - `xcbeautify` for readable output: `brew install xcbeautify`
26
+
27
+ ## Setup — Expo withXCUITests plugin
28
+
29
+ Add the plugin to `app.config.ts` (or `app.config.js`):
30
+
31
+ ```ts
32
+ export default {
33
+ expo: {
34
+ plugins: [
35
+ ["expo-build-properties", { ios: { useFrameworks: "static" } }],
36
+ // Add your withXCUITests plugin config
37
+ ["./plugins/withXCUITests", {}],
38
+ ],
39
+ },
40
+ };
41
+ ```
42
+
43
+ If using the community `expo-xcuitest` plugin:
44
+
45
+ ```bash
46
+ pnpm add -D expo-xcuitest
47
+ ```
48
+
49
+ Then in `app.config.ts`:
50
+
51
+ ```ts
52
+ plugins: [
53
+ ["expo-xcuitest", { testTargetName: "AppUITests" }]
54
+ ]
55
+ ```
56
+
57
+ ## Prebuild
58
+
59
+ After updating `app.config.ts`, regenerate the native project:
60
+
61
+ ```bash
62
+ expo prebuild --platform ios --clean
63
+ ```
64
+
65
+ `--clean` ensures a fresh native project from the current config. Commit the generated
66
+ `ios/` directory so CI can build without running prebuild.
67
+
68
+ ## Swift test class
69
+
70
+ Create `ios/AppUITests/AppUITests.swift`:
71
+
72
+ ```swift
73
+ import XCTest
74
+
75
+ class AppUITests: XCTestCase {
76
+
77
+ var app: XCUIApplication!
78
+
79
+ override func setUpWithError() throws {
80
+ continueAfterFailure = false
81
+ app = XCUIApplication()
82
+
83
+ // Inject credentials via scheme environment variables
84
+ app.launchEnvironment["TEST_EMAIL"] = ProcessInfo.processInfo.environment["TEST_EMAIL"] ?? ""
85
+ app.launchEnvironment["TEST_PASSWORD"] = ProcessInfo.processInfo.environment["TEST_PASSWORD"] ?? ""
86
+
87
+ app.launch()
88
+ }
89
+
90
+ func testLoginFlow() throws {
91
+ // Wait for the login screen
92
+ let emailField = app.textFields["email-input"]
93
+ XCTAssertTrue(emailField.waitForExistence(timeout: 10))
94
+
95
+ emailField.tap()
96
+ emailField.typeText(app.launchEnvironment["TEST_EMAIL"]!)
97
+
98
+ let passwordField = app.secureTextFields["password-input"]
99
+ passwordField.tap()
100
+ passwordField.typeText(app.launchEnvironment["TEST_PASSWORD"]!)
101
+
102
+ app.buttons["sign-in-button"].tap()
103
+
104
+ // Assert post-login element
105
+ let dashboard = app.staticTexts["Dashboard"]
106
+ XCTAssertTrue(dashboard.waitForExistence(timeout: 15))
107
+ }
108
+ }
109
+ ```
110
+
111
+ ## accessibilityIdentifier targeting
112
+
113
+ Set `accessibilityIdentifier` in your React Native components so XCUITest can find them:
114
+
115
+ ```tsx
116
+ // In React Native
117
+ <TextInput
118
+ testID="email-input" // becomes accessibilityIdentifier on iOS
119
+ accessibilityLabel="Email"
120
+ />
121
+ ```
122
+
123
+ In XCUITest, query by identifier:
124
+
125
+ ```swift
126
+ app.textFields["email-input"] // TextInput
127
+ app.buttons["sign-in-button"] // TouchableOpacity / Pressable
128
+ app.staticTexts["Dashboard"] // Text component
129
+ ```
130
+
131
+ ## Credentials via scheme environment variables
132
+
133
+ Rather than hardcoding credentials, inject them via the Xcode scheme.
134
+
135
+ In Xcode: Product → Scheme → Edit Scheme → Run → Arguments → Environment Variables.
136
+ Add `TEST_EMAIL` and `TEST_PASSWORD` pointing to your local values.
137
+
138
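+ For CI, pass them via `xcodebuild`. Note that trailing `KEY=value` arguments are build settings, so the scheme's environment variables must reference them as `$(TEST_EMAIL)` / `$(TEST_PASSWORD)` for the values to reach the test runner: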
139
+
140
+ ```bash
141
+ xcodebuild test \
142
+ -workspace ios/YourApp.xcworkspace \
143
+ -scheme YourApp \
144
+ -destination 'platform=iOS Simulator,name=iPhone 16,OS=latest' \
145
+ TEST_EMAIL="$TEST_EMAIL" \
146
+ TEST_PASSWORD="$TEST_PASSWORD" \
147
+ | xcbeautify
148
+ ```
149
+
150
+ ## Running tests
151
+
152
+ ```bash
153
+ xcodebuild test \
154
+ -workspace ios/YourApp.xcworkspace \
155
+ -scheme YourApp \
156
+ -destination 'platform=iOS Simulator,name=iPhone 16,OS=latest' \
157
+ | xcbeautify
158
+ ```
159
+
160
+ ## pnpm script
161
+
162
+ ```json
163
+ {
164
+ "scripts": {
165
+ "xcuitest": "xcodebuild test -workspace ios/YourApp.xcworkspace -scheme YourApp -destination 'platform=iOS Simulator,name=iPhone 16,OS=latest' | xcbeautify"
166
+ }
167
+ }
168
+ ```
169
+
170
+ ## Pitfalls
171
+
172
+ **Simulator not booted** — `xcodebuild` will boot the simulator if needed, but the
173
+ first run is slow. Pre-boot with `xcrun simctl boot "iPhone 16"` in CI setup.
174
+
175
+ **accessibilityIdentifier vs testID** — React Native maps `testID` to
176
+ `accessibilityIdentifier` on iOS. Ensure the component renders the prop all the way
177
+ through; some wrappers drop it.
178
+
179
+ **waitForExistence timeout** — always use `waitForExistence(timeout:)` rather than
180
+ asserting element existence immediately. React Native renders asynchronously; the
181
+ element may not be in the view hierarchy at the instant of the assertion.
182
+
183
+ **Derived data cache** — stale derived data can cause confusing failures. Clear with
184
+ `rm -rf ~/Library/Developer/Xcode/DerivedData` if tests pass locally but fail after
185
+ a scheme change.
@@ -10,7 +10,7 @@ effort: xhigh
10
10
  Invoked twice per round in non-`claude_only` profiles:
11
11
 
12
12
  1. `round-executor` Step 3.8 — `phase: 'style_only'`, no e2e screenshots. Reviews token/spacing/typography/color/cohesion against the just-written code.
13
- 2. `/cbp-round-execute` Step 5b — `phase: 'screenshot_review'`, with screenshots from `test-e2e-agent`. Reviews rendered output and detects baseline regressions.
13
+ 2. `/cbp-round-execute` Step 5b — `phase: 'screenshot_review'`, with screenshots from the `cbp-e2e-*` specialists. Reviews rendered output and detects baseline regressions.
14
14
 
15
15
  Default `phase: 'full'` runs everything (back-compat for any caller not yet migrated). Inline counterpart of the up-front `frontend-design` skill — `frontend-design` decides direction before code; `frontend-ui` reviews and polishes after code.
16
16
 
@@ -36,7 +36,7 @@ input:
36
36
  context:
37
37
  checkpoint_goal: string
38
38
  round_requirements: string
39
- e2e_screenshots: # Required for phase 'screenshot_review' or 'full' (when present); empty / omitted for 'style_only'. Sourced from round.context.e2e_output.screenshots (populated by test-e2e-agent at /cbp-round-execute Step 5).
39
+ e2e_screenshots: # Required for phase 'screenshot_review' or 'full' (when present); empty / omitted for 'style_only'. Sourced from the aggregated round.context.e2e_outputs[*].screenshots (populated by the cbp-e2e-* specialists at /cbp-round-execute Step 5).
40
40
  - test_name: string
41
41
  path: string # Repo-relative or absolute path to PNG
42
42
  page_or_screen: string
@@ -213,7 +213,7 @@ The skill's auto-fix capability is for in-scope polish, not opportunistic sweeps
213
213
  **Specifically forbidden** (always out of scope, never edited regardless of `files_changed`):
214
214
 
215
215
  - `.claude/**` — managed infrastructure under user-level governance
216
- - Project test infrastructure (e.g., `playwright.config.*`, `e2e/**`) — governed by `test-e2e-agent`
216
+ - Project test infrastructure (e.g., `playwright.config.*`, `e2e/**`) — governed by the `cbp-e2e-*` specialist agents
217
217
  - DB migrations (e.g., `supabase/migrations/**`) — governed by `database-agent`
218
218
  - Vendor mirrors and read-only reference trees
219
219
 
@@ -254,9 +254,9 @@ Go beyond fixing violations — actively improve visual quality. If spacing coul
254
254
 
255
255
  - **Loaded twice per round** (non-`claude_only` profiles):
256
256
  1. `round-executor` Step 3.8 with `phase: 'style_only'` and empty `e2e_screenshots[]` — reviews the just-written code's tokens/spacing/typography/color/cohesion (mandatory when files_changed contains UI / styling files)
257
- 2. `/cbp-round-execute` Step 5b with `phase: 'screenshot_review'` and screenshots from `round.context.e2e_output.screenshots` — runs Phase 6.5 only (rendered-output review + baseline regressions). Skipped when no e2e ran (`claude_only` / `backend` / `has_ui_work === false`).
258
- - **Also invoked by**: `/cbp-checkpoint-check` (TASK-2 deliverable, future) with screenshots from a whole-checkpoint e2e run
259
- - **Consumes**: `e2e_screenshots[]` from `round.context.e2e_output.screenshots` (populated by `test-e2e-agent` at `/cbp-round-execute` Step 5)
257
+ 2. `/cbp-round-execute` Step 5b with `phase: 'screenshot_review'` and screenshots aggregated from `round.context.e2e_outputs[*].screenshots` — runs Phase 6.5 only (rendered-output review + baseline regressions). Skipped when no e2e ran (`claude_only` / `backend`, or no eligible framework in `.codebyplan/e2e.json`).
258
+ - **Also invoked by**: `/cbp-checkpoint-check` with screenshots aggregated from a whole-checkpoint e2e run
259
+ - **Consumes**: `e2e_screenshots[]` aggregated from `round.context.e2e_outputs[*].screenshots` (populated by the `cbp-e2e-*` specialists at `/cbp-round-execute` Step 5)
260
260
  - **Output written to**: `round.context.frontend_ui_review` — when invoked twice per round, the second invocation merges with the first
261
261
  - **Downstream gate**: this skill emits `findings[]` only. Baseline-regression findings surface as a BLOCKING gate at `/cbp-round-end` Step 7 (baselines never auto-accepted); rendered-visual critical findings are surfaced in the Step 7 findings presentation.
262
262
  - **Paired with**: `frontend-design` (pre-implementation aesthetic decision), `frontend-ux` (interaction-quality self-review, also Step 3.8)
@@ -148,7 +148,7 @@ This rule applies to every file. The skill's auto-fix surface exists because in-
148
148
  **Specifically forbidden** (always out of scope, never edited regardless of `files_changed`):
149
149
 
150
150
  - `.claude/**` — managed infrastructure under user-level governance
151
- - Project test infrastructure (e.g., `playwright.config.*`, `e2e/**`) — governed by `test-e2e-agent`
151
+ - Project test infrastructure (e.g., `playwright.config.*`, `e2e/**`) — governed by the `cbp-e2e-*` specialist agents
152
152
  - DB migrations (e.g., `supabase/migrations/**`) — governed by `database-agent`
153
153
  - Vendor mirrors and read-only reference trees
154
154
 
@@ -56,7 +56,7 @@ Execute the survey instructions inline using Read/Grep/Bash. Save to `round.cont
56
56
  For each entry, route per `rules/file-routing.md`:
57
57
 
58
58
  - `.claude/skills/{name}/SKILL.md` → `cbp-build-cc-skill` via Skill tool
59
- - `.claude/agents/{name}/AGENT.md` → `cbp-build-cc-agent` via Skill tool
59
+ - `.claude/agents/{name}.md` (or `{name}/AGENT.md` folder form) → `cbp-build-cc-agent` via Skill tool
60
60
  - `.claude/rules/{name}.md` → `cbp-build-cc-rule` via Skill tool
61
61
  - `.claude/CLAUDE.md` → `cbp-build-cc-claude-file` via Skill tool (or direct Edit)
62
62
  - `.claude/settings*.json` → `cbp-build-cc-settings` via Skill tool
@@ -145,28 +145,40 @@ Read `task.context.testing_profile` (already loaded in Step 2).
145
145
 
146
146
  On pass, synthesise `testing_qa_output` inline per the procedure in `reference/inline-fallback.md` "Validation fallback" section (output shape defined in `agents/cbp-testing-qa-agent.md` Output Contract) and persist to `round.context.testing_qa_output` at Step 7.
147
147
 
148
- **All other profiles**: spawn `cbp-testing-qa-agent` AND `cbp-test-e2e-agent` in parallel (two Agent calls in the same message) per completed wave (or full executor output in single-wave mode). `cbp-test-e2e-agent` is gated on `has_ui_work === true` AND profile in {`web`, `desktop`, `full_matrix`, `cross_app`} skipped for `claude_only` / `backend`-only.
148
+ **All other profiles**: spawn `cbp-testing-qa-agent` against the wave's `files[]` (or full executor output in single-wave mode), and dispatch e2e specialists **config-driven** in parallel — all Agent calls in the same message:
149
149
 
150
- Input contracts: `cbp-testing-qa-agent` receives `executor_output`, `testing_profile`, `has_ui_work` (see `agents/cbp-testing-qa-agent.md` Input Contract). `cbp-test-e2e-agent` receives `repo_id`, `round_number`, `files_changed`, `prior_round_files_changed` (full task aggregate when round_number ≥ 2), `whole_checkpoint_mode: false`, `test_strategy`, `pages_affected`, `has_auth`, `dev_server_port` (see `agents/cbp-test-e2e-agent.md` Input Contract for the full shape).
150
+ 1. **Short-circuit hints** (applied *before* reading `e2e.json`, emit no `e2e_eligible_skipped` signal): if `testing_profile === 'backend'` OR `round.context.round_type === 'survey'`, dispatch `cbp-testing-qa-agent` alone and skip e2e entirely. (The `claude_only` branch above already skips all agent spawns.)
151
+ 2. Read `.codebyplan/e2e.json`. If the file is absent or `frameworks` is missing/empty, no framework is eligible — skip e2e entirely (no `e2e_eligible_skipped` signal) and run `cbp-testing-qa-agent` alone.
152
+ 3. For each entry in `frameworks` where `enabled === true` AND `auto_run === true`: if `platforms[]` does not include the current CI target (e.g. an iOS-only config on a Linux runner with no simulator), skip the framework — a recorded valid platform skip per `rules/e2e-mandatory.md`, NOT added to `e2e_eligible[]`. Otherwise mark it **eligible** when its `app` source path intersects this wave's `files_changed` (repo root for single-app repos). Record the eligible framework names as `round.context.e2e_eligible[]`.
153
+ 4. For every eligible framework, spawn the matching `cbp-e2e-*` specialist (per the `context/testing/e2e.md` dispatch routing table) IN PARALLEL with `cbp-testing-qa-agent` and with each other. Inject `framework`, `app`, `platforms`, and `credential_vars` from `e2e.json` — the config is authoritative; agents do not auto-detect.
154
+ 5. `has_ui_work` and `testing_profile` are **hints only** beyond the short-circuit above — they never suppress an eligible framework. Pure `.claude/`-only and docs-only rounds match no configured `app` path and are therefore not eligible.
151
155
 
152
- **Independence**: neither agent reads the other's output. Baseline-regression findings surface as a BLOCKING gate at `/cbp-round-end` Step 7 (an explicit accept-or-fix user decision; baselines are NEVER auto-accepted). Per-wave spawns MAY run in parallel with the next wave's executor when dependency order allows.
156
+ This realises the opt-out contract in `rules/e2e-mandatory.md`: an eligible framework whose specialist does not run, and for which no valid skip reason is recorded, is an `e2e_eligible_skipped` hard-fail at Step 6.
157
+
158
+ Input contracts: `cbp-testing-qa-agent` receives `executor_output`, `testing_profile`, `has_ui_work` (see `agents/cbp-testing-qa-agent.md` Input Contract). The `cbp-e2e-*` specialist receives `repo_id`, `round_number`, `files_changed`, `prior_round_files_changed` (full task aggregate when round_number ≥ 2), `whole_checkpoint_mode: false`, `framework`, `app`, `platforms`, `credential_vars`, `test_strategy`, `pages_affected`, `has_auth`, `dev_server_port` (see `context/testing/e2e.md` Input Contract for the full shape). `test_strategy` is injected here in per-round mode; `/cbp-checkpoint-check` Step 5b omits it (the specialist self-resolves from `e2e.json` + DB in `whole_checkpoint_mode`).
159
+
160
+ **Independence**: neither agent reads the other's output. Baseline-regression findings surface as a BLOCKING gate at `/cbp-round-end` Step 7 (an explicit accept-or-fix user decision; baselines are NEVER auto-accepted). Per-wave spawns MAY run in parallel with the next wave's executor when dependency order allows. The `cbp-e2e-*` specialists are parallel siblings of `cbp-testing-qa-agent` — they do not share state.
153
161
 
154
162
  ### Step 5b: Post-E2E Screenshot Review (cbp-frontend-ui Phase 6.5)
155
163
 
156
- When `round.context.e2e_output.screenshots[]` is non-empty, invoke the `cbp-frontend-ui` skill with `phase: 'screenshot_review'` (input: `files_changed`, `e2e_screenshots: round.context.e2e_output.screenshots`, `context: { checkpoint_goal, round_requirements }`). Under this phase the skill runs only Phase 6.5 (Rendered-Output Visual Review) + 7 + 8 — Phases 1-6 (style) already ran inline at executor Step 3.8 with `phase: 'style_only'`.
164
+ Aggregate screenshots across ALL specialists that ran: `screenshots = Object.values(round.context.e2e_outputs ?? {}).flatMap(o => o.screenshots ?? [])`. When the aggregated list is non-empty, invoke the `cbp-frontend-ui` skill with `phase: 'screenshot_review'` (input: `files_changed`, `e2e_screenshots: <aggregated screenshots>`, `context: { checkpoint_goal, round_requirements }`). Under this phase the skill runs only Phase 6.5 (Rendered-Output Visual Review) + 7 + 8 — Phases 1-6 (style) already ran inline at executor Step 3.8 with `phase: 'style_only'`.
157
165
 
158
166
  Persist findings to `round.context.frontend_ui_review` (merge with Step 3.8's style-only output if present). Baseline-regression findings surface as a BLOCKING gate at `/cbp-round-end` Step 7 (an explicit accept-or-fix user decision; baselines are NEVER auto-accepted); rendered_visual critical findings are surfaced in the Step 7 findings presentation. Neither auto-fails the round. cbp-testing-qa-agent does NOT read these findings (full independence per Step 5).
159
167
 
160
- **Skip** when `round.context.e2e_output` is absent, `screenshots` is empty, or `testing_profile === 'claude_only'`.
168
+ **Skip** when `round.context.e2e_outputs` is absent/empty, the aggregated `screenshots` list is empty, or `testing_profile === 'claude_only'`.
161
169
 
162
170
  ### Step 6: Hard-Fail Routing
163
171
 
164
- Per-wave hard-fail signal: `testing_qa_output.totals.hard_fail || e2e_output.status === 'failed' || e2e_output.test_results?.failed > 0`.
172
+ Per-wave hard-fail signal — true when ANY of the following hold:
173
+
174
+ - `testing_qa_output.totals.hard_fail === true`.
175
+ - For any framework `f` in `round.context.e2e_outputs`: `e2e_outputs[f].status === 'failed'` OR `e2e_outputs[f].test_results?.failed > 0`.
176
+ - **`e2e_eligible_skipped`**: any framework in `round.context.e2e_eligible[]` for which no specialist output exists in `round.context.e2e_outputs` AND no valid skip reason is recorded (per the `rules/e2e-mandatory.md` valid-skip list). A silently-skipped eligible framework is a hard-fail.
165
177
 
166
178
  **All waves hard_fail: false** → proceed to Step 7. **Any wave hard_fail: true**:
167
179
 
168
- - **Simple fixes** (type errors, lint, missing imports, test assertion fixes, e2e `real`-category with clear code-side root cause, no prior re-trigger this round) → save failure details to round context; retrigger the failing wave's executor; re-run testing-qa AND test-e2e for that wave.
169
- - **Structural OR already re-triggered once OR e2e preflight aborts** → save failure context via MCP `update_round`; auto-trigger `/cbp-round-input`. STOP.
180
+ - **Simple fixes** (type errors, lint, missing imports, test assertion fixes, e2e `real`-category with clear code-side root cause, no prior re-trigger this round) → save failure details to round context; retrigger the failing wave's executor; re-run testing-qa AND the eligible `cbp-e2e-*` specialists for that wave.
181
+ - **Structural OR already re-triggered once OR e2e preflight aborts OR `e2e_eligible_skipped`** → save failure context via MCP `update_round`; auto-trigger `/cbp-round-input`. STOP.
170
182
 
171
183
  ## Inline execution fallback
172
184
 
@@ -180,9 +192,9 @@ When `cbp-testing-qa-agent` spawn fails OR the resolved `testing_profile` is `cl
180
192
 
181
193
  Update round context via MCP `update_round`:
182
194
 
183
- - `context`: { ...existing, executor_output, testing_qa_output, e2e_output, frontend_ui_review }
195
+ - `context`: { ...existing, executor_output, testing_qa_output, e2e_eligible, e2e_outputs, frontend_ui_review }
184
196
 
185
- `e2e_output` and `frontend_ui_review` are present only when the gates above admitted them (e2e ran AND Step 5b ran).
197
+ `e2e_outputs` (a framework-keyed map of specialist outputs, e.g. `{ playwright: {...}, maestro: {...} }`) and `frontend_ui_review` are present only when the gates above admitted them (≥1 eligible framework ran AND Step 5b ran). `e2e_eligible[]` records which frameworks were eligible this round and drives the Step 6 `e2e_eligible_skipped` check.
186
198
 
187
199
  ### Step 8: Auto-trigger Round End
188
200
 
@@ -195,17 +207,18 @@ Trigger `/cbp-round-end`.
195
207
  ## Key Rules
196
208
 
197
209
  - **Code + test writing + inline validation** — planning lives in `round-start`, summary in `round-end`
198
- - Per-wave `cbp-testing-qa-agent` AND `cbp-test-e2e-agent` run in parallel (both against the same wave's `files[]`); they may also run in parallel with the NEXT wave's executor when dependency order allows
199
- - `testing_profile` from `task.context` governs which checks run — read it once in Step 2; pass to every testing-qa + test-e2e spawn
200
- - `claude_only` profile skips all agent spawns (testing-qa AND test-e2e); runs hook syntax and skill structure checks inline
201
- - Step 5b (cbp-frontend-ui Phase 6.5) runs only when e2e produced screenshots gated on `e2e_output.screenshots[]` non-empty
210
+ - Per-wave `cbp-testing-qa-agent` AND the eligible `cbp-e2e-*` specialists run in parallel (all against the same wave's `files[]`); they may also run in parallel with the NEXT wave's executor when dependency order allows
211
+ - `testing_profile` from `task.context` governs which checks run — read it once in Step 2; pass to every testing-qa + e2e specialist spawn
212
+ - `claude_only` profile skips all agent spawns (testing-qa AND `cbp-e2e-*`); runs hook syntax and skill structure checks inline
213
+ - E2E dispatch is **config-driven and opt-out** (`.codebyplan/e2e.json`), not gated on `has_ui_work`/`testing_profile` — an eligible framework that silently does not run is an `e2e_eligible_skipped` hard-fail (`rules/e2e-mandatory.md`)
214
+ - Step 5b (cbp-frontend-ui Phase 6.5) runs only when e2e produced screenshots — gated on the aggregated `e2e_outputs[*].screenshots[]` being non-empty
202
215
  - Claude NEVER git adds files in round commands
203
216
 
204
217
  ## Integration
205
218
 
206
219
  - **Reads**: MCP `get_current_task`, `get_rounds`
207
- - **Writes**: MCP `update_round` (context with executor_output + testing_qa_output + e2e_output + frontend_ui_review)
208
- - **Spawns**: `cbp-round-executor` (per wave or single), `cbp-testing-qa-agent` (per wave, parallel sibling of cbp-test-e2e-agent), `cbp-test-e2e-agent` (per wave when has_ui_work + non-claude_only profile), `cbp-database-agent` (if DB work), `cbp-security-agent` (if security review needed)
220
+ - **Writes**: MCP `update_round` (context with executor_output + testing_qa_output + e2e_eligible + e2e_outputs + frontend_ui_review)
221
+ - **Spawns**: `cbp-round-executor` (per wave or single), `cbp-testing-qa-agent` (per wave, parallel sibling of the `cbp-e2e-*` specialists), the `cbp-e2e-*` specialists (config-driven dispatch per `context/testing/e2e.md`, one per eligible framework in `.codebyplan/e2e.json`), `cbp-database-agent` (if DB work), `cbp-security-agent` (if security review needed)
209
222
  - **Skill invocations**: `cbp-frontend-ui` at Step 5b with `phase: 'screenshot_review'` (post-e2e)
210
223
  - **Triggers**: `/cbp-round-end` (auto)
211
224
  - **Triggered by**: `/cbp-round-start` (auto, after plan approval)
@@ -18,12 +18,12 @@ If the `cbp-task-check` agent spawn fails for any reason (`API Error: Extra usag
18
18
  Procedure summary (pointer back to canonical):
19
19
 
20
20
  1. Detect the failure class from the error string; record `round.context.task_check_findings.spawn_failure = { class, error_message, decided_at }`.
21
- 2. Walk the agent's documented Phase 1-10 checklist inline using `Read` / `Grep` / `Bash` / MCP `get_*` tools — the agent's AGENT.md is the inline script.
21
+ 2. Walk the agent's documented Phase 1-10 checklist inline using `Read` / `Grep` / `Bash` / MCP `get_*` tools — the agent's definition file is the inline script.
22
22
  3. Populate the agent's output contract (`verdict`, `route_recommendation`, `requirements_status`, `qa_status`, `code_review_findings`, `user_satisfaction`, `scope_divergence_detected`, etc.) with `mode: 'inline_fallback'` so analytics distinguishes.
23
23
  4. Apply the pre-emptive-skip rule: when the same failure class fired in the previous skill of this session, skip the spawn attempt entirely and go straight to inline.
24
24
  5. Continue the skill — do NOT abort. Inline-fallback is intended to keep the pipeline moving under sustained outages.
25
25
 
26
- Inline-fallback is NOT a quality downgrade trapdoor — every Phase from the AGENT.md MUST be walked, in order, with the same Read/Grep depth the agent would have used. Skipping phases under the banner of fallback is a separate failure mode that `cbp-improve-claude` flags as `inline_fallback_shortcutting`.
26
+ Inline-fallback is NOT a quality downgrade trapdoor — every Phase from the agent definition MUST be walked, in order, with the same Read/Grep depth the agent would have used. Skipping phases under the banner of fallback is a separate failure mode that `cbp-improve-claude` flags as `inline_fallback_shortcutting`.
27
27
 
28
28
  ## When Used
29
29