codebyplan 1.11.1 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +602 -345
- package/package.json +1 -1
- package/templates/README.md +1 -1
- package/templates/agents/cbp-cc-executor.md +1 -1
- package/templates/agents/cbp-e2e-maestro.md +202 -0
- package/templates/agents/cbp-e2e-playwright.md +229 -0
- package/templates/agents/cbp-e2e-tauri.md +184 -0
- package/templates/agents/cbp-e2e-vscode.md +203 -0
- package/templates/agents/cbp-e2e-xcuitest.md +224 -0
- package/templates/agents/cbp-improve-claude.md +1 -1
- package/templates/agents/cbp-round-executor.md +11 -11
- package/templates/agents/cbp-task-check.md +1 -1
- package/templates/agents/cbp-task-planner.md +2 -0
- package/templates/agents/cbp-testing-qa-agent.md +9 -9
- package/templates/context/testing/e2e.md +303 -0
- package/templates/hooks/cbp-statusline.mjs +44 -0
- package/templates/hooks/cbp-statusline.py +24 -2
- package/templates/hooks/cbp-statusline.sh +22 -2
- package/templates/hooks/validate-structure-lengths.sh +2 -0
- package/templates/hooks/validate-structure-smoke.sh +2 -1
- package/templates/hooks/validate-structure-templates.sh +1 -0
- package/templates/rules/README.md +8 -1
- package/templates/rules/context-file-loading.md +4 -1
- package/templates/rules/e2e-mandatory.md +70 -0
- package/templates/rules/supabase-branch-lifecycle.md +99 -0
- package/templates/settings.project.base.json +1 -2
- package/templates/skills/cbp-build-cc-agent/SKILL.md +16 -14
- package/templates/skills/cbp-build-cc-agent/reference/cbp-quality.md +4 -4
- package/templates/skills/cbp-build-cc-agent/scripts/validate-agent.sh +8 -6
- package/templates/skills/cbp-build-cc-mode/SKILL.md +4 -4
- package/templates/skills/cbp-build-cc-settings/reference/cbp-conventions.md +1 -2
- package/templates/skills/cbp-checkpoint-check/SKILL.md +12 -8
- package/templates/skills/cbp-checkpoint-create/SKILL.md +2 -0
- package/templates/skills/cbp-checkpoint-end/SKILL.md +27 -5
- package/templates/skills/cbp-checkpoint-plan/SKILL.md +2 -2
- package/templates/skills/cbp-checkpoint-plan/reference/e2e-discovery-probe.md +5 -5
- package/templates/skills/cbp-e2e-setup/SKILL.md +254 -0
- package/templates/skills/cbp-e2e-setup/reference/maestro.md +200 -0
- package/templates/skills/cbp-e2e-setup/reference/playwright.md +212 -0
- package/templates/skills/cbp-e2e-setup/reference/tauri.md +147 -0
- package/templates/skills/cbp-e2e-setup/reference/vscode.md +154 -0
- package/templates/skills/cbp-e2e-setup/reference/xcuitest.md +185 -0
- package/templates/skills/cbp-frontend-ui/SKILL.md +6 -6
- package/templates/skills/cbp-frontend-ux/SKILL.md +1 -1
- package/templates/skills/cbp-git-worktree-remove/SKILL.md +17 -1
- package/templates/skills/cbp-round-execute/SKILL.md +30 -17
- package/templates/skills/cbp-session-start/SKILL.md +27 -2
- package/templates/skills/cbp-ship-main/SKILL.md +13 -0
- package/templates/skills/cbp-supabase-branch-check/SKILL.md +12 -5
- package/templates/skills/cbp-supabase-migrate/SKILL.md +139 -9
- package/templates/skills/cbp-supabase-migrate/reference/preflight-dry-run.md +1 -1
- package/templates/skills/cbp-supabase-setup/SKILL.md +13 -7
- package/templates/skills/cbp-supabase-setup/reference/branching-setup.md +2 -2
- package/templates/skills/cbp-task-check/SKILL.md +2 -2
- package/templates/skills/cbp-task-start/SKILL.md +2 -0
- package/templates/agents/cbp-test-e2e-agent.md +0 -363
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
---
|
|
2
|
+
scope: org-shared
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# E2E Shared Workflow Contract
|
|
6
|
+
|
|
7
|
+
Loaded by every `cbp-e2e-*` specialist agent. Defines the shared Input/Output contract,
|
|
8
|
+
pre-flight loop, failure classification, screenshot rules, dispatch routing, and
|
|
9
|
+
never-silently-skip obligations. Framework-specific commands live in each agent's body.
|
|
10
|
+
|
|
11
|
+
## Input Contract
|
|
12
|
+
|
|
13
|
+
Passed by the dispatching skill (`/cbp-round-execute` Step 5, `/cbp-checkpoint-check`
|
|
14
|
+
Step 5b, or `/cbp-checkpoint-plan` Step 4 discovery probe). The dispatching skill reads
|
|
15
|
+
`.codebyplan/e2e.json` and injects `framework`, `app`, `platforms`, and credential var
|
|
16
|
+
names — agents do NOT auto-detect platform; the config is authoritative.
|
|
17
|
+
|
|
18
|
+
```yaml
|
|
19
|
+
input:
|
|
20
|
+
repo_id: string # UUID — used to resolve tech_stack from DB
|
|
21
|
+
round_number: number # 1-based; 0 is the sentinel for whole_checkpoint_mode
|
|
22
|
+
files_changed: [{path, action}]
|
|
23
|
+
prior_round_files_changed: # Required when round_number >= 2
|
|
24
|
+
- path: string
|
|
25
|
+
action: string
|
|
26
|
+
user_approved: boolean
|
|
27
|
+
whole_checkpoint_mode: boolean # Default false. When true, run full pages_affected
|
|
28
|
+
test_strategy:
|
|
29
|
+
platform: string
|
|
30
|
+
e2e_framework: string # playwright | maestro | webdriverio | xcuitest | vscode-test
|
|
31
|
+
pages_affected: string[] # Routes or screen names changed
|
|
32
|
+
has_auth: boolean
|
|
33
|
+
dev_server_port: number | null
|
|
34
|
+
framework: string # From .codebyplan/e2e.json — authoritative
|
|
35
|
+
app: string # App path (e.g. apps/web)
|
|
36
|
+
platforms: string[] # e.g. ["web"] | ["ios","android"] | ["desktop"]
|
|
37
|
+
credential_vars: # Env var names to probe at Step 6.5.1
|
|
38
|
+
email: string | null
|
|
39
|
+
password: string | null
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Output Contract
|
|
43
|
+
|
|
44
|
+
```yaml
|
|
45
|
+
output:
|
|
46
|
+
status: 'completed' | 'failed' # 'blocked' is NOT valid — resolve via AskUserQuestion
|
|
47
|
+
tests_written: [{path, action: 'created' | 'modified'}]
|
|
48
|
+
tests_run: boolean # MUST be true when status == 'completed'
|
|
49
|
+
test_results:
|
|
50
|
+
passed: number
|
|
51
|
+
failed: number
|
|
52
|
+
skipped: number
|
|
53
|
+
failures:
|
|
54
|
+
- test_name: string
|
|
55
|
+
error: string
|
|
56
|
+
file: string
|
|
57
|
+
category: 'env' | 'auth' | 'access' | 'flake' | 'real' | 'visual_regression'
|
|
58
|
+
classification_reason: string
|
|
59
|
+
framework_configured: boolean
|
|
60
|
+
preflight:
|
|
61
|
+
dev_server: { required: bool, ok: bool, port: number | null, notes: string }
|
|
62
|
+
simulator: { required: bool, ok: bool, device: string | null, notes: string }
|
|
63
|
+
built_binary: { required: bool, ok: bool, path: string | null, notes: string }
|
|
64
|
+
env_vars: { required: string[], missing: string[], ok: bool }
|
|
65
|
+
auth_probe: { ran: bool, ok: bool, probe_path: string | null, error: string | null }
|
|
66
|
+
screenshots:
|
|
67
|
+
- test_name: string
|
|
68
|
+
path: string # Absolute or repo-relative path to PNG
|
|
69
|
+
page_or_screen: string
|
|
70
|
+
viewport: 'desktop' | 'mobile' | 'tablet' | 'device'
|
|
71
|
+
is_new: bool
|
|
72
|
+
baseline_diff_pct: number | null
|
|
73
|
+
user_interactions: [{question, answer}]
|
|
74
|
+
tech_stack_reconciliation:
|
|
75
|
+
db_framework: string | null
|
|
76
|
+
fs_framework: string | null
|
|
77
|
+
resolution: 'follow_db' | 'follow_fs' | 'configure_missing' | 'skip_app' | 'no_mismatch' | 'no_db_data'
|
|
78
|
+
decided_at: string
|
|
79
|
+
round2_skip_set:
|
|
80
|
+
- spec_path: string
|
|
81
|
+
reason: string
|
|
82
|
+
whole_checkpoint_aggregated: boolean
|
|
83
|
+
critical_issues:
|
|
84
|
+
- type: string
|
|
85
|
+
spec_path: string | null
|
|
86
|
+
reason: string
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Step 5.1 — Page Filter (Round 2+, non-checkpoint mode)
|
|
90
|
+
|
|
91
|
+
When `round_number >= 2` AND `whole_checkpoint_mode === false`:
|
|
92
|
+
|
|
93
|
+
1. Build `unapproved_files` from `prior_round_files_changed` where `user_approved === false`.
|
|
94
|
+
2. For each page in `pages_affected[]`, derive contributing source files:
|
|
95
|
+
- Next.js: `app/<route>/page.tsx` + layout chain + imported components
|
|
96
|
+
- Expo / React Native: screen file + imported components
|
|
97
|
+
- Tauri: route component + Rust handler files
|
|
98
|
+
- Fallback: any file whose path starts with the page's directory
|
|
99
|
+
3. A page **survives** when ANY contributing file is in `unapproved_files`.
|
|
100
|
+
4. A page **is skipped** when ALL contributing files are user-approved.
|
|
101
|
+
5. Record skipped pages in `round2_skip_set[]`.
|
|
102
|
+
6. Replace `pages_affected` with the surviving subset.
|
|
103
|
+
|
|
104
|
+
When `round_number === 1` OR `whole_checkpoint_mode === true`, use `pages_affected` verbatim.
|
|
105
|
+
|
|
106
|
+
## Step 6.5 — Pre-flight (MANDATORY)
|
|
107
|
+
|
|
108
|
+
Never proceed with `tests_run: false`. Resolve every failing check via `AskUserQuestion`
|
|
109
|
+
in a loop — re-probe after user confirmation. An explicit abort returns `status: 'failed'`
|
|
110
|
+
with the blocking preflight field populated.
|
|
111
|
+
|
|
112
|
+
### 6.5.1 Environment Variables
|
|
113
|
+
|
|
114
|
+
Check `apps/{app}/.env.local` and process env. Framework-specific required var names come
|
|
115
|
+
from the `credential_vars` input field (the dispatching skill reads them from
|
|
116
|
+
`.codebyplan/e2e.json`). Naming conventions:
|
|
117
|
+
|
|
118
|
+
- Playwright uses `E2E_TEST_*` (avoids collision with non-E2E `TEST_*` vars).
|
|
119
|
+
- Maestro/XCUITest stay on `TEST_*` per `rules/maestro-auth-state-reset.md`.
|
|
120
|
+
|
|
121
|
+
For any missing var:
|
|
122
|
+
|
|
123
|
+
> "Missing required E2E env vars: `{names}`. Set them in `apps/{app}/.env.local` now,
|
|
124
|
+
> then reply 'ready'. (Or reply 'skip' to abort this e2e run.)"
|
|
125
|
+
|
|
126
|
+
**Hard rule**: Specs MUST NOT contain in-spec env skip gates (`test.skip(!process.env.X, ...)`)
|
|
127
|
+
— those bypass preflight and produce zero-assertion runs. See `rules/e2e-mandatory.md`.
|
|
128
|
+
|
|
129
|
+
### 6.5.2 Runtime Readiness
|
|
130
|
+
|
|
131
|
+
Framework-specific probes are in each agent's body. General obligations:
|
|
132
|
+
|
|
133
|
+
- Playwright: dev server responding at `baseURL` port (curl HTTP 200/3xx).
|
|
134
|
+
- Maestro (iOS): booted simulator (`xcrun simctl list devices booted | grep Booted`).
|
|
135
|
+
- Maestro (Android): connected device/emulator (`adb devices | grep -w device`).
|
|
136
|
+
- WebDriverIO: built Tauri binary present at the path in `wdio.conf.ts`.
|
|
137
|
+
- XCUITest: `xcodebuild -list` returns the scheme; Expo prebuild artifacts present.
|
|
138
|
+
- vscode-test: compiled test JS present (`e2e/**/*.test.js`).
|
|
139
|
+
|
|
140
|
+
On any failure, `AskUserQuestion` with remediation steps; re-probe after "ready". Never
|
|
141
|
+
silently skip a required runtime prerequisite.
|
|
142
|
+
|
|
143
|
+
**Port alignment (Playwright only)**: parse `playwright.config.ts` `baseURL` and compare
|
|
144
|
+
to `.codebyplan/server.json` `port_allocations[]` for the app. On mismatch ask which is
|
|
145
|
+
correct before running.
|
|
146
|
+
|
|
147
|
+
### 6.5.3 Auth Probe (only when `has_auth`)
|
|
148
|
+
|
|
149
|
+
Run the dedicated auth probe — not the full suite. Probe paths per framework are in each
|
|
150
|
+
agent's body.
|
|
151
|
+
|
|
152
|
+
If the probe fails, classify the reason (see Step 7.5 below) and ask:
|
|
153
|
+
|
|
154
|
+
> "Auth probe failed: `{category}` — `{error_summary}`. Common causes: wrong credentials,
|
|
155
|
+
> expired state, auth backend paused, captcha. Options: (1) delete storage state + retry,
|
|
156
|
+
> (2) fix credentials + reply 'ready', (3) abort e2e."
|
|
157
|
+
|
|
158
|
+
On "ready", re-run the probe. Loop up to 3 times before escalating with a new
|
|
159
|
+
`AskUserQuestion` that summarises all 3 attempts' errors.
|
|
160
|
+
|
|
161
|
+
## Step 7.5 — Failure Classification
|
|
162
|
+
|
|
163
|
+
For each failed test, assign exactly one category:
|
|
164
|
+
|
|
165
|
+
| Category | Signals | Resolution |
|
|
166
|
+
|---|---|---|
|
|
167
|
+
| `env` | `process.env.X is undefined`, `ECONNREFUSED`, missing config | Loop to Step 6.5.1 |
|
|
168
|
+
| `auth` | Login-page redirect, 401 after credential submit, `invalid_grant`, `email_not_confirmed` | AskUserQuestion per Step 6.5.3 |
|
|
169
|
+
| `access` | 403/404 on an accessible route, RLS denial text, missing seed data | AskUserQuestion: "Test failed with access error: `{error}`. Options: (1) fix + reply steps, (2) abort." |
|
|
170
|
+
| `flake` | Timeout on first run, passes on immediate retry, network jitter | Retry up to 3 times before reclassifying to `real` |
|
|
171
|
+
| `visual_regression` | `toHaveScreenshot` pixel-diff exceeded threshold | Do NOT retry. Include baseline + actual paths in `screenshots[]` with `baseline_diff_pct`. Do NOT auto-accept baselines. |
|
|
172
|
+
| `real` | Assertion failure on app behavior (wrong text, state, navigation) | Attempt fix (selector, timeout, assertion), max 3 attempts, then report |
|
|
173
|
+
|
|
174
|
+
`env`, `auth`, and `access` failures MUST NOT count toward `test_results.failed` until
|
|
175
|
+
preflight passes — they block the run instead.
|
|
176
|
+
|
|
177
|
+
## Screenshot Collection Rule
|
|
178
|
+
|
|
179
|
+
After every run, enumerate all PNGs produced and populate `screenshots[]`. Framework-
|
|
180
|
+
specific paths are in each agent's body. Every entry requires:
|
|
181
|
+
`{test_name, path, page_or_screen, viewport, is_new, baseline_diff_pct}`.
|
|
182
|
+
|
|
183
|
+
Screenshots flow to `cbp-frontend-ui` invoked by `/cbp-round-execute` Step 5b with
|
|
184
|
+
`phase: 'screenshot_review'` — NOT inline by `cbp-round-executor` Step 3.8 (which runs
|
|
185
|
+
`phase: 'style_only'` without e2e output).
|
|
186
|
+
|
|
187
|
+
**Baselines are never auto-accepted.** A `toHaveScreenshot` diff is `visual_regression`;
|
|
188
|
+
the user decides via QA whether to update baselines.
|
|
189
|
+
|
|
190
|
+
## Completion Rule
|
|
191
|
+
|
|
192
|
+
`status: 'completed'` is allowed ONLY when:
|
|
193
|
+
|
|
194
|
+
- `tests_run === true`
|
|
195
|
+
- `preflight.*.ok === true` for every required prerequisite
|
|
196
|
+
- Every failure has `category` other than `env`, `auth`, or `access`
|
|
197
|
+
|
|
198
|
+
Otherwise return `status: 'failed'`.
|
|
199
|
+
|
|
200
|
+
## Never-Silently-Skip Rules
|
|
201
|
+
|
|
202
|
+
- Missing simulator / server / binary / env / auth → always `AskUserQuestion`, never `tests_run: false`.
|
|
203
|
+
- No testable targets despite `has_ui_work` → return `status: 'failed'` with reason "no testable targets".
|
|
204
|
+
- All-skipped run (`passed === 0 && skipped > 0` for a spec in `files_changed`) → `status: 'failed'`, add `critical_issues[]` entry `{type: 'e2e_all_skipped', ...}`.
|
|
205
|
+
- User aborts preflight → `status: 'failed'`, add `critical_issues[]` entry `{type: 'preflight_aborted', ...}`.
|
|
206
|
+
|
|
207
|
+
## Dispatch / Eligibility Routing Contract
|
|
208
|
+
|
|
209
|
+
The dispatching skill (`/cbp-round-execute` Step 5 or `/cbp-checkpoint-check` Step 5b)
|
|
210
|
+
selects one specialist per app. Config is in `.codebyplan/e2e.json` (authoritative).
|
|
211
|
+
|
|
212
|
+
| `framework` in e2e.json | Agent spawned | Typical app type |
|
|
213
|
+
|---|---|---|
|
|
214
|
+
| `playwright` | `cbp-e2e-playwright` | Next.js web routes |
|
|
215
|
+
| `maestro` | `cbp-e2e-maestro` | Expo / React Native (android + ios) |
|
|
216
|
+
| `webdriverio` | `cbp-e2e-tauri` | Tauri desktop |
|
|
217
|
+
| `vscode-test` | `cbp-e2e-vscode` | VS Code extension |
|
|
218
|
+
| `xcuitest` | `cbp-e2e-xcuitest` | Native iOS (system dialogs, HealthKit, watchOS) |
|
|
219
|
+
|
|
220
|
+
**Eligibility is config-driven.** A framework is **eligible** in a round when its
|
|
221
|
+
`.codebyplan/e2e.json` entry has `enabled === true` AND `auto_run === true` AND its `app`
|
|
222
|
+
source path intersects the round's `files_changed` (repo root for single-app repos). An
|
|
223
|
+
eligible framework's specialist MUST run — see `rules/e2e-mandatory.md` for the opt-out
|
|
224
|
+
contract and the `e2e_eligible_skipped` hard-fail.
|
|
225
|
+
|
|
226
|
+
An agent is NOT spawned when ANY of the following hold:
|
|
227
|
+
|
|
228
|
+
- `testing_profile` is `claude_only` or `backend` (no UI surface) — a short-circuit hint, applied before reading `e2e.json`.
|
|
229
|
+
- The framework's `enabled` or `auto_run` is `false`, or the `app` path does not intersect `files_changed`.
|
|
230
|
+
- `frameworks` in `e2e.json` is absent or empty (no e2e configured) — zero eligible, no hard-fail.
|
|
231
|
+
- `platforms[]` in `e2e.json` does not include the current CI target (e.g., iOS-only config skipped on a Linux runner without a simulator) — a recorded valid skip per `rules/e2e-mandatory.md`.
|
|
232
|
+
|
|
233
|
+
`has_ui_work` and `testing_profile` (beyond the `claude_only` / `backend` short-circuit) are
|
|
234
|
+
**hints only** — they never suppress an eligible framework. Config is authoritative.
|
|
235
|
+
|
|
236
|
+
**Multi-app monorepos**: the dispatching skill reads `e2e.json` per app path and may
|
|
237
|
+
spawn multiple specialists in the same round (one per eligible framework). Agents run in
|
|
238
|
+
parallel with `cbp-testing-qa-agent`. Each specialist's output is stored under
|
|
239
|
+
`round.context.e2e_outputs[framework]` (a framework-keyed map); `/cbp-round-execute` Step 5b
|
|
240
|
+
aggregates `screenshots[]` across all entries before the `cbp-frontend-ui` review.
|
|
241
|
+
|
|
242
|
+
**whole_checkpoint_mode dispatch** (`/cbp-checkpoint-check` Step 5b and `/cbp-checkpoint-plan`
|
|
243
|
+
Step 4): pass `round_number: 0`, `whole_checkpoint_mode: true`, and the aggregated
|
|
244
|
+
`files_changed` union. The agent ignores `prior_round_files_changed` in this mode.
|
|
245
|
+
|
|
246
|
+
This contract is the single source of truth for dispatch logic. Config-driven dispatch is
|
|
247
|
+
implemented in `/cbp-round-execute` Step 5 and `/cbp-checkpoint-check` Step 5b (CHK-145); the
|
|
248
|
+
routing table above is the authoritative reference those gates match. Enforcement (the
|
|
249
|
+
`e2e_eligible_skipped` hard-fail and the no-in-spec-env-skip gate) lives in
|
|
250
|
+
`rules/e2e-mandatory.md`.
|
|
251
|
+
|
|
252
|
+
## Playwright Auth Provisioning Convention
|
|
253
|
+
|
|
254
|
+
Every repo with Playwright auth ships:
|
|
255
|
+
|
|
256
|
+
- `scripts/provision-e2e-user.ts` — idempotent script creating the canonical E2E user
|
|
257
|
+
and (for multi-tenant repos) a `test` subdomain. Wired to `pnpm e2e:provision`.
|
|
258
|
+
- `.env.local.example` — lists every env var `globalSetup` requires.
|
|
259
|
+
- CI secrets: `E2E_TEST_EMAIL`, `E2E_TEST_PASSWORD`, `NEXT_PUBLIC_SUPABASE_URL`,
|
|
260
|
+
`NEXT_PUBLIC_SUPABASE_PUBLISHABLE_KEY` (or legacy `_ANON_KEY`).
|
|
261
|
+
|
|
262
|
+
Per-repo specifics (email, vault name, remaining-spec migration list) live in the repo's
|
|
263
|
+
own `docs/e2e-setup.md`, not in this shared file.
|
|
264
|
+
|
|
265
|
+
## Auth State Gitignore
|
|
266
|
+
|
|
267
|
+
Before writing any storage state under `playwright/.auth/`, `tests/.auth/`, or
|
|
268
|
+
`e2e/.auth/`, verify the path is in the nearest `.gitignore`. If absent, ADD the entry
|
|
269
|
+
first. Auth state files contain live session cookies — committing one is a credential
|
|
270
|
+
leak. See `rules/playwright-auth-gitignore.md`.
|
|
271
|
+
|
|
272
|
+
## Supabase Parallelism (Playwright)
|
|
273
|
+
|
|
274
|
+
When `NEXT_PUBLIC_SUPABASE_URL` references a remote project (not localhost), set
|
|
275
|
+
`workers: 1` in `playwright.config.ts` to prevent auth/RLS races. Not needed with a
|
|
276
|
+
local Supabase emulator. Detect via: `grep -E "127\.0\.0\.1|localhost" apps/{app}/.env.local | grep SUPABASE_URL`.
|
|
277
|
+
|
|
278
|
+
## Mock Server for Server-Side Fetch
|
|
279
|
+
|
|
280
|
+
`page.route()` intercepts browser-process requests only. For Next.js server actions,
|
|
281
|
+
route handlers, or middleware (Node-process fetch), spin up a real local HTTP server in
|
|
282
|
+
`globalSetup` and point the dev server at it via `webServer.env`. See
|
|
283
|
+
`rules/playwright-server-side-mocking.md`.
|
|
284
|
+
|
|
285
|
+
## Cold-Start Warmup (Playwright / Next.js)
|
|
286
|
+
|
|
287
|
+
Next.js dev mode (Turbopack) compiles routes lazily. Add a warmup fetch at the end of
|
|
288
|
+
`globalSetup`, after the mock server starts and before specs run. Use `redirect: 'manual'` for
|
|
289
|
+
auth-protected routes. Wrap in `try/catch` — warmup is best-effort.
|
|
290
|
+
|
|
291
|
+
## Locator Hygiene (Playwright)
|
|
292
|
+
|
|
293
|
+
Prefer stable accessibility-driven selectors (`getByRole`, `getByLabel`, `getByTestId`)
|
|
294
|
+
over positional CSS selectors (`.locator('.class').nth(N)`). When calling `page.goto()` inside
|
|
295
|
+
a loop, snapshot text/href BEFORE navigation rather than holding stale `Locator` handles.
|
|
296
|
+
|
|
297
|
+
## Visual Baseline Workflow
|
|
298
|
+
|
|
299
|
+
| Situation | What happens |
|
|
300
|
+
|---|---|
|
|
301
|
+
| No baseline (new screen) | Playwright creates the baseline on first run; test passes; `cbp-frontend-ui` at Step 5b reviews semantically. |
|
|
302
|
+
| Baseline exists, diff ≤ threshold | Test passes. |
|
|
303
|
+
| Baseline exists, diff > threshold | `visual_regression` failure. Agent does NOT retry. `cbp-frontend-ui` at Step 5b flags it; `/cbp-round-end` Step 3b constructs a user QA item. User decides: fix-task or `--update-snapshots`. |
|
|
@@ -118,6 +118,7 @@ function main() {
|
|
|
118
118
|
rate_limits: true,
|
|
119
119
|
repo_pr: true,
|
|
120
120
|
worktree: true,
|
|
121
|
+
infra_drift: true,
|
|
121
122
|
no_color: false,
|
|
122
123
|
};
|
|
123
124
|
try {
|
|
@@ -136,6 +137,7 @@ function main() {
|
|
|
136
137
|
"rate_limits",
|
|
137
138
|
"repo_pr",
|
|
138
139
|
"worktree",
|
|
140
|
+
"infra_drift",
|
|
139
141
|
]) {
|
|
140
142
|
if (typeof parsed.lines[k] === "boolean") cfg[k] = parsed.lines[k];
|
|
141
143
|
}
|
|
@@ -374,6 +376,48 @@ function main() {
|
|
|
374
376
|
}
|
|
375
377
|
}
|
|
376
378
|
|
|
379
|
+
// ============================================================
|
|
380
|
+
// LINE 7 — Infra drift (monorepo feat branches behind main)
|
|
381
|
+
// ============================================================
|
|
382
|
+
// Only the codebyplan monorepo (templates/ present) on a feat branch can carry
|
|
383
|
+
// stale .claude/ infra. No fetch — counts against the cached origin/main only.
|
|
384
|
+
if (shouldShow("INFRA_DRIFT", cfg.infra_drift)) {
|
|
385
|
+
if (
|
|
386
|
+
BRANCH.startsWith("feat/") &&
|
|
387
|
+
fs.existsSync(
|
|
388
|
+
path.join(root, "packages", "codebyplan-package", "templates")
|
|
389
|
+
)
|
|
390
|
+
) {
|
|
391
|
+
let behind = 0;
|
|
392
|
+
try {
|
|
393
|
+
behind = parseInt(
|
|
394
|
+
execFileSync(
|
|
395
|
+
"git",
|
|
396
|
+
[
|
|
397
|
+
"-C",
|
|
398
|
+
root,
|
|
399
|
+
"rev-list",
|
|
400
|
+
"--count",
|
|
401
|
+
"HEAD..origin/main",
|
|
402
|
+
"--",
|
|
403
|
+
".claude",
|
|
404
|
+
"packages/codebyplan-package/templates",
|
|
405
|
+
],
|
|
406
|
+
{ encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] }
|
|
407
|
+
).trim(),
|
|
408
|
+
10
|
|
409
|
+
);
|
|
410
|
+
} catch {
|
|
411
|
+
behind = 0;
|
|
412
|
+
}
|
|
413
|
+
if (Number.isFinite(behind) && behind > 0) {
|
|
414
|
+
out.push(
|
|
415
|
+
`${C.YELLOW}⚠ infra ${behind} behind${C.RST} ${C.DIM}→ /cbp-refresh-infra${C.RST}`
|
|
416
|
+
);
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
|
|
377
421
|
process.stdout.write(out.length ? out.join("\n") + "\n" : "");
|
|
378
422
|
}
|
|
379
423
|
|
|
@@ -80,7 +80,8 @@ def main():
|
|
|
80
80
|
# ---- Config: line toggles + no_color -------------------------------------
|
|
81
81
|
cfg = {
|
|
82
82
|
"identity": True, "context": True, "cost": True,
|
|
83
|
-
"rate_limits": True, "repo_pr": True, "worktree": True,
|
|
83
|
+
"rate_limits": True, "repo_pr": True, "worktree": True,
|
|
84
|
+
"infra_drift": True, "no_color": False,
|
|
84
85
|
}
|
|
85
86
|
try:
|
|
86
87
|
with open(os.path.join(root, ".codebyplan", "statusline.json"), "r", encoding="utf-8") as fh:
|
|
@@ -90,7 +91,7 @@ def main():
|
|
|
90
91
|
cfg["no_color"] = parsed["no_color"]
|
|
91
92
|
lines = parsed.get("lines")
|
|
92
93
|
if isinstance(lines, dict):
|
|
93
|
-
for k in ["identity", "context", "cost", "rate_limits", "repo_pr", "worktree"]:
|
|
94
|
+
for k in ["identity", "context", "cost", "rate_limits", "repo_pr", "worktree", "infra_drift"]:
|
|
94
95
|
if isinstance(lines.get(k), bool):
|
|
95
96
|
cfg[k] = lines[k]
|
|
96
97
|
except Exception:
|
|
@@ -321,6 +322,27 @@ def main():
|
|
|
321
322
|
l6 += " %s%s%s" % (DIM, wt_path_disp, RST)
|
|
322
323
|
out.append(l6)
|
|
323
324
|
|
|
325
|
+
# ===== LINE 7 — Infra drift (monorepo feat branches behind main) =====
|
|
326
|
+
# Only the codebyplan monorepo (templates/ present) on a feat branch can carry
|
|
327
|
+
# stale .claude/ infra. No fetch — counts against the cached origin/main only.
|
|
328
|
+
if should_show("INFRA_DRIFT", cfg["infra_drift"]):
|
|
329
|
+
if branch.startswith("feat/") and os.path.isdir(
|
|
330
|
+
os.path.join(root, "packages", "codebyplan-package", "templates")
|
|
331
|
+
):
|
|
332
|
+
behind = 0
|
|
333
|
+
try:
|
|
334
|
+
res = subprocess.run(
|
|
335
|
+
["git", "-C", root, "rev-list", "--count", "HEAD..origin/main",
|
|
336
|
+
"--", ".claude", "packages/codebyplan-package/templates"],
|
|
337
|
+
stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True,
|
|
338
|
+
)
|
|
339
|
+
if res.returncode == 0:
|
|
340
|
+
behind = int(res.stdout.strip() or "0")
|
|
341
|
+
except Exception:
|
|
342
|
+
behind = 0
|
|
343
|
+
if behind > 0:
|
|
344
|
+
out.append("%s⚠ infra %d behind%s %s→ /cbp-refresh-infra%s" % (YELLOW, behind, RST, DIM, RST))
|
|
345
|
+
|
|
324
346
|
sys.stdout.write(("\n".join(out) + "\n") if out else "")
|
|
325
347
|
|
|
326
348
|
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
#
|
|
16
16
|
# DISPLAY OPTIONS (team-shared, committed)
|
|
17
17
|
# .codebyplan/statusline.json -> { "lines": {identity,context,cost,rate_limits,
|
|
18
|
-
# repo_pr,worktree}, "no_color": bool }
|
|
18
|
+
# repo_pr,worktree,infra_drift}, "no_color": bool }
|
|
19
19
|
#
|
|
20
20
|
# ENV-VAR OVERRIDES (env > config > default)
|
|
21
21
|
# CBP_STATUSLINE_HIDE_IDENTITY=1 suppress line 1 (folder, branch, model, effort, …)
|
|
@@ -24,6 +24,7 @@
|
|
|
24
24
|
# CBP_STATUSLINE_HIDE_RATE_LIMITS=1 suppress line 4 (5h / 7d rate limits)
|
|
25
25
|
# CBP_STATUSLINE_HIDE_REPO_PR=1 suppress line 5 (repo host/owner/name, PR)
|
|
26
26
|
# CBP_STATUSLINE_HIDE_WORKTREE=1 suppress line 6 (worktree name/branch/path)
|
|
27
|
+
# CBP_STATUSLINE_HIDE_INFRA_DRIFT=1 suppress line 7 (.claude infra commits behind main)
|
|
27
28
|
# CBP_STATUSLINE_NO_COLOR=1 strip all ANSI colour codes (also honoured by $NO_COLOR)
|
|
28
29
|
#
|
|
29
30
|
# TEST SEAMS (no effect in normal use)
|
|
@@ -104,7 +105,7 @@ eval "$(echo "$INPUT" | jq -r '
|
|
|
104
105
|
|
|
105
106
|
# ---- Config: line toggles + no_color from .codebyplan/statusline.json --------
|
|
106
107
|
CFG_IDENTITY=true; CFG_CONTEXT=true; CFG_COST=true
|
|
107
|
-
CFG_RATE_LIMITS=true; CFG_REPO_PR=true; CFG_WORKTREE=true; CFG_NO_COLOR=false
|
|
108
|
+
CFG_RATE_LIMITS=true; CFG_REPO_PR=true; CFG_WORKTREE=true; CFG_INFRA_DRIFT=true; CFG_NO_COLOR=false
|
|
108
109
|
CBP_CFG="$CBP_ROOT/.codebyplan/statusline.json"
|
|
109
110
|
if [ -f "$CBP_CFG" ] && command -v jq >/dev/null 2>&1; then
|
|
110
111
|
# Use `!= false` / `== true` (NOT jq `//`): the `//` operator treats an explicit
|
|
@@ -117,6 +118,7 @@ if [ -f "$CBP_CFG" ] && command -v jq >/dev/null 2>&1; then
|
|
|
117
118
|
"CFG_RATE_LIMITS=\(.lines.rate_limits != false)",
|
|
118
119
|
"CFG_REPO_PR=\(.lines.repo_pr != false)",
|
|
119
120
|
"CFG_WORKTREE=\(.lines.worktree != false)",
|
|
121
|
+
"CFG_INFRA_DRIFT=\(.lines.infra_drift != false)",
|
|
120
122
|
"CFG_NO_COLOR=\(.no_color == true)"
|
|
121
123
|
' "$CBP_CFG" 2>/dev/null)"
|
|
122
124
|
fi
|
|
@@ -401,3 +403,21 @@ if should_show WORKTREE "$CFG_WORKTREE"; then
|
|
|
401
403
|
printf "%b\n" "$L6"
|
|
402
404
|
fi
|
|
403
405
|
fi
|
|
406
|
+
|
|
407
|
+
# ============================================================
|
|
408
|
+
# LINE 7 — Infra drift (monorepo feat branches behind main)
|
|
409
|
+
# ============================================================
|
|
410
|
+
# Only the codebyplan monorepo (templates/ present) on a feat branch can carry
|
|
411
|
+
# stale .claude/ infra. No fetch — counts against the cached origin/main only.
|
|
412
|
+
if should_show INFRA_DRIFT "$CFG_INFRA_DRIFT"; then
|
|
413
|
+
case "$BRANCH" in
|
|
414
|
+
feat/*)
|
|
415
|
+
if [ -d "$CBP_ROOT/packages/codebyplan-package/templates" ]; then
|
|
416
|
+
BEHIND="$(git -C "$CBP_ROOT" rev-list --count HEAD..origin/main -- .claude packages/codebyplan-package/templates 2>/dev/null)"
|
|
417
|
+
if [ -n "$BEHIND" ] && [ "$BEHIND" -gt 0 ] 2>/dev/null; then
|
|
418
|
+
printf "%b\n" "${YELLOW}⚠ infra ${BEHIND} behind${RST} ${DIM}→ /cbp-refresh-infra${RST}"
|
|
419
|
+
fi
|
|
420
|
+
fi
|
|
421
|
+
;;
|
|
422
|
+
esac
|
|
423
|
+
fi
|
|
@@ -19,12 +19,14 @@ _get_limit() {
|
|
|
19
19
|
/CHANGELOG.md|*/CHANGELOG.md|*/user-input.md|/.claude/docs/research/*) echo ""; return;;
|
|
20
20
|
# Managed .claude/ files
|
|
21
21
|
/.claude/rules/*.md) echo "100 200"; return;;
|
|
22
|
+
/.claude/context/testing/e2e.md) echo "300 600"; return;; # consolidated E2E shared-workflow + dispatch contract (CHK-145)
|
|
22
23
|
/.claude/context/*.md|/.claude/context/*/*.md) echo "200 400"; return;;
|
|
23
24
|
/.claude/skills/*/SKILL.md) echo "300 600"; return;;
|
|
24
25
|
/.claude/skills/*/reference/*.md) echo "200 400"; return;;
|
|
25
26
|
/.claude/skills/*/examples/*.md|/.claude/skills/*/templates/*) echo "100 200"; return;;
|
|
26
27
|
/.claude/agents/*/AGENT.md) echo "400 800"; return;;
|
|
27
28
|
/.claude/agents/*/*.md) echo "200 400"; return;;
|
|
29
|
+
/.claude/agents/*.md) echo "400 800"; return;;
|
|
28
30
|
/.claude/hooks/*.sh) echo "150 300"; return;;
|
|
29
31
|
/.claude/docs/architecture/*.md|/.claude/docs/server/*.md) echo "200 400"; return;;
|
|
30
32
|
/.claude/docs/stack/*/index.md|/.claude/docs/stack/*/guide.md) echo "150 300"; return;;
|
|
@@ -62,7 +62,8 @@ run_case() {
|
|
|
62
62
|
|
|
63
63
|
# ===== Good fixtures (must exit 0) =====
|
|
64
64
|
run_case "good-skill" good/skill.md /.claude/skills/cbp-fixture/SKILL.md 0
|
|
65
|
-
run_case "good-agent"
|
|
65
|
+
run_case "good-agent" good/agent.md /.claude/agents/cbp-fixture/AGENT.md 0
|
|
66
|
+
run_case "good-agent-flat" good/agent.md /.claude/agents/cbp-fixture.md 0
|
|
66
67
|
run_case "good-rule" good/rule.md /.claude/rules/cbp-fixture.md 0
|
|
67
68
|
run_case "good-hook" good/hook.sh /.claude/hooks/cbp-fixture.sh 0
|
|
68
69
|
|
|
@@ -12,6 +12,7 @@ TEMPLATE=""
|
|
|
12
12
|
case "$REL_PATH" in
|
|
13
13
|
/.claude/skills/*) TEMPLATE="/packages/codebyplan-package/templates/skills/build-cc-skill/reference/cbp-quality.md" ;;
|
|
14
14
|
/.claude/agents/*/AGENT.md) TEMPLATE="/packages/codebyplan-package/templates/skills/build-cc-agent/reference/cbp-quality.md" ;;
|
|
15
|
+
/.claude/agents/*.md) TEMPLATE="/packages/codebyplan-package/templates/skills/build-cc-agent/reference/cbp-quality.md" ;;
|
|
15
16
|
/.claude/docs/research/*/1-*) TEMPLATE="/docs/templates/.claude/docs/research/1-problem.md" ;;
|
|
16
17
|
/.claude/docs/research/*/2-*) TEMPLATE="/docs/templates/.claude/docs/research/2-claude-default.md" ;;
|
|
17
18
|
/.claude/docs/research/*/3-*) TEMPLATE="/docs/templates/.claude/docs/research/3-official-docs.md" ;;
|
|
@@ -34,7 +34,14 @@ The `install`/`update`/`uninstall` flow handles these files identically to how i
|
|
|
34
34
|
|
|
35
35
|
## Current status
|
|
36
36
|
|
|
37
|
-
|
|
37
|
+
Four rules are shipped:
|
|
38
|
+
|
|
39
|
+
| Rule file | Summary |
|
|
40
|
+
|---|---|
|
|
41
|
+
| `scope-vocabulary.md` | Canonical scope-marker enum (`org-shared` / `project-shared` / `repo-only:<name>`) enforced by three validators |
|
|
42
|
+
| `context-file-loading.md` | Context-file load contract — who loads what, when, and how missing files are handled |
|
|
43
|
+
| `todo-backend.md` | Todos queue contract, six DB-layer workflow invariants, and writer obligations for MCP mutators |
|
|
44
|
+
| `supabase-branch-lifecycle.md` | Supabase preview-branch lifecycle mirrors the git feat-branch lifecycle — lazy create on first DB change, delete wherever the git branch is removed |
|
|
38
45
|
|
|
39
46
|
## Contributing a rule
|
|
40
47
|
|
|
@@ -14,8 +14,11 @@ paths:
|
|
|
14
14
|
| Context File | Loaded By | Phase | Purpose |
|
|
15
15
|
|--------------|-----------|-------|---------|
|
|
16
16
|
| `context/testing/unit.md` | `cbp-round-executor` | Step 3.6 | Unit test patterns per framework |
|
|
17
|
-
| `context/testing/e2e.md` | `cbp-
|
|
17
|
+
| `context/testing/e2e.md` | `cbp-e2e-playwright`, `cbp-e2e-maestro`, `cbp-e2e-tauri`, `cbp-e2e-vscode`, `cbp-e2e-xcuitest` | Entry | Shared contract: Input/Output, preflight, failure classification, dispatch routing |
|
|
18
18
|
| `context/testing/e2e.md` | `cbp-testing-qa-agent` | Preflight | Env var list per framework |
|
|
19
|
+
| `context/testing/e2e.md` | `cbp-checkpoint-plan` | Step 4 | Discovery probe dispatch contract |
|
|
20
|
+
| `context/testing/e2e.md` | `cbp-round-execute` | Step 5 | E2E specialist dispatch routing |
|
|
21
|
+
| `context/testing/e2e.md` | `cbp-checkpoint-check` | Step 5b | Whole-checkpoint e2e dispatch |
|
|
19
22
|
| `context/testing/eslint.md` | `cbp-task-planner` | Phase 1.5 | ESLint Compliance Checklist |
|
|
20
23
|
| `context/testing/eslint.md` | `cbp-improve-round` | Phase 1.5 | Config-file compliance audit |
|
|
21
24
|
| `context/mcp-docs.md` | `cbp-task-planner` | Phase 2.6 | MCP library doc lookup contract — per-dependency consultation via DocsByPlan MCP tools (resolve_library_id → search_chunks/lookup_symbol → get_chunk) |
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
---
|
|
2
|
+
scope: org-shared
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# E2E Mandatory Run Contract
|
|
6
|
+
|
|
7
|
+
E2E is **opt-out, not opt-in**. Whenever a framework configured in `.codebyplan/e2e.json`
|
|
8
|
+
covers an app whose source changed in a round, the matching `cbp-e2e-*` specialist runs —
|
|
9
|
+
there is no per-round decision to "add" e2e. This rule defines *when a run is obligatory*
|
|
10
|
+
and *what makes a skip legitimate*. The companion Input/Output contract and the
|
|
11
|
+
framework → agent dispatch routing table live in `context/testing/e2e.md`.
|
|
12
|
+
|
|
13
|
+
## Opt-Out Contract
|
|
14
|
+
|
|
15
|
+
A framework is **eligible** in a round when ALL hold:
|
|
16
|
+
|
|
17
|
+
- `.codebyplan/e2e.json` `frameworks.{name}.enabled === true` AND `auto_run === true`.
|
|
18
|
+
- The framework's `app` source path intersects the round's `files_changed` (repo root for
|
|
19
|
+
single-app repos).
|
|
20
|
+
|
|
21
|
+
When eligible, `/cbp-round-execute` Step 5 spawns the matching specialist in parallel with
|
|
22
|
+
`cbp-testing-qa-agent`; `/cbp-checkpoint-check` Step 5b does the same against the aggregated
|
|
23
|
+
file union with `whole_checkpoint_mode: true`.
|
|
24
|
+
|
|
25
|
+
`has_ui_work` and `testing_profile` are **hints only** — they short-circuit e2e *solely* for
|
|
26
|
+
`claude_only` / `backend`-only rounds (no UI surface). For every other profile, config is
|
|
27
|
+
authoritative: a `has_ui_work === false` inference never suppresses an eligible framework.
|
|
28
|
+
|
|
29
|
+
When `frameworks` is absent or empty, zero frameworks are eligible — no specialist runs and
|
|
30
|
+
no hard-fail fires.
|
|
31
|
+
|
|
32
|
+
## `e2e_eligible_skipped` Hard-Fail
|
|
33
|
+
|
|
34
|
+
If a framework was eligible this round but no specialist ran AND no valid skip reason is
|
|
35
|
+
recorded, the round **hard-fails** and `/cbp-round-execute` Step 6 auto-triggers
|
|
36
|
+
`/cbp-round-input`. Silent skips are bugs, not conveniences — this is the enforcement behind
|
|
37
|
+
the opt-out contract.
|
|
38
|
+
|
|
39
|
+
**Valid skip reasons** (must be recorded in `round.context.e2e_outputs[framework]` or the
|
|
40
|
+
round failure context):
|
|
41
|
+
|
|
42
|
+
- User explicitly aborted preflight → specialist returns `status: 'failed'` with a
|
|
43
|
+
`critical_issues[].type === 'preflight_aborted'` entry.
|
|
44
|
+
- Environmental block: `preflight.*.ok === false` (missing simulator / server / binary / env)
|
|
45
|
+
persists after the Step 6.5 AskUserQuestion loop — an env block, not a spec failure.
|
|
46
|
+
- `platforms[]` in `e2e.json` excludes the current CI target (e.g. an iOS-only config on a
|
|
47
|
+
Linux runner with no simulator).
|
|
48
|
+
- Round type is `survey` (no code executed).
|
|
49
|
+
|
|
50
|
+
No other skip reason is valid. "Looked UI-light", "tests are slow", or "covered by unit
|
|
51
|
+
tests" are NOT valid skips.
|
|
52
|
+
|
|
53
|
+
## No In-Spec Env-Skip Gate
|
|
54
|
+
|
|
55
|
+
Spec files MUST NOT contain in-spec env skip gates such as `test.skip(!process.env.X, ...)`.
|
|
56
|
+
They bypass preflight, produce zero-assertion runs, and hide missing env vars behind a green
|
|
57
|
+
check. Pre-flight (`context/testing/e2e.md` Step 6.5.1) is the only mechanism for
|
|
58
|
+
env-conditional skipping.
|
|
59
|
+
|
|
60
|
+
A spec that ran with `passed === 0 && skipped > 0` for any path touching `files_changed` is a
|
|
61
|
+
**hard fail**, not a pass — `cbp-task-check` (`agents/cbp-task-check.md`) refuses a READY
|
|
62
|
+
verdict on a zero-assertion e2e run and routes to a fix round per this rule.
|
|
63
|
+
|
|
64
|
+
## Cross-References
|
|
65
|
+
|
|
66
|
+
- `context/testing/e2e.md` — Input/Output contract, pre-flight loop, failure classification,
|
|
67
|
+
and the dispatch routing table (framework → agent).
|
|
68
|
+
- `agents/cbp-task-check.md` — enforces the zero-assertion hard-fail at verdict time.
|
|
69
|
+
- `skills/cbp-round-execute/SKILL.md` Step 5/6, `skills/cbp-checkpoint-check/SKILL.md` Step 5b
|
|
70
|
+
— the config-driven dispatch and `e2e_eligible_skipped` gate implementations.
|