codebyplan 1.11.0 → 1.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +56 -5
- package/package.json +1 -1
- package/templates/README.md +1 -1
- package/templates/agents/cbp-cc-executor.md +1 -1
- package/templates/agents/cbp-e2e-maestro.md +202 -0
- package/templates/agents/cbp-e2e-playwright.md +229 -0
- package/templates/agents/cbp-e2e-tauri.md +184 -0
- package/templates/agents/cbp-e2e-vscode.md +203 -0
- package/templates/agents/cbp-e2e-xcuitest.md +224 -0
- package/templates/agents/cbp-improve-claude.md +1 -1
- package/templates/agents/cbp-round-executor.md +12 -12
- package/templates/agents/cbp-task-check.md +1 -2
- package/templates/agents/cbp-task-planner.md +2 -0
- package/templates/agents/cbp-testing-qa-agent.md +14 -36
- package/templates/context/testing/e2e.md +303 -0
- package/templates/hooks/validate-structure-lengths.sh +2 -0
- package/templates/hooks/validate-structure-smoke.sh +2 -1
- package/templates/hooks/validate-structure-templates.sh +1 -0
- package/templates/rules/context-file-loading.md +4 -1
- package/templates/rules/e2e-mandatory.md +70 -0
- package/templates/skills/cbp-build-cc-agent/SKILL.md +16 -14
- package/templates/skills/cbp-build-cc-agent/reference/cbp-quality.md +4 -4
- package/templates/skills/cbp-build-cc-agent/scripts/validate-agent.sh +8 -6
- package/templates/skills/cbp-build-cc-mode/SKILL.md +4 -4
- package/templates/skills/cbp-checkpoint-check/SKILL.md +12 -8
- package/templates/skills/cbp-checkpoint-complete/SKILL.md +1 -1
- package/templates/skills/cbp-checkpoint-plan/SKILL.md +2 -2
- package/templates/skills/cbp-checkpoint-plan/reference/e2e-discovery-probe.md +5 -5
- package/templates/skills/cbp-e2e-setup/SKILL.md +254 -0
- package/templates/skills/cbp-e2e-setup/reference/maestro.md +200 -0
- package/templates/skills/cbp-e2e-setup/reference/playwright.md +212 -0
- package/templates/skills/cbp-e2e-setup/reference/tauri.md +147 -0
- package/templates/skills/cbp-e2e-setup/reference/vscode.md +154 -0
- package/templates/skills/cbp-e2e-setup/reference/xcuitest.md +185 -0
- package/templates/skills/cbp-frontend-ui/SKILL.md +8 -8
- package/templates/skills/cbp-frontend-ux/SKILL.md +1 -1
- package/templates/skills/cbp-round-end/SKILL.md +16 -26
- package/templates/skills/cbp-round-execute/SKILL.md +31 -18
- package/templates/skills/cbp-task-check/SKILL.md +2 -2
- package/templates/agents/cbp-test-e2e-agent.md +0 -363
|
@@ -69,14 +69,14 @@ output:
|
|
|
69
69
|
specialist_needs: # What specialist agents are needed post-execution
|
|
70
70
|
tests_written:
|
|
71
71
|
unit_tests: string[] # Unit test files written inline (Step 3.6)
|
|
72
|
-
e2e_tests: string[] # Always empty — e2e test files are written by cbp-
|
|
72
|
+
e2e_tests: string[] # Always empty — e2e test files are written by the cbp-e2e-* specialist agents (dispatched per context/testing/e2e.md), spawned by /cbp-round-execute Step 5, NOT by this executor
|
|
73
73
|
framework_configured: boolean # True if test/lint framework was set up
|
|
74
74
|
review_needed:
|
|
75
75
|
ui_review: boolean # Visual design review needed
|
|
76
76
|
ux_review: boolean # UX flow review needed
|
|
77
77
|
security_review: boolean # Security scan needed
|
|
78
|
-
testing_profile: string # Read from task.context.testing_profile (and round.context.testing_profile_override if set); surfaced for /cbp-round-execute Step 5 per-wave cbp-testing-qa-agent + cbp-
|
|
79
|
-
# NOTE:
|
|
78
|
+
testing_profile: string # Read from task.context.testing_profile (and round.context.testing_profile_override if set); surfaced for /cbp-round-execute Step 5 per-wave cbp-testing-qa-agent + cbp-e2e-* specialist skip logic per rules/testing-profile.md
|
|
79
|
+
# NOTE: e2e output is populated by /cbp-round-execute Step 5 (NOT this agent) and lives at round.context.e2e_outputs (a framework-keyed map, one entry per eligible cbp-e2e-* specialist). The executor's Step 3.8 cbp-frontend-ui invocation runs with phase: 'style_only' and never sees screenshots; the post-e2e screenshot review happens at Step 5b.
|
|
80
80
|
```
|
|
81
81
|
|
|
82
82
|
## Tools Available
|
|
@@ -165,7 +165,7 @@ Before ANY Write/Edit invocation during execution, the target path MUST appear i
|
|
|
165
165
|
|
|
166
166
|
**Exemptions** — paths that may be edited without an entry in `files_to_modify[]`:
|
|
167
167
|
|
|
168
|
-
- Test files written by Step 3.6 (unit only — e2e is written by `cbp-
|
|
168
|
+
- Test files written by Step 3.6 (unit only — e2e is written by the `cbp-e2e-*` specialist agents post-executor, not by this agent) when the plan flagged `tests_written` as a deliverable
|
|
169
169
|
- Lockfiles regenerated by `pnpm install` after `package.json` edits already in scope
|
|
170
170
|
- Generated TypeScript types (e.g. `apps/web/src/lib/database.types.ts`) when DB migrations are in scope
|
|
171
171
|
- Auto-formatted prettier rewrites of files already in `files_to_modify[]`
|
|
@@ -181,7 +181,7 @@ Two categories of work are NOT performed by this agent and must be returned to t
|
|
|
181
181
|
| Action | Why excluded | Where it goes |
|
|
182
182
|
|--------|--------------|---------------|
|
|
183
183
|
| MCP `create_task`, `update_task`, `complete_task`, `add_round`, etc. (any DB-side state mutation) | Executor frontmatter does NOT include MCP DB tools. Tool-not-available errors force orchestrator improvisation. | Surface as `improvements_noted` entry; orchestrator runs the MCP call after this agent returns. Executor never tries to invoke MCP DB tools. |
|
|
184
|
-
| Spawning `cbp-
|
|
184
|
+
| Spawning `cbp-e2e-*` specialist agents | Executor's tools list (Read/Write/Edit/Glob/Grep/Bash/TaskUpdate/AskUserQuestion/Skill) does NOT include the `Task` / Agent tool. E2E execution is owned by the `cbp-e2e-*` specialist agents (dispatched per `context/testing/e2e.md`), which the orchestrator spawns at `/cbp-round-execute` Step 5 (in parallel with `cbp-testing-qa-agent`). | Set `specialist_needs.review_needed.ux_review` / `ui_review` if applicable. Do NOT attempt to spawn any e2e agent from inside the executor. |
|
|
185
185
|
|
|
186
186
|
If the plan implies either action, complete the rest of the work and surface the carved-out steps in `improvements_noted[]` for the orchestrator to handle.
|
|
187
187
|
|
|
@@ -358,7 +358,7 @@ When the approved plan includes specialized work, delegate to sub-executor agent
|
|
|
358
358
|
|
|
359
359
|
After implementing features in Step 3, write unit tests for all new/modified code. Tests are deliverables — they ship with the code in the same round.
|
|
360
360
|
|
|
361
|
-
**Reference**: Read `.claude/context/testing/unit.md` (when present) for platform-specific patterns and setup instructions.
|
|
361
|
+
**Reference**: Read `.claude/context/testing/unit.md` (when present) for platform-specific patterns and setup instructions. E2E test authoring is owned by the `cbp-e2e-*` specialist agents — do NOT write e2e specs here.
|
|
362
362
|
|
|
363
363
|
**Platform detection** from `test_strategy` in approved plan (set by `cbp-task-planner` Phase 2.9):
|
|
364
364
|
|
|
@@ -383,7 +383,7 @@ After implementing features in Step 3, write unit tests for all new/modified cod
|
|
|
383
383
|
|
|
384
384
|
### Step 3.7: REMOVED — E2E execution moved to /cbp-round-execute Step 5
|
|
385
385
|
|
|
386
|
-
E2E test authoring + execution is owned by `cbp-
|
|
386
|
+
E2E test authoring + execution is owned by the `cbp-e2e-*` specialist agents (dispatched per `context/testing/e2e.md`), spawned in parallel with `cbp-testing-qa-agent` by `/cbp-round-execute` Step 5. The executor does NOT spawn them (Step 0.2 carve-out). When the plan declares e2e work is needed, the executor's only obligation is to set `specialist_needs.review_needed.ui_review` / `ux_review` if applicable; the orchestrator handles the rest.
|
|
387
387
|
|
|
388
388
|
### Step 3.65: Defensive React Checklist (after writing component code)
|
|
389
389
|
|
|
@@ -396,7 +396,7 @@ E2E test authoring + execution is owned by `cbp-test-e2e-agent`, spawned in para
|
|
|
396
396
|
|
|
397
397
|
### Step 3.8: Frontend Self-Review (UI + UX, style-only)
|
|
398
398
|
|
|
399
|
-
After unit tests (Step 3.6) and the defensive React checklist (Step 3.65), run inline style-quality self-review on the round's UI work BEFORE Step 4 quality checks. This pass runs WITHOUT e2e screenshots — the screenshot-driven Phase 6.5 of `cbp-frontend-ui` runs separately at `/cbp-round-execute` Step 5b once `cbp-
|
|
399
|
+
After unit tests (Step 3.6) and the defensive React checklist (Step 3.65), run inline style-quality self-review on the round's UI work BEFORE Step 4 quality checks. This pass runs WITHOUT e2e screenshots — the screenshot-driven Phase 6.5 of `cbp-frontend-ui` runs separately at `/cbp-round-execute` Step 5b once the `cbp-e2e-*` specialist agent has produced screenshots. Mirror counterpart of Step 2.7's pre-implementation `cbp-frontend-design` pass — design decided up-front, polish reviewed at the end of execution.
|
|
400
400
|
|
|
401
401
|
**Trigger gate** — fire when `files_changed` contains ANY of:
|
|
402
402
|
|
|
@@ -436,7 +436,7 @@ If none match, skip — proceed directly to Step 4.
|
|
|
436
436
|
- Aggregate `summary` totals into `round.context.frontend_self_review.summary` (combined critical / warning / suggestion / auto_fixed / out_of_scope_fixes).
|
|
437
437
|
|
|
438
438
|
4. **Surface non-mechanical findings** to the round summary:
|
|
439
|
-
- `baseline_regression` and `rendered_visual` findings from `cbp-frontend-ui` are NOT auto-fixed (root cause is typically in app state/data, not styling) — surface
|
|
439
|
+
- `baseline_regression` and `rendered_visual` findings from `cbp-frontend-ui` are NOT auto-fixed (root cause is typically in app state/data, not styling) — surface in `round.context.frontend_ui_review` findings; `/cbp-round-end` Step 7 surfaces baseline-regression findings as a blocking accept-or-fix gate (baselines never auto-accepted).
|
|
440
440
|
- `out_of_scope_fixes` from either skill (findings whose target file is outside `files_changed`) — surface in `improvements_noted[]` for follow-up rounds; the scope gate prevented silent absorption.
|
|
441
441
|
|
|
442
442
|
**Why inline (not a separate spawn)**: the post-implementation review consumes the same files the executor just touched. Spawning a separate agent doubles token cost (re-reading the files) and serialises wall time; invoking via Skill keeps both review passes inside the executor's working memory and lets fixes apply with the same Edit/Write tools that wrote the original code. The Pre-Edit Scope Gate inside each skill provides the same boundary the standalone agent enforced.
|
|
@@ -461,7 +461,7 @@ Analyze the completed work and populate `specialist_needs`:
|
|
|
461
461
|
|
|
462
462
|
**Tests written** (execution phase — completed in Step 3.6):
|
|
463
463
|
- `unit_tests_written`: List unit test files written inline by executor (Step 3.6)
|
|
464
|
-
- `e2e_tests_written`: Always empty here — E2E test authoring is owned by `cbp-
|
|
464
|
+
- `e2e_tests_written`: Always empty here — E2E test authoring is owned by the `cbp-e2e-*` specialist agents (dispatched per `context/testing/e2e.md`), spawned by `/cbp-round-execute` Step 5 (post-executor)
|
|
465
465
|
- `framework_configured`: true if a unit-test/lint framework was set up from scratch
|
|
466
466
|
|
|
467
467
|
**Review needed** (validation phase — these review quality):
|
|
@@ -515,7 +515,7 @@ This gate makes the contract enforceable. Without it, Step 3.4 can be silently s
|
|
|
515
515
|
|
|
516
516
|
#### Subagent Cost Recording
|
|
517
517
|
|
|
518
|
-
When ANY background subagents were spawned during execution (general-purpose, cbp-database-agent,
|
|
518
|
+
When ANY background subagents were spawned during execution (general-purpose, cbp-database-agent, etc.), populate `round.context.subagent_summaries[]` with one entry per agent:
|
|
519
519
|
|
|
520
520
|
```yaml
|
|
521
521
|
subagent_summaries:
|
|
@@ -583,7 +583,7 @@ Which would you prefer?
|
|
|
583
583
|
- **Spawned by**: `/cbp-round-execute` Step 3 (single-wave 3-AGENT path or per-wave 3-WAVE path)
|
|
584
584
|
- **Returns to**: `/cbp-round-execute` which collects output and runs per-wave `cbp-testing-qa-agent`
|
|
585
585
|
- **Depends on**: `cbp-task-planner` agent (provides approved plan)
|
|
586
|
-
- **May spawn**: `cbp-database-agent` as sub-executor for Supabase operations. (NOT `cbp-
|
|
586
|
+
- **May spawn**: `cbp-database-agent` as sub-executor for Supabase operations. (NOT any `cbp-e2e-*` specialist — those are dispatched per `context/testing/e2e.md` and spawned by `/cbp-round-execute` Step 5, per the Step 0.2 carve-out.)
|
|
587
587
|
|
|
588
588
|
## Structure Knowledge
|
|
589
589
|
|
|
@@ -80,10 +80,9 @@ Compare task work against `checkpoint.goal`:
|
|
|
80
80
|
|
|
81
81
|
Review all QA items across all rounds:
|
|
82
82
|
- **Auto items**: Verify all passed (build, lint, types, tests)
|
|
83
|
-
- **User items**: Verify all marked pass/skip
|
|
84
83
|
- **Default items**: Verify all resolved (pass or skipped with reason)
|
|
85
84
|
|
|
86
|
-
**E2E pass vs skipped distinction**: When reading `auto_qa.items[]` for `check: 'e2e'`, do NOT conflate `status: 'pass'` with `status: 'skipped'`. A spec that ran with `passed === 0 && skipped > 0` for any path touching `files_changed` is a hard fail, not a pass — verdict text MUST explicitly call this out: "E2E spec authored but assertions did not execute (skip-gated)." Do NOT issue a READY verdict on a zero-assertion e2e run; route to a fix round per `rules/
|
|
85
|
+
**E2E pass vs skipped distinction**: When reading `auto_qa.items[]` for `check: 'e2e'`, do NOT conflate `status: 'pass'` with `status: 'skipped'`. A spec that ran with `passed === 0 && skipped > 0` for any path touching `files_changed` is a hard fail, not a pass — verdict text MUST explicitly call this out: "E2E spec authored but assertions did not execute (skip-gated)." Do NOT issue a READY verdict on a zero-assertion e2e run; route to a fix round per `rules/e2e-mandatory.md`.
|
|
87
86
|
|
|
88
87
|
List any pending or failed items. Determine if they are blockers.
|
|
89
88
|
|
|
@@ -502,6 +502,8 @@ plan.testing_profile: 'claude_only' | 'web' | 'desktop' | 'backend' | 'full_matr
|
|
|
502
502
|
|
|
503
503
|
User may override at round-start via `$ARGUMENTS`. Planner's detection is the default — not a hard gate.
|
|
504
504
|
|
|
505
|
+
**E2E eligibility is config-driven at execute time, not here.** `/cbp-round-execute` Step 5 reads `.codebyplan/e2e.json` and dispatches a `cbp-e2e-*` specialist for every framework that is `enabled && auto_run` and whose `app` path intersects the round's `files_changed` (see `rules/e2e-mandatory.md`). `testing_profile` and `has_ui_work` are **hints only**: they short-circuit e2e solely for `claude_only` / `backend`-only rounds — they do not decide eligibility for any other profile. Do not gate e2e on `has_ui_work` in the plan. Optionally, if `.codebyplan/e2e.json` exists, read each framework's `app` path to seed `pages_affected` for the routes the round touches.
|
|
506
|
+
|
|
505
507
|
### Phase 5: Design Solution
|
|
506
508
|
|
|
507
509
|
Honor locked decisions. Create solution design with files, integration points.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
scope: org-shared
|
|
3
3
|
name: cbp-testing-qa-agent
|
|
4
|
-
description: Combined testing, QA generation, and default checklists. Runs build/lint/types/unit-tests/audit, generates auto
|
|
4
|
+
description: Combined testing, QA generation, and default checklists. Runs build/lint/types/unit-tests/audit, generates auto QA items, applies default production checklists. Does NOT consume e2e screenshots or frontend-ui findings.
|
|
5
5
|
tools: Read, Glob, Grep, Bash, AskUserQuestion
|
|
6
6
|
model: sonnet
|
|
7
7
|
effort: xhigh
|
|
@@ -16,11 +16,11 @@ Combined testing, QA generation, and default production checklists in a single a
|
|
|
16
16
|
Single agent that handles non-e2e quality validation in the per-wave validation phase of `/cbp-round-execute` Step 5:
|
|
17
17
|
- Run all 18 automated checks (work + quality verification)
|
|
18
18
|
- **EXECUTE** automated testing commands (build, lint, types, unit tests, visual checks, audit)
|
|
19
|
-
- Generate auto
|
|
19
|
+
- Generate auto QA items
|
|
20
20
|
- Apply default production checklist items
|
|
21
21
|
- Detect unrelated issues and missing tests
|
|
22
22
|
|
|
23
|
-
E2E execution (Playwright / Maestro / WebDriverIO / XCUITest / vscode-test) is owned by `cbp-
|
|
23
|
+
E2E execution (Playwright / Maestro / WebDriverIO / XCUITest / vscode-test) is owned by the `cbp-e2e-*` specialist agents (dispatched per `context/testing/e2e.md`), spawned in parallel with this agent by `/cbp-round-execute` Step 5. **The agents are fully independent — this agent does NOT read `round.context.e2e_outputs` or `round.context.frontend_ui_review`.** This agent emits auto QA items and default checklist items. Baseline-regression findings surface as a BLOCKING gate at `/cbp-round-end` Step 7 (an explicit accept-or-fix user decision; baselines are NEVER auto-accepted).
|
|
24
24
|
|
|
25
25
|
## Input Contract
|
|
26
26
|
|
|
@@ -63,13 +63,6 @@ output:
|
|
|
63
63
|
stdout: string # captured command output
|
|
64
64
|
stderr: string # captured error output
|
|
65
65
|
screenshots: [{page, viewport, status, file}] # visual check only
|
|
66
|
-
user_qa:
|
|
67
|
-
items:
|
|
68
|
-
- type: 'user'
|
|
69
|
-
check: string
|
|
70
|
-
status: 'pending'
|
|
71
|
-
instructions: string
|
|
72
|
-
round_number: number
|
|
73
66
|
default_checklist:
|
|
74
67
|
items:
|
|
75
68
|
- type: 'default'
|
|
@@ -99,7 +92,7 @@ output:
|
|
|
99
92
|
passed: number
|
|
100
93
|
warnings: number
|
|
101
94
|
failed: number
|
|
102
|
-
hard_fail: boolean # true if build/lint/types failed, unit tests (vitest/jest/cargo) failed when applicable, OR npm audit found critical/high vulnerabilities. E2E hard_fail is owned by
|
|
95
|
+
hard_fail: boolean # true if build/lint/types failed, unit tests (vitest/jest/cargo) failed when applicable, OR npm audit found critical/high vulnerabilities. E2E hard_fail is owned by the cbp-e2e-* specialist agents and surfaced via round.context.e2e_outputs.
|
|
103
96
|
critical_issues: string[]
|
|
104
97
|
captured_tasks:
|
|
105
98
|
- issue_index: number # index into unrelated_issues[]
|
|
@@ -154,7 +147,7 @@ Apply `testing_profile` from input before running any checks. When `testing_prof
|
|
|
154
147
|
| full_matrix | Run all checks |
|
|
155
148
|
| cross_app | Run union of touched apps' checks (intersection by detected files) |
|
|
156
149
|
|
|
157
|
-
E2E (Playwright / Maestro / WebDriverIO / XCUITest / vscode-test) is NEVER run by this agent under any profile — it's owned by `cbp-
|
|
150
|
+
E2E (Playwright / Maestro / WebDriverIO / XCUITest / vscode-test) is NEVER run by this agent under any profile — it's owned by the `cbp-e2e-*` specialist agents (dispatched per `context/testing/e2e.md`; parallel siblings spawned by `/cbp-round-execute` Step 5).
|
|
158
151
|
|
|
159
152
|
**CRITICAL: Within your profile's allowed check set (see Profile Gate Matrix above), every applicable command MUST be executed. No skipping an in-scope check without an explicit, logged reason.**
|
|
160
153
|
|
|
@@ -194,7 +187,7 @@ Procedure:
|
|
|
194
187
|
|
|
195
188
|
This closes the cycle where R2 adds a flat-config and the QA pass lints only R2 files, only for `/cbp-task-check` to later lint the full task and surface dozens of errors on R1 files — wasting an entire corrective round. Plan-time premise verification does not catch this; only test-time scope expansion does.
|
|
196
189
|
|
|
197
|
-
**Hard fail means: if any of build/lint/types/unit fails or is not executed when applicable, set `totals.hard_fail = true`. The round CANNOT complete.** E2E hard_fail is set independently by `
|
|
190
|
+
**Hard fail means: if any of build/lint/types/unit fails or is not executed when applicable, set `totals.hard_fail = true`. The round CANNOT complete.** E2E hard_fail is set independently by the `cbp-e2e-*` specialist agents and surfaced via `round.context.e2e_outputs`; `/cbp-round-execute` Step 6 considers both signals.
|
|
198
191
|
|
|
199
192
|
**Step 3a: Execute conditional unit-test checks (HARD FAIL when applicable):**
|
|
200
193
|
|
|
@@ -216,7 +209,7 @@ Run the unit-test runners detected in Step 1:
|
|
|
216
209
|
If condition is met and test fails: set `totals.hard_fail = true`.
|
|
217
210
|
If condition is not met (no applicable files changed): log `SKIPPED: <command> (reason: no applicable files changed)`.
|
|
218
211
|
|
|
219
|
-
E2E commands and their preflight (dev server / simulator / emulator / built binary / auth probe) are owned by `cbp-
|
|
212
|
+
E2E commands and their preflight (dev server / simulator / emulator / built binary / auth probe) are owned by the `cbp-e2e-*` specialist agents. See `context/testing/e2e.md` for the canonical preflight contract (Step 6.5 and the shared workflow).
|
|
220
213
|
|
|
221
214
|
**Step 3b: Execute conditional checks (soft):**
|
|
222
215
|
|
|
@@ -256,7 +249,7 @@ Report findings in `build_analysis` even if the build succeeded.
|
|
|
256
249
|
|
|
257
250
|
When `files_changed` includes a new route file under any `apps/*/src/app/api/` or `apps/*/src/app/mcp/` directory:
|
|
258
251
|
- If dev server is running: curl the endpoint without credentials, assert response is 401/403 (not 200). Log as auto QA item `auth_enforcement`.
|
|
259
|
-
- If dev server is not running:
|
|
252
|
+
- If dev server is not running: log a skipped auto QA item with the exact curl command noted in `notes` for reference.
|
|
260
253
|
|
|
261
254
|
### Phase 3.58: Missing Unit Tests for New API Routes
|
|
262
255
|
|
|
@@ -331,22 +324,7 @@ This aligns with `immediate-issue-capture.md` (resolve-in-current-scope by defau
|
|
|
331
324
|
|
|
332
325
|
**4a. Auto QA items**: Generate from Phase 3 results. One item per test category. Include stdout/stderr.
|
|
333
326
|
|
|
334
|
-
**4b.
|
|
335
|
-
|
|
336
|
-
**4b.0. Connection smoke test suppression**: Before emitting any connection smoke test user QA item (MCP connection, server health, service wiring), check whether the governing config file is unchanged. Governing config map: MCP (Claude Code) → `.mcp.json`; Dev server → `.env.local`, `.codebyplan/server.json` port_allocations; API integrations → `.env.local`. **Suppression rule**: if the governing config is NOT in `files_changed` AND `git diff HEAD -- <config>` is empty, log `{type:"user", check:"<name>", status:"skipped", notes:"Governing config <file> unchanged in this round; connection behavior is unaffected."}` — do NOT emit a pending user QA item.
|
|
337
|
-
|
|
338
|
-
**4b.1. Design source comparison** (mandatory when `has_ui_work` is true): Search the project's design-sources directory (e.g., `docs/design/`, `docs/development/product/sources/design/`) for PNG files matching the page or component being changed. If design PNGs exist, add a mandatory user QA item with check: "Design source fidelity" and instructions: "Compare rendered output against design source PNG. Verify: column layout matches, control shapes match (flat vs pill vs toggle), background colors match, row structure and dividers match, action controls are in the correct column."
|
|
339
|
-
|
|
340
|
-
**4b.2. Volume-gated mechanical-sweep spot-check** (volume-triggered, runs regardless of `has_ui_work`): when `files_changed.length > 100` AND the round is mechanical (`work_type == 'mechanical'` OR round requirements match `/sweep|auto.?fix|batch|backlog/i`), emit a mandatory user QA item:
|
|
341
|
-
|
|
342
|
-
- `check`: `"High-volume mechanical round spot-check"`
|
|
343
|
-
- `status`: `"pending"`
|
|
344
|
-
- `instructions`: "This round modified {N} files mechanically. Open 3–5 changed files in the running app and verify behavior is unchanged. Prioritize files with business logic (services, hooks, reducers) over pure presentation. Spot-check at least one file from each touched module."
|
|
345
|
-
- `round_number`: current
|
|
346
|
-
|
|
347
|
-
Volume gating exists because automated checks (build/lint/types/unit) verify shape but not behaviour preservation; large mechanical sweeps (auto-fix, codemod, refactor) can pass all gates while silently changing semantics in code paths the test suite doesn't cover.
|
|
348
|
-
|
|
349
|
-
**4c. Default checklist items**: See Phase 5.
|
|
327
|
+
**4b. Default checklist items**: See Phase 5.
|
|
350
328
|
|
|
351
329
|
### Phase 5: Default Production Checklist
|
|
352
330
|
|
|
@@ -379,10 +357,10 @@ Return complete output contract.
|
|
|
379
357
|
- Build output analyzed for warnings/deprecations/console.logs (with client/server classification)
|
|
380
358
|
- npm audit executed, vulnerabilities reported by severity, critical/high contribute to hard_fail
|
|
381
359
|
- Unrelated issues discovered and logged
|
|
382
|
-
- Auto
|
|
360
|
+
- Auto and default QA items generated
|
|
383
361
|
- `hard_fail` flag correctly set
|
|
384
362
|
- **Vitest/Jest/Cargo unit-test hard_fail enforced** when source files changed
|
|
385
|
-
- E2E execution + preflight delegated entirely to `
|
|
363
|
+
- E2E execution + preflight delegated entirely to the `cbp-e2e-*` specialist agents (this agent never runs Playwright/Maestro/wdio/etc.)
|
|
386
364
|
|
|
387
365
|
## Failure Modes
|
|
388
366
|
|
|
@@ -395,6 +373,6 @@ Return complete output contract.
|
|
|
395
373
|
|
|
396
374
|
## Integration
|
|
397
375
|
|
|
398
|
-
- **Spawned by**: `/cbp-round-execute` Step 5 (per-wave; runs in parallel with `
|
|
399
|
-
- **Parallel
|
|
400
|
-
- **Output consumed by**: `/cbp-round-execute` Step 6 (hard-fail routing — this agent's `totals.hard_fail` is OR'd
|
|
376
|
+
- **Spawned by**: `/cbp-round-execute` Step 5 (per-wave; runs in parallel with the `cbp-e2e-*` specialists and may also run in parallel with next wave's executor)
|
|
377
|
+
- **Parallel siblings**: `cbp-e2e-*` specialist agents (fully independent — no cross-read; all agents complete on their own timeline using only their own inputs)
|
|
378
|
+
- **Output consumed by**: `/cbp-round-execute` Step 6 (hard-fail routing — this agent's `totals.hard_fail` is OR'd with the e2e signals from `round.context.e2e_outputs`: any `e2e_outputs[f].test_results.failed > 0` or `e2e_outputs[f].status === 'failed'`, plus the `e2e_eligible_skipped` signal), `/cbp-round-end` Step 3 (reads this agent's `auto_qa[]` and `default_checklist[]`). This agent does not emit `user_qa` items; baseline-regression findings surface as a BLOCKING gate at `/cbp-round-end` Step 7 (an explicit accept-or-fix user decision; baselines are NEVER auto-accepted).
|
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
---
|
|
2
|
+
scope: org-shared
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# E2E Shared Workflow Contract
|
|
6
|
+
|
|
7
|
+
Loaded by every `cbp-e2e-*` specialist agent. Defines the shared Input/Output contract,
|
|
8
|
+
pre-flight loop, failure classification, screenshot rules, dispatch routing, and
|
|
9
|
+
never-silently-skip obligations. Framework-specific commands live in each agent's body.
|
|
10
|
+
|
|
11
|
+
## Input Contract
|
|
12
|
+
|
|
13
|
+
Passed by the dispatching skill (`/cbp-round-execute` Step 5, `/cbp-checkpoint-check`
|
|
14
|
+
Step 5b, or `/cbp-checkpoint-plan` Step 4 discovery probe). The dispatching skill reads
|
|
15
|
+
`.codebyplan/e2e.json` and injects `framework`, `app`, `platforms`, and credential var
|
|
16
|
+
names — agents do NOT auto-detect platform; the config is authoritative.
|
|
17
|
+
|
|
18
|
+
```yaml
|
|
19
|
+
input:
|
|
20
|
+
repo_id: string # UUID — used to resolve tech_stack from DB
|
|
21
|
+
round_number: number # 1-based; 0 is the sentinel for whole_checkpoint_mode
|
|
22
|
+
files_changed: [{path, action}]
|
|
23
|
+
prior_round_files_changed: # Required when round_number >= 2
|
|
24
|
+
- path: string
|
|
25
|
+
action: string
|
|
26
|
+
user_approved: boolean
|
|
27
|
+
whole_checkpoint_mode: boolean # Default false. When true, run full pages_affected
|
|
28
|
+
test_strategy:
|
|
29
|
+
platform: string
|
|
30
|
+
e2e_framework: string # playwright | maestro | webdriverio | xcuitest | vscode-test
|
|
31
|
+
pages_affected: string[] # Routes or screen names changed
|
|
32
|
+
has_auth: boolean
|
|
33
|
+
dev_server_port: number | null
|
|
34
|
+
framework: string # From .codebyplan/e2e.json — authoritative
|
|
35
|
+
app: string # App path (e.g. apps/web)
|
|
36
|
+
platforms: string[] # e.g. ["web"] | ["ios","android"] | ["desktop"]
|
|
37
|
+
credential_vars: # Env var names to probe at Step 6.5.1
|
|
38
|
+
email: string | null
|
|
39
|
+
password: string | null
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Output Contract
|
|
43
|
+
|
|
44
|
+
```yaml
|
|
45
|
+
output:
|
|
46
|
+
status: 'completed' | 'failed' # 'blocked' is NOT valid — resolve via AskUserQuestion
|
|
47
|
+
tests_written: [{path, action: 'created' | 'modified'}]
|
|
48
|
+
tests_run: boolean # MUST be true when status == 'completed'
|
|
49
|
+
test_results:
|
|
50
|
+
passed: number
|
|
51
|
+
failed: number
|
|
52
|
+
skipped: number
|
|
53
|
+
failures:
|
|
54
|
+
- test_name: string
|
|
55
|
+
error: string
|
|
56
|
+
file: string
|
|
57
|
+
category: 'env' | 'auth' | 'access' | 'flake' | 'real' | 'visual_regression'
|
|
58
|
+
classification_reason: string
|
|
59
|
+
framework_configured: boolean
|
|
60
|
+
preflight:
|
|
61
|
+
dev_server: { required: bool, ok: bool, port: number | null, notes: string }
|
|
62
|
+
simulator: { required: bool, ok: bool, device: string | null, notes: string }
|
|
63
|
+
built_binary: { required: bool, ok: bool, path: string | null, notes: string }
|
|
64
|
+
env_vars: { required: string[], missing: string[], ok: bool }
|
|
65
|
+
auth_probe: { ran: bool, ok: bool, probe_path: string | null, error: string | null }
|
|
66
|
+
screenshots:
|
|
67
|
+
- test_name: string
|
|
68
|
+
path: string # Absolute or repo-relative path to PNG
|
|
69
|
+
page_or_screen: string
|
|
70
|
+
viewport: 'desktop' | 'mobile' | 'tablet' | 'device'
|
|
71
|
+
is_new: bool
|
|
72
|
+
baseline_diff_pct: number | null
|
|
73
|
+
user_interactions: [{question, answer}]
|
|
74
|
+
tech_stack_reconciliation:
|
|
75
|
+
db_framework: string | null
|
|
76
|
+
fs_framework: string | null
|
|
77
|
+
resolution: 'follow_db' | 'follow_fs' | 'configure_missing' | 'skip_app' | 'no_mismatch' | 'no_db_data'
|
|
78
|
+
decided_at: string
|
|
79
|
+
round2_skip_set:
|
|
80
|
+
- spec_path: string
|
|
81
|
+
reason: string
|
|
82
|
+
whole_checkpoint_aggregated: boolean
|
|
83
|
+
critical_issues:
|
|
84
|
+
- type: string
|
|
85
|
+
spec_path: string | null
|
|
86
|
+
reason: string
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Step 5.1 — Page Filter (Round 2+, non-checkpoint mode)
|
|
90
|
+
|
|
91
|
+
When `round_number >= 2` AND `whole_checkpoint_mode === false`:
|
|
92
|
+
|
|
93
|
+
1. Build `unapproved_files` from `prior_round_files_changed` where `user_approved === false`.
|
|
94
|
+
2. For each page in `pages_affected[]`, derive contributing source files:
|
|
95
|
+
- Next.js: `app/<route>/page.tsx` + layout chain + imported components
|
|
96
|
+
- Expo / React Native: screen file + imported components
|
|
97
|
+
- Tauri: route component + Rust handler files
|
|
98
|
+
- Fallback: any file whose path starts with the page's directory
|
|
99
|
+
3. A page **survives** when ANY contributing file is in `unapproved_files`.
|
|
100
|
+
4. A page **is skipped** when ALL contributing files are user-approved.
|
|
101
|
+
5. Record skipped pages in `round2_skip_set[]`.
|
|
102
|
+
6. Replace `pages_affected` with the surviving subset.
|
|
103
|
+
|
|
104
|
+
When `round_number === 1` OR `whole_checkpoint_mode === true`, use `pages_affected` verbatim.
|
|
105
|
+
|
|
106
|
+
## Step 6.5 — Pre-flight (MANDATORY)
|
|
107
|
+
|
|
108
|
+
Never proceed with `tests_run: false`. Resolve every failing check via `AskUserQuestion`
|
|
109
|
+
in a loop — re-probe after user confirmation. An explicit abort returns `status: 'failed'`
|
|
110
|
+
with the blocking preflight field populated.
|
|
111
|
+
|
|
112
|
+
### 6.5.1 Environment Variables
|
|
113
|
+
|
|
114
|
+
Check `{app}/.env.local` and process env. Framework-specific required var names come
|
|
115
|
+
from the `credential_vars` input field (the dispatching skill reads them from
|
|
116
|
+
`.codebyplan/e2e.json`). Naming conventions:
|
|
117
|
+
|
|
118
|
+
- Playwright uses `E2E_TEST_*` (avoids collision with non-E2E `TEST_*` vars).
|
|
119
|
+
- Maestro/XCUITest stay on `TEST_*` per `rules/maestro-auth-state-reset.md`.
|
|
120
|
+
|
|
121
|
+
For any missing var:
|
|
122
|
+
|
|
123
|
+
> "Missing required E2E env vars: `{names}`. Set them in `apps/{app}/.env.local` now,
|
|
124
|
+
> then reply 'ready'. (Or reply 'skip' to abort this e2e run.)"
|
|
125
|
+
|
|
126
|
+
**Hard rule**: Specs MUST NOT contain in-spec env skip gates (`test.skip(!process.env.X, ...)`)
|
|
127
|
+
— those bypass preflight and produce zero-assertion runs. See `rules/e2e-mandatory.md`.
|
|
128
|
+
|
|
129
|
+
### 6.5.2 Runtime Readiness
|
|
130
|
+
|
|
131
|
+
Framework-specific probes are in each agent's body. General obligations:
|
|
132
|
+
|
|
133
|
+
- Playwright: dev server responding at `baseURL` port (curl HTTP 200/3xx).
|
|
134
|
+
- Maestro (iOS): booted simulator (`xcrun simctl list devices booted | grep Booted`).
|
|
135
|
+
- Maestro (Android): connected device/emulator (`adb devices | grep -w device`).
|
|
136
|
+
- WebDriverIO: built Tauri binary present at the path in `wdio.conf.ts`.
|
|
137
|
+
- XCUITest: `xcodebuild -list` returns the scheme; Expo prebuild artifacts present.
|
|
138
|
+
- vscode-test: compiled test JS present (`e2e/**/*.test.js`).
|
|
139
|
+
|
|
140
|
+
On any failure, `AskUserQuestion` with remediation steps; re-probe after "ready". Never
|
|
141
|
+
silently skip a required runtime prerequisite.
|
|
142
|
+
|
|
143
|
+
**Port alignment (Playwright only)**: parse `playwright.config.ts` `baseURL` and compare
|
|
144
|
+
to `.codebyplan/server.json` `port_allocations[]` for the app. On mismatch ask which is
|
|
145
|
+
correct before running.
|
|
146
|
+
|
|
147
|
+
### 6.5.3 Auth Probe (only when `has_auth`)
|
|
148
|
+
|
|
149
|
+
Run the dedicated auth probe — not the full suite. Probe paths per framework are in each
|
|
150
|
+
agent's body.
|
|
151
|
+
|
|
152
|
+
If the probe fails, classify the reason (see Step 7.5 below) and ask:
|
|
153
|
+
|
|
154
|
+
> "Auth probe failed: `{category}` — `{error_summary}`. Common causes: wrong credentials,
|
|
155
|
+
> expired state, auth backend paused, captcha. Options: (1) delete storage state + retry,
|
|
156
|
+
> (2) fix credentials + reply 'ready', (3) abort e2e."
|
|
157
|
+
|
|
158
|
+
On "ready", re-run the probe. Loop up to 3 times before escalating with a new
|
|
159
|
+
`AskUserQuestion` that summarises all 3 attempts' errors.
|
|
160
|
+
|
|
161
|
+
## Step 7.5 — Failure Classification
|
|
162
|
+
|
|
163
|
+
For each failed test, assign exactly one category:
|
|
164
|
+
|
|
165
|
+
| Category | Signals | Resolution |
|
|
166
|
+
|---|---|---|
|
|
167
|
+
| `env` | `process.env.X is undefined`, `ECONNREFUSED`, missing config | Loop to Step 6.5.1 |
|
|
168
|
+
| `auth` | Login-page redirect, 401 after credential submit, `invalid_grant`, `email_not_confirmed` | AskUserQuestion per Step 6.5.3 |
|
|
169
|
+
| `access` | 403/404 on an accessible route, RLS denial text, missing seed data | AskUserQuestion: "Test failed with access error: `{error}`. Options: (1) fix + reply steps, (2) abort." |
|
|
170
|
+
| `flake` | Timeout on first run, passes on immediate retry, network jitter | Retry up to 3 times before reclassifying to `real` |
|
|
171
|
+
| `visual_regression` | `toHaveScreenshot` pixel-diff exceeded threshold | Do NOT retry. Include baseline + actual paths in `screenshots[]` with `baseline_diff_pct`. Do NOT auto-accept baselines. |
|
|
172
|
+
| `real` | Assertion failure on app behavior (wrong text, state, navigation) | Attempt fix (selector, timeout, assertion), max 3 attempts, then report |
|
|
173
|
+
|
|
174
|
+
`env`, `auth`, `access` failures MUST NOT count toward `test_results.failed` until
|
|
175
|
+
preflight passes — they block the run instead.
|
|
176
|
+
|
|
177
|
+
## Screenshot Collection Rule
|
|
178
|
+
|
|
179
|
+
After every run, enumerate all PNGs produced and populate `screenshots[]`.
|
|
180
|
+
Framework-specific paths are in each agent's body. Every entry requires:
|
|
181
|
+
`{test_name, path, page_or_screen, viewport, is_new, baseline_diff_pct}`.
|
|
182
|
+
|
|
183
|
+
Screenshots flow to `cbp-frontend-ui` invoked by `/cbp-round-execute` Step 5b with
|
|
184
|
+
`phase: 'screenshot_review'` — NOT inline by `round-executor` Step 3.8 (which runs
|
|
185
|
+
`phase: 'style_only'` without e2e output).
|
|
186
|
+
|
|
187
|
+
**Baselines are never auto-accepted.** A `toHaveScreenshot` diff is `visual_regression`;
|
|
188
|
+
the user decides via QA whether to update baselines.
|
|
189
|
+
|
|
190
|
+
## Completion Rule
|
|
191
|
+
|
|
192
|
+
`status: 'completed'` is allowed ONLY when:
|
|
193
|
+
|
|
194
|
+
- `tests_run === true`
|
|
195
|
+
- `preflight.*.ok === true` for every required prerequisite
|
|
196
|
+
- Every failure has a `category` other than `env`, `auth`, or `access`
|
|
197
|
+
|
|
198
|
+
Otherwise return `status: 'failed'`.
|
|
199
|
+
|
|
200
|
+
## Never-Silently-Skip Rules
|
|
201
|
+
|
|
202
|
+
- Missing simulator / server / binary / env / auth → always `AskUserQuestion`, never `tests_run: false`.
|
|
203
|
+
- No testable targets despite `has_ui_work` → return `status: 'failed'` with reason "no testable targets".
|
|
204
|
+
- All-skipped run (`passed === 0 && skipped > 0` for a spec in `files_changed`) → `status: 'failed'`, add `critical_issues[]` entry `{type: 'e2e_all_skipped', ...}`.
|
|
205
|
+
- User aborts preflight → `status: 'failed'`, add `critical_issues[]` entry `{type: 'preflight_aborted', ...}`.
|
|
206
|
+
|
|
207
|
+
## Dispatch / Eligibility Routing Contract
|
|
208
|
+
|
|
209
|
+
The dispatching skill (`/cbp-round-execute` Step 5 or `/cbp-checkpoint-check` Step 5b)
|
|
210
|
+
selects one specialist per app. Config is in `.codebyplan/e2e.json` (authoritative).
|
|
211
|
+
|
|
212
|
+
| `framework` in e2e.json | Agent spawned | Typical app type |
|
|
213
|
+
|---|---|---|
|
|
214
|
+
| `playwright` | `cbp-e2e-playwright` | Next.js web routes |
|
|
215
|
+
| `maestro` | `cbp-e2e-maestro` | Expo / React Native (android + ios) |
|
|
216
|
+
| `webdriverio` | `cbp-e2e-tauri` | Tauri desktop |
|
|
217
|
+
| `vscode-test` | `cbp-e2e-vscode` | VS Code extension |
|
|
218
|
+
| `xcuitest` | `cbp-e2e-xcuitest` | Native iOS (system dialogs, HealthKit, watchOS) |
|
|
219
|
+
|
|
220
|
+
**Eligibility is config-driven.** A framework is **eligible** in a round when its
|
|
221
|
+
`.codebyplan/e2e.json` entry has `enabled === true` AND `auto_run === true` AND its `app`
|
|
222
|
+
source path intersects the round's `files_changed` (repo root for single-app repos). An
|
|
223
|
+
eligible framework's specialist MUST run — see `rules/e2e-mandatory.md` for the opt-out
|
|
224
|
+
contract and the `e2e_eligible_skipped` hard-fail.
|
|
225
|
+
|
|
226
|
+
An agent is NOT spawned when ANY of the following hold:
|
|
227
|
+
|
|
228
|
+
- `testing_profile` is `claude_only` or `backend` (no UI surface) — a short-circuit hint, applied before reading `e2e.json`.
|
|
229
|
+
- The framework's `enabled` or `auto_run` is `false`, or the `app` path does not intersect `files_changed`.
|
|
230
|
+
- `frameworks` in `e2e.json` is absent or empty (no e2e configured) — zero eligible, no hard-fail.
|
|
231
|
+
- `platforms[]` in `e2e.json` does not include the current CI target (e.g., iOS-only config skipped on a Linux runner without a simulator) — a recorded valid skip per `rules/e2e-mandatory.md`.
|
|
232
|
+
|
|
233
|
+
`has_ui_work` and `testing_profile` (beyond the `claude_only` / `backend` short-circuit) are
|
|
234
|
+
**hints only** — they never suppress an eligible framework. Config is authoritative.
|
|
235
|
+
|
|
236
|
+
**Multi-app monorepos**: the dispatching skill reads `e2e.json` per app path and may
|
|
237
|
+
spawn multiple specialists in the same round (one per eligible framework). Agents run in
|
|
238
|
+
parallel with `cbp-testing-qa-agent`. Each specialist's output is stored under
|
|
239
|
+
`round.context.e2e_outputs[framework]` (a framework-keyed map); `/cbp-round-execute` Step 5b
|
|
240
|
+
aggregates `screenshots[]` across all entries before the `cbp-frontend-ui` review.
|
|
241
|
+
|
|
242
|
+
**whole_checkpoint_mode dispatch** (`/cbp-checkpoint-check` Step 5b and `/cbp-checkpoint-plan`
|
|
243
|
+
Step 4): pass `round_number: 0`, `whole_checkpoint_mode: true`, and the aggregated
|
|
244
|
+
`files_changed` union. The agent ignores `prior_round_files_changed` in this mode.
|
|
245
|
+
|
|
246
|
+
This contract is the single source of truth for dispatch logic. Config-driven dispatch is
|
|
247
|
+
implemented in `/cbp-round-execute` Step 5 and `/cbp-checkpoint-check` Step 5b (CHK-145); the
|
|
248
|
+
routing table above is the authoritative reference those gates match. Enforcement (the
|
|
249
|
+
`e2e_eligible_skipped` hard-fail and the no-in-spec-env-skip gate) lives in
|
|
250
|
+
`rules/e2e-mandatory.md`.
|
|
251
|
+
|
|
252
|
+
## Playwright Auth Provisioning Convention
|
|
253
|
+
|
|
254
|
+
Every repo with Playwright auth ships:
|
|
255
|
+
|
|
256
|
+
- `scripts/provision-e2e-user.ts` — idempotent script creating the canonical E2E user
|
|
257
|
+
and (for multi-tenant repos) a `test` subdomain. Wired to `pnpm e2e:provision`.
|
|
258
|
+
- `.env.local.example` — lists every env var `globalSetup` requires.
|
|
259
|
+
- CI secrets: `E2E_TEST_EMAIL`, `E2E_TEST_PASSWORD`, `NEXT_PUBLIC_SUPABASE_URL`,
|
|
260
|
+
`NEXT_PUBLIC_SUPABASE_PUBLISHABLE_KEY` (or legacy `_ANON_KEY`).
|
|
261
|
+
|
|
262
|
+
Per-repo specifics (email, vault name, remaining-spec migration list) live in the repo's
|
|
263
|
+
own `docs/e2e-setup.md`, not in this shared file.
|
|
264
|
+
|
|
265
|
+
## Auth State Gitignore
|
|
266
|
+
|
|
267
|
+
Before writing any storage state under `playwright/.auth/`, `tests/.auth/`, or
|
|
268
|
+
`e2e/.auth/`, verify the path is in the nearest `.gitignore`. If absent, ADD the entry
|
|
269
|
+
first. Auth state files contain live session cookies — committing one is a credential
|
|
270
|
+
leak. See `rules/playwright-auth-gitignore.md`.
|
|
271
|
+
|
|
272
|
+
## Supabase Parallelism (Playwright)
|
|
273
|
+
|
|
274
|
+
When `NEXT_PUBLIC_SUPABASE_URL` references a remote project (not localhost), set
|
|
275
|
+
`workers: 1` in `playwright.config.ts` to prevent auth/RLS races. Not needed with a
|
|
276
|
+
local Supabase emulator. Detect a local URL via: `grep -E "127\.0\.0\.1|localhost" apps/{app}/.env.local | grep SUPABASE_URL` (a match means the project is local).
|
|
277
|
+
|
|
278
|
+
## Mock Server for Server-Side Fetch
|
|
279
|
+
|
|
280
|
+
`page.route()` intercepts browser-process requests only. For Next.js server actions,
|
|
281
|
+
route handlers, or middleware (Node-process fetch), spin up a real local HTTP server in
|
|
282
|
+
`globalSetup` and point the dev server at it via `webServer.env`. See
|
|
283
|
+
`rules/playwright-server-side-mocking.md`.
|
|
284
|
+
|
|
285
|
+
## Cold-Start Warmup (Playwright / Next.js)
|
|
286
|
+
|
|
287
|
+
Next.js dev mode (Turbopack) compiles routes lazily. Add a warmup fetch at the end of
|
|
288
|
+
`globalSetup`, after the mock server starts and before specs run. Use `redirect: 'manual'` for
|
|
289
|
+
auth-protected routes. Wrap in `try/catch` — warmup is best-effort.
|
|
290
|
+
|
|
291
|
+
## Locator Hygiene (Playwright)
|
|
292
|
+
|
|
293
|
+
Prefer stable accessibility-driven selectors (`getByRole`, `getByLabel`, `getByTestId`)
|
|
294
|
+
over positional CSS selectors (`.locator('.class').nth(N)`). When calling `page.goto()` inside
|
|
295
|
+
a loop, snapshot text/href BEFORE navigation rather than holding stale `Locator` handles.
|
|
296
|
+
|
|
297
|
+
## Visual Baseline Workflow
|
|
298
|
+
|
|
299
|
+
| Situation | What happens |
|
|
300
|
+
|---|---|
|
|
301
|
+
| No baseline (new screen) | Playwright creates on first run; test passes; `cbp-frontend-ui` at Step 5b reviews semantically. |
|
|
302
|
+
| Baseline exists, diff ≤ threshold | Test passes. |
|
|
303
|
+
| Baseline exists, diff > threshold | `visual_regression` failure. Agent does NOT retry. `cbp-frontend-ui` at Step 5b flags it; `/cbp-round-end` Step 3b constructs user QA item. User decides: fix-task or `--update-snapshots`. |
|
|
@@ -19,12 +19,14 @@ _get_limit() {
|
|
|
19
19
|
/CHANGELOG.md|*/CHANGELOG.md|*/user-input.md|/.claude/docs/research/*) echo ""; return;;
|
|
20
20
|
# Managed .claude/ files
|
|
21
21
|
/.claude/rules/*.md) echo "100 200"; return;;
|
|
22
|
+
/.claude/context/testing/e2e.md) echo "300 600"; return;; # consolidated E2E shared-workflow + dispatch contract (CHK-145)
|
|
22
23
|
/.claude/context/*.md|/.claude/context/*/*.md) echo "200 400"; return;;
|
|
23
24
|
/.claude/skills/*/SKILL.md) echo "300 600"; return;;
|
|
24
25
|
/.claude/skills/*/reference/*.md) echo "200 400"; return;;
|
|
25
26
|
/.claude/skills/*/examples/*.md|/.claude/skills/*/templates/*) echo "100 200"; return;;
|
|
26
27
|
/.claude/agents/*/AGENT.md) echo "400 800"; return;;
|
|
27
28
|
/.claude/agents/*/*.md) echo "200 400"; return;;
|
|
29
|
+
/.claude/agents/*.md) echo "400 800"; return;;
|
|
28
30
|
/.claude/hooks/*.sh) echo "150 300"; return;;
|
|
29
31
|
/.claude/docs/architecture/*.md|/.claude/docs/server/*.md) echo "200 400"; return;;
|
|
30
32
|
/.claude/docs/stack/*/index.md|/.claude/docs/stack/*/guide.md) echo "150 300"; return;;
|
|
@@ -62,7 +62,8 @@ run_case() {
|
|
|
62
62
|
|
|
63
63
|
# ===== Good fixtures (must exit 0) =====
|
|
64
64
|
run_case "good-skill" good/skill.md /.claude/skills/cbp-fixture/SKILL.md 0
|
|
65
|
-
run_case "good-agent"
|
|
65
|
+
run_case "good-agent" good/agent.md /.claude/agents/cbp-fixture/AGENT.md 0
|
|
66
|
+
run_case "good-agent-flat" good/agent.md /.claude/agents/cbp-fixture.md 0
|
|
66
67
|
run_case "good-rule" good/rule.md /.claude/rules/cbp-fixture.md 0
|
|
67
68
|
run_case "good-hook" good/hook.sh /.claude/hooks/cbp-fixture.sh 0
|
|
68
69
|
|
|
@@ -12,6 +12,7 @@ TEMPLATE=""
|
|
|
12
12
|
case "$REL_PATH" in
|
|
13
13
|
/.claude/skills/*) TEMPLATE="/packages/codebyplan-package/templates/skills/build-cc-skill/reference/cbp-quality.md" ;;
|
|
14
14
|
/.claude/agents/*/AGENT.md) TEMPLATE="/packages/codebyplan-package/templates/skills/build-cc-agent/reference/cbp-quality.md" ;;
|
|
15
|
+
/.claude/agents/*.md) TEMPLATE="/packages/codebyplan-package/templates/skills/build-cc-agent/reference/cbp-quality.md" ;;
|
|
15
16
|
/.claude/docs/research/*/1-*) TEMPLATE="/docs/templates/.claude/docs/research/1-problem.md" ;;
|
|
16
17
|
/.claude/docs/research/*/2-*) TEMPLATE="/docs/templates/.claude/docs/research/2-claude-default.md" ;;
|
|
17
18
|
/.claude/docs/research/*/3-*) TEMPLATE="/docs/templates/.claude/docs/research/3-official-docs.md" ;;
|
|
@@ -14,8 +14,11 @@ paths:
|
|
|
14
14
|
| Context File | Loaded By | Phase | Purpose |
|
|
15
15
|
|--------------|-----------|-------|---------|
|
|
16
16
|
| `context/testing/unit.md` | `cbp-round-executor` | Step 3.6 | Unit test patterns per framework |
|
|
17
|
-
| `context/testing/e2e.md` | `cbp-
|
|
17
|
+
| `context/testing/e2e.md` | `cbp-e2e-playwright`, `cbp-e2e-maestro`, `cbp-e2e-tauri`, `cbp-e2e-vscode`, `cbp-e2e-xcuitest` | Entry | Shared contract: Input/Output, preflight, failure classification, dispatch routing |
|
|
18
18
|
| `context/testing/e2e.md` | `cbp-testing-qa-agent` | Preflight | Env var list per framework |
|
|
19
|
+
| `context/testing/e2e.md` | `cbp-checkpoint-plan` | Step 4 | Discovery probe dispatch contract |
|
|
20
|
+
| `context/testing/e2e.md` | `cbp-round-execute` | Step 5 | E2E specialist dispatch routing |
|
|
21
|
+
| `context/testing/e2e.md` | `cbp-checkpoint-check` | Step 5b | Whole-checkpoint e2e dispatch |
|
|
19
22
|
| `context/testing/eslint.md` | `cbp-task-planner` | Phase 1.5 | ESLint Compliance Checklist |
|
|
20
23
|
| `context/testing/eslint.md` | `cbp-improve-round` | Phase 1.5 | Config-file compliance audit |
|
|
21
24
|
| `context/mcp-docs.md` | `cbp-task-planner` | Phase 2.6 | MCP library doc lookup contract — per-dependency consultation via DocsByPlan MCP tools (resolve_library_id → search_chunks/lookup_symbol → get_chunk) |
|