create-claude-cabinet 0.25.4 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -5
- package/package.json +1 -1
- package/templates/skills/debrief/SKILL.md +22 -1
- package/templates/skills/verify/SKILL.md +32 -7
- package/templates/skills/verify/install.sh +160 -19
- package/templates/skills/verify/phases/calibrate.md +79 -6
- package/templates/skills/verify/phases/discover.md +29 -0
- package/templates/skills/verify/phases/generate.md +31 -0
- package/templates/skills/verify/phases/recipes.md +113 -0
- package/templates/skills/verify/phases/scenario-template.md +49 -17
- package/templates/verify-runtime/package.json +1 -1
- package/templates/verify-runtime/src/baseline-steps.ts +135 -0
- package/templates/verify-runtime/src/index.ts +14 -0
package/README.md
CHANGED
|
@@ -131,6 +131,32 @@ hooks — things that keep going wrong become things that can't go wrong.
|
|
|
131
131
|
- **`/cc-upgrade`** — when Claude Cabinet publishes updates, this skill
|
|
132
132
|
runs the installer for the mechanical parts and walks you through
|
|
133
133
|
what changed conversationally. Intelligence is the merge strategy.
|
|
134
|
+
- **`/cc-feedback`** — file friction with CC itself mid-session
|
|
135
|
+
without waiting for debrief. When a skill, phase, or convention
|
|
136
|
+
causes pain, this captures the detail and queues it for upstream
|
|
137
|
+
delivery to the Claude Cabinet repo.
|
|
138
|
+
|
|
139
|
+
### Verify (opt-in, off by default)
|
|
140
|
+
|
|
141
|
+
Walkthrough verification harness — Cucumber `.feature` files describing
|
|
142
|
+
user journeys, Playwright running them, and human-in-the-loop verdict
|
|
143
|
+
pauses (Pass / Issue / Skip / Needs-info) at checks that need subjective
|
|
144
|
+
judgment. Replaces flat AC checklists with re-runnable scenarios you can
|
|
145
|
+
read months later.
|
|
146
|
+
|
|
147
|
+
- **`/verify`** — run the suite
|
|
148
|
+
- **`/verify learn`** — bootstrap from a cold start. Claude scans
|
|
149
|
+
routes, memory, git, and the live UI; proposes scenarios; calibrates
|
|
150
|
+
with you; then generates `.feature` files and step stubs
|
|
151
|
+
- **`/verify update "I changed X"`** — keep scenarios in sync as the
|
|
152
|
+
product evolves
|
|
153
|
+
- **`/verify backfill <fid>`** — attach a Verify Plan to a pending
|
|
154
|
+
action's notes
|
|
155
|
+
|
|
156
|
+
Enable with `--modules verify` (existing installs merge, nothing else
|
|
157
|
+
disturbed). Runtime lives at `~/.claude-cabinet/verify/<version>/` and
|
|
158
|
+
ships an opinionated `cabinet-verify` npm package built from de[sic]ify's
|
|
159
|
+
e2e harness.
|
|
134
160
|
|
|
135
161
|
## Your Workflow
|
|
136
162
|
|
|
@@ -158,14 +184,29 @@ that override default behavior for any skill. Write content in a phase
|
|
|
158
184
|
file to customize it, write `skip: true` to disable it, or leave it
|
|
159
185
|
absent to use the default. No config files, no YAML, no DSL.
|
|
160
186
|
|
|
187
|
+
## Adding Modules to an Existing Install
|
|
188
|
+
|
|
189
|
+
Some modules (like `verify` and `memory`) are opt-in. To add one
|
|
190
|
+
without touching anything else in your install:
|
|
191
|
+
|
|
192
|
+
```
|
|
193
|
+
npx create-claude-cabinet --modules verify --yes
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
The `--modules` flag **merges** with your existing install — it adds
|
|
197
|
+
the listed modules to what's already there, it doesn't replace your
|
|
198
|
+
module set. Safe to run on a mature project without losing
|
|
199
|
+
customization. You can pass multiple modules: `--modules verify,memory`.
|
|
200
|
+
|
|
161
201
|
## CLI Options
|
|
162
202
|
|
|
163
203
|
```
|
|
164
|
-
npx create-claude-cabinet
|
|
165
|
-
npx create-claude-cabinet my-project
|
|
166
|
-
npx create-claude-cabinet --yes
|
|
167
|
-
npx create-claude-cabinet --yes --no-db
|
|
168
|
-
npx create-claude-cabinet --dry-run
|
|
204
|
+
npx create-claude-cabinet # Interactive walkthrough
|
|
205
|
+
npx create-claude-cabinet my-project # Install in ./my-project/
|
|
206
|
+
npx create-claude-cabinet --yes # Accept all defaults
|
|
207
|
+
npx create-claude-cabinet --yes --no-db # All defaults, skip database
|
|
208
|
+
npx create-claude-cabinet --dry-run # Preview without writing files
|
|
209
|
+
npx create-claude-cabinet --modules verify --yes # Add an opt-in module (merges, doesn't replace)
|
|
169
210
|
```
|
|
170
211
|
|
|
171
212
|
## What Gets Installed
|
package/package.json
CHANGED
|
@@ -493,7 +493,8 @@ Read `phases/report.md` for how to present the debrief summary.
|
|
|
493
493
|
|
|
494
494
|
Phases are either **core** (maintain system state) or **presentation**
|
|
495
495
|
(surface information for the user). For lightweight session closes,
|
|
496
|
-
skip presentation phases. Core phases always run
|
|
496
|
+
skip presentation phases. **Core phases always run — Quick Mode is not
|
|
497
|
+
a license to skip them.**
|
|
497
498
|
|
|
498
499
|
- **Core phases** (always run): inventory, close-work,
|
|
499
500
|
cabinet-consultations, audit-pattern-capture, auto-maintenance,
|
|
@@ -505,6 +506,26 @@ skip presentation phases. Core phases always run.
|
|
|
505
506
|
A project that wants a quick debrief variant skips the report and
|
|
506
507
|
outputs a minimal summary instead.
|
|
507
508
|
|
|
509
|
+
### What Quick Mode does NOT skip
|
|
510
|
+
|
|
511
|
+
**Cabinet-consultations (step 3) is core and MUST run — do NOT skip,
|
|
512
|
+
do NOT paraphrase, do NOT defer.** This is where record-keeper (and
|
|
513
|
+
any other debrief-mandated members) verifies documentation against
|
|
514
|
+
reality. Skipping it is the single most common Quick Mode failure
|
|
515
|
+
mode: the consultations *feel* like overhead because they spawn
|
|
516
|
+
agents, but they are the only mechanism that catches doc drift,
|
|
517
|
+
methodology gaps, and stale state. A debrief without
|
|
518
|
+
cabinet-consultations leaves the next orient reading stale docs.
|
|
519
|
+
|
|
520
|
+
**Audit-pattern-capture, methodology-capture, and upstream-feedback
|
|
521
|
+
are instruction phases — they ship with CC and are always required**,
|
|
522
|
+
in Quick Mode as much as in full debrief. Their per-session cost is
|
|
523
|
+
near zero when nothing fires.
|
|
524
|
+
|
|
525
|
+
If a session genuinely has no audit findings, no methodology work,
|
|
526
|
+
and no CC friction, those phases self-skip in seconds. That is not
|
|
527
|
+
the same as Claude choosing to skip them.
|
|
528
|
+
|
|
508
529
|
## Extending and Calibration
|
|
509
530
|
|
|
510
531
|
See [calibration.md](calibration.md) for the phase-extension pattern
|
|
@@ -30,6 +30,9 @@ related:
|
|
|
30
30
|
- type: file
|
|
31
31
|
path: .claude/skills/verify/phases/backfill.md
|
|
32
32
|
role: "How to draft a ## Verify Plan section for a pending action"
|
|
33
|
+
- type: file
|
|
34
|
+
path: .claude/skills/verify/phases/recipes.md
|
|
35
|
+
role: "Testability gotchas (dnd-kit drag, dynamic file inputs, hash routing) and their workarounds"
|
|
33
36
|
- type: file
|
|
34
37
|
path: cabinet/_briefing.md
|
|
35
38
|
role: "Project identity and configuration"
|
|
@@ -125,8 +128,15 @@ scenario change.
|
|
|
125
128
|
|
|
126
129
|
1. Check if `e2e/` exists in the project root. If not, recommend
|
|
127
130
|
`/verify learn` and exit.
|
|
128
|
-
2.
|
|
129
|
-
|
|
131
|
+
2. **Test-isolation nudge.** If `e2e/` exists but `e2e/start-test-stack.sh`
|
|
132
|
+
does not — and the project's dev stack is suspected to write to a
|
|
133
|
+
real DB (signal: `package.json` references a single `data/`,
|
|
134
|
+
`db/`, or similar shared persistence path) — surface a one-line
|
|
135
|
+
note before running: *"No isolated test stack detected. Scenarios
|
|
136
|
+
will run against your dev stack. Run `/verify learn` to generate
|
|
137
|
+
an isolation scaffold if your dev DB matters."* Do not block.
|
|
138
|
+
3. Run `npm run verify` from the project's `e2e/` dir.
|
|
139
|
+
4. Surface the output. If failures or I-verdicts landed, suggest
|
|
130
140
|
`npm run report:last` to triage.
|
|
131
141
|
|
|
132
142
|
This mode is intentionally thin — the harness is the value, not
|
|
@@ -155,8 +165,12 @@ The "learn" flow runs four phases:
|
|
|
155
165
|
question at a time. Examples: "I see admin routes but only one admin
|
|
156
166
|
user — real persona or fold into main?", "Should the fresh-user
|
|
157
167
|
flow be its own scenario or part of admin?", "What's the dev stack
|
|
158
|
-
URL for preflight?".
|
|
159
|
-
project
|
|
168
|
+
URL for preflight?". Calibrate also includes a **test-isolation
|
|
169
|
+
probe** — if the project's dev stack writes to a real DB, the
|
|
170
|
+
skill captures the DB path, dev-server proxy config, and test
|
|
171
|
+
stack ports so install.sh can emit an `e2e/start-test-stack.sh`
|
|
172
|
+
scaffold. Do NOT batch questions — one at a time per project
|
|
173
|
+
convention.
|
|
160
174
|
|
|
161
175
|
4. **Generate** (read `phases/generate.md`): write the `.feature`
|
|
162
176
|
files using the template in `phases/scenario-template.md` plus
|
|
@@ -221,14 +235,25 @@ once the action runs. Backfill only adds the planning artifact so
|
|
|
221
235
|
| `update.md` | Default: action fid / diff / free-text dispatch | How change descriptions map to edits |
|
|
222
236
|
| `scenario-template.md` | Default: Gherkin with cost+role tags, NN.NN checkIds | Project-specific scenario shape |
|
|
223
237
|
| `backfill.md` | Default: interview-driven Verify Plan section drafting | Project-specific backfill questions |
|
|
238
|
+
| `recipes.md` | Default: dnd-kit, dynamic file input, hash routing gotchas | Project-specific testability recipes |
|
|
224
239
|
|
|
225
240
|
## Principles
|
|
226
241
|
|
|
227
242
|
- **One question at a time.** Calibrate phase NEVER batches questions
|
|
228
243
|
(per CLAUDE.md global convention). Each answer shapes the next.
|
|
229
|
-
- **≤5 scenarios on initial draft.** Force calibration
|
|
230
|
-
expansion. Adding scenarios later is cheap; removing scenarios
|
|
231
|
-
the user
|
|
244
|
+
- **≤5 scenarios on initial draft (cabinet-qa cap).** Force calibration
|
|
245
|
+
before expansion. Adding scenarios later is cheap; removing scenarios
|
|
246
|
+
the user did not ask for is expensive (per process-therapist). The
|
|
247
|
+
cap is load-bearing; do not loosen it without an audit-grade reason.
|
|
248
|
+
- **Depth-first, not shallow-first.** A scenario that touches a
|
|
249
|
+
surface but verifies nothing is worse than no scenario at all —
|
|
250
|
+
it occupies a slot in the catalogue and creates a false sense of
|
|
251
|
+
coverage. The first lap through a scenario should produce real
|
|
252
|
+
assertions and human-verdict pauses for the parts that genuinely
|
|
253
|
+
need subjective judgment. If a step cannot be exercised (a
|
|
254
|
+
testability gotcha — see `phases/recipes.md`), file a finding
|
|
255
|
+
against the consuming project and mark the step "skip until
|
|
256
|
+
testable" rather than emitting a no-op stub.
|
|
232
257
|
- **cabinet-qa owns "what's worth a scenario".** /verify learn
|
|
233
258
|
delegates that judgment via subagent; it doesn't re-derive it.
|
|
234
259
|
- **The .feature file is the spec.** Anyone (user, future Claude,
|
|
@@ -131,6 +131,22 @@ plan_mkdir "e2e/reports"
|
|
|
131
131
|
plan_mkdir "e2e/screenshots"
|
|
132
132
|
|
|
133
133
|
# package.json — per CONVENTIONS.md §npm Scripts (frozen contract).
|
|
134
|
+
#
|
|
135
|
+
# Node-version note: `--env-file-if-exists` requires Node 20.12+. We
|
|
136
|
+
# invoke `node` directly (not `NODE_OPTIONS`) because Node 22+ rejects
|
|
137
|
+
# `NODE_OPTIONS='--env-file=...'` ("--env-file= is not allowed in
|
|
138
|
+
# NODE_OPTIONS"). The CLI form is the only path that works across
|
|
139
|
+
# Node 20.12 / 21 / 22+.
|
|
140
|
+
#
|
|
141
|
+
# Cucumber bin path: cucumber-js v11 ships at
|
|
142
|
+
# node_modules/@cucumber/cucumber/bin/cucumber.js. The `cucumber-js`
|
|
143
|
+
# shell wrapper does NOT pass through CLI flags like --import in a
|
|
144
|
+
# way Node honors after the shebang resolves, so we invoke the .js
|
|
145
|
+
# entry directly.
|
|
146
|
+
#
|
|
147
|
+
PREFLIGHT_CMD="node --env-file-if-exists=.env.local node_modules/cabinet-verify/dist/src/cli/preflight.js"
|
|
148
|
+
CUCUMBER_CMD="node --env-file-if-exists=.env.local --import tsx/esm node_modules/@cucumber/cucumber/bin/cucumber.js --import 'steps/**/*.ts' --import 'support/**/*.ts'"
|
|
149
|
+
|
|
134
150
|
PACKAGE_JSON=$(cat <<JSON
|
|
135
151
|
{
|
|
136
152
|
"name": "$(basename "$PWD")-e2e",
|
|
@@ -138,13 +154,16 @@ PACKAGE_JSON=$(cat <<JSON
|
|
|
138
154
|
"private": true,
|
|
139
155
|
"type": "module",
|
|
140
156
|
"description": "Walkthrough verification harness (cabinet-verify).",
|
|
157
|
+
"engines": {
|
|
158
|
+
"node": ">=20.12"
|
|
159
|
+
},
|
|
141
160
|
"scripts": {
|
|
142
|
-
"preflight": "
|
|
143
|
-
"verify": "npm run preflight &&
|
|
144
|
-
"verify:cheap": "npm run preflight &&
|
|
145
|
-
"verify:full": "npm run preflight &&
|
|
146
|
-
"verify:manual": "npm run preflight &&
|
|
147
|
-
"verify:scenario": "npm run preflight &&
|
|
161
|
+
"preflight": "${PREFLIGHT_CMD}",
|
|
162
|
+
"verify": "npm run preflight && ${CUCUMBER_CMD} --tags '@free and not @manual'",
|
|
163
|
+
"verify:cheap": "npm run preflight && ${CUCUMBER_CMD} --tags '(@free or @api-small) and not @manual'",
|
|
164
|
+
"verify:full": "npm run preflight && ${CUCUMBER_CMD} --tags 'not @manual'",
|
|
165
|
+
"verify:manual": "npm run preflight && ${CUCUMBER_CMD} --tags '@manual'",
|
|
166
|
+
"verify:scenario": "npm run preflight && ${CUCUMBER_CMD}",
|
|
148
167
|
"report:last": "cabinet-verify-report-last",
|
|
149
168
|
"report:status": "cabinet-verify-report-status",
|
|
150
169
|
"install:browsers": "playwright install chromium"
|
|
@@ -179,10 +198,14 @@ CUCUMBER_JS=$(cat <<'JS'
|
|
|
179
198
|
// cabinet-verify scaffold. Reads scenarios from features/ and step
|
|
180
199
|
// definitions from steps/ + support/. The cabinet-verify package
|
|
181
200
|
// supplies the World base class and lifecycle hooks via support/world.ts.
|
|
201
|
+
//
|
|
202
|
+
// Note: cucumber-js v11 ignores the `import:` config key when
|
|
203
|
+
// invoked via CLI. Step/support import paths are passed as
|
|
204
|
+
// `--import 'steps/**/*.ts' --import 'support/**/*.ts'` in the npm
|
|
205
|
+
// scripts in package.json — that is the source of truth, not this file.
|
|
182
206
|
export default {
|
|
183
207
|
default: {
|
|
184
208
|
paths: ['features/**/*.feature'],
|
|
185
|
-
import: ['steps/**/*.ts', 'support/**/*.ts'],
|
|
186
209
|
format: ['progress-bar'],
|
|
187
210
|
formatOptions: { colorsEnabled: true },
|
|
188
211
|
},
|
|
@@ -239,32 +262,37 @@ TS
|
|
|
239
262
|
plan_write "e2e/support/world.ts" "$WORLD_TS"
|
|
240
263
|
|
|
241
264
|
AUTH_TS=$(cat <<'TS'
|
|
242
|
-
// Project-side
|
|
243
|
-
//
|
|
244
|
-
//
|
|
245
|
-
//
|
|
246
|
-
|
|
265
|
+
// Project-side sign-in handler. The cabinet-verify baseline step
|
|
266
|
+
// "I am signed in as the {role} role" handles the no-auth case
|
|
267
|
+
// itself: when CABINET_VERIFY_<ROLE>_EMAIL and _PASSWORD are both
|
|
268
|
+
// blank, the harness navigates to "/" and continues. This file is
|
|
269
|
+
// only consulted when credentials ARE set, i.e. when you actually
|
|
270
|
+
// have an auth flow to drive. Wire it up by calling
|
|
271
|
+
// setSignInHandler(signInAs) at module load (the call at the bottom
|
|
272
|
+
// is the registration).
|
|
273
|
+
import { setSignInHandler, type CabinetVerifyWorld } from 'cabinet-verify';
|
|
247
274
|
|
|
248
275
|
export async function signInAs(world: CabinetVerifyWorld, role: string): Promise<void> {
|
|
249
276
|
const emailEnv = `CABINET_VERIFY_${role.toUpperCase()}_EMAIL`;
|
|
250
277
|
const passwordEnv = `CABINET_VERIFY_${role.toUpperCase()}_PASSWORD`;
|
|
251
|
-
const email = process.env[emailEnv]
|
|
252
|
-
const password = process.env[passwordEnv]
|
|
253
|
-
if (!email || !password) {
|
|
254
|
-
throw new Error(`signInAs: ${emailEnv} or ${passwordEnv} missing in .env.local`);
|
|
255
|
-
}
|
|
278
|
+
const email = process.env[emailEnv]!;
|
|
279
|
+
const password = process.env[passwordEnv]!;
|
|
256
280
|
|
|
257
|
-
// TODO: replace this stub with
|
|
281
|
+
// TODO: replace this stub with the project sign-in flow.
|
|
258
282
|
// Typical shapes:
|
|
259
283
|
// await world.page.goto(world.baseUrl + '/signin');
|
|
260
284
|
// await world.page.getByLabel('Email').fill(email);
|
|
261
285
|
// await world.page.getByLabel('Password').fill(password);
|
|
262
286
|
// await world.page.getByRole('button', { name: 'Sign in' }).click();
|
|
263
287
|
// await world.page.waitForURL(world.baseUrl + '/app');
|
|
288
|
+
void email;
|
|
289
|
+
void password;
|
|
264
290
|
throw new Error(
|
|
265
|
-
`signInAs: not implemented. Fill in support/auth.ts with
|
|
291
|
+
`signInAs: not implemented. Fill in support/auth.ts with the project sign-in flow.`,
|
|
266
292
|
);
|
|
267
293
|
}
|
|
294
|
+
|
|
295
|
+
setSignInHandler(signInAs);
|
|
268
296
|
TS
|
|
269
297
|
)
|
|
270
298
|
plan_write "e2e/support/auth.ts" "$AUTH_TS"
|
|
@@ -308,6 +336,119 @@ TS
|
|
|
308
336
|
)
|
|
309
337
|
plan_write "e2e/support/preflight.ts" "$PREFLIGHT_TS"
|
|
310
338
|
|
|
339
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
340
|
+
# Optional: isolated test-stack scaffold (gated on CABINET_VERIFY_TEST_STACK=1)
|
|
341
|
+
#
|
|
342
|
+
# When the calibrate phase determines that the project's dev stack
|
|
343
|
+
# writes to a real DB whose contents matter, the /verify learn skill
|
|
344
|
+
# sets these env vars before invoking install.sh. They populate a
|
|
345
|
+
# start-test-stack.sh template the user adapts to the project's boot
|
|
346
|
+
# commands. Skipped entirely when the project answered "no" to the
|
|
347
|
+
# real-DB question — those projects drive the dev stack directly.
|
|
348
|
+
#
|
|
349
|
+
# Inputs (env vars set by /verify learn skill when enabled):
|
|
350
|
+
# CABINET_VERIFY_TEST_STACK "1" to enable
|
|
351
|
+
# CABINET_VERIFY_TEST_DB_FILE Path to real DB file (or empty for non-file DBs)
|
|
352
|
+
# CABINET_VERIFY_TEST_PROXY_CONFIG e.g. "vite.config.ts" (or empty)
|
|
353
|
+
# CABINET_VERIFY_TEST_API_PORT e.g. "3457"
|
|
354
|
+
# CABINET_VERIFY_TEST_DEV_PORT e.g. "5176"
|
|
355
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
356
|
+
|
|
357
|
+
if [[ "${CABINET_VERIFY_TEST_STACK:-}" == "1" ]]; then
|
|
358
|
+
TS_DB_FILE="${CABINET_VERIFY_TEST_DB_FILE:-}"
|
|
359
|
+
TS_PROXY_CFG="${CABINET_VERIFY_TEST_PROXY_CONFIG:-}"
|
|
360
|
+
TS_API_PORT="${CABINET_VERIFY_TEST_API_PORT:-3457}"
|
|
361
|
+
TS_DEV_PORT="${CABINET_VERIFY_TEST_DEV_PORT:-5176}"
|
|
362
|
+
TS_DB_BASE=""
|
|
363
|
+
if [[ -n "$TS_DB_FILE" ]]; then
|
|
364
|
+
TS_DB_BASE=$(basename "$TS_DB_FILE")
|
|
365
|
+
fi
|
|
366
|
+
|
|
367
|
+
START_TEST_STACK=$(cat <<TSH
|
|
368
|
+
#!/usr/bin/env bash
|
|
369
|
+
# Boot the isolated test stack: a separate API server + dev server
|
|
370
|
+
# pointing at a copy of the real DB. Generated by /verify learn when
|
|
371
|
+
# the calibrate phase flagged that the dev stack writes to
|
|
372
|
+
# a real DB.
|
|
373
|
+
#
|
|
374
|
+
# Usage:
|
|
375
|
+
# bash e2e/start-test-stack.sh # foreground (Ctrl-C to stop)
|
|
376
|
+
# bash e2e/start-test-stack.sh --bg # background (writes PIDs to .e2e-pids)
|
|
377
|
+
# bash e2e/start-test-stack.sh --stop # stop a backgrounded stack
|
|
378
|
+
#
|
|
379
|
+
# Ports:
|
|
380
|
+
# API server: ${TS_API_PORT}
|
|
381
|
+
# Dev server: ${TS_DEV_PORT}
|
|
382
|
+
#
|
|
383
|
+
# The Playwright preflight expects CABINET_VERIFY_DEV_URL to be set to
|
|
384
|
+
# http://localhost:${TS_DEV_PORT} in e2e/.env.local for the test stack
|
|
385
|
+
# (override your existing CABINET_VERIFY_DEV_URL).
|
|
386
|
+
|
|
387
|
+
set -euo pipefail
|
|
388
|
+
|
|
389
|
+
REAL_DB="${TS_DB_FILE}"
|
|
390
|
+
TEST_DB="e2e/fixtures/${TS_DB_BASE:-test.db}"
|
|
391
|
+
|
|
392
|
+
# 1. Snapshot the real DB into the e2e fixtures dir so the test stack
|
|
393
|
+
# never touches the real one.
|
|
394
|
+
if [[ -n "\$REAL_DB" && -f "\$REAL_DB" ]]; then
|
|
395
|
+
mkdir -p "\$(dirname "\$TEST_DB")"
|
|
396
|
+
cp "\$REAL_DB" "\$TEST_DB"
|
|
397
|
+
fi
|
|
398
|
+
|
|
399
|
+
# 2. Boot the API server pointing at the test DB on the test API port.
|
|
400
|
+
# TODO: replace with the boot command for this project. Common shapes:
|
|
401
|
+
# DB_PATH="\$TEST_DB" PORT=${TS_API_PORT} node server.js &
|
|
402
|
+
# DATABASE_URL="postgres://.../test" PORT=${TS_API_PORT} npm run start:api &
|
|
403
|
+
echo "TODO: implement API server boot in start-test-stack.sh (port ${TS_API_PORT})"
|
|
404
|
+
exit 1
|
|
405
|
+
TSH
|
|
406
|
+
)
|
|
407
|
+
plan_write "e2e/start-test-stack.sh" "$START_TEST_STACK"
|
|
408
|
+
if [[ -f "e2e/start-test-stack.sh" ]]; then
|
|
409
|
+
chmod +x e2e/start-test-stack.sh 2>/dev/null || true
|
|
410
|
+
fi
|
|
411
|
+
|
|
412
|
+
README_MD=$(cat <<RDM
|
|
413
|
+
# e2e/ — Walkthrough verification harness
|
|
414
|
+
|
|
415
|
+
Cucumber + Playwright scenarios driven via cabinet-verify.
|
|
416
|
+
|
|
417
|
+
## Running the isolated test stack
|
|
418
|
+
|
|
419
|
+
This dev stack writes to a real DB (calibrated during
|
|
420
|
+
\`/verify learn\`). To keep test runs from polluting that DB, the
|
|
421
|
+
harness expects an isolated test stack on:
|
|
422
|
+
|
|
423
|
+
- API: http://localhost:${TS_API_PORT}
|
|
424
|
+
- Dev: http://localhost:${TS_DEV_PORT}
|
|
425
|
+
|
|
426
|
+
\`\`\`bash
|
|
427
|
+
bash e2e/start-test-stack.sh # foreground
|
|
428
|
+
bash e2e/start-test-stack.sh --bg # background
|
|
429
|
+
\`\`\`
|
|
430
|
+
|
|
431
|
+
\`e2e/start-test-stack.sh\` was generated as a template — the API
|
|
432
|
+
boot command lives behind a \`TODO\` marker. Adapt it to your stack
|
|
433
|
+
(node, uvicorn, npm script, etc.), then point \`CABINET_VERIFY_DEV_URL\`
|
|
434
|
+
at http://localhost:${TS_DEV_PORT} in \`.env.local\` and run
|
|
435
|
+
\`npm run verify\`.
|
|
436
|
+
|
|
437
|
+
## Where state lives
|
|
438
|
+
|
|
439
|
+
- \`e2e/fixtures/\` — copies of real data the test stack reads. Safe
|
|
440
|
+
to wipe; regenerated on next \`start-test-stack.sh\`.
|
|
441
|
+
- \`e2e/reports/\` — verdict ledger output.
|
|
442
|
+
- \`e2e/screenshots/\` — failure screenshots.
|
|
443
|
+
|
|
444
|
+
## More
|
|
445
|
+
|
|
446
|
+
See \`.claude/skills/verify/SKILL.md\` for the full /verify workflow.
|
|
447
|
+
RDM
|
|
448
|
+
)
|
|
449
|
+
plan_write "e2e/README.md" "$README_MD"
|
|
450
|
+
fi
|
|
451
|
+
|
|
311
452
|
# .gitignore updates at project root.
|
|
312
453
|
GITIGNORE_ROOT=".gitignore"
|
|
313
454
|
GITIGNORE_ENTRIES=("e2e/reports/" "e2e/screenshots/" "e2e/fixtures/articles/" "e2e/.env.local" "e2e/node_modules/")
|
|
@@ -34,7 +34,80 @@ Used to seed `CABINET_VERIFY_DEV_URL` in `.env.local.example` and
|
|
|
34
34
|
preflight. Confidence-high default: read `vite.config.{ts,js}` for the
|
|
35
35
|
configured port; if found, use it without asking.
|
|
36
36
|
|
|
37
|
-
### 2.
|
|
37
|
+
### 2. Test isolation (real DB risk)
|
|
38
|
+
|
|
39
|
+
If the project's dev stack writes to a real database — production
|
|
40
|
+
mirror, cached prod data, or a shared dev DB — running Playwright
|
|
41
|
+
scenarios against it pollutes that DB with test artifacts. A stray
|
|
42
|
+
deploy can leak `verify-smoke parent` rows into prod.
|
|
43
|
+
|
|
44
|
+
Ask these four questions sequentially. Skip the follow-ups if the
|
|
45
|
+
first answer is "no".
|
|
46
|
+
|
|
47
|
+
**2a. Does your dev stack write to a real DB?**
|
|
48
|
+
|
|
49
|
+
> "Does `npm run dev` (or equivalent) read/write a real database file
|
|
50
|
+
> or shared instance — i.e., one whose contents you actually care
|
|
51
|
+
> about preserving? (yes/no)"
|
|
52
|
+
|
|
53
|
+
If "no" → skip 2b–2d. The harness drives the dev stack directly.
|
|
54
|
+
Record `testIsolation.enabled = false`.
|
|
55
|
+
|
|
56
|
+
If "yes" → continue. Record `testIsolation.enabled = true`.
|
|
57
|
+
|
|
58
|
+
**2b. Where is the DB file?**
|
|
59
|
+
|
|
60
|
+
> "Where does the dev stack read its DB from? Paste the path
|
|
61
|
+
> (e.g., `data/flow.db`, `~/.local/share/myapp/db.sqlite`)."
|
|
62
|
+
|
|
63
|
+
Record `testIsolation.dbFile`. The scaffold copies this to
|
|
64
|
+
`e2e/fixtures/<basename>` so the test API can be pointed at the copy.
|
|
65
|
+
If the project uses Postgres or another non-file DB, capture the
|
|
66
|
+
connection-string env var name instead and record it in the answer notes —
|
|
67
|
+
the generated `start-test-stack.sh` will be a template the user
|
|
68
|
+
adapts.
|
|
69
|
+
|
|
70
|
+
**2c. Where is the dev server's API proxy config?**
|
|
71
|
+
|
|
72
|
+
> "Where does your dev server (Vite, Next, etc.) configure its API
|
|
73
|
+
> proxy? Paste the file path (e.g., `vite.config.ts`, `next.config.js`).
|
|
74
|
+
> Skip if your app talks to the API via absolute URL or there's no
|
|
75
|
+
> separate dev server."
|
|
76
|
+
|
|
77
|
+
Record `testIsolation.proxyConfig`. The scaffold emits a one-line
|
|
78
|
+
patch instruction telling the user to read an env var
|
|
79
|
+
(`<APP>_API_TARGET`) for the proxy target so the test stack can
|
|
80
|
+
override it.
|
|
81
|
+
|
|
82
|
+
**2d. Test stack ports**
|
|
83
|
+
|
|
84
|
+
> "What ports should the test stack use to avoid colliding with your
|
|
85
|
+
> normal dev stack? Default: API on 3457, dev server on 5176."
|
|
86
|
+
|
|
87
|
+
Record `testIsolation.apiPort` and `testIsolation.devPort`.
|
|
88
|
+
|
|
89
|
+
#### What happens with these answers
|
|
90
|
+
|
|
91
|
+
If `testIsolation.enabled = true`, the skill sets the following env
|
|
92
|
+
vars before calling `install.sh`:
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
CABINET_VERIFY_TEST_STACK=1
|
|
96
|
+
CABINET_VERIFY_TEST_DB_FILE=<2b answer>
|
|
97
|
+
CABINET_VERIFY_TEST_PROXY_CONFIG=<2c answer or empty>
|
|
98
|
+
CABINET_VERIFY_TEST_API_PORT=<2d apiPort>
|
|
99
|
+
CABINET_VERIFY_TEST_DEV_PORT=<2d devPort>
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
`install.sh` emits `e2e/start-test-stack.sh` populated from these
|
|
103
|
+
values and updates `e2e/README.md` with a "Running the isolated test
|
|
104
|
+
stack" section.
|
|
105
|
+
|
|
106
|
+
If `testIsolation.enabled = false`, the env vars are NOT set,
|
|
107
|
+
install.sh does not emit `start-test-stack.sh`, and no isolation
|
|
108
|
+
scaffold lands in the project.
|
|
109
|
+
|
|
110
|
+
### 3. Persona reality check
|
|
38
111
|
|
|
39
112
|
For each persona suggested by the DiscoveryReport (e.g., `@as-admin`,
|
|
40
113
|
`@as-fresh`), ask:
|
|
@@ -46,7 +119,7 @@ For each persona suggested by the DiscoveryReport (e.g., `@as-admin`,
|
|
|
46
119
|
The answer determines whether the persona's scenarios survive into the
|
|
47
120
|
generated set or fold into the main user scenario.
|
|
48
121
|
|
|
49
|
-
###
|
|
122
|
+
### 4. Cost tag interpretation
|
|
50
123
|
|
|
51
124
|
For each `@api-small` or `@api-large` scenario:
|
|
52
125
|
|
|
@@ -54,7 +127,7 @@ For each `@api-small` or `@api-large` scenario:
|
|
|
54
127
|
> expected per-run cost? Default: \$0.05–0.15. Skip this scenario from
|
|
55
128
|
> `npm run verify:cheap` if cost exceeds your comfort threshold."
|
|
56
129
|
|
|
57
|
-
###
|
|
130
|
+
### 5. Leftover surface triage
|
|
58
131
|
|
|
59
132
|
For each item in `DraftReport.leftover`:
|
|
60
133
|
|
|
@@ -66,7 +139,7 @@ Capped at 5 leftover-triage questions per session to avoid drowning
|
|
|
66
139
|
the user. If more than 5 leftovers exist, generate the first 5 +
|
|
67
140
|
note the rest for a follow-up `/verify learn` invocation.
|
|
68
141
|
|
|
69
|
-
###
|
|
142
|
+
### 6. Live UI crawl opt-in (if not already run)
|
|
70
143
|
|
|
71
144
|
If the dev stack is up and the user wants stronger coverage:
|
|
72
145
|
|
|
@@ -77,7 +150,7 @@ If the dev stack is up and the user wants stronger coverage:
|
|
|
77
150
|
If yes, re-run discovery with the crawl subagent enabled and
|
|
78
151
|
re-prompt cabinet-qa with the expanded surface set.
|
|
79
152
|
|
|
80
|
-
###
|
|
153
|
+
### 7. Scenario name confirmation
|
|
81
154
|
|
|
82
155
|
For each scenario in DraftReport.scenarios, show the proposed name +
|
|
83
156
|
1-line journey summary:
|
|
@@ -89,7 +162,7 @@ If a scenario name is ambiguous or de[sic]ify-coloured (e.g., still
|
|
|
89
162
|
references a domain term from the cabinet-qa pass), the user
|
|
90
163
|
overrides here.
|
|
91
164
|
|
|
92
|
-
###
|
|
165
|
+
### 8. Generate confirmation
|
|
93
166
|
|
|
94
167
|
Final summary before writing files:
|
|
95
168
|
|
|
@@ -82,6 +82,34 @@ and the user should narrow `/verify learn` to a specific surface
|
|
|
82
82
|
(e.g., "learn admin flows only"). For v0.1.0, the skill doesn't
|
|
83
83
|
support surface filtering — escalate to the user.
|
|
84
84
|
|
|
85
|
+
## Routing shape (path vs hash)
|
|
86
|
+
|
|
87
|
+
While scanning routes (subagent 1), determine whether the project uses
|
|
88
|
+
**path routing** (`/forecast`, `/people`) or **hash routing**
|
|
89
|
+
(`#forecast`, `#people`). Hash routing is common in projects with no
|
|
90
|
+
backend server, single-bundle SPAs deployed on static hosts, or
|
|
91
|
+
projects that started with React Router's `HashRouter` for legacy
|
|
92
|
+
reasons.
|
|
93
|
+
|
|
94
|
+
Signals that suggest hash routing:
|
|
95
|
+
|
|
96
|
+
- `import { HashRouter } from 'react-router-dom'` in the app entry
|
|
97
|
+
- `useHashTab`, `parseHash`, `window.location.hash` references in
|
|
98
|
+
routing-adjacent files
|
|
99
|
+
- A route table where entries look like `{ hash: 'forecast', ... }`
|
|
100
|
+
instead of `{ path: '/forecast', ... }`
|
|
101
|
+
- Any link element using `href="#foo"` for in-app navigation rather
|
|
102
|
+
than anchor links to page sections
|
|
103
|
+
|
|
104
|
+
If hash routing is detected, emit a `routingShape: "hash"` field in
|
|
105
|
+
the discovery report so the generate phase produces `#route` instead
|
|
106
|
+
of `/route` in generated `.feature` files. Otherwise, default to
|
|
107
|
+
`routingShape: "path"`.
|
|
108
|
+
|
|
109
|
+
Without this probe, generated feature files use `When I navigate to
|
|
110
|
+
"/forecast"` against a `#forecast` app and every scenario fails at
|
|
111
|
+
step 1 — Flow's cold-start hit this exact mismatch.
|
|
112
|
+
|
|
85
113
|
## Persona signals
|
|
86
114
|
|
|
87
115
|
While running subagent 1 (route scan), look for auth/admin patterns
|
|
@@ -106,6 +134,7 @@ interface DiscoveryReport {
|
|
|
106
134
|
memoryHits: Array<{ topic: string; source: string; summary: string }>;
|
|
107
135
|
crawlHits?: Array<{ url: string; title: string }>;
|
|
108
136
|
personaSignals: Array<{ signal: string; suggestedPersona: string }>;
|
|
137
|
+
routingShape: "path" | "hash";
|
|
109
138
|
}
|
|
110
139
|
```
|
|
111
140
|
|
|
@@ -27,6 +27,37 @@ For each scenario in the calibrated DraftReport:
|
|
|
27
27
|
|
|
28
28
|
Number of files generated = `2 × DraftReport.scenarios.length`.
|
|
29
29
|
|
|
30
|
+
## Routing shape
|
|
31
|
+
|
|
32
|
+
The discovery report (`discover.md` "Routing shape (path vs hash)" section) carries a
|
|
33
|
+
`routingShape: "path" | "hash"` field. When rendering `When I navigate
|
|
34
|
+
to "..."` lines:
|
|
35
|
+
|
|
36
|
+
- `path` (default): emit `When I navigate to "/forecast"`
|
|
37
|
+
- `hash`: emit `When I navigate to "#forecast"`
|
|
38
|
+
|
|
39
|
+
A hash-routing project that gets `/forecast` features fails at every
|
|
40
|
+
navigate step — Flow's cold-start hit this. The discover phase probes
|
|
41
|
+
for hash routing specifically to prevent it.
|
|
42
|
+
|
|
43
|
+
## Test-isolation passthrough
|
|
44
|
+
|
|
45
|
+
If calibrate phase recorded `testIsolation.enabled = true`, set
|
|
46
|
+
these env vars before invoking `install.sh`:
|
|
47
|
+
|
|
48
|
+
```
|
|
49
|
+
CABINET_VERIFY_TEST_STACK=1
|
|
50
|
+
CABINET_VERIFY_TEST_DB_FILE=<calibrate 2b answer>
|
|
51
|
+
CABINET_VERIFY_TEST_PROXY_CONFIG=<calibrate 2c answer or empty>
|
|
52
|
+
CABINET_VERIFY_TEST_API_PORT=<calibrate 2d apiPort>
|
|
53
|
+
CABINET_VERIFY_TEST_DEV_PORT=<calibrate 2d devPort>
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
install.sh emits `e2e/start-test-stack.sh` (template with a TODO
|
|
57
|
+
marker for the API boot command) and `e2e/README.md` (operator
|
|
58
|
+
instructions for the isolated stack). The scaffold is skipped
|
|
59
|
+
entirely when `testIsolation.enabled = false`.
|
|
60
|
+
|
|
30
61
|
## Pre-write checks
|
|
31
62
|
|
|
32
63
|
Before writing:
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# /verify — Testability recipes
|
|
2
|
+
|
|
3
|
+
Catalogue of patterns that surface during real `/verify learn` work
|
|
4
|
+
where the obvious test approach turns out to be a dead end. Each
|
|
5
|
+
entry documents the problem, why standard test tooling fails, the
|
|
6
|
+
workaround that lets the scenario proceed, the real fix that should
|
|
7
|
+
land in the consuming project, and the detection signal so the
|
|
8
|
+
discover phase can flag it early.
|
|
9
|
+
|
|
10
|
+
Recipes are upstream-owned. Projects extend with their own
|
|
11
|
+
`phases/recipes-project.md` for patterns specific to their stack.
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Recipe 1: dnd-kit drag-and-drop is not driveable from Playwright
|
|
16
|
+
|
|
17
|
+
**Problem.** Scenarios that reorder list items via dnd-kit's
|
|
18
|
+
`PointerSensor` can't be driven from any standard Playwright path:
|
|
19
|
+
`page.mouse.down/move/up`, CDP-level pointer events, or the keyboard
|
|
20
|
+
sensor (which dnd-kit ships but rarely activates by default).
|
|
21
|
+
|
|
22
|
+
**Why it's hard.** dnd-kit's PointerSensor uses an activation
|
|
23
|
+
constraint (delay or distance) that gates dragstart. Playwright's
|
|
24
|
+
synthetic pointer events fire too fast for the delay constraint, and
|
|
25
|
+
the `movementX/Y` values don't satisfy the distance constraint
|
|
26
|
+
because Playwright moves to absolute coordinates, not deltas.
|
|
27
|
+
Headless Chromium's input event semantics differ subtly from a
|
|
28
|
+
real browser, and dnd-kit's measurement of "is the pointer outside
|
|
29
|
+
the activation radius" comes back negative.
|
|
30
|
+
|
|
31
|
+
**Workaround for the scenario.** Use an API surrogate. If the reorder
|
|
32
|
+
ultimately persists via a mutation (PATCH /api/items/order), call the
|
|
33
|
+
API directly from the step body. Assert the resulting order via the
|
|
34
|
+
UI in the next step.
|
|
35
|
+
|
|
36
|
+
**Real fix in the consuming project.** Add a programmatic reorder
|
|
37
|
+
hook to the component, gated on `NODE_ENV === 'test'` or a
|
|
38
|
+
`__test__` data attribute. Expose `window.__test__.reorder(from, to)`
|
|
39
|
+
in the test build. The scenario then drives reordering through that
|
|
40
|
+
hook instead of through dnd-kit's sensor.
|
|
41
|
+
|
|
42
|
+
**Detection signal.** Discovery phase reports any import of
|
|
43
|
+
`@dnd-kit/core` or `@dnd-kit/sortable`. The first scenario that
|
|
44
|
+
needs to verify a reorder files a finding against the consuming
|
|
45
|
+
project with title "dnd-kit test seam needed".
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Recipe 2: Dynamic `<input>` for file upload silently defeats filechooser
|
|
50
|
+
|
|
51
|
+
**Problem.** The React pattern `document.createElement('input')`,
|
|
52
|
+
attach handler, `.click()` (used to trigger a file picker without a
|
|
53
|
+
permanent visible input) defeats Playwright's
|
|
54
|
+
`page.waitForEvent('filechooser')`. The chooser opens, `setFiles` is
|
|
55
|
+
accepted, the input's `change` event fires — but no bytes reach the
|
|
56
|
+
upload handler downstream because the input element is GC'd before
|
|
57
|
+
the upload code reads it.
|
|
58
|
+
|
|
59
|
+
**Why it's hard.** Playwright's filechooser API assumes a persistent
|
|
60
|
+
`<input type="file">`. With a transient one, the filechooser event
|
|
61
|
+
references an input that no longer exists by the time the upload
|
|
62
|
+
handler runs. There's no console warning; the upload "succeeds" with
|
|
63
|
+
zero bytes.
|
|
64
|
+
|
|
65
|
+
**Workaround for the scenario.** Skip this step in the harness and
|
|
66
|
+
file a finding. Trying to drive transient inputs costs more time
|
|
67
|
+
than the verification gains.
|
|
68
|
+
|
|
69
|
+
**Real fix in the consuming project.** Use a persistent hidden
|
|
70
|
+
`<input ref={...}>` that the component triggers via `ref.click()`.
|
|
71
|
+
Filechooser semantics work as documented.
|
|
72
|
+
|
|
73
|
+
**Detection signal.** Discovery phase greps for the pattern
|
|
74
|
+
`document.createElement('input')` followed by `.click()` within ~5
|
|
75
|
+
lines. Any hit triggers a finding "transient file input — test
|
|
76
|
+
seam required" against the consuming project.
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## Recipe 3: Hash routing vs path routing mismatch
|
|
81
|
+
|
|
82
|
+
**Problem.** Generated feature files use `When I navigate to "/forecast"`
|
|
83
|
+
against an app that serves routes at `#forecast`. Every scenario fails
|
|
84
|
+
at the navigate step because the dev URL `http://localhost:5173/forecast`
|
|
85
|
+
404s while `http://localhost:5173/#forecast` works.
|
|
86
|
+
|
|
87
|
+
**Why it's hard.** The mismatch is silent at generation time.
|
|
88
|
+
Cold-start operators see "step failed" and assume a selector issue rather
|
|
89
|
+
than a routing-shape issue. The fix is one character per scenario but
|
|
90
|
+
finding the pattern takes a half-hour.
|
|
91
|
+
|
|
92
|
+
**Workaround for the scenario.** Hand-edit `When I navigate to "..."`
|
|
93
|
+
lines to use `#route` form.
|
|
94
|
+
|
|
95
|
+
**Real fix in the consuming project.** None — hash routing is a
|
|
96
|
+
legitimate choice. The fix lives in `/verify learn`'s discover phase:
|
|
97
|
+
detect routing shape and emit the correct form in generated
|
|
98
|
+
`.feature` files. See `phases/discover.md` "Routing shape (path vs
|
|
99
|
+
hash)" section.
|
|
100
|
+
|
|
101
|
+
**Detection signal.** Discovery's routing-shape probe surfaces this
|
|
102
|
+
before generation. If a project switches routing shape after `learn`
|
|
103
|
+
ran, `/verify update` should catch the mismatch on the next scenario
|
|
104
|
+
run.
|
|
105
|
+
|
|
106
|
+
---
|
|
107
|
+
|
|
108
|
+
## Adding new recipes
|
|
109
|
+
|
|
110
|
+
When a `/verify learn` cold-start hits a 30-min+ testability gotcha,
|
|
111
|
+
add it here as the next recipe with the same five fields. The pattern
|
|
112
|
+
catches itself: future operators reading recipes.md before starting
|
|
113
|
+
avoid the same time sink.
|
|
@@ -86,33 +86,65 @@ give the operator visual landmarks during the run.
|
|
|
86
86
|
|
|
87
87
|
## Generated step-definition stub shape
|
|
88
88
|
|
|
89
|
+
The five baseline step handlers (`Given the local dev stack is up`,
|
|
90
|
+
`Given I am signed in as the "{role}" role`, `When I navigate to {string}`,
|
|
91
|
+
`Then check {string} {}`, `Then ask the human {string}`) are registered
|
|
92
|
+
by `cabinet-verify` itself when the World module is imported.
|
|
93
|
+
Per-scenario files contain **only** the scenario-specific assertion bodies,
|
|
94
|
+
registered by checkId via `registerCheck`.
|
|
95
|
+
|
|
89
96
|
For each scenario, generate `e2e/steps/scenario-{N}.ts` with:
|
|
90
97
|
|
|
91
98
|
```ts
|
|
92
|
-
import {
|
|
93
|
-
import {
|
|
94
|
-
import { CabinetVerifyWorld } from 'cabinet-verify';
|
|
99
|
+
import { registerCheck } from 'cabinet-verify';
|
|
100
|
+
import type { CabinetVerifyWorld } from 'cabinet-verify';
|
|
95
101
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
102
|
+
// One registerCheck call per `check "N.NN slug"` step in the feature
|
|
103
|
+
// file. The function body is the real assertion — fill in as you
|
|
104
|
+
// verdict the scenario for the first time.
|
|
99
105
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
// quoted arg ('the workspace heading is visible' etc) is in _rest.
|
|
104
|
-
throw new Error('not implemented');
|
|
105
|
-
});
|
|
106
|
+
registerCheck('N.01 slug-name', async (world: CabinetVerifyWorld) => {
|
|
107
|
+
// TODO: replace with the real assertion against world.page.
|
|
108
|
+
throw new Error('not implemented');
|
|
106
109
|
});
|
|
107
110
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
const checkId = space >= 0 ? idAndDescription.slice(0, space) : idAndDescription;
|
|
111
|
-
const description = space >= 0 ? idAndDescription.slice(space + 1) : '';
|
|
112
|
-
await askHumanVerdict(this.page, checkId, description);
|
|
111
|
+
registerCheck('N.02 slug-name', async (world: CabinetVerifyWorld) => {
|
|
112
|
+
throw new Error('not implemented');
|
|
113
113
|
});
|
|
114
|
+
|
|
115
|
+
// …one per check step in the scenario.
|
|
114
116
|
```
|
|
115
117
|
|
|
118
|
+
`ask the human "..."` steps need no per-id registration — the baseline
|
|
119
|
+
handler routes straight to `askHumanVerdict`. They only show up in the
|
|
120
|
+
`.feature` file.
|
|
121
|
+
|
|
122
|
+
### When an interaction is not driveable
|
|
123
|
+
|
|
124
|
+
If a step cannot be exercised by Playwright (drag-and-drop via
|
|
125
|
+
dnd-kit, transient file inputs, etc. — see `phases/recipes.md`),
|
|
126
|
+
DO NOT emit a `// Smoke no-op` body. That creates a passing scenario
|
|
127
|
+
that verifies nothing.
|
|
128
|
+
|
|
129
|
+
Two acceptable shapes instead:
|
|
130
|
+
|
|
131
|
+
1. **Skip until testable.** Throw with an explicit "skip" marker and
|
|
132
|
+
file a finding against the consuming project for the test seam.
|
|
133
|
+
|
|
134
|
+
```ts
|
|
135
|
+
registerCheck('N.07 dnd-reorder-applied', async (world) => {
|
|
136
|
+
throw new Error('SKIP: dnd-kit drag is not driveable from Playwright — see recipes.md Recipe 1');
|
|
137
|
+
});
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
2. **API surrogate.** Bypass the UI for the action; verify the result
|
|
141
|
+
via the UI in the next step. This is the documented dnd-kit
|
|
142
|
+
workaround in `recipes.md`.
|
|
143
|
+
|
|
144
|
+
In both cases, file the finding when you spot the gotcha — not later.
|
|
145
|
+
The recipes document records the pattern so the same time sink does
|
|
146
|
+
not recur on the next consumer.
|
|
147
|
+
|
|
116
148
|
The stubs throw on the auto-check assertion bodies. The user fills
|
|
117
149
|
them in as they verdict the scenario for the first time — typical
|
|
118
150
|
workflow is "run it, see what fails, write the assertion, repeat".
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "cabinet-verify",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "Walkthrough verification harness for Claude Cabinet. Cucumber + Playwright scenarios with human-in-the-loop verdict pause.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/src/index.js",
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Five universal Cucumber step handlers that every cabinet-verify
|
|
3
|
+
* consumer used to re-implement by hand. Owning them here keeps
|
|
4
|
+
* generated `scenario-N.ts` files free of boilerplate that has to be
|
|
5
|
+
* declared in exactly one file (cucumber-js throws on duplicates).
|
|
6
|
+
*
|
|
7
|
+
* Given the local dev stack is up
|
|
8
|
+
* Given I am signed in as the "{role}" role
|
|
9
|
+
* When I navigate to {string}
|
|
10
|
+
* Then check {string} {}
|
|
11
|
+
* Then ask the human {string}
|
|
12
|
+
*
|
|
13
|
+
* Imports of this module are side-effectful: the Given/When/Then
|
|
14
|
+
* calls register globally with cucumber-js. `index.ts` imports this
|
|
15
|
+
* for its side effect so projects only need a single transitive
|
|
16
|
+
* `import { CabinetVerifyWorld } from 'cabinet-verify'` for the
|
|
17
|
+
* baseline steps to register.
|
|
18
|
+
*
|
|
19
|
+
* Two extension points:
|
|
20
|
+
*
|
|
21
|
+
* - `setSignInHandler(fn)` — projects with real authentication
|
|
22
|
+
* register their sign-in flow. The baseline `Given I am signed in
|
|
23
|
+
* as the "{role}" role` step looks up `CABINET_VERIFY_<ROLE>_EMAIL`
|
|
24
|
+
* and `_PASSWORD`. If both are blank, the harness treats the
|
|
25
|
+
* project as no-auth and just navigates to `/`. If either is set,
|
|
26
|
+
* the registered handler is invoked.
|
|
27
|
+
*
|
|
28
|
+
* - `registerCheck(idAndSlug, fn)` — projects register per-checkId
|
|
29
|
+
* assertions. The baseline `Then check {string} {}` step calls
|
|
30
|
+
* autoCheck with the registered function. If none is registered
|
|
31
|
+
* for a given checkId, the step throws with an actionable message.
|
|
32
|
+
*/
|
|
33
|
+
|
|
34
|
+
import { Given, When, Then } from '@cucumber/cucumber';
|
|
35
|
+
import { autoCheck } from './auto-check.js';
|
|
36
|
+
import { askHumanVerdict } from './human-verdict.js';
|
|
37
|
+
import { CabinetVerifyWorld } from './world.js';
|
|
38
|
+
|
|
39
|
+
export type SignInHandler = (world: CabinetVerifyWorld, role: string) => Promise<void>;
|
|
40
|
+
export type CheckHandler = (world: CabinetVerifyWorld) => Promise<void>;
|
|
41
|
+
|
|
42
|
+
let signInHandler: SignInHandler | null = null;
|
|
43
|
+
const checkRegistry = new Map<string, CheckHandler>();
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Register the project's sign-in implementation. Called from the
|
|
47
|
+
* generated `support/auth.ts` at module-load time.
|
|
48
|
+
*
|
|
49
|
+
* The baseline `Given I am signed in as the "{role}" role` handler
|
|
50
|
+
* skips this entirely when `CABINET_VERIFY_<ROLE>_EMAIL` and
|
|
51
|
+
* `_PASSWORD` are both blank (no-auth fallback).
|
|
52
|
+
*/
|
|
53
|
+
export function setSignInHandler(handler: SignInHandler): void {
|
|
54
|
+
signInHandler = handler;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* Register a per-checkId assertion. The first argument is the full
|
|
59
|
+
* `"NN.NN slug"` form used in the feature file. The function receives
|
|
60
|
+
* the World and should throw on failure.
|
|
61
|
+
*/
|
|
62
|
+
export function registerCheck(idAndSlug: string, handler: CheckHandler): void {
|
|
63
|
+
if (checkRegistry.has(idAndSlug)) {
|
|
64
|
+
throw new Error(
|
|
65
|
+
`registerCheck: '${idAndSlug}' is already registered. checkIds must be unique across the project.`,
|
|
66
|
+
);
|
|
67
|
+
}
|
|
68
|
+
checkRegistry.set(idAndSlug, handler);
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
Given('the local dev stack is up', async function (this: CabinetVerifyWorld) {
|
|
72
|
+
// Preflight (npm run preflight, invoked before cucumber-js) is the
|
|
73
|
+
// gate that verifies stack reachability. Re-checking here would add
|
|
74
|
+
// an HTTP round-trip to every scenario for no additional signal.
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
Given(
|
|
78
|
+
'I am signed in as the {string} role',
|
|
79
|
+
async function (this: CabinetVerifyWorld, role: string) {
|
|
80
|
+
const emailEnv = `CABINET_VERIFY_${role.toUpperCase()}_EMAIL`;
|
|
81
|
+
const passwordEnv = `CABINET_VERIFY_${role.toUpperCase()}_PASSWORD`;
|
|
82
|
+
const email = process.env[emailEnv];
|
|
83
|
+
const password = process.env[passwordEnv];
|
|
84
|
+
const hasCredentials = (email && email.length > 0) || (password && password.length > 0);
|
|
85
|
+
|
|
86
|
+
this.role = role;
|
|
87
|
+
|
|
88
|
+
if (!hasCredentials) {
|
|
89
|
+
// No-auth project (Flow's local dev has no password, common
|
|
90
|
+
// case). Land on `/` and let the rest of the scenario carry on.
|
|
91
|
+
await this.page.goto(this.baseUrl + '/');
|
|
92
|
+
return;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
if (!signInHandler) {
|
|
96
|
+
throw new Error(
|
|
97
|
+
`signInAs(${role}): ${emailEnv}/${passwordEnv} are set but no sign-in handler was registered. ` +
|
|
98
|
+
`Add \`setSignInHandler(signInAs)\` to support/auth.ts, or clear the env vars for a no-auth run.`,
|
|
99
|
+
);
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
await signInHandler(this, role);
|
|
103
|
+
},
|
|
104
|
+
);
|
|
105
|
+
|
|
106
|
+
When('I navigate to {string}', async function (this: CabinetVerifyWorld, route: string) {
|
|
107
|
+
await this.page.goto(this.baseUrl + route);
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
Then(
|
|
111
|
+
'check {string} {}',
|
|
112
|
+
async function (this: CabinetVerifyWorld, idAndSlug: string, _rest: string) {
|
|
113
|
+
await autoCheck(this, idAndSlug, async () => {
|
|
114
|
+
const handler = checkRegistry.get(idAndSlug);
|
|
115
|
+
if (!handler) {
|
|
116
|
+
throw new Error(
|
|
117
|
+
`check ${idAndSlug}: no assertion registered. ` +
|
|
118
|
+
`Add \`registerCheck('${idAndSlug}', async (world) => { /* assertion */ })\` ` +
|
|
119
|
+
`in the matching steps/scenario-N.ts.`,
|
|
120
|
+
);
|
|
121
|
+
}
|
|
122
|
+
await handler(this);
|
|
123
|
+
});
|
|
124
|
+
},
|
|
125
|
+
);
|
|
126
|
+
|
|
127
|
+
Then(
|
|
128
|
+
'ask the human {string}',
|
|
129
|
+
async function (this: CabinetVerifyWorld, idAndDescription: string) {
|
|
130
|
+
const space = idAndDescription.indexOf(' ');
|
|
131
|
+
const checkId = space >= 0 ? idAndDescription.slice(0, space) : idAndDescription;
|
|
132
|
+
const description = space >= 0 ? idAndDescription.slice(space + 1) : '';
|
|
133
|
+
await askHumanVerdict(this.page, checkId, description);
|
|
134
|
+
},
|
|
135
|
+
);
|
|
@@ -64,3 +64,17 @@ export {
|
|
|
64
64
|
} from './preflight.js';
|
|
65
65
|
|
|
66
66
|
export { CabinetVerifyWorld, type IWorldOptions } from './world.js';
|
|
67
|
+
|
|
68
|
+
// Side-effect import: registers the five baseline Cucumber steps
|
|
69
|
+
// (Given dev-stack-up, Given signed-in-as-role, When navigate, Then
|
|
70
|
+
// check, Then ask-the-human). Projects don't redeclare these — they
|
|
71
|
+
// register per-checkId assertions and an optional sign-in handler via
|
|
72
|
+
// the API re-exported below.
|
|
73
|
+
import './baseline-steps.js';
|
|
74
|
+
|
|
75
|
+
export {
|
|
76
|
+
setSignInHandler,
|
|
77
|
+
registerCheck,
|
|
78
|
+
type SignInHandler,
|
|
79
|
+
type CheckHandler,
|
|
80
|
+
} from './baseline-steps.js';
|