npm - create-claude-cabinet - Versions diffs - 0.25.4 → 0.26.0 - Mend

create-claude-cabinet 0.25.4 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/README.md +46 -5
package/package.json +1 -1
package/templates/skills/debrief/SKILL.md +22 -1
package/templates/skills/verify/SKILL.md +32 -7
package/templates/skills/verify/install.sh +160 -19
package/templates/skills/verify/phases/calibrate.md +79 -6
package/templates/skills/verify/phases/discover.md +29 -0
package/templates/skills/verify/phases/generate.md +31 -0
package/templates/skills/verify/phases/recipes.md +113 -0
package/templates/skills/verify/phases/scenario-template.md +49 -17
package/templates/verify-runtime/package.json +1 -1
package/templates/verify-runtime/src/baseline-steps.ts +135 -0
package/templates/verify-runtime/src/index.ts +14 -0

package/README.md CHANGED Viewed

@@ -131,6 +131,32 @@ hooks — things that keep going wrong become things that can't go wrong.
 - **`/cc-upgrade`** — when Claude Cabinet publishes updates, this skill
   runs the installer for the mechanical parts and walks you through
   what changed conversationally. Intelligence is the merge strategy.
+- **`/cc-feedback`** — file friction with CC itself mid-session
+  without waiting for debrief. When a skill, phase, or convention
+  causes pain, this captures the detail and queues it for upstream
+  delivery to the Claude Cabinet repo.
+### Verify (opt-in, off by default)
+Walkthrough verification harness — Cucumber `.feature` files describing
+user journeys, Playwright running them, and human-in-the-loop verdict
+pauses (Pass / Issue / Skip / Needs-info) at checks that need subjective
+judgment. Replaces flat AC checklists with re-runnable scenarios you can
+read months later.
+- **`/verify`** — run the suite
+- **`/verify learn`** — bootstrap from a cold start. Claude scans
+  routes, memory, git, and the live UI; proposes scenarios; calibrates
+  with you; then generates `.feature` files and step stubs
+- **`/verify update "I changed X"`** — keep scenarios in sync as the
+  product evolves
+- **`/verify backfill <fid>`** — attach a Verify Plan to a pending
+  action's notes
+Enable with `--modules verify` (existing installs merge, nothing else
+disturbed). Runtime lives at `~/.claude-cabinet/verify/<version>/` and
+ships an opinionated `cabinet-verify` npm package built from de[sic]ify's
+e2e harness.
 ## Your Workflow
@@ -158,14 +184,29 @@ that override default behavior for any skill. Write content in a phase
 file to customize it, write `skip: true` to disable it, or leave it
 absent to use the default. No config files, no YAML, no DSL.
+## Adding Modules to an Existing Install
+Some modules (like `verify` and `memory`) are opt-in. To add one
+without touching anything else in your install:
+```
+npx create-claude-cabinet --modules verify --yes
+```
+The `--modules` flag **merges** with your existing install — it adds
+the listed modules to what's already there, it doesn't replace your
+module set. Safe to run on a mature project without losing
+customization. You can pass multiple modules: `--modules verify,memory`.
 ## CLI Options
 ```
-npx create-claude-cabinet                 # Interactive walkthrough
-npx create-claude-cabinet my-project      # Install in ./my-project/
-npx create-claude-cabinet --yes           # Accept all defaults
-npx create-claude-cabinet --yes --no-db   # All defaults, skip database
-npx create-claude-cabinet --dry-run       # Preview without writing files
+npx create-claude-cabinet                         # Interactive walkthrough
+npx create-claude-cabinet my-project              # Install in ./my-project/
+npx create-claude-cabinet --yes                   # Accept all defaults
+npx create-claude-cabinet --yes --no-db           # All defaults, skip database
+npx create-claude-cabinet --dry-run               # Preview without writing files
+npx create-claude-cabinet --modules verify --yes  # Add an opt-in module (merges, doesn't replace)
 ```
 ## What Gets Installed

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "create-claude-cabinet",
-  "version": "0.25.4",
+  "version": "0.26.0",
   "description": "Claude Cabinet — opinionated process scaffolding for Claude Code projects",
   "bin": {
     "create-claude-cabinet": "bin/create-claude-cabinet.js"

package/templates/skills/debrief/SKILL.md CHANGED Viewed

@@ -493,7 +493,8 @@ Read `phases/report.md` for how to present the debrief summary.
 Phases are either **core** (maintain system state) or **presentation**
 (surface information for the user). For lightweight session closes,
-skip presentation phases. Core phases always run.
+skip presentation phases. **Core phases always run — Quick Mode is not
+a license to skip them.**
 - **Core phases** (always run): inventory, close-work,
   cabinet-consultations, audit-pattern-capture, auto-maintenance,
@@ -505,6 +506,26 @@ skip presentation phases. Core phases always run.
 A project that wants a quick debrief variant skips the report and
 outputs a minimal summary instead.
+### What Quick Mode does NOT skip
+**Cabinet-consultations (step 3) is core and MUST run — do NOT skip,
+do NOT paraphrase, do NOT defer.** This is where record-keeper (and
+any other debrief-mandated members) verifies documentation against
+reality. Skipping it is the single most common Quick Mode failure
+mode: the consultations *feel* like overhead because they spawn
+agents, but they are the only mechanism that catches doc drift,
+methodology gaps, and stale state. A debrief without
+cabinet-consultations leaves the next orient reading stale docs.
+**Audit-pattern-capture, methodology-capture, and upstream-feedback
+are instruction phases — they ship with CC and are always required**,
+in Quick Mode as much as in full debrief. Their per-session cost is
+near zero when nothing fires.
+If a session genuinely has no audit findings, no methodology work,
+and no CC friction, those phases self-skip in seconds. That is not
+the same as Claude choosing to skip them.
 ## Extending and Calibration
 See [calibration.md](calibration.md) for the phase-extension pattern

package/templates/skills/verify/SKILL.md CHANGED Viewed

@@ -30,6 +30,9 @@ related:
   - type: file
     path: .claude/skills/verify/phases/backfill.md
     role: "How to draft a ## Verify Plan section for a pending action"
+  - type: file
+    path: .claude/skills/verify/phases/recipes.md
+    role: "Testability gotchas (dnd-kit drag, dynamic file inputs, hash routing) and their workarounds"
   - type: file
     path: cabinet/_briefing.md
     role: "Project identity and configuration"
@@ -125,8 +128,15 @@ scenario change.
 1. Check if `e2e/` exists in the project root. If not, recommend
    `/verify learn` and exit.
-2. Run `npm run verify` from the project's `e2e/` dir.
-3. Surface the output. If failures or I-verdicts landed, suggest
+2. **Test-isolation nudge.** If `e2e/` exists but `e2e/start-test-stack.sh`
+   does not — and the project's dev stack is suspected to write to a
+   real DB (signal: `package.json` references a single `data/`,
+   `db/`, or similar shared persistence path) — surface a one-line
+   note before running: *"No isolated test stack detected. Scenarios
+   will run against your dev stack. Run `/verify learn` to generate
+   an isolation scaffold if your dev DB matters."* Do not block.
+3. Run `npm run verify` from the project's `e2e/` dir.
+4. Surface the output. If failures or I-verdicts landed, suggest
    `npm run report:last` to triage.
 This mode is intentionally thin — the harness is the value, not
@@ -155,8 +165,12 @@ The "learn" flow runs four phases:
    question at a time. Examples: "I see admin routes but only one admin
    user — real persona or fold into main?", "Should the fresh-user
    flow be its own scenario or part of admin?", "What's the dev stack
-   URL for preflight?". Do NOT batch questions — one at a time per
-   project convention.
+   URL for preflight?". Calibrate also includes a **test-isolation
+   probe** — if the project's dev stack writes to a real DB, the
+   skill captures the DB path, dev-server proxy config, and test
+   stack ports so install.sh can emit an `e2e/start-test-stack.sh`
+   scaffold. Do NOT batch questions — one at a time per project
+   convention.
 4. **Generate** (read `phases/generate.md`): write the `.feature`
    files using the template in `phases/scenario-template.md` plus
@@ -221,14 +235,25 @@ once the action runs. Backfill only adds the planning artifact so
 | `update.md` | Default: action fid / diff / free-text dispatch | How change descriptions map to edits |
 | `scenario-template.md` | Default: Gherkin with cost+role tags, NN.NN checkIds | Project-specific scenario shape |
 | `backfill.md` | Default: interview-driven Verify Plan section drafting | Project-specific backfill questions |
+| `recipes.md` | Default: dnd-kit, dynamic file input, hash routing gotchas | Project-specific testability recipes |
 ## Principles
 - **One question at a time.** Calibrate phase NEVER batches questions
   (per CLAUDE.md global convention). Each answer shapes the next.
-- **≤5 scenarios on initial draft.** Force calibration before
-  expansion. Adding scenarios later is cheap; removing scenarios
-  the user didn't ask for is expensive (per process-therapist).
+- **≤5 scenarios on initial draft (cabinet-qa cap).** Force calibration
+  before expansion. Adding scenarios later is cheap; removing scenarios
+  the user did not ask for is expensive (per process-therapist). The
+  cap is load-bearing; do not loosen it without an audit-grade reason.
+- **Depth-first, not shallow-first.** A scenario that touches a
+  surface but verifies nothing is worse than no scenario at all —
+  it occupies a slot in the catalogue and creates a false sense of
+  coverage. The first lap through a scenario should produce real
+  assertions and human-verdict pauses for the parts that genuinely
+  need subjective judgment. If a step cannot be exercised (a
+  testability gotcha — see `phases/recipes.md`), file a finding
+  against the consuming project and mark the step "skip until
+  testable" rather than emitting a no-op stub.
 - **cabinet-qa owns "what's worth a scenario".** /verify learn
   delegates that judgment via subagent; it doesn't re-derive it.
 - **The .feature file is the spec.** Anyone (user, future Claude,

package/templates/skills/verify/install.sh CHANGED Viewed

@@ -131,6 +131,22 @@ plan_mkdir "e2e/reports"
 plan_mkdir "e2e/screenshots"
 # package.json — per CONVENTIONS.md §npm Scripts (frozen contract).
+#
+# Node-version note: `--env-file-if-exists` requires Node 20.12+. We
+# invoke `node` directly (not `NODE_OPTIONS`) because Node 22+ rejects
+# `NODE_OPTIONS='--env-file=...'` ("--env-file= is not allowed in
+# NODE_OPTIONS"). The CLI form is the only path that works across
+# Node 20.12 / 21 / 22+.
+#
+# Cucumber bin path: cucumber-js v11 ships at
+# node_modules/@cucumber/cucumber/bin/cucumber.js. The `cucumber-js`
+# shell wrapper does NOT pass through CLI flags like --import in a
+# way Node honors after the shebang resolves, so we invoke the .js
+# entry directly.
+#
+PREFLIGHT_CMD="node --env-file-if-exists=.env.local node_modules/cabinet-verify/dist/src/cli/preflight.js"
+CUCUMBER_CMD="node --env-file-if-exists=.env.local --import tsx/esm node_modules/@cucumber/cucumber/bin/cucumber.js --import 'steps/**/*.ts' --import 'support/**/*.ts'"
 PACKAGE_JSON=$(cat <<JSON
 {
   "name": "$(basename "$PWD")-e2e",
@@ -138,13 +154,16 @@ PACKAGE_JSON=$(cat <<JSON
   "private": true,
   "type": "module",
   "description": "Walkthrough verification harness (cabinet-verify).",
+  "engines": {
+    "node": ">=20.12"
+  },
   "scripts": {
-    "preflight": "cabinet-verify-preflight",
-    "verify": "npm run preflight && NODE_OPTIONS='--import tsx/esm' cucumber-js --tags '@free and not @manual'",
-    "verify:cheap": "npm run preflight && NODE_OPTIONS='--import tsx/esm' cucumber-js --tags '(@free or @api-small) and not @manual'",
-    "verify:full": "npm run preflight && NODE_OPTIONS='--import tsx/esm' cucumber-js --tags 'not @manual'",
-    "verify:manual": "npm run preflight && NODE_OPTIONS='--import tsx/esm' cucumber-js --tags '@manual'",
-    "verify:scenario": "npm run preflight && NODE_OPTIONS='--import tsx/esm' cucumber-js",
+    "preflight": "${PREFLIGHT_CMD}",
+    "verify": "npm run preflight && ${CUCUMBER_CMD} --tags '@free and not @manual'",
+    "verify:cheap": "npm run preflight && ${CUCUMBER_CMD} --tags '(@free or @api-small) and not @manual'",
+    "verify:full": "npm run preflight && ${CUCUMBER_CMD} --tags 'not @manual'",
+    "verify:manual": "npm run preflight && ${CUCUMBER_CMD} --tags '@manual'",
+    "verify:scenario": "npm run preflight && ${CUCUMBER_CMD}",
     "report:last": "cabinet-verify-report-last",
     "report:status": "cabinet-verify-report-status",
     "install:browsers": "playwright install chromium"
@@ -179,10 +198,14 @@ CUCUMBER_JS=$(cat <<'JS'
 // cabinet-verify scaffold. Reads scenarios from features/ and step
 // definitions from steps/ + support/. The cabinet-verify package
 // supplies the World base class and lifecycle hooks via support/world.ts.
+//
+// Note: cucumber-js v11 ignores the `import:` config key when
+// invoked via CLI. Step/support import paths are passed as
+// `--import 'steps/**/*.ts' --import 'support/**/*.ts'` in the npm
+// scripts in package.json — that is the source of truth, not this file.
 export default {
   default: {
     paths: ['features/**/*.feature'],
-    import: ['steps/**/*.ts', 'support/**/*.ts'],
     format: ['progress-bar'],
     formatOptions: { colorsEnabled: true },
   },
@@ -239,32 +262,37 @@ TS
 plan_write "e2e/support/world.ts" "$WORLD_TS"
 AUTH_TS=$(cat <<'TS'
-// Project-side auth helper. The cabinet-verify base class records the
-// role tag (@as-user / @as-admin / @as-fresh) into this.role; this
-// helper is where you implement the sign-in for each role using your
-// project's auth flow.
-import { CabinetVerifyWorld } from 'cabinet-verify';
+// Project-side sign-in handler. The cabinet-verify baseline step
+// "I am signed in as the {role} role" handles the no-auth case
+// itself: when CABINET_VERIFY_<ROLE>_EMAIL and _PASSWORD are both
+// blank, the harness navigates to "/" and continues. This file is
+// only consulted when credentials ARE set, i.e. when you actually
+// have an auth flow to drive. Wire it up by calling
+// setSignInHandler(signInAs) at module load (the call at the bottom
+// is the registration).
+import { setSignInHandler, type CabinetVerifyWorld } from 'cabinet-verify';
 export async function signInAs(world: CabinetVerifyWorld, role: string): Promise<void> {
   const emailEnv = `CABINET_VERIFY_${role.toUpperCase()}_EMAIL`;
   const passwordEnv = `CABINET_VERIFY_${role.toUpperCase()}_PASSWORD`;
-  const email = process.env[emailEnv];
-  const password = process.env[passwordEnv];
-  if (!email || !password) {
-    throw new Error(`signInAs: ${emailEnv} or ${passwordEnv} missing in .env.local`);
-  }
+  const email = process.env[emailEnv]!;
+  const password = process.env[passwordEnv]!;
-  // TODO: replace this stub with your project's sign-in flow.
+  // TODO: replace this stub with the project sign-in flow.
   // Typical shapes:
   //   await world.page.goto(world.baseUrl + '/signin');
   //   await world.page.getByLabel('Email').fill(email);
   //   await world.page.getByLabel('Password').fill(password);
   //   await world.page.getByRole('button', { name: 'Sign in' }).click();
   //   await world.page.waitForURL(world.baseUrl + '/app');
+  void email;
+  void password;
   throw new Error(
-    `signInAs: not implemented. Fill in support/auth.ts with your project's sign-in flow.`,
+    `signInAs: not implemented. Fill in support/auth.ts with the project sign-in flow.`,
   );
 }
+setSignInHandler(signInAs);
 TS
 )
 plan_write "e2e/support/auth.ts" "$AUTH_TS"
@@ -308,6 +336,119 @@ TS
 )
 plan_write "e2e/support/preflight.ts" "$PREFLIGHT_TS"
+# ──────────────────────────────────────────────────────────────────────
+# Optional: isolated test-stack scaffold (gated on CABINET_VERIFY_TEST_STACK=1)
+#
+# When the calibrate phase determines that the project's dev stack
+# writes to a real DB whose contents matter, the /verify learn skill
+# sets these env vars before invoking install.sh. They populate a
+# start-test-stack.sh template the user adapts to the project's boot
+# commands. Skipped entirely when the project answered "no" to the
+# real-DB question — those projects drive the dev stack directly.
+#
+# Inputs (env vars set by /verify learn skill when enabled):
+#   CABINET_VERIFY_TEST_STACK         "1" to enable
+#   CABINET_VERIFY_TEST_DB_FILE       Path to real DB file (or empty for non-file DBs)
+#   CABINET_VERIFY_TEST_PROXY_CONFIG  e.g. "vite.config.ts" (or empty)
+#   CABINET_VERIFY_TEST_API_PORT      e.g. "3457"
+#   CABINET_VERIFY_TEST_DEV_PORT      e.g. "5176"
+# ──────────────────────────────────────────────────────────────────────
+if [[ "${CABINET_VERIFY_TEST_STACK:-}" == "1" ]]; then
+  TS_DB_FILE="${CABINET_VERIFY_TEST_DB_FILE:-}"
+  TS_PROXY_CFG="${CABINET_VERIFY_TEST_PROXY_CONFIG:-}"
+  TS_API_PORT="${CABINET_VERIFY_TEST_API_PORT:-3457}"
+  TS_DEV_PORT="${CABINET_VERIFY_TEST_DEV_PORT:-5176}"
+  TS_DB_BASE=""
+  if [[ -n "$TS_DB_FILE" ]]; then
+    TS_DB_BASE=$(basename "$TS_DB_FILE")
+  fi
+  START_TEST_STACK=$(cat <<TSH
+#!/usr/bin/env bash
+# Boot the isolated test stack: a separate API server + dev server
+# pointing at a copy of the real DB. Generated by /verify learn when
+# the calibrate phase flagged that the dev stack writes to
+# a real DB.
+#
+# Usage:
+#   bash e2e/start-test-stack.sh             # foreground (Ctrl-C to stop)
+#   bash e2e/start-test-stack.sh --bg        # background (writes PIDs to .e2e-pids)
+#   bash e2e/start-test-stack.sh --stop      # stop a backgrounded stack
+#
+# Ports:
+#   API server: ${TS_API_PORT}
+#   Dev server: ${TS_DEV_PORT}
+#
+# The Playwright preflight expects CABINET_VERIFY_DEV_URL to be set to
+# http://localhost:${TS_DEV_PORT} in e2e/.env.local for the test stack
+# (override your existing CABINET_VERIFY_DEV_URL).
+set -euo pipefail
+REAL_DB="${TS_DB_FILE}"
+TEST_DB="e2e/fixtures/${TS_DB_BASE:-test.db}"
+# 1. Snapshot the real DB into the e2e fixtures dir so the test stack
+#    never touches the real one.
+if [[ -n "\$REAL_DB" && -f "\$REAL_DB" ]]; then
+  mkdir -p "\$(dirname "\$TEST_DB")"
+  cp "\$REAL_DB" "\$TEST_DB"
+fi
+# 2. Boot the API server pointing at the test DB on the test API port.
+#    TODO: replace with the boot command for this project. Common shapes:
+#      DB_PATH="\$TEST_DB" PORT=${TS_API_PORT} node server.js &
+#      DATABASE_URL="postgres://.../test" PORT=${TS_API_PORT} npm run start:api &
+echo "TODO: implement API server boot in start-test-stack.sh (port ${TS_API_PORT})"
+exit 1
+TSH
+)
+  plan_write "e2e/start-test-stack.sh" "$START_TEST_STACK"
+  if [[ -f "e2e/start-test-stack.sh" ]]; then
+    chmod +x e2e/start-test-stack.sh 2>/dev/null || true
+  fi
+  README_MD=$(cat <<RDM
+# e2e/ — Walkthrough verification harness
+Cucumber + Playwright scenarios driven via cabinet-verify.
+## Running the isolated test stack
+This dev stack writes to a real DB (calibrated during
+\`/verify learn\`). To keep test runs from polluting that DB, the
+harness expects an isolated test stack on:
+- API: http://localhost:${TS_API_PORT}
+- Dev: http://localhost:${TS_DEV_PORT}
+\`\`\`bash
+bash e2e/start-test-stack.sh             # foreground
+bash e2e/start-test-stack.sh --bg        # background
+\`\`\`
+\`e2e/start-test-stack.sh\` was generated as a template — the API
+boot command lives behind a \`TODO\` marker. Adapt it to your stack
+(node, uvicorn, npm script, etc.), then point \`CABINET_VERIFY_DEV_URL\`
+at http://localhost:${TS_DEV_PORT} in \`.env.local\` and run
+\`npm run verify\`.
+## Where state lives
+- \`e2e/fixtures/\` — copies of real data the test stack reads. Safe
+  to wipe; regenerated on next \`start-test-stack.sh\`.
+- \`e2e/reports/\` — verdict ledger output.
+- \`e2e/screenshots/\` — failure screenshots.
+## More
+See \`.claude/skills/verify/SKILL.md\` for the full /verify workflow.
+RDM
+)
+  plan_write "e2e/README.md" "$README_MD"
+fi
 # .gitignore updates at project root.
 GITIGNORE_ROOT=".gitignore"
 GITIGNORE_ENTRIES=("e2e/reports/" "e2e/screenshots/" "e2e/fixtures/articles/" "e2e/.env.local" "e2e/node_modules/")

package/templates/skills/verify/phases/calibrate.md CHANGED Viewed

@@ -34,7 +34,80 @@ Used to seed `CABINET_VERIFY_DEV_URL` in `.env.local.example` and
 preflight. Confidence-high default: read `vite.config.{ts,js}` for the
 configured port; if found, use it without asking.
-### 2. Persona reality check
+### 2. Test isolation (real DB risk)
+If the project's dev stack writes to a real database — production
+mirror, cached prod data, or a shared dev DB — running Playwright
+scenarios against it pollutes that DB with test artifacts. A stray
+deploy can leak `verify-smoke parent` rows into prod.
+Ask these four questions sequentially. Skip the follow-ups if the
+first answer is "no".
+**2a. Does your dev stack write to a real DB?**
+> "Does `npm run dev` (or equivalent) read/write a real database file
+> or shared instance — i.e., one whose contents you actually care
+> about preserving? (yes/no)"
+If "no" → skip 2b–2d. The harness drives the dev stack directly.
+Record `testIsolation.enabled = false`.
+If "yes" → continue. Record `testIsolation.enabled = true`.
+**2b. Where is the DB file?**
+> "Where does the dev stack read its DB from? Paste the path
+> (e.g., `data/flow.db`, `~/.local/share/myapp/db.sqlite`)."
+Record `testIsolation.dbFile`. The scaffold copies this to
+`e2e/fixtures/<basename>` and points the test API at the copy.
+If the project uses Postgres or another non-file DB, capture the
+connection-string env var name instead and note in the answer notes —
+the generated `start-test-stack.sh` will be a template the user
+adapts.
+**2c. Where is the dev server's API proxy config?**
+> "Where does your dev server (Vite, Next, etc.) configure its API
+> proxy? Paste the file path (e.g., `vite.config.ts`, `next.config.js`).
+> Skip if your app talks to the API via absolute URL or there's no
+> separate dev server."
+Record `testIsolation.proxyConfig`. The scaffold emits a one-line
+patch instruction telling the user to read an env var
+(`<APP>_API_TARGET`) for the proxy target so the test stack can
+override it.
+**2d. Test stack ports**
+> "What ports should the test stack use to avoid colliding with your
+> normal dev stack? Default: API on 3457, dev server on 5176."
+Record `testIsolation.apiPort` and `testIsolation.devPort`.
+#### What happens with these answers
+If `testIsolation.enabled = true`, the skill sets the following env
+vars before calling `install.sh`:
+```
+CABINET_VERIFY_TEST_STACK=1
+CABINET_VERIFY_TEST_DB_FILE=<2b answer>
+CABINET_VERIFY_TEST_PROXY_CONFIG=<2c answer or empty>
+CABINET_VERIFY_TEST_API_PORT=<2d apiPort>
+CABINET_VERIFY_TEST_DEV_PORT=<2d devPort>
+```
+`install.sh` emits `e2e/start-test-stack.sh` populated from these
+values and updates `e2e/README.md` with a "Running the isolated test
+stack" section.
+If `testIsolation.enabled = false`, the env vars are NOT set,
+install.sh does not emit `start-test-stack.sh`, and no isolation
+scaffold lands in the project.
+### 3. Persona reality check
 For each persona suggested by the DiscoveryReport (e.g., `@as-admin`,
 `@as-fresh`), ask:
@@ -46,7 +119,7 @@ For each persona suggested by the DiscoveryReport (e.g., `@as-admin`,
 The answer determines whether the persona's scenarios survive into the
 generated set or fold into the main user scenario.
-### 3. Cost tag interpretation
+### 4. Cost tag interpretation
 For each `@api-small` or `@api-large` scenario:
@@ -54,7 +127,7 @@ For each `@api-small` or `@api-large` scenario:
 > expected per-run cost? Default: \$0.05–0.15. Skip this scenario from
 > `npm run verify:cheap` if cost exceeds your comfort threshold."
-### 4. Leftover surface triage
+### 5. Leftover surface triage
 For each item in `DraftReport.leftover`:
@@ -66,7 +139,7 @@ Capped at 5 leftover-triage questions per session to avoid drowning
 the user. If more than 5 leftovers exist, generate the first 5 +
 note the rest for a follow-up `/verify learn` invocation.
-### 5. Live UI crawl opt-in (if not already run)
+### 6. Live UI crawl opt-in (if not already run)
 If the dev stack is up and the user wants stronger coverage:
@@ -77,7 +150,7 @@ If the dev stack is up and the user wants stronger coverage:
 If yes, re-run discovery with the crawl subagent enabled and
 re-prompt cabinet-qa with the expanded surface set.
-### 6. Scenario name confirmation
+### 7. Scenario name confirmation
 For each scenario in DraftReport.scenarios, show the proposed name +
 1-line journey summary:
@@ -89,7 +162,7 @@ If a scenario name is ambiguous or de[sic]ify-coloured (e.g., still
 references a domain term from the cabinet-qa pass), the user
 overrides here.
-### 7. Generate confirmation
+### 8. Generate confirmation
 Final summary before writing files:

package/templates/skills/verify/phases/discover.md CHANGED Viewed

@@ -82,6 +82,34 @@ and the user should narrow `/verify learn` to a specific surface
 (e.g., "learn admin flows only"). For v0.1.0, the skill doesn't
 support surface filtering — escalate to the user.
+## Routing shape (path vs hash)
+While scanning routes (subagent 1), determine whether the project uses
+**path routing** (`/forecast`, `/people`) or **hash routing**
+(`#forecast`, `#people`). Hash routing is common in projects with no
+backend server, single-bundle SPAs deployed on static hosts, or
+projects that started with React Router's `HashRouter` for legacy
+reasons.
+Signals that suggest hash routing:
+- `import { HashRouter } from 'react-router-dom'` in the app entry
+- `useHashTab`, `parseHash`, `window.location.hash` references in
+  routing-adjacent files
+- A route table where entries look like `{ hash: 'forecast', ... }`
+  instead of `{ path: '/forecast', ... }`
+- Any link element using `href="#foo"` for in-app navigation rather
+  than anchor links to page sections
+If hash routing is detected, emit a `routingShape: "hash"` field in
+the discovery report so the generate phase produces `#route` instead
+of `/route` in generated `.feature` files. Otherwise, default to
+`routingShape: "path"`.
+Without this probe, generated feature files use `When I navigate to
+"/forecast"` against a `#forecast` app and every scenario fails at
+step 1 — Flow's cold-start hit this exact mismatch.
 ## Persona signals
 While running subagent 1 (route scan), look for auth/admin patterns
@@ -106,6 +134,7 @@ interface DiscoveryReport {
   memoryHits: Array<{ topic: string; source: string; summary: string }>;
   crawlHits?: Array<{ url: string; title: string }>;
   personaSignals: Array<{ signal: string; suggestedPersona: string }>;
+  routingShape: "path" | "hash";
 }
 ```

package/templates/skills/verify/phases/generate.md CHANGED Viewed

@@ -27,6 +27,37 @@ For each scenario in the calibrated DraftReport:
 Number of files generated = `2 × DraftReport.scenarios.length`.
+## Routing shape
+The discovery report (`discover.md` "Routing shape" section) carries a
+`routingShape: "path" | "hash"` field. When rendering `When I navigate
+to "..."` lines:
+- `path` (default): emit `When I navigate to "/forecast"`
+- `hash`: emit `When I navigate to "#forecast"`
+A hash-routing project that gets `/forecast` features fails at every
+navigate step — Flow's cold-start hit this. The discover phase probes
+for hash routing specifically to prevent it.
+## Test-isolation passthrough
+If calibrate phase recorded `testIsolation.enabled = true`, set
+these env vars before invoking `install.sh`:
+```
+CABINET_VERIFY_TEST_STACK=1
+CABINET_VERIFY_TEST_DB_FILE=<calibrate 2b answer>
+CABINET_VERIFY_TEST_PROXY_CONFIG=<calibrate 2c answer or empty>
+CABINET_VERIFY_TEST_API_PORT=<calibrate 2d apiPort>
+CABINET_VERIFY_TEST_DEV_PORT=<calibrate 2d devPort>
+```
+install.sh emits `e2e/start-test-stack.sh` (template with a TODO
+marker for the API boot command) and `e2e/README.md` (operator
+instructions for the isolated stack). The scaffold is skipped
+entirely when `testIsolation.enabled = false`.
 ## Pre-write checks
 Before writing:

package/templates/skills/verify/phases/recipes.md ADDED Viewed

@@ -0,0 +1,113 @@
+# /verify — Testability recipes
+Catalogue of patterns that surface during real `/verify learn` work
+where the obvious test approach turns out to be a dead end. Each
+entry documents the problem, why standard test tooling fails, the
+workaround that lets the scenario proceed, the real fix that should
+land in the consuming project, and the detection signal so the
+discover phase can flag it early.
+Recipes are upstream-owned. Projects extend with their own
+`phases/recipes-project.md` for patterns specific to their stack.
+---
+## Recipe 1: dnd-kit drag-and-drop is not driveable from Playwright
+**Problem.** Scenarios that reorder list items via dnd-kit's
+`PointerSensor` can't be driven from any standard Playwright path:
+`page.mouse.down/move/up`, CDP-level pointer events, or the keyboard
+sensor (which dnd-kit ships but rarely activates by default).
+**Why it's hard.** dnd-kit's PointerSensor uses an activation
+constraint (delay or distance) that gates dragstart. Playwright's
+synthetic pointer events fire too fast for the delay constraint, and
+the `movementX/Y` values don't satisfy the distance constraint
+because Playwright moves to absolute coordinates, not deltas.
+Headless Chromium's input event semantics differ subtly from a
+real browser, and dnd-kit's measurement of "is the pointer outside
+the activation radius" comes back negative.
+**Workaround for the scenario.** Use an API surrogate. If the reorder
+ultimately persists via a mutation (PATCH /api/items/order), call the
+API directly from the step body. Assert the resulting order via the
+UI in the next step.
+**Real fix in the consuming project.** Add a programmatic reorder
+hook to the component, gated on `NODE_ENV === 'test'` or a
+`__test__` data attribute. Expose `window.__test__.reorder(from, to)`
+in the test build. The scenario then drives reordering through that
+hook instead of through dnd-kit's sensor.
+**Detection signal.** Discovery phase reports any import of
+`@dnd-kit/core` or `@dnd-kit/sortable`. The first scenario that
+needs to verify a reorder files a finding against the consuming
+project with title "dnd-kit test seam needed".
+---
+## Recipe 2: Dynamic `<input>` for file upload silently defeats filechooser
+**Problem.** The React pattern `document.createElement('input')`,
+attach handler, `.click()` (used to trigger a file picker without a
+permanent visible input) defeats Playwright's
+`page.waitForEvent('filechooser')`. The chooser opens, `setFiles` is
+accepted, the input's `change` event fires — but no bytes reach the
+upload handler downstream because the input element is GC'd before
+the upload code reads it.
+**Why it's hard.** Playwright's filechooser API assumes a persistent
+`<input type="file">`. With a transient one, the filechooser event
+references an input that no longer exists by the time the upload
+handler runs. There's no console warning; the upload "succeeds" with
+zero bytes.
+**Workaround for the scenario.** Skip this step in the harness and
+file a finding. Trying to drive transient inputs costs more time
+than the verification gains.
+**Real fix in the consuming project.** Use a persistent hidden
+`<input ref={...}>` that the component triggers via `ref.click()`.
+Filechooser semantics work as documented.
+**Detection signal.** Discovery phase greps for the pattern
+`document.createElement('input')` followed by `.click()` within ~5
+lines. Any hit triggers a finding "transient file input — test
+seam required" against the consuming project.
+---
+## Recipe 3: Hash routing vs path routing mismatch
+**Problem.** Generated feature files use `When I navigate to "/forecast"`
+against an app that serves routes at `#forecast`. Every scenario fails
+at the navigate step because the dev URL `http://localhost:5173/forecast`
+404s while `http://localhost:5173/#forecast` works.
+**Why it's hard.** The mismatch is silent at generation time.
+Cold-start operators see "step failed" and assume a selector issue rather
+than a routing-shape issue. The fix is one character per scenario but
+finding the pattern takes a half-hour.
+**Workaround for the scenario.** Hand-edit `When I navigate to "..."`
+lines to use `#route` form.
+**Real fix in the consuming project.** None — hash routing is a
+legitimate choice. The fix lives in `/verify learn`'s discover phase:
+detect routing shape and emit the correct form in generated
+`.feature` files. See `phases/discover.md` "Routing shape (path vs
+hash)" section.
+**Detection signal.** Discovery's routing-shape probe surfaces this
+before generation. If a project switches routing shape after `learn`
+ran, `/verify update` should catch the mismatch on the next scenario
+run.
+---
+## Adding new recipes
+When a `/verify learn` cold-start hits a 30-min+ testability gotcha,
+add it here as the next recipe with the same five fields. The pattern
+catches itself: future operators reading recipes.md before starting
+avoid the same time sink.

package/templates/skills/verify/phases/scenario-template.md CHANGED Viewed

@@ -86,33 +86,65 @@ give the operator visual landmarks during the run.
 ## Generated step-definition stub shape
+The five baseline step handlers (`Given the local dev stack is up`,
+`Given I am signed in as the "{role}" role`, `When I navigate to {string}`,
+`Then check {string} {}`, `Then ask the human {string}`) are registered
+by `cabinet-verify` itself when the World module is imported.
+Per-scenario files contain **only** the scenario-specific assertion
+bodies, registered by checkId via `registerCheck`.
 For each scenario, generate `e2e/steps/scenario-{N}.ts` with:
 ```ts
-import { Given, When, Then } from '@cucumber/cucumber';
-import { autoCheck, askHumanVerdict } from 'cabinet-verify';
-import { CabinetVerifyWorld } from 'cabinet-verify';
+import { registerCheck } from 'cabinet-verify';
+import type { CabinetVerifyWorld } from 'cabinet-verify';
-When('I navigate to {string}', async function (this: CabinetVerifyWorld, route: string) {
-  await this.page.goto(this.baseUrl + route);
-});
+// One registerCheck call per `check "N.NN slug"` step in the feature
+// file. The function body is the real assertion — fill in as you
+// verdict the scenario for the first time.
-Then('check {string} {}', async function (this: CabinetVerifyWorld, idAndSlug: string, _rest: string) {
-  await autoCheck(this, idAndSlug, async () => {
-    // TODO: replace with the real assertion. The step text after the
-    // quoted arg ('the workspace heading is visible' etc) is in _rest.
-    throw new Error('not implemented');
-  });
+registerCheck('N.01 slug-name', async (world: CabinetVerifyWorld) => {
+  // TODO: replace with the real assertion against world.page.
+  throw new Error('not implemented');
 });
-Then('ask the human {string}', async function (this: CabinetVerifyWorld, idAndDescription: string) {
-  const space = idAndDescription.indexOf(' ');
-  const checkId = space >= 0 ? idAndDescription.slice(0, space) : idAndDescription;
-  const description = space >= 0 ? idAndDescription.slice(space + 1) : '';
-  await askHumanVerdict(this.page, checkId, description);
+registerCheck('N.02 slug-name', async (world: CabinetVerifyWorld) => {
+  throw new Error('not implemented');
 });
+// …one per check step in the scenario.
 ```
+`ask the human "..."` steps need no per-id registration — the baseline
+handler routes straight to `askHumanVerdict`. They only show up in the
+`.feature` file.
+### When an interaction is not driveable
+If a step cannot be exercised by Playwright (drag-and-drop via
+dnd-kit, transient file inputs, etc. — see `phases/recipes.md`),
+DO NOT emit a `// Smoke no-op` body. That creates a passing scenario
+that verifies nothing.
+Two acceptable shapes instead:
+1. **Skip until testable.** Throw with an explicit "skip" marker and
+   file a finding against the consuming project for the test seam.
+   ```ts
+   registerCheck('N.07 dnd-reorder-applied', async (world) => {
+     throw new Error('SKIP: dnd-kit drag is not driveable from Playwright — see recipes.md Recipe 1');
+   });
+   ```
+2. **API surrogate.** Bypass the UI for the action; verify the result
+   via the UI in the next step. This is the documented dnd-kit
+   workaround in `recipes.md`.
+In both cases, file the finding when you spot the gotcha — not later.
+The recipes document records the pattern so the same time sink does
+not recur on the next consumer.
 The stubs throw on the auto-check assertion bodies. The user fills
 them in as they verdict the scenario for the first time — typical
 workflow is "run it, see what fails, write the assertion, repeat".

package/templates/verify-runtime/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "cabinet-verify",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "description": "Walkthrough verification harness for Claude Cabinet. Cucumber + Playwright scenarios with human-in-the-loop verdict pause.",
   "type": "module",
   "main": "./dist/src/index.js",

package/templates/verify-runtime/src/baseline-steps.ts ADDED Viewed

@@ -0,0 +1,135 @@
+/**
+ * Five universal Cucumber step handlers that every cabinet-verify
+ * consumer used to re-implement by hand. Owning them here keeps
+ * generated `scenario-N.ts` files free of boilerplate that has to be
+ * declared in exactly one file (cucumber-js throws on duplicates).
+ *
+ *   Given the local dev stack is up
+ *   Given I am signed in as the "{role}" role
+ *   When  I navigate to {string}
+ *   Then  check {string} {}
+ *   Then  ask the human {string}
+ *
+ * Imports of this module are side-effectful: the Given/When/Then
+ * calls register globally with cucumber-js. `index.ts` imports this
+ * for its side effect so projects only need a single transitive
+ * `import { CabinetVerifyWorld } from 'cabinet-verify'` for the
+ * baseline steps to register.
+ *
+ * Two extension points:
+ *
+ *  - `setSignInHandler(fn)` — projects with real authentication
+ *    register their sign-in flow. The baseline `Given I am signed in
+ *    as the "{role}" role` step looks up `CABINET_VERIFY_<ROLE>_EMAIL`
+ *    and `_PASSWORD`. If both are blank, the harness treats the
+ *    project as no-auth and just navigates to `/`. If either is set,
+ *    the registered handler is invoked (the step throws if none is).
+ *
+ *  - `registerCheck(idAndSlug, fn)` — projects register per-checkId
+ *    assertions. The baseline `Then check {string} {}` step calls
+ *    autoCheck with the registered function. If none is registered
+ *    for a given checkId, the step throws with an actionable message.
+ */
+import { Given, When, Then } from '@cucumber/cucumber';
+import { autoCheck } from './auto-check.js';
+import { askHumanVerdict } from './human-verdict.js';
+import { CabinetVerifyWorld } from './world.js';
+export type SignInHandler = (world: CabinetVerifyWorld, role: string) => Promise<void>; // shape of the callback accepted by setSignInHandler()
+export type CheckHandler = (world: CabinetVerifyWorld) => Promise<void>; // shape of the callback accepted by registerCheck()
+let signInHandler: SignInHandler | null = null; // stays null until setSignInHandler() is called
+const checkRegistry = new Map<string, CheckHandler>(); // keyed by the exact idAndSlug string given to registerCheck()
+/**
+ * Register the project's sign-in implementation. Called from the
+ * generated `support/auth.ts` at module-load time.
+ *
+ * The baseline `Given I am signed in as the "{role}" role` handler
+ * skips this entirely when `CABINET_VERIFY_<ROLE>_EMAIL` and
+ * `_PASSWORD` are both blank (no-auth fallback).
+ *
+ * Only one handler is kept: calling this again overwrites the
+ * previously registered one.
+ *
+ * @param handler - Async function the sign-in step awaits with the
+ *   World and the role name when credentials are present.
+ */
+export function setSignInHandler(handler: SignInHandler): void {
+  signInHandler = handler;
+}
+/**
+ * Register a per-checkId assertion. The first argument is the full
+ * `"NN.NN slug"` form used in the feature file. The function receives
+ * the World and should throw on failure.
+ *
+ * The key is matched verbatim against the quoted argument of the
+ * baseline `Then check {string} {}` step, so it must be spelled
+ * exactly as it appears there.
+ *
+ * @param idAndSlug - Registry key for the assertion.
+ * @param handler - Async assertion invoked with the World.
+ * @throws Error if `idAndSlug` has already been registered.
+ */
+export function registerCheck(idAndSlug: string, handler: CheckHandler): void {
+  if (checkRegistry.has(idAndSlug)) {
+    throw new Error(
+      `registerCheck: '${idAndSlug}' is already registered. checkIds must be unique across the project.`,
+    );
+  }
+  checkRegistry.set(idAndSlug, handler);
+}
+Given('the local dev stack is up', async function (this: CabinetVerifyWorld) {
+  // Intentionally a no-op. Preflight (npm run preflight, invoked before
+  // cucumber-js) is the gate that verifies stack reachability; re-checking
+  // here would add an HTTP round-trip to every scenario for no additional signal.
+});
+Given(
+  'I am signed in as the {string} role',
+  async function (this: CabinetVerifyWorld, role: string) {
+    const emailEnv = `CABINET_VERIFY_${role.toUpperCase()}_EMAIL`; // role is upper-cased as-is to build the variable name
+    const passwordEnv = `CABINET_VERIFY_${role.toUpperCase()}_PASSWORD`;
+    const email = process.env[emailEnv];
+    const password = process.env[passwordEnv];
+    const hasCredentials = (email && email.length > 0) || (password && password.length > 0); // truthy when EITHER variable is non-empty
+    this.role = role; // recorded on the World before any navigation or sign-in
+    if (!hasCredentials) {
+      // No-auth project (Flow's local dev has no password, common
+      // case). Land on `/` and let the rest of the scenario carry on.
+      await this.page.goto(this.baseUrl + '/');
+      return;
+    }
+    if (!signInHandler) { // a credential is set but no handler was registered: fail loudly
+      throw new Error(
+        `signInAs(${role}): ${emailEnv}/${passwordEnv} are set but no sign-in handler was registered. ` +
+          `Add \`setSignInHandler(signInAs)\` to support/auth.ts, or clear the env vars for a no-auth run.`,
+      );
+    }
+    await signInHandler(this, role); // the handler receives the World and the role name exactly as written in the step
+  },
+);
+When('I navigate to {string}', async function (this: CabinetVerifyWorld, route: string) {
+  await this.page.goto(this.baseUrl + route); // plain concatenation: route is appended to baseUrl unchanged
+});
+Then(
+  'check {string} {}',
+  async function (this: CabinetVerifyWorld, idAndSlug: string, _rest: string) { // _rest is the text captured by the trailing `{}`; it is not used
+    await autoCheck(this, idAndSlug, async () => {
+      const handler = checkRegistry.get(idAndSlug); // looked up inside the autoCheck callback, so a missing registration is thrown from within it
+      if (!handler) {
+        throw new Error(
+          `check ${idAndSlug}: no assertion registered. ` +
+            `Add \`registerCheck('${idAndSlug}', async (world) => { /* assertion */ })\` ` +
+            `in the matching steps/scenario-N.ts.`,
+        );
+      }
+      await handler(this); // the registered assertion receives the World
+    });
+  },
+);
+Then(
+  'ask the human {string}',
+  async function (this: CabinetVerifyWorld, idAndDescription: string) {
+    const space = idAndDescription.indexOf(' '); // split point: the first space in the quoted argument
+    const checkId = space >= 0 ? idAndDescription.slice(0, space) : idAndDescription; // no space: the whole argument is the checkId
+    const description = space >= 0 ? idAndDescription.slice(space + 1) : ''; // no space: the description is empty
+    await askHumanVerdict(this.page, checkId, description);
+  },
+);

package/templates/verify-runtime/src/index.ts CHANGED Viewed

@@ -64,3 +64,17 @@ export {
 } from './preflight.js';
 export { CabinetVerifyWorld, type IWorldOptions } from './world.js';
+// Side-effect import: registers the five baseline Cucumber steps
+// (Given dev-stack-up, Given signed-in-as-role, When navigate, Then
+// check, Then ask-the-human). Projects don't redeclare these — they
+// register per-checkId assertions and an optional sign-in handler via
+// the API re-exported below.
+import './baseline-steps.js';
+export {
+  setSignInHandler,
+  registerCheck,
+  type SignInHandler,
+  type CheckHandler,
+} from './baseline-steps.js';