npm - create-claude-cabinet - Versions diffs - 0.41.0 → 0.42.0 - Mend

create-claude-cabinet 0.41.0 → 0.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

package/lib/cli.js +23 -6
package/lib/settings-merge.js +51 -1
package/package.json +1 -1
package/templates/cabinet/advisories-state-schema.md +68 -0
package/templates/cabinet/elicitation-methods.md +70 -0
package/templates/cabinet/eval-protocol.md +20 -0
package/templates/cabinet/skill-output-conventions.md +21 -3
package/templates/engagement-server/__tests__/e2e-skills.test.mjs +147 -0
package/templates/engagement-server/__tests__/server-harness.mjs +72 -0
package/templates/engagement-server/__tests__/server.test.mjs +181 -0
package/templates/hooks/action-completion-gate.sh +5 -2
package/templates/hooks/action-quality-gate.sh +6 -3
package/templates/hooks/bash-output-compress.sh +147 -0
package/templates/hooks/cc-upstream-guard.sh +7 -3
package/templates/hooks/git-guardrails.sh +7 -3
package/templates/hooks/work-tracker-guard.sh +5 -2
package/templates/mux/config/manage-dx.py +2 -3
package/templates/mux/config/muxlib.py +3 -1
package/templates/mux/config/show-dx.py +3 -4
package/templates/rules/enforcement-pipeline.md +1 -1
package/templates/rules/markdown-prose.md +9 -0
package/templates/scripts/skill-usage.mjs +208 -0
package/templates/scripts/watchtower-ring1.mjs +37 -8
package/templates/scripts/watchtower-ring3-close.mjs +24 -6
package/templates/skills/cabinet-anthropic-insider/SKILL.md +6 -0
package/templates/skills/cabinet-deployment/SKILL.md +265 -0
package/templates/skills/cabinet-deployment/phases/scan-scope.md +40 -0
package/templates/skills/cabinet-seo/SKILL.md +150 -0
package/templates/skills/cabinet-vision/SKILL.md +7 -0
package/templates/skills/cc-link/SKILL.md +1 -0
package/templates/skills/cc-publish/SKILL.md +1 -0
package/templates/skills/cc-remember/SKILL.md +1 -0
package/templates/skills/cc-unlink/SKILL.md +1 -0
package/templates/skills/checklist-discover/SKILL.md +27 -25
package/templates/skills/memory/SKILL.md +1 -0
package/templates/skills/menu/SKILL.md +1 -0
package/templates/skills/onboard/SKILL.md +5 -0
package/templates/skills/orient/SKILL.md +80 -8
package/templates/skills/orient/phases/dx-captures.md +5 -3
package/templates/skills/plan/SKILL.md +60 -1
package/templates/skills/seed/SKILL.md +4 -1
package/templates/skills/threads/SKILL.md +144 -0
package/templates/skills/unwrap/SKILL.md +43 -0
package/templates/skills/watchtower/SKILL.md +1 -1
package/templates/workflows/deliberative-audit.js +38 -15

package/lib/cli.js CHANGED Viewed

@@ -4,7 +4,7 @@ const fs = require('fs');
 const os = require('os');
 const crypto = require('crypto');
 const { copyTemplates } = require('./copy');
-const { mergeSettings, healUserSettings, mergeWatchtowerHooks, mergeMuxHooks } = require('./settings-merge');
+const { mergeSettings, healUserSettings, mergeWatchtowerHooks, mergeMuxHooks, mergeBashCompressHooks } = require('./settings-merge');
 const { create: createMetadata, read: readMetadata } = require('./metadata');
 const { setupDb } = require('./db-setup');
 const { setupVerifyRuntime } = require('./verify-setup');
@@ -471,7 +471,7 @@ const MODULES = {
     mandatory: false,
     default: true,
     lean: true,
-    templates: ['hooks/git-guardrails.sh', 'hooks/cc-upstream-guard.sh', 'hooks/skill-telemetry.sh', 'hooks/skill-tool-telemetry.sh', 'hooks/work-tracker-guard.sh', 'hooks/action-quality-gate.sh', 'hooks/action-completion-gate.sh', 'hooks/memory-index-guard.sh', 'scripts/cc-drift-check.cjs'],
+    templates: ['hooks/git-guardrails.sh', 'hooks/cc-upstream-guard.sh', 'hooks/skill-telemetry.sh', 'hooks/skill-tool-telemetry.sh', 'hooks/work-tracker-guard.sh', 'hooks/action-quality-gate.sh', 'hooks/action-completion-gate.sh', 'hooks/memory-index-guard.sh', 'scripts/cc-drift-check.cjs', 'scripts/skill-usage.mjs'],
   },
   'work-tracking': {
     name: 'Work Tracking (pib-db or markdown)',
@@ -488,7 +488,7 @@ const MODULES = {
     mandatory: false,
     default: true,
     lean: true,
-    templates: ['skills/plan', 'skills/execute', 'skills/execute/phases/post-impl-checklist.md', 'skills/debrief/phases/checklist-feedback.md', 'skills/checklist-discover', 'skills/generate-plan-groups', 'skills/execute-group', 'workflows/execute-group-implement.js', 'workflows/execute-group-complete.js', 'skills/investigate', 'cabinet/checkpoint-protocol.md', 'cabinet/qa-dimensions-template.yaml', 'scripts/qa-dimensions-validator.cjs', 'skills/orient/phases/checklist-status.md'],
+    templates: ['skills/plan', 'skills/execute', 'skills/execute/phases/post-impl-checklist.md', 'skills/debrief/phases/checklist-feedback.md', 'skills/checklist-discover', 'skills/generate-plan-groups', 'skills/execute-group', 'workflows/execute-group-implement.js', 'workflows/execute-group-complete.js', 'skills/investigate', 'cabinet/checkpoint-protocol.md', 'cabinet/elicitation-methods.md', 'cabinet/qa-dimensions-template.yaml', 'scripts/qa-dimensions-validator.cjs', 'skills/orient/phases/checklist-status.md'],
   },
   'compliance': {
     name: 'Compliance Stack (rules + enforcement)',
@@ -496,7 +496,7 @@ const MODULES = {
     mandatory: false,
     default: true,
     lean: false,
-    templates: ['rules/enforcement-pipeline.md', 'rules/maintainability.md', 'memory/patterns/_pattern-template.md', 'memory/patterns/pattern-intelligence-first.md'],
+    templates: ['rules/enforcement-pipeline.md', 'rules/maintainability.md', 'rules/markdown-prose.md', 'skills/unwrap', 'memory/patterns/_pattern-template.md', 'memory/patterns/pattern-intelligence-first.md'],
   },
   'memory': {
     name: 'Built-In Memory (cc-remember + reader + validator)',
@@ -533,10 +533,12 @@ const MODULES = {
       'skills/cabinet-boundary-man',
       'skills/cabinet-anthropic-insider', 'skills/cabinet-cc-health',
       'skills/cabinet-data-integrity',
-      'skills/cabinet-debugger', 'skills/cabinet-historian',
+      'skills/cabinet-debugger', 'skills/cabinet-deployment',
+      'skills/cabinet-historian',
       'skills/cabinet-organized-mind', 'skills/cabinet-process-therapist',
       'skills/cabinet-qa', 'skills/cabinet-record-keeper',
       'skills/cabinet-roster-check', 'skills/cabinet-security',
+      'skills/cabinet-seo',
       'skills/cabinet-small-screen', 'skills/cabinet-speed-freak',
       'skills/cabinet-system-advocate', 'skills/cabinet-technical-debt',
       'skills/cabinet-usability', 'skills/cabinet-workflow-cop',
@@ -559,7 +561,7 @@ const MODULES = {
     mandatory: false,
     default: true,
     lean: true,
-    templates: ['skills/onboard', 'skills/seed', 'skills/cc-upgrade', 'skills/cc-link', 'skills/cc-unlink', 'skills/cc-extract', 'skills/cc-feedback'],
+    templates: ['skills/onboard', 'skills/seed', 'skills/cc-upgrade', 'skills/cc-link', 'skills/cc-unlink', 'skills/cc-extract', 'skills/cc-feedback', 'cabinet/elicitation-methods.md'],
   },
   'validate': {
     name: 'Validate',
@@ -654,6 +656,7 @@ const MODULES = {
       'scripts/watchtower-ring3-close.mjs',
       'scripts/watchtower-status.sh',
       'skills/briefing',
+      'skills/threads',
     ],
   },
   mux: {
@@ -675,6 +678,15 @@ const MODULES = {
     postInstall: 'engagement-server-setup',
     templates: [],
   },
+  'bash-compress': {
+    name: 'Bash Output Compression Hook',
+    description: 'PostToolUse hook that compresses noisy Bash stdout (git status walls, npm/yarn install output, find/ls dumps) to reclaim context in long sessions. Off by default. stderr and error/warning lines pass through verbatim; every rewrite carries a visible [compressed] marker; fail-open on any error. Requires the hooks module (it wires into .claude/settings.json).',
+    mandatory: false,
+    default: false,
+    lean: false,
+    requires: ['hooks'],
+    templates: ['hooks/bash-output-compress.sh'],
+  },
 };
 /** Recursively collect all relative file paths under a directory. */
@@ -1292,6 +1304,11 @@ async function run() {
       mergeMuxHooks(settingsPath);
       console.log('  ⚙️  Registered mux worktree health SessionStart hook');
     }
+    if (selectedModules.includes('bash-compress')) {
+      mergeBashCompressHooks(settingsPath);
+      console.log('  ⚙️  Registered bash-output compression PostToolUse hook');
+    }
   }
   // --- Heal user-level ~/.claude/settings.json ---

package/lib/settings-merge.js CHANGED Viewed

@@ -121,6 +121,25 @@ const MUX_HOOKS = {
   ],
 };
+// Opt-in bash-output compression. PostToolUse hook on Bash that compresses
+// known-noisy stdout (git status walls, npm/yarn install output, find/ls
+// dumps) to reclaim context. Off by default; registered only when the
+// `bash-compress` module is selected. The hook itself is fail-open (passes
+// output through untouched on any error) — see templates/hooks/bash-output-compress.sh.
+const BASH_COMPRESS_HOOKS = {
+  PostToolUse: [
+    {
+      matcher: 'Bash', // the hook is attached to the Bash tool only
+      hooks: [
+        {
+          type: 'command',
+          command: '.claude/hooks/bash-output-compress.sh', // mergeBashCompressHooks de-dupes on this string
+        },
+      ],
+    },
+  ],
+};
 // Legacy hook script names that should be stripped on any merge.
 // Centralizes cleanup so a user who skips --migrate-memory but runs
 // any other CC operation still gets omega-era hooks pruned.
@@ -308,4 +327,35 @@ function mergeMuxHooks(settingsPath) {
   fs.writeFileSync(settingsPath, JSON.stringify(settings, null, 2) + '\n');
 }
-module.exports = { mergeSettings, healUserSettings, mergeWatchtowerHooks, mergeMuxHooks, DEFAULT_HOOKS, WATCHTOWER_HOOKS, MUX_HOOKS, LEGACY_HOOK_COMMANDS };
+/**
+ * Merge the opt-in bash-output compression hook into project settings.
+ * Called from the bash-compress module's install path in cli.js — only
+ * registers the PostToolUse Bash hook when that module is selected.
+ * Idempotent (de-dupes by command path).
+ *
+ * Reads the JSON at `settingsPath`, adds each BASH_COMPRESS_HOOKS entry
+ * that is not already present, and rewrites the file (2-space indent,
+ * trailing newline). Does nothing when the file does not exist.
+ *
+ * Caveats visible in the code:
+ * - De-duplication compares only the `command`/`prompt` strings, pooled
+ *   across every existing entry for the event, so the same command
+ *   registered under a different matcher also counts as already present.
+ * - Every existing entry for the event is expected to carry a `hooks`
+ *   array; an entry without one makes `h.hooks.map` throw.
+ * - `JSON.parse` errors are not caught here and propagate to the caller.
+ * - The file is rewritten even when nothing was added.
+ *
+ * @param {string} settingsPath - Path to the settings JSON file to update.
+ * @returns {void}
+ */
+function mergeBashCompressHooks(settingsPath) {
+  if (!fs.existsSync(settingsPath)) return; // no settings file: nothing to merge into
+  const settings = JSON.parse(fs.readFileSync(settingsPath, 'utf8'));
+  if (!settings.hooks) settings.hooks = {};
+  for (const [event, newHooks] of Object.entries(BASH_COMPRESS_HOOKS)) {
+    if (!settings.hooks[event]) {
+      settings.hooks[event] = newHooks; // event not configured yet: take the whole list
+    } else {
+      for (const newHook of newHooks) {
+        const hookKey = h => h.command || h.prompt || ''; // identity of a single hook
+        const existingKeys = settings.hooks[event].flatMap(h =>
+          h.hooks.map(hh => hookKey(hh))
+        );
+        const newKeys = newHook.hooks.map(h => hookKey(h));
+        if (!newKeys.every(k => existingKeys.includes(k))) { // append unless all of its hooks are already registered
+          settings.hooks[event].push(newHook);
+        }
+      }
+    }
+  }
+  fs.writeFileSync(settingsPath, JSON.stringify(settings, null, 2) + '\n');
+}
+module.exports = { mergeSettings, healUserSettings, mergeWatchtowerHooks, mergeMuxHooks, mergeBashCompressHooks, DEFAULT_HOOKS, WATCHTOWER_HOOKS, MUX_HOOKS, BASH_COMPRESS_HOOKS, LEGACY_HOOK_COMMANDS };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "create-claude-cabinet",
-  "version": "0.41.0",
+  "version": "0.42.0",
   "description": "Claude Cabinet — opinionated process scaffolding for Claude Code projects",
   "bin": {
     "create-claude-cabinet": "bin/create-claude-cabinet.js"

package/templates/cabinet/advisories-state-schema.md ADDED Viewed

@@ -0,0 +1,68 @@
+# Advisory dismissal state — schema and rules
+Orient surfaces stack-aware advisories (install the Ruby language server, register the Railway MCP, install `hookify`, …). Without memory, every advisory re-nags every session — the same attention-fatigue pattern the watchtower rings were built to eliminate. This file defines the per-project state that gives advisories a memory, and the exact rules orient follows so an advisory is never *permanently* silenced by accident.
+## Where it lives
+`.claude/cabinet/advisories-state.json` — **per project, generated at runtime**, NOT shipped as a template. Orient creates it on first write. It must never be added to a module's template array: a shipped stub would overwrite a project's real dismissal history on reinstall (the `.ccrc.json` clobber class of bug). If the file is absent, every advisory is treated as never-seen.
+> Worktree note: `.claude/cabinet/` is copied per worktree, so dismissal state can diverge between a worktree and its main checkout. That is acceptable — advisories are advisory — and is the reason this is project-local, not user-global.
+## Schema
+```json
+{
+  "<advisoryId>": {
+    "status": "suggested" | "declined" | "installed",
+    "count": 2,
+    "last_shown": "2026-06-07",
+    "signal": "gemfile+rb"
+  }
+}
+```
+- **`advisoryId`** — a stable id per advisory, e.g. `lsp:ruby`, `lsp:typescript`, `mcp:railway`, `plugin:hookify`.
+- **`status`**
+  - `suggested` — shown, not yet acted on.
+  - `declined` — the user explicitly waved it off.
+  - `installed` — the thing is present (probe confirmed). **Terminal** — never surface again.
+- **`count`** — how many sessions it has been surfaced while still actionable. Drives the "stop nagging" rule.
+- **`last_shown`** — ISO date of the most recent surfacing.
+- **`signal`** — *the key field that makes "resurface if the stack changed" actually work.* A short, deterministic fingerprint of the stack indicators present when the advisory was last shown/declined. For a multi-indicator advisory like Ruby (`Gemfile` OR `*.rb`), the fingerprint records *which* indicators were present (e.g. `gemfile` vs `gemfile+rb`), so a later change is detectable. Without this stored snapshot, orient has only the *current* indicators and no baseline to diff against — which is the gap this schema closes.
+## The rules orient follows
+Before surfacing any advisory, orient computes the advisory's **current signal** (fingerprint of the indicators present now) and reads the stored entry:
+1. **No entry / file absent** → surface it. Write `{status:"suggested", count:1, last_shown:today, signal:current}`.
+2. **`installed`** → never surface (terminal). (Re-probe may flip a `suggested`/`declined` entry to `installed`; never the reverse automatically.)
+3. **`declined`**
+   - current signal **==** stored signal → **silent.** (Optionally surfaced in `/pulse` only.)
+   - current signal **!=** stored signal → the stack changed since the user declined → **re-surface exactly once.** Reset to `{status:"suggested", count:1, last_shown:today, signal:current}`.
+4. **`suggested`**
+   - `count < 2` and signal unchanged → surface again, `count++`, `last_shown=today`.
+   - `count >= 2` and signal unchanged → **go quiet** (mention only in `/pulse`). Do not keep incrementing.
+   - signal **!=** stored signal (at any count) → the stack changed → reset to `{count:1, last_shown:today, signal:current}` and surface.
+### The anti-trap guarantee
+**Any change in the stack signal resets a `suggested` or `declined` advisory to actionable** (`installed` stays terminal) — so no advisory is permanently invisible while the thing it suggests is still relevant *and the project keeps evolving*.
+The one deliberately-sticky case: an advisory whose signal **never changes** once it fires. Example: `plugin:hookify`, keyed on the existence of `.claude/rules/enforcement-pipeline.md` — a file that, once created, stays. A `declined` hookify therefore stays declined. That is intended (the user said no, and nothing about the project changed to revisit it), but it must remain **escapable, not a black hole**:
+- It is still listed in `/pulse` (quiet, not gone).
+- Clearing its entry from `advisories-state.json` (or setting `status` back to `suggested`) re-arms it.
+Document any new advisory's signal source here when you add it, and call out explicitly if its signal is static (like hookify) so the sticky behavior is a known property, not a surprise.
+## Advisory ids in use
+| advisoryId | indicator(s) → signal | install action shown (advisory only — orient never runs it) |
+|---|---|---|
+| `lsp:typescript` | `tsconfig.json` or `*.ts` | `/plugin install typescript-lsp` |
+| `lsp:python` | `pyproject.toml` / `requirements.txt` / `*.py` | `/plugin install pyright-lsp` |
+| `lsp:rust` | `Cargo.toml` | `/plugin install rust-analyzer-lsp` |
+| `lsp:go` | `go.mod` | `/plugin install gopls-lsp` |
+| `lsp:ruby` | `Gemfile` or `*.rb` | `/plugin install ruby-lsp@claude-plugins-official` (also needs `gem install ruby-lsp` AND `ENABLE_LSP_TOOL=1`) |
+| `mcp:railway` | `railway.toml` and no railway key in `~/.claude.json` | local: `railway setup agent -y` · remote: register `mcp.railway.com` (OAuth) |
+| `plugin:hookify` | `.claude/rules/enforcement-pipeline.md` exists and hookify not in `claude plugin list` (signal is **static**) | `/plugin install hookify` |

package/templates/cabinet/elicitation-methods.md ADDED Viewed

@@ -0,0 +1,70 @@
+# Elicitation Methods — structured ways to draw out what the user knows
+CC's interviewing moments (onboard, seed, `/plan` scoping, checklist-discover, debrief) improvise their questioning. This is the shared shelf of structured elicitation techniques skills can consult when they need to draw something out — surfacing a hidden assumption, pressure-testing a plan, widening the option space. It sits on the same shelf as `skill-output-conventions.md`: a reference skills *cite*, not a phase they run.
+Derived from the BMAD-METHOD advanced-elicitation method set (Apache-2.0; see attribution at the end), curated and adapted to CC's constraints. Several entries are classic requirements-elicitation / creative-thinking craft that BMAD also draws on; those are noted.
+## How to use this file
+A skill at an interview step consults this file to *choose how to ask* — it does not run a menu. Pick the one or two techniques that fit the moment and the gap you're trying to close, then ask. The technique shapes the question; the conversation stays a conversation.
+## Fit criteria (why these and not the other 70-odd)
+A technique earns a place here only if it meets all four:
+1. **Conversational register** — it works as plain dialogue, not a worksheet or a numbered menu (terminal prose constraint).
+2. **One question at a time** — it can be run as a sequence of single questions, never a batch. This is a CC hard rule (`CLAUDE.md`): write interview questions one at a time, never batched.
+3. **Fits a named CC moment** — onboard, seed, `/plan` scoping, checklist-discover, or debrief.
+4. **No persona-roleplay dependency** — it doesn't require the user (or Claude) to adopt and switch between named personas to function.
+## The methods
+### First-Principles Thinking
+*Moments: /plan scoping, onboard.* Strip away how it's done today and rebuild from what the thing actually needs to do. Use when a plan is anchored on an existing implementation and you suspect the real requirement is simpler or different.
+- Ask: *"Ignore how this works today — what does it actually need to accomplish, at minimum?"* then, one at a time, *"Which of those are truly required versus inherited from the current approach?"*
+### Pre-mortem
+*Moments: /plan scoping, checklist-discover.* Assume the work shipped and failed; reason backward to the cause. Surfaces risks and edge cases the optimistic framing hides. (BMAD-named.)
+- Ask: *"Imagine this shipped and quietly failed a month later — what's the single most likely reason?"* then *"What would we have needed to know up front to prevent that?"*
+### Inversion
+*Moments: /plan, checklist-discover.* Ask how to *guarantee* failure, then avoid those things. Often easier to enumerate than success conditions. (BMAD-named.)
+- Ask: *"What's the surest way to make this go wrong?"* then *"Which of those are we closest to doing by accident?"*
+### Assumption Surfacing
+*Moments: /plan scoping (pairs with the plan-completeness `[NEEDS CLARIFICATION]` marker), investigate.* Name the unspoken assumptions a plan rests on so the shaky ones get checked before building. (Classic requirements-elicitation craft.)
+- Ask: *"What are we assuming is true here that we haven't actually verified?"* then take them one at a time: *"How would we confirm that one cheaply?"*
+### Socratic Questioning
+*Moments: any.* Challenge a claim with "why?" and "how do you know?" until it rests on something solid. Use sparingly — it's a scalpel, not a default. (BMAD-named.)
+- Ask: *"What makes you confident that's the right call?"* then follow the answer down one level at a time.
+### Constraint Removal
+*Moments: onboard (vision), seed (member design), /plan (widen options).* Drop a constraint, see what becomes possible, then add it back deliberately. Widens the option space when thinking feels boxed in. (BMAD-named.)
+- Ask: *"If [time / scope / the existing schema] weren't a limit, what would you do instead?"* then *"What's the smallest version of that we could actually do?"*
+### Stakeholder Lens
+*Moments: onboard (who is served), seed (whose perspective the member encodes).* Re-ask the question from one stakeholder's point of view at a time — the user, a future maintainer, the end customer. One lens per question; never a round-table battery. (Adapted from BMAD's Stakeholder Mapping to honor the one-question rule.)
+- Ask: *"From the end user's point of view, what would make this a win?"* then, next turn, *"Now from the person who maintains it a year from now — same question."*
+### Analogical Reasoning
+*Moments: seed (member design), onboard (mental model).* Find the closest parallel in another domain and borrow its lessons. Good for naming a fuzzy concept or designing something with no obvious precedent. (BMAD-named.)
+- Ask: *"What existing thing — in or out of software — is this most like?"* then *"What does that parallel get right that we should copy, and where does it break down?"*
+### Five Whys
+*Moments: investigate, /plan problem-framing, debrief.* Trace a stated problem to its root by asking "why" about each answer in turn. Naturally one-at-a-time. (Classic root-cause craft BMAD draws on.)
+- Ask: *"Why is that a problem?"* — and about each answer, *"And why is that?"* — usually three to five levels reaches the root.
+### Expand or Contract for Audience
+*Moments: checklist-discover, onboard, debrief presentation.* Deliberately widen or narrow the level of detail to fit who the output serves. Use when scope or depth feels mismatched to the audience. (BMAD-named.)
+- Ask: *"Who reads this, and do they need more breadth or more depth than we have?"* then adjust one dimension at a time.
+## Considered, not kept
+- **Red Team vs Blue Team** — a multi-round attack/defend battery; the full technique needs adversarial persona-switching and several exchanges. The useful core (steelman the opposing case) is covered by Socratic Questioning and Inversion as single questions.
+- **Six Thinking Hats** — a six-perspective battery requiring sequential persona adoption; fails the no-persona-roleplay and one-question criteria. The Stakeholder Lens covers the salvageable part, one lens at a time.
+- **Any numbered-menu / "pick 1-9" selection flows** — BMAD presents methods as an interactive numbered menu; that interaction model fails the terminal-prose constraint. CC skills choose a technique themselves and just ask.
+## Attribution
+Several methods here are derived from the BMAD-METHOD advanced-elicitation method set (`bmad-code-org/BMAD-METHOD`, Apache License 2.0). Method *names and descriptions* that originate there are marked "(BMAD-named)" above; the adaptations to one-question-at-a-time conversational flow, the CC-moment mapping, and the classic-craft additions are CC's own. BMAD is Apache-2.0 licensed; this derived reference preserves that attribution.

package/templates/cabinet/eval-protocol.md CHANGED Viewed

@@ -70,6 +70,26 @@ Also check:
 If a skill hasn't been invoked 3 times in the last month, that itself is
 a finding (coverage gap or trigger problem).
+**Invocation data — don't eyeball it.** The hooks module's skill-telemetry
+hooks log every skill run to `~/.claude/telemetry/telemetry.jsonl`. Read it
+with the dead-skill reader instead of guessing:
+```
+node scripts/skill-usage.mjs            # human report (dead / stale / active)
+node scripts/skill-usage.mjs --quiet    # prints only if there's something to flag
+node scripts/skill-usage.mjs --json     # structured, for programmatic checks
+node scripts/skill-usage.mjs --days 60  # widen the stale threshold
+```
+It cross-references installed user-invocable skills against the telemetry and
+surfaces **DEAD** (never invoked — removal or trigger-phrase candidates) and
+**STALE** (not invoked within the threshold). Telemetry is global across
+projects, so a skill flagged DEAD was invoked in *no* project — a strong
+signal. Cabinet members and other `user-invocable: false` skills are excluded
+(they run as agents, not slash/Skill calls, so they never appear). Treat the
+output as candidates for judgment, not a verdict — a never-invoked skill may
+have bad trigger phrasing rather than no purpose.
 ### 3. Score Each Assertion
 For each assertion, review the sampled executions and score:

package/templates/cabinet/skill-output-conventions.md CHANGED Viewed

@@ -53,9 +53,27 @@ behaves unexpectedly.
   pick more than one.
 - **"Other" is auto-added** by the harness. Never add an "Other" /
   "Something else" option manually — it duplicates.
-- **No reliable preview/comparison field.** The official schema has no
-  dependable preview surface; don't build conventions on preview
-  behavior or expect side-by-side rendering.
+- **A dialog swallows same-turn prose. Never pair one with an
+  explanation the user must read.** When AskUserQuestion fires, the
+  dialog takes over the screen — any prose streamed in the same turn
+  is effectively invisible at decision time. Two compliant shapes
+  (user-confirmed 2026-06-06, after being bitten twice in one day):
+  1. **Explanation-first (default for read-then-decide loops):** end
+     the turn with the full explanation as prose and a plain-text
+     question ("Accept, edit, or skip?"). Take the user's prose
+     answer. No dialog at all — this also satisfies the bounded-list
+     caveat below for sequential same-shaped decisions.
+  2. **Self-sufficient dialog (for small, comparable artifacts):**
+     the dialog carries ALL decision content itself. Single-select
+     options support a `preview` field (markdown, side-by-side) —
+     put the artifact in the preview, attach the SAME preview to
+     every option so it stays visible whichever option is focused,
+     tradeoffs in option `description`s. Same-turn prose: one-line
+     pointer max. If the content doesn't fit a preview pane, you're
+     in shape 1.
+  (Updated 2026-06-06: earlier guidance said preview was
+  undependable; the harness now renders it reliably for single-select
+  questions. multiSelect still has no preview.)
 - **Unavailable in Task-spawned subagents.** Agents launched via the
   Task tool (execute-group worktree agents, Workflow agents,
   `context:fork`) cannot call AskUserQuestion — they must use prose.

package/templates/engagement-server/__tests__/e2e-skills.test.mjs ADDED Viewed

@@ -0,0 +1,147 @@
+// End-to-end test of the engagement "skills" machinery against a live server.
+// Run: node --test templates/engagement-server/__tests__/e2e-skills.test.mjs
+//
+// The collab-consultant / collab-client / setup-accounts SKILL.md files are
+// prompts, not code — what's testable is the machinery they drive:
+//   - engagement-checklist.mjs  (walkthrough state: visibility, answers, persistence)
+//   - engagement-transport.mjs  (builds the API calls the skills POST/GET)
+//   - engagement-crypto.mjs      (the secure credential envelope)
+// This suite wires all three through the real server with a simulated
+// consultant and a simulated client (a stand-in for Ed) — including the
+// encrypted-credential round-trip, the highest-stakes path in /setup-accounts.
+import { test, before, after } from 'node:test';
+import assert from 'node:assert';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { rmSync } from 'node:fs';
+import { freshDb, addEngagement, addUser, addToken, startServer, tmpDbPath } from './server-harness.mjs';
+import { buildApiSendInstruction, buildApiReceiveInstruction } from '../../engagement/engagement-transport.mjs';
+import { generateKeypair, encryptCredential, decryptCredential, serializeEnvelope, deserializeEnvelope } from '../../engagement/engagement-crypto.mjs';
+import { createState, recordAnswer, recordCredentialSent, computeVisibility, getProgress, saveState, loadState } from '../../engagement/engagement-checklist.mjs';
+const DB = tmpDbPath('e2e'); // throwaway SQLite file path for this suite
+const PORT = 3992;
+const TOK_CONSULTANT = 'e2e-consultant-AAA'; // raw API token seeded for usr_con in before()
+const TOK_CLIENT = 'e2e-client-BBB'; // raw API token seeded for usr_cli in before()
+let server; // assigned in before(), stopped in after()
+const cfg = (token) => ({ endpoint: server.base, token }); // transport config for one caller; reads `server` at call time
+// Execute a transport "instruction" (what the skill hands to the HTTP layer) with fetch; resolves to { status, json }.
+async function exec(instr) {
+  const res = await fetch(instr.url, {
+    method: instr.method,
+    headers: instr.headers,
+    body: instr.body ? JSON.stringify(instr.body) : undefined, // send no body when the instruction has none
+  });
+  return { status: res.status, json: await res.json().catch(() => null) }; // json is null when the response body is not parseable JSON
+}
+before(async () => { // seed one engagement with a consultant and a client, then start the server on that DB
+  const db = freshDb(DB);
+  addEngagement(db, { id: 'eng_e2e', name: 'E2E' });
+  addUser(db, { id: 'usr_con', engagementId: 'eng_e2e', email: 'consultant@x', role: 'consultant' });
+  addUser(db, { id: 'usr_cli', engagementId: 'eng_e2e', email: 'client@x', role: 'client' });
+  addToken(db, { rawToken: TOK_CONSULTANT, userId: 'usr_con' });
+  addToken(db, { rawToken: TOK_CLIENT, userId: 'usr_cli' });
+  db.close(); // close the seeding handle before the server is started against the same file
+  server = await startServer({ dbPath: DB, port: PORT });
+});
+after(() => { server?.stop(); }); // ?. tolerates a before() that failed before `server` was assigned
+// --- 1. Checklist walkthrough state (what /setup-accounts persists) ---
+test('checklist visibility gates a dependent item until its parent is answered', () => {
+  const checklist = {
+    sections: [{
+      key: 'go_live', items: [
+        { key: 'domain', kind: 'decide', prompt: 'Which domain?' },
+        { key: 'dns_manager', kind: 'decide', prompt: 'Who manages DNS?', visibility: { depends_on: 'domain', value_in: ['feeshame.com'] } },
+      ],
+    }],
+  };
+  const state = createState('engagement.yaml');
+  let visible = computeVisibility(checklist, state.answers); // nothing answered yet
+  assert.ok(visible.has('domain'));
+  assert.ok(!visible.has('dns_manager'), 'dependent item hidden until parent answered');
+  recordAnswer(state, 'domain', 'feeshame.com'); // matches the value_in list of dns_manager
+  visible = computeVisibility(checklist, state.answers);
+  assert.ok(visible.has('dns_manager'), 'dependent item appears once parent value matches');
+  const progress = getProgress(checklist, state);
+  assert.strictEqual(progress.completed, 1); // only 'domain' has an answer
+  assert.strictEqual(progress.total, 2);
+});
+test('checklist state round-trips through save/load', async () => {
+  const path = join(tmpdir(), `cc-e2e-state-${process.pid}.json`); // pid-suffixed file in the OS temp dir
+  try {
+    const state = createState('engagement.yaml');
+    recordAnswer(state, 'mail_from', 'hello@carolinalaw.com');
+    await saveState(path, state);
+    const reloaded = await loadState(path);
+    assert.strictEqual(reloaded.answers.mail_from.value, 'hello@carolinalaw.com');
+    assert.ok(reloaded.updated_at);
+  } finally {
+    try { rmSync(path); } catch {} // best-effort cleanup; a missing file is ignored
+  }
+});
+// --- 2. Transport round-trip: consultant sync <-> client, via real instructions ---
+test('consultant sync reaches the client inbox (transport -> server)', async () => {
+  const send = buildApiSendInstruction('packet-payload-001', { ...cfg(TOK_CONSULTANT), message_type: 'packet' });
+  assert.strictEqual(send.url, `${server.base}/api/messages`);
+  assert.strictEqual((await exec(send)).status, 201);
+  const recv = await exec(buildApiReceiveInstruction(cfg(TOK_CLIENT))); // no type option passed here
+  assert.strictEqual(recv.status, 200);
+  assert.strictEqual(recv.json.messages.length, 1);
+  assert.strictEqual(recv.json.messages[0].payload, 'packet-payload-001');
+  assert.strictEqual(recv.json.messages[0].from_role, 'consultant');
+});
+test('client response reaches the consultant inbox', async () => {
+  assert.strictEqual((await exec(buildApiSendInstruction('client-feedback-001', { ...cfg(TOK_CLIENT), message_type: 'item_feedback' }))).status, 201);
+  const recv = await exec(buildApiReceiveInstruction(cfg(TOK_CONSULTANT), { type: 'item_feedback' })); // NOTE(review): presumably filters by message type — confirm in engagement-transport.mjs
+  assert.strictEqual(recv.json.messages.length, 1);
+  assert.strictEqual(recv.json.messages[0].payload, 'client-feedback-001');
+  assert.strictEqual(recv.json.messages[0].from_role, 'client');
+});
+// --- 3. Encrypted credential round-trip (the /setup-accounts secure path) ---
+test('credential is encrypted client-side, delivered, and only the consultant can decrypt it', async () => {
+  const SECRET = 'postmark-server-token-SUPER-SECRET';
+  const { publicKey, privateKey } = await generateKeypair(); // stands in for the consultant's keypair
+  // Client encrypts to the consultant's public key and sends the envelope.
+  const envelope = await encryptCredential(SECRET, publicKey);
+  const serialized = serializeEnvelope(envelope);
+  const send = buildApiSendInstruction(serialized, { ...cfg(TOK_CLIENT), message_type: 'credential' });
+  assert.strictEqual((await exec(send)).status, 201);
+  // The plaintext never travels in the clear.
+  assert.ok(!serialized.includes(SECRET), 'serialized envelope must not contain the plaintext');
+  assert.ok(!JSON.stringify(envelope).includes(SECRET), 'envelope fields must not contain the plaintext');
+  // Consultant receives the credential message and decrypts with the private key.
+  const recv = await exec(buildApiReceiveInstruction(cfg(TOK_CONSULTANT), { type: 'credential' }));
+  assert.strictEqual(recv.json.messages.length, 1);
+  const received = deserializeEnvelope(recv.json.messages[0].payload);
+  const decrypted = await decryptCredential(received, privateKey);
+  assert.strictEqual(decrypted, SECRET, 'consultant recovers the original secret');
+  // Wrong key cannot decrypt.
+  const other = await generateKeypair();
+  await assert.rejects(decryptCredential(received, other.privateKey), 'a different private key must fail to decrypt'); // the string is only the failure message; any rejection passes
+  // The walkthrough records the credential as sent (by envelope id, not value).
+  const state = createState('engagement.yaml');
+  recordCredentialSent(state, 'postmark_token', envelope.envelope_id);
+  assert.strictEqual(state.answers.postmark_token.status, 'sent');
+  assert.strictEqual(state.answers.postmark_token.envelope_id, envelope.envelope_id);
+});

package/templates/engagement-server/__tests__/server-harness.mjs ADDED Viewed

@@ -0,0 +1,72 @@
+// Shared harness for engagement-server integration tests.
+// Spawns the real server.mjs against a throwaway SQLite DB and provides
+// helpers to seed engagements/users/tokens and inspect the DB. Lets the
+// tests exercise the deployed server code without touching any real
+// engagement (every test runs against an isolated temp DB).
+import Database from 'better-sqlite3';
+import { createHash } from 'node:crypto';
+import { spawn } from 'node:child_process';
+import { readFileSync, rmSync } from 'node:fs';
+import { join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { tmpdir } from 'node:os';
+const __dirname = dirname(fileURLToPath(import.meta.url));
+export const SERVER_DIR = join(__dirname, '..');
+export const sha = (raw) => createHash('sha256').update(raw).digest('hex');
+export function tmpDbPath(name) {
+  return join(tmpdir(), `cc-eng-test-${name}-${process.pid}.db`);
+}
+// Create a fresh DB with the server's own schema applied. Returns an open
+// handle the caller seeds, then closes before the server starts.
+export function freshDb(dbPath) {
+  for (const suffix of ['', '-wal', '-shm']) { try { rmSync(dbPath + suffix); } catch {} }
+  const db = new Database(dbPath);
+  db.pragma('journal_mode = WAL');
+  db.exec(readFileSync(join(SERVER_DIR, 'schema.sql'), 'utf-8'));
+  db.pragma('user_version = 1');
+  return db;
+}
+export function addEngagement(db, { id, name = id, authMode = 'local', authConfig = null }) {
+  db.prepare(`INSERT INTO engagements (id,name,auth_mode,auth_config) VALUES (?,?,?,?)`)
+    .run(id, name, authMode, authConfig);
+}
+export function addUser(db, { id, engagementId, name = id, email = null, role }) {
+  db.prepare(`INSERT INTO users (id,engagement_id,name,email,role) VALUES (?,?,?,?,?)`)
+    .run(id, engagementId, name, email, role);
+}
+export function addToken(db, { rawToken, userId, label = null }) {
+  db.prepare(`INSERT INTO api_tokens (token_hash,user_id,label) VALUES (?,?,?)`)
+    .run(sha(rawToken), userId, label);
+}
+// Read-only DB peek (server holds the writer handle; WAL allows concurrent reads).
+export function usersByEmail(dbPath, email) {
+  const db = new Database(dbPath, { readonly: true });
+  const rows = db.prepare(`SELECT id, role FROM users WHERE email = ?`).all(email);
+  db.close();
+  return rows;
+}
+const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
+// Boot the real server.mjs against the throwaway DB and wait for /health.
+export async function startServer({ dbPath, port }) {
+  const env = { ...process.env, DB_PATH: dbPath, PORT: String(port) };
+  delete env.RAILWAY_ENVIRONMENT; // skip HTTPS enforcement locally
+  const proc = spawn('node', ['server.mjs'], { cwd: SERVER_DIR, env });
+  let log = '';
+  proc.stdout.on('data', (d) => { log += d; });
+  proc.stderr.on('data', (d) => { log += d; });
+  const base = `http://127.0.0.1:${port}`;
+  for (let i = 0; i < 100; i++) {
+    try { if ((await fetch(`${base}/health`)).ok) return { base, stop: () => proc.kill('SIGKILL'), log: () => log }; } catch {}
+    await sleep(100);
+  }
+  proc.kill('SIGKILL');
+  throw new Error('engagement server did not become healthy:\n' + log);
+}