create-claude-cabinet 0.44.0 → 0.45.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/README.md +5 -0
  2. package/lib/cli.js +51 -6
  3. package/lib/copy.js +56 -10
  4. package/lib/mux-setup.js +1 -0
  5. package/package.json +1 -1
  6. package/templates/cabinet/checklist-stats-schema.md +104 -0
  7. package/templates/cabinet/checkpoint-protocol.md +17 -5
  8. package/templates/cabinet/qa-dimensions-template.yaml +7 -0
  9. package/templates/cabinet/watchtower-contracts.md +38 -0
  10. package/templates/engagement/pib-db-patches/pib-db-lib.mjs +4 -1
  11. package/templates/hooks/action-completion-gate.sh +17 -0
  12. package/templates/hooks/watchtower-session-start.sh +80 -5
  13. package/templates/mux/__tests__/claude-carveout.fixture.sh +136 -0
  14. package/templates/mux/__tests__/claude-carveout.test.mjs +38 -0
  15. package/templates/mux/__tests__/mux-fail-loud.fixture.sh +254 -0
  16. package/templates/mux/__tests__/mux-fail-loud.test.mjs +41 -0
  17. package/templates/mux/__tests__/worktree-dirty-check.fixture.sh +184 -0
  18. package/templates/mux/__tests__/worktree-dirty-check.test.mjs +35 -0
  19. package/templates/mux/bin/mux +212 -60
  20. package/templates/mux/config/worktree-cleanup.sh +55 -9
  21. package/templates/mux/config/worktree-dirty-check.sh +128 -0
  22. package/templates/mux/config/worktree-session-health.sh +62 -35
  23. package/templates/scripts/__tests__/qa-handoff-aging.e2e.test.mjs +108 -0
  24. package/templates/scripts/__tests__/qa-handoff-gate.test.mjs +335 -0
  25. package/templates/scripts/__tests__/resolve-project.test.mjs +144 -0
  26. package/templates/scripts/__tests__/ring-state-ownership.test.mjs +228 -0
  27. package/templates/scripts/pib-db-lib.mjs +4 -1
  28. package/templates/scripts/pib-db.mjs +4 -1
  29. package/templates/scripts/validate-memory.mjs +6 -2
  30. package/templates/scripts/watchtower-build-context.mjs +12 -8
  31. package/templates/scripts/watchtower-lib.mjs +265 -2
  32. package/templates/scripts/watchtower-migrate-keys.mjs +305 -0
  33. package/templates/scripts/watchtower-queue.mjs +226 -1
  34. package/templates/scripts/watchtower-ring1.mjs +19 -3
  35. package/templates/scripts/watchtower-ring2.mjs +4 -2
  36. package/templates/scripts/watchtower-ring3-close.mjs +92 -88
  37. package/templates/skills/audit/SKILL.md +25 -6
  38. package/templates/skills/audit/phases/checklist-pruning.md +108 -0
  39. package/templates/skills/briefing/SKILL.md +12 -1
  40. package/templates/skills/cabinet/SKILL.md +2 -2
  41. package/templates/skills/collab-consultant/SKILL.md +1 -1
  42. package/templates/skills/debrief/SKILL.md +33 -3
  43. package/templates/skills/debrief/phases/checklist-feedback.md +10 -3
  44. package/templates/skills/debrief/phases/qa-handoff-sweep.md +78 -0
  45. package/templates/skills/engagement-create/SKILL.md +1 -1
  46. package/templates/skills/engagement-help/SKILL.md +1 -1
  47. package/templates/skills/execute/SKILL.md +1 -1
  48. package/templates/skills/execute/phases/post-impl-checklist.md +18 -0
  49. package/templates/skills/execute-group/SKILL.md +76 -24
  50. package/templates/skills/inbox/SKILL.md +30 -7
  51. package/templates/skills/orient/SKILL.md +100 -6
  52. package/templates/skills/orient/phases/checklist-status.md +12 -0
  53. package/templates/skills/plan/SKILL.md +14 -6
  54. package/templates/skills/qa-handoff/SKILL.md +132 -5
  55. package/templates/skills/session-handoff/SKILL.md +165 -0
  56. package/templates/skills/setup-accounts/SKILL.md +1 -1
  57. package/templates/skills/unwrap/SKILL.md +1 -1
  58. package/templates/skills/verify/SKILL.md +2 -2
  59. package/templates/skills/watchtower/SKILL.md +19 -1
  60. package/templates/watchtower/queue/items/item.json.schema +9 -0
  61. package/templates/workflows/deliberative-audit.js +3 -0
  62. package/templates/workflows/execute-group-complete.js +93 -16
  63. package/templates/workflows/execute-group-implement.js +164 -19
package/README.md CHANGED
@@ -241,8 +241,13 @@ npx create-claude-cabinet --yes # Accept all defaults
241
241
  npx create-claude-cabinet --yes --no-db # All defaults, skip database
242
242
  npx create-claude-cabinet --dry-run # Preview without writing files
243
243
  npx create-claude-cabinet --modules verify --yes # Add an opt-in module (merges, doesn't replace)
244
+ npx create-claude-cabinet --frontier-model claude-fable-5 # Designate your frontier model (watchdog)
244
245
  ```
245
246
 
247
+ ### Frontier-model watchdog
248
+
249
+ `--frontier-model <model>` records, once, which model your heavy thinking is supposed to run on. The designation is per-operator (stored in `~/.claude/cc-registry.json` under `frontierModel`, not per-project), and the installer prints the effective value on every run. From then on, `/orient` — and, on watchtower installs, the SessionStart hook — compares the session's actual model against it and leads the briefing with a loud warning on mismatch. The key can be an exact model ID (`claude-fable-5`, exact match required) or a family alias (`fable`, matches any model ID containing it). This is **visibility only**: nothing is pinned, blocked, or rerouted — it just makes "you're accidentally on the wrong model" impossible to miss. A stale key after a model-family transition nags loudly by design; update it with the same flag.
250
+
246
251
  ## What Gets Installed
247
252
 
248
253
  Everything goes into `.claude/` or `scripts/`. Nothing touches your
package/lib/cli.js CHANGED
@@ -3,7 +3,7 @@ const path = require('path');
3
3
  const fs = require('fs');
4
4
  const os = require('os');
5
5
  const crypto = require('crypto');
6
- const { copyTemplates } = require('./copy');
6
+ const { copyTemplates, recordSkip } = require('./copy');
7
7
  const { mergeSettings, healUserSettings, mergeWatchtowerHooks, mergeMuxHooks, mergeBashCompressHooks } = require('./settings-merge');
8
8
  const { create: createMetadata, read: readMetadata } = require('./metadata');
9
9
  const { setupDb } = require('./db-setup');
@@ -460,11 +460,13 @@ const MODULES = {
460
460
  'skills/orient-quick',
461
461
  'skills/debrief',
462
462
  'skills/debrief-quick',
463
+ 'skills/session-handoff',
463
464
  // Instruction phases — always ship, overriding the default skip-phases rule in copy.js
464
465
  'skills/debrief/phases/audit-pattern-capture.md',
465
466
  'skills/debrief/phases/methodology-capture.md',
466
467
  'skills/debrief/phases/record-lessons.md',
467
468
  'skills/debrief/phases/upstream-feedback.md',
469
+ 'skills/debrief/phases/qa-handoff-sweep.md',
468
470
  'skills/menu',
469
471
  ],
470
472
  },
@@ -491,7 +493,7 @@ const MODULES = {
491
493
  mandatory: false,
492
494
  default: true,
493
495
  lean: true,
494
- templates: ['skills/plan', 'skills/execute', 'skills/execute/phases/post-impl-checklist.md', 'skills/debrief/phases/checklist-feedback.md', 'skills/checklist-discover', 'skills/generate-plan-groups', 'skills/execute-group', 'workflows/execute-group-implement.js', 'workflows/execute-group-complete.js', 'skills/investigate', 'cabinet/checkpoint-protocol.md', 'cabinet/elicitation-methods.md', 'cabinet/qa-dimensions-template.yaml', 'scripts/qa-dimensions-validator.cjs', 'skills/orient/phases/checklist-status.md'],
496
+ templates: ['skills/plan', 'skills/execute', 'skills/execute/phases/post-impl-checklist.md', 'skills/debrief/phases/checklist-feedback.md', 'skills/checklist-discover', 'skills/generate-plan-groups', 'skills/execute-group', 'workflows/execute-group-implement.js', 'workflows/execute-group-complete.js', 'skills/investigate', 'cabinet/checkpoint-protocol.md', 'cabinet/elicitation-methods.md', 'cabinet/qa-dimensions-template.yaml', 'cabinet/checklist-stats-schema.md', 'scripts/qa-dimensions-validator.cjs', 'skills/orient/phases/checklist-status.md', 'skills/audit/phases/checklist-pruning.md'],
495
497
  },
496
498
  'compliance': {
497
499
  name: 'Compliance Stack (rules + enforcement)',
@@ -750,6 +752,14 @@ function parseArgs(argv) {
750
752
  else if (arg === '--modules' && i + 1 < args.length) {
751
753
  flags.modules = args[++i].split(',').map(s => s.trim()).filter(Boolean);
752
754
  }
755
+ else if (arg === '--frontier-model' && i + 1 < args.length) {
756
+ // Empty/whitespace values are treated as absent: '' is a substring of
757
+ // every model ID, which would match everything and leave the watchdog
758
+ // permanently silent while appearing configured.
759
+ const value = args[++i].trim();
760
+ if (value) flags.frontierModel = value;
761
+ else flags.frontierModelEmpty = true;
762
+ }
753
763
  else if (!arg.startsWith('-')) flags.targetDir = arg;
754
764
  }
755
765
 
@@ -775,6 +785,11 @@ function printHelp() {
775
785
  disables omega hooks/MCP. Idempotent — safe to re-run.
776
786
  Pair with --dry-run to preview.
777
787
  --unmigrate-memory Roll back --migrate-memory using its backup dir.
788
+ --frontier-model <model> Designate your frontier model (user-level, stored
789
+ in ~/.claude/cc-registry.json). Visibility only: /orient and
790
+ the watchtower SessionStart hook warn loudly when a session
791
+ runs a different model. Does NOT pin or route anything.
792
+ Accepts an exact ID (claude-fable-5) or a family alias (fable).
778
793
  --help, -h Show this help
779
794
 
780
795
  Examples:
@@ -1214,7 +1229,7 @@ async function run() {
1214
1229
  const existingContent = fs.readFileSync(destPath, 'utf8');
1215
1230
  if (existingContent === incoming) {
1216
1231
  totalSkipped++;
1217
- allManifest[mPath] = incomingHash;
1232
+ recordSkip(allManifest, mPath, { identical: true, incomingHash });
1218
1233
  continue;
1219
1234
  }
1220
1235
 
@@ -1230,7 +1245,9 @@ async function run() {
1230
1245
  if (isPhaseFile && !isInstructionPhase && existingContent.trim() !== '' && existingContent.trim() !== incoming.trim()) {
1231
1246
  console.log(` Preserved customized phase: ${tmpl}`);
1232
1247
  totalSkipped++;
1233
- allManifest[mPath] = hashContent(existingContent);
1248
+ // Customized phase = project-owned content → omit from manifest
1249
+ // (recordSkip in copy.js — omission means "not ours").
1250
+ recordSkip(allManifest, mPath);
1234
1251
  continue;
1235
1252
  }
1236
1253
 
@@ -1240,10 +1257,19 @@ async function run() {
1240
1257
  if (existingManifest[mPath]) {
1241
1258
  if (!flags.dryRun) fs.copyFileSync(srcPath, destPath);
1242
1259
  totalOverwritten++;
1260
+ // Log single-file overwrites too — the directory-copy code path (copy.js)
1261
+ // already does. Without this, scripts/ updates are invisible in
1262
+ // install output, masking whether a changed script propagated.
1263
+ console.log(` Updated: ${path.relative(projectDir, destPath)}`);
1264
+ allManifest[mPath] = incomingHash;
1243
1265
  } else {
1244
1266
  totalSkipped++;
1267
+ // Project-created file → omit from manifest entirely. Ownership
1268
+ // classification is manifest-PRESENCE-based, so recording ANY
1269
+ // hash here would mark the file upstream-owned and the NEXT
1270
+ // install would silently overwrite it (act:bf21c95b).
1271
+ recordSkip(allManifest, mPath);
1245
1272
  }
1246
- allManifest[mPath] = incomingHash;
1247
1273
  } else {
1248
1274
  const response = await prompts({
1249
1275
  type: 'select',
@@ -1258,10 +1284,13 @@ async function run() {
1258
1284
  if (response.action === 'overwrite') {
1259
1285
  if (!flags.dryRun) fs.copyFileSync(srcPath, destPath);
1260
1286
  totalOverwritten++;
1287
+ allManifest[mPath] = incomingHash;
1261
1288
  } else {
1262
1289
  totalSkipped++;
1290
+ // Keep: the user claimed this file → project-owned → omit from
1291
+ // the manifest so it is never mistaken for upstream content.
1292
+ recordSkip(allManifest, mPath);
1263
1293
  }
1264
- allManifest[mPath] = incomingHash;
1265
1294
  }
1266
1295
  } else {
1267
1296
  if (!flags.dryRun) fs.copyFileSync(srcPath, destPath);
@@ -1610,6 +1639,18 @@ async function run() {
1610
1639
  // Register with folder name. /onboard fills in name and description later.
1611
1640
  registry.projects.push(entry);
1612
1641
  }
1642
+ // --- Frontier-model designation (visibility watchdog) ---
1643
+ // User-level, per-operator key. Read-preserve-rewrite: only the
1644
+ // frontierModel key is touched; every other key rides through.
1645
+ if (flags.frontierModelEmpty) {
1646
+ console.log(' ⚠ Ignoring empty --frontier-model value (an empty key would match every model and silence the watchdog)');
1647
+ }
1648
+ if (flags.frontierModel) {
1649
+ registry.frontierModel = flags.frontierModel;
1650
+ } else if (typeof registry.frontierModel === 'string' && !registry.frontierModel.trim()) {
1651
+ // Heal a hand-edited empty key — treat as absent (see parseArgs note).
1652
+ delete registry.frontierModel;
1653
+ }
1613
1654
  fs.writeFileSync(registryPath, JSON.stringify(registry, null, 2) + '\n');
1614
1655
  const otherCount = registry.projects.filter(p => p.path !== projectDir).length;
1615
1656
  if (otherCount > 0) {
@@ -1617,6 +1658,10 @@ async function run() {
1617
1658
  } else {
1618
1659
  console.log(' 📋 Registered in project registry');
1619
1660
  }
1661
+ // Self-announcing: print the effective designation on every run.
1662
+ if (registry.frontierModel) {
1663
+ console.log(` 🛰 Frontier model: ${registry.frontierModel} (visibility watchdog — /orient + SessionStart warn on mismatch; nothing is pinned)`);
1664
+ }
1620
1665
  } catch (err) {
1621
1666
  // Non-fatal — registry is nice-to-have
1622
1667
  }
package/lib/copy.js CHANGED
@@ -7,6 +7,49 @@ function hashContent(content) {
7
7
  return crypto.createHash('sha256').update(content).digest('hex').slice(0, 16);
8
8
  }
9
9
 
10
+ /**
11
+ * Record the manifest consequence of SKIPPING a file at install time
12
+ * (act:bf21c95b). The single shared rule for ALL skip sites in BOTH
13
+ * install code paths (lib/copy.js and the single-file branches in
14
+ * lib/cli.js):
15
+ *
16
+ * - A skipped file whose on-disk content differs from the incoming
17
+ * template is NOT upstream content — it is project-owned
18
+ * (project-created, user-kept, or a customized phase). It must be
19
+ * OMITTED from the manifest entirely. An absent entry means "not
20
+ * ours". Recording any hash for it poisons the manifest: ownership
21
+ * classification is manifest-PRESENCE-based, so the next install
22
+ * would classify the file upstream-owned and silently overwrite it
23
+ * (and cc-upstream-guard / cc-drift-check would false-positive on it).
24
+ * - The one exception: a skipped file byte-identical to the incoming
25
+ * template is indistinguishable from upstream content and stays
26
+ * tracked under the template hash.
27
+ *
28
+ * Omission only — never a marker value or an alternate manifest value
29
+ * shape (lesson_shared_json_shape_drift). All manifest consumers
30
+ * (ownership classification, cleanup loop, key migration, cc-drift-check,
31
+ * cc-upstream-guard, lib/reset.js) treat an absent key as "not ours" and
32
+ * leave the file alone.
33
+ *
34
+ * Known limitation (documented, not solved here): manifests already
35
+ * poisoned by past installs cannot retroactively distinguish a recorded
36
+ * project-created file from genuine upstream content. This helper only
37
+ * prevents NEW poisoning.
38
+ *
39
+ * @param {object} manifest manifest object being built for this install
40
+ * @param {string} key manifest key for the skipped file
41
+ * @param {object} [opts]
42
+ * @param {boolean} [opts.identical] on-disk content === incoming template
43
+ * @param {string} [opts.incomingHash] hash of the incoming template content
44
+ */
45
+ function recordSkip(manifest, key, { identical = false, incomingHash = null } = {}) {
46
+ if (identical && incomingHash) {
47
+ manifest[key] = incomingHash;
48
+ } else {
49
+ delete manifest[key];
50
+ }
51
+ }
52
+
10
53
  /**
11
54
  * Recursively copy files from src to dest, surfacing conflicts.
12
55
  * Returns { copied: string[], skipped: string[], overwritten: string[] }
@@ -55,7 +98,8 @@ async function walkAndCopy(srcRoot, destRoot, currentSrc, results, dryRun, skipC
55
98
  const trimmedExisting = existing.trim();
56
99
  if (trimmedExisting !== '' && trimmedExisting !== incoming.trim()) {
57
100
  results.skipped.push(relPath);
58
- results.manifest[relPath] = hashContent(existing);
101
+ // Customized phase = project-owned content → omit from manifest.
102
+ recordSkip(results.manifest, relPath);
59
103
  console.log(` Preserved customized phase: ${displayPath}`);
60
104
  continue;
61
105
  }
@@ -64,7 +108,7 @@ async function walkAndCopy(srcRoot, destRoot, currentSrc, results, dryRun, skipC
64
108
 
65
109
  if (existing === incoming) {
66
110
  results.skipped.push(relPath);
67
- results.manifest[relPath] = incomingHash;
111
+ recordSkip(results.manifest, relPath, { identical: true, incomingHash });
68
112
  continue;
69
113
  }
70
114
 
@@ -82,9 +126,8 @@ async function walkAndCopy(srcRoot, destRoot, currentSrc, results, dryRun, skipC
82
126
  console.log(` Updated: ${displayPath}`);
83
127
  } else {
84
128
  results.skipped.push(relPath);
85
- // Record the hash of what's actually on disk, not the template —
86
- // otherwise the manifest lies about file content after a skip.
87
- results.manifest[relPath] = hashContent(existing);
129
+ // Project-created file → omit from manifest ("not ours").
130
+ recordSkip(results.manifest, relPath);
88
131
  }
89
132
  continue;
90
133
  }
@@ -101,9 +144,9 @@ async function walkAndCopy(srcRoot, destRoot, currentSrc, results, dryRun, skipC
101
144
  });
102
145
 
103
146
  if (!response.action) {
104
- // User cancelled
147
+ // User cancelled → file kept as-is → project-owned → omit.
105
148
  results.skipped.push(relPath);
106
- results.manifest[relPath] = incomingHash;
149
+ recordSkip(results.manifest, relPath);
107
150
  continue;
108
151
  }
109
152
 
@@ -118,17 +161,20 @@ async function walkAndCopy(srcRoot, destRoot, currentSrc, results, dryRun, skipC
118
161
  if (followUp.overwrite && !dryRun) {
119
162
  fs.copyFileSync(srcPath, destPath);
120
163
  results.overwritten.push(relPath);
164
+ results.manifest[relPath] = incomingHash;
121
165
  } else {
166
+ // Diff shown, user kept their file → project-owned → omit.
122
167
  results.skipped.push(relPath);
168
+ recordSkip(results.manifest, relPath);
123
169
  }
124
- results.manifest[relPath] = incomingHash;
125
170
  } else if (response.action === 'overwrite') {
126
171
  if (!dryRun) fs.copyFileSync(srcPath, destPath);
127
172
  results.overwritten.push(relPath);
128
173
  results.manifest[relPath] = incomingHash;
129
174
  } else {
175
+ // 'Keep existing' → project-owned → omit from manifest.
130
176
  results.skipped.push(relPath);
131
- results.manifest[relPath] = incomingHash;
177
+ recordSkip(results.manifest, relPath);
132
178
  }
133
179
  } else {
134
180
  if (!dryRun) {
@@ -169,4 +215,4 @@ function showDiff(existing, incoming, relPath) {
169
215
  console.log('');
170
216
  }
171
217
 
172
- module.exports = { copyTemplates };
218
+ module.exports = { copyTemplates, recordSkip };
package/lib/mux-setup.js CHANGED
@@ -45,6 +45,7 @@ const MANAGED_FILES = [
45
45
  { src: 'config/worktree-session-health.sh', dest: path.join(os.homedir(), '.config', 'mux', 'worktree-session-health.sh'), mode: 0o755 },
46
46
  { src: 'config/worktree-health-popup.sh', dest: path.join(os.homedir(), '.config', 'mux', 'worktree-health-popup.sh'), mode: 0o755 },
47
47
  { src: 'config/worktree-cleanup.sh', dest: path.join(os.homedir(), '.config', 'mux', 'worktree-cleanup.sh'), mode: 0o755 },
48
+ { src: 'config/worktree-dirty-check.sh', dest: path.join(os.homedir(), '.config', 'mux', 'worktree-dirty-check.sh'), mode: 0o755 },
48
49
  { src: 'config/mux.tmux.conf', dest: path.join(os.homedir(), '.config', 'mux', 'mux.tmux.conf') },
49
50
  { src: 'config/unwrap-copy.py', dest: path.join(os.homedir(), '.config', 'mux', 'unwrap-copy.py'), mode: 0o755 },
50
51
  { src: 'config/screenshot-to-clipboard.sh', dest: path.join(os.homedir(), '.config', 'mux', 'screenshot-to-clipboard.sh'), mode: 0o755 },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "create-claude-cabinet",
3
- "version": "0.44.0",
3
+ "version": "0.45.0",
4
4
  "description": "Claude Cabinet — opinionated process scaffolding for Claude Code projects",
5
5
  "bin": {
6
6
  "create-claude-cabinet": "bin/create-claude-cabinet.js"
@@ -0,0 +1,104 @@
1
+ # Checklist Stats — hit-rate sidecar schema and write protocol
2
+
3
+ `.claude/cabinet/checklist-stats.json` records how the change-impact
4
+ checklist (`qa-dimensions.yaml`) performs over time: which dimensions
5
+ fire, which checks actually catch problems, and what pruning verdicts
6
+ the operator has already given. It is the evidence base for the audit
7
+ skill's `checklist-pruning` phase — without it, the checklist only ever
8
+ grows (debrief's `checklist-feedback` is add-only by design) and decays
9
+ into noise.
10
+
11
+ **This file is RUNTIME STATE, generated on first write — never shipped
12
+ as a template.** Shipping it would clobber accumulated stats on every
13
+ reinstall (same rule as `advisories-state-schema.md`). And it never
14
+ lives inside `qa-dimensions.yaml`: config files do not contain runtime
15
+ state.
16
+
17
+ ## Who writes what
18
+
19
+ | Writer | When | What |
20
+ |--------|------|------|
21
+ | `/execute` `post-impl-checklist` phase | every run past its no-op guard | increments `runs`; per triggered dimension increments `fires`, sets `last_fired` |
22
+ | `/debrief` `checklist-feedback` phase | when a session bug WAS caught via a surfaced check | appends to that dimension's `catches` |
23
+ | `/audit` `checklist-pruning` phase | every pruning verdict (including "keep") | appends to `pruning_reviews` |
24
+
25
+ ## Schema (`schema_version: 1`)
26
+
27
+ ```json
28
+ {
29
+ "schema_version": 1,
30
+ "runs": 14,
31
+ "dimensions": {
32
+ "data-coherence": {
33
+ "fires": 12,
34
+ "last_fired": "2026-06-11",
35
+ "catches": [
36
+ {
37
+ "date": "2026-06-10",
38
+ "check": "[run] Run schema validation if any schema or migration file changed.",
39
+ "note": "caught missing FK backfill before commit"
40
+ }
41
+ ]
42
+ }
43
+ },
44
+ "pruning_reviews": [
45
+ {
46
+ "date": "2026-06-11",
47
+ "target": "test-staleness",
48
+ "verdict": "keep",
49
+ "note": "fires often, zero catches, but cheap insurance at moderate severity"
50
+ }
51
+ ]
52
+ }
53
+ ```
54
+
55
+ Field semantics:
56
+
57
+ - **`runs`** — total executions of the post-impl-checklist phase that
58
+ passed its no-op guard, INCLUDING runs where zero dimensions
59
+ triggered. This is the denominator for "never fired in N runs."
60
+ - **`dimensions.<name>.fires`** — number of runs in which the dimension
61
+ triggered (matched at least one changed path). Dimension-level, not
62
+ check-level: checks have no stable IDs, so firing is counted where it
63
+ happens (path match) and catching is attributed by quoting the check.
64
+ - **`dimensions.<name>.catches`** — append-only evidence that a
65
+ surfaced check caught a real issue. `check` quotes the check text as
66
+ written in the yaml at the time.
67
+ - **`pruning_reviews`** — append-only verdict log. `verdict` is one of
68
+ `removed | trimmed | paths-fixed | severity-changed | keep`. The
69
+ pruning phase skips candidates with any verdict in the last 90 days,
70
+ so a "keep" decision is not re-litigated at every audit.
71
+
72
+ ## Write protocol
73
+
74
+ 1. Read the file. If absent, bootstrap the skeleton
75
+ (`{"schema_version": 1, "runs": 0, "dimensions": {}, "pruning_reviews": []}`).
76
+ If present but unparseable, move it aside to
77
+ `checklist-stats.json.corrupt-<YYYY-MM-DD>` (never delete) and
78
+ bootstrap fresh.
79
+ 2. Modify in memory.
80
+ 3. Write to `checklist-stats.json.tmp`, then rename over the original
81
+ (atomic — readers never see a half-written file; concurrent writers can still lose an increment, see "Counts are honest, not precise" below).
82
+
83
+ **Fail-open, always:** a stats read or write failure must never block
84
+ the phase doing the recording. Emit one warning line and continue —
85
+ losing a data point is fine; blocking an execute/debrief/audit run over
86
+ bookkeeping is not.
87
+
88
+ ## Anti-trap rules
89
+
90
+ - **Stats inform; the human decides.** Nothing auto-prunes from this
91
+ data, ever. Low hit-rate is evidence presented at audit, not a
92
+ trigger.
93
+ - **Per-dimension judgment, not universal thresholds.** A high-severity
94
+ security check that fires often and never catches may still be cheap
95
+ insurance; an info-severity check with the same profile is noise.
96
+ The pruning phase presents severity alongside the numbers.
97
+ - **Renames orphan stats.** If a dimension is renamed in
98
+ `qa-dimensions.yaml`, its stats entry goes stale. The pruning phase
99
+ reports entries with no matching dimension as orphans (offer to fold
100
+ or drop them); writers simply start a fresh entry under the new name.
101
+ - **Counts are honest, not precise.** Concurrent sessions can lose an
102
+ increment to a race; the rename-based write keeps the file valid and
103
+ the trend signal is what matters. Do not build exact-count logic on
104
+ top of this file.
@@ -37,7 +37,7 @@ high-stakes reviews is to put judgment in front of the operator.
37
37
  | Mode | Where it runs | What a `stop`/`pause` does | Used by |
38
38
  |------|---------------|----------------------------|---------|
39
39
  | **Interactive CP** | Main session (skill level) | Surfaced to the operator, who decides (proceed / drop / override / abort). Never automatic. | `/execute-group` CP1 |
40
- | **Advisory CP** | Workflow | Recorded in the Completion Report as a concern. Never halts or reverts. The only automatic gate alongside it is `/validate`. | `/execute-group` CP3 |
40
+ | **Advisory CP** | Workflow | Recorded in the Completion Report as a concern. Never halts or reverts. The only automatic gate alongside it is merge-delta `/validate` — new failures vs the group's pre-merge baseline; inherited debt is reported, not gated. | `/execute-group` CP3 |
41
41
  | **Full CP** | Main session or workflow | Halts on `stop`, escalates 3+ `pause` to a halt, requires explicit override. The classic gate. | `/execute` CP1/CP2/CP3 |
42
42
 
43
43
  **Why Interactive and Advisory exist.** `/execute-group` once ran CP1 and CP3
@@ -45,8 +45,20 @@ as autonomous gates inside a single workflow: a cabinet `stop` halted the run
45
45
  or reverted a merge with no human in the loop. False positives there cost real
46
46
  money (a CP1 halted twice consecutively — 1.6M+ tokens — on concerns the plan
47
47
  text already addressed). Moving CP1 to interactive (operator decides) and CP3
48
- to advisory (concerns recorded, `/validate` is the only hard gate) keeps the
49
- review signal while removing the destructive autonomous action.
48
+ to advisory (concerns recorded, merge-delta `/validate` is the only hard
49
+ gate) keeps the review signal while removing the destructive autonomous
50
+ action.
51
+
52
+ **The hard gate is merge-delta, not absolute.** `/execute-group` captures a
53
+ `/validate` baseline on main before the group's first merge. Only failures
54
+ NOT in that baseline (i.e. failures the group itself introduced) gate a merge
55
+ or completion. Failures that pre-date the group are inherited debt: listed
56
+ loudly in the Completion Report's `pre_existing_debt` section, never gated.
57
+ This too is field-driven — two consecutive groups were gated on documented
58
+ pre-existing main debt with zero merge-delta regressions, and the manual
59
+ recovery (judge the delta by hand, close the plans) ran identically both
60
+ times, so the delta judgment was promoted into the gate itself. The gate
61
+ stays hard for new failures: the point is removing ritual, not weakening the gate.
50
62
 
51
63
  ### Interactive CP adds a required `addressed_by_plan` field
52
64
 
@@ -154,8 +166,8 @@ At **Interactive CP** (`/execute-group` CP1), add the required
154
166
  The escalation below is **Full CP** behavior (used by `/execute`). For
155
167
  **Interactive CP** the verdicts are surfaced to the operator severity-first
156
168
  and the operator decides — no automatic halt. For **Advisory CP** the concerns
157
- are recorded in the Completion Report and nothing halts or reverts; `/validate`
158
- is the only automatic gate. See "Checkpoint modes" above.
169
+ are recorded in the Completion Report and nothing halts or reverts; merge-delta
170
+ `/validate` is the only automatic gate. See "Checkpoint modes" above.
159
171
 
160
172
  Collect every verdict, then:
161
173
 
@@ -11,6 +11,13 @@
11
11
  # and surfaces the matched dimensions' checks as context for the
12
12
  # pre-commit cabinet sweep (Checkpoint 3). QA is the primary consumer.
13
13
  #
14
+ # The checklist learns in both directions: /debrief's checklist-feedback
15
+ # phase ADDS checks when bugs slip through, and /audit's
16
+ # checklist-pruning phase surfaces low-hit-rate dimensions for
17
+ # human-approved REMOVAL (evidence lives in checklist-stats.json — see
18
+ # cabinet/checklist-stats-schema.md; runtime state never lives in this
19
+ # file).
20
+ #
14
21
  # ── Schema ────────────────────────────────────────────────────────
15
22
  # dimensions: # top-level map; keys are dimension names
16
23
  # <dimension-name>:
@@ -58,6 +58,44 @@ attention window. If content exceeds 30 lines, truncation order:
58
58
  2. Drop Portfolio Pulse detail for quiet projects
59
59
  3. Never truncate "What Needs Attention" or "Where You Left Off"
60
60
 
61
+ ## Project State Section Ownership
62
+
63
+ `state/projects/<slug>.md` is written by two rings. Every section has
64
+ exactly ONE owner; a ring must never rebuild a section the other ring
65
+ owns. The merge that enforces this is `preserveRing3LastSession()` in
66
+ `watchtower-lib.mjs`, applied by Ring 1 before each per-project write.
67
+
68
+ | Section | Owner | Notes |
69
+ |----------------------|--------|----------------------------------------|
70
+ | `# <name>` header | Ring 1 | Timestamp refreshed every run |
71
+ | `## Active Plans` | Ring 1 | Rebuilt from pib-db every run |
72
+ | `## Last Session` | Ring 3 | Once authored — see below |
73
+ | `## Standing Issues` | Ring 1 | Rebuilt every run |
74
+ | `## Tech Stack` | Ring 1 | Rebuilt every run |
75
+
76
+ Ring 3's sessionSummary writes the rich Last Session summary with an
77
+ `_<date> (<session-id>)_` attribution line directly under the header.
78
+ That attribution line IS the ownership marker: until Ring 3 has authored
79
+ the section, Ring 1 writes its own ephemeral fallback ("Active: …" /
80
+ last-commit line) and rebuilds it freely; once the marker is present,
81
+ Ring 1 must carry the existing section forward verbatim. Without this,
82
+ Ring 1's full-file rebuild deterministically clobbers Ring 3's summary
83
+ within one cron tick (~5 minutes).
84
+
85
+ ### Thread File Durability (disk wins over model)
86
+
87
+ `state/threads/<slug>.json` carries the sibling rule: **disk wins over
88
+ model**. If the thread file exists, Ring 3 ALWAYS appends to
89
+ `cursor_history` — the LLM's `is_new` field is advisory naming metadata
90
+ only, never an authorization to fresh-write over an existing file. One
91
+ hallucinated `is_new: true` must not wipe an append-only history. The
92
+ canonical implementation is `updateThreadFile()` in `watchtower-lib.mjs`.
93
+
94
+ Corrupt thread files are never silently replaced: the corrupt file is
95
+ backed up aside as `<slug>.json.corrupt-<ts>`, a fresh file is written,
96
+ and the recovery is logged loudly. Per-thread writes are isolated — one
97
+ bad thread file must not abort writes for the remaining threads.
98
+
61
99
  ## Enrichment Directory
62
100
 
63
101
  Per-item enrichment lives in `queue/items/<id>/enrichment/`. Four
@@ -210,7 +210,10 @@ function validateSurfaceArea(notes) {
210
210
  }
211
211
 
212
212
  // Extract everything after ## Surface Area until the next ## or end
213
- const sectionMatch = notes.match(/^## Surface Area[^\n]*\n([\s\S]*?)(?=\n## |\n*$)/m);
213
+ // Lookahead ends only at the next "## " header or absolute end-of-string.
214
+ // A bare `\n*$` here terminated the match at a blank line right after the
215
+ // header, yielding an empty capture for standard markdown spacing.
216
+ const sectionMatch = notes.match(/^## Surface Area[^\n]*\n([\s\S]*?)(?=\n## |$(?![\s\S]))/m);
214
217
  const sectionBody = sectionMatch ? sectionMatch[1] : '';
215
218
  const hasEntry = /^- (?:files|dirs):/m.test(sectionBody);
216
219
  if (!hasEntry) {
@@ -20,7 +20,19 @@ if [ -z "$FID" ]; then
20
20
  exit 0
21
21
  fi
22
22
 
23
+ # Resolve the MAIN checkout: /execute and /execute-group write completion
24
+ # artifacts (breadcrumbs, group Completion Reports) to the MAIN checkout's
25
+ # .claude/verification/ — a session running in a linked/mux worktree must read
26
+ # the same files, not the worktree's disposable gitignored copy. Fail open to
27
+ # the cwd-relative path when not in a git repo (or git < 2.31).
23
28
  VERIFY_DIR=".claude/verification"
29
+ COMMON_DIR=$(git rev-parse --path-format=absolute --git-common-dir 2>/dev/null)
30
+ if [ -n "$COMMON_DIR" ] && [ -d "$COMMON_DIR" ]; then
31
+ MAIN_CHECKOUT=$(dirname "$COMMON_DIR")
32
+ if [ -d "$MAIN_CHECKOUT" ]; then
33
+ VERIFY_DIR="$MAIN_CHECKOUT/.claude/verification"
34
+ fi
35
+ fi
24
36
  BREADCRUMB="$VERIFY_DIR/$FID.json"
25
37
 
26
38
  if [ ! -f "$BREADCRUMB" ]; then
@@ -56,7 +68,12 @@ fi
56
68
  #
57
69
  # Tag lookup is best-effort: if pib.db can't be read, GRP_LABEL is empty and
58
70
  # this gate is skipped — the base breadcrumb gate above still applies.
71
+ # Same worktree resolution for the db: a worktree without its own pib.db
72
+ # falls back to the main checkout's, so the grp gate doesn't silently skip.
59
73
  DB_PATH="${PIB_DB_PATH:-pib.db}"
74
+ if [ ! -f "$DB_PATH" ] && [ -z "$PIB_DB_PATH" ] && [ -n "$MAIN_CHECKOUT" ] && [ -f "$MAIN_CHECKOUT/pib.db" ]; then
75
+ DB_PATH="$MAIN_CHECKOUT/pib.db"
76
+ fi
60
77
  TAGS=$(python3 -c "
61
78
  import sqlite3, sys
62
79
  try:
@@ -5,6 +5,12 @@
5
5
  # a state summary, then outputs it as hookSpecificOutput for Claude's
6
6
  # additionalContext.
7
7
  #
8
+ # Also runs the frontier-model watchdog: the SessionStart payload (stdin
9
+ # JSON, per the CC hook contract) carries the session's `model` id. If
10
+ # ~/.claude/cc-registry.json designates a frontierModel and this session
11
+ # runs a different model, a loud warning is prepended to the injected
12
+ # context. Visibility only — never blocks anything.
13
+ #
8
14
  # If watchtower is not installed (no config.json), exits silently.
9
15
  # If the context builder produces no output, exits silently.
10
16
  #
@@ -13,16 +19,85 @@
13
19
 
14
20
  command -v jq >/dev/null 2>&1 || exit 0
15
21
 
22
+ # Hook payload arrives on stdin as JSON (per the CC hook contract; never an env var).
23
+ # Guard against interactive invocation where stdin is a tty.
24
+ PAYLOAD=""
25
+ if [ ! -t 0 ]; then
26
+ PAYLOAD=$(cat)
27
+ fi
28
+
16
29
  WATCHTOWER_DIR="${HOME}/.claude-cabinet/watchtower"
17
30
  PROJECT_PATH="$(pwd)"
18
31
 
19
- # No config → watchtower not installed → exit silently
20
- if [ ! -f "${WATCHTOWER_DIR}/config.json" ]; then
21
- exit 0
32
+ # --- Frontier-model watchdog -------------------------------------------------
33
+ # Canonical match rule lives in the orient skill (templates/skills/orient/
34
+ # SKILL.md, "Frontier-Model Watchdog") — this is a reference implementation
35
+ # of that rule, not a second definition:
36
+ # - key starting with "claude-" AND containing a digit => exact model ID,
37
+ # exact case-insensitive match required
38
+ # - anything else => family alias, case-insensitive containment
39
+ # - absent/empty/whitespace key => silent no-op ('' would match everything
40
+ # and silence the watchdog while appearing configured)
41
+ FRONTIER_WARNING=""
42
+ SESSION_MODEL=""
43
+ if [ -n "${PAYLOAD}" ]; then
44
+ SESSION_MODEL=$(printf '%s' "${PAYLOAD}" | jq -r '.model // empty' 2>/dev/null)
45
+ fi
46
+ REGISTRY="${HOME}/.claude/cc-registry.json"
47
+ FRONTIER_KEY=""
48
+ if [ -f "${REGISTRY}" ]; then
49
+ FRONTIER_KEY=$(jq -r '.frontierModel // empty' "${REGISTRY}" 2>/dev/null | tr -d '[:space:]')
50
+ fi
51
+ if [ -n "${FRONTIER_KEY}" ] && [ -z "${SESSION_MODEL}" ]; then
52
+ # A key is configured but the payload exposed no model id (field absent,
53
+ # renamed, or reshaped by a future CC release). Say so instead of going
54
+ # silent — silence here is indistinguishable from "model matches".
55
+ FRONTIER_WARNING="ℹ FRONTIER WATCHDOG: a frontier model is designated (${FRONTIER_KEY}) but the SessionStart payload exposed no session model id — the early-boundary check was SKIPPED, not passed. The /orient watchdog phase remains the boundary."
56
+ fi
57
+ if [ -n "${SESSION_MODEL}" ] && [ -n "${FRONTIER_KEY}" ]; then
58
+ key_lc=$(printf '%s' "${FRONTIER_KEY}" | tr '[:upper:]' '[:lower:]')
59
+ model_lc=$(printf '%s' "${SESSION_MODEL}" | tr '[:upper:]' '[:lower:]')
60
+ # Session model ids may carry a bracketed runtime suffix (e.g.
61
+ # claude-fable-5[1m]); strip it before exact comparison — the suffix is
62
+ # session configuration, not model identity.
63
+ model_base_lc="${model_lc%%\[*}"
64
+ matched=0
65
+ case "${key_lc}" in
66
+ claude-*[0-9]*)
67
+ # Exact model ID — require identity against the suffix-stripped id.
68
+ [ "${model_base_lc}" = "${key_lc}" ] && matched=1
69
+ ;;
70
+ *)
71
+ # Family alias — containment.
72
+ case "${model_lc}" in
73
+ *"${key_lc}"*) matched=1 ;;
74
+ esac
75
+ ;;
76
+ esac
77
+ if [ "${matched}" -eq 0 ]; then
78
+ FRONTIER_WARNING="⚠ FRONTIER WATCHDOG: this session runs ${SESSION_MODEL}; your designated frontier model is ${FRONTIER_KEY} — switch with /model or relaunch. Surface this warning to the user as the FIRST line of any briefing. (Visibility only; nothing is blocked. Update the key with: npx create-claude-cabinet --frontier-model <model>)"
79
+ fi
80
+ fi
81
+ # -----------------------------------------------------------------------------
82
+
83
+ # No config → watchtower not installed → still emit a frontier warning if
84
+ # one fired (the hook only registers on watchtower installs, but a torn-down
85
+ # config should not eat the watchdog), otherwise exit silently.
86
+ CONTEXT=""
87
+ if [ -f "${WATCHTOWER_DIR}/config.json" ]; then
88
+ # Build context. Suppress stderr to avoid noise on missing files.
89
+ CONTEXT=$(node "${WATCHTOWER_DIR}/scripts/watchtower-build-context.mjs" --project-path "${PROJECT_PATH}" 2>/dev/null)
22
90
  fi
23
91
 
24
- # Build context. Suppress stderr to avoid noise on missing files.
25
- CONTEXT=$(node "${WATCHTOWER_DIR}/scripts/watchtower-build-context.mjs" --project-path "${PROJECT_PATH}" 2>/dev/null)
92
+ if [ -n "${FRONTIER_WARNING}" ]; then
93
+ if [ -n "${CONTEXT}" ]; then
94
+ CONTEXT="${FRONTIER_WARNING}
95
+
96
+ ${CONTEXT}"
97
+ else
98
+ CONTEXT="${FRONTIER_WARNING}"
99
+ fi
100
+ fi
26
101
 
27
102
  # Empty context → nothing to inject
28
103
  if [ -z "${CONTEXT}" ]; then