create-claude-cabinet 0.43.0 → 0.45.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/README.md +5 -0
  2. package/lib/cli.js +57 -9
  3. package/lib/copy.js +56 -10
  4. package/lib/mux-setup.js +2 -0
  5. package/package.json +1 -1
  6. package/templates/cabinet/checklist-stats-schema.md +104 -0
  7. package/templates/cabinet/checkpoint-protocol.md +17 -5
  8. package/templates/cabinet/qa-dimensions-template.yaml +7 -0
  9. package/templates/cabinet/watchtower-contracts.md +38 -0
  10. package/templates/engagement/pib-db-patches/pib-db-lib.mjs +4 -1
  11. package/templates/hooks/action-completion-gate.sh +17 -0
  12. package/templates/hooks/watchtower-session-start.sh +80 -5
  13. package/templates/mux/__tests__/claude-carveout.fixture.sh +136 -0
  14. package/templates/mux/__tests__/claude-carveout.test.mjs +38 -0
  15. package/templates/mux/__tests__/mux-fail-loud.fixture.sh +254 -0
  16. package/templates/mux/__tests__/mux-fail-loud.test.mjs +41 -0
  17. package/templates/mux/__tests__/worktree-dirty-check.fixture.sh +184 -0
  18. package/templates/mux/__tests__/worktree-dirty-check.test.mjs +35 -0
  19. package/templates/mux/bin/mux +581 -93
  20. package/templates/mux/config/help.txt +2 -0
  21. package/templates/mux/config/worktree-cleanup.sh +55 -9
  22. package/templates/mux/config/worktree-dirty-check.sh +128 -0
  23. package/templates/mux/config/worktree-session-health.sh +62 -35
  24. package/templates/scripts/__tests__/qa-handoff-aging.e2e.test.mjs +108 -0
  25. package/templates/scripts/__tests__/qa-handoff-gate.test.mjs +335 -0
  26. package/templates/scripts/__tests__/resolve-project.test.mjs +144 -0
  27. package/templates/scripts/__tests__/ring-state-ownership.test.mjs +228 -0
  28. package/templates/scripts/pib-db-lib.mjs +4 -1
  29. package/templates/scripts/pib-db.mjs +4 -1
  30. package/templates/scripts/validate-memory.mjs +6 -2
  31. package/templates/scripts/watchtower-build-context.mjs +12 -8
  32. package/templates/scripts/watchtower-lib.mjs +265 -2
  33. package/templates/scripts/watchtower-migrate-keys.mjs +305 -0
  34. package/templates/scripts/watchtower-queue.mjs +226 -1
  35. package/templates/scripts/watchtower-ring1.mjs +19 -3
  36. package/templates/scripts/watchtower-ring2.mjs +4 -2
  37. package/templates/scripts/watchtower-ring3-close.mjs +92 -88
  38. package/templates/skills/audit/SKILL.md +25 -7
  39. package/templates/skills/audit/phases/checklist-pruning.md +108 -0
  40. package/templates/skills/briefing/SKILL.md +12 -1
  41. package/templates/skills/cabinet/SKILL.md +2 -2
  42. package/templates/skills/cabinet-record-keeper/SKILL.md +6 -1
  43. package/templates/skills/cc-upgrade/SKILL.md +0 -1
  44. package/templates/skills/collab-consultant/SKILL.md +1 -1
  45. package/templates/skills/debrief/SKILL.md +33 -3
  46. package/templates/skills/debrief/phases/checklist-feedback.md +10 -3
  47. package/templates/skills/debrief/phases/qa-handoff-sweep.md +78 -0
  48. package/templates/skills/engagement-create/SKILL.md +1 -1
  49. package/templates/skills/engagement-help/SKILL.md +1 -1
  50. package/templates/skills/execute/SKILL.md +1 -2
  51. package/templates/skills/execute/phases/post-impl-checklist.md +18 -0
  52. package/templates/skills/execute-group/SKILL.md +76 -24
  53. package/templates/skills/inbox/SKILL.md +30 -7
  54. package/templates/skills/investigate/SKILL.md +0 -2
  55. package/templates/skills/orient/SKILL.md +100 -6
  56. package/templates/skills/orient/phases/checklist-status.md +12 -0
  57. package/templates/skills/plan/SKILL.md +14 -7
  58. package/templates/skills/qa-handoff/SKILL.md +243 -25
  59. package/templates/skills/session-handoff/SKILL.md +165 -0
  60. package/templates/skills/setup-accounts/SKILL.md +1 -1
  61. package/templates/skills/unwrap/SKILL.md +1 -1
  62. package/templates/skills/verify/SKILL.md +2 -2
  63. package/templates/skills/watchtower/SKILL.md +19 -1
  64. package/templates/watchtower/queue/items/item.json.schema +9 -0
  65. package/templates/workflows/deliberative-audit.js +3 -0
  66. package/templates/workflows/execute-group-complete.js +93 -16
  67. package/templates/workflows/execute-group-implement.js +164 -19
package/README.md CHANGED
@@ -241,8 +241,13 @@ npx create-claude-cabinet --yes # Accept all defaults
241
241
  npx create-claude-cabinet --yes --no-db # All defaults, skip database
242
242
  npx create-claude-cabinet --dry-run # Preview without writing files
243
243
  npx create-claude-cabinet --modules verify --yes # Add an opt-in module (merges, doesn't replace)
244
+ npx create-claude-cabinet --frontier-model claude-fable-5 # Designate your frontier model (watchdog)
244
245
  ```
245
246
 
247
+ ### Frontier-model watchdog
248
+
249
+ `--frontier-model <model>` records, once, which model your heavy thinking is supposed to run on. The designation is per-operator (stored in `~/.claude/cc-registry.json` under `frontierModel`, not per-project), and the installer prints the effective value on every run. From then on, `/orient` — and, on watchtower installs, the SessionStart hook — compares the session's actual model against it and leads the briefing with a loud warning on mismatch. The key can be an exact model ID (`claude-fable-5`, exact match required) or a family alias (`fable`, matches any model ID containing it). This is **visibility only**: nothing is pinned, blocked, or rerouted — it just makes "you're accidentally on the wrong model" impossible to miss. A stale key after a model-family transition nags loudly by design; update it with the same flag.
250
+
246
251
  ## What Gets Installed
247
252
 
248
253
  Everything goes into `.claude/` or `scripts/`. Nothing touches your
package/lib/cli.js CHANGED
@@ -3,7 +3,7 @@ const path = require('path');
3
3
  const fs = require('fs');
4
4
  const os = require('os');
5
5
  const crypto = require('crypto');
6
- const { copyTemplates } = require('./copy');
6
+ const { copyTemplates, recordSkip } = require('./copy');
7
7
  const { mergeSettings, healUserSettings, mergeWatchtowerHooks, mergeMuxHooks, mergeBashCompressHooks } = require('./settings-merge');
8
8
  const { create: createMetadata, read: readMetadata } = require('./metadata');
9
9
  const { setupDb } = require('./db-setup');
@@ -397,9 +397,12 @@ function generateAgentWrappers(projectDir) {
397
397
  if (/websearch/.test(toolSignal)) tools.push('WebSearch');
398
398
  if (/webfetch|fetch_docs/.test(toolSignal)) tools.push('WebFetch');
399
399
 
400
- // model: none of the cabinet skills declare one today; default to sonnet,
401
- // but honor an explicit declaration if a member ever sets one.
402
- const model = (typeof fm.model === 'string' && fm.model.trim()) || 'sonnet';
400
+ // model: none of the cabinet skills declare one today; default to inherit
401
+ // (follow the session model — a family alias like 'sonnet' goes stale when
402
+ // the frontier moves families), but honor an explicit declaration if a
403
+ // member ever sets one. Background watchtower rings pin their own model
404
+ // separately and are unaffected.
405
+ const model = (typeof fm.model === 'string' && fm.model.trim()) || 'inherit';
403
406
 
404
407
  const wrapper =
405
408
  `---\n` +
@@ -457,11 +460,13 @@ const MODULES = {
457
460
  'skills/orient-quick',
458
461
  'skills/debrief',
459
462
  'skills/debrief-quick',
463
+ 'skills/session-handoff',
460
464
  // Instruction phases — always ship, overriding the default skip-phases rule in copy.js
461
465
  'skills/debrief/phases/audit-pattern-capture.md',
462
466
  'skills/debrief/phases/methodology-capture.md',
463
467
  'skills/debrief/phases/record-lessons.md',
464
468
  'skills/debrief/phases/upstream-feedback.md',
469
+ 'skills/debrief/phases/qa-handoff-sweep.md',
465
470
  'skills/menu',
466
471
  ],
467
472
  },
@@ -488,7 +493,7 @@ const MODULES = {
488
493
  mandatory: false,
489
494
  default: true,
490
495
  lean: true,
491
- templates: ['skills/plan', 'skills/execute', 'skills/execute/phases/post-impl-checklist.md', 'skills/debrief/phases/checklist-feedback.md', 'skills/checklist-discover', 'skills/generate-plan-groups', 'skills/execute-group', 'workflows/execute-group-implement.js', 'workflows/execute-group-complete.js', 'skills/investigate', 'cabinet/checkpoint-protocol.md', 'cabinet/elicitation-methods.md', 'cabinet/qa-dimensions-template.yaml', 'scripts/qa-dimensions-validator.cjs', 'skills/orient/phases/checklist-status.md'],
496
+ templates: ['skills/plan', 'skills/execute', 'skills/execute/phases/post-impl-checklist.md', 'skills/debrief/phases/checklist-feedback.md', 'skills/checklist-discover', 'skills/generate-plan-groups', 'skills/execute-group', 'workflows/execute-group-implement.js', 'workflows/execute-group-complete.js', 'skills/investigate', 'cabinet/checkpoint-protocol.md', 'cabinet/elicitation-methods.md', 'cabinet/qa-dimensions-template.yaml', 'cabinet/checklist-stats-schema.md', 'scripts/qa-dimensions-validator.cjs', 'skills/orient/phases/checklist-status.md', 'skills/audit/phases/checklist-pruning.md'],
492
497
  },
493
498
  'compliance': {
494
499
  name: 'Compliance Stack (rules + enforcement)',
@@ -747,6 +752,14 @@ function parseArgs(argv) {
747
752
  else if (arg === '--modules' && i + 1 < args.length) {
748
753
  flags.modules = args[++i].split(',').map(s => s.trim()).filter(Boolean);
749
754
  }
755
+ else if (arg === '--frontier-model' && i + 1 < args.length) {
756
+ // Empty/whitespace values are treated as absent: '' is a substring of
757
+ // every model ID, which would match everything and leave the watchdog
758
+ // permanently silent while appearing configured.
759
+ const value = args[++i].trim();
760
+ if (value) flags.frontierModel = value;
761
+ else flags.frontierModelEmpty = true;
762
+ }
750
763
  else if (!arg.startsWith('-')) flags.targetDir = arg;
751
764
  }
752
765
 
@@ -772,6 +785,11 @@ function printHelp() {
772
785
  disables omega hooks/MCP. Idempotent — safe to re-run.
773
786
  Pair with --dry-run to preview.
774
787
  --unmigrate-memory Roll back --migrate-memory using its backup dir.
788
+ --frontier-model <model> Designate your frontier model (user-level, stored
789
+ in ~/.claude/cc-registry.json). Visibility only: /orient and
790
+ the watchtower SessionStart hook warn loudly when a session
791
+ runs a different model. Does NOT pin or route anything.
792
+ Accepts an exact ID (claude-fable-5) or a family alias (fable).
775
793
  --help, -h Show this help
776
794
 
777
795
  Examples:
@@ -1211,7 +1229,7 @@ async function run() {
1211
1229
  const existingContent = fs.readFileSync(destPath, 'utf8');
1212
1230
  if (existingContent === incoming) {
1213
1231
  totalSkipped++;
1214
- allManifest[mPath] = incomingHash;
1232
+ recordSkip(allManifest, mPath, { identical: true, incomingHash });
1215
1233
  continue;
1216
1234
  }
1217
1235
 
@@ -1227,7 +1245,9 @@ async function run() {
1227
1245
  if (isPhaseFile && !isInstructionPhase && existingContent.trim() !== '' && existingContent.trim() !== incoming.trim()) {
1228
1246
  console.log(` Preserved customized phase: ${tmpl}`);
1229
1247
  totalSkipped++;
1230
- allManifest[mPath] = hashContent(existingContent);
1248
+ // Customized phase = project-owned content → omit from manifest
1249
+ // (recordSkip in copy.js — omission means "not ours").
1250
+ recordSkip(allManifest, mPath);
1231
1251
  continue;
1232
1252
  }
1233
1253
 
@@ -1237,10 +1257,19 @@ async function run() {
1237
1257
  if (existingManifest[mPath]) {
1238
1258
  if (!flags.dryRun) fs.copyFileSync(srcPath, destPath);
1239
1259
  totalOverwritten++;
1260
+ // Log single-file overwrites too — the directory path (copy.js)
1261
+ // already does. Without this, scripts/ updates are invisible in
1262
+ // install output, masking whether a changed script propagated.
1263
+ console.log(` Updated: ${path.relative(projectDir, destPath)}`);
1264
+ allManifest[mPath] = incomingHash;
1240
1265
  } else {
1241
1266
  totalSkipped++;
1267
+ // Project-created file → omit from manifest entirely. Ownership
1268
+ // classification is manifest-PRESENCE-based, so recording ANY
1269
+ // hash here would mark the file upstream-owned and the NEXT
1270
+ // install would silently overwrite it (act:bf21c95b).
1271
+ recordSkip(allManifest, mPath);
1242
1272
  }
1243
- allManifest[mPath] = incomingHash;
1244
1273
  } else {
1245
1274
  const response = await prompts({
1246
1275
  type: 'select',
@@ -1255,10 +1284,13 @@ async function run() {
1255
1284
  if (response.action === 'overwrite') {
1256
1285
  if (!flags.dryRun) fs.copyFileSync(srcPath, destPath);
1257
1286
  totalOverwritten++;
1287
+ allManifest[mPath] = incomingHash;
1258
1288
  } else {
1259
1289
  totalSkipped++;
1290
+ // Keep: the user claimed this file → project-owned → omit from
1291
+ // the manifest so it is never mistaken for upstream content.
1292
+ recordSkip(allManifest, mPath);
1260
1293
  }
1261
- allManifest[mPath] = incomingHash;
1262
1294
  }
1263
1295
  } else {
1264
1296
  if (!flags.dryRun) fs.copyFileSync(srcPath, destPath);
@@ -1607,6 +1639,18 @@ async function run() {
1607
1639
  // Register with folder name. /onboard fills in name and description later.
1608
1640
  registry.projects.push(entry);
1609
1641
  }
1642
+ // --- Frontier-model designation (visibility watchdog) ---
1643
+ // User-level, per-operator key. Read-preserve-rewrite: only the
1644
+ // frontierModel key is touched; every other key rides through.
1645
+ if (flags.frontierModelEmpty) {
1646
+ console.log(' ⚠ Ignoring empty --frontier-model value (an empty key would match every model and silence the watchdog)');
1647
+ }
1648
+ if (flags.frontierModel) {
1649
+ registry.frontierModel = flags.frontierModel;
1650
+ } else if (typeof registry.frontierModel === 'string' && !registry.frontierModel.trim()) {
1651
+ // Heal a hand-edited empty key — treat as absent (see parseArgs note).
1652
+ delete registry.frontierModel;
1653
+ }
1610
1654
  fs.writeFileSync(registryPath, JSON.stringify(registry, null, 2) + '\n');
1611
1655
  const otherCount = registry.projects.filter(p => p.path !== projectDir).length;
1612
1656
  if (otherCount > 0) {
@@ -1614,6 +1658,10 @@ async function run() {
1614
1658
  } else {
1615
1659
  console.log(' 📋 Registered in project registry');
1616
1660
  }
1661
+ // Self-announcing: print the effective designation on every run.
1662
+ if (registry.frontierModel) {
1663
+ console.log(` 🛰 Frontier model: ${registry.frontierModel} (visibility watchdog — /orient + SessionStart warn on mismatch; nothing is pinned)`);
1664
+ }
1617
1665
  } catch (err) {
1618
1666
  // Non-fatal — registry is nice-to-have
1619
1667
  }
package/lib/copy.js CHANGED
@@ -7,6 +7,49 @@ function hashContent(content) {
7
7
  return crypto.createHash('sha256').update(content).digest('hex').slice(0, 16);
8
8
  }
9
9
 
10
+ /**
11
+ * Record the manifest consequence of SKIPPING a file at install time
12
+ * (act:bf21c95b). The single shared rule for ALL skip sites in BOTH
13
+ * install code paths (lib/copy.js and the single-file branches in
14
+ * lib/cli.js):
15
+ *
16
+ * - A skipped file whose on-disk content differs from the incoming
17
+ * template is NOT upstream content — it is project-owned
18
+ * (project-created, user-kept, or a customized phase). It must be
19
+ * OMITTED from the manifest entirely. An absent entry means "not
20
+ * ours". Recording any hash for it poisons the manifest: ownership
21
+ * classification is manifest-PRESENCE-based, so the next install
22
+ * would classify the file upstream-owned and silently overwrite it
23
+ * (and cc-upstream-guard / cc-drift-check would false-positive on it).
24
+ * - The one exception: a skipped file byte-identical to the incoming
25
+ * template is indistinguishable from upstream content and stays
26
+ * tracked under the template hash.
27
+ *
28
+ * Omission only — never a marker value or an alternate manifest value
29
+ * shape (lesson_shared_json_shape_drift). All manifest consumers
30
+ * (ownership classification, cleanup loop, key migration, cc-drift-check,
31
+ * cc-upstream-guard, lib/reset.js) treat an absent key as "not ours" and
32
+ * leave the file alone.
33
+ *
34
+ * Known limitation (documented, not solved here): manifests already
35
+ * poisoned by past installs cannot retroactively distinguish a recorded
36
+ * project-created file from genuine upstream content. This helper only
37
+ * prevents NEW poisoning.
38
+ *
39
+ * @param {object} manifest manifest object being built for this install
40
+ * @param {string} key manifest key for the skipped file
41
+ * @param {object} [opts]
42
+ * @param {boolean} [opts.identical] on-disk content === incoming template
43
+ * @param {string} [opts.incomingHash] hash of the incoming template content
44
+ */
45
+ function recordSkip(manifest, key, { identical = false, incomingHash = null } = {}) {
46
+ if (identical && incomingHash) {
47
+ manifest[key] = incomingHash;
48
+ } else {
49
+ delete manifest[key];
50
+ }
51
+ }
52
+
10
53
  /**
11
54
  * Recursively copy files from src to dest, surfacing conflicts.
12
55
  * Returns { copied: string[], skipped: string[], overwritten: string[] }
@@ -55,7 +98,8 @@ async function walkAndCopy(srcRoot, destRoot, currentSrc, results, dryRun, skipC
55
98
  const trimmedExisting = existing.trim();
56
99
  if (trimmedExisting !== '' && trimmedExisting !== incoming.trim()) {
57
100
  results.skipped.push(relPath);
58
- results.manifest[relPath] = hashContent(existing);
101
+ // Customized phase = project-owned content → omit from manifest.
102
+ recordSkip(results.manifest, relPath);
59
103
  console.log(` Preserved customized phase: ${displayPath}`);
60
104
  continue;
61
105
  }
@@ -64,7 +108,7 @@ async function walkAndCopy(srcRoot, destRoot, currentSrc, results, dryRun, skipC
64
108
 
65
109
  if (existing === incoming) {
66
110
  results.skipped.push(relPath);
67
- results.manifest[relPath] = incomingHash;
111
+ recordSkip(results.manifest, relPath, { identical: true, incomingHash });
68
112
  continue;
69
113
  }
70
114
 
@@ -82,9 +126,8 @@ async function walkAndCopy(srcRoot, destRoot, currentSrc, results, dryRun, skipC
82
126
  console.log(` Updated: ${displayPath}`);
83
127
  } else {
84
128
  results.skipped.push(relPath);
85
- // Record the hash of what's actually on disk, not the template —
86
- // otherwise the manifest lies about file content after a skip.
87
- results.manifest[relPath] = hashContent(existing);
129
+ // Project-created file → omit from manifest ("not ours").
130
+ recordSkip(results.manifest, relPath);
88
131
  }
89
132
  continue;
90
133
  }
@@ -101,9 +144,9 @@ async function walkAndCopy(srcRoot, destRoot, currentSrc, results, dryRun, skipC
101
144
  });
102
145
 
103
146
  if (!response.action) {
104
- // User cancelled
147
+ // User cancelled → file kept as-is → project-owned → omit.
105
148
  results.skipped.push(relPath);
106
- results.manifest[relPath] = incomingHash;
149
+ recordSkip(results.manifest, relPath);
107
150
  continue;
108
151
  }
109
152
 
@@ -118,17 +161,20 @@ async function walkAndCopy(srcRoot, destRoot, currentSrc, results, dryRun, skipC
118
161
  if (followUp.overwrite && !dryRun) {
119
162
  fs.copyFileSync(srcPath, destPath);
120
163
  results.overwritten.push(relPath);
164
+ results.manifest[relPath] = incomingHash;
121
165
  } else {
166
+ // Diff shown, user kept their file → project-owned → omit.
122
167
  results.skipped.push(relPath);
168
+ recordSkip(results.manifest, relPath);
123
169
  }
124
- results.manifest[relPath] = incomingHash;
125
170
  } else if (response.action === 'overwrite') {
126
171
  if (!dryRun) fs.copyFileSync(srcPath, destPath);
127
172
  results.overwritten.push(relPath);
128
173
  results.manifest[relPath] = incomingHash;
129
174
  } else {
175
+ // 'Keep existing' → project-owned → omit from manifest.
130
176
  results.skipped.push(relPath);
131
- results.manifest[relPath] = incomingHash;
177
+ recordSkip(results.manifest, relPath);
132
178
  }
133
179
  } else {
134
180
  if (!dryRun) {
@@ -169,4 +215,4 @@ function showDiff(existing, incoming, relPath) {
169
215
  console.log('');
170
216
  }
171
217
 
172
- module.exports = { copyTemplates };
218
+ module.exports = { copyTemplates, recordSkip };
package/lib/mux-setup.js CHANGED
@@ -45,6 +45,7 @@ const MANAGED_FILES = [
45
45
  { src: 'config/worktree-session-health.sh', dest: path.join(os.homedir(), '.config', 'mux', 'worktree-session-health.sh'), mode: 0o755 },
46
46
  { src: 'config/worktree-health-popup.sh', dest: path.join(os.homedir(), '.config', 'mux', 'worktree-health-popup.sh'), mode: 0o755 },
47
47
  { src: 'config/worktree-cleanup.sh', dest: path.join(os.homedir(), '.config', 'mux', 'worktree-cleanup.sh'), mode: 0o755 },
48
+ { src: 'config/worktree-dirty-check.sh', dest: path.join(os.homedir(), '.config', 'mux', 'worktree-dirty-check.sh'), mode: 0o755 },
48
49
  { src: 'config/mux.tmux.conf', dest: path.join(os.homedir(), '.config', 'mux', 'mux.tmux.conf') },
49
50
  { src: 'config/unwrap-copy.py', dest: path.join(os.homedir(), '.config', 'mux', 'unwrap-copy.py'), mode: 0o755 },
50
51
  { src: 'config/screenshot-to-clipboard.sh', dest: path.join(os.homedir(), '.config', 'mux', 'screenshot-to-clipboard.sh'), mode: 0o755 },
@@ -57,6 +58,7 @@ const DATA_DIRS = [
57
58
  path.join(os.homedir(), '.config', 'mux', 'dx'),
58
59
  path.join(os.homedir(), '.config', 'mux', 'pending-prompts'),
59
60
  path.join(os.homedir(), '.local', 'share', 'mux', 'wt-health'),
61
+ path.join(os.homedir(), '.local', 'share', 'mux', 'qa-handoff'),
60
62
  ];
61
63
 
62
64
  function sha256(content) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "create-claude-cabinet",
3
- "version": "0.43.0",
3
+ "version": "0.45.0",
4
4
  "description": "Claude Cabinet — opinionated process scaffolding for Claude Code projects",
5
5
  "bin": {
6
6
  "create-claude-cabinet": "bin/create-claude-cabinet.js"
package/templates/cabinet/checklist-stats-schema.md ADDED
@@ -0,0 +1,104 @@
1
+ # Checklist Stats — hit-rate sidecar schema and write protocol
2
+
3
+ `.claude/cabinet/checklist-stats.json` records how the change-impact
4
+ checklist (`qa-dimensions.yaml`) performs over time: which dimensions
5
+ fire, which checks actually catch problems, and what pruning verdicts
6
+ the operator has already given. It is the evidence base for the audit
7
+ skill's `checklist-pruning` phase — without it, the checklist only ever
8
+ grows (debrief's `checklist-feedback` is add-only by design) and decays
9
+ into noise.
10
+
11
+ **This file is RUNTIME STATE, generated on first write — never shipped
12
+ as a template.** Shipping it would clobber accumulated stats on every
13
+ reinstall (same rule as `advisories-state-schema.md`). And it never
14
+ lives inside `qa-dimensions.yaml`: config files do not contain runtime
15
+ state.
16
+
17
+ ## Who writes what
18
+
19
+ | Writer | When | What |
20
+ |--------|------|------|
21
+ | `/execute` `post-impl-checklist` phase | every run past its no-op guard | increments `runs`; per triggered dimension increments `fires`, sets `last_fired` |
22
+ | `/debrief` `checklist-feedback` phase | when a session bug WAS caught via a surfaced check | appends to that dimension's `catches` |
23
+ | `/audit` `checklist-pruning` phase | every pruning verdict (including "keep") | appends to `pruning_reviews` |
24
+
25
+ ## Schema (`schema_version: 1`)
26
+
27
+ ```json
28
+ {
29
+ "schema_version": 1,
30
+ "runs": 14,
31
+ "dimensions": {
32
+ "data-coherence": {
33
+ "fires": 12,
34
+ "last_fired": "2026-06-11",
35
+ "catches": [
36
+ {
37
+ "date": "2026-06-10",
38
+ "check": "[run] Run schema validation if any schema or migration file changed.",
39
+ "note": "caught missing FK backfill before commit"
40
+ }
41
+ ]
42
+ }
43
+ },
44
+ "pruning_reviews": [
45
+ {
46
+ "date": "2026-06-11",
47
+ "target": "test-staleness",
48
+ "verdict": "keep",
49
+ "note": "fires often, zero catches, but cheap insurance at moderate severity"
50
+ }
51
+ ]
52
+ }
53
+ ```
54
+
55
+ Field semantics:
56
+
57
+ - **`runs`** — total executions of the post-impl-checklist phase that
58
+ passed its no-op guard, INCLUDING runs where zero dimensions
59
+ triggered. This is the denominator for "never fired in N runs."
60
+ - **`dimensions.<name>.fires`** — number of runs in which the dimension
61
+ triggered (matched at least one changed path). Dimension-level, not
62
+ check-level: checks have no stable IDs, so firing is counted where it
63
+ happens (path match) and catching is attributed by quoting the check.
64
+ - **`dimensions.<name>.catches`** — append-only evidence that a
65
+ surfaced check caught a real issue. `check` quotes the check text as
66
+ written in the yaml at the time.
67
+ - **`pruning_reviews`** — append-only verdict log. `verdict` is one of
68
+ `removed | trimmed | paths-fixed | severity-changed | keep`. The
69
+ pruning phase skips candidates with any verdict in the last 90 days,
70
+ so a "keep" decision is not re-litigated at every audit.
71
+
72
+ ## Write protocol
73
+
74
+ 1. Read the file. If absent, bootstrap the skeleton
75
+ (`{"schema_version": 1, "runs": 0, "dimensions": {}, "pruning_reviews": []}`).
76
+ If present but unparseable, move it aside to
77
+ `checklist-stats.json.corrupt-<YYYY-MM-DD>` (never delete) and
78
+ bootstrap fresh.
79
+ 2. Modify in memory.
80
+ 3. Write to `checklist-stats.json.tmp`, then rename over the original
81
+ (atomic — concurrent sessions never see a half-written file).
82
+
83
+ **Fail-open, always:** a stats read or write failure must never block
84
+ the phase doing the recording. Emit one warning line and continue —
85
+ losing a data point is fine; blocking an execute/debrief/audit run over
86
+ bookkeeping is not.
87
+
88
+ ## Anti-trap rules
89
+
90
+ - **Stats inform; the human decides.** Nothing auto-prunes from this
91
+ data, ever. Low hit-rate is evidence presented at audit, not a
92
+ trigger.
93
+ - **Per-dimension judgment, not universal thresholds.** A high-severity
94
+ security check that fires often and never catches may still be cheap
95
+ insurance; an info-severity check with the same profile is noise.
96
+ The pruning phase presents severity alongside the numbers.
97
+ - **Renames orphan stats.** If a dimension is renamed in
98
+ `qa-dimensions.yaml`, its stats entry goes stale. The pruning phase
99
+ reports entries with no matching dimension as orphans (offer to fold
100
+ or drop them); writers simply start a fresh entry under the new name.
101
+ - **Counts are honest, not precise.** Concurrent sessions can lose an
102
+ increment to a race; the rename-based write keeps the file valid and
103
+ the trend signal is what matters. Do not build exact-count logic on
104
+ top of this file.
package/templates/cabinet/checkpoint-protocol.md CHANGED
@@ -37,7 +37,7 @@ high-stakes reviews is to put judgment in front of the operator.
37
37
  | Mode | Where it runs | What a `stop`/`pause` does | Used by |
38
38
  |------|---------------|----------------------------|---------|
39
39
  | **Interactive CP** | Main session (skill level) | Surfaced to the operator, who decides (proceed / drop / override / abort). Never automatic. | `/execute-group` CP1 |
40
- | **Advisory CP** | Workflow | Recorded in the Completion Report as a concern. Never halts or reverts. The only automatic gate alongside it is `/validate`. | `/execute-group` CP3 |
40
+ | **Advisory CP** | Workflow | Recorded in the Completion Report as a concern. Never halts or reverts. The only automatic gate alongside it is merge-delta `/validate` — new failures vs the group's pre-merge baseline; inherited debt is reported, not gated. | `/execute-group` CP3 |
41
41
  | **Full CP** | Main session or workflow | Halts on `stop`, escalates 3+ `pause` to a halt, requires explicit override. The classic gate. | `/execute` CP1/CP2/CP3 |
42
42
 
43
43
  **Why Interactive and Advisory exist.** `/execute-group` once ran CP1 and CP3
@@ -45,8 +45,20 @@ as autonomous gates inside a single workflow: a cabinet `stop` halted the run
45
45
  or reverted a merge with no human in the loop. False positives there cost real
46
46
  money (a CP1 halted twice consecutively — 1.6M+ tokens — on concerns the plan
47
47
  text already addressed). Moving CP1 to interactive (operator decides) and CP3
48
- to advisory (concerns recorded, `/validate` is the only hard gate) keeps the
49
- review signal while removing the destructive autonomous action.
48
+ to advisory (concerns recorded, merge-delta `/validate` is the only hard
49
+ gate) keeps the review signal while removing the destructive autonomous
50
+ action.
51
+
52
+ **The hard gate is merge-delta, not absolute.** `/execute-group` captures a
53
+ `/validate` baseline on main before the group's first merge. Only failures
54
+ NOT in that baseline (i.e. failures the group itself introduced) gate a merge
55
+ or completion. Failures that pre-date the group are inherited debt: listed
56
+ loudly in the Completion Report's `pre_existing_debt` section, never gated.
57
+ This too is field-driven — two consecutive groups were gated on documented
58
+ pre-existing main debt with zero merge-delta regressions, and the manual
59
+ recovery (judge the delta by hand, close the plans) ran identically both
60
+ times, so the delta judgment was promoted into the gate itself. The gate
61
+ stays hard for new failures: the point is removing ritual, not weakening it.
50
62
 
51
63
  ### Interactive CP adds a required `addressed_by_plan` field
52
64
 
@@ -154,8 +166,8 @@ At **Interactive CP** (`/execute-group` CP1), add the required
154
166
  The escalation below is **Full CP** behavior (used by `/execute`). For
155
167
  **Interactive CP** the verdicts are surfaced to the operator severity-first
156
168
  and the operator decides — no automatic halt. For **Advisory CP** the concerns
157
- are recorded in the Completion Report and nothing halts or reverts; `/validate`
158
- is the only automatic gate. See "Checkpoint modes" above.
169
+ are recorded in the Completion Report and nothing halts or reverts; merge-delta
170
+ `/validate` is the only automatic gate. See "Checkpoint modes" above.
159
171
 
160
172
  Collect every verdict, then:
161
173
 
package/templates/cabinet/qa-dimensions-template.yaml CHANGED
@@ -11,6 +11,13 @@
11
11
  # and surfaces the matched dimensions' checks as context for the
12
12
  # pre-commit cabinet sweep (Checkpoint 3). QA is the primary consumer.
13
13
  #
14
+ # The checklist learns in both directions: /debrief's checklist-feedback
15
+ # phase ADDS checks when bugs slip through, and /audit's
16
+ # checklist-pruning phase surfaces low-hit-rate dimensions for
17
+ # human-approved REMOVAL (evidence lives in checklist-stats.json — see
18
+ # cabinet/checklist-stats-schema.md; runtime state never lives in this
19
+ # file).
20
+ #
14
21
  # ── Schema ────────────────────────────────────────────────────────
15
22
  # dimensions: # top-level map; keys are dimension names
16
23
  # <dimension-name>:
package/templates/cabinet/watchtower-contracts.md CHANGED
@@ -58,6 +58,44 @@ attention window. If content exceeds 30 lines, truncation order:
58
58
  2. Drop Portfolio Pulse detail for quiet projects
59
59
  3. Never truncate "What Needs Attention" or "Where You Left Off"
60
60
 
61
+ ## Project State Section Ownership
62
+
63
+ `state/projects/<slug>.md` is written by two rings. Every section has
64
+ exactly ONE owner; a ring must never rebuild a section the other ring
65
+ owns. The merge that enforces this is `preserveRing3LastSession()` in
66
+ `watchtower-lib.mjs`, applied by Ring 1 before each per-project write.
67
+
68
+ | Section | Owner | Notes |
69
+ |----------------------|--------|----------------------------------------|
70
+ | `# <name>` header | Ring 1 | Timestamp refreshed every run |
71
+ | `## Active Plans` | Ring 1 | Rebuilt from pib-db every run |
72
+ | `## Last Session` | Ring 3 | Once authored — see below |
73
+ | `## Standing Issues` | Ring 1 | Rebuilt every run |
74
+ | `## Tech Stack` | Ring 1 | Rebuilt every run |
75
+
76
+ Ring 3's sessionSummary writes the rich Last Session summary with an
77
+ `_<date> (<session-id>)_` attribution line directly under the header.
78
+ That attribution line IS the ownership marker: until Ring 3 has authored
79
+ the section, Ring 1 writes its own ephemeral fallback ("Active: …" /
80
+ last-commit line) and rebuilds it freely; once the marker is present,
81
+ Ring 1 must carry the existing section forward verbatim. Without this,
82
+ Ring 1's full-file rebuild deterministically clobbers Ring 3's summary
83
+ within one cron tick (~5 minutes).
84
+
85
+ ### Thread File Durability (disk wins over model)
86
+
87
+ `state/threads/<slug>.json` carries the sibling rule: **disk wins over
88
+ model**. If the thread file exists, Ring 3 ALWAYS appends to
89
+ `cursor_history` — the LLM's `is_new` field is advisory naming metadata
90
+ only, never an authorization to fresh-write over an existing file. One
91
+ hallucinated `is_new: true` must not wipe an append-only history. The
92
+ canonical implementation is `updateThreadFile()` in `watchtower-lib.mjs`.
93
+
94
+ Corrupt thread files are never silently replaced: the corrupt file is
95
+ backed up aside as `<slug>.json.corrupt-<ts>`, a fresh file is written,
96
+ and the recovery is logged loudly. Per-thread writes are isolated — one
97
+ bad thread file must not abort writes for the remaining threads.
98
+
61
99
  ## Enrichment Directory
62
100
 
63
101
  Per-item enrichment lives in `queue/items/<id>/enrichment/`. Four
package/templates/engagement/pib-db-patches/pib-db-lib.mjs CHANGED
@@ -210,7 +210,10 @@ function validateSurfaceArea(notes) {
210
210
  }
211
211
 
212
212
  // Extract everything after ## Surface Area until the next ## or end
213
- const sectionMatch = notes.match(/^## Surface Area[^\n]*\n([\s\S]*?)(?=\n## |\n*$)/m);
213
+ // Lookahead ends only at the next "## " header or absolute end-of-string.
214
+ // A bare `\n*$` here terminated the match at a blank line right after the
215
+ // header, yielding an empty capture for standard markdown spacing.
216
+ const sectionMatch = notes.match(/^## Surface Area[^\n]*\n([\s\S]*?)(?=\n## |$(?![\s\S]))/m);
214
217
  const sectionBody = sectionMatch ? sectionMatch[1] : '';
215
218
  const hasEntry = /^- (?:files|dirs):/m.test(sectionBody);
216
219
  if (!hasEntry) {
package/templates/hooks/action-completion-gate.sh CHANGED
@@ -20,7 +20,19 @@ if [ -z "$FID" ]; then
20
20
  exit 0
21
21
  fi
22
22
 
23
+ # Resolve the MAIN checkout: /execute and /execute-group write completion
24
+ # artifacts (breadcrumbs, group Completion Reports) to the MAIN checkout's
25
+ # .claude/verification/ — a session running in a linked/mux worktree must read
26
+ # the same files, not the worktree's disposable gitignored copy. Fail open to
27
+ # the cwd-relative path when not in a git repo (or git < 2.31).
23
28
  VERIFY_DIR=".claude/verification"
29
+ COMMON_DIR=$(git rev-parse --path-format=absolute --git-common-dir 2>/dev/null)
30
+ if [ -n "$COMMON_DIR" ] && [ -d "$COMMON_DIR" ]; then
31
+ MAIN_CHECKOUT=$(dirname "$COMMON_DIR")
32
+ if [ -d "$MAIN_CHECKOUT" ]; then
33
+ VERIFY_DIR="$MAIN_CHECKOUT/.claude/verification"
34
+ fi
35
+ fi
24
36
  BREADCRUMB="$VERIFY_DIR/$FID.json"
25
37
 
26
38
  if [ ! -f "$BREADCRUMB" ]; then
@@ -56,7 +68,12 @@ fi
56
68
  #
57
69
  # Tag lookup is best-effort: if pib.db can't be read, GRP_LABEL is empty and
58
70
  # this gate is skipped — the base breadcrumb gate above still applies.
71
+ # Same worktree resolution for the db: a worktree without its own pib.db
72
+ # falls back to the main checkout's, so the grp gate doesn't silently skip.
59
73
  DB_PATH="${PIB_DB_PATH:-pib.db}"
74
+ if [ ! -f "$DB_PATH" ] && [ -z "$PIB_DB_PATH" ] && [ -n "$MAIN_CHECKOUT" ] && [ -f "$MAIN_CHECKOUT/pib.db" ]; then
75
+ DB_PATH="$MAIN_CHECKOUT/pib.db"
76
+ fi
60
77
  TAGS=$(python3 -c "
61
78
  import sqlite3, sys
62
79
  try: