create-claude-cabinet 0.43.0 → 0.45.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -0
- package/lib/cli.js +57 -9
- package/lib/copy.js +56 -10
- package/lib/mux-setup.js +2 -0
- package/package.json +1 -1
- package/templates/cabinet/checklist-stats-schema.md +104 -0
- package/templates/cabinet/checkpoint-protocol.md +17 -5
- package/templates/cabinet/qa-dimensions-template.yaml +7 -0
- package/templates/cabinet/watchtower-contracts.md +38 -0
- package/templates/engagement/pib-db-patches/pib-db-lib.mjs +4 -1
- package/templates/hooks/action-completion-gate.sh +17 -0
- package/templates/hooks/watchtower-session-start.sh +80 -5
- package/templates/mux/__tests__/claude-carveout.fixture.sh +136 -0
- package/templates/mux/__tests__/claude-carveout.test.mjs +38 -0
- package/templates/mux/__tests__/mux-fail-loud.fixture.sh +254 -0
- package/templates/mux/__tests__/mux-fail-loud.test.mjs +41 -0
- package/templates/mux/__tests__/worktree-dirty-check.fixture.sh +184 -0
- package/templates/mux/__tests__/worktree-dirty-check.test.mjs +35 -0
- package/templates/mux/bin/mux +581 -93
- package/templates/mux/config/help.txt +2 -0
- package/templates/mux/config/worktree-cleanup.sh +55 -9
- package/templates/mux/config/worktree-dirty-check.sh +128 -0
- package/templates/mux/config/worktree-session-health.sh +62 -35
- package/templates/scripts/__tests__/qa-handoff-aging.e2e.test.mjs +108 -0
- package/templates/scripts/__tests__/qa-handoff-gate.test.mjs +335 -0
- package/templates/scripts/__tests__/resolve-project.test.mjs +144 -0
- package/templates/scripts/__tests__/ring-state-ownership.test.mjs +228 -0
- package/templates/scripts/pib-db-lib.mjs +4 -1
- package/templates/scripts/pib-db.mjs +4 -1
- package/templates/scripts/validate-memory.mjs +6 -2
- package/templates/scripts/watchtower-build-context.mjs +12 -8
- package/templates/scripts/watchtower-lib.mjs +265 -2
- package/templates/scripts/watchtower-migrate-keys.mjs +305 -0
- package/templates/scripts/watchtower-queue.mjs +226 -1
- package/templates/scripts/watchtower-ring1.mjs +19 -3
- package/templates/scripts/watchtower-ring2.mjs +4 -2
- package/templates/scripts/watchtower-ring3-close.mjs +92 -88
- package/templates/skills/audit/SKILL.md +25 -7
- package/templates/skills/audit/phases/checklist-pruning.md +108 -0
- package/templates/skills/briefing/SKILL.md +12 -1
- package/templates/skills/cabinet/SKILL.md +2 -2
- package/templates/skills/cabinet-record-keeper/SKILL.md +6 -1
- package/templates/skills/cc-upgrade/SKILL.md +0 -1
- package/templates/skills/collab-consultant/SKILL.md +1 -1
- package/templates/skills/debrief/SKILL.md +33 -3
- package/templates/skills/debrief/phases/checklist-feedback.md +10 -3
- package/templates/skills/debrief/phases/qa-handoff-sweep.md +78 -0
- package/templates/skills/engagement-create/SKILL.md +1 -1
- package/templates/skills/engagement-help/SKILL.md +1 -1
- package/templates/skills/execute/SKILL.md +1 -2
- package/templates/skills/execute/phases/post-impl-checklist.md +18 -0
- package/templates/skills/execute-group/SKILL.md +76 -24
- package/templates/skills/inbox/SKILL.md +30 -7
- package/templates/skills/investigate/SKILL.md +0 -2
- package/templates/skills/orient/SKILL.md +100 -6
- package/templates/skills/orient/phases/checklist-status.md +12 -0
- package/templates/skills/plan/SKILL.md +14 -7
- package/templates/skills/qa-handoff/SKILL.md +243 -25
- package/templates/skills/session-handoff/SKILL.md +165 -0
- package/templates/skills/setup-accounts/SKILL.md +1 -1
- package/templates/skills/unwrap/SKILL.md +1 -1
- package/templates/skills/verify/SKILL.md +2 -2
- package/templates/skills/watchtower/SKILL.md +19 -1
- package/templates/watchtower/queue/items/item.json.schema +9 -0
- package/templates/workflows/deliberative-audit.js +3 -0
- package/templates/workflows/execute-group-complete.js +93 -16
- package/templates/workflows/execute-group-implement.js +164 -19
package/README.md
CHANGED
|
@@ -241,8 +241,13 @@ npx create-claude-cabinet --yes # Accept all defaults
|
|
|
241
241
|
npx create-claude-cabinet --yes --no-db # All defaults, skip database
|
|
242
242
|
npx create-claude-cabinet --dry-run # Preview without writing files
|
|
243
243
|
npx create-claude-cabinet --modules verify --yes # Add an opt-in module (merges, doesn't replace)
|
|
244
|
+
npx create-claude-cabinet --frontier-model claude-fable-5 # Designate your frontier model (watchdog)
|
|
244
245
|
```
|
|
245
246
|
|
|
247
|
+
### Frontier-model watchdog
|
|
248
|
+
|
|
249
|
+
`--frontier-model <model>` records, once, which model your heavy thinking is supposed to run on. The designation is per-operator (stored in `~/.claude/cc-registry.json` under `frontierModel`, not per-project), and the installer prints the effective value on every run. From then on, `/orient` — and, on watchtower installs, the SessionStart hook — compares the session's actual model against it and leads the briefing with a loud warning on mismatch. The key can be an exact model ID (`claude-fable-5`, exact match required) or a family alias (`fable`, matches any model ID containing it). This is **visibility only**: nothing is pinned, blocked, or rerouted — it just makes "you're accidentally on the wrong model" impossible to miss. A stale key after a model-family transition nags loudly by design; update it with the same flag.
|
|
250
|
+
|
|
246
251
|
## What Gets Installed
|
|
247
252
|
|
|
248
253
|
Everything goes into `.claude/` or `scripts/`. Nothing touches your
|
package/lib/cli.js
CHANGED
|
@@ -3,7 +3,7 @@ const path = require('path');
|
|
|
3
3
|
const fs = require('fs');
|
|
4
4
|
const os = require('os');
|
|
5
5
|
const crypto = require('crypto');
|
|
6
|
-
const { copyTemplates } = require('./copy');
|
|
6
|
+
const { copyTemplates, recordSkip } = require('./copy');
|
|
7
7
|
const { mergeSettings, healUserSettings, mergeWatchtowerHooks, mergeMuxHooks, mergeBashCompressHooks } = require('./settings-merge');
|
|
8
8
|
const { create: createMetadata, read: readMetadata } = require('./metadata');
|
|
9
9
|
const { setupDb } = require('./db-setup');
|
|
@@ -397,9 +397,12 @@ function generateAgentWrappers(projectDir) {
|
|
|
397
397
|
if (/websearch/.test(toolSignal)) tools.push('WebSearch');
|
|
398
398
|
if (/webfetch|fetch_docs/.test(toolSignal)) tools.push('WebFetch');
|
|
399
399
|
|
|
400
|
-
// model: none of the cabinet skills declare one today; default to
|
|
401
|
-
//
|
|
402
|
-
|
|
400
|
+
// model: none of the cabinet skills declare one today; default to inherit
|
|
401
|
+
// (follow the session model — a family alias like 'sonnet' goes stale when
|
|
402
|
+
// the frontier moves families), but honor an explicit declaration if a
|
|
403
|
+
// member ever sets one. Background watchtower rings pin their own model
|
|
404
|
+
// separately and are unaffected.
|
|
405
|
+
const model = (typeof fm.model === 'string' && fm.model.trim()) || 'inherit';
|
|
403
406
|
|
|
404
407
|
const wrapper =
|
|
405
408
|
`---\n` +
|
|
@@ -457,11 +460,13 @@ const MODULES = {
|
|
|
457
460
|
'skills/orient-quick',
|
|
458
461
|
'skills/debrief',
|
|
459
462
|
'skills/debrief-quick',
|
|
463
|
+
'skills/session-handoff',
|
|
460
464
|
// Instruction phases — always ship, overriding the default skip-phases rule in copy.js
|
|
461
465
|
'skills/debrief/phases/audit-pattern-capture.md',
|
|
462
466
|
'skills/debrief/phases/methodology-capture.md',
|
|
463
467
|
'skills/debrief/phases/record-lessons.md',
|
|
464
468
|
'skills/debrief/phases/upstream-feedback.md',
|
|
469
|
+
'skills/debrief/phases/qa-handoff-sweep.md',
|
|
465
470
|
'skills/menu',
|
|
466
471
|
],
|
|
467
472
|
},
|
|
@@ -488,7 +493,7 @@ const MODULES = {
|
|
|
488
493
|
mandatory: false,
|
|
489
494
|
default: true,
|
|
490
495
|
lean: true,
|
|
491
|
-
templates: ['skills/plan', 'skills/execute', 'skills/execute/phases/post-impl-checklist.md', 'skills/debrief/phases/checklist-feedback.md', 'skills/checklist-discover', 'skills/generate-plan-groups', 'skills/execute-group', 'workflows/execute-group-implement.js', 'workflows/execute-group-complete.js', 'skills/investigate', 'cabinet/checkpoint-protocol.md', 'cabinet/elicitation-methods.md', 'cabinet/qa-dimensions-template.yaml', 'scripts/qa-dimensions-validator.cjs', 'skills/orient/phases/checklist-status.md'],
|
|
496
|
+
templates: ['skills/plan', 'skills/execute', 'skills/execute/phases/post-impl-checklist.md', 'skills/debrief/phases/checklist-feedback.md', 'skills/checklist-discover', 'skills/generate-plan-groups', 'skills/execute-group', 'workflows/execute-group-implement.js', 'workflows/execute-group-complete.js', 'skills/investigate', 'cabinet/checkpoint-protocol.md', 'cabinet/elicitation-methods.md', 'cabinet/qa-dimensions-template.yaml', 'cabinet/checklist-stats-schema.md', 'scripts/qa-dimensions-validator.cjs', 'skills/orient/phases/checklist-status.md', 'skills/audit/phases/checklist-pruning.md'],
|
|
492
497
|
},
|
|
493
498
|
'compliance': {
|
|
494
499
|
name: 'Compliance Stack (rules + enforcement)',
|
|
@@ -747,6 +752,14 @@ function parseArgs(argv) {
|
|
|
747
752
|
else if (arg === '--modules' && i + 1 < args.length) {
|
|
748
753
|
flags.modules = args[++i].split(',').map(s => s.trim()).filter(Boolean);
|
|
749
754
|
}
|
|
755
|
+
else if (arg === '--frontier-model' && i + 1 < args.length) {
|
|
756
|
+
// Empty/whitespace values are treated as absent: '' is a substring of
|
|
757
|
+
// every model ID, which would match everything and leave the watchdog
|
|
758
|
+
// permanently silent while appearing configured.
|
|
759
|
+
const value = args[++i].trim();
|
|
760
|
+
if (value) flags.frontierModel = value;
|
|
761
|
+
else flags.frontierModelEmpty = true;
|
|
762
|
+
}
|
|
750
763
|
else if (!arg.startsWith('-')) flags.targetDir = arg;
|
|
751
764
|
}
|
|
752
765
|
|
|
@@ -772,6 +785,11 @@ function printHelp() {
|
|
|
772
785
|
disables omega hooks/MCP. Idempotent — safe to re-run.
|
|
773
786
|
Pair with --dry-run to preview.
|
|
774
787
|
--unmigrate-memory Roll back --migrate-memory using its backup dir.
|
|
788
|
+
--frontier-model <model> Designate your frontier model (user-level, stored
|
|
789
|
+
in ~/.claude/cc-registry.json). Visibility only: /orient and
|
|
790
|
+
the watchtower SessionStart hook warn loudly when a session
|
|
791
|
+
runs a different model. Does NOT pin or route anything.
|
|
792
|
+
Accepts an exact ID (claude-fable-5) or a family alias (fable).
|
|
775
793
|
--help, -h Show this help
|
|
776
794
|
|
|
777
795
|
Examples:
|
|
@@ -1211,7 +1229,7 @@ async function run() {
|
|
|
1211
1229
|
const existingContent = fs.readFileSync(destPath, 'utf8');
|
|
1212
1230
|
if (existingContent === incoming) {
|
|
1213
1231
|
totalSkipped++;
|
|
1214
|
-
allManifest
|
|
1232
|
+
recordSkip(allManifest, mPath, { identical: true, incomingHash });
|
|
1215
1233
|
continue;
|
|
1216
1234
|
}
|
|
1217
1235
|
|
|
@@ -1227,7 +1245,9 @@ async function run() {
|
|
|
1227
1245
|
if (isPhaseFile && !isInstructionPhase && existingContent.trim() !== '' && existingContent.trim() !== incoming.trim()) {
|
|
1228
1246
|
console.log(` Preserved customized phase: ${tmpl}`);
|
|
1229
1247
|
totalSkipped++;
|
|
1230
|
-
|
|
1248
|
+
// Customized phase = project-owned content → omit from manifest
|
|
1249
|
+
// (recordSkip in copy.js — omission means "not ours").
|
|
1250
|
+
recordSkip(allManifest, mPath);
|
|
1231
1251
|
continue;
|
|
1232
1252
|
}
|
|
1233
1253
|
|
|
@@ -1237,10 +1257,19 @@ async function run() {
|
|
|
1237
1257
|
if (existingManifest[mPath]) {
|
|
1238
1258
|
if (!flags.dryRun) fs.copyFileSync(srcPath, destPath);
|
|
1239
1259
|
totalOverwritten++;
|
|
1260
|
+
// Log single-file overwrites too — the directory path (copy.js)
|
|
1261
|
+
// already does. Without this, scripts/ updates are invisible in
|
|
1262
|
+
// install output, masking whether a changed script propagated.
|
|
1263
|
+
console.log(` Updated: ${path.relative(projectDir, destPath)}`);
|
|
1264
|
+
allManifest[mPath] = incomingHash;
|
|
1240
1265
|
} else {
|
|
1241
1266
|
totalSkipped++;
|
|
1267
|
+
// Project-created file → omit from manifest entirely. Ownership
|
|
1268
|
+
// classification is manifest-PRESENCE-based, so recording ANY
|
|
1269
|
+
// hash here would mark the file upstream-owned and the NEXT
|
|
1270
|
+
// install would silently overwrite it (act:bf21c95b).
|
|
1271
|
+
recordSkip(allManifest, mPath);
|
|
1242
1272
|
}
|
|
1243
|
-
allManifest[mPath] = incomingHash;
|
|
1244
1273
|
} else {
|
|
1245
1274
|
const response = await prompts({
|
|
1246
1275
|
type: 'select',
|
|
@@ -1255,10 +1284,13 @@ async function run() {
|
|
|
1255
1284
|
if (response.action === 'overwrite') {
|
|
1256
1285
|
if (!flags.dryRun) fs.copyFileSync(srcPath, destPath);
|
|
1257
1286
|
totalOverwritten++;
|
|
1287
|
+
allManifest[mPath] = incomingHash;
|
|
1258
1288
|
} else {
|
|
1259
1289
|
totalSkipped++;
|
|
1290
|
+
// Keep: the user claimed this file → project-owned → omit from
|
|
1291
|
+
// the manifest so it is never mistaken for upstream content.
|
|
1292
|
+
recordSkip(allManifest, mPath);
|
|
1260
1293
|
}
|
|
1261
|
-
allManifest[mPath] = incomingHash;
|
|
1262
1294
|
}
|
|
1263
1295
|
} else {
|
|
1264
1296
|
if (!flags.dryRun) fs.copyFileSync(srcPath, destPath);
|
|
@@ -1607,6 +1639,18 @@ async function run() {
|
|
|
1607
1639
|
// Register with folder name. /onboard fills in name and description later.
|
|
1608
1640
|
registry.projects.push(entry);
|
|
1609
1641
|
}
|
|
1642
|
+
// --- Frontier-model designation (visibility watchdog) ---
|
|
1643
|
+
// User-level, per-operator key. Read-preserve-rewrite: only the
|
|
1644
|
+
// frontierModel key is touched; every other key rides through.
|
|
1645
|
+
if (flags.frontierModelEmpty) {
|
|
1646
|
+
console.log(' ⚠ Ignoring empty --frontier-model value (an empty key would match every model and silence the watchdog)');
|
|
1647
|
+
}
|
|
1648
|
+
if (flags.frontierModel) {
|
|
1649
|
+
registry.frontierModel = flags.frontierModel;
|
|
1650
|
+
} else if (typeof registry.frontierModel === 'string' && !registry.frontierModel.trim()) {
|
|
1651
|
+
// Heal a hand-edited empty key — treat as absent (see parseArgs note).
|
|
1652
|
+
delete registry.frontierModel;
|
|
1653
|
+
}
|
|
1610
1654
|
fs.writeFileSync(registryPath, JSON.stringify(registry, null, 2) + '\n');
|
|
1611
1655
|
const otherCount = registry.projects.filter(p => p.path !== projectDir).length;
|
|
1612
1656
|
if (otherCount > 0) {
|
|
@@ -1614,6 +1658,10 @@ async function run() {
|
|
|
1614
1658
|
} else {
|
|
1615
1659
|
console.log(' 📋 Registered in project registry');
|
|
1616
1660
|
}
|
|
1661
|
+
// Self-announcing: print the effective designation on every run.
|
|
1662
|
+
if (registry.frontierModel) {
|
|
1663
|
+
console.log(` 🛰 Frontier model: ${registry.frontierModel} (visibility watchdog — /orient + SessionStart warn on mismatch; nothing is pinned)`);
|
|
1664
|
+
}
|
|
1617
1665
|
} catch (err) {
|
|
1618
1666
|
// Non-fatal — registry is nice-to-have
|
|
1619
1667
|
}
|
package/lib/copy.js
CHANGED
|
@@ -7,6 +7,49 @@ function hashContent(content) {
|
|
|
7
7
|
return crypto.createHash('sha256').update(content).digest('hex').slice(0, 16);
|
|
8
8
|
}
|
|
9
9
|
|
|
10
|
+
/**
|
|
11
|
+
* Record the manifest consequence of SKIPPING a file at install time
|
|
12
|
+
* (act:bf21c95b). The single shared rule for ALL skip sites in BOTH
|
|
13
|
+
* install code paths (lib/copy.js and the single-file branches in
|
|
14
|
+
* lib/cli.js):
|
|
15
|
+
*
|
|
16
|
+
* - A skipped file whose on-disk content differs from the incoming
|
|
17
|
+
* template is NOT upstream content — it is project-owned
|
|
18
|
+
* (project-created, user-kept, or a customized phase). It must be
|
|
19
|
+
* OMITTED from the manifest entirely. An absent entry means "not
|
|
20
|
+
* ours". Recording any hash for it poisons the manifest: ownership
|
|
21
|
+
* classification is manifest-PRESENCE-based, so the next install
|
|
22
|
+
* would classify the file upstream-owned and silently overwrite it
|
|
23
|
+
* (and cc-upstream-guard / cc-drift-check would false-positive on it).
|
|
24
|
+
* - The one exception: a skipped file byte-identical to the incoming
|
|
25
|
+
* template is indistinguishable from upstream content and stays
|
|
26
|
+
* tracked under the template hash.
|
|
27
|
+
*
|
|
28
|
+
* Omission only — never a marker value or an alternate manifest value
|
|
29
|
+
* shape (lesson_shared_json_shape_drift). All manifest consumers
|
|
30
|
+
* (ownership classification, cleanup loop, key migration, cc-drift-check,
|
|
31
|
+
* cc-upstream-guard, lib/reset.js) treat an absent key as "not ours" and
|
|
32
|
+
* leave the file alone.
|
|
33
|
+
*
|
|
34
|
+
* Known limitation (documented, not solved here): manifests already
|
|
35
|
+
* poisoned by past installs cannot retroactively distinguish a recorded
|
|
36
|
+
* project-created file from genuine upstream content. This helper only
|
|
37
|
+
* prevents NEW poisoning.
|
|
38
|
+
*
|
|
39
|
+
* @param {object} manifest manifest object being built for this install
|
|
40
|
+
* @param {string} key manifest key for the skipped file
|
|
41
|
+
* @param {object} [opts]
|
|
42
|
+
* @param {boolean} [opts.identical] on-disk content === incoming template
|
|
43
|
+
* @param {string} [opts.incomingHash] hash of the incoming template content
|
|
44
|
+
*/
|
|
45
|
+
function recordSkip(manifest, key, { identical = false, incomingHash = null } = {}) {
|
|
46
|
+
if (identical && incomingHash) {
|
|
47
|
+
manifest[key] = incomingHash;
|
|
48
|
+
} else {
|
|
49
|
+
delete manifest[key];
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
|
|
10
53
|
/**
|
|
11
54
|
* Recursively copy files from src to dest, surfacing conflicts.
|
|
12
55
|
* Returns { copied: string[], skipped: string[], overwritten: string[] }
|
|
@@ -55,7 +98,8 @@ async function walkAndCopy(srcRoot, destRoot, currentSrc, results, dryRun, skipC
|
|
|
55
98
|
const trimmedExisting = existing.trim();
|
|
56
99
|
if (trimmedExisting !== '' && trimmedExisting !== incoming.trim()) {
|
|
57
100
|
results.skipped.push(relPath);
|
|
58
|
-
|
|
101
|
+
// Customized phase = project-owned content → omit from manifest.
|
|
102
|
+
recordSkip(results.manifest, relPath);
|
|
59
103
|
console.log(` Preserved customized phase: ${displayPath}`);
|
|
60
104
|
continue;
|
|
61
105
|
}
|
|
@@ -64,7 +108,7 @@ async function walkAndCopy(srcRoot, destRoot, currentSrc, results, dryRun, skipC
|
|
|
64
108
|
|
|
65
109
|
if (existing === incoming) {
|
|
66
110
|
results.skipped.push(relPath);
|
|
67
|
-
results.manifest
|
|
111
|
+
recordSkip(results.manifest, relPath, { identical: true, incomingHash });
|
|
68
112
|
continue;
|
|
69
113
|
}
|
|
70
114
|
|
|
@@ -82,9 +126,8 @@ async function walkAndCopy(srcRoot, destRoot, currentSrc, results, dryRun, skipC
|
|
|
82
126
|
console.log(` Updated: ${displayPath}`);
|
|
83
127
|
} else {
|
|
84
128
|
results.skipped.push(relPath);
|
|
85
|
-
//
|
|
86
|
-
|
|
87
|
-
results.manifest[relPath] = hashContent(existing);
|
|
129
|
+
// Project-created file → omit from manifest ("not ours").
|
|
130
|
+
recordSkip(results.manifest, relPath);
|
|
88
131
|
}
|
|
89
132
|
continue;
|
|
90
133
|
}
|
|
@@ -101,9 +144,9 @@ async function walkAndCopy(srcRoot, destRoot, currentSrc, results, dryRun, skipC
|
|
|
101
144
|
});
|
|
102
145
|
|
|
103
146
|
if (!response.action) {
|
|
104
|
-
// User cancelled
|
|
147
|
+
// User cancelled → file kept as-is → project-owned → omit.
|
|
105
148
|
results.skipped.push(relPath);
|
|
106
|
-
results.manifest
|
|
149
|
+
recordSkip(results.manifest, relPath);
|
|
107
150
|
continue;
|
|
108
151
|
}
|
|
109
152
|
|
|
@@ -118,17 +161,20 @@ async function walkAndCopy(srcRoot, destRoot, currentSrc, results, dryRun, skipC
|
|
|
118
161
|
if (followUp.overwrite && !dryRun) {
|
|
119
162
|
fs.copyFileSync(srcPath, destPath);
|
|
120
163
|
results.overwritten.push(relPath);
|
|
164
|
+
results.manifest[relPath] = incomingHash;
|
|
121
165
|
} else {
|
|
166
|
+
// Diff shown, user kept their file → project-owned → omit.
|
|
122
167
|
results.skipped.push(relPath);
|
|
168
|
+
recordSkip(results.manifest, relPath);
|
|
123
169
|
}
|
|
124
|
-
results.manifest[relPath] = incomingHash;
|
|
125
170
|
} else if (response.action === 'overwrite') {
|
|
126
171
|
if (!dryRun) fs.copyFileSync(srcPath, destPath);
|
|
127
172
|
results.overwritten.push(relPath);
|
|
128
173
|
results.manifest[relPath] = incomingHash;
|
|
129
174
|
} else {
|
|
175
|
+
// 'Keep existing' → project-owned → omit from manifest.
|
|
130
176
|
results.skipped.push(relPath);
|
|
131
|
-
results.manifest
|
|
177
|
+
recordSkip(results.manifest, relPath);
|
|
132
178
|
}
|
|
133
179
|
} else {
|
|
134
180
|
if (!dryRun) {
|
|
@@ -169,4 +215,4 @@ function showDiff(existing, incoming, relPath) {
|
|
|
169
215
|
console.log('');
|
|
170
216
|
}
|
|
171
217
|
|
|
172
|
-
module.exports = { copyTemplates };
|
|
218
|
+
module.exports = { copyTemplates, recordSkip };
|
package/lib/mux-setup.js
CHANGED
|
@@ -45,6 +45,7 @@ const MANAGED_FILES = [
|
|
|
45
45
|
{ src: 'config/worktree-session-health.sh', dest: path.join(os.homedir(), '.config', 'mux', 'worktree-session-health.sh'), mode: 0o755 },
|
|
46
46
|
{ src: 'config/worktree-health-popup.sh', dest: path.join(os.homedir(), '.config', 'mux', 'worktree-health-popup.sh'), mode: 0o755 },
|
|
47
47
|
{ src: 'config/worktree-cleanup.sh', dest: path.join(os.homedir(), '.config', 'mux', 'worktree-cleanup.sh'), mode: 0o755 },
|
|
48
|
+
{ src: 'config/worktree-dirty-check.sh', dest: path.join(os.homedir(), '.config', 'mux', 'worktree-dirty-check.sh'), mode: 0o755 },
|
|
48
49
|
{ src: 'config/mux.tmux.conf', dest: path.join(os.homedir(), '.config', 'mux', 'mux.tmux.conf') },
|
|
49
50
|
{ src: 'config/unwrap-copy.py', dest: path.join(os.homedir(), '.config', 'mux', 'unwrap-copy.py'), mode: 0o755 },
|
|
50
51
|
{ src: 'config/screenshot-to-clipboard.sh', dest: path.join(os.homedir(), '.config', 'mux', 'screenshot-to-clipboard.sh'), mode: 0o755 },
|
|
@@ -57,6 +58,7 @@ const DATA_DIRS = [
|
|
|
57
58
|
path.join(os.homedir(), '.config', 'mux', 'dx'),
|
|
58
59
|
path.join(os.homedir(), '.config', 'mux', 'pending-prompts'),
|
|
59
60
|
path.join(os.homedir(), '.local', 'share', 'mux', 'wt-health'),
|
|
61
|
+
path.join(os.homedir(), '.local', 'share', 'mux', 'qa-handoff'),
|
|
60
62
|
];
|
|
61
63
|
|
|
62
64
|
function sha256(content) {
|
package/package.json
CHANGED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# Checklist Stats — hit-rate sidecar schema and write protocol
|
|
2
|
+
|
|
3
|
+
`.claude/cabinet/checklist-stats.json` records how the change-impact
|
|
4
|
+
checklist (`qa-dimensions.yaml`) performs over time: which dimensions
|
|
5
|
+
fire, which checks actually catch problems, and what pruning verdicts
|
|
6
|
+
the operator has already given. It is the evidence base for the audit
|
|
7
|
+
skill's `checklist-pruning` phase — without it, the checklist only ever
|
|
8
|
+
grows (debrief's `checklist-feedback` is add-only by design) and decays
|
|
9
|
+
into noise.
|
|
10
|
+
|
|
11
|
+
**This file is RUNTIME STATE, generated on first write — never shipped
|
|
12
|
+
as a template.** Shipping it would clobber accumulated stats on every
|
|
13
|
+
reinstall (same rule as `advisories-state-schema.md`). And it never
|
|
14
|
+
lives inside `qa-dimensions.yaml`: config files do not contain runtime
|
|
15
|
+
state.
|
|
16
|
+
|
|
17
|
+
## Who writes what
|
|
18
|
+
|
|
19
|
+
| Writer | When | What |
|
|
20
|
+
|--------|------|------|
|
|
21
|
+
| `/execute` `post-impl-checklist` phase | every run past its no-op guard | increments `runs`; per triggered dimension increments `fires`, sets `last_fired` |
|
|
22
|
+
| `/debrief` `checklist-feedback` phase | when a session bug WAS caught via a surfaced check | appends to that dimension's `catches` |
|
|
23
|
+
| `/audit` `checklist-pruning` phase | every pruning verdict (including "keep") | appends to `pruning_reviews` |
|
|
24
|
+
|
|
25
|
+
## Schema (`schema_version: 1`)
|
|
26
|
+
|
|
27
|
+
```json
|
|
28
|
+
{
|
|
29
|
+
"schema_version": 1,
|
|
30
|
+
"runs": 14,
|
|
31
|
+
"dimensions": {
|
|
32
|
+
"data-coherence": {
|
|
33
|
+
"fires": 12,
|
|
34
|
+
"last_fired": "2026-06-11",
|
|
35
|
+
"catches": [
|
|
36
|
+
{
|
|
37
|
+
"date": "2026-06-10",
|
|
38
|
+
"check": "[run] Run schema validation if any schema or migration file changed.",
|
|
39
|
+
"note": "caught missing FK backfill before commit"
|
|
40
|
+
}
|
|
41
|
+
]
|
|
42
|
+
}
|
|
43
|
+
},
|
|
44
|
+
"pruning_reviews": [
|
|
45
|
+
{
|
|
46
|
+
"date": "2026-06-11",
|
|
47
|
+
"target": "test-staleness",
|
|
48
|
+
"verdict": "keep",
|
|
49
|
+
"note": "fires often, zero catches, but cheap insurance at moderate severity"
|
|
50
|
+
}
|
|
51
|
+
]
|
|
52
|
+
}
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Field semantics:
|
|
56
|
+
|
|
57
|
+
- **`runs`** — total executions of the post-impl-checklist phase that
|
|
58
|
+
passed its no-op guard, INCLUDING runs where zero dimensions
|
|
59
|
+
triggered. This is the denominator for "never fired in N runs."
|
|
60
|
+
- **`dimensions.<name>.fires`** — number of runs in which the dimension
|
|
61
|
+
triggered (matched at least one changed path). Dimension-level, not
|
|
62
|
+
check-level: checks have no stable IDs, so firing is counted where it
|
|
63
|
+
happens (path match) and catching is attributed by quoting the check.
|
|
64
|
+
- **`dimensions.<name>.catches`** — append-only evidence that a
|
|
65
|
+
surfaced check caught a real issue. `check` quotes the check text as
|
|
66
|
+
written in the yaml at the time.
|
|
67
|
+
- **`pruning_reviews`** — append-only verdict log. `verdict` is one of
|
|
68
|
+
`removed | trimmed | paths-fixed | severity-changed | keep`. The
|
|
69
|
+
pruning phase skips candidates with any verdict in the last 90 days,
|
|
70
|
+
so a "keep" decision is not re-litigated at every audit.
|
|
71
|
+
|
|
72
|
+
## Write protocol
|
|
73
|
+
|
|
74
|
+
1. Read the file. If absent, bootstrap the skeleton
|
|
75
|
+
(`{"schema_version": 1, "runs": 0, "dimensions": {}, "pruning_reviews": []}`).
|
|
76
|
+
If present but unparseable, move it aside to
|
|
77
|
+
`checklist-stats.json.corrupt-<YYYY-MM-DD>` (never delete) and
|
|
78
|
+
bootstrap fresh.
|
|
79
|
+
2. Modify in memory.
|
|
80
|
+
3. Write to `checklist-stats.json.tmp`, then rename over the original
|
|
81
|
+
(atomic — safe under concurrent sessions).
|
|
82
|
+
|
|
83
|
+
**Fail-open, always:** a stats read or write failure must never block
|
|
84
|
+
the phase doing the recording. Emit one warning line and continue —
|
|
85
|
+
losing a data point is fine; blocking an execute/debrief/audit run over
|
|
86
|
+
bookkeeping is not.
|
|
87
|
+
|
|
88
|
+
## Anti-trap rules
|
|
89
|
+
|
|
90
|
+
- **Stats inform; the human decides.** Nothing auto-prunes from this
|
|
91
|
+
data, ever. Low hit-rate is evidence presented at audit, not a
|
|
92
|
+
trigger.
|
|
93
|
+
- **Per-dimension judgment, not universal thresholds.** A high-severity
|
|
94
|
+
security check that fires often and never catches may still be cheap
|
|
95
|
+
insurance; an info-severity check with the same profile is noise.
|
|
96
|
+
The pruning phase presents severity alongside the numbers.
|
|
97
|
+
- **Renames orphan stats.** If a dimension is renamed in
|
|
98
|
+
`qa-dimensions.yaml`, its stats entry goes stale. The pruning phase
|
|
99
|
+
reports entries with no matching dimension as orphans (offer to fold
|
|
100
|
+
or drop them); writers simply start a fresh entry under the new name.
|
|
101
|
+
- **Counts are honest, not precise.** Concurrent sessions can lose an
|
|
102
|
+
increment to a race; the rename-based write keeps the file valid and
|
|
103
|
+
the trend signal is what matters. Do not build exact-count logic on
|
|
104
|
+
top of this file.
|
|
@@ -37,7 +37,7 @@ high-stakes reviews is to put judgment in front of the operator.
|
|
|
37
37
|
| Mode | Where it runs | What a `stop`/`pause` does | Used by |
|
|
38
38
|
|------|---------------|----------------------------|---------|
|
|
39
39
|
| **Interactive CP** | Main session (skill level) | Surfaced to the operator, who decides (proceed / drop / override / abort). Never automatic. | `/execute-group` CP1 |
|
|
40
|
-
| **Advisory CP** | Workflow | Recorded in the Completion Report as a concern. Never halts or reverts. The only automatic gate alongside it is `/validate`. | `/execute-group` CP3 |
|
|
40
|
+
| **Advisory CP** | Workflow | Recorded in the Completion Report as a concern. Never halts or reverts. The only automatic gate alongside it is merge-delta `/validate` — new failures vs the group's pre-merge baseline; inherited debt is reported, not gated. | `/execute-group` CP3 |
|
|
41
41
|
| **Full CP** | Main session or workflow | Halts on `stop`, escalates 3+ `pause` to a halt, requires explicit override. The classic gate. | `/execute` CP1/CP2/CP3 |
|
|
42
42
|
|
|
43
43
|
**Why Interactive and Advisory exist.** `/execute-group` once ran CP1 and CP3
|
|
@@ -45,8 +45,20 @@ as autonomous gates inside a single workflow: a cabinet `stop` halted the run
|
|
|
45
45
|
or reverted a merge with no human in the loop. False positives there cost real
|
|
46
46
|
money (a CP1 halted twice consecutively — 1.6M+ tokens — on concerns the plan
|
|
47
47
|
text already addressed). Moving CP1 to interactive (operator decides) and CP3
|
|
48
|
-
to advisory (concerns recorded, `/validate` is the only hard gate) keeps the
|
|
49
|
-
review signal while removing the destructive autonomous action.
|
|
48
|
+
to advisory (concerns recorded, merge-delta `/validate` is the only hard
|
|
49
|
+
gate) keeps the review signal while removing the destructive autonomous
|
|
50
|
+
action.
|
|
51
|
+
|
|
52
|
+
**The hard gate is merge-delta, not absolute.** `/execute-group` captures a
|
|
53
|
+
`/validate` baseline on main before the group's first merge. Only failures
|
|
54
|
+
NOT in that baseline (i.e. failures the group itself introduced) gate a merge
|
|
55
|
+
or completion. Failures that pre-date the group are inherited debt: listed
|
|
56
|
+
loudly in the Completion Report's `pre_existing_debt` section, never gated.
|
|
57
|
+
This too is field-driven — two consecutive groups were gated on documented
|
|
58
|
+
pre-existing main debt with zero merge-delta regressions, and the manual
|
|
59
|
+
recovery (judge the delta by hand, close the plans) ran identically both
|
|
60
|
+
times, so the delta judgment was promoted into the gate itself. The gate
|
|
61
|
+
stays hard for new failures: the point is removing ritual, not weakening it.
|
|
50
62
|
|
|
51
63
|
### Interactive CP adds a required `addressed_by_plan` field
|
|
52
64
|
|
|
@@ -154,8 +166,8 @@ At **Interactive CP** (`/execute-group` CP1), add the required
|
|
|
154
166
|
The escalation below is **Full CP** behavior (used by `/execute`). For
|
|
155
167
|
**Interactive CP** the verdicts are surfaced to the operator severity-first
|
|
156
168
|
and the operator decides — no automatic halt. For **Advisory CP** the concerns
|
|
157
|
-
are recorded in the Completion Report and nothing halts or reverts; `/validate`
|
|
158
|
-
is the only automatic gate. See "Checkpoint modes" above.
|
|
169
|
+
are recorded in the Completion Report and nothing halts or reverts; merge-delta
|
|
170
|
+
`/validate` is the only automatic gate. See "Checkpoint modes" above.
|
|
159
171
|
|
|
160
172
|
Collect every verdict, then:
|
|
161
173
|
|
|
@@ -11,6 +11,13 @@
|
|
|
11
11
|
# and surfaces the matched dimensions' checks as context for the
|
|
12
12
|
# pre-commit cabinet sweep (Checkpoint 3). QA is the primary consumer.
|
|
13
13
|
#
|
|
14
|
+
# The checklist learns in both directions: /debrief's checklist-feedback
|
|
15
|
+
# phase ADDS checks when bugs slip through, and /audit's
|
|
16
|
+
# checklist-pruning phase surfaces low-hit-rate dimensions for
|
|
17
|
+
# human-approved REMOVAL (evidence lives in checklist-stats.json — see
|
|
18
|
+
# cabinet/checklist-stats-schema.md; runtime state never lives in this
|
|
19
|
+
# file).
|
|
20
|
+
#
|
|
14
21
|
# ── Schema ────────────────────────────────────────────────────────
|
|
15
22
|
# dimensions: # top-level map; keys are dimension names
|
|
16
23
|
# <dimension-name>:
|
|
@@ -58,6 +58,44 @@ attention window. If content exceeds 30 lines, truncation order:
|
|
|
58
58
|
2. Drop Portfolio Pulse detail for quiet projects
|
|
59
59
|
3. Never truncate "What Needs Attention" or "Where You Left Off"
|
|
60
60
|
|
|
61
|
+
## Project State Section Ownership
|
|
62
|
+
|
|
63
|
+
`state/projects/<slug>.md` is written by two rings. Every section has
|
|
64
|
+
exactly ONE owner; a ring must never rebuild a section the other ring
|
|
65
|
+
owns. The merge that enforces this is `preserveRing3LastSession()` in
|
|
66
|
+
`watchtower-lib.mjs`, applied by Ring 1 before each per-project write.
|
|
67
|
+
|
|
68
|
+
| Section | Owner | Notes |
|
|
69
|
+
|----------------------|--------|----------------------------------------|
|
|
70
|
+
| `# <name>` header | Ring 1 | Timestamp refreshed every run |
|
|
71
|
+
| `## Active Plans` | Ring 1 | Rebuilt from pib-db every run |
|
|
72
|
+
| `## Last Session` | Ring 3 | Once authored — see below |
|
|
73
|
+
| `## Standing Issues` | Ring 1 | Rebuilt every run |
|
|
74
|
+
| `## Tech Stack` | Ring 1 | Rebuilt every run |
|
|
75
|
+
|
|
76
|
+
Ring 3's sessionSummary writes the rich Last Session summary with an
|
|
77
|
+
`_<date> (<session-id>)_` attribution line directly under the header.
|
|
78
|
+
That attribution line IS the ownership marker: until Ring 3 has authored
|
|
79
|
+
the section, Ring 1 writes its own ephemeral fallback ("Active: …" /
|
|
80
|
+
last-commit line) and rebuilds it freely; once the marker is present,
|
|
81
|
+
Ring 1 must carry the existing section forward verbatim. Without this,
|
|
82
|
+
Ring 1's full-file rebuild deterministically clobbers Ring 3's summary
|
|
83
|
+
within one cron tick (~5 minutes).
|
|
84
|
+
|
|
85
|
+
### Thread File Durability (disk wins over model)
|
|
86
|
+
|
|
87
|
+
`state/threads/<slug>.json` carries the sibling rule: **disk wins over
|
|
88
|
+
model**. If the thread file exists, Ring 3 ALWAYS appends to
|
|
89
|
+
`cursor_history` — the LLM's `is_new` field is advisory naming metadata
|
|
90
|
+
only, never an authorization to fresh-write over an existing file. One
|
|
91
|
+
hallucinated `is_new: true` must not wipe an append-only history. The
|
|
92
|
+
canonical implementation is `updateThreadFile()` in `watchtower-lib.mjs`.
|
|
93
|
+
|
|
94
|
+
Corrupt thread files are never silently replaced: the corrupt file is
|
|
95
|
+
backed up as `<slug>.json.corrupt-<ts>`, a fresh file is written,
|
|
96
|
+
and the recovery is logged loudly. Per-thread writes are isolated — one
|
|
97
|
+
bad thread file must not abort writes for the remaining threads.
|
|
98
|
+
|
|
61
99
|
## Enrichment Directory
|
|
62
100
|
|
|
63
101
|
Per-item enrichment lives in `queue/items/<id>/enrichment/`. Four
|
|
@@ -210,7 +210,10 @@ function validateSurfaceArea(notes) {
|
|
|
210
210
|
}
|
|
211
211
|
|
|
212
212
|
// Extract everything after ## Surface Area until the next ## or end
|
|
213
|
-
|
|
213
|
+
// Lookahead ends only at the next "## " header or absolute end-of-string.
|
|
214
|
+
// A bare `\n*$` here terminated the match at a blank line right after the
|
|
215
|
+
// header, yielding an empty capture for standard markdown spacing.
|
|
216
|
+
const sectionMatch = notes.match(/^## Surface Area[^\n]*\n([\s\S]*?)(?=\n## |$(?![\s\S]))/m);
|
|
214
217
|
const sectionBody = sectionMatch ? sectionMatch[1] : '';
|
|
215
218
|
const hasEntry = /^- (?:files|dirs):/m.test(sectionBody);
|
|
216
219
|
if (!hasEntry) {
|
|
@@ -20,7 +20,19 @@ if [ -z "$FID" ]; then
|
|
|
20
20
|
exit 0
|
|
21
21
|
fi
|
|
22
22
|
|
|
23
|
+
# Resolve the MAIN checkout: /execute and /execute-group write completion
|
|
24
|
+
# artifacts (breadcrumbs, group Completion Reports) to the MAIN checkout's
|
|
25
|
+
# .claude/verification/ — a session running in a linked/mux worktree must read
|
|
26
|
+
# the same files, not the worktree's disposable gitignored copy. Fail open to
|
|
27
|
+
# the cwd-relative path when not in a git repo (or git < 2.31, which lacks --path-format).
|
|
23
28
|
VERIFY_DIR=".claude/verification"
|
|
29
|
+
COMMON_DIR=$(git rev-parse --path-format=absolute --git-common-dir 2>/dev/null)
|
|
30
|
+
if [ -n "$COMMON_DIR" ] && [ -d "$COMMON_DIR" ]; then
|
|
31
|
+
MAIN_CHECKOUT=$(dirname "$COMMON_DIR")
|
|
32
|
+
if [ -d "$MAIN_CHECKOUT" ]; then
|
|
33
|
+
VERIFY_DIR="$MAIN_CHECKOUT/.claude/verification"
|
|
34
|
+
fi
|
|
35
|
+
fi
|
|
24
36
|
BREADCRUMB="$VERIFY_DIR/$FID.json"
|
|
25
37
|
|
|
26
38
|
if [ ! -f "$BREADCRUMB" ]; then
|
|
@@ -56,7 +68,12 @@ fi
|
|
|
56
68
|
#
|
|
57
69
|
# Tag lookup is best-effort: if pib.db can't be read, GRP_LABEL is empty and
|
|
58
70
|
# this gate is skipped — the base breadcrumb gate above still applies.
|
|
71
|
+
# Same worktree resolution for the db: a worktree without its own pib.db
|
|
72
|
+
# falls back to the main checkout's, so the grp gate doesn't silently skip.
|
|
59
73
|
DB_PATH="${PIB_DB_PATH:-pib.db}"
|
|
74
|
+
if [ ! -f "$DB_PATH" ] && [ -z "$PIB_DB_PATH" ] && [ -n "$MAIN_CHECKOUT" ] && [ -f "$MAIN_CHECKOUT/pib.db" ]; then
|
|
75
|
+
DB_PATH="$MAIN_CHECKOUT/pib.db"
|
|
76
|
+
fi
|
|
60
77
|
TAGS=$(python3 -c "
|
|
61
78
|
import sqlite3, sys
|
|
62
79
|
try:
|