create-claude-cabinet 0.45.0 → 0.46.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/lib/cli.js +26 -0
- package/lib/engagement-server-setup.js +34 -9
- package/lib/migrate-from-omega.js +13 -1
- package/lib/mux-setup.js +33 -9
- package/lib/watchtower-setup.js +210 -0
- package/package.json +5 -1
- package/templates/cabinet/_cabinet-member-template.md +8 -3
- package/templates/cabinet/advisories-state-schema.md +34 -7
- package/templates/cabinet/composition-patterns.md +4 -3
- package/templates/cabinet/skill-output-conventions.md +35 -1
- package/templates/cabinet/watchtower-contracts.md +89 -1
- package/templates/engagement/pib-db-patches/pib-db-lib.mjs +10 -1
- package/templates/mux/__tests__/mux-fail-loud.fixture.sh +44 -0
- package/templates/mux/__tests__/station-liveness.fixture.sh +234 -0
- package/templates/mux/__tests__/station-liveness.test.mjs +47 -0
- package/templates/mux/bin/mux +281 -55
- package/templates/scripts/__tests__/advisor-pass.test.mjs +238 -0
- package/templates/scripts/__tests__/advisories.test.mjs +262 -0
- package/templates/scripts/__tests__/batch-disposition.test.mjs +137 -0
- package/templates/scripts/__tests__/feedback-outbox-flush.test.mjs +232 -0
- package/templates/scripts/__tests__/qa-handoff-gate.test.mjs +68 -0
- package/templates/scripts/__tests__/ring-state-ownership.test.mjs +108 -3
- package/templates/scripts/__tests__/ring2-thread-context.test.mjs +189 -0
- package/templates/scripts/__tests__/ring3-dedup.test.mjs +387 -0
- package/templates/scripts/__tests__/routine-dispatch.test.mjs +312 -0
- package/templates/scripts/watchtower-advisories.mjs +305 -0
- package/templates/scripts/watchtower-build-context.mjs +110 -11
- package/templates/scripts/watchtower-lib.mjs +177 -1
- package/templates/scripts/watchtower-queue.mjs +146 -1
- package/templates/scripts/watchtower-ring1.mjs +129 -9
- package/templates/scripts/watchtower-ring2.mjs +118 -21
- package/templates/scripts/watchtower-ring3-close.mjs +466 -49
- package/templates/scripts/watchtower-routines.mjs +358 -0
- package/templates/scripts/watchtower-status.sh +1 -1
- package/templates/skills/audit/SKILL.md +5 -1
- package/templates/skills/briefing/SKILL.md +342 -234
- package/templates/skills/cabinet-anthropic-insider/SKILL.md +14 -6
- package/templates/skills/cabinet-historian/SKILL.md +14 -11
- package/templates/skills/cabinet-system-advocate/SKILL.md +22 -21
- package/templates/skills/cabinet-user-advocate/SKILL.md +13 -7
- package/templates/skills/cc-publish/SKILL.md +105 -19
- package/templates/skills/debrief/SKILL.md +127 -12
- package/templates/skills/execute/SKILL.md +6 -0
- package/templates/skills/inbox/SKILL.md +67 -6
- package/templates/skills/orient/SKILL.md +69 -47
- package/templates/skills/plan/SKILL.md +8 -0
- package/templates/skills/qa-drain/SKILL.md +119 -0
- package/templates/skills/session-handoff/SKILL.md +175 -6
- package/templates/skills/triage-audit/SKILL.md +6 -0
- package/templates/skills/watchtower/SKILL.md +46 -1
- package/templates/watchtower/config.json.template +3 -1
- package/templates/watchtower/queue/items/item.json.schema +1 -1
|
@@ -133,7 +133,41 @@ that format here; reference it.
|
|
|
133
133
|
user's actual situation. This applies whether the choice is rendered via
|
|
134
134
|
AskUserQuestion or prose — the primitive changes, the posture doesn't.
|
|
135
135
|
|
|
136
|
-
## 9.
|
|
136
|
+
## 9. Operator-Facing Register: Plain English by Default
|
|
137
|
+
|
|
138
|
+
When a skill addresses the operator about substantial work — a plan, an
|
|
139
|
+
audit finding, a checkpoint decision, a status report — there are **two
|
|
140
|
+
readers, and they need two registers:**
|
|
141
|
+
|
|
142
|
+
- **The filed artifact** (the plan in pib-db, the finding's record, the
|
|
143
|
+
report on disk) stays **technical and complete** — exact file paths,
|
|
144
|
+
function names, fids, acceptance criteria. A future session executes
|
|
145
|
+
from it cold, so precision is the whole point.
|
|
146
|
+
- **What you SAY to the operator** is **plain English** — concept first,
|
|
147
|
+
the stakes spelled out, any decision framed as options with tradeoffs.
|
|
148
|
+
Keep file paths and symbol names out of the spoken prose; they live in
|
|
149
|
+
the filed artifact, reachable on request or behind a collapsed
|
|
150
|
+
"full detail" pointer.
|
|
151
|
+
|
|
152
|
+
The operator is the director, not the implementer (see the user-role
|
|
153
|
+
brief): they decide *what* and *why* and must be able to weigh in without
|
|
154
|
+
parsing every technical detail. A wall of paths and identifiers is noise
|
|
155
|
+
to that judgment — it buries the one thing they're being asked to decide.
|
|
156
|
+
Lead with what changed and why it matters; keep the file-and-line
|
|
157
|
+
narrative for the record.
|
|
158
|
+
|
|
159
|
+
This is the **default register** for operator-facing skill output. It
|
|
160
|
+
does NOT relax the filed artifact's rigor — a plan still files
|
|
161
|
+
QA-compliant notes, an audit still records exact locations. The split is
|
|
162
|
+
between the *record* (technical) and the *briefing* (plain). When in
|
|
163
|
+
doubt, write the briefing as if explaining to a sharp colleague who
|
|
164
|
+
hasn't seen the code.
|
|
165
|
+
|
|
166
|
+
This is a *register* rule (how to phrase), distinct from §5 (prose vs
|
|
167
|
+
dialog as the *primitive*) and §8 (present-don't-prescribe as the
|
|
168
|
+
*posture*) — all three compose.
|
|
169
|
+
|
|
170
|
+
## 10. Calibration Examples
|
|
137
171
|
|
|
138
172
|
**Before/after — engagement decision items** (the Tier 1 conversion):
|
|
139
173
|
|
|
@@ -80,7 +80,10 @@ the section, Ring 1 writes its own ephemeral fallback ("Active: …" /
|
|
|
80
80
|
last-commit line) and rebuilds it freely; once the marker is present,
|
|
81
81
|
Ring 1 must carry the existing section forward verbatim. Without this,
|
|
82
82
|
Ring 1's full-file rebuild deterministically clobbers Ring 3's summary
|
|
83
|
-
within one cron tick (~5 minutes).
|
|
83
|
+
within one cron tick (~5 minutes). Rebuild writes must go through
|
|
84
|
+
`writeProjectStatePreservingRing3()` in `watchtower-lib.mjs` (re-read
|
|
85
|
+
and verify, with retry), never a raw read→merge→write — the raw form has a
|
|
86
|
+
read-then-write race that drops a Last Session section that Ring 3 authors mid-merge.
|
|
84
87
|
|
|
85
88
|
### Thread File Durability (disk wins over model)
|
|
86
89
|
|
|
@@ -111,6 +114,91 @@ standard files:
|
|
|
111
114
|
`/inbox` reads these when `enrichment_status` is `"complete"`.
|
|
112
115
|
Missing files degrade gracefully (null in the read result).
|
|
113
116
|
|
|
117
|
+
## Routine Dispatch (declared interactive routines)
|
|
118
|
+
|
|
119
|
+
Projects declare interactive customs as **routines** in `config.json`,
|
|
120
|
+
under their project entry:
|
|
121
|
+
|
|
122
|
+
```json
|
|
123
|
+
"projects": {
|
|
124
|
+
"flow": {
|
|
125
|
+
"path": "/Users/x/flow",
|
|
126
|
+
"routines": [
|
|
127
|
+
{
|
|
128
|
+
"name": "morning-briefing",
|
|
129
|
+
"description": "Walk the operator through the day's landscape",
|
|
130
|
+
"trigger": { "type": "time-of-day", "at": "08:30" },
|
|
131
|
+
"script": ".claude/routines/morning-briefing.md",
|
|
132
|
+
"urgency": "normal",
|
|
133
|
+
"stale_after_hours": 24
|
|
134
|
+
}
|
|
135
|
+
]
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
- `name` — kebab-case slug, unique within the project.
|
|
141
|
+
- `trigger.type` — `time-of-day` (`at: "HH:MM"`, local, fires once per
|
|
142
|
+
day), `interval` (`minutes`, >= 5), `path-nonempty` (`path` relative
|
|
143
|
+
to the project root or absolute; `cooldown_minutes` default 60), or
|
|
144
|
+
`session-close` (fires when Ring 3 closes a session for the project;
|
|
145
|
+
worktree sessions resolve to the main project; `cooldown_minutes`
|
|
146
|
+
default 0).
|
|
147
|
+
- `script` — project-authored phase file (the established CC phase
|
|
148
|
+
convention) holding the routine's conversation script. The receiving
|
|
149
|
+
session reads it and runs it; watchtower never parses it.
|
|
150
|
+
- `stale_after_hours` (default 24) — how long a pending dispatch stays
|
|
151
|
+
blocking before a fresh firing supersedes it.
|
|
152
|
+
|
|
153
|
+
Ownership: `watchtower-routines.mjs` owns trigger evaluation, firing
|
|
154
|
+
state (`state/routine-state.json`), and dispatch. Ring 1 ticks the
|
|
155
|
+
mechanical triggers; Ring 3 raises session-close events. Both are
|
|
156
|
+
feature-flagged by `defaults.routine_dispatch`. A firing files an inbox
|
|
157
|
+
item (category `routine` — the durable record) and pushes a descriptor
|
|
158
|
+
through `mux qa dispatch`, the SAME hardened desk-dispatch path as
|
|
159
|
+
qa-handoffs (act:796fe6dc) — never a parallel one. Terminal exits on
|
|
160
|
+
any dispatched-category item (qa-handoff, routine) clear the matching
|
|
161
|
+
mux descriptor; routine items resolve normally (no QA gate, normal
|
|
162
|
+
expiry — expiry also clears the descriptor). A pending routine item
|
|
163
|
+
blocks refiring of the same routine until resolved or stale.
|
|
164
|
+
|
|
165
|
+
## Session Advisor Pass (re-homed standing advisors)
|
|
166
|
+
|
|
167
|
+
The standing session advisors (historian, system-advocate,
|
|
168
|
+
user-advocate, anthropic-insider) hold two session-boundary seats, both
|
|
169
|
+
index-driven via `.claude/skills/_index.json` — the roster is whoever
|
|
170
|
+
declares the mandate, never a hardcoded list (act:aded4fc9):
|
|
171
|
+
|
|
172
|
+
- **Close side (automatic):** Ring 3's Phase 2m runs every member whose
|
|
173
|
+
`standingMandate` includes `session-close` against the compressed
|
|
174
|
+
session transcript, scoped by `directives.session-close`. Findings
|
|
175
|
+
file as `advisor-finding` inbox items (max 2 per member per session,
|
|
176
|
+
deduped against pending + resolved/dismissed corpora). Feature-flagged
|
|
177
|
+
by `defaults.session_advisors`. Cost: reuses Ring 3's pinned-sonnet
|
|
178
|
+
run — no Claude Code spawn.
|
|
179
|
+
- **Start side (pull, intent-aware):** `/briefing` full mode spawns
|
|
180
|
+
every member whose `standingMandate` includes `briefing`, scoped by
|
|
181
|
+
`directives.briefing`, with the briefing's gathered portfolio context.
|
|
182
|
+
Observations route through the curator's Step 3b selection and Step 4
|
|
183
|
+
close — never a permanent section (anti-accretion invariant).
|
|
184
|
+
- **Consultation side (skill-level):** the /orient and /debrief
|
|
185
|
+
cabinet-consultations phases persist their members' ACTIONABLE
|
|
186
|
+
findings through this same channel before the briefing/report
|
|
187
|
+
renders (act:84daa7bc) — `advisor-finding` items with
|
|
188
|
+
`filed_by: "manual"` and `evidence.directive_key: "orient"` or
|
|
189
|
+
`"debrief"`, capped at 2 per member per session like Phase 2m.
|
|
190
|
+
Same dedup-vs-pending discipline (an existing pending item is
|
|
191
|
+
referenced, not re-filed), same /briefing re-surfacing — never a
|
|
192
|
+
parallel path. Projects without watchtower fall back to a pib-db
|
|
193
|
+
action; with neither available, the finding is marked UNTRACKED in
|
|
194
|
+
the rendered output so the non-persistence is loud.
|
|
195
|
+
|
|
196
|
+
`advisor-finding` is ungated and undispatched: normal expiry applies
|
|
197
|
+
(findings decay naturally), and `/briefing` weighs fresh ones near the
|
|
198
|
+
top of the owning project's chunk — the re-surfacing the close pass
|
|
199
|
+
promises. A `session-close`/`briefing` mandate without the matching
|
|
200
|
+
directive key is a data error: consumers skip the member and log it.
|
|
201
|
+
|
|
114
202
|
## Deferred Schemas
|
|
115
203
|
|
|
116
204
|
The following are specified in project notes but NOT formalized until
|
|
@@ -8,10 +8,19 @@
|
|
|
8
8
|
// None of them do console.log — callers decide how to present output.
|
|
9
9
|
|
|
10
10
|
import { existsSync, readFileSync } from 'node:fs';
|
|
11
|
-
import { ENGAGEMENT_EVENTS_CREATE, ENGAGEMENT_EVENTS_INDEXES } from '../.claude/engagement/sql-constants.mjs';
|
|
12
11
|
import { join } from 'node:path';
|
|
13
12
|
import { randomUUID } from 'node:crypto';
|
|
14
13
|
|
|
14
|
+
// sql-constants lives in the installed engagement runtime (.claude/engagement/),
|
|
15
|
+
// which is gitignored in the CC source repo — absent on every fresh checkout/CI,
|
|
16
|
+
// where npm test must still pass (act:d64feaac). Fall back to the committed
|
|
17
|
+
// template source of truth. Same code in template and installed copies — no fork.
|
|
18
|
+
const { ENGAGEMENT_EVENTS_CREATE, ENGAGEMENT_EVENTS_INDEXES } =
|
|
19
|
+
await import('../.claude/engagement/sql-constants.mjs').catch((err) => {
|
|
20
|
+
if (err.code !== 'ERR_MODULE_NOT_FOUND') throw err;
|
|
21
|
+
return import('../templates/engagement/sql-constants.mjs');
|
|
22
|
+
});
|
|
23
|
+
|
|
15
24
|
// ---------------------------------------------------------------------------
|
|
16
25
|
// Helpers
|
|
17
26
|
// ---------------------------------------------------------------------------
|
|
@@ -249,6 +249,50 @@ assert "loud fall-through names why" grep -q "No worktree isolation" <<<"$out"
|
|
|
249
249
|
assert "main landing carries NO @mux_worktree" \
|
|
250
250
|
test -z "$(win_opt seeded-plain @mux_worktree)"
|
|
251
251
|
|
|
252
|
+
echo "== T10: prompt-bearing launch stages the prompt as the CLI initial-prompt arg"
|
|
253
|
+
# queue_claude_start writes the prompt to a per-window seed file and launches
|
|
254
|
+
# `claude "$(cat <seedfile>)"` — auto-submit with no keystroke timing. We assert
|
|
255
|
+
# on that synchronous file (the pane's async `claude` run is not raced).
|
|
256
|
+
SEED_DIR="$FAKE_HOME/.local/share/mux/seed-prompts"
|
|
257
|
+
# A clean lead sentence + ". " gives slugify a valid window name; the gnarly
|
|
258
|
+
# body (newlines, quotes, $, backticks) still rides the seed file verbatim —
|
|
259
|
+
# queue_claude_start stages the full prompt, not the slug.
|
|
260
|
+
PROMPT=$'Fix the parser. Body line with "double quotes", a $DOLLAR and a `backtick`.\nSecond body line.'
|
|
261
|
+
printf '%s' "$PROMPT" > "$SANDBOX/expected-body.txt"
|
|
262
|
+
before=$(win_count)
|
|
263
|
+
out=$(run_mux "$PROJ" new "$PROMPT" 2>&1); rc=$?
|
|
264
|
+
assert "mux new with a prompt exits 0 ($rc)" test "$rc" -eq 0
|
|
265
|
+
assert "a worktree window opened" test "$(win_count)" -eq $((before + 1))
|
|
266
|
+
seedf=$(ls -t "$SEED_DIR"/sess-*.txt 2>/dev/null | head -1)
|
|
267
|
+
assert "a seed-prompt file was written" test -n "$seedf"
|
|
268
|
+
if [[ -n "$seedf" ]]; then
|
|
269
|
+
assert "auto_orient=1 prepends the orient instruction as line 1" \
|
|
270
|
+
bash -c "head -1 '$seedf' | grep -qx 'Run /orient-quick first to load state, then:'"
|
|
271
|
+
tail -n +3 "$seedf" > "$SANDBOX/got-body.txt"
|
|
272
|
+
assert "multi-line prompt body preserved byte-identical (quotes/\$/backticks intact)" \
|
|
273
|
+
diff "$SANDBOX/expected-body.txt" "$SANDBOX/got-body.txt"
|
|
274
|
+
fi
|
|
275
|
+
|
|
276
|
+
echo "== T12: the prompt is an ARG to claude, never executed as shell (auto-submit safety)"
|
|
277
|
+
# The motivating hazard the reviewers flagged: auto-submitting by pasting the
|
|
278
|
+
# prompt into the pane and pressing Enter would EXECUTE it as shell commands if
|
|
279
|
+
# `claude` isn't up. Passing it as `claude "<prompt>"` makes that impossible —
|
|
280
|
+
# if claude can't launch, the prompt is just an unused argument. There is no
|
|
281
|
+
# real `claude` on PATH here, so the launch fails; the sentinel proves the
|
|
282
|
+
# prompt's embedded command never ran.
|
|
283
|
+
SENTINEL="$SANDBOX/PROMPT_WAS_EXECUTED"
|
|
284
|
+
rm -f "$SENTINEL"
|
|
285
|
+
# Lead sentence + ". " for a valid slug; the touch sits on its own line, so if
|
|
286
|
+
# this ever regressed to paste-into-shell + Enter, line 2 WOULD run and create
|
|
287
|
+
# the sentinel. As a `claude "<arg>"` argument it never executes.
|
|
288
|
+
PWN_PROMPT=$'Reproduce the bug. \ntouch "'"$SENTINEL"$'"'
|
|
289
|
+
before=$(win_count)
|
|
290
|
+
out=$(run_mux "$PROJ" new "$PWN_PROMPT" 2>&1); rc=$?
|
|
291
|
+
assert "mux new with a command-shaped prompt exits 0 ($rc)" test "$rc" -eq 0
|
|
292
|
+
sleep 1 # let the pane's shell attempt `cd … && claude "$(cat …)"`
|
|
293
|
+
assert "prompt content was NOT executed as shell (no sentinel created)" \
|
|
294
|
+
test ! -e "$SENTINEL"
|
|
295
|
+
|
|
252
296
|
echo ""
|
|
253
297
|
echo "RESULT: $pass passed, $fail failed (sandbox: $SANDBOX)"
|
|
254
298
|
exit $fail
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# Sandboxed AC verification for act:ca5ac156 (ensure_main_station pane-liveness
|
|
3
|
+
# hardening). The bug: ensure_main_station trusted the @mux_claude window
|
|
4
|
+
# marker as the ONLY Claude-liveness signal, so a live pre-v0.44 Claude with
|
|
5
|
+
# no marker got the qa_launch_fresh keystrokes (C-u + `cd … && claude` +
|
|
6
|
+
# Enter) typed INTO its input box.
|
|
7
|
+
#
|
|
8
|
+
# Everything runs under a mktemp dir with HOME overridden AND a private
|
|
9
|
+
# throwaway tmux server (tmux -L mux-stl-test-$$ -f /dev/null) — it never
|
|
10
|
+
# touches the real ~/.mux/worktrees, ~/.config/mux, or the operator's live
|
|
11
|
+
# tmux server. Fake Claudes are REAL node processes printing Claude Code's UI
|
|
12
|
+
# footer, so pane_current_command and the pane snapshot both look authentic.
|
|
13
|
+
# The test server is killed in cleanup even on assertion failure.
|
|
14
|
+
set -uo pipefail
|
|
15
|
+
|
|
16
|
+
REPO_ROOT="$(cd "$(dirname "$0")/../../.." && pwd)"
|
|
17
|
+
# Abort if the sandbox can't be created: with SANDBOX empty, FAKE_HOME and PROJ
# below would resolve to /home and /proj — real paths outside any sandbox.
SANDBOX=$(mktemp -d /tmp/cc-mux-station-test.XXXXXX) \
  || { echo "FATAL: could not create sandbox dir"; exit 1; }
|
|
18
|
+
FAKE_HOME="$SANDBOX/home"
|
|
19
|
+
PROJ="$SANDBOX/proj"
|
|
20
|
+
WT_DIR="$FAKE_HOME/.mux/worktrees"
|
|
21
|
+
MUX="$REPO_ROOT/templates/mux/bin/mux"
|
|
22
|
+
NODE_BIN="$(command -v node)"
|
|
23
|
+
|
|
24
|
+
pass=0; fail=0
|
|
25
|
+
ok() { echo " PASS: $1"; pass=$((pass+1)); }
|
|
26
|
+
bad() { echo " FAIL: $1"; fail=$((fail+1)); }
|
|
27
|
+
assert() { # assert <desc> <cmd...>
|
|
28
|
+
local desc="$1"; shift
|
|
29
|
+
if "$@" >/dev/null 2>&1; then ok "$desc"; else bad "$desc"; fi
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
# --- private throwaway tmux server -------------------------------------------
|
|
33
|
+
unset TMUX TMUX_PANE
|
|
34
|
+
SOCK="mux-stl-test-$$"
|
|
35
|
+
TMX() { tmux -L "$SOCK" -f /dev/null "$@"; }
|
|
36
|
+
cleanup() { TMX kill-server 2>/dev/null || true; rm -rf "$SANDBOX"; }
|
|
37
|
+
trap cleanup EXIT
|
|
38
|
+
|
|
39
|
+
# --- sandbox home -------------------------------------------------------------
|
|
40
|
+
mkdir -p "$FAKE_HOME/.config/mux" "$WT_DIR" "$FAKE_HOME/.local/share/mux/wt-health" \
|
|
41
|
+
"$FAKE_HOME/.claude/projects" "$SANDBOX/bin"
|
|
42
|
+
cp "$REPO_ROOT/templates/mux/config/worktree-session-health.sh" "$FAKE_HOME/.config/mux/"
|
|
43
|
+
cp "$REPO_ROOT/templates/mux/config/worktree-dirty-check.sh" "$FAKE_HOME/.config/mux/"
|
|
44
|
+
chmod +x "$FAKE_HOME/.config/mux/"*.sh
|
|
45
|
+
|
|
46
|
+
# Stub claude FIRST in the panes' PATH so a relaunch never starts the real
|
|
47
|
+
# Claude Code inside the test server.
|
|
48
|
+
cat > "$SANDBOX/bin/claude" <<'SH'
|
|
49
|
+
#!/bin/bash
|
|
50
|
+
echo "stub-claude up — ? for shortcuts"
|
|
51
|
+
exec tail -f /dev/null
|
|
52
|
+
SH
|
|
53
|
+
chmod +x "$SANDBOX/bin/claude"
|
|
54
|
+
|
|
55
|
+
# Fake live Claude: a REAL node process whose visible pane shows Claude
|
|
56
|
+
# Code's footer — pane_current_command = "node", snapshot matches the
|
|
57
|
+
# pane_is_live_claude markers. No @mux_claude marker is ever set on it.
|
|
58
|
+
cat > "$SANDBOX/fake-claude.js" <<'JS'
|
|
59
|
+
console.log('╭──────────────────────────────╮');
|
|
60
|
+
console.log('│ > │');
|
|
61
|
+
console.log('╰──────────────────────────────╯');
|
|
62
|
+
console.log(' ? for shortcuts');
|
|
63
|
+
setInterval(() => {}, 1 << 30);
|
|
64
|
+
JS
|
|
65
|
+
# Noise process: also node, but with NO Claude UI markers — must classify
|
|
66
|
+
# as "other", never get the marker backfilled, never get keystrokes.
|
|
67
|
+
cat > "$SANDBOX/noise.js" <<'JS'
|
|
68
|
+
console.log('dev server listening on :3000');
|
|
69
|
+
setInterval(() => {}, 1 << 30);
|
|
70
|
+
JS
|
|
71
|
+
|
|
72
|
+
# Stub muxlib.py — answers only what cmd_new/cmd_qa/create_worktree ask.
|
|
73
|
+
cat > "$FAKE_HOME/.config/mux/muxlib.py" <<'PY'
|
|
74
|
+
import sys, os
|
|
75
|
+
cmd = sys.argv[1] if len(sys.argv) > 1 else ""
|
|
76
|
+
if cmd == "project-path":
|
|
77
|
+
p = os.environ.get("STUB_PROJ_PATH", "")
|
|
78
|
+
if p:
|
|
79
|
+
print(p)
|
|
80
|
+
else:
|
|
81
|
+
sys.exit(1)
|
|
82
|
+
elif cmd == "project-names":
|
|
83
|
+
print("live")
|
|
84
|
+
elif cmd in ("worktree-add", "worktree-remove", "narrate-mark"):
|
|
85
|
+
pass
|
|
86
|
+
elif cmd == "worktree-is-active":
|
|
87
|
+
print("yes")
|
|
88
|
+
elif cmd == "narrate-check":
|
|
89
|
+
print("yes")
|
|
90
|
+
PY
|
|
91
|
+
echo '{"active":[]}' > "$FAKE_HOME/.config/mux/worktrees.json"
|
|
92
|
+
echo '{}' > "$FAKE_HOME/.config/mux/projects.json"
|
|
93
|
+
|
|
94
|
+
# --- fixture project ------------------------------------------------------------
|
|
95
|
+
git init -q "$PROJ"
|
|
96
|
+
git -C "$PROJ" config user.email t@t.t; git -C "$PROJ" config user.name t
|
|
97
|
+
echo code > "$PROJ/file.txt"
|
|
98
|
+
git -C "$PROJ" add -A && git -C "$PROJ" commit -qm init
|
|
99
|
+
MAIN_SLUG=$(echo "$PROJ" | sed 's|[/.]|-|g')
|
|
100
|
+
mkdir -p "$FAKE_HOME/.claude/projects/$MAIN_SLUG"
|
|
101
|
+
|
|
102
|
+
# --- desks on the private server -------------------------------------------------
|
|
103
|
+
# One session per scenario so marker backfill in one test can't leak into the
|
|
104
|
+
# next. Window 0 is the main-station candidate (lowest-index non-worktree).
|
|
105
|
+
# live → unmarked fake Claude (the act:ca5ac156 victim)
|
|
106
|
+
# noise → unmarked node WITHOUT Claude markers ("other")
|
|
107
|
+
#   dead     → bare shell (window 0's Claude died — relaunch is correct)
|
|
108
|
+
# marked → fake Claude WITH @mux_claude=1 (pre-fix happy path, no-op)
|
|
109
|
+
# dispatch → unmarked idle fake Claude for the `mux qa dispatch` path
|
|
110
|
+
HOME="$FAKE_HOME" PATH="$SANDBOX/bin:$PATH" \
|
|
111
|
+
TMX new-session -d -s live -x 200 -y 50 -c "$PROJ" "$NODE_BIN $SANDBOX/fake-claude.js" \
|
|
112
|
+
|| { echo "FATAL: could not start private tmux server"; exit 1; }
|
|
113
|
+
TMX new-session -d -s noise -x 200 -y 50 -c "$PROJ" "$NODE_BIN $SANDBOX/noise.js"
|
|
114
|
+
TMX new-session -d -s dead -x 200 -y 50 -c "$PROJ"
|
|
115
|
+
TMX new-session -d -s marked -x 200 -y 50 -c "$PROJ" "$NODE_BIN $SANDBOX/fake-claude.js"
|
|
116
|
+
TMX set-window-option -t "marked:0" @mux_claude 1
|
|
117
|
+
TMX new-session -d -s dispatch -x 200 -y 50 -c "$PROJ" "$NODE_BIN $SANDBOX/fake-claude.js"
|
|
118
|
+
# verstr → unmarked fake Claude whose EXECUTABLE is named like a bare version
|
|
119
|
+
# string ("2.1.169"): the macOS native installer execs a versioned binary, so
|
|
120
|
+
# a real Claude pane reports pane_current_command as the version (live
|
|
121
|
+
# finding, dec-c1dbcd8b QA). Symlinked node keeps the process authentic.
|
|
122
|
+
mkdir -p "$SANDBOX/bin"
|
|
123
|
+
ln -sf "$NODE_BIN" "$SANDBOX/bin/2.1.169"
|
|
124
|
+
TMX new-session -d -s verstr -x 200 -y 50 -c "$PROJ" "$SANDBOX/bin/2.1.169 $SANDBOX/fake-claude.js"
|
|
125
|
+
sleep 1 # let the fake-Claude footers render before any pane classification
|
|
126
|
+
|
|
127
|
+
SOCKET_PATH=$(TMX display-message -p -t live '#{socket_path}')
|
|
128
|
+
SERVER_PID=$(TMX display-message -p -t live '#{pid}')
|
|
129
|
+
|
|
130
|
+
# Run mux "inside" a given desk: TMUX points at the test socket, TMUX_PANE at
|
|
131
|
+
# that desk's window-0 pane so session resolution lands on the right desk.
|
|
132
|
+
run_mux_in() { # run_mux_in <session> <args...>
|
|
133
|
+
local sess="$1"; shift
|
|
134
|
+
local pane; pane=$(TMX display-message -p -t "${sess}:0" '#{pane_id}')
|
|
135
|
+
env HOME="$FAKE_HOME" STUB_PROJ_PATH="$PROJ" \
|
|
136
|
+
TMUX="${SOCKET_PATH},${SERVER_PID},0" TMUX_PANE="$pane" \
|
|
137
|
+
bash "$MUX" "$@"
|
|
138
|
+
}
|
|
139
|
+
win_count() { TMX list-windows -t "$1" 2>/dev/null | wc -l | tr -d ' '; }
|
|
140
|
+
win_opt() { TMX show-options -wv -t "$1" "$2" 2>/dev/null; }
|
|
141
|
+
# Pane snapshots and command output go through files — pane content contains
|
|
142
|
+
# single quotes (`cd '…'`), which inline <<<'…' interpolation can't survive.
|
|
143
|
+
SNAP="$SANDBOX/snap.txt"; OUT="$SANDBOX/out.txt"
|
|
144
|
+
pane_snap() { TMX capture-pane -t "$1" -p -S -50 >"$SNAP" 2>/dev/null; }
|
|
145
|
+
|
|
146
|
+
echo "== T1: live UNMARKED Claude => no keystroke injection + marker backfill (the act:ca5ac156 bug)"
|
|
147
|
+
before=$(win_count live)
|
|
148
|
+
run_mux_in live new "review the parser" >"$OUT" 2>&1; rc=$?
|
|
149
|
+
sleep 0.5
|
|
150
|
+
assert "mux new exits 0 ($rc)" test "$rc" -eq 0
|
|
151
|
+
pane_snap "live:0"
|
|
152
|
+
assert "NO launch keystrokes typed into the live Claude's input box" \
|
|
153
|
+
bash -c "! grep -qF \"cd '\" '$SNAP'"
|
|
154
|
+
assert "@mux_claude=1 BACKFILLED on the unmarked live Claude" \
|
|
155
|
+
test "$(win_opt live:0 @mux_claude)" = "1"
|
|
156
|
+
assert "work still routed to a new worktree window" \
|
|
157
|
+
test "$(win_count live)" -eq $((before + 1))
|
|
158
|
+
|
|
159
|
+
echo "== T2: unmarked node WITHOUT Claude markers => 'other', refuse loudly, no backfill"
|
|
160
|
+
before=$(win_count noise)
|
|
161
|
+
run_mux_in noise new "tighten the linter" >"$OUT" 2>&1; rc=$?
|
|
162
|
+
sleep 0.5
|
|
163
|
+
assert "mux new exits 0 ($rc)" test "$rc" -eq 0
|
|
164
|
+
pane_snap "noise:0"
|
|
165
|
+
assert "NO launch keystrokes typed into the unknown program" \
|
|
166
|
+
bash -c "! grep -qF \"cd '\" '$SNAP'"
|
|
167
|
+
assert "@mux_claude NOT backfilled on a non-Claude node process" \
|
|
168
|
+
test -z "$(win_opt noise:0 @mux_claude)"
|
|
169
|
+
assert "refusal is loud and names the program" \
|
|
170
|
+
bash -c "grep -qF \"running 'node'\" '$OUT'"
|
|
171
|
+
assert "work still routed to a new worktree window" \
|
|
172
|
+
test "$(win_count noise)" -eq $((before + 1))
|
|
173
|
+
|
|
174
|
+
echo "== T3: bare shell (Claude died) => relaunch there (existing behavior preserved)"
|
|
175
|
+
run_mux_in dead new "polish the docs" >"$OUT" 2>&1; rc=$?
|
|
176
|
+
sleep 0.5
|
|
177
|
+
assert "mux new exits 0 ($rc)" test "$rc" -eq 0
|
|
178
|
+
pane_snap "dead:0"
|
|
179
|
+
assert "launch keystrokes typed into the bare shell" \
|
|
180
|
+
bash -c "grep -qF \"cd '\" '$SNAP'"
|
|
181
|
+
assert "relaunched station marked @mux_claude=1" \
|
|
182
|
+
test "$(win_opt dead:0 @mux_claude)" = "1"
|
|
183
|
+
# The station relaunch is an EMPTY-prompt queue_claude_start: it must take the
|
|
184
|
+
# plain `cd && claude` path and write NO initial-prompt seed file (a stray seed
|
|
185
|
+
# arg would auto-submit garbage into the fresh station). The work prompt
|
|
186
|
+
# ("polish the docs") rode a separate worktree window, which DOES get a seed.
|
|
187
|
+
assert "empty-prompt station relaunch writes NO seed file for dead:0" \
|
|
188
|
+
test ! -f "$FAKE_HOME/.local/share/mux/seed-prompts/dead-0.txt"
|
|
189
|
+
|
|
190
|
+
echo "== T4: marked live Claude => untouched no-op (pre-fix happy path preserved)"
|
|
191
|
+
before=$(win_count marked)
|
|
192
|
+
run_mux_in marked new "harden the cache" >"$OUT" 2>&1; rc=$?
|
|
193
|
+
sleep 0.5
|
|
194
|
+
assert "mux new exits 0 ($rc)" test "$rc" -eq 0
|
|
195
|
+
pane_snap "marked:0"
|
|
196
|
+
assert "NO keystrokes typed into the marked Claude" \
|
|
197
|
+
bash -c "! grep -qF \"cd '\" '$SNAP'"
|
|
198
|
+
assert "work still routed to a new worktree window" \
|
|
199
|
+
test "$(win_count marked)" -eq $((before + 1))
|
|
200
|
+
|
|
201
|
+
echo "== T5: qa dispatch reaches an unmarked-but-live IDLE Claude (shared classifier heals it)"
|
|
202
|
+
DESC="$SANDBOX/handoff.json"
|
|
203
|
+
cat > "$DESC" <<JSON
|
|
204
|
+
{ "project": "dispatch", "project_path": "$PROJ", "item_id": "dec-test1234",
|
|
205
|
+
"what": "test handoff", "pickup_prompt": "pickup the QA handoff dec-test1234" }
|
|
206
|
+
JSON
|
|
207
|
+
# Run from the `live` desk's pane so the self-dispatch guard doesn't trip.
|
|
208
|
+
run_mux_in live qa dispatch "$DESC" >"$OUT" 2>&1; rc=$?
|
|
209
|
+
sleep 0.5
|
|
210
|
+
assert "qa dispatch exits 0 ($rc)" test "$rc" -eq 0
|
|
211
|
+
assert "dispatch INJECTED instead of queueing (unmarked Claude now visible)" \
|
|
212
|
+
bash -c "grep -qF 'injected into the idle main session' '$OUT'"
|
|
213
|
+
pane_snap "dispatch:0"
|
|
214
|
+
assert "pickup prompt landed in the Claude pane" \
|
|
215
|
+
bash -c "grep -qF 'pickup the QA handoff dec-test1234' '$SNAP'"
|
|
216
|
+
assert "@mux_claude=1 backfilled by the dispatch path too" \
|
|
217
|
+
test "$(win_opt dispatch:0 @mux_claude)" = "1"
|
|
218
|
+
|
|
219
|
+
echo "== T6: real-install process name is a bare version string => still positively verified (dec-c1dbcd8b live finding)"
|
|
220
|
+
before=$(win_count verstr)
|
|
221
|
+
run_mux_in verstr new "check version-named claude" >"$OUT" 2>&1; rc=$?
|
|
222
|
+
sleep 0.5
|
|
223
|
+
assert "mux new exits 0 ($rc)" test "$rc" -eq 0
|
|
224
|
+
pane_snap "verstr:0"
|
|
225
|
+
assert "NO launch keystrokes typed into the version-named live Claude" \
|
|
226
|
+
bash -c "! grep -qF \"cd '\" '$SNAP'"
|
|
227
|
+
assert "@mux_claude=1 BACKFILLED despite digit-led process name" \
|
|
228
|
+
test "$(win_opt verstr:0 @mux_claude)" = "1"
|
|
229
|
+
assert "work still routed to a new worktree window" \
|
|
230
|
+
test "$(win_count verstr)" -eq $((before + 1))
|
|
231
|
+
|
|
232
|
+
echo ""
|
|
233
|
+
echo "RESULT: $pass passed, $fail failed (sandbox: $SANDBOX)"
|
|
234
|
+
exit $fail
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
// Guard test for ensure_main_station pane-liveness hardening (act:ca5ac156).
|
|
2
|
+
//
|
|
3
|
+
// The bug: ensure_main_station trusted the @mux_claude window marker as the
|
|
4
|
+
// only Claude-liveness signal, so a live pre-v0.44 Claude with no marker got
|
|
5
|
+
// the qa_launch_fresh keystrokes (C-u + `cd … && claude` + Enter) typed INTO
|
|
6
|
+
// its input box — the "mux new isn't working" symptom, with the real risk of
|
|
7
|
+
// injecting an unintended prompt into a live session.
|
|
8
|
+
//
|
|
9
|
+
// Drives station-liveness.fixture.sh against a PRIVATE throwaway tmux server
|
|
10
|
+
// (tmux -L mux-stl-test-$$ -f /dev/null) with HOME sandboxed — it never
|
|
11
|
+
// touches the operator's live tmux server, ~/.mux/worktrees, or
|
|
12
|
+
// ~/.config/mux. Fake Claudes are real node processes printing Claude Code's
|
|
13
|
+
// UI footer, so both liveness signals (pane_current_command + pane snapshot)
|
|
14
|
+
// are authentic.
|
|
15
|
+
//
|
|
16
|
+
// Covers:
|
|
17
|
+
// - live UNMARKED Claude: no keystrokes injected, @mux_claude backfilled,
|
|
18
|
+
// work still routed to a worktree window
|
|
19
|
+
// - unmarked node WITHOUT Claude markers: classified "other", loud refusal
|
|
20
|
+
// naming the program, no backfill, no injection
|
|
21
|
+
// - bare shell (station Claude died): relaunch behavior preserved
|
|
22
|
+
// - marked live Claude: untouched no-op preserved
|
|
23
|
+
// - `mux qa dispatch` now reaches an unmarked-but-live idle Claude
|
|
24
|
+
// (qa_pane_state is the single shared classifier — the heal applies to
|
|
25
|
+
// every consumer, not just ensure_main_station)
|
|
26
|
+
|
|
27
|
+
import { test } from 'node:test';
|
|
28
|
+
import assert from 'node:assert';
|
|
29
|
+
import { spawnSync } from 'node:child_process';
|
|
30
|
+
import { fileURLToPath } from 'node:url';
|
|
31
|
+
import path from 'node:path';
|
|
32
|
+
|
|
33
|
+
const fixture = path.join(
|
|
34
|
+
path.dirname(fileURLToPath(import.meta.url)),
|
|
35
|
+
'station-liveness.fixture.sh'
|
|
36
|
+
);
|
|
37
|
+
|
|
38
|
+
test('ensure_main_station pane liveness (sandboxed tmux fixture suite)', () => {
|
|
39
|
+
const res = spawnSync('bash', [fixture], { encoding: 'utf8', timeout: 120_000 });
|
|
40
|
+
// Null-safe capture of the fixture's output. When bash cannot be spawned or the
// run times out, stdout/stderr may be null and the cause is reported through
// res.error / res.signal, so include those in the failure message too.
const output = [
  res.stdout ?? '',
  res.stderr ?? '',
  res.error ? `spawn error: ${res.error.message}` : '',
  res.signal ? `killed by signal: ${res.signal}` : '',
]
  .filter(Boolean)
  .join('\n');
|
|
41
|
+
assert.strictEqual(
|
|
42
|
+
res.status,
|
|
43
|
+
0,
|
|
44
|
+
`fixture suite reported failures:\n${output}`
|
|
45
|
+
);
|
|
46
|
+
assert.match(output, /RESULT: \d+ passed, 0 failed/);
|
|
47
|
+
});
|