switchroom 0.15.20 → 0.15.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,97 @@
1
+ /**
2
+ * Regression guard — vault/secret RESUME synthetics are turn-gated
3
+ * (the clerk `hotdoc/credentials` mid-turn-strand, 2026-06-14).
4
+ *
5
+ * THE BUG: when an operator approved a vault grant (or provided a
6
+ * secret, or completed a save) WHILE the agent's grant-requesting turn
7
+ * was still finishing, the gateway did a raw `ipcServer.sendToAgent` of
8
+ * the resume synthetic. The socket write succeeded (`delivered=true`)
9
+ * but claude was mid-turn, so the channel notification was typed into
10
+ * its TUI composer and stranded by the turn-completion race (#1556).
11
+ * The pending-inbound buffer never rescued it (it only catches
12
+ * `delivered=false`), so the agent sat idle until the operator manually
13
+ * poked it.
14
+ *
15
+ * Live proof (clerk, 2026-06-13 22:10:57):
16
+ * 22:10:57.098 vault_grant_approved injection delivered=true
17
+ * 22:10:57.277 turn_end #14081 finalAnswer=true (still mid-turn!)
18
+ * 22:12:57.713 inbound msg=14085 → turnStart (operator poke, 2m later)
19
+ *
20
+ * THE FIX: every resume synthetic goes through
21
+ * `deliverResumeSyntheticOrBuffer`, which consults the SAME
22
+ * `decideInboundDelivery` gate the Telegram handleInbound path uses —
23
+ * mid-turn → `buffer-until-idle` (flushed cleanly at turn-end). This
24
+ * file pins (a) the gate decision for a resume synthetic's
25
+ * shape, and (b) that no resume callsite regressed to a raw
26
+ * `ipcServer.sendToAgent`.
27
+ */
28
+ import { describe, it, expect } from "vitest";
29
+ import { readFileSync } from "node:fs";
30
+ import { resolve } from "node:path";
31
+ import { decideInboundDelivery } from "../gateway/inbound-delivery-gate.js";
32
+
33
/** Full text of the gateway source, read once; the guards below grep it. */
const gatewaySrc = readFileSync(
  resolve(__dirname, "..", "gateway", "gateway.ts"),
  "utf-8",
);

/** Declaration prefix used to locate the helper inside `gatewaySrc`. */
const HELPER_DECL = "function deliverResumeSyntheticOrBuffer";

describe("resume synthetics use the turn-gate (mid-turn → buffer)", () => {
  it("a resume synthetic's gate shape buffers mid-turn, delivers when idle", () => {
    // A resume synthetic is never steering and never an interrupt — the
    // exact inputs deliverResumeSyntheticOrBuffer passes to the gate.
    const shape = { isSteering: false as const, isInterrupt: false as const };
    expect(decideInboundDelivery({ ...shape, turnInFlight: true })).toBe(
      "buffer-until-idle",
    );
    expect(decideInboundDelivery({ ...shape, turnInFlight: false })).toBe(
      "deliver",
    );
  });

  it("the helper exists and gates on decideInboundDelivery before sending", () => {
    const start = gatewaySrc.indexOf(HELPER_DECL);
    // indexOf signals "not found" with -1; offset 0 is a legitimate hit, so
    // the existence check is `>= 0`, not `> 0`.
    expect(start, "deliverResumeSyntheticOrBuffer helper missing").toBeGreaterThanOrEqual(0);
    // Fixed-size window over the start of the helper; the ordering checks
    // below only look inside this slice.
    const body = gatewaySrc.slice(start, start + 900);
    // Gate consulted...
    expect(body).toMatch(/decideInboundDelivery\(/);
    // ...and the buffer-until-idle branch buffers BEFORE any send.
    const gateIdx = body.indexOf("decideInboundDelivery(");
    const bufferIdx = body.indexOf("pendingInboundBuffer.push(");
    const sendIdx = body.indexOf("ipcServer.sendToAgent(");
    expect(gateIdx).toBeGreaterThanOrEqual(0);
    expect(bufferIdx, "must buffer in the helper").toBeGreaterThan(gateIdx);
    expect(sendIdx, "must still deliver in the idle branch").toBeGreaterThan(gateIdx);
    expect(bufferIdx, "buffer-until-idle branch precedes the send branch").toBeLessThan(sendIdx);
  });

  it("no resume synthetic is sent via a raw ungated ipcServer.sendToAgent", () => {
    // Every resume wake-up — vault_grant_approved/denied, secret_provided/
    // declined, secret_provide_failed, vault_save_completed/failed/discarded
    // — must route through the helper. A raw sendToAgent of one of these
    // named inbound vars would reintroduce the mid-turn strand. The helper
    // deliberately names its param `inbound` (NOT any of these), so the
    // ONLY legitimate raw sendToAgent is the helper's own
    // `ipcServer.sendToAgent(agent, inbound)`; every resume-synthetic var
    // name below must be absent as a raw send argument.
    const rawResumeSends = [
      ...gatewaySrc.matchAll(
        /ipcServer\.sendToAgent\([^,]+,\s*(synthetic|failMsg|denyInbound|discardInbound|failInbound|okInbound)\)/g,
      ),
    ];
    expect(
      rawResumeSends.map((m) => m[1]),
      "resume synthetic sent via raw sendToAgent — must use deliverResumeSyntheticOrBuffer",
    ).toEqual([]);
  });

  it("the helper's send uses a param name distinct from every resume var (keeps the grep guard honest)", () => {
    // If the helper param were renamed back to `synthetic`, the guard
    // above would get a false pass (the helper's own send would mask a
    // regressed callsite). Pin the param name.
    const start = gatewaySrc.indexOf(HELPER_DECL);
    // Fail with a clear message when the helper is gone: slicing from -1
    // would otherwise produce an unrelated fragment and a confusing
    // signature mismatch.
    expect(start, "deliverResumeSyntheticOrBuffer helper missing").toBeGreaterThanOrEqual(0);
    const sig = gatewaySrc.slice(start, start + 120);
    expect(sig).toMatch(/deliverResumeSyntheticOrBuffer\(agent: string, inbound: InboundMessage\)/);
  });
});
@@ -0,0 +1,90 @@
1
+ /**
2
+ * UAT — `/effort` command (#2336, #2342): show + tap-to-switch the
3
+ * Claude reasoning effort for the live session. The effort sibling of
4
+ * `/model`; the picker-driven menu is the same shape.
5
+ *
6
+ * Verified live on test-harness v0.15.21. Switches are session-only
7
+ * (revert on restart), so the tap test restores the original level.
8
+ *
9
+ * Self-skips green on an unwired host (spinUp can't resolve the chat).
10
+ */
11
+ import { describe, expect, it } from "vitest";
12
+ import { spinUp } from "../harness.js";
13
+
14
/** Agent the scenarios are spun up against. */
const AGENT = "test-harness";
/** Per-message wait budget (ms) passed to `expectMessage`. */
const T = 30_000;

describe("uat: /effort — show, tap-switch, bad-arg", () => {
  it(
    "bare /effort shows the effort menu with a tap keyboard",
    async () => {
      const sc = await spinUp({ agent: AGENT });
      try {
        await sc.sendDM("/effort");
        const menu = await sc.expectMessage(/Effort —/, { from: "bot", timeout: T });
        expect(menu.text).toMatch(/faster → smarter|low · medium · high/i);
        expect(menu.text).toMatch(/switchroom\.yaml/i);
        const kb = await sc.driver.getKeyboard(sc.botUserId, menu.messageId);
        const labels = (kb ?? []).flat().map((b) => b.text);
        expect(labels.some((t) => /low/i.test(t)), "low button present").toBe(true);
        expect(labels.some((t) => /max/i.test(t)), "max button present").toBe(true);
      } finally {
        await sc.tearDown();
      }
    },
    60_000,
  );

  it(
    "tapping a level switches the live session, then restores",
    async () => {
      const sc = await spinUp({ agent: AGENT });
      try {
        await sc.sendDM("/effort");
        const menu = await sc.expectMessage(/Effort —/, { from: "bot", timeout: T });
        const kb = await sc.driver.getKeyboard(sc.botUserId, menu.messageId);
        const flat = (kb ?? []).flat();
        // The current level is prefixed with ✅; pick a DIFFERENT one.
        const current = flat.find((b) => /✅/.test(b.text));
        const originalData = current?.callbackData;
        const target = flat.find(
          (b) => b.callbackData && !/✅/.test(b.text) && /medium|high/i.test(b.text),
        );
        expect(target, "a non-current effort button to tap").toBeDefined();
        // Snapshot the menu text so the post-tap check can prove an edit
        // actually happened.
        const textBeforeTap = menu.text;
        await sc.driver.pressButton(sc.botUserId, menu.messageId, target!.callbackData!);
        try {
          // The card edits in place to prepend a confirmation line.
          await new Promise((r) => setTimeout(r, 4000));
          const after = await sc.driver.getMessage(sc.botUserId, menu.messageId);
          expect(after?.text ?? "").toMatch(/Effort →|Switched|effort/i);
          // The pattern above is also satisfied by the untouched menu (its
          // header already contains "Effort"), so additionally require that
          // the card text changed.
          expect(after?.text ?? "", "card text changed after the tap").not.toBe(
            textBeforeTap,
          );
        } finally {
          // Restore the original level so test-harness isn't left changed.
          // Runs in `finally` so a failed assertion above cannot skip it.
          if (originalData) {
            const kb2 = await sc.driver.getKeyboard(sc.botUserId, menu.messageId);
            const restore = (kb2 ?? [])
              .flat()
              .find((b) => b.callbackData === originalData);
            if (restore?.callbackData) {
              await sc.driver.pressButton(sc.botUserId, menu.messageId, restore.callbackData);
            }
          }
        }
      } finally {
        await sc.tearDown();
      }
    },
    90_000,
  );

  it(
    "/effort bogus → reply (error/help), never silence",
    async () => {
      const sc = await spinUp({ agent: AGENT });
      try {
        await sc.sendDM("/effort definitely-not-a-level");
        const reply = await sc.expectMessage(/\S/, { from: "bot", timeout: T });
        expect(reply.text.length).toBeGreaterThan(0);
      } finally {
        await sc.tearDown();
      }
    },
    60_000,
  );
});
@@ -0,0 +1,97 @@
1
+ /**
2
+ * End-to-end UAT — agent AUTO-RESUMES after a vault grant approval,
3
+ * under the live `telegram-id` (single-factor) posture. Regression gate
4
+ * for the mid-turn-strand fix (#2340).
5
+ *
6
+ * THE BUG (#2340, clerk 2026-06-13): the gateway injects a synthetic
7
+ * "✅ approved — resume your task" inbound after the operator taps
8
+ * Approve. That inject used a raw `sendToAgent` and only buffered on a
9
+ * disconnected bridge. When the approval landed WHILE the agent's
10
+ * grant-requesting turn was still finishing, the socket write succeeded
11
+ * (`delivered=true`) but claude was mid-turn, so the channel
12
+ * notification stranded in its TUI composer (the #1556 race) and the
13
+ * agent sat idle until manually poked. Fix: route the resume through
14
+ * the same turn-gate as normal inbounds (buffer mid-turn, flush at
15
+ * turn-end). This scenario proves the agent resumes on its own.
16
+ *
17
+ * Posture: the live fleet runs `vault.broker.approvalAuth: telegram-id`
18
+ * (broker auto-unlocked), so tapping Approve mints silently — NO
19
+ * passphrase prompt. The sibling `vault-grant-auto-resume-dm.test.ts`
20
+ * covers the (now-legacy) passphrase posture and stays skipped.
21
+ *
22
+ * No sacrificial key needed: `vault_request_access` mints an ACL grant
23
+ * for the key *pattern*; the card → approve → resume cycle fires
24
+ * whether or not the key holds a value. We assert the RESUME (a fresh
25
+ * bot turn after the tap, with no driver nudge), not a secret value —
26
+ * so nothing sensitive is read or leaked.
27
+ *
28
+ * Self-skips green when the driver can't resolve the chat (unwired
29
+ * host), matching the other opt-in scenarios — uat/** is excluded from
30
+ * gating CI anyway. Mutates host vault state (mints a short grant on
31
+ * test-harness); harmless + TTL-expiring.
32
+ */
33
+
34
+ import { describe, expect, it } from "vitest";
35
+ import { spinUp } from "../harness.js";
36
+
37
/** Agent the scenario is spun up against. */
const AGENT = "test-harness";
/** Vault key pattern named in the access request sent to the agent. */
const KEY = "uat/resume-probe";

describe("uat: agent auto-resumes after vault grant approval — telegram-id (#2340)", () => {
  it(
    "fires card → operator taps Approve → agent emits a NEW turn with no nudge",
    async () => {
      const sc = await spinUp({ agent: AGENT });
      try {
        // 1. Ask the agent to request access then resume. Steer it to
        //    end its turn after the tool call so the approval lands at
        //    a turn boundary — the exact window #2340 fixes.
        await sc.sendDM(
          `Call your vault_request_access MCP tool with key="${KEY}", ` +
            `scope="read", reason="UAT #2340 resume gate". After the tool ` +
            `returns "waiting for operator", END YOUR TURN. When the ` +
            `operator approves, you should AUTOMATICALLY resume: confirm ` +
            `the grant landed and that you saw the approval for ${KEY}.`,
        );

        // 2. Wait for the approval card.
        const card = await sc.expectMessage(/wants vault access/i, {
          from: "bot",
          timeout: 120_000,
        });

        // 3. Find + tap the Approve button. A missing keyboard is treated
        //    as "no buttons" so the failure surfaces through the
        //    descriptive assertion below rather than a TypeError.
        const kb = await sc.driver.getKeyboard(sc.botUserId, card.messageId);
        const approve = (kb ?? [])
          .flat()
          .find((b) => b.callbackData !== undefined && /approve/i.test(b.text));
        expect(approve, "Approve button present on the card").toBeDefined();
        const tapAtMsgId = card.messageId;
        await sc.driver.pressButton(sc.botUserId, card.messageId, approve!.callbackData!);

        // 4. Single-factor: card edits to "Granted" with no passphrase
        //    prompt. Anchor on the grant confirmation.
        await sc.expectMessage(/Granted|already has|access to/i, {
          from: "bot",
          timeout: 30_000,
        });

        // 5. THE #2340 ASSERTION: the agent auto-resumes — a NEW bot
        //    turn referencing the approval/grant/key, WITHOUT the driver
        //    sending anything else. Pre-fix this stranded mid-turn and
        //    timed out. The resume reply must be a message newer than
        //    the card we tapped (not the card edit).
        const resume = await sc.expectMessage(
          (m) =>
            m.messageId > tapAtMsgId &&
            /(approv|grant|access|resume|landed|✅)/i.test(m.text),
          { from: "bot", timeout: 150_000 },
        );
        expect(resume.text.length).toBeGreaterThan(0);
      } finally {
        await sc.tearDown();
      }
    },
    360_000,
  );
});
@@ -34,10 +34,13 @@ describe("uat: /model command — show, switch, bad-name", () => {
34
34
  const sc = await spinUp({ agent: AGENT });
35
35
  try {
36
36
  await sc.sendDM("/model");
37
- // v2 (picker-driven menu): "Now: <model>"; v1 / fallback path:
38
- // "Configured: <model>". Either proves the gateway handled the
39
- // command rather than forwarding it to claude as plain text.
40
- const shape = /Now:|Configured:/i;
37
+ // v2 (picker-driven menu) renders the live model as
38
+ // "Default (new sessions): <model>" (shipped wording, verified
39
+ // live on test-harness v0.15.21); "Now: <model>" was the
40
+ // pre-ship wording; v1 / fallback path renders "Configured:
41
+ // <model>". Any of these proves the gateway handled the command
42
+ // rather than forwarding it to claude as plain text.
43
+ const shape = /Default \(new sessions\):|Now:|Configured:/i;
41
44
  const reply = await sc.expectMessage(shape, {
42
45
  from: "bot",
43
46
  timeout: REPLY_TIMEOUT_MS,
@@ -0,0 +1,71 @@
1
+ /**
2
+ * UAT — `/model` v2 dashboard BUTTON TAP (#2263, #2270, #2271). The
3
+ * existing jtbd-model-command scenario covers the bare dashboard +
4
+ * typed-arg forms; this one exercises the genuinely new path the
5
+ * mtcute driver can now drive: tapping a model button to switch the
6
+ * live session via the picker, and the menu never leaving a dead card
7
+ * (#2270 — keeps buttons + clears the toast).
8
+ *
9
+ * Switches are session-only (revert on restart); the test taps a
10
+ * different model then restores the original so test-harness isn't
11
+ * left changed.
12
+ *
13
+ * Self-skips green on an unwired host.
14
+ */
15
+ import { describe, expect, it } from "vitest";
16
+ import { spinUp } from "../harness.js";
17
+
18
/** Agent the scenario is spun up against. */
const AGENT = "test-harness";

describe("uat: /model dashboard button tap switches the live session", () => {
  it(
    "tap a non-current model → switch confirmation; then restore",
    async () => {
      const sc = await spinUp({ agent: AGENT });
      try {
        await sc.sendDM("/model");
        const menu = await sc.expectMessage(/Default \(new sessions\):|Now:/i, {
          from: "bot",
          timeout: 30_000,
        });
        const kb = await sc.driver.getKeyboard(sc.botUserId, menu.messageId);
        const flat = (kb ?? []).flat().filter((b) => b.callbackData);
        // Model buttons carry mdl:s:<tag>; the current one is prefixed
        // ✅. Refresh (mdl:r) is excluded — pick a non-current model.
        const originalLabel = flat
          .find((b) => /✅/.test(b.text))
          ?.text.replace(/^✅\s*/, "");
        const target = flat.find(
          (b) => /mdl:s:/.test(b.callbackData!) && !/✅/.test(b.text),
        );
        expect(target, "a non-current model button to tap").toBeDefined();

        await sc.driver.pressButton(sc.botUserId, menu.messageId, target!.callbackData!);
        try {
          await new Promise((r) => setTimeout(r, 6000));
          // #2270: the card never goes dead — it edits in place to a
          // confirmation and KEEPS a keyboard.
          const after = await sc.driver.getMessage(sc.botUserId, menu.messageId);
          expect(after?.text ?? "").toMatch(/Set model to|Switched|model/i);
          const kbAfter = await sc.driver.getKeyboard(sc.botUserId, menu.messageId);
          expect(
            (kbAfter ?? []).flat().length,
            "menu keeps its buttons after a tap (no dead card, #2270)",
          ).toBeGreaterThan(0);
        } finally {
          // Restore the original model: tap the button whose label now
          // matches the original (it's no longer the ✅ row). Runs in
          // `finally` so a failed assertion above cannot leave
          // test-harness on the switched model; the keyboard is re-read
          // here because the assertions may have thrown before fetching it.
          if (originalLabel) {
            const kbNow = await sc.driver.getKeyboard(sc.botUserId, menu.messageId);
            const restore = (kbNow ?? [])
              .flat()
              .find(
                (b) =>
                  b.callbackData?.startsWith("mdl:s:") &&
                  b.text.replace(/^✅\s*/, "") === originalLabel,
              );
            if (restore?.callbackData) {
              await sc.driver.pressButton(sc.botUserId, menu.messageId, restore.callbackData);
            }
          }
        }
      } finally {
        await sc.tearDown();
      }
    },
    120_000,
  );
});
@@ -0,0 +1,40 @@
1
+ /**
2
+ * UAT — `/whoami` (#2341): the operator's read-only view of THIS
3
+ * agent's sandbox (same data the agent's `config whoami` MCP tool and
4
+ * the `switchroom config whoami` host CLI report). Read-only, like
5
+ * `/version`; mutates nothing.
6
+ *
7
+ * Verified live on test-harness v0.15.21. Self-skips green on an
8
+ * unwired host.
9
+ */
10
+ import { describe, expect, it } from "vitest";
11
+ import { spinUp } from "../harness.js";
12
+
13
/** Agent the scenario is spun up against. */
const AGENT = "test-harness";

describe("uat: /whoami shows the agent sandbox card", () => {
  it(
    "renders tier, model, tools, MCP, and powers",
    async () => {
      const scenario = await spinUp({ agent: AGENT });
      try {
        await scenario.sendDM("/whoami");
        // The card header has the form "👤 <agent> · <tier>".
        const card = await scenario.expectMessage(/👤\s*test-harness/i, {
          from: "bot",
          timeout: 30_000,
        });
        // Fields that show whoami resolved the sandbox rather than echoing
        // a stub; the last pattern is the tier marker in the header
        // (standard / admin / root). Checked in order, first miss fails.
        // NOTE(review): the test title mentions MCP but nothing here pins
        // it — confirm the card's label before adding an assertion.
        const requiredPatterns = [
          /Model:/i,
          /Tools:/i,
          /Powers:/i,
          /·\s*(standard|admin|root)/i,
        ];
        for (const pattern of requiredPatterns) {
          expect(card.text).toMatch(pattern);
        }
      } finally {
        await scenario.tearDown();
      }
    },
    60_000,
  );
});