npm - switchroom - Versions diffs - 0.10.0 → 0.11.1 - Mend

switchroom 0.10.0 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)

package/README.md +5 -4
package/dist/agent-scheduler/index.js +2 -2
package/dist/auth-broker/index.js +125 -3
package/dist/cli/drive-write-pretool.mjs +5436 -0
package/dist/cli/switchroom.js +231 -29
package/dist/host-control/main.js +2 -2
package/dist/vault/approvals/kernel-server.js +2 -2
package/dist/vault/broker/server.js +2 -2
package/package.json +1 -1
package/telegram-plugin/admin-commands/dispatch.test.ts +1 -1
package/telegram-plugin/admin-commands/index.ts +2 -0
package/telegram-plugin/auth-snapshot-format.ts +612 -0
package/telegram-plugin/auto-fallback-fleet.ts +215 -0
package/telegram-plugin/auto-fallback.ts +28 -301
package/telegram-plugin/dist/gateway/gateway.js +4314 -2143
package/telegram-plugin/fleet-fallback-gate.ts +105 -0
package/telegram-plugin/gateway/approval-callback.test.ts +104 -0
package/telegram-plugin/gateway/approval-callback.ts +31 -3
package/telegram-plugin/gateway/auth-broker-client.ts +2 -0
package/telegram-plugin/gateway/auth-command.ts +131 -10
package/telegram-plugin/gateway/auth-status-adapter.ts +101 -0
package/telegram-plugin/gateway/boot-card.ts +1 -1
package/telegram-plugin/gateway/boot-probes.ts +6 -9
package/telegram-plugin/gateway/diff-preview-card.test.ts +192 -0
package/telegram-plugin/gateway/diff-preview-card.ts +170 -0
package/telegram-plugin/gateway/drive-write-approval.test.ts +312 -0
package/telegram-plugin/gateway/drive-write-approval.ts +243 -0
package/telegram-plugin/gateway/folder-picker-handler.test.ts +314 -0
package/telegram-plugin/gateway/folder-picker-handler.ts +348 -0
package/telegram-plugin/gateway/gateway.ts +903 -173
package/telegram-plugin/gateway/hostd-dispatch.ts +137 -2
package/telegram-plugin/gateway/ipc-protocol.ts +83 -2
package/telegram-plugin/gateway/ipc-server.ts +69 -0
package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +103 -12
package/telegram-plugin/model-unavailable.ts +28 -12
package/telegram-plugin/silence-poke.ts +153 -1
package/telegram-plugin/tests/auth-command-format2.test.ts +156 -0
package/telegram-plugin/tests/auth-snapshot-format.test.ts +429 -0
package/telegram-plugin/tests/auth-status-adapter.test.ts +129 -0
package/telegram-plugin/tests/auto-fallback-fleet.test.ts +211 -0
package/telegram-plugin/tests/auto-fallback.test.ts +60 -358
package/telegram-plugin/tests/boot-probes.test.ts +16 -18
package/telegram-plugin/tests/fleet-fallback-gate.test.ts +197 -0
package/telegram-plugin/tests/model-unavailable.test.ts +30 -5
package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +212 -2
package/telegram-plugin/tests/silence-poke.test.ts +237 -0
package/telegram-plugin/tests/turn-flush-safety.test.ts +112 -0
package/telegram-plugin/turn-flush-safety.ts +55 -1
package/telegram-plugin/uat/SETUP.md +16 -12
package/telegram-plugin/auto-fallback-dispatcher.ts +0 -68
package/telegram-plugin/tests/auto-fallback-dispatcher.e2e.test.ts +0 -183
package/telegram-plugin/tests/hostd-dispatch.test.ts +0 -129

package/telegram-plugin/tests/boot-probes.test.ts CHANGED

@@ -896,42 +896,40 @@ describe('probeAccount — nextStep agent-name interpolation', () => {
     }
   })
-  it('not-signed-in hint interpolates agentName instead of <agent>', async () => {
+  it('not-signed-in hint points at RFC H fleet-wide auth verbs', async () => {
     tmpDir = setupAgentDir({})
-    const result = await probeAccount(tmpDir, { agentName: 'finn' })
+    const result = await probeAccount(tmpDir)
     expect(result.status).toBe('degraded')
     expect(result.detail).toBe('not signed in')
     expect(result.nextStep).toBeDefined()
-    expect(result.nextStep).toContain('switchroom auth login finn')
-    expect(result.nextStep).not.toContain('<agent>')
+    expect(result.nextStep).toContain('switchroom auth add')
+    expect(result.nextStep).toContain('--from-oauth')
+    expect(result.nextStep).toContain('switchroom auth use')
+    // RFC H: hint must not point at the retired per-agent `auth login` verb.
+    expect(result.nextStep).not.toContain('auth login')
   })
-  it('expired-token hint interpolates agentName', async () => {
+  it('expired-token hint points at broker auto-refresh + manual fallback', async () => {
     tmpDir = setupAgentDir(
       { oauthAccount: { emailAddress: 'me@example.com', billingType: 'max' } },
       { expiresAt: Date.now() - 86_400_000 }, // expired yesterday
     )
-    const result = await probeAccount(tmpDir, { agentName: 'klanker' })
+    const result = await probeAccount(tmpDir)
     expect(result.status).toBe('fail')
-    expect(result.nextStep).toContain('switchroom auth login klanker')
-    expect(result.nextStep).not.toContain('<agent>')
+    expect(result.nextStep).toContain('switchroom auth refresh')
+    expect(result.nextStep).toContain('--replace')
+    expect(result.nextStep).not.toContain('auth login')
   })
-  it('expiring-soon hint interpolates agentName', async () => {
+  it('expiring-soon hint points at broker auto-refresh window', async () => {
     tmpDir = setupAgentDir(
       { oauthAccount: { emailAddress: 'me@example.com', billingType: 'max' } },
       { expiresAt: Date.now() + 3 * 86_400_000 }, // 3 days left (< 7)
     )
-    const result = await probeAccount(tmpDir, { agentName: 'lawgpt' })
-    expect(result.status).toBe('degraded')
-    expect(result.nextStep).toContain('switchroom auth login lawgpt')
-    expect(result.nextStep).not.toContain('<agent>')
-  })
-  it('falls back to <agent> placeholder when no agentName provided (backwards-compat)', async () => {
-    tmpDir = setupAgentDir({})
     const result = await probeAccount(tmpDir)
-    expect(result.nextStep).toContain('<agent>')
+    expect(result.status).toBe('degraded')
+    expect(result.nextStep).toContain('switchroom auth refresh')
+    expect(result.nextStep).not.toContain('auth login')
   })
 })

package/telegram-plugin/tests/fleet-fallback-gate.test.ts ADDED

@@ -0,0 +1,197 @@
+import { describe, expect, test } from "bun:test";
+import { createFleetFallbackGate } from "../fleet-fallback-gate.js";
+function fakeClock(start = 0) {
+  let now = start;
+  return {
+    nowFn: () => now,
+    advance(ms: number) { now += ms; },
+    set(ms: number) { now = ms; },
+  };
+}
+describe("createFleetFallbackGate — wouldFire honesty contract", () => {
+  test("fresh state: wouldFire is true", () => {
+    const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: fakeClock().nowFn });
+    expect(gate.wouldFire()).toBe(true);
+  });
+  test("in-flight: wouldFire is false until action resolves", async () => {
+    const clock = fakeClock();
+    const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
+    let resolveAction: (b: boolean) => void = () => {};
+    const action = () => new Promise<boolean>((r) => { resolveAction = r; });
+    const firePromise = gate.fire(action);
+    expect(gate.wouldFire()).toBe(false);
+    expect(gate.inspect().inFlight).toBe(true);
+    resolveAction(true);
+    await firePromise;
+    // After fire stamps lastFiredAtMs, dedup window blocks until clock advances.
+    expect(gate.wouldFire()).toBe(false);
+    clock.advance(30_000);
+    expect(gate.wouldFire()).toBe(true);
+  });
+  test("post-fire dedup window blocks wouldFire", async () => {
+    const clock = fakeClock();
+    const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
+    await gate.fire(async () => true);
+    expect(gate.wouldFire()).toBe(false);
+    clock.advance(29_999);
+    expect(gate.wouldFire()).toBe(false);
+    clock.advance(1);
+    expect(gate.wouldFire()).toBe(true);
+  });
+  test("no-op fires (action returns false) DO NOT arm dedup window", async () => {
+    const clock = fakeClock();
+    const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
+    await gate.fire(async () => false);
+    // Window NOT armed — wouldFire should still be true immediately.
+    expect(gate.wouldFire()).toBe(true);
+    expect(gate.inspect().lastFiredAtMs).toBe(Number.NEGATIVE_INFINITY);
+  });
+  test("thrown action: dedup window NOT armed, gate releases in-flight", async () => {
+    const clock = fakeClock();
+    const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
+    const errors: unknown[] = [];
+    await gate.fire(async () => { throw new Error("broker exploded"); }, (e) => errors.push(e));
+    expect(gate.inspect().inFlight).toBe(false);
+    expect(gate.inspect().lastFiredAtMs).toBe(Number.NEGATIVE_INFINITY);
+    expect(gate.wouldFire()).toBe(true);
+    expect((errors[0] as Error).message).toBe("broker exploded");
+  });
+  test("no onError: thrown action still releases in-flight without crashing", async () => {
+    const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: fakeClock().nowFn });
+    await gate.fire(async () => { throw new Error("silent"); });
+    expect(gate.inspect().inFlight).toBe(false);
+    expect(gate.wouldFire()).toBe(true);
+  });
+});
+describe("createFleetFallbackGate — fire semantics", () => {
+  test("collapses concurrent callers to one in-flight Promise", async () => {
+    const clock = fakeClock();
+    const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
+    let calls = 0;
+    let resolveAction: (b: boolean) => void = () => {};
+    const action = () => {
+      calls += 1;
+      return new Promise<boolean>((r) => { resolveAction = r; });
+    };
+    const p1 = gate.fire(action);
+    const p2 = gate.fire(action);
+    const p3 = gate.fire(action);
+    // Same in-flight promise returned to all three callers.
+    expect(p1).toBe(p2);
+    expect(p2).toBe(p3);
+    expect(calls).toBe(1);
+    resolveAction(true);
+    await Promise.all([p1, p2, p3]);
+    expect(calls).toBe(1);
+  });
+  test("fire during dedup window resolves immediately without invoking action", async () => {
+    const clock = fakeClock();
+    const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
+    let calls = 0;
+    await gate.fire(async () => { calls += 1; return true; });
+    expect(calls).toBe(1);
+    await gate.fire(async () => { calls += 1; return true; });
+    expect(calls).toBe(1);
+    clock.advance(30_000);
+    await gate.fire(async () => { calls += 1; return true; });
+    expect(calls).toBe(2);
+  });
+});
+describe("createFleetFallbackGate — broker reachability check", () => {
+  test("brokerReachable=false makes wouldFire return false even on fresh state", () => {
+    const gate = createFleetFallbackGate({
+      dedupMs: 30_000,
+      nowFn: fakeClock().nowFn,
+      brokerReachable: () => false,
+    });
+    expect(gate.wouldFire()).toBe(false);
+  });
+  test("brokerReachable=true gates as if no check provided", () => {
+    const gate = createFleetFallbackGate({
+      dedupMs: 30_000,
+      nowFn: fakeClock().nowFn,
+      brokerReachable: () => true,
+    });
+    expect(gate.wouldFire()).toBe(true);
+  });
+  test("brokerReachable=false makes fire() short-circuit without invoking action", async () => {
+    let calls = 0;
+    const gate = createFleetFallbackGate({
+      dedupMs: 30_000,
+      nowFn: fakeClock().nowFn,
+      brokerReachable: () => false,
+    });
+    await gate.fire(async () => { calls += 1; return true; });
+    expect(calls).toBe(0);
+    expect(gate.inspect().lastFiredAtMs).toBe(Number.NEGATIVE_INFINITY);
+  });
+  test("brokerReachable can flip from false to true between calls", async () => {
+    let reachable = false;
+    let calls = 0;
+    const gate = createFleetFallbackGate({
+      dedupMs: 30_000,
+      nowFn: fakeClock().nowFn,
+      brokerReachable: () => reachable,
+    });
+    expect(gate.wouldFire()).toBe(false);
+    await gate.fire(async () => { calls += 1; return true; });
+    expect(calls).toBe(0);
+    reachable = true;
+    expect(gate.wouldFire()).toBe(true);
+    await gate.fire(async () => { calls += 1; return true; });
+    expect(calls).toBe(1);
+  });
+});
+describe("createFleetFallbackGate — reset (test seam)", () => {
+  test("reset clears in-flight + lastFiredAtMs", async () => {
+    const clock = fakeClock();
+    const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
+    await gate.fire(async () => true);
+    expect(gate.inspect().lastFiredAtMs).toBeGreaterThan(Number.NEGATIVE_INFINITY);
+    expect(gate.wouldFire()).toBe(false);
+    gate.reset();
+    expect(gate.inspect().lastFiredAtMs).toBe(Number.NEGATIVE_INFINITY);
+    expect(gate.inspect().inFlight).toBe(false);
+    expect(gate.wouldFire()).toBe(true);
+  });
+});

package/telegram-plugin/tests/model-unavailable.test.ts CHANGED

@@ -154,7 +154,7 @@ describe('formatModelUnavailableCard — actionable card', () => {
     return resetAt ? { kind, resetAt, raw: 'test' } : { kind, raw: 'test' }
   }
-  it('quota_exhausted with reset → snapshot-stable card', () => {
+  it('quota_exhausted with reset → snapshot-stable card (manual-action shape)', () => {
     const card = formatModelUnavailableCard(
       detection('quota_exhausted', new Date('2026-05-03T13:00:00Z')),
       'gymbro',
@@ -165,12 +165,30 @@ describe('formatModelUnavailableCard — actionable card', () => {
       Reason: quota exhausted (resets in 5h)
       <b>What to try</b>
-      • <code>/authfallback</code> — switch to the next account slot
+      • <code>/auth use &lt;label&gt;</code> — switch the fleet to a healthy account
       • <code>/auth add</code> — attach another subscription
       • <code>/usage</code> — show quota breakdown"
     `)
   })
+  it('autoFallbackInFlight=true → quiet variant (no manual command list)', () => {
+    // Regression for the "lying card" bug — when the gateway has
+    // already kicked off `fireFleetAutoFallback`, the card MUST NOT
+    // list manual commands the user shouldn't run. Otherwise the
+    // user manually types /auth use while a fleet swap is mid-flight,
+    // racing two writes through the broker.
+    const card = formatModelUnavailableCard(
+      detection('quota_exhausted', new Date('2026-05-03T13:00:00Z')),
+      'gymbro',
+      { now: NOW, autoFallbackInFlight: true },
+    )
+    expect(card).toContain('Auto-failover in progress')
+    expect(card).not.toContain('What to try')
+    expect(card).not.toContain('/auth use')
+    expect(card).not.toContain('/auth add')
+    expect(card).not.toContain('/authfallback')
+  })
   it('overload without reset omits the parenthetical', () => {
     const card = formatModelUnavailableCard(detection('overload'), 'clerk', { now: NOW })
     expect(card).toContain('Reason: model overloaded')
@@ -183,11 +201,14 @@ describe('formatModelUnavailableCard — actionable card', () => {
     expect(card).not.toContain('(resets')
   })
-  it('always includes the three actionable suggestions', () => {
+  it('default (no autoFallback) variant includes the actionable suggestions', () => {
     const card = formatModelUnavailableCard(detection('quota_exhausted'), 'gymbro', { now: NOW })
-    expect(card).toContain('<code>/authfallback</code>')
+    expect(card).toContain('<code>/auth use')
     expect(card).toContain('<code>/auth add</code>')
     expect(card).toContain('<code>/usage</code>')
+    // Regression — `/authfallback` is no longer a verb (post-RFC-H);
+    // pre-fix the card lied by suggesting it.
+    expect(card).not.toContain('/authfallback')
   })
   it('names the slot in the header when one is supplied', () => {
@@ -283,9 +304,13 @@ describe('integration — gateway suppresses raw stderr in favour of the card',
     // The actionable card replaces the raw verbatim error.
     expect(card).toContain('Model unavailable')
     expect(card).toContain('quota exhausted')
-    expect(card).toContain('/authfallback')
+    // Post-RFC-H: `/authfallback` is no longer a verb. The default
+    // (non-auto-fallback) card now points at `/auth use <label>` —
+    // the canonical fleet-wide swap.
+    expect(card).toContain('/auth use')
     expect(card).toContain('/auth add')
     expect(card).toContain('/usage')
+    expect(card).not.toContain('/authfallback')
     // And the raw stderr text never appears in the user-facing card.
     expect(card).not.toContain('out of extra usage')

package/telegram-plugin/tests/sandbox-hint-posttool.test.ts CHANGED

@@ -70,6 +70,7 @@ describe('sandbox-hint-posttool', () => {
       tool_name: 'Bash',
       tool_use_id: 'toolu_003',
       tool_response: {
+        exit_code: 100,
         stderr:
           'E: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?',
       },
@@ -141,15 +142,224 @@ describe('sandbox-hint-posttool', () => {
   it('caps the scan window for huge tool_response payloads', () => {
     // 100 KiB of harmless output followed by an EROFS — we cap at 64 KiB
     // so this should NOT match. Keeps a runaway tool_response from
-    // pinning the hook on a regex scan.
+    // pinning the hook on a regex scan. The exit_code is set so the
+    // failure-classifier reaches the scan path — without it, #1303's
+    // success-gate would return early for a different reason.
     const huge = 'x'.repeat(100 * 1024) + ' EROFS happened'
     const result = runHook({
       tool_name: 'Bash',
       tool_use_id: 'toolu_007',
-      tool_response: { stdout: huge },
+      tool_response: { exit_code: 1, stdout: huge },
     })
     expect(result.status).toBe(0)
     expect(result.stdout.trim()).toBe('')
   })
+  // #1303 — the hook used to fire on every tool whose payload merely
+  // MENTIONED EROFS / read-only-fs / EACCES /usr / dpkg, regardless of
+  // whether the tool actually failed. Concrete repro: reading a file
+  // whose content describes the sandbox model triggered the advisory
+  // every time. Fix: classify tool_response as success-or-failure FIRST
+  // (only failures can have hit a kernel boundary), AND gate on
+  // write-capable tools only (Read/Grep/Glob can't EROFS).
+  describe('#1303 — false-positive guard', () => {
+    it('does NOT emit when a Read on a file MENTIONS EROFS (Read is not write-capable)', () => {
+      const result = runHook({
+        tool_name: 'Read',
+        tool_use_id: 'toolu_fp_read',
+        // Realistic: an Edit on a file whose Read returns content that
+        // happens to talk about the sandbox model. Pre-fix this fired.
+        tool_response: {
+          file: '/state/agent/home/some-doc.md',
+          content:
+            '# Sandbox notes\n\nWhen a write hits EROFS we say "Read-only file system".\n',
+        },
+      })
+      expect(result.status).toBe(0)
+      expect(result.stdout.trim()).toBe('')
+    })
+    it('does NOT emit when a Grep finds a line containing "Read-only file system"', () => {
+      const result = runHook({
+        tool_name: 'Grep',
+        tool_use_id: 'toolu_fp_grep',
+        tool_response: { stdout: 'docs/sandbox.md:42: Read-only file system semantics' },
+      })
+      expect(result.status).toBe(0)
+      expect(result.stdout.trim()).toBe('')
+    })
+    it('does NOT emit when a successful Bash mentions EROFS in stdout (exit_code=0)', () => {
+      const result = runHook({
+        tool_name: 'Bash',
+        tool_use_id: 'toolu_fp_bash_success',
+        tool_response: {
+          exit_code: 0,
+          stdout: 'I tested EROFS handling: all good.',
+        },
+      })
+      expect(result.status).toBe(0)
+      expect(result.stdout.trim()).toBe('')
+    })
+    it('does NOT emit when a successful Edit echoes new content containing "EROFS"', () => {
+      // The Edit tool's tool_response echoes the modified content. If
+      // the new content mentions EROFS — e.g. when editing this very
+      // hook source — the pre-fix logic fired falsely on every keystroke.
+      const result = runHook({
+        tool_name: 'Edit',
+        tool_use_id: 'toolu_fp_edit_success',
+        tool_response: {
+          // is_error explicitly false; no error field; no exit_code.
+          is_error: false,
+          file_path: '/state/agent/home/hook.mjs',
+          old_string: '// old',
+          new_string: '// new code mentioning EROFS and read-only file system semantics',
+        },
+      })
+      expect(result.status).toBe(0)
+      expect(result.stdout.trim()).toBe('')
+    })
+    it('still emits when an Edit FAILED with is_error=true on a real EROFS', () => {
+      const result = runHook({
+        tool_name: 'Edit',
+        tool_use_id: 'toolu_real_failure',
+        tool_response: {
+          is_error: true,
+          error: "EROFS: read-only file system, open '/opt/switchroom/skills/foo.md'",
+        },
+      })
+      expect(result.status).toBe(0)
+      const ctx = parseContext(result.stdout)
+      expect(ctx).toContain('Sandbox boundary hit')
+    })
+    it('still emits when a Bash FAILED with non-zero exit_code and stderr containing EROFS', () => {
+      const result = runHook({
+        tool_name: 'Bash',
+        tool_use_id: 'toolu_real_bash_failure',
+        tool_response: {
+          exit_code: 1,
+          stderr: "mkdir: cannot create directory '/opt/foo': Read-only file system",
+          stdout: '',
+        },
+      })
+      expect(result.status).toBe(0)
+      const ctx = parseContext(result.stdout)
+      expect(ctx).toContain('Sandbox boundary hit')
+    })
+    it('does NOT emit for tools not in the write-capable allowlist, even on failure-shaped payload', () => {
+      // Even a payload that LOOKS like a failure — `is_error: true` —
+      // cannot reflect a kernel sandbox hit if the tool isn't write-
+      // capable. Read can't EROFS. We refuse to advise.
+      const result = runHook({
+        tool_name: 'WebFetch',
+        tool_use_id: 'toolu_fp_webfetch',
+        tool_response: { is_error: true, error: 'EROFS lookalike in HTTP body' },
+      })
+      expect(result.status).toBe(0)
+      expect(result.stdout.trim()).toBe('')
+    })
+    it('DOES emit for an MCP tool failure (proxies can write)', () => {
+      const result = runHook({
+        tool_name: 'mcp__some-server__write_file',
+        tool_use_id: 'toolu_mcp_failure',
+        tool_response: {
+          is_error: true,
+          error: 'EROFS: read-only file system on /opt/foo',
+        },
+      })
+      expect(result.status).toBe(0)
+      const ctx = parseContext(result.stdout)
+      expect(ctx).toContain('Sandbox boundary hit')
+    })
+  })
+  // Direct unit tests on the classifier helper.
+  describe('classifyFailure', () => {
+    it('returns null for a successful object response', async () => {
+      const mod = await import('../hooks/sandbox-hint-posttool.mjs')
+      expect(mod.__internals.classifyFailure({ exit_code: 0, stdout: 'EROFS mentioned' }))
+        .toBeNull()
+      expect(mod.__internals.classifyFailure({ is_error: false, content: 'EROFS mentioned' }))
+        .toBeNull()
+    })
+    it('returns a structured-failure for is_error=true', async () => {
+      const mod = await import('../hooks/sandbox-hint-posttool.mjs')
+      const got = mod.__internals.classifyFailure({
+        is_error: true,
+        error: 'EROFS: ...',
+      })
+      expect(got?.kind).toBe('structured-failure')
+      expect(got?.body).toContain('EROFS')
+    })
+    it('returns a structured-failure for non-zero exit_code with stderr', async () => {
+      const mod = await import('../hooks/sandbox-hint-posttool.mjs')
+      const got = mod.__internals.classifyFailure({
+        exit_code: 1,
+        stderr: 'Read-only file system',
+        stdout: 'also relevant context',
+      })
+      expect(got?.kind).toBe('structured-failure')
+      // Both stderr and stdout included on failed Bash.
+      expect(got?.body).toContain('Read-only file system')
+      expect(got?.body).toContain('also relevant context')
+    })
+    it('treats a bare string as a candidate to scan', async () => {
+      const mod = await import('../hooks/sandbox-hint-posttool.mjs')
+      const got = mod.__internals.classifyFailure('mkdir: Read-only file system')
+      expect(got?.kind).toBe('bare-string')
+      expect(got?.body).toContain('Read-only file system')
+    })
+    it('returns null for null / undefined / primitives', async () => {
+      const mod = await import('../hooks/sandbox-hint-posttool.mjs')
+      expect(mod.__internals.classifyFailure(null)).toBeNull()
+      expect(mod.__internals.classifyFailure(undefined)).toBeNull()
+      expect(mod.__internals.classifyFailure(42)).toBeNull()
+    })
+  })
+  describe('isWriteCapableTool', () => {
+    it('returns true for the canonical write tools', async () => {
+      const mod = await import('../hooks/sandbox-hint-posttool.mjs')
+      for (const n of ['Edit', 'MultiEdit', 'Write', 'NotebookEdit', 'Bash']) {
+        expect(mod.__internals.isWriteCapableTool(n)).toBe(true)
+      }
+    })
+    it('returns false for read-only tools', async () => {
+      const mod = await import('../hooks/sandbox-hint-posttool.mjs')
+      for (const n of ['Read', 'Grep', 'Glob', 'WebFetch', 'WebSearch', 'TodoWrite']) {
+        expect(mod.__internals.isWriteCapableTool(n)).toBe(false)
+      }
+    })
+    it('returns true for any MCP tool (proxy writes possible)', async () => {
+      const mod = await import('../hooks/sandbox-hint-posttool.mjs')
+      expect(mod.__internals.isWriteCapableTool('mcp__server__do_thing')).toBe(true)
+    })
+    it('returns false for empty / non-string', async () => {
+      const mod = await import('../hooks/sandbox-hint-posttool.mjs')
+      expect(mod.__internals.isWriteCapableTool('')).toBe(false)
+      expect(mod.__internals.isWriteCapableTool(null as any)).toBe(false)
+      expect(mod.__internals.isWriteCapableTool(undefined as any)).toBe(false)
+    })
+  })
 })