switchroom 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +5 -4
  2. package/dist/cli/drive-write-pretool.mjs +5418 -0
  3. package/dist/cli/switchroom.js +201 -24
  4. package/package.json +1 -1
  5. package/telegram-plugin/admin-commands/dispatch.test.ts +1 -1
  6. package/telegram-plugin/admin-commands/index.ts +2 -0
  7. package/telegram-plugin/auth-snapshot-format.ts +612 -0
  8. package/telegram-plugin/auto-fallback-fleet.ts +215 -0
  9. package/telegram-plugin/auto-fallback.ts +28 -301
  10. package/telegram-plugin/dist/gateway/gateway.js +4407 -2252
  11. package/telegram-plugin/fleet-fallback-gate.ts +105 -0
  12. package/telegram-plugin/gateway/approval-callback.test.ts +104 -0
  13. package/telegram-plugin/gateway/approval-callback.ts +31 -3
  14. package/telegram-plugin/gateway/auth-command.ts +121 -10
  15. package/telegram-plugin/gateway/auth-status-adapter.ts +101 -0
  16. package/telegram-plugin/gateway/boot-card.ts +1 -1
  17. package/telegram-plugin/gateway/boot-probes.ts +6 -9
  18. package/telegram-plugin/gateway/diff-preview-card.test.ts +192 -0
  19. package/telegram-plugin/gateway/diff-preview-card.ts +170 -0
  20. package/telegram-plugin/gateway/drive-write-approval.test.ts +312 -0
  21. package/telegram-plugin/gateway/drive-write-approval.ts +243 -0
  22. package/telegram-plugin/gateway/folder-picker-handler.test.ts +314 -0
  23. package/telegram-plugin/gateway/folder-picker-handler.ts +348 -0
  24. package/telegram-plugin/gateway/gateway.ts +876 -173
  25. package/telegram-plugin/gateway/hostd-dispatch.ts +127 -0
  26. package/telegram-plugin/gateway/ipc-protocol.ts +83 -2
  27. package/telegram-plugin/gateway/ipc-server.ts +69 -0
  28. package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +103 -12
  29. package/telegram-plugin/model-unavailable.ts +28 -12
  30. package/telegram-plugin/silence-poke.ts +153 -1
  31. package/telegram-plugin/tests/auth-command-format2.test.ts +156 -0
  32. package/telegram-plugin/tests/auth-snapshot-format.test.ts +429 -0
  33. package/telegram-plugin/tests/auth-status-adapter.test.ts +129 -0
  34. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +211 -0
  35. package/telegram-plugin/tests/auto-fallback.test.ts +60 -358
  36. package/telegram-plugin/tests/boot-probes.test.ts +16 -18
  37. package/telegram-plugin/tests/fleet-fallback-gate.test.ts +197 -0
  38. package/telegram-plugin/tests/model-unavailable.test.ts +30 -5
  39. package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +212 -2
  40. package/telegram-plugin/tests/silence-poke.test.ts +237 -0
  41. package/telegram-plugin/tests/turn-flush-safety.test.ts +112 -0
  42. package/telegram-plugin/turn-flush-safety.ts +55 -1
  43. package/telegram-plugin/uat/SETUP.md +16 -12
  44. package/telegram-plugin/auto-fallback-dispatcher.ts +0 -68
  45. package/telegram-plugin/tests/auto-fallback-dispatcher.e2e.test.ts +0 -183
  46. package/telegram-plugin/tests/hostd-dispatch.test.ts +0 -129
@@ -896,42 +896,40 @@ describe('probeAccount — nextStep agent-name interpolation', () => {
896
896
  }
897
897
  })
898
898
 
899
- it('not-signed-in hint interpolates agentName instead of <agent>', async () => {
899
+ it('not-signed-in hint points at RFC H fleet-wide auth verbs', async () => {
900
900
  tmpDir = setupAgentDir({})
901
- const result = await probeAccount(tmpDir, { agentName: 'finn' })
901
+ const result = await probeAccount(tmpDir)
902
902
  expect(result.status).toBe('degraded')
903
903
  expect(result.detail).toBe('not signed in')
904
904
  expect(result.nextStep).toBeDefined()
905
- expect(result.nextStep).toContain('switchroom auth login finn')
906
- expect(result.nextStep).not.toContain('<agent>')
905
+ expect(result.nextStep).toContain('switchroom auth add')
906
+ expect(result.nextStep).toContain('--from-oauth')
907
+ expect(result.nextStep).toContain('switchroom auth use')
908
+ // RFC H: hint must not point at the retired per-agent `auth login` verb.
909
+ expect(result.nextStep).not.toContain('auth login')
907
910
  })
908
911
 
909
- it('expired-token hint interpolates agentName', async () => {
912
+ it('expired-token hint points at broker auto-refresh + manual fallback', async () => {
910
913
  tmpDir = setupAgentDir(
911
914
  { oauthAccount: { emailAddress: 'me@example.com', billingType: 'max' } },
912
915
  { expiresAt: Date.now() - 86_400_000 }, // expired yesterday
913
916
  )
914
- const result = await probeAccount(tmpDir, { agentName: 'klanker' })
917
+ const result = await probeAccount(tmpDir)
915
918
  expect(result.status).toBe('fail')
916
- expect(result.nextStep).toContain('switchroom auth login klanker')
917
- expect(result.nextStep).not.toContain('<agent>')
919
+ expect(result.nextStep).toContain('switchroom auth refresh')
920
+ expect(result.nextStep).toContain('--replace')
921
+ expect(result.nextStep).not.toContain('auth login')
918
922
  })
919
923
 
920
- it('expiring-soon hint interpolates agentName', async () => {
924
+ it('expiring-soon hint points at broker auto-refresh window', async () => {
921
925
  tmpDir = setupAgentDir(
922
926
  { oauthAccount: { emailAddress: 'me@example.com', billingType: 'max' } },
923
927
  { expiresAt: Date.now() + 3 * 86_400_000 }, // 3 days left (< 7)
924
928
  )
925
- const result = await probeAccount(tmpDir, { agentName: 'lawgpt' })
926
- expect(result.status).toBe('degraded')
927
- expect(result.nextStep).toContain('switchroom auth login lawgpt')
928
- expect(result.nextStep).not.toContain('<agent>')
929
- })
930
-
931
- it('falls back to <agent> placeholder when no agentName provided (backwards-compat)', async () => {
932
- tmpDir = setupAgentDir({})
933
929
  const result = await probeAccount(tmpDir)
934
- expect(result.nextStep).toContain('<agent>')
930
+ expect(result.status).toBe('degraded')
931
+ expect(result.nextStep).toContain('switchroom auth refresh')
932
+ expect(result.nextStep).not.toContain('auth login')
935
933
  })
936
934
  })
937
935
 
@@ -0,0 +1,197 @@
1
+ import { describe, expect, test } from "bun:test";
2
+ import { createFleetFallbackGate } from "../fleet-fallback-gate.js";
3
+
4
+ function fakeClock(start = 0) {
5
+ let now = start;
6
+ return {
7
+ nowFn: () => now,
8
+ advance(ms: number) { now += ms; },
9
+ set(ms: number) { now = ms; },
10
+ };
11
+ }
12
+
13
+ describe("createFleetFallbackGate — wouldFire honesty contract", () => {
14
+ test("fresh state: wouldFire is true", () => {
15
+ const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: fakeClock().nowFn });
16
+ expect(gate.wouldFire()).toBe(true);
17
+ });
18
+
19
+ test("in-flight: wouldFire is false until action resolves", async () => {
20
+ const clock = fakeClock();
21
+ const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
22
+
23
+ let resolveAction: (b: boolean) => void = () => {};
24
+ const action = () => new Promise<boolean>((r) => { resolveAction = r; });
25
+
26
+ const firePromise = gate.fire(action);
27
+
28
+ expect(gate.wouldFire()).toBe(false);
29
+ expect(gate.inspect().inFlight).toBe(true);
30
+
31
+ resolveAction(true);
32
+ await firePromise;
33
+
34
+ // After fire stamps lastFiredAtMs, dedup window blocks until clock advances.
35
+ expect(gate.wouldFire()).toBe(false);
36
+ clock.advance(30_000);
37
+ expect(gate.wouldFire()).toBe(true);
38
+ });
39
+
40
+ test("post-fire dedup window blocks wouldFire", async () => {
41
+ const clock = fakeClock();
42
+ const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
43
+
44
+ await gate.fire(async () => true);
45
+ expect(gate.wouldFire()).toBe(false);
46
+
47
+ clock.advance(29_999);
48
+ expect(gate.wouldFire()).toBe(false);
49
+
50
+ clock.advance(1);
51
+ expect(gate.wouldFire()).toBe(true);
52
+ });
53
+
54
+ test("no-op fires (action returns false) DO NOT arm dedup window", async () => {
55
+ const clock = fakeClock();
56
+ const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
57
+
58
+ await gate.fire(async () => false);
59
+ // Window NOT armed — wouldFire should still be true immediately.
60
+ expect(gate.wouldFire()).toBe(true);
61
+ expect(gate.inspect().lastFiredAtMs).toBe(Number.NEGATIVE_INFINITY);
62
+ });
63
+
64
+ test("thrown action: dedup window NOT armed, gate releases in-flight", async () => {
65
+ const clock = fakeClock();
66
+ const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
67
+ const errors: unknown[] = [];
68
+
69
+ await gate.fire(async () => { throw new Error("broker exploded"); }, (e) => errors.push(e));
70
+
71
+ expect(gate.inspect().inFlight).toBe(false);
72
+ expect(gate.inspect().lastFiredAtMs).toBe(Number.NEGATIVE_INFINITY);
73
+ expect(gate.wouldFire()).toBe(true);
74
+ expect((errors[0] as Error).message).toBe("broker exploded");
75
+ });
76
+
77
+ test("no onError: thrown action still releases in-flight without crashing", async () => {
78
+ const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: fakeClock().nowFn });
79
+
80
+ await gate.fire(async () => { throw new Error("silent"); });
81
+
82
+ expect(gate.inspect().inFlight).toBe(false);
83
+ expect(gate.wouldFire()).toBe(true);
84
+ });
85
+ });
86
+
87
+ describe("createFleetFallbackGate — fire semantics", () => {
88
+ test("collapses concurrent callers to one in-flight Promise", async () => {
89
+ const clock = fakeClock();
90
+ const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
91
+ let calls = 0;
92
+ let resolveAction: (b: boolean) => void = () => {};
93
+
94
+ const action = () => {
95
+ calls += 1;
96
+ return new Promise<boolean>((r) => { resolveAction = r; });
97
+ };
98
+
99
+ const p1 = gate.fire(action);
100
+ const p2 = gate.fire(action);
101
+ const p3 = gate.fire(action);
102
+
103
+ // Same in-flight promise returned to all three callers.
104
+ expect(p1).toBe(p2);
105
+ expect(p2).toBe(p3);
106
+ expect(calls).toBe(1);
107
+
108
+ resolveAction(true);
109
+ await Promise.all([p1, p2, p3]);
110
+ expect(calls).toBe(1);
111
+ });
112
+
113
+ test("fire during dedup window resolves immediately without invoking action", async () => {
114
+ const clock = fakeClock();
115
+ const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
116
+ let calls = 0;
117
+
118
+ await gate.fire(async () => { calls += 1; return true; });
119
+ expect(calls).toBe(1);
120
+
121
+ await gate.fire(async () => { calls += 1; return true; });
122
+ expect(calls).toBe(1);
123
+
124
+ clock.advance(30_000);
125
+
126
+ await gate.fire(async () => { calls += 1; return true; });
127
+ expect(calls).toBe(2);
128
+ });
129
+ });
130
+
131
+ describe("createFleetFallbackGate — broker reachability check", () => {
132
+ test("brokerReachable=false makes wouldFire return false even on fresh state", () => {
133
+ const gate = createFleetFallbackGate({
134
+ dedupMs: 30_000,
135
+ nowFn: fakeClock().nowFn,
136
+ brokerReachable: () => false,
137
+ });
138
+ expect(gate.wouldFire()).toBe(false);
139
+ });
140
+
141
+ test("brokerReachable=true gates as if no check provided", () => {
142
+ const gate = createFleetFallbackGate({
143
+ dedupMs: 30_000,
144
+ nowFn: fakeClock().nowFn,
145
+ brokerReachable: () => true,
146
+ });
147
+ expect(gate.wouldFire()).toBe(true);
148
+ });
149
+
150
+ test("brokerReachable=false makes fire() short-circuit without invoking action", async () => {
151
+ let calls = 0;
152
+ const gate = createFleetFallbackGate({
153
+ dedupMs: 30_000,
154
+ nowFn: fakeClock().nowFn,
155
+ brokerReachable: () => false,
156
+ });
157
+
158
+ await gate.fire(async () => { calls += 1; return true; });
159
+ expect(calls).toBe(0);
160
+ expect(gate.inspect().lastFiredAtMs).toBe(Number.NEGATIVE_INFINITY);
161
+ });
162
+
163
+ test("brokerReachable can flip from false to true between calls", async () => {
164
+ let reachable = false;
165
+ let calls = 0;
166
+ const gate = createFleetFallbackGate({
167
+ dedupMs: 30_000,
168
+ nowFn: fakeClock().nowFn,
169
+ brokerReachable: () => reachable,
170
+ });
171
+
172
+ expect(gate.wouldFire()).toBe(false);
173
+ await gate.fire(async () => { calls += 1; return true; });
174
+ expect(calls).toBe(0);
175
+
176
+ reachable = true;
177
+ expect(gate.wouldFire()).toBe(true);
178
+ await gate.fire(async () => { calls += 1; return true; });
179
+ expect(calls).toBe(1);
180
+ });
181
+ });
182
+
183
+ describe("createFleetFallbackGate — reset (test seam)", () => {
184
+ test("reset clears in-flight + lastFiredAtMs", async () => {
185
+ const clock = fakeClock();
186
+ const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
187
+
188
+ await gate.fire(async () => true);
189
+ expect(gate.inspect().lastFiredAtMs).toBeGreaterThan(Number.NEGATIVE_INFINITY);
190
+ expect(gate.wouldFire()).toBe(false);
191
+
192
+ gate.reset();
193
+ expect(gate.inspect().lastFiredAtMs).toBe(Number.NEGATIVE_INFINITY);
194
+ expect(gate.inspect().inFlight).toBe(false);
195
+ expect(gate.wouldFire()).toBe(true);
196
+ });
197
+ });
@@ -154,7 +154,7 @@ describe('formatModelUnavailableCard — actionable card', () => {
154
154
  return resetAt ? { kind, resetAt, raw: 'test' } : { kind, raw: 'test' }
155
155
  }
156
156
 
157
- it('quota_exhausted with reset → snapshot-stable card', () => {
157
+ it('quota_exhausted with reset → snapshot-stable card (manual-action shape)', () => {
158
158
  const card = formatModelUnavailableCard(
159
159
  detection('quota_exhausted', new Date('2026-05-03T13:00:00Z')),
160
160
  'gymbro',
@@ -165,12 +165,30 @@ describe('formatModelUnavailableCard — actionable card', () => {
165
165
  Reason: quota exhausted (resets in 5h)
166
166
 
167
167
  <b>What to try</b>
168
- • <code>/authfallback</code> — switch to the next account slot
168
+ • <code>/auth use &lt;label&gt;</code> — switch the fleet to a healthy account
169
169
  • <code>/auth add</code> — attach another subscription
170
170
  • <code>/usage</code> — show quota breakdown"
171
171
  `)
172
172
  })
173
173
 
174
+ it('autoFallbackInFlight=true → quiet variant (no manual command list)', () => {
175
+ // Regression for the "lying card" bug — when the gateway has
176
+ // already kicked off `fireFleetAutoFallback`, the card MUST NOT
177
+ // list manual commands the user shouldn't run. Otherwise the
178
+ // user manually types /auth use while a fleet swap is mid-flight,
179
+ // racing two writes through the broker.
180
+ const card = formatModelUnavailableCard(
181
+ detection('quota_exhausted', new Date('2026-05-03T13:00:00Z')),
182
+ 'gymbro',
183
+ { now: NOW, autoFallbackInFlight: true },
184
+ )
185
+ expect(card).toContain('Auto-failover in progress')
186
+ expect(card).not.toContain('What to try')
187
+ expect(card).not.toContain('/auth use')
188
+ expect(card).not.toContain('/auth add')
189
+ expect(card).not.toContain('/authfallback')
190
+ })
191
+
174
192
  it('overload without reset omits the parenthetical', () => {
175
193
  const card = formatModelUnavailableCard(detection('overload'), 'clerk', { now: NOW })
176
194
  expect(card).toContain('Reason: model overloaded')
@@ -183,11 +201,14 @@ describe('formatModelUnavailableCard — actionable card', () => {
183
201
  expect(card).not.toContain('(resets')
184
202
  })
185
203
 
186
- it('always includes the three actionable suggestions', () => {
204
+ it('default (no autoFallback) variant includes the actionable suggestions', () => {
187
205
  const card = formatModelUnavailableCard(detection('quota_exhausted'), 'gymbro', { now: NOW })
188
- expect(card).toContain('<code>/authfallback</code>')
206
+ expect(card).toContain('<code>/auth use')
189
207
  expect(card).toContain('<code>/auth add</code>')
190
208
  expect(card).toContain('<code>/usage</code>')
209
+ // Regression — `/authfallback` is no longer a verb (post-RFC-H);
210
+ // pre-fix the card lied by suggesting it.
211
+ expect(card).not.toContain('/authfallback')
191
212
  })
192
213
 
193
214
  it('names the slot in the header when one is supplied', () => {
@@ -283,9 +304,13 @@ describe('integration — gateway suppresses raw stderr in favour of the card',
283
304
  // The actionable card replaces the raw verbatim error.
284
305
  expect(card).toContain('Model unavailable')
285
306
  expect(card).toContain('quota exhausted')
286
- expect(card).toContain('/authfallback')
307
+ // Post-RFC-H: `/authfallback` is no longer a verb. The default
308
+ // (non-auto-fallback) card now points at `/auth use <label>` —
309
+ // the canonical fleet-wide swap.
310
+ expect(card).toContain('/auth use')
287
311
  expect(card).toContain('/auth add')
288
312
  expect(card).toContain('/usage')
313
+ expect(card).not.toContain('/authfallback')
289
314
 
290
315
  // And the raw stderr text never appears in the user-facing card.
291
316
  expect(card).not.toContain('out of extra usage')
@@ -70,6 +70,7 @@ describe('sandbox-hint-posttool', () => {
70
70
  tool_name: 'Bash',
71
71
  tool_use_id: 'toolu_003',
72
72
  tool_response: {
73
+ exit_code: 100,
73
74
  stderr:
74
75
  'E: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?',
75
76
  },
@@ -141,15 +142,224 @@ describe('sandbox-hint-posttool', () => {
141
142
  it('caps the scan window for huge tool_response payloads', () => {
142
143
  // 100 KiB of harmless output followed by an EROFS — we cap at 64 KiB
143
144
  // so this should NOT match. Keeps a runaway tool_response from
144
- // pinning the hook on a regex scan.
145
+ // pinning the hook on a regex scan. The exit_code is set so the
146
+ // failure-classifier reaches the scan path — without it, #1303's
147
+ // success-gate would return early for a different reason.
145
148
  const huge = 'x'.repeat(100 * 1024) + ' EROFS happened'
146
149
  const result = runHook({
147
150
  tool_name: 'Bash',
148
151
  tool_use_id: 'toolu_007',
149
- tool_response: { stdout: huge },
152
+ tool_response: { exit_code: 1, stdout: huge },
150
153
  })
151
154
 
152
155
  expect(result.status).toBe(0)
153
156
  expect(result.stdout.trim()).toBe('')
154
157
  })
158
+
159
+ // #1303 — the hook used to fire on every tool whose payload merely
160
+ // MENTIONED EROFS / read-only-fs / EACCES /usr / dpkg, regardless of
161
+ // whether the tool actually failed. Concrete repro: reading a file
162
+ // whose content describes the sandbox model triggered the advisory
163
+ // every time. Fix: classify tool_response as success-or-failure FIRST
164
+ // (only failures can have hit a kernel boundary), AND gate on
165
+ // write-capable tools only (Read/Grep/Glob can't EROFS).
166
+ describe('#1303 — false-positive guard', () => {
167
+ it('does NOT emit when a Read on a file MENTIONS EROFS (Read is not write-capable)', () => {
168
+ const result = runHook({
169
+ tool_name: 'Read',
170
+ tool_use_id: 'toolu_fp_read',
171
+ // Realistic: an Edit on a file whose Read returns content that
172
+ // happens to talk about the sandbox model. Pre-fix this fired.
173
+ tool_response: {
174
+ file: '/state/agent/home/some-doc.md',
175
+ content:
176
+ '# Sandbox notes\n\nWhen a write hits EROFS we say "Read-only file system".\n',
177
+ },
178
+ })
179
+
180
+ expect(result.status).toBe(0)
181
+ expect(result.stdout.trim()).toBe('')
182
+ })
183
+
184
+ it('does NOT emit when a Grep finds a line containing "Read-only file system"', () => {
185
+ const result = runHook({
186
+ tool_name: 'Grep',
187
+ tool_use_id: 'toolu_fp_grep',
188
+ tool_response: { stdout: 'docs/sandbox.md:42: Read-only file system semantics' },
189
+ })
190
+
191
+ expect(result.status).toBe(0)
192
+ expect(result.stdout.trim()).toBe('')
193
+ })
194
+
195
+ it('does NOT emit when a successful Bash mentions EROFS in stdout (exit_code=0)', () => {
196
+ const result = runHook({
197
+ tool_name: 'Bash',
198
+ tool_use_id: 'toolu_fp_bash_success',
199
+ tool_response: {
200
+ exit_code: 0,
201
+ stdout: 'I tested EROFS handling: all good.',
202
+ },
203
+ })
204
+
205
+ expect(result.status).toBe(0)
206
+ expect(result.stdout.trim()).toBe('')
207
+ })
208
+
209
+ it('does NOT emit when a successful Edit echoes new content containing "EROFS"', () => {
210
+ // The Edit tool's tool_response echoes the modified content. If
211
+ // the new content mentions EROFS — e.g. when editing this very
212
+ // hook source — the pre-fix logic fired falsely on every keystroke.
213
+ const result = runHook({
214
+ tool_name: 'Edit',
215
+ tool_use_id: 'toolu_fp_edit_success',
216
+ tool_response: {
217
+ // is_error explicitly false; no error field; no exit_code.
218
+ is_error: false,
219
+ file_path: '/state/agent/home/hook.mjs',
220
+ old_string: '// old',
221
+ new_string: '// new code mentioning EROFS and read-only file system semantics',
222
+ },
223
+ })
224
+
225
+ expect(result.status).toBe(0)
226
+ expect(result.stdout.trim()).toBe('')
227
+ })
228
+
229
+ it('still emits when an Edit FAILED with is_error=true on a real EROFS', () => {
230
+ const result = runHook({
231
+ tool_name: 'Edit',
232
+ tool_use_id: 'toolu_real_failure',
233
+ tool_response: {
234
+ is_error: true,
235
+ error: "EROFS: read-only file system, open '/opt/switchroom/skills/foo.md'",
236
+ },
237
+ })
238
+
239
+ expect(result.status).toBe(0)
240
+ const ctx = parseContext(result.stdout)
241
+ expect(ctx).toContain('Sandbox boundary hit')
242
+ })
243
+
244
+ it('still emits when a Bash FAILED with non-zero exit_code and stderr containing EROFS', () => {
245
+ const result = runHook({
246
+ tool_name: 'Bash',
247
+ tool_use_id: 'toolu_real_bash_failure',
248
+ tool_response: {
249
+ exit_code: 1,
250
+ stderr: "mkdir: cannot create directory '/opt/foo': Read-only file system",
251
+ stdout: '',
252
+ },
253
+ })
254
+
255
+ expect(result.status).toBe(0)
256
+ const ctx = parseContext(result.stdout)
257
+ expect(ctx).toContain('Sandbox boundary hit')
258
+ })
259
+
260
+ it('does NOT emit for tools not in the write-capable allowlist, even on failure-shaped payload', () => {
261
+ // Even a payload that LOOKS like a failure — `is_error: true` —
262
+ // cannot reflect a kernel sandbox hit if the tool isn't write-
263
+ // capable. Read can't EROFS. We refuse to advise.
264
+ const result = runHook({
265
+ tool_name: 'WebFetch',
266
+ tool_use_id: 'toolu_fp_webfetch',
267
+ tool_response: { is_error: true, error: 'EROFS lookalike in HTTP body' },
268
+ })
269
+
270
+ expect(result.status).toBe(0)
271
+ expect(result.stdout.trim()).toBe('')
272
+ })
273
+
274
+ it('DOES emit for an MCP tool failure (proxies can write)', () => {
275
+ const result = runHook({
276
+ tool_name: 'mcp__some-server__write_file',
277
+ tool_use_id: 'toolu_mcp_failure',
278
+ tool_response: {
279
+ is_error: true,
280
+ error: 'EROFS: read-only file system on /opt/foo',
281
+ },
282
+ })
283
+
284
+ expect(result.status).toBe(0)
285
+ const ctx = parseContext(result.stdout)
286
+ expect(ctx).toContain('Sandbox boundary hit')
287
+ })
288
+ })
289
+
290
+ // Direct unit tests on the classifier helper.
291
+ describe('classifyFailure', () => {
292
+ it('returns null for a successful object response', async () => {
293
+ const mod = await import('../hooks/sandbox-hint-posttool.mjs')
294
+ expect(mod.__internals.classifyFailure({ exit_code: 0, stdout: 'EROFS mentioned' }))
295
+ .toBeNull()
296
+ expect(mod.__internals.classifyFailure({ is_error: false, content: 'EROFS mentioned' }))
297
+ .toBeNull()
298
+ })
299
+
300
+ it('returns a structured-failure for is_error=true', async () => {
301
+ const mod = await import('../hooks/sandbox-hint-posttool.mjs')
302
+ const got = mod.__internals.classifyFailure({
303
+ is_error: true,
304
+ error: 'EROFS: ...',
305
+ })
306
+ expect(got?.kind).toBe('structured-failure')
307
+ expect(got?.body).toContain('EROFS')
308
+ })
309
+
310
+ it('returns a structured-failure for non-zero exit_code with stderr', async () => {
311
+ const mod = await import('../hooks/sandbox-hint-posttool.mjs')
312
+ const got = mod.__internals.classifyFailure({
313
+ exit_code: 1,
314
+ stderr: 'Read-only file system',
315
+ stdout: 'also relevant context',
316
+ })
317
+ expect(got?.kind).toBe('structured-failure')
318
+ // Both stderr and stdout included on failed Bash.
319
+ expect(got?.body).toContain('Read-only file system')
320
+ expect(got?.body).toContain('also relevant context')
321
+ })
322
+
323
+ it('treats a bare string as a candidate to scan', async () => {
324
+ const mod = await import('../hooks/sandbox-hint-posttool.mjs')
325
+ const got = mod.__internals.classifyFailure('mkdir: Read-only file system')
326
+ expect(got?.kind).toBe('bare-string')
327
+ expect(got?.body).toContain('Read-only file system')
328
+ })
329
+
330
+ it('returns null for null / undefined / primitives', async () => {
331
+ const mod = await import('../hooks/sandbox-hint-posttool.mjs')
332
+ expect(mod.__internals.classifyFailure(null)).toBeNull()
333
+ expect(mod.__internals.classifyFailure(undefined)).toBeNull()
334
+ expect(mod.__internals.classifyFailure(42)).toBeNull()
335
+ })
336
+ })
337
+
338
+ describe('isWriteCapableTool', () => {
339
+ it('returns true for the canonical write tools', async () => {
340
+ const mod = await import('../hooks/sandbox-hint-posttool.mjs')
341
+ for (const n of ['Edit', 'MultiEdit', 'Write', 'NotebookEdit', 'Bash']) {
342
+ expect(mod.__internals.isWriteCapableTool(n)).toBe(true)
343
+ }
344
+ })
345
+
346
+ it('returns false for read-only tools', async () => {
347
+ const mod = await import('../hooks/sandbox-hint-posttool.mjs')
348
+ for (const n of ['Read', 'Grep', 'Glob', 'WebFetch', 'WebSearch', 'TodoWrite']) {
349
+ expect(mod.__internals.isWriteCapableTool(n)).toBe(false)
350
+ }
351
+ })
352
+
353
+ it('returns true for any MCP tool (proxy writes possible)', async () => {
354
+ const mod = await import('../hooks/sandbox-hint-posttool.mjs')
355
+ expect(mod.__internals.isWriteCapableTool('mcp__server__do_thing')).toBe(true)
356
+ })
357
+
358
+ it('returns false for empty / non-string', async () => {
359
+ const mod = await import('../hooks/sandbox-hint-posttool.mjs')
360
+ expect(mod.__internals.isWriteCapableTool('')).toBe(false)
361
+ expect(mod.__internals.isWriteCapableTool(null as any)).toBe(false)
362
+ expect(mod.__internals.isWriteCapableTool(undefined as any)).toBe(false)
363
+ })
364
+ })
155
365
  })