switchroom 0.10.0 → 0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -4
- package/dist/agent-scheduler/index.js +2 -2
- package/dist/auth-broker/index.js +125 -3
- package/dist/cli/drive-write-pretool.mjs +5436 -0
- package/dist/cli/switchroom.js +231 -29
- package/dist/host-control/main.js +2 -2
- package/dist/vault/approvals/kernel-server.js +2 -2
- package/dist/vault/broker/server.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/admin-commands/dispatch.test.ts +1 -1
- package/telegram-plugin/admin-commands/index.ts +2 -0
- package/telegram-plugin/auth-snapshot-format.ts +612 -0
- package/telegram-plugin/auto-fallback-fleet.ts +215 -0
- package/telegram-plugin/auto-fallback.ts +28 -301
- package/telegram-plugin/dist/gateway/gateway.js +4314 -2143
- package/telegram-plugin/fleet-fallback-gate.ts +105 -0
- package/telegram-plugin/gateway/approval-callback.test.ts +104 -0
- package/telegram-plugin/gateway/approval-callback.ts +31 -3
- package/telegram-plugin/gateway/auth-broker-client.ts +2 -0
- package/telegram-plugin/gateway/auth-command.ts +131 -10
- package/telegram-plugin/gateway/auth-status-adapter.ts +101 -0
- package/telegram-plugin/gateway/boot-card.ts +1 -1
- package/telegram-plugin/gateway/boot-probes.ts +6 -9
- package/telegram-plugin/gateway/diff-preview-card.test.ts +192 -0
- package/telegram-plugin/gateway/diff-preview-card.ts +170 -0
- package/telegram-plugin/gateway/drive-write-approval.test.ts +312 -0
- package/telegram-plugin/gateway/drive-write-approval.ts +243 -0
- package/telegram-plugin/gateway/folder-picker-handler.test.ts +314 -0
- package/telegram-plugin/gateway/folder-picker-handler.ts +348 -0
- package/telegram-plugin/gateway/gateway.ts +903 -173
- package/telegram-plugin/gateway/hostd-dispatch.ts +137 -2
- package/telegram-plugin/gateway/ipc-protocol.ts +83 -2
- package/telegram-plugin/gateway/ipc-server.ts +69 -0
- package/telegram-plugin/hooks/sandbox-hint-posttool.mjs +103 -12
- package/telegram-plugin/model-unavailable.ts +28 -12
- package/telegram-plugin/silence-poke.ts +153 -1
- package/telegram-plugin/tests/auth-command-format2.test.ts +156 -0
- package/telegram-plugin/tests/auth-snapshot-format.test.ts +429 -0
- package/telegram-plugin/tests/auth-status-adapter.test.ts +129 -0
- package/telegram-plugin/tests/auto-fallback-fleet.test.ts +211 -0
- package/telegram-plugin/tests/auto-fallback.test.ts +60 -358
- package/telegram-plugin/tests/boot-probes.test.ts +16 -18
- package/telegram-plugin/tests/fleet-fallback-gate.test.ts +197 -0
- package/telegram-plugin/tests/model-unavailable.test.ts +30 -5
- package/telegram-plugin/tests/sandbox-hint-posttool.test.ts +212 -2
- package/telegram-plugin/tests/silence-poke.test.ts +237 -0
- package/telegram-plugin/tests/turn-flush-safety.test.ts +112 -0
- package/telegram-plugin/turn-flush-safety.ts +55 -1
- package/telegram-plugin/uat/SETUP.md +16 -12
- package/telegram-plugin/auto-fallback-dispatcher.ts +0 -68
- package/telegram-plugin/tests/auto-fallback-dispatcher.e2e.test.ts +0 -183
- package/telegram-plugin/tests/hostd-dispatch.test.ts +0 -129
|
@@ -896,42 +896,40 @@ describe('probeAccount — nextStep agent-name interpolation', () => {
|
|
|
896
896
|
}
|
|
897
897
|
})
|
|
898
898
|
|
|
899
|
-
it('not-signed-in hint
|
|
899
|
+
it('not-signed-in hint points at RFC H fleet-wide auth verbs', async () => {
|
|
900
900
|
tmpDir = setupAgentDir({})
|
|
901
|
-
const result = await probeAccount(tmpDir
|
|
901
|
+
const result = await probeAccount(tmpDir)
|
|
902
902
|
expect(result.status).toBe('degraded')
|
|
903
903
|
expect(result.detail).toBe('not signed in')
|
|
904
904
|
expect(result.nextStep).toBeDefined()
|
|
905
|
-
expect(result.nextStep).toContain('switchroom auth
|
|
906
|
-
expect(result.nextStep).
|
|
905
|
+
expect(result.nextStep).toContain('switchroom auth add')
|
|
906
|
+
expect(result.nextStep).toContain('--from-oauth')
|
|
907
|
+
expect(result.nextStep).toContain('switchroom auth use')
|
|
908
|
+
// RFC H: hint must not point at the retired per-agent `auth login` verb.
|
|
909
|
+
expect(result.nextStep).not.toContain('auth login')
|
|
907
910
|
})
|
|
908
911
|
|
|
909
|
-
it('expired-token hint
|
|
912
|
+
it('expired-token hint points at broker auto-refresh + manual fallback', async () => {
|
|
910
913
|
tmpDir = setupAgentDir(
|
|
911
914
|
{ oauthAccount: { emailAddress: 'me@example.com', billingType: 'max' } },
|
|
912
915
|
{ expiresAt: Date.now() - 86_400_000 }, // expired yesterday
|
|
913
916
|
)
|
|
914
|
-
const result = await probeAccount(tmpDir
|
|
917
|
+
const result = await probeAccount(tmpDir)
|
|
915
918
|
expect(result.status).toBe('fail')
|
|
916
|
-
expect(result.nextStep).toContain('switchroom auth
|
|
917
|
-
expect(result.nextStep).
|
|
919
|
+
expect(result.nextStep).toContain('switchroom auth refresh')
|
|
920
|
+
expect(result.nextStep).toContain('--replace')
|
|
921
|
+
expect(result.nextStep).not.toContain('auth login')
|
|
918
922
|
})
|
|
919
923
|
|
|
920
|
-
it('expiring-soon hint
|
|
924
|
+
it('expiring-soon hint points at broker auto-refresh window', async () => {
|
|
921
925
|
tmpDir = setupAgentDir(
|
|
922
926
|
{ oauthAccount: { emailAddress: 'me@example.com', billingType: 'max' } },
|
|
923
927
|
{ expiresAt: Date.now() + 3 * 86_400_000 }, // 3 days left (< 7)
|
|
924
928
|
)
|
|
925
|
-
const result = await probeAccount(tmpDir, { agentName: 'lawgpt' })
|
|
926
|
-
expect(result.status).toBe('degraded')
|
|
927
|
-
expect(result.nextStep).toContain('switchroom auth login lawgpt')
|
|
928
|
-
expect(result.nextStep).not.toContain('<agent>')
|
|
929
|
-
})
|
|
930
|
-
|
|
931
|
-
it('falls back to <agent> placeholder when no agentName provided (backwards-compat)', async () => {
|
|
932
|
-
tmpDir = setupAgentDir({})
|
|
933
929
|
const result = await probeAccount(tmpDir)
|
|
934
|
-
expect(result.
|
|
930
|
+
expect(result.status).toBe('degraded')
|
|
931
|
+
expect(result.nextStep).toContain('switchroom auth refresh')
|
|
932
|
+
expect(result.nextStep).not.toContain('auth login')
|
|
935
933
|
})
|
|
936
934
|
})
|
|
937
935
|
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
import { createFleetFallbackGate } from "../fleet-fallback-gate.js";
|
|
3
|
+
|
|
4
|
+
function fakeClock(start = 0) {
|
|
5
|
+
let now = start;
|
|
6
|
+
return {
|
|
7
|
+
nowFn: () => now,
|
|
8
|
+
advance(ms: number) { now += ms; },
|
|
9
|
+
set(ms: number) { now = ms; },
|
|
10
|
+
};
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
describe("createFleetFallbackGate — wouldFire honesty contract", () => {
|
|
14
|
+
test("fresh state: wouldFire is true", () => {
|
|
15
|
+
const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: fakeClock().nowFn });
|
|
16
|
+
expect(gate.wouldFire()).toBe(true);
|
|
17
|
+
});
|
|
18
|
+
|
|
19
|
+
test("in-flight: wouldFire is false until action resolves", async () => {
|
|
20
|
+
const clock = fakeClock();
|
|
21
|
+
const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
|
|
22
|
+
|
|
23
|
+
let resolveAction: (b: boolean) => void = () => {};
|
|
24
|
+
const action = () => new Promise<boolean>((r) => { resolveAction = r; });
|
|
25
|
+
|
|
26
|
+
const firePromise = gate.fire(action);
|
|
27
|
+
|
|
28
|
+
expect(gate.wouldFire()).toBe(false);
|
|
29
|
+
expect(gate.inspect().inFlight).toBe(true);
|
|
30
|
+
|
|
31
|
+
resolveAction(true);
|
|
32
|
+
await firePromise;
|
|
33
|
+
|
|
34
|
+
// After fire stamps lastFiredAtMs, dedup window blocks until clock advances.
|
|
35
|
+
expect(gate.wouldFire()).toBe(false);
|
|
36
|
+
clock.advance(30_000);
|
|
37
|
+
expect(gate.wouldFire()).toBe(true);
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
test("post-fire dedup window blocks wouldFire", async () => {
|
|
41
|
+
const clock = fakeClock();
|
|
42
|
+
const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
|
|
43
|
+
|
|
44
|
+
await gate.fire(async () => true);
|
|
45
|
+
expect(gate.wouldFire()).toBe(false);
|
|
46
|
+
|
|
47
|
+
clock.advance(29_999);
|
|
48
|
+
expect(gate.wouldFire()).toBe(false);
|
|
49
|
+
|
|
50
|
+
clock.advance(1);
|
|
51
|
+
expect(gate.wouldFire()).toBe(true);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
test("no-op fires (action returns false) DO NOT arm dedup window", async () => {
|
|
55
|
+
const clock = fakeClock();
|
|
56
|
+
const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
|
|
57
|
+
|
|
58
|
+
await gate.fire(async () => false);
|
|
59
|
+
// Window NOT armed — wouldFire should still be true immediately.
|
|
60
|
+
expect(gate.wouldFire()).toBe(true);
|
|
61
|
+
expect(gate.inspect().lastFiredAtMs).toBe(Number.NEGATIVE_INFINITY);
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
test("thrown action: dedup window NOT armed, gate releases in-flight", async () => {
|
|
65
|
+
const clock = fakeClock();
|
|
66
|
+
const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
|
|
67
|
+
const errors: unknown[] = [];
|
|
68
|
+
|
|
69
|
+
await gate.fire(async () => { throw new Error("broker exploded"); }, (e) => errors.push(e));
|
|
70
|
+
|
|
71
|
+
expect(gate.inspect().inFlight).toBe(false);
|
|
72
|
+
expect(gate.inspect().lastFiredAtMs).toBe(Number.NEGATIVE_INFINITY);
|
|
73
|
+
expect(gate.wouldFire()).toBe(true);
|
|
74
|
+
expect((errors[0] as Error).message).toBe("broker exploded");
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
test("no onError: thrown action still releases in-flight without crashing", async () => {
|
|
78
|
+
const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: fakeClock().nowFn });
|
|
79
|
+
|
|
80
|
+
await gate.fire(async () => { throw new Error("silent"); });
|
|
81
|
+
|
|
82
|
+
expect(gate.inspect().inFlight).toBe(false);
|
|
83
|
+
expect(gate.wouldFire()).toBe(true);
|
|
84
|
+
});
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
describe("createFleetFallbackGate — fire semantics", () => {
|
|
88
|
+
test("collapses concurrent callers to one in-flight Promise", async () => {
|
|
89
|
+
const clock = fakeClock();
|
|
90
|
+
const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
|
|
91
|
+
let calls = 0;
|
|
92
|
+
let resolveAction: (b: boolean) => void = () => {};
|
|
93
|
+
|
|
94
|
+
const action = () => {
|
|
95
|
+
calls += 1;
|
|
96
|
+
return new Promise<boolean>((r) => { resolveAction = r; });
|
|
97
|
+
};
|
|
98
|
+
|
|
99
|
+
const p1 = gate.fire(action);
|
|
100
|
+
const p2 = gate.fire(action);
|
|
101
|
+
const p3 = gate.fire(action);
|
|
102
|
+
|
|
103
|
+
// Same in-flight promise returned to all three callers.
|
|
104
|
+
expect(p1).toBe(p2);
|
|
105
|
+
expect(p2).toBe(p3);
|
|
106
|
+
expect(calls).toBe(1);
|
|
107
|
+
|
|
108
|
+
resolveAction(true);
|
|
109
|
+
await Promise.all([p1, p2, p3]);
|
|
110
|
+
expect(calls).toBe(1);
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
test("fire during dedup window resolves immediately without invoking action", async () => {
|
|
114
|
+
const clock = fakeClock();
|
|
115
|
+
const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
|
|
116
|
+
let calls = 0;
|
|
117
|
+
|
|
118
|
+
await gate.fire(async () => { calls += 1; return true; });
|
|
119
|
+
expect(calls).toBe(1);
|
|
120
|
+
|
|
121
|
+
await gate.fire(async () => { calls += 1; return true; });
|
|
122
|
+
expect(calls).toBe(1);
|
|
123
|
+
|
|
124
|
+
clock.advance(30_000);
|
|
125
|
+
|
|
126
|
+
await gate.fire(async () => { calls += 1; return true; });
|
|
127
|
+
expect(calls).toBe(2);
|
|
128
|
+
});
|
|
129
|
+
});
|
|
130
|
+
|
|
131
|
+
describe("createFleetFallbackGate — broker reachability check", () => {
|
|
132
|
+
test("brokerReachable=false makes wouldFire return false even on fresh state", () => {
|
|
133
|
+
const gate = createFleetFallbackGate({
|
|
134
|
+
dedupMs: 30_000,
|
|
135
|
+
nowFn: fakeClock().nowFn,
|
|
136
|
+
brokerReachable: () => false,
|
|
137
|
+
});
|
|
138
|
+
expect(gate.wouldFire()).toBe(false);
|
|
139
|
+
});
|
|
140
|
+
|
|
141
|
+
test("brokerReachable=true gates as if no check provided", () => {
|
|
142
|
+
const gate = createFleetFallbackGate({
|
|
143
|
+
dedupMs: 30_000,
|
|
144
|
+
nowFn: fakeClock().nowFn,
|
|
145
|
+
brokerReachable: () => true,
|
|
146
|
+
});
|
|
147
|
+
expect(gate.wouldFire()).toBe(true);
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
test("brokerReachable=false makes fire() short-circuit without invoking action", async () => {
|
|
151
|
+
let calls = 0;
|
|
152
|
+
const gate = createFleetFallbackGate({
|
|
153
|
+
dedupMs: 30_000,
|
|
154
|
+
nowFn: fakeClock().nowFn,
|
|
155
|
+
brokerReachable: () => false,
|
|
156
|
+
});
|
|
157
|
+
|
|
158
|
+
await gate.fire(async () => { calls += 1; return true; });
|
|
159
|
+
expect(calls).toBe(0);
|
|
160
|
+
expect(gate.inspect().lastFiredAtMs).toBe(Number.NEGATIVE_INFINITY);
|
|
161
|
+
});
|
|
162
|
+
|
|
163
|
+
test("brokerReachable can flip from false to true between calls", async () => {
|
|
164
|
+
let reachable = false;
|
|
165
|
+
let calls = 0;
|
|
166
|
+
const gate = createFleetFallbackGate({
|
|
167
|
+
dedupMs: 30_000,
|
|
168
|
+
nowFn: fakeClock().nowFn,
|
|
169
|
+
brokerReachable: () => reachable,
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
expect(gate.wouldFire()).toBe(false);
|
|
173
|
+
await gate.fire(async () => { calls += 1; return true; });
|
|
174
|
+
expect(calls).toBe(0);
|
|
175
|
+
|
|
176
|
+
reachable = true;
|
|
177
|
+
expect(gate.wouldFire()).toBe(true);
|
|
178
|
+
await gate.fire(async () => { calls += 1; return true; });
|
|
179
|
+
expect(calls).toBe(1);
|
|
180
|
+
});
|
|
181
|
+
});
|
|
182
|
+
|
|
183
|
+
describe("createFleetFallbackGate — reset (test seam)", () => {
|
|
184
|
+
test("reset clears in-flight + lastFiredAtMs", async () => {
|
|
185
|
+
const clock = fakeClock();
|
|
186
|
+
const gate = createFleetFallbackGate({ dedupMs: 30_000, nowFn: clock.nowFn });
|
|
187
|
+
|
|
188
|
+
await gate.fire(async () => true);
|
|
189
|
+
expect(gate.inspect().lastFiredAtMs).toBeGreaterThan(Number.NEGATIVE_INFINITY);
|
|
190
|
+
expect(gate.wouldFire()).toBe(false);
|
|
191
|
+
|
|
192
|
+
gate.reset();
|
|
193
|
+
expect(gate.inspect().lastFiredAtMs).toBe(Number.NEGATIVE_INFINITY);
|
|
194
|
+
expect(gate.inspect().inFlight).toBe(false);
|
|
195
|
+
expect(gate.wouldFire()).toBe(true);
|
|
196
|
+
});
|
|
197
|
+
});
|
|
@@ -154,7 +154,7 @@ describe('formatModelUnavailableCard — actionable card', () => {
|
|
|
154
154
|
return resetAt ? { kind, resetAt, raw: 'test' } : { kind, raw: 'test' }
|
|
155
155
|
}
|
|
156
156
|
|
|
157
|
-
it('quota_exhausted with reset → snapshot-stable card', () => {
|
|
157
|
+
it('quota_exhausted with reset → snapshot-stable card (manual-action shape)', () => {
|
|
158
158
|
const card = formatModelUnavailableCard(
|
|
159
159
|
detection('quota_exhausted', new Date('2026-05-03T13:00:00Z')),
|
|
160
160
|
'gymbro',
|
|
@@ -165,12 +165,30 @@ describe('formatModelUnavailableCard — actionable card', () => {
|
|
|
165
165
|
Reason: quota exhausted (resets in 5h)
|
|
166
166
|
|
|
167
167
|
<b>What to try</b>
|
|
168
|
-
• <code>/
|
|
168
|
+
• <code>/auth use <label></code> — switch the fleet to a healthy account
|
|
169
169
|
• <code>/auth add</code> — attach another subscription
|
|
170
170
|
• <code>/usage</code> — show quota breakdown"
|
|
171
171
|
`)
|
|
172
172
|
})
|
|
173
173
|
|
|
174
|
+
it('autoFallbackInFlight=true → quiet variant (no manual command list)', () => {
|
|
175
|
+
// Regression for the "lying card" bug — when the gateway has
|
|
176
|
+
// already kicked off `fireFleetAutoFallback`, the card MUST NOT
|
|
177
|
+
// list manual commands the user shouldn't run. Otherwise the
|
|
178
|
+
// user manually types /auth use while a fleet swap is mid-flight,
|
|
179
|
+
// racing two writes through the broker.
|
|
180
|
+
const card = formatModelUnavailableCard(
|
|
181
|
+
detection('quota_exhausted', new Date('2026-05-03T13:00:00Z')),
|
|
182
|
+
'gymbro',
|
|
183
|
+
{ now: NOW, autoFallbackInFlight: true },
|
|
184
|
+
)
|
|
185
|
+
expect(card).toContain('Auto-failover in progress')
|
|
186
|
+
expect(card).not.toContain('What to try')
|
|
187
|
+
expect(card).not.toContain('/auth use')
|
|
188
|
+
expect(card).not.toContain('/auth add')
|
|
189
|
+
expect(card).not.toContain('/authfallback')
|
|
190
|
+
})
|
|
191
|
+
|
|
174
192
|
it('overload without reset omits the parenthetical', () => {
|
|
175
193
|
const card = formatModelUnavailableCard(detection('overload'), 'clerk', { now: NOW })
|
|
176
194
|
expect(card).toContain('Reason: model overloaded')
|
|
@@ -183,11 +201,14 @@ describe('formatModelUnavailableCard — actionable card', () => {
|
|
|
183
201
|
expect(card).not.toContain('(resets')
|
|
184
202
|
})
|
|
185
203
|
|
|
186
|
-
it('
|
|
204
|
+
it('default (no autoFallback) variant includes the actionable suggestions', () => {
|
|
187
205
|
const card = formatModelUnavailableCard(detection('quota_exhausted'), 'gymbro', { now: NOW })
|
|
188
|
-
expect(card).toContain('<code>/
|
|
206
|
+
expect(card).toContain('<code>/auth use')
|
|
189
207
|
expect(card).toContain('<code>/auth add</code>')
|
|
190
208
|
expect(card).toContain('<code>/usage</code>')
|
|
209
|
+
// Regression — `/authfallback` is no longer a verb (post-RFC-H);
|
|
210
|
+
// pre-fix the card lied by suggesting it.
|
|
211
|
+
expect(card).not.toContain('/authfallback')
|
|
191
212
|
})
|
|
192
213
|
|
|
193
214
|
it('names the slot in the header when one is supplied', () => {
|
|
@@ -283,9 +304,13 @@ describe('integration — gateway suppresses raw stderr in favour of the card',
|
|
|
283
304
|
// The actionable card replaces the raw verbatim error.
|
|
284
305
|
expect(card).toContain('Model unavailable')
|
|
285
306
|
expect(card).toContain('quota exhausted')
|
|
286
|
-
|
|
307
|
+
// Post-RFC-H: `/authfallback` is no longer a verb. The default
|
|
308
|
+
// (non-auto-fallback) card now points at `/auth use <label>` —
|
|
309
|
+
// the canonical fleet-wide swap.
|
|
310
|
+
expect(card).toContain('/auth use')
|
|
287
311
|
expect(card).toContain('/auth add')
|
|
288
312
|
expect(card).toContain('/usage')
|
|
313
|
+
expect(card).not.toContain('/authfallback')
|
|
289
314
|
|
|
290
315
|
// And the raw stderr text never appears in the user-facing card.
|
|
291
316
|
expect(card).not.toContain('out of extra usage')
|
|
@@ -70,6 +70,7 @@ describe('sandbox-hint-posttool', () => {
|
|
|
70
70
|
tool_name: 'Bash',
|
|
71
71
|
tool_use_id: 'toolu_003',
|
|
72
72
|
tool_response: {
|
|
73
|
+
exit_code: 100,
|
|
73
74
|
stderr:
|
|
74
75
|
'E: Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), are you root?',
|
|
75
76
|
},
|
|
@@ -141,15 +142,224 @@ describe('sandbox-hint-posttool', () => {
|
|
|
141
142
|
it('caps the scan window for huge tool_response payloads', () => {
|
|
142
143
|
// 100 KiB of harmless output followed by an EROFS — we cap at 64 KiB
|
|
143
144
|
// so this should NOT match. Keeps a runaway tool_response from
|
|
144
|
-
// pinning the hook on a regex scan.
|
|
145
|
+
// pinning the hook on a regex scan. The exit_code is set so the
|
|
146
|
+
// failure-classifier reaches the scan path — without it, #1303's
|
|
147
|
+
// success-gate would return early for a different reason.
|
|
145
148
|
const huge = 'x'.repeat(100 * 1024) + ' EROFS happened'
|
|
146
149
|
const result = runHook({
|
|
147
150
|
tool_name: 'Bash',
|
|
148
151
|
tool_use_id: 'toolu_007',
|
|
149
|
-
tool_response: { stdout: huge },
|
|
152
|
+
tool_response: { exit_code: 1, stdout: huge },
|
|
150
153
|
})
|
|
151
154
|
|
|
152
155
|
expect(result.status).toBe(0)
|
|
153
156
|
expect(result.stdout.trim()).toBe('')
|
|
154
157
|
})
|
|
158
|
+
|
|
159
|
+
// #1303 — the hook used to fire on every tool whose payload merely
|
|
160
|
+
// MENTIONED EROFS / read-only-fs / EACCES /usr / dpkg, regardless of
|
|
161
|
+
// whether the tool actually failed. Concrete repro: reading a file
|
|
162
|
+
// whose content describes the sandbox model triggered the advisory
|
|
163
|
+
// every time. Fix: classify tool_response as success-or-failure FIRST
|
|
164
|
+
// (only failures can have hit a kernel boundary), AND gate on
|
|
165
|
+
// write-capable tools only (Read/Grep/Glob can't EROFS).
|
|
166
|
+
describe('#1303 — false-positive guard', () => {
|
|
167
|
+
it('does NOT emit when a Read on a file MENTIONS EROFS (Read is not write-capable)', () => {
|
|
168
|
+
const result = runHook({
|
|
169
|
+
tool_name: 'Read',
|
|
170
|
+
tool_use_id: 'toolu_fp_read',
|
|
171
|
+
// Realistic: a Read of a file whose returned content
|
|
172
|
+
// happens to talk about the sandbox model. Pre-fix this fired.
|
|
173
|
+
tool_response: {
|
|
174
|
+
file: '/state/agent/home/some-doc.md',
|
|
175
|
+
content:
|
|
176
|
+
'# Sandbox notes\n\nWhen a write hits EROFS we say "Read-only file system".\n',
|
|
177
|
+
},
|
|
178
|
+
})
|
|
179
|
+
|
|
180
|
+
expect(result.status).toBe(0)
|
|
181
|
+
expect(result.stdout.trim()).toBe('')
|
|
182
|
+
})
|
|
183
|
+
|
|
184
|
+
it('does NOT emit when a Grep finds a line containing "Read-only file system"', () => {
|
|
185
|
+
const result = runHook({
|
|
186
|
+
tool_name: 'Grep',
|
|
187
|
+
tool_use_id: 'toolu_fp_grep',
|
|
188
|
+
tool_response: { stdout: 'docs/sandbox.md:42: Read-only file system semantics' },
|
|
189
|
+
})
|
|
190
|
+
|
|
191
|
+
expect(result.status).toBe(0)
|
|
192
|
+
expect(result.stdout.trim()).toBe('')
|
|
193
|
+
})
|
|
194
|
+
|
|
195
|
+
it('does NOT emit when a successful Bash mentions EROFS in stdout (exit_code=0)', () => {
|
|
196
|
+
const result = runHook({
|
|
197
|
+
tool_name: 'Bash',
|
|
198
|
+
tool_use_id: 'toolu_fp_bash_success',
|
|
199
|
+
tool_response: {
|
|
200
|
+
exit_code: 0,
|
|
201
|
+
stdout: 'I tested EROFS handling: all good.',
|
|
202
|
+
},
|
|
203
|
+
})
|
|
204
|
+
|
|
205
|
+
expect(result.status).toBe(0)
|
|
206
|
+
expect(result.stdout.trim()).toBe('')
|
|
207
|
+
})
|
|
208
|
+
|
|
209
|
+
it('does NOT emit when a successful Edit echoes new content containing "EROFS"', () => {
|
|
210
|
+
// The Edit tool's tool_response echoes the modified content. If
|
|
211
|
+
// the new content mentions EROFS — e.g. when editing this very
|
|
212
|
+
// hook source — the pre-fix logic fired falsely on every such Edit.
|
|
213
|
+
const result = runHook({
|
|
214
|
+
tool_name: 'Edit',
|
|
215
|
+
tool_use_id: 'toolu_fp_edit_success',
|
|
216
|
+
tool_response: {
|
|
217
|
+
// is_error explicitly false; no error field; no exit_code.
|
|
218
|
+
is_error: false,
|
|
219
|
+
file_path: '/state/agent/home/hook.mjs',
|
|
220
|
+
old_string: '// old',
|
|
221
|
+
new_string: '// new code mentioning EROFS and read-only file system semantics',
|
|
222
|
+
},
|
|
223
|
+
})
|
|
224
|
+
|
|
225
|
+
expect(result.status).toBe(0)
|
|
226
|
+
expect(result.stdout.trim()).toBe('')
|
|
227
|
+
})
|
|
228
|
+
|
|
229
|
+
it('still emits when an Edit FAILED with is_error=true on a real EROFS', () => {
|
|
230
|
+
const result = runHook({
|
|
231
|
+
tool_name: 'Edit',
|
|
232
|
+
tool_use_id: 'toolu_real_failure',
|
|
233
|
+
tool_response: {
|
|
234
|
+
is_error: true,
|
|
235
|
+
error: "EROFS: read-only file system, open '/opt/switchroom/skills/foo.md'",
|
|
236
|
+
},
|
|
237
|
+
})
|
|
238
|
+
|
|
239
|
+
expect(result.status).toBe(0)
|
|
240
|
+
const ctx = parseContext(result.stdout)
|
|
241
|
+
expect(ctx).toContain('Sandbox boundary hit')
|
|
242
|
+
})
|
|
243
|
+
|
|
244
|
+
it('still emits when a Bash FAILED with non-zero exit_code and stderr containing EROFS', () => {
|
|
245
|
+
const result = runHook({
|
|
246
|
+
tool_name: 'Bash',
|
|
247
|
+
tool_use_id: 'toolu_real_bash_failure',
|
|
248
|
+
tool_response: {
|
|
249
|
+
exit_code: 1,
|
|
250
|
+
stderr: "mkdir: cannot create directory '/opt/foo': Read-only file system",
|
|
251
|
+
stdout: '',
|
|
252
|
+
},
|
|
253
|
+
})
|
|
254
|
+
|
|
255
|
+
expect(result.status).toBe(0)
|
|
256
|
+
const ctx = parseContext(result.stdout)
|
|
257
|
+
expect(ctx).toContain('Sandbox boundary hit')
|
|
258
|
+
})
|
|
259
|
+
|
|
260
|
+
it('does NOT emit for tools not in the write-capable allowlist, even on failure-shaped payload', () => {
|
|
261
|
+
// Even a payload that LOOKS like a failure — `is_error: true` —
|
|
262
|
+
// cannot reflect a kernel sandbox hit if the tool isn't write-
|
|
263
|
+
// capable. A read-only tool like WebFetch can't EROFS. We refuse to advise.
|
|
264
|
+
const result = runHook({
|
|
265
|
+
tool_name: 'WebFetch',
|
|
266
|
+
tool_use_id: 'toolu_fp_webfetch',
|
|
267
|
+
tool_response: { is_error: true, error: 'EROFS lookalike in HTTP body' },
|
|
268
|
+
})
|
|
269
|
+
|
|
270
|
+
expect(result.status).toBe(0)
|
|
271
|
+
expect(result.stdout.trim()).toBe('')
|
|
272
|
+
})
|
|
273
|
+
|
|
274
|
+
it('DOES emit for an MCP tool failure (proxies can write)', () => {
|
|
275
|
+
const result = runHook({
|
|
276
|
+
tool_name: 'mcp__some-server__write_file',
|
|
277
|
+
tool_use_id: 'toolu_mcp_failure',
|
|
278
|
+
tool_response: {
|
|
279
|
+
is_error: true,
|
|
280
|
+
error: 'EROFS: read-only file system on /opt/foo',
|
|
281
|
+
},
|
|
282
|
+
})
|
|
283
|
+
|
|
284
|
+
expect(result.status).toBe(0)
|
|
285
|
+
const ctx = parseContext(result.stdout)
|
|
286
|
+
expect(ctx).toContain('Sandbox boundary hit')
|
|
287
|
+
})
|
|
288
|
+
})
|
|
289
|
+
|
|
290
|
+
// Direct unit tests on the classifier helper.
|
|
291
|
+
describe('classifyFailure', () => {
|
|
292
|
+
it('returns null for a successful object response', async () => {
|
|
293
|
+
const mod = await import('../hooks/sandbox-hint-posttool.mjs')
|
|
294
|
+
expect(mod.__internals.classifyFailure({ exit_code: 0, stdout: 'EROFS mentioned' }))
|
|
295
|
+
.toBeNull()
|
|
296
|
+
expect(mod.__internals.classifyFailure({ is_error: false, content: 'EROFS mentioned' }))
|
|
297
|
+
.toBeNull()
|
|
298
|
+
})
|
|
299
|
+
|
|
300
|
+
it('returns a structured-failure for is_error=true', async () => {
|
|
301
|
+
const mod = await import('../hooks/sandbox-hint-posttool.mjs')
|
|
302
|
+
const got = mod.__internals.classifyFailure({
|
|
303
|
+
is_error: true,
|
|
304
|
+
error: 'EROFS: ...',
|
|
305
|
+
})
|
|
306
|
+
expect(got?.kind).toBe('structured-failure')
|
|
307
|
+
expect(got?.body).toContain('EROFS')
|
|
308
|
+
})
|
|
309
|
+
|
|
310
|
+
it('returns a structured-failure for non-zero exit_code with stderr', async () => {
|
|
311
|
+
const mod = await import('../hooks/sandbox-hint-posttool.mjs')
|
|
312
|
+
const got = mod.__internals.classifyFailure({
|
|
313
|
+
exit_code: 1,
|
|
314
|
+
stderr: 'Read-only file system',
|
|
315
|
+
stdout: 'also relevant context',
|
|
316
|
+
})
|
|
317
|
+
expect(got?.kind).toBe('structured-failure')
|
|
318
|
+
// Both stderr and stdout included on failed Bash.
|
|
319
|
+
expect(got?.body).toContain('Read-only file system')
|
|
320
|
+
expect(got?.body).toContain('also relevant context')
|
|
321
|
+
})
|
|
322
|
+
|
|
323
|
+
it('treats a bare string as a candidate to scan', async () => {
|
|
324
|
+
const mod = await import('../hooks/sandbox-hint-posttool.mjs')
|
|
325
|
+
const got = mod.__internals.classifyFailure('mkdir: Read-only file system')
|
|
326
|
+
expect(got?.kind).toBe('bare-string')
|
|
327
|
+
expect(got?.body).toContain('Read-only file system')
|
|
328
|
+
})
|
|
329
|
+
|
|
330
|
+
it('returns null for null / undefined / primitives', async () => {
|
|
331
|
+
const mod = await import('../hooks/sandbox-hint-posttool.mjs')
|
|
332
|
+
expect(mod.__internals.classifyFailure(null)).toBeNull()
|
|
333
|
+
expect(mod.__internals.classifyFailure(undefined)).toBeNull()
|
|
334
|
+
expect(mod.__internals.classifyFailure(42)).toBeNull()
|
|
335
|
+
})
|
|
336
|
+
})
|
|
337
|
+
|
|
338
|
+
describe('isWriteCapableTool', () => {
|
|
339
|
+
it('returns true for the canonical write tools', async () => {
|
|
340
|
+
const mod = await import('../hooks/sandbox-hint-posttool.mjs')
|
|
341
|
+
for (const n of ['Edit', 'MultiEdit', 'Write', 'NotebookEdit', 'Bash']) {
|
|
342
|
+
expect(mod.__internals.isWriteCapableTool(n)).toBe(true)
|
|
343
|
+
}
|
|
344
|
+
})
|
|
345
|
+
|
|
346
|
+
it('returns false for read-only tools', async () => {
|
|
347
|
+
const mod = await import('../hooks/sandbox-hint-posttool.mjs')
|
|
348
|
+
for (const n of ['Read', 'Grep', 'Glob', 'WebFetch', 'WebSearch', 'TodoWrite']) {
|
|
349
|
+
expect(mod.__internals.isWriteCapableTool(n)).toBe(false)
|
|
350
|
+
}
|
|
351
|
+
})
|
|
352
|
+
|
|
353
|
+
it('returns true for any MCP tool (proxy writes possible)', async () => {
|
|
354
|
+
const mod = await import('../hooks/sandbox-hint-posttool.mjs')
|
|
355
|
+
expect(mod.__internals.isWriteCapableTool('mcp__server__do_thing')).toBe(true)
|
|
356
|
+
})
|
|
357
|
+
|
|
358
|
+
it('returns false for empty / non-string', async () => {
|
|
359
|
+
const mod = await import('../hooks/sandbox-hint-posttool.mjs')
|
|
360
|
+
expect(mod.__internals.isWriteCapableTool('')).toBe(false)
|
|
361
|
+
expect(mod.__internals.isWriteCapableTool(null as any)).toBe(false)
|
|
362
|
+
expect(mod.__internals.isWriteCapableTool(undefined as any)).toBe(false)
|
|
363
|
+
})
|
|
364
|
+
})
|
|
155
365
|
})
|