typeclaw 0.35.0 → 0.35.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent/plugin-tools.ts +18 -8
- package/src/inspect/live.ts +128 -13
- package/src/sandbox/availability.ts +87 -19
- package/src/sandbox/index.ts +4 -0
- package/src/server/index.ts +5 -1
- package/src/shared/protocol.ts +22 -11
package/package.json
CHANGED
[package.json hunk (+1 -1) not captured in this extract]
package/src/agent/plugin-tools.ts
CHANGED
|
@@ -37,15 +37,16 @@ import type {
|
|
|
37
37
|
} from '@/plugin'
|
|
38
38
|
import {
|
|
39
39
|
buildSandboxedCommand,
|
|
40
|
-
canBindProcSafely,
|
|
41
40
|
canMountRealProc,
|
|
42
41
|
DEFAULT_SANDBOX_ENV,
|
|
43
42
|
ensureBwrapAvailable,
|
|
44
43
|
ensureSessionTmpDir,
|
|
44
|
+
getProcBindSafetyVerdict,
|
|
45
45
|
isPackageInstallCommand,
|
|
46
46
|
mapVirtualTmpPath,
|
|
47
47
|
resolveHiddenPaths,
|
|
48
48
|
resolvePackageInstallZones,
|
|
49
|
+
resolveProcBindSafetyWithRetry,
|
|
49
50
|
resolveProcSelfExe,
|
|
50
51
|
resolveProtectedZones,
|
|
51
52
|
resolveSandboxSymlinks,
|
|
@@ -673,12 +674,12 @@ function subtractMaskedProtected(
|
|
|
673
674
|
// the kernel permits the mount (canMountRealProc) — it adds PID isolation but
|
|
674
675
|
// needs CAP_SYS_ADMIN (unshare --mount-proc), so it is a deliberate, narrow
|
|
675
676
|
// opt-in; else 'proc-bind' (--ro-bind /proc, NO CAP_SYS_ADMIN) when its userns
|
|
676
|
-
// leak-block is verified safe
|
|
677
|
-
//
|
|
678
|
-
//
|
|
679
|
-
//
|
|
680
|
-
//
|
|
681
|
-
//
|
|
677
|
+
// leak-block is verified safe; else 'tmpfs'. Because sandbox.realProc DEFAULTS
|
|
678
|
+
// FALSE, the first branch is normally skipped and proc-bind is the de-facto
|
|
679
|
+
// default — which is the point: the common path needs no broad outer capability.
|
|
680
|
+
// 'tmpfs' is the last-resort degraded mode where external packages can't run;
|
|
681
|
+
// reached only when proc-bind is DEFINITIVELY unavailable (a real cross-userns
|
|
682
|
+
// environ leak → fail closed) or its safety stays unverifiable after retries.
|
|
682
683
|
//
|
|
683
684
|
// Read from the boot-time `config` snapshot, NOT live getConfig(): sandbox is
|
|
684
685
|
// restart-required, and the strategy MUST track the boot-time CAP_SYS_ADMIN
|
|
@@ -688,7 +689,16 @@ function subtractMaskedProtected(
|
|
|
688
689
|
// container lifetime regardless of how many bash calls hit it.
|
|
689
690
|
async function resolveProcStrategy(): Promise<SandboxProcStrategy> {
|
|
690
691
|
if (config.sandbox.realProc && (await canMountRealProc())) return 'real-proc'
|
|
691
|
-
|
|
692
|
+
// Retry an 'inconclusive' proc-bind probe (transient under load) before
|
|
693
|
+
// degrading — a single such hiccup must not break external-package runs on a
|
|
694
|
+
// capable host. 'unsafe' still fails closed with no retry.
|
|
695
|
+
if (
|
|
696
|
+
await resolveProcBindSafetyWithRetry(
|
|
697
|
+
() => getProcBindSafetyVerdict(),
|
|
698
|
+
(ms) => Bun.sleep(ms),
|
|
699
|
+
)
|
|
700
|
+
)
|
|
701
|
+
return 'proc-bind'
|
|
692
702
|
// Degraded last resort: no working /proc strategy. External package runners
|
|
693
703
|
// (bunx/bun add/bun run <pkg-bin>) will fail with Bun's opaque "NotDir" because
|
|
694
704
|
// /proc/self/{fd,maps} are absent. Warn once so an operator on such an exotic
|
package/src/inspect/live.ts
CHANGED
|
@@ -11,9 +11,15 @@ export type StreamLiveOptions = {
|
|
|
11
11
|
onSubscribed?: (live: boolean) => void
|
|
12
12
|
onError?: (message: string) => void
|
|
13
13
|
connectTimeoutMs?: number
|
|
14
|
+
heartbeatIntervalMs?: number
|
|
15
|
+
pongTimeoutMs?: number
|
|
16
|
+
bufferedAmountCeiling?: number
|
|
14
17
|
}
|
|
15
18
|
|
|
16
19
|
const DEFAULT_CONNECT_TIMEOUT_MS = 5_000
|
|
20
|
+
const DEFAULT_HEARTBEAT_INTERVAL_MS = 10_000
|
|
21
|
+
const DEFAULT_PONG_TIMEOUT_MS = 30_000
|
|
22
|
+
const DEFAULT_BUFFERED_AMOUNT_CEILING = 1_048_576
|
|
17
23
|
|
|
18
24
|
export async function* streamLive(opts: StreamLiveOptions): AsyncGenerator<InspectEvent> {
|
|
19
25
|
const WS = opts.WebSocketImpl ?? WebSocket
|
|
@@ -26,6 +32,17 @@ export async function* streamLive(opts: StreamLiveOptions): AsyncGenerator<Inspe
|
|
|
26
32
|
const accumulators = new Map<string, string>()
|
|
27
33
|
const thinkingAccumulators = new Map<string, string>()
|
|
28
34
|
|
|
35
|
+
let heartbeat: ReturnType<typeof setInterval> | null = null
|
|
36
|
+
let awaitingPongSince: number | null = null
|
|
37
|
+
let supportsPing = false
|
|
38
|
+
|
|
39
|
+
const stopHeartbeat = (): void => {
|
|
40
|
+
if (heartbeat !== null) {
|
|
41
|
+
clearInterval(heartbeat)
|
|
42
|
+
heartbeat = null
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
|
|
29
46
|
const wake = (): void => {
|
|
30
47
|
if (resolveNext !== null) {
|
|
31
48
|
const fn = resolveNext
|
|
@@ -43,13 +60,19 @@ export async function* streamLive(opts: StreamLiveOptions): AsyncGenerator<Inspe
|
|
|
43
60
|
return
|
|
44
61
|
}
|
|
45
62
|
if (msg.type === 'subscribed') {
|
|
63
|
+
supportsPing = msg.supportsPing === true
|
|
46
64
|
opts.onSubscribed?.(msg.sessionLive)
|
|
47
65
|
return
|
|
48
66
|
}
|
|
67
|
+
if (msg.type === 'pong') {
|
|
68
|
+
awaitingPongSince = null
|
|
69
|
+
return
|
|
70
|
+
}
|
|
49
71
|
if (msg.type === 'error') {
|
|
50
72
|
opts.onError?.(msg.message)
|
|
51
73
|
pendingError = msg.message
|
|
52
74
|
closed = true
|
|
75
|
+
stopHeartbeat()
|
|
53
76
|
try {
|
|
54
77
|
ws.close()
|
|
55
78
|
} catch {
|
|
@@ -84,6 +107,7 @@ export async function* streamLive(opts: StreamLiveOptions): AsyncGenerator<Inspe
|
|
|
84
107
|
})
|
|
85
108
|
ws.addEventListener('close', () => {
|
|
86
109
|
closed = true
|
|
110
|
+
stopHeartbeat()
|
|
87
111
|
wake()
|
|
88
112
|
})
|
|
89
113
|
|
|
@@ -99,6 +123,7 @@ export async function* streamLive(opts: StreamLiveOptions): AsyncGenerator<Inspe
|
|
|
99
123
|
'abort',
|
|
100
124
|
() => {
|
|
101
125
|
closed = true
|
|
126
|
+
stopHeartbeat()
|
|
102
127
|
try {
|
|
103
128
|
ws.close()
|
|
104
129
|
} catch {
|
|
@@ -134,25 +159,115 @@ export async function* streamLive(opts: StreamLiveOptions): AsyncGenerator<Inspe
|
|
|
134
159
|
}
|
|
135
160
|
ws.send(JSON.stringify(subscribe))
|
|
136
161
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
162
|
+
startHeartbeat({
|
|
163
|
+
ws,
|
|
164
|
+
intervalMs: opts.heartbeatIntervalMs ?? DEFAULT_HEARTBEAT_INTERVAL_MS,
|
|
165
|
+
pongTimeoutMs: opts.pongTimeoutMs ?? DEFAULT_PONG_TIMEOUT_MS,
|
|
166
|
+
bufferedAmountCeiling: opts.bufferedAmountCeiling ?? DEFAULT_BUFFERED_AMOUNT_CEILING,
|
|
167
|
+
supportsPing: () => supportsPing,
|
|
168
|
+
isAwaitingPongSince: () => awaitingPongSince,
|
|
169
|
+
setAwaitingPongSince: (at) => {
|
|
170
|
+
awaitingPongSince = at
|
|
171
|
+
},
|
|
172
|
+
setTimer: (timer) => {
|
|
173
|
+
heartbeat = timer
|
|
174
|
+
},
|
|
175
|
+
onDead: () => {
|
|
176
|
+
closed = true
|
|
177
|
+
stopHeartbeat()
|
|
178
|
+
try {
|
|
179
|
+
ws.close()
|
|
180
|
+
} catch {
|
|
181
|
+
/* ignore */
|
|
182
|
+
}
|
|
183
|
+
wake()
|
|
184
|
+
},
|
|
185
|
+
})
|
|
186
|
+
|
|
187
|
+
try {
|
|
188
|
+
while (true) {
|
|
189
|
+
if (buffer.length > 0) {
|
|
190
|
+
const next = buffer.shift()!
|
|
191
|
+
yield next
|
|
192
|
+
continue
|
|
193
|
+
}
|
|
194
|
+
if (closed) {
|
|
195
|
+
if (pendingError !== null) throw new Error(pendingError)
|
|
196
|
+
return
|
|
197
|
+
}
|
|
198
|
+
const { event, done } = await new Promise<{ event: InspectEvent | null; done: boolean }>((resolve) => {
|
|
199
|
+
resolveNext = resolve
|
|
200
|
+
})
|
|
201
|
+
if (event !== null) yield event
|
|
202
|
+
if (done) {
|
|
203
|
+
if (pendingError !== null) throw new Error(pendingError)
|
|
204
|
+
return
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
} finally {
|
|
208
|
+
// Also fired when the consumer abandons the generator (break from a
|
|
209
|
+
// `for await` calls .return()): close the socket so it can't outlive the
|
|
210
|
+
// viewer, not just the heartbeat timer.
|
|
211
|
+
stopHeartbeat()
|
|
212
|
+
closed = true
|
|
213
|
+
try {
|
|
214
|
+
ws.close()
|
|
215
|
+
} catch {
|
|
216
|
+
/* ignore */
|
|
142
217
|
}
|
|
143
|
-
|
|
144
|
-
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
type HeartbeatOptions = {
|
|
222
|
+
ws: WebSocket
|
|
223
|
+
intervalMs: number
|
|
224
|
+
pongTimeoutMs: number
|
|
225
|
+
bufferedAmountCeiling: number
|
|
226
|
+
// Read live: the `subscribed` reply that sets it arrives after the timer is
|
|
227
|
+
// armed, so a snapshot taken at startHeartbeat time would always be false.
|
|
228
|
+
supportsPing: () => boolean
|
|
229
|
+
isAwaitingPongSince: () => number | null
|
|
230
|
+
setAwaitingPongSince: (at: number | null) => void
|
|
231
|
+
setTimer: (timer: ReturnType<typeof setInterval>) => void
|
|
232
|
+
onDead: () => void
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
// Steady-state liveness watchdog. The connect gate only bounds the OPENING
|
|
236
|
+
// phase; once subscribed, a wedged socket (send queue not draining, no
|
|
237
|
+
// 'close'/'error') would park the read loop forever. The interval fires on the
|
|
238
|
+
// event-loop timer queue independent of the dead socket, so it always runs.
|
|
239
|
+
// Two death signals, both treated as a clean close (return, never throw) so the
|
|
240
|
+
// viewer recovers to the picker:
|
|
241
|
+
// 1. bufferedAmount past a ceiling — our writes are not draining. Always on:
|
|
242
|
+
// it needs no server cooperation, so it works against any server version.
|
|
243
|
+
// 2. a ping with no pong within the deadline — round-trip liveness lost,
|
|
244
|
+
// which also covers idle tails (a quiet-but-healthy tail still pongs). Only
|
|
245
|
+
// armed when the server advertised supportsPing; a pre-heartbeat server
|
|
246
|
+
// answers an unknown ping with error+close, so probing it would kill the
|
|
247
|
+
// tail. Such a server degrades to bufferedAmount-only detection.
|
|
248
|
+
function startHeartbeat(opts: HeartbeatOptions): void {
|
|
249
|
+
let pingId = 0
|
|
250
|
+
const tick = (): void => {
|
|
251
|
+
if (opts.ws.bufferedAmount >= opts.bufferedAmountCeiling) {
|
|
252
|
+
opts.onDead()
|
|
145
253
|
return
|
|
146
254
|
}
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
if (done) {
|
|
152
|
-
if (pendingError !== null) throw new Error(pendingError)
|
|
255
|
+
if (!opts.supportsPing()) return
|
|
256
|
+
const awaiting = opts.isAwaitingPongSince()
|
|
257
|
+
if (awaiting !== null) {
|
|
258
|
+
if (Date.now() - awaiting >= opts.pongTimeoutMs) opts.onDead()
|
|
153
259
|
return
|
|
154
260
|
}
|
|
261
|
+
pingId += 1
|
|
262
|
+
const ping: InspectClientMessage = { type: 'ping', id: pingId }
|
|
263
|
+
try {
|
|
264
|
+
opts.ws.send(JSON.stringify(ping))
|
|
265
|
+
opts.setAwaitingPongSince(Date.now())
|
|
266
|
+
} catch {
|
|
267
|
+
opts.onDead()
|
|
268
|
+
}
|
|
155
269
|
}
|
|
270
|
+
opts.setTimer(setInterval(tick, opts.intervalMs))
|
|
156
271
|
}
|
|
157
272
|
|
|
158
273
|
function frameToEvent(
|
package/src/sandbox/availability.ts
CHANGED
|
@@ -138,6 +138,27 @@ export function _resetRealProcProbeCacheForTests(): void {
|
|
|
138
138
|
// future bwrap flag change, would turn this strategy into a secret leak. So we
|
|
139
139
|
// PROBE it directly before ever selecting it — plant a real secret in a sibling
|
|
140
140
|
// process's env and assert the sandbox cannot read it back.
|
|
141
|
+
// The probe has THREE outcomes, not two — collapsing them to a boolean is what
|
|
142
|
+
// caused the silent-degrade bug this verdict type fixes. 'safe'/'unsafe' are definitive capability
|
|
143
|
+
// facts (the userns block held / a leak was observed); 'inconclusive' is a
|
|
144
|
+
// transient local failure (probe timeout under CPU/IO contention, sentinel dying
|
|
145
|
+
// mid-probe, a bwrap startup hiccup) that proves NOTHING about the host. A caller
|
|
146
|
+
// deciding the /proc strategy must tell these apart: an inconclusive probe must
|
|
147
|
+
// trigger a RETRY, never a fall-through to tmpfs that breaks the whole bash call
|
|
148
|
+
// on a host that is actually capable. 'unsafe' must still fail closed with no
|
|
149
|
+
// retry. canBindProcSafely() keeps the old boolean shape for callers that only
|
|
150
|
+
// need "is proc-bind selectable right now"; getProcBindSafetyVerdict() exposes
|
|
151
|
+
// the third state for the retry-owning strategy resolver.
|
|
152
|
+
export type ProcBindSafetyVerdict = 'safe' | 'unsafe' | 'inconclusive'
|
|
153
|
+
|
|
154
|
+
// Only DEFINITIVE verdicts are process-global facts worth caching. Caching
|
|
155
|
+
// 'inconclusive' (i.e. its boolean `false`) would PERMANENTLY disable proc-bind
|
|
156
|
+
// for the process — a single slow first bash call would silently break every
|
|
157
|
+
// later bunx until container restart (the exact "works after restart" symptom
|
|
158
|
+
// this whole machinery exists to kill). So the cache type structurally excludes
|
|
159
|
+
// it.
|
|
160
|
+
type CacheableProcBindSafetyVerdict = Exclude<ProcBindSafetyVerdict, 'inconclusive'>
|
|
161
|
+
|
|
141
162
|
// Keyed by resolved bwrapPath, like ensureBwrapAvailable: the safety answer is a
|
|
142
163
|
// fact about a SPECIFIC bwrap binary, so a caller pinning a non-default path
|
|
143
164
|
// (tests, or a future deployment) must re-probe rather than inherit the default
|
|
@@ -145,19 +166,21 @@ export function _resetRealProcProbeCacheForTests(): void {
|
|
|
145
166
|
// concurrent first callers for one path share a single probe. Both cached
|
|
146
167
|
// process-globally (the answer is a per-container capability fact). Not abortable
|
|
147
168
|
// (see canMountRealProc).
|
|
148
|
-
const procBindProbeCache = new Map<string, boolean>()
|
|
149
|
-
const procBindProbeInFlight = new Map<string, Promise<boolean>>()
|
|
150
|
-
|
|
151
|
-
// `safe` is the answer; `cacheable` is false for INCONCLUSIVE outcomes (a probe
|
|
152
|
-
// timeout under load, or the sentinel dying mid-probe). Those are transient
|
|
153
|
-
// failure modes, not capability facts, so caching their `safe=false` would
|
|
154
|
-
// PERMANENTLY disable proc-bind for the process — a single slow first bash call
|
|
155
|
-
// would silently break every later bunx until container restart (the exact
|
|
156
|
-
// "works after restart" symptom this whole fix exists to kill). Only a probe that
|
|
157
|
-
// ran to a verdict (definitively safe OR definitively leaking) is cached.
|
|
158
|
-
type ProcBindProbe = { safe: boolean; cacheable: boolean }
|
|
169
|
+
const procBindProbeCache = new Map<string, CacheableProcBindSafetyVerdict>()
|
|
170
|
+
const procBindProbeInFlight = new Map<string, Promise<ProcBindSafetyVerdict>>()
|
|
159
171
|
|
|
160
|
-
|
|
172
|
+
// `verdict` is the answer; only definitive verdicts are `cacheable`. INCONCLUSIVE
|
|
173
|
+
// outcomes (a probe timeout under load, or the sentinel dying mid-probe) are
|
|
174
|
+
// transient failure modes, not capability facts — see the cache rationale above.
|
|
175
|
+
type ProcBindProbe =
|
|
176
|
+
| { verdict: CacheableProcBindSafetyVerdict; cacheable: true }
|
|
177
|
+
| { verdict: 'inconclusive'; cacheable: false }
|
|
178
|
+
|
|
179
|
+
// The three-state probe, deduped + cached like canBindProcSafely. The strategy
|
|
180
|
+
// resolver (resolveProcStrategy in plugin-tools.ts) consumes this so it can RETRY
|
|
181
|
+
// an 'inconclusive' result before degrading the bash call to tmpfs, while still
|
|
182
|
+
// failing closed on 'unsafe'.
|
|
183
|
+
export function getProcBindSafetyVerdict(options?: { bwrapPath?: string }): Promise<ProcBindSafetyVerdict> {
|
|
161
184
|
const bwrap = options?.bwrapPath ?? 'bwrap'
|
|
162
185
|
const cached = procBindProbeCache.get(bwrap)
|
|
163
186
|
if (cached !== undefined) return Promise.resolve(cached)
|
|
@@ -165,9 +188,9 @@ export function canBindProcSafely(options?: { bwrapPath?: string }): Promise<boo
|
|
|
165
188
|
if (existing !== undefined) return existing
|
|
166
189
|
|
|
167
190
|
const promise = probeProcBind(bwrap)
|
|
168
|
-
.then(({ safe, cacheable }) => {
|
|
169
|
-
if (cacheable) procBindProbeCache.set(bwrap, safe)
|
|
170
|
-
return safe
|
|
191
|
+
.then(({ verdict, cacheable }) => {
|
|
192
|
+
if (cacheable) procBindProbeCache.set(bwrap, verdict)
|
|
193
|
+
return verdict
|
|
171
194
|
})
|
|
172
195
|
.finally(() => {
|
|
173
196
|
procBindProbeInFlight.delete(bwrap)
|
|
@@ -176,9 +199,53 @@ export function canBindProcSafely(options?: { bwrapPath?: string }): Promise<boo
|
|
|
176
199
|
return promise
|
|
177
200
|
}
|
|
178
201
|
|
|
202
|
+
// Boolean convenience wrapper: 'safe' is the ONLY verdict that makes proc-bind
|
|
203
|
+
// selectable. 'unsafe' AND 'inconclusive' both map to false — callers that only
|
|
204
|
+
// take a boolean (and do not own a retry budget) must fail closed on either.
|
|
205
|
+
// Derives from the deduped verdict probe, so concurrent callers still share one
|
|
206
|
+
// spawn even though this wrapper's own promise identity differs per call.
|
|
207
|
+
export function canBindProcSafely(options?: { bwrapPath?: string }): Promise<boolean> {
|
|
208
|
+
return getProcBindSafetyVerdict(options).then((verdict) => verdict === 'safe')
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
// Default backoff between proc-bind safety re-probes, in ms. Array length = retry
|
|
212
|
+
// count (2 retries after the initial attempt = 3 probes total). The probe is
|
|
213
|
+
// normally sub-ms; it only returns 'inconclusive' under transient CPU/IO
|
|
214
|
+
// contention (e.g. a boot-time storm of concurrent LLM calls saturating the box
|
|
215
|
+
// and tripping the probe's own timeout), so a short staggered wait lets the spike
|
|
216
|
+
// pass before re-proving.
|
|
217
|
+
export const PROC_BIND_RETRY_BACKOFF_MS = [250, 1_000] as const
|
|
218
|
+
|
|
219
|
+
// proc-bind selection must distinguish "definitely unavailable" from "couldn't
|
|
220
|
+
// verify right now". A DEFINITIVE verdict is final: 'safe'→true; a real userns
|
|
221
|
+
// leak ('unsafe')→false with NO retry. Only an 'inconclusive' verdict (transient
|
|
222
|
+
// probe failure that proves nothing about the host) is retried, because degrading
|
|
223
|
+
// the bash call to tmpfs over a transient hiccup is what silently broke
|
|
224
|
+
// external-package runs on capable hosts. 'inconclusive' is never cached
|
|
225
|
+
// (see the cache type), so each retry re-probes from scratch. After the backoff
|
|
226
|
+
// budget is exhausted we fail CLOSED — an unverified leak-block is never treated
|
|
227
|
+
// as safe. Pure and dependency-injected (probe + sleep) so the retry policy is
|
|
228
|
+
// unit-testable without spawning processes; production passes
|
|
229
|
+
// getProcBindSafetyVerdict and Bun.sleep.
|
|
230
|
+
export async function resolveProcBindSafetyWithRetry(
|
|
231
|
+
probe: () => Promise<ProcBindSafetyVerdict>,
|
|
232
|
+
sleep: (ms: number) => Promise<void>,
|
|
233
|
+
backoffMs: readonly number[] = PROC_BIND_RETRY_BACKOFF_MS,
|
|
234
|
+
): Promise<boolean> {
|
|
235
|
+
for (let attempt = 0; ; attempt++) {
|
|
236
|
+
const verdict = await probe()
|
|
237
|
+
if (verdict === 'safe') return true
|
|
238
|
+
if (verdict === 'unsafe') return false
|
|
239
|
+
|
|
240
|
+
const backoff = backoffMs[attempt]
|
|
241
|
+
if (backoff === undefined) return false
|
|
242
|
+
await sleep(backoff)
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
|
|
179
246
|
const PROC_BIND_PROBE_SECRET = 'TYPECLAW_PROCBIND_PROBE_SECRET'
|
|
180
247
|
|
|
181
|
-
const INCONCLUSIVE: ProcBindProbe = { safe: false, cacheable: false }
|
|
248
|
+
const INCONCLUSIVE: ProcBindProbe = { verdict: 'inconclusive', cacheable: false }
|
|
182
249
|
|
|
183
250
|
async function probeProcBind(bwrap: string): Promise<ProcBindProbe> {
|
|
184
251
|
// The sentinel must model the REAL threat geometry: the agent runtime holds
|
|
@@ -277,13 +344,13 @@ async function probeProcBind(bwrap: string): Promise<ProcBindProbe> {
|
|
|
277
344
|
// "non-zero" — a non-zero exit also covers script setup failures (a bwrap that
|
|
278
345
|
// started but couldn't read /proc/self/fd), bwrap startup failures (missing
|
|
279
346
|
// lib, transient mount EBUSY → bwrap's own exit), and an external SIGKILL.
|
|
280
|
-
// Caching any of those transient failures as a definitive
|
|
347
|
+
// Caching any of those transient failures as a definitive 'unsafe' would
|
|
281
348
|
// PERMANENTLY disable proc-bind — the same cache-poisoning class as the
|
|
282
349
|
// timeout bug. So only the script's two designated codes are cacheable:
|
|
283
350
|
// PROC_BIND_SAFE (clean run, every open blocked) and PROC_BIND_LEAK (an open
|
|
284
351
|
// SUCCEEDED — a real leak). Setup failures use PROC_BIND_SETUP_FAILED, and any
|
|
285
352
|
// other code (bwrap startup, signals, 127) is treated as inconclusive.
|
|
286
|
-
if (proc.exitCode === PROC_BIND_LEAK) return { safe: false, cacheable: true }
|
|
353
|
+
if (proc.exitCode === PROC_BIND_LEAK) return { verdict: 'unsafe', cacheable: true }
|
|
287
354
|
if (proc.exitCode !== PROC_BIND_SAFE) return INCONCLUSIVE
|
|
288
355
|
// Final liveness: the in-sandbox blocked-open assertions are only meaningful
|
|
289
356
|
// if the sentinel was alive throughout. Re-read its MARKER from the PARENT —
|
|
@@ -293,12 +360,13 @@ async function probeProcBind(bwrap: string): Promise<ProcBindProbe> {
|
|
|
293
360
|
// kernel liveness, so this marker re-read is the stronger postcondition. A
|
|
294
361
|
// failure here means the sentinel vanished mid-probe → inconclusive.
|
|
295
362
|
if (!(await parentReadsSentinelMarker(sentinelPid))) return INCONCLUSIVE
|
|
296
|
-
return { safe: true, cacheable: true }
|
|
363
|
+
return { verdict: 'safe', cacheable: true }
|
|
297
364
|
} catch {
|
|
298
365
|
return INCONCLUSIVE
|
|
299
366
|
} finally {
|
|
300
367
|
try {
|
|
301
368
|
sentinel?.kill()
|
|
369
|
+
await sentinel?.exited.catch(() => {})
|
|
302
370
|
} catch {
|
|
303
371
|
// killing an already-exited sentinel can throw on some runtimes; cleanup
|
|
304
372
|
// must never propagate out of the probe.
|
package/src/sandbox/index.ts
CHANGED
|
@@ -4,7 +4,11 @@ export {
|
|
|
4
4
|
canBindProcSafely,
|
|
5
5
|
canMountRealProc,
|
|
6
6
|
ensureBwrapAvailable,
|
|
7
|
+
getProcBindSafetyVerdict,
|
|
8
|
+
PROC_BIND_RETRY_BACKOFF_MS,
|
|
9
|
+
resolveProcBindSafetyWithRetry,
|
|
7
10
|
resolveProcSelfExe,
|
|
11
|
+
type ProcBindSafetyVerdict,
|
|
8
12
|
_resetBwrapAvailabilityCacheForTests,
|
|
9
13
|
_resetProcBindProbeCacheForTests,
|
|
10
14
|
_resetRealProcProbeCacheForTests,
|
package/src/server/index.ts
CHANGED
|
@@ -1265,6 +1265,10 @@ function handleInspectMessage(
|
|
|
1265
1265
|
ws.close()
|
|
1266
1266
|
return
|
|
1267
1267
|
}
|
|
1268
|
+
if (msg.type === 'ping') {
|
|
1269
|
+
sendInspect(ws, { type: 'pong', id: msg.id })
|
|
1270
|
+
return
|
|
1271
|
+
}
|
|
1268
1272
|
if (msg.type !== 'subscribe' || typeof msg.sessionId !== 'string' || msg.sessionId === '') {
|
|
1269
1273
|
sendInspect(ws, { type: 'error', message: 'invalid inspect subscription' })
|
|
1270
1274
|
ws.close()
|
|
@@ -1314,7 +1318,7 @@ function handleInspectMessage(
|
|
|
1314
1318
|
})
|
|
1315
1319
|
}
|
|
1316
1320
|
|
|
1317
|
-
sendInspect(ws, { type: 'subscribed', sessionId: msg.sessionId, sessionLive: live !== undefined })
|
|
1321
|
+
sendInspect(ws, { type: 'subscribed', sessionId: msg.sessionId, sessionLive: live !== undefined, supportsPing: true })
|
|
1318
1322
|
}
|
|
1319
1323
|
|
|
1320
1324
|
function extractJobId(target: StreamMessage['target']): string {
|
package/src/shared/protocol.ts
CHANGED
|
@@ -44,16 +44,22 @@ export type TunnelLogsServerMessage =
|
|
|
44
44
|
| { type: 'error'; message: string }
|
|
45
45
|
| { type: 'end' }
|
|
46
46
|
|
|
47
|
-
export type InspectClientMessage =
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
47
|
+
export type InspectClientMessage =
|
|
48
|
+
| {
|
|
49
|
+
type: 'subscribe'
|
|
50
|
+
sessionId: string
|
|
51
|
+
// sinceMs is a wall-clock cutoff for backfilling broadcasts from the
|
|
52
|
+
// in-process Stream ring buffer. The client uses Date.now() - duration;
|
|
53
|
+
// omit to skip broadcast backfill. AgentSession events are NEVER
|
|
54
|
+
// backfilled (the session's pi-coding-agent subscribe API delivers
|
|
55
|
+
// future events only).
|
|
56
|
+
sinceMs?: number
|
|
57
|
+
}
|
|
58
|
+
// Steady-state liveness probe echoed back as a pong. A live tail is
|
|
59
|
+
// legitimately quiet for long stretches, so absence of inbound frames cannot
|
|
60
|
+
// distinguish "idle" from "dead"; a missed pong can. Guards a wedged
|
|
61
|
+
// WebSocket that stays ESTABLISHED yet never fires 'close'/'error'.
|
|
62
|
+
| { type: 'ping'; id: number }
|
|
57
63
|
|
|
58
64
|
export type InspectFramePayload =
|
|
59
65
|
| { kind: 'text_delta'; sessionId: string; delta: string }
|
|
@@ -123,9 +129,14 @@ export type InspectFramePayload =
|
|
|
123
129
|
}
|
|
124
130
|
|
|
125
131
|
export type InspectServerMessage =
|
|
126
|
-
|
|
132
|
+
// supportsPing is the heartbeat capability flag. A pre-heartbeat server omits
|
|
133
|
+
// it; the client must treat its absence as "no ping support" and never send a
|
|
134
|
+
// ping (an old server answers an unknown ping with an error + close, killing
|
|
135
|
+
// the tail). Strict opt-in: only an explicit true arms round-trip probing.
|
|
136
|
+
| { type: 'subscribed'; sessionId: string; sessionLive: boolean; supportsPing?: true }
|
|
127
137
|
| { type: 'frame'; ts: number; payload: InspectFramePayload }
|
|
128
138
|
| { type: 'error'; message: string }
|
|
139
|
+
| { type: 'pong'; id: number }
|
|
129
140
|
|
|
130
141
|
export type ClientMessage =
|
|
131
142
|
| { type: 'prompt'; text: string; delivery?: PromptDelivery }
|