typeclaw 0.32.0 → 0.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/package.json +1 -1
  2. package/scripts/verify-procbind-sandbox.sh +61 -0
  3. package/src/agent/multimodal/look-at.ts +7 -5
  4. package/src/agent/plugin-tools.ts +47 -12
  5. package/src/agent/session-origin.ts +15 -9
  6. package/src/agent/system-prompt.ts +6 -0
  7. package/src/agent/tools/channel-fetch-attachment.ts +8 -7
  8. package/src/agent/tools/channel-history.ts +2 -0
  9. package/src/bundled-plugins/github-cli-auth/gh-command.ts +267 -13
  10. package/src/bundled-plugins/reviewer/skills/code-review.ts +11 -9
  11. package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +1 -0
  12. package/src/channels/adapters/slack-bot-reference.ts +9 -10
  13. package/src/channels/adapters/slack-bot.ts +29 -7
  14. package/src/channels/router.ts +89 -21
  15. package/src/cli/index.ts +42 -2
  16. package/src/cli/init.ts +267 -82
  17. package/src/cli/inspect.ts +5 -2
  18. package/src/cli/model.ts +5 -1
  19. package/src/cli/provider.ts +41 -10
  20. package/src/config/config.ts +23 -11
  21. package/src/config/providers.ts +304 -7
  22. package/src/container/start.ts +12 -7
  23. package/src/init/find-agent-dir.ts +44 -0
  24. package/src/init/index.ts +3 -34
  25. package/src/init/models-dev.ts +2 -0
  26. package/src/init/validate-api-key.ts +13 -0
  27. package/src/inspect/transcript-view.ts +33 -7
  28. package/src/sandbox/availability.ts +354 -2
  29. package/src/sandbox/build.ts +17 -7
  30. package/src/sandbox/index.ts +10 -1
  31. package/src/sandbox/policy.ts +27 -9
  32. package/src/secrets/oauth-xai.ts +342 -0
  33. package/src/secrets/storage.ts +2 -0
  34. package/src/skills/typeclaw-markdown-pdf/SKILL.md +64 -5
  35. package/typeclaw.schema.json +20 -2
@@ -27,6 +27,8 @@ export type TranscriptViewOptions = {
27
27
  createTerminal?: () => Terminal
28
28
  }
29
29
 
30
+ export const MAX_LIVE_HISTORY_ENTRIES = 250
31
+
30
32
  // Read-only pi-tui transcript viewer: the rich counterpart to the line
31
33
  // renderer, matching the live TUI's look (markdown assistant blocks, formatted
32
34
  // tool panels) but with NO editor and NO websocket writes. It owns its own
@@ -45,11 +47,18 @@ export function createTranscriptView(opts: TranscriptViewOptions) {
45
47
  tui.requestRender()
46
48
 
47
49
  // The status line is pinned last (no editor to pin, unlike createTui). Each
48
- // appended history entry is inserted before it: strip status, add entry,
49
- // re-add status.
50
- const append = (component: Component): void => {
50
+ // event's components are inserted before it: strip status, add them, re-add
51
+ // status. pi-tui re-renders (and re-lays out every Markdown block in) the
52
+ // whole child list each frame, so an unbounded live tail makes per-frame cost
53
+ // grow until the viewer stalls; the window evicts the oldest entry to keep
54
+ // render cost bounded. Components are evicted per event so a timestamp never
55
+ // outlives its body. Header and pinned status are never evicted.
56
+ const history = new BoundedComponentWindow(MAX_LIVE_HISTORY_ENTRIES)
57
+ const appendEntry = (components: HistoryEntry): void => {
51
58
  tui.removeChild(status)
52
- tui.addChild(component)
59
+ const evicted = history.push(components)
60
+ if (evicted !== null) for (const component of evicted) tui.removeChild(component)
61
+ for (const component of components) tui.addChild(component)
53
62
  tui.addChild(status)
54
63
  }
55
64
 
@@ -83,15 +92,14 @@ export function createTranscriptView(opts: TranscriptViewOptions) {
83
92
  // transcripts; render per event once live.
84
93
  let live = false
85
94
  const onEvent = (event: InspectEvent): void => {
86
- append(new Text(formatEventTime(event.ts), 0, 0))
87
- append(componentFor(event))
95
+ appendEntry([new Text(formatEventTime(event.ts), 0, 0), componentFor(event)])
88
96
  if (live) tui.requestRender()
89
97
  }
90
98
  const onPhase = (phase: StreamPhase): void => {
91
99
  if (phase.phase === 'replay-end') {
92
100
  tui.requestRender()
93
101
  } else if (phase.phase === 'live-start') {
94
- append(new Text(divider(phase.sessionLive ? 'live' : 'live (broadcasts only)'), 0, 0))
102
+ appendEntry([new Text(divider(phase.sessionLive ? 'live' : 'live (broadcasts only)'), 0, 0)])
95
103
  live = true
96
104
  tui.requestRender()
97
105
  }
@@ -190,3 +198,21 @@ function statusLine(_phase: 'replay'): string {
190
198
  function divider(text: string): string {
191
199
  return colors.dim(`─── ${text} ───`)
192
200
  }
201
+
202
/** The components that make up one history entry; they are pushed and evicted together. */
export type HistoryEntry = readonly Component[]

/**
 * First-in, first-out window over history entries with a fixed capacity.
 *
 * The window only tracks entries; attaching or detaching the components of an
 * entry from any UI is the caller's job.
 */
export class BoundedComponentWindow {
  private readonly held: HistoryEntry[] = []

  /** @param maxEntries - how many entries may be held before the oldest is dropped. */
  constructor(private readonly maxEntries: number) {}

  /**
   * Appends `entry`, then drops the oldest held entry if the window is now over
   * capacity.
   *
   * @param entry - the entry to append.
   * @returns the entry that was dropped, or `null` when nothing was dropped.
   */
  push(entry: HistoryEntry): HistoryEntry | null {
    this.held.push(entry)
    const withinLimit = this.held.length <= this.maxEntries
    return withinLimit ? null : (this.held.shift() ?? null)
  }

  /** Number of entries currently held. */
  get size(): number {
    return this.held.length
  }
}
@@ -5,6 +5,13 @@ import { SandboxUnavailableError } from './errors'
5
5
  // resolved bwrap path so a test (or a consumer pinning a non-default path)
6
6
  // re-probes instead of reading another path's cached result.
7
7
  const availabilityCache = new Map<string, boolean>()
8
+ // In-flight dedup: bash calls run concurrently (subagents, cron, parallel
9
+ // tool calls), so without this two calls racing before the cache is populated
10
+ // would each spawn a probe. The promise is cleared on settle so a probe that
11
+ // was aborted (not "unavailable", just cancelled) does not poison the next
12
+ // caller — the next call re-probes from scratch. Mirrors the channels
13
+ // membership-cache in-flight pattern.
14
+ const availabilityInFlight = new Map<string, Promise<boolean>>()
8
15
 
9
16
  export async function ensureBwrapAvailable(options?: { bwrapPath?: string }): Promise<void> {
10
17
  const bwrap = options?.bwrapPath ?? 'bwrap'
@@ -12,11 +19,29 @@ export async function ensureBwrapAvailable(options?: { bwrapPath?: string }): Pr
12
19
  if (cached === true) return
13
20
  if (cached === false) throw new SandboxUnavailableError()
14
21
 
15
- const available = await probe(bwrap)
16
- availabilityCache.set(bwrap, available)
22
+ const available = await dedupedProbe(bwrap)
17
23
  if (!available) throw new SandboxUnavailableError()
18
24
  }
19
25
 
26
/**
 * Probes `bwrap` for availability, letting concurrent callers for the same
 * path share a single probe.
 *
 * @param bwrap - bwrap path; used as the key of both the result cache and the
 *   in-flight map.
 * @returns the promise of the probe's verdict (the already-registered promise
 *   when a probe for this path is in flight).
 */
function dedupedProbe(bwrap: string): Promise<boolean> {
  const pending = availabilityInFlight.get(bwrap)
  if (pending !== undefined) return pending

  // The verdict is cached whichever way it goes, `false` included, so a
  // missing bwrap is not re-probed on every call.
  const remember = (available: boolean): boolean => {
    availabilityCache.set(bwrap, available)
    return available
  }
  // Runs on settle (resolve or reject) so a finished probe never lingers as
  // "in flight".
  const release = (): void => {
    availabilityInFlight.delete(bwrap)
  }

  const started = probe(bwrap).then(remember).finally(release)
  availabilityInFlight.set(bwrap, started)
  return started
}
44
+
20
45
  async function probe(bwrap: string): Promise<boolean> {
21
46
  // Bun.spawn throws synchronously with ENOENT when the binary is not on
22
47
  // PATH, rather than resolving with a non-zero exit code — so the
@@ -32,6 +57,333 @@ async function probe(bwrap: string): Promise<boolean> {
32
57
 
33
58
  export function _resetBwrapAvailabilityCacheForTests(): void {
34
59
  availabilityCache.clear()
60
+ availabilityInFlight.clear()
61
+ }
62
+
63
+ // The 'real-proc' sandbox strategy prefixes bwrap with `unshare --pid --fork
64
+ // --mount --mount-proc`, which mounts a fresh procfs in a new PID namespace.
65
+ // That mount needs REAL CAP_SYS_ADMIN. `typeclaw start` grants the container
66
+ // `--cap-add=SYS_ADMIN` when sandbox.realProc is on, but the grant is a no-op
67
+ // on runtimes that virtualize or strip caps: rootless Docker (userns-capped
68
+ // caps), gVisor/runsc (caps never reach the host kernel), Docker Desktop
69
+ // Enhanced Container Isolation (intercepts mount), and AppArmor-enforcing
70
+ // hosts (Ubuntu 24.04+ restricts unprivileged userns even with the cap). On
71
+ // those the `unshare` fails fast with "Operation not permitted" (exit != 0)
72
+ // before bwrap runs. Probing once at the first sandboxed bash call lets the
73
+ // consumer fall back to the '--tmpfs /proc' strategy instead of failing every
74
+ // low-trust bash call — restoring the pre-realProc behavior on unsupported
75
+ // hosts (external-package execution still won't work there, exactly as before).
76
+ let realProcProbeResult: boolean | undefined
77
+ // In-flight dedup for the real-proc probe, same rationale as bwrap above:
78
+ // concurrent first bash calls would otherwise each spawn `unshare`. A single
79
+ // nullable promise suffices (no key — there is one probe), cleared on settle.
80
+ //
81
+ // Deliberately NOT abortable. The answer ("can THIS container mount a fresh
82
+ // procfs?") is a process-global capability fact, not a per-request operation —
83
+ // it does not vary with any one bash call's lifecycle. Threading a caller's
84
+ // AbortSignal here is a category error: a deduped joiner would let the first
85
+ // caller's abort decide a shared fact for everyone waiting on it. The payload
86
+ // (`/bin/true`) exits in milliseconds and the result is cached for the process,
87
+ // so cancellation buys nothing. If a supported environment ever made this probe
88
+ // slow, add an INTERNAL timeout (the result is still global), never a caller
89
+ // signal.
90
+ let realProcProbeInFlight: Promise<boolean> | undefined
91
+
92
/**
 * Reports whether the real-proc probe (`probeRealProc`) succeeds in this
 * process.
 *
 * The verdict is remembered in `realProcProbeResult` once a probe resolves, and
 * callers that arrive while a probe is running share its promise.
 *
 * @returns a promise of the cached or freshly probed verdict.
 */
export function canMountRealProc(): Promise<boolean> {
  const known = realProcProbeResult
  if (known !== undefined) return Promise.resolve(known)

  const pending = realProcProbeInFlight
  if (pending !== undefined) return pending

  const remember = (canMount: boolean): boolean => {
    realProcProbeResult = canMount
    return canMount
  }
  // Cleared on settle so a finished probe is never handed out as in-flight.
  const release = (): void => {
    realProcProbeInFlight = undefined
  }

  const started = probeRealProc().then(remember).finally(release)
  realProcProbeInFlight = started
  return started
}
107
+
108
/**
 * Runs `unshare --pid --fork --mount --mount-proc -- /bin/true` once and
 * reports whether it exited with code 0.
 *
 * @returns `true` only for exit code 0; `false` for any other exit code and for
 *   any error thrown while spawning or waiting (e.g. `unshare` not found).
 */
async function probeRealProc(): Promise<boolean> {
  // `/bin/true` does nothing itself: the exit status reflects only whether
  // `unshare` could set up the namespaces and the procfs mount.
  const argv = ['unshare', '--pid', '--fork', '--mount', '--mount-proc', '--', '/bin/true']
  try {
    const child = Bun.spawn(argv, { stdout: 'ignore', stderr: 'ignore' })
    await child.exited
    return child.exitCode === 0
  } catch {
    return false
  }
}
124
+
125
/** Test hook: forgets the cached real-proc verdict and any registered in-flight probe. */
export function _resetRealProcProbeCacheForTests(): void {
  realProcProbeInFlight = undefined
  realProcProbeResult = undefined
}
129
+
130
+ // The 'proc-bind' strategy (build.ts) does `bwrap --unshare-all ... --ro-bind
131
+ // /proc /proc`: it binds the container's already-real procfs with NO unshare
132
+ // --mount-proc and NO CAP_SYS_ADMIN, so it works where 'real-proc' is rejected
133
+ // (OrbStack). Its security rests entirely on the kernel BLOCKING cross-userns
134
+ // reads of /proc/<agent>/environ — the FIREWORKS_API_KEY / GH_TOKEN surface —
135
+ // because bwrap's --unshare-all puts the sandbox in a CHILD user namespace. That
136
+ // block is a kernel fact on every mainstream host, but the consumer must never
137
+ // assume it: a misconfigured runtime that preserves parent-userns creds, or a
138
+ // future bwrap flag change, would turn this strategy into a secret leak. So we
139
+ // PROBE it directly before ever selecting it — plant a real secret in a sibling
140
+ // process's env and assert the sandbox cannot read it back.
141
+ // Keyed by resolved bwrapPath, like ensureBwrapAvailable: the safety answer is a
142
+ // fact about a SPECIFIC bwrap binary, so a caller pinning a non-default path
143
+ // (tests, or a future deployment) must re-probe rather than inherit the default
144
+ // binary's result. In-flight dedup for the same reason as canMountRealProc:
145
+ // concurrent first callers for one path share a single probe. Both cached
146
+ // process-globally (the answer is a per-container capability fact). Not abortable
147
+ // (see canMountRealProc).
148
+ const procBindProbeCache = new Map<string, boolean>()
149
+ const procBindProbeInFlight = new Map<string, Promise<boolean>>()
150
+
151
+ // `safe` is the answer; `cacheable` is false for INCONCLUSIVE outcomes (a probe
152
+ // timeout under load, or the sentinel dying mid-probe). Those are transient
153
+ // failure modes, not capability facts, so caching their `safe=false` would
154
+ // PERMANENTLY disable proc-bind for the process — a single slow first bash call
155
+ // would silently break every later bunx until container restart (the exact
156
+ // "works after restart" symptom this whole fix exists to kill). Only a probe that
157
+ // ran to a verdict (definitively safe OR definitively leaking) is cached.
158
+ type ProcBindProbe = { safe: boolean; cacheable: boolean }
159
+
160
/**
 * Reports whether the proc-bind probe judged the given bwrap binary safe.
 *
 * Verdicts are cached per bwrap path, but only when the probe marks them
 * `cacheable`; a non-cacheable result is returned to the caller and probed
 * again on the next call. Callers that arrive while a probe for the same path
 * is running share its promise.
 *
 * @param options - optional `bwrapPath`; defaults to `'bwrap'`.
 * @returns a promise of the cached or freshly probed `safe` flag.
 */
export function canBindProcSafely(options?: { bwrapPath?: string }): Promise<boolean> {
  const bwrap = options?.bwrapPath ?? 'bwrap'

  const known = procBindProbeCache.get(bwrap)
  if (known !== undefined) return Promise.resolve(known)

  const pending = procBindProbeInFlight.get(bwrap)
  if (pending !== undefined) return pending

  const remember = (result: ProcBindProbe): boolean => {
    if (result.cacheable) procBindProbeCache.set(bwrap, result.safe)
    return result.safe
  }
  // Cleared on settle so a finished probe is never handed out as in-flight.
  const release = (): void => {
    procBindProbeInFlight.delete(bwrap)
  }

  const started = probeProcBind(bwrap).then(remember).finally(release)
  procBindProbeInFlight.set(bwrap, started)
  return started
}
178
+
179
+ const PROC_BIND_PROBE_SECRET = 'TYPECLAW_PROCBIND_PROBE_SECRET'
180
+
181
+ const INCONCLUSIVE: ProcBindProbe = { safe: false, cacheable: false }
182
+
183
/**
 * Probes whether a process run under the proc-bind bwrap shape can open the
 * environ/maps of a sibling process that lives outside the sandbox.
 *
 * A "sentinel" (`sleep 30` started through `env -i` with only the probe marker
 * in its environment) is spawned as a plain child of this process. The probe
 * script is then run inside bwrap with `/proc` read-only bound, and its exit
 * code is interpreted.
 *
 * @param bwrap - path of the bwrap binary to run the probe with.
 * @returns `{ safe, cacheable }`. Only the script's two designated verdict
 *   codes give a cacheable result; every other outcome (sentinel setup failure,
 *   timeout, sentinel exit, unexpected exit code, thrown error) is
 *   `INCONCLUSIVE`.
 */
async function probeProcBind(bwrap: string): Promise<ProcBindProbe> {
  let sentinel: Bun.Subprocess | undefined
  try {
    // `env -i` gives the sentinel an environment holding nothing but the
    // marker, so the probe never plants one of this process's own variables.
    // The 30s sleep comfortably outlasts the probe, yet ends by itself should
    // the cleanup below not run.
    const holder = Bun.spawn(['/usr/bin/env', '-i', `${PROC_BIND_PROBE_SECRET}=leaked`, '/bin/sleep', '30'], {
      stdout: 'ignore',
      stderr: 'ignore',
    })
    sentinel = holder
    const sentinelPid = holder.pid

    // Without a pid there is nothing to probe against. That is a local setup
    // failure, not a verdict.
    if (sentinelPid === undefined) return INCONCLUSIVE

    // Precondition: this process must be able to read the marker back from the
    // sentinel's environ. That shows the pid is our sentinel and that its
    // environ is readable from outside the sandbox, so a blocked open inside
    // the sandbox can be attributed to the sandbox boundary.
    const readableBefore = await parentReadsSentinelMarker(sentinelPid)
    if (!readableBefore) return INCONCLUSIVE

    // `--dev /dev` is required: the probe script redirects to /dev/null.
    const sandboxArgv = [
      bwrap,
      '--unshare-all',
      '--clearenv',
      '--ro-bind',
      '/usr',
      '/usr',
      '--dev',
      '/dev',
      '--ro-bind-try',
      '/bin',
      '/bin',
      '--ro-bind-try',
      '/lib',
      '/lib',
      '--ro-bind-try',
      '/lib64',
      '/lib64',
      '--ro-bind',
      '/proc',
      '/proc',
      '--',
      '/bin/sh',
      '-c',
      buildProcBindProbeScript(sentinelPid),
    ]
    const sandboxed = Bun.spawn(sandboxArgv, { stdout: 'ignore', stderr: 'ignore' })

    // Whichever settles first decides how to proceed:
    //   'probe'         - the sandboxed script exited; read its exit code below
    //   'sentinel-died' - the sentinel exited first, so a failed open inside the
    //                     sandbox could mean "pid gone" rather than "blocked"
    //   'timeout'       - the sandboxed probe did not finish in time
    // The timer is always cleared afterwards so it cannot keep the event loop
    // alive once the race is decided.
    let deadline: ReturnType<typeof setTimeout> | undefined
    try {
      const winner = await Promise.race([
        sandboxed.exited.then(() => 'probe' as const),
        holder.exited.then(() => 'sentinel-died' as const),
        new Promise<'timeout'>((resolve) => {
          deadline = setTimeout(() => resolve('timeout'), PROC_BIND_PROBE_TIMEOUT_MS)
        }),
      ])
      if (winner !== 'probe') {
        // Kill the probe and wait for it to exit; errors while waiting are
        // ignored so cleanup cannot throw out of here.
        sandboxed.kill('SIGKILL')
        await sandboxed.exited.catch(() => {})
        return INCONCLUSIVE
      }
    } finally {
      if (deadline !== undefined) clearTimeout(deadline)
    }

    // Only the script's own designated codes count as verdicts. Any other exit
    // code (setup failure, bwrap startup failure, signal, ...) stays
    // inconclusive so it is never cached.
    switch (sandboxed.exitCode) {
      case PROC_BIND_LEAK:
        return { safe: false, cacheable: true }
      case PROC_BIND_SAFE:
        break
      default:
        return INCONCLUSIVE
    }

    // Postcondition: the marker must still be readable from this process,
    // i.e. the sentinel was still there when the sandboxed opens were refused.
    const readableAfter = await parentReadsSentinelMarker(sentinelPid)
    return readableAfter ? { safe: true, cacheable: true } : INCONCLUSIVE
  } catch {
    return INCONCLUSIVE
  } finally {
    try {
      sentinel?.kill()
    } catch {
      // Best effort: a failure to kill the sentinel must not escape the probe.
    }
  }
}
308
+
309
+ // Cap on the in-sandbox bwrap probe so a wedged runtime cannot stall the first
310
+ // low-trust bash call. The probe normally completes in a few ms; this is a
311
+ // generous ceiling, not a tuning knob.
312
+ const PROC_BIND_PROBE_TIMEOUT_MS = 5_000
313
+
314
+ // Designated probe-script exit codes. ONLY these two are a cacheable verdict;
315
+ // every other code (a setup failure, bwrap startup failure, a signal, 127, …) is
316
+ // inconclusive and must NOT be cached — see the exit-code interpretation in
317
+ // probeProcBind. Chosen well clear of the shell's own conventional codes (1, 2,
318
+ // 126, 127, 128+n) so a setup/bwrap/signal failure can never be mistaken for the
319
+ // safe or leak verdict.
320
+ const PROC_BIND_SAFE = 0
321
+ const PROC_BIND_LEAK = 10
322
+ const PROC_BIND_SETUP_FAILED = 20
323
+
324
+ // The in-sandbox assertion, built as a pure function so a unit test can pin its
325
+ // shape (the integration behavior needs a Linux container + bwrap, unrunnable in
326
+ // CI). It must prove the secret block holds for the RIGHT REASON, not by
327
+ // accident: a naive `cat environ && exit 1` exits non-zero for BOTH a permission
328
+ // failure (EACCES — the real userns block, SAFE) and a missing process (ESRCH —
329
+ // the sentinel died, proves NOTHING), so on a host that actually leaks a sentinel
330
+ // that exited early would false-pass. The checks, in order:
331
+ // 1. self /proc/self/{fd,maps} readable — the property that makes bunx work.
332
+ // 2. the sentinel is ALIVE — /proc/<pid>/status readable. A dead pid fails
333
+ // here, so a later open-failure cannot be ESRCH.
334
+ // 3. environ + maps OPENS fail. `(: < path)` is the no-op builtin with a read
335
+ // redirect: the SHELL opens the file (the same open(2) path Bun/an attacker
336
+ // uses), so a cross-userns EACCES makes the redirect fail and the leak-exit
337
+ // is skipped, while a successful open (a leak) runs `exit PROC_BIND_LEAK`.
338
+ // This replaces an earlier `cat … | grep 'Permission denied'`, which
339
+ // depended on a localized errno STRING (a non-C locale would mistranslate it
340
+ // → grep miss → silent fallback to tmpfs → the bunx crash returns) and on
341
+ // PATH resolving `cat`/`grep` under --clearenv. The redirect uses no external
342
+ // command and no error text, so it is locale- and PATH-independent.
343
+ // The exit codes are DISTINCT by outcome so the caller can cache only definitive
344
+ // verdicts: setup checks exit PROC_BIND_SETUP_FAILED (inconclusive — a bwrap that
345
+ // started but lacks /proc/self/fd, etc.), a detected leak exits PROC_BIND_LEAK
346
+ // (definitive unsafe), a clean run exits PROC_BIND_SAFE. A bare `exit 1` would
347
+ // conflate setup failures with leaks and poison the cache.
348
+ // NOTE: `test -r` is deliberately NOT used for the protected files. It asks
349
+ // access(2) (permission bits + uid), which on a same-uid /proc/<pid>/environ
350
+ // returns "readable" even when the ptrace-gated open(2) is actually blocked —
351
+ // empirically verified. Only an open attempt exercises the real leak path.
352
+ // The `2>/dev/null` needs /dev/null, so the probe's bwrap args include `--dev
353
+ // /dev` (matching build.ts's proc-bind branch). Without it the redirect fails and
354
+ // the verdict is unreliable — the bwrap probe MUST keep `--dev /dev`.
355
/**
 * Builds the `sh -c` script that the proc-bind probe runs inside the sandbox.
 *
 * The script is a `; `-joined sequence:
 *   1. three `test -r` setup checks (`/proc/self/fd`, `/proc/self/maps`, the
 *      sentinel's `status`), each exiting `PROC_BIND_SETUP_FAILED` on failure;
 *   2. two open attempts (`(: < path)`) on the sentinel's `environ` and `maps`,
 *      each exiting `PROC_BIND_LEAK` if the open succeeds;
 *   3. `exit PROC_BIND_SAFE`.
 *
 * @param sentinelPid - pid whose `/proc/<pid>` entries the script inspects.
 * @returns the script text.
 */
export function buildProcBindProbeScript(sentinelPid: number): string {
  const sentinelDir = `/proc/${sentinelPid}`

  const setupChecks = ['/proc/self/fd', '/proc/self/maps', `${sentinelDir}/status`].map(
    (path) => `test -r ${path} || exit ${PROC_BIND_SETUP_FAILED}`,
  )
  // The redirect makes the shell itself open the file; a successful open is the leak.
  const leakChecks = [`${sentinelDir}/environ`, `${sentinelDir}/maps`].map(
    (path) => `(: < ${path}) 2>/dev/null && exit ${PROC_BIND_LEAK}`,
  )

  return [...setupChecks, ...leakChecks, `exit ${PROC_BIND_SAFE}`].join('; ')
}
366
+
367
/**
 * Reads `/proc/<sentinelPid>/environ` from this process and checks that it
 * contains the probe marker.
 *
 * The marker text is matched rather than just checking that the read
 * succeeded, so an empty read, or a pid whose environ does not carry the
 * marker, counts as a failure.
 *
 * @param sentinelPid - pid of the sentinel process.
 * @returns `true` when the marker is present; `false` when it is absent or the
 *   read throws.
 */
async function parentReadsSentinelMarker(sentinelPid: number): Promise<boolean> {
  const marker = `${PROC_BIND_PROBE_SECRET}=leaked`
  const environPath = `/proc/${sentinelPid}/environ`
  try {
    // `.bytes()` performs the actual read of the file contents.
    const raw = await Bun.file(environPath).bytes()
    const text = new TextDecoder().decode(raw)
    return text.includes(marker)
  } catch {
    return false
  }
}
383
+
384
/** Test hook: drops every cached proc-bind verdict and every registered in-flight probe. */
export function _resetProcBindProbeCacheForTests(): void {
  procBindProbeInFlight.clear()
  procBindProbeCache.clear()
}
36
388
 
37
389
  // The bun binary this process runs as (process.execPath). build.ts re-exposes
@@ -38,6 +38,7 @@ function buildArgv(command: string, policy: SandboxPolicy): string[] {
38
38
  const bwrap = policy.bwrapPath ?? 'bwrap'
39
39
  const procStrategy = policy.proc ?? 'tmpfs'
40
40
  const realProc = procStrategy === 'real-proc'
41
+ const procBind = procStrategy === 'proc-bind'
41
42
 
42
43
  // 'real-proc' splits PID-namespace ownership from bwrap. `unshare --pid
43
44
  // --fork --mount --mount-proc` (util-linux, baseline) creates the new PID +
@@ -118,13 +119,22 @@ function buildArgv(command: string, policy: SandboxPolicy): string[] {
118
119
  '/lib64',
119
120
  )
120
121
 
121
- if (realProc) {
122
- // The outer `unshare --mount-proc` already mounted a fresh procfs scoped to
123
- // the new PID namespace. --ro-bind /proc /proc binds THAT procfs (not the
124
- // outer container's), so the child gets real /proc/self/{fd,maps} and the
125
- // agent runtime's pids — and their /proc/N/environ secrets are simply
126
- // absent from this namespace. No /proc/self/exe symlink is needed: a real
127
- // /proc/self/exe already resolves correctly.
122
+ if (realProc || procBind) {
123
+ // --ro-bind /proc /proc gives the child a real /proc/self/{fd,maps,exe} so a
124
+ // JS package runner's spawned bin stops aborting with Bun's ENOTDIR. The two
125
+ // strategies differ only in WHICH procfs is bound:
126
+ // real-proc: the outer `unshare --mount-proc` already mounted a fresh
127
+ // procfs scoped to the new PID namespace, so this binds THAT — the agent
128
+ // runtime's pids are absent from the namespace entirely (full PID
129
+ // isolation), at the cost of CAP_SYS_ADMIN for the mount.
130
+ // proc-bind: no unshare/mount, so this binds the container's ALREADY-REAL
131
+ // procfs. The agent runtime's pids ARE present, but --unshare-all put
132
+ // this bash in a CHILD user namespace, so the kernel's
133
+ // PTRACE_MODE_READ_FSCREDS check blocks /proc/<agent>/environ (EACCES)
134
+ // and kill()/ptrace against them fail EPERM (no CAP_KILL in the parent
135
+ // userns). Only non-secret metadata (cmdline/status) stays visible.
136
+ // No /proc/self/exe symlink is needed in either case: a real /proc/self/exe
137
+ // resolves correctly.
128
138
  argv.push('--ro-bind', '/proc', '/proc')
129
139
  } else if (procStrategy === 'tmpfs') {
130
140
  // --tmpfs /proc, never --proc /proc (OrbStack's kernel blocks
@@ -1,5 +1,14 @@
1
1
  export { buildSandboxedCommand, type SandboxedCommand } from './build'
2
- export { ensureBwrapAvailable, resolveProcSelfExe, _resetBwrapAvailabilityCacheForTests } from './availability'
2
+ export {
3
+ buildProcBindProbeScript,
4
+ canBindProcSafely,
5
+ canMountRealProc,
6
+ ensureBwrapAvailable,
7
+ resolveProcSelfExe,
8
+ _resetBwrapAvailabilityCacheForTests,
9
+ _resetProcBindProbeCacheForTests,
10
+ _resetRealProcProbeCacheForTests,
11
+ } from './availability'
3
12
  export { resolveHiddenPaths, type HiddenPaths } from './hidden-paths'
4
13
  export {
5
14
  resolveProtectedZones,
@@ -6,15 +6,33 @@ export type SandboxMount =
6
6
 
7
7
  export type SandboxNetwork = 'none' | 'inherit'
8
8
 
9
- // 'tmpfs' (default): empty /proc + a single /proc/self/exe symlink. Works on
10
- // every host but gives no /proc/self/{fd,maps}, so a JS package runner's CHILD
11
- // (the spawned bin) crashes with ENOTDIR reading /proc/self/fd. 'none': no
12
- // /proc at all. 'real-proc': mount a fresh procfs scoped to a NEW pid namespace
13
- // so the child gets a real /proc/self/{fd,maps} WITHOUT seeing the agent
14
- // runtime's pids (no /proc/<agent>/environ leak). 'real-proc' requires the
15
- // outer container to hold CAP_SYS_ADMIN (mount(2) of proc); start.ts only grants
16
- // it when the operator opts in via typeclaw.json#sandbox.realProc.
17
- export type SandboxProcStrategy = 'tmpfs' | 'none' | 'real-proc'
9
+ // 'proc-bind' (the runtime default): --ro-bind /proc /proc binds the container's
10
+ // ALREADY-REAL procfs straight into the sandbox — no `unshare --mount-proc`, no
11
+ // CAP_SYS_ADMIN. A JS package runner's child gets a real /proc/self/{fd,maps,exe}
12
+ // so `bunx`/`bun add`/`bun run <pkg-bin>` stop aborting with Bun's ENOTDIR. The
13
+ // agent runtime's /proc/<agent>/environ (FIREWORKS_API_KEY, GH_TOKEN) is NOT
14
+ // leaked: build.ts always emits --unshare-user, so the sandboxed bash runs as
15
+ // mapped-root in a CHILD user namespace that is not an ancestor of the agent
16
+ // runtime's userns. The kernel's PTRACE_MODE_READ_FSCREDS check on
17
+ // /proc/<pid>/environ then fails (EACCES), and kill()/ptrace against those pids
18
+ // fail EPERM (no CAP_KILL in the parent userns) — both verified empirically on
19
+ // live OrbStack. The residual is non-secret metadata (other pids' cmdline/status
20
+ // are visible); accepted on the single-tenant boundary, since environ/mem/ptrace
21
+ // /signal — the API-key surface — stay blocked. Works on every host INCLUDING
22
+ // OrbStack (which denies `unshare --mount-proc` even with CAP_SYS_ADMIN), and is
23
+ // the default precisely because it needs no outer-container capability.
24
+ // 'real-proc' (opt-in, sandbox.realProc=true): mount a fresh procfs scoped to a
25
+ // NEW pid namespace via `unshare --pid --fork --mount --mount-proc`. Adds full
26
+ // PID isolation (the agent runtime's pids are absent from the namespace, not just
27
+ // unreadable) on top of the environ guard, but needs CAP_SYS_ADMIN (mount(2) of
28
+ // proc) — granted by start.ts only when realProc is set — and is rejected on
29
+ // OrbStack. Chosen only when the cap-mount probe confirms it actually works.
30
+ // 'tmpfs' (last-resort degraded fallback): empty /proc + a single /proc/self/exe
31
+ // symlink. Works on every host but gives no /proc/self/{fd,maps}, so a JS package
32
+ // runner's CHILD (the spawned bin) crashes with ENOTDIR reading /proc/self/fd —
33
+ // external packages can't run in the sandbox under this strategy. 'none': no
34
+ // /proc at all.
35
+ export type SandboxProcStrategy = 'tmpfs' | 'none' | 'real-proc' | 'proc-bind'
18
36
 
19
37
  export type SandboxEnvPolicy = {
20
38
  set?: Record<string, string>