typeclaw 0.36.1 → 0.36.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/package.json +2 -2
  2. package/src/agent/index.ts +11 -0
  3. package/src/agent/plugin-tools.ts +43 -21
  4. package/src/agent/restart/index.ts +6 -0
  5. package/src/agent/restart-handoff/index.ts +10 -0
  6. package/src/agent/system-prompt.ts +6 -0
  7. package/src/agent/tools/restart.ts +9 -0
  8. package/src/bundled-plugins/backup/README.md +11 -2
  9. package/src/bundled-plugins/backup/git-auth.ts +58 -0
  10. package/src/bundled-plugins/backup/index.ts +54 -0
  11. package/src/bundled-plugins/backup/runner.ts +82 -12
  12. package/src/channels/adapters/discord-bot-reactions.ts +1 -0
  13. package/src/channels/adapters/line-attachment.ts +97 -0
  14. package/src/channels/adapters/line-classify.ts +14 -3
  15. package/src/channels/adapters/line.ts +5 -1
  16. package/src/channels/manager.ts +15 -3
  17. package/src/channels/router.ts +67 -16
  18. package/src/cli/hostd.ts +37 -4
  19. package/src/cli/reload.ts +26 -5
  20. package/src/cli/ui.ts +6 -0
  21. package/src/container/index.ts +1 -0
  22. package/src/container/start.ts +6 -0
  23. package/src/init/reconcile-plugin-deps.ts +45 -15
  24. package/src/init/restart-deps-preflight.ts +155 -0
  25. package/src/permissions/permissions.ts +24 -4
  26. package/src/plugin/loader.ts +16 -4
  27. package/src/plugin/manager.ts +175 -71
  28. package/src/reload/client.ts +14 -3
  29. package/src/reload/docker-exec-client.ts +109 -0
  30. package/src/reload/index.ts +7 -1
  31. package/src/reload/recover.ts +38 -0
  32. package/src/run/codex-fetch-observer.ts +57 -5
  33. package/src/run/index.ts +5 -0
  34. package/src/sandbox/availability.ts +58 -15
  35. package/src/sandbox/errors.ts +26 -0
  36. package/src/sandbox/index.ts +6 -1
  37. package/src/sandbox/policy.ts +11 -0
  38. package/src/skills/typeclaw-config/SKILL.md +2 -2
  39. package/src/skills/typeclaw-monorepo/SKILL.md +7 -5
  40. package/src/skills/typeclaw-plugins/SKILL.md +11 -2
@@ -0,0 +1,109 @@
1
+ import {
2
+ CONTAINER_PORT,
3
+ containerNameFromCwd,
4
+ defaultDockerExec,
5
+ sanitizeDockerStderr,
6
+ type DockerExec,
7
+ type DockerExecResult,
8
+ } from '@/container'
9
+
10
+ import type { ReloadResult } from './types'
11
+
12
+ export type RequestReloadViaDockerExecOptions = {
13
+ cwd: string
14
+ token: string | null
15
+ scope?: string
16
+ timeoutMs?: number
17
+ exec?: DockerExec
18
+ }
19
+
20
+ type DockerExecReloadEnvelope = { ok: true; results: ReloadResult[] } | { ok: false; reason: string }
21
+
22
+ const DEFAULT_TIMEOUT_MS = 30_000
23
+
24
+ const RELOAD_SCRIPT = String.raw`
25
+ const timeoutMs = Number(process.env.TYPECLAW_RELOAD_TIMEOUT_MS ?? '30000')
26
+ const url = new URL('ws://127.0.0.1:' + (process.env.TYPECLAW_CONTAINER_PORT ?? '8973'))
27
+ if (process.env.TYPECLAW_TUI_TOKEN) url.searchParams.set('token', process.env.TYPECLAW_TUI_TOKEN)
28
+ const ws = new WebSocket(url.toString())
29
+ let settled = false
30
+ const finish = (payload, code) => {
31
+ if (settled) return
32
+ settled = true
33
+ console.log(JSON.stringify(payload))
34
+ if (ws.readyState === WebSocket.CONNECTING || ws.readyState === WebSocket.OPEN) ws.close()
35
+ setTimeout(() => process.exit(code), 0)
36
+ }
37
+ const timer = setTimeout(() => finish({ ok: false, reason: 'timed out waiting for container-local reload_result after ' + timeoutMs + 'ms' }, 1), timeoutMs)
38
+ ws.addEventListener('open', () => {
39
+ const scope = process.env.TYPECLAW_RELOAD_SCOPE
40
+ ws.send(JSON.stringify(scope ? { type: 'reload', scope } : { type: 'reload' }))
41
+ })
42
+ ws.addEventListener('message', (event) => {
43
+ const msg = JSON.parse(String(event.data))
44
+ if (msg.type !== 'reload_result') return
45
+ clearTimeout(timer)
46
+ finish({ ok: true, results: msg.results }, 0)
47
+ })
48
+ ws.addEventListener('error', (event) => finish({ ok: false, reason: String(event.message ?? event) }, 1))
49
+ ws.addEventListener('close', () => finish({ ok: false, reason: 'container-local websocket closed before reload_result' }, 1))
50
+ `
51
+
52
+ export async function requestReloadViaDockerExec({
53
+ cwd,
54
+ token,
55
+ scope,
56
+ timeoutMs = DEFAULT_TIMEOUT_MS,
57
+ exec = defaultDockerExec,
58
+ }: RequestReloadViaDockerExecOptions): Promise<ReloadResult[]> {
59
+ const envArgs = ['-e', `TYPECLAW_CONTAINER_PORT=${CONTAINER_PORT}`, '-e', `TYPECLAW_RELOAD_TIMEOUT_MS=${timeoutMs}`]
60
+ if (token !== null) envArgs.push('-e', `TYPECLAW_TUI_TOKEN=${token}`)
61
+ if (scope !== undefined) envArgs.push('-e', `TYPECLAW_RELOAD_SCOPE=${scope}`)
62
+
63
+ const signal = AbortSignal.timeout(timeoutMs)
64
+ let result: DockerExecResult
65
+ try {
66
+ result = await exec(['exec', ...envArgs, containerNameFromCwd(cwd), 'bun', '-e', RELOAD_SCRIPT], { signal })
67
+ } catch (err) {
68
+ if (signal.aborted) throw new Error(`docker exec timed out after ${timeoutMs}ms`)
69
+ throw err
70
+ }
71
+ if (signal.aborted) throw new Error(`docker exec timed out after ${timeoutMs}ms`)
72
+ if (result.exitCode !== 0) {
73
+ const envelope = parseEnvelope(result.stdout)
74
+ if (envelope !== null && !envelope.ok) throw new Error(envelope.reason)
75
+ const reason =
76
+ sanitizeDockerStderr(result.stderr) || result.stdout.trim() || `docker exec exited with code ${result.exitCode}`
77
+ throw new Error(reason)
78
+ }
79
+
80
+ const envelope = parseEnvelope(result.stdout)
81
+ if (envelope === null) throw new Error('container-local reload returned invalid JSON')
82
+ if (!envelope.ok) throw new Error(envelope.reason)
83
+ return envelope.results
84
+ }
85
+
86
+ function parseEnvelope(stdout: string): DockerExecReloadEnvelope | null {
87
+ const line = stdout
88
+ .split('\n')
89
+ .map((entry) => entry.trim())
90
+ .filter((entry) => entry.length > 0)
91
+ .at(-1)
92
+ if (line === undefined) return null
93
+ try {
94
+ const parsed: unknown = JSON.parse(line)
95
+ return isEnvelope(parsed) ? parsed : null
96
+ } catch {
97
+ return null
98
+ }
99
+ }
100
+
101
+ function isEnvelope(value: unknown): value is DockerExecReloadEnvelope {
102
+ if (!isRecord(value) || typeof value.ok !== 'boolean') return false
103
+ if (value.ok) return Array.isArray(value.results)
104
+ return typeof value.reason === 'string'
105
+ }
106
+
107
+ function isRecord(value: unknown): value is Record<string, unknown> {
108
+ return typeof value === 'object' && value !== null
109
+ }
@@ -1,4 +1,10 @@
1
- export { requestReload, type RequestReloadOptions } from './client'
1
+ export { ReloadConnectionError, requestReload, type RequestReloadOptions } from './client'
2
+ export { requestReloadViaDockerExec, type RequestReloadViaDockerExecOptions } from './docker-exec-client'
2
3
  export { formatChannelReloadSummary } from './format'
3
4
  export { ReloadRegistry } from './registry'
5
+ export {
6
+ requestReloadWithFallback,
7
+ type RequestReloadWithFallbackOptions,
8
+ type RequestReloadWithFallbackResult,
9
+ } from './recover'
4
10
  export type { Reloadable, ReloadAllResult, ReloadResult } from './types'
@@ -0,0 +1,38 @@
1
+ import { ReloadConnectionError, requestReload } from './client'
2
+ import { requestReloadViaDockerExec } from './docker-exec-client'
3
+ import type { ReloadResult } from './types'
4
+
5
+ export type RequestReloadWithFallbackOptions = {
6
+ url: string
7
+ cwd?: string
8
+ token?: string | null
9
+ scope?: string
10
+ timeoutMs?: number
11
+ reload?: typeof requestReload
12
+ reloadViaDockerExec?: typeof requestReloadViaDockerExec
13
+ }
14
+
15
+ export type RequestReloadWithFallbackResult =
16
+ | { transport: 'host'; results: ReloadResult[] }
17
+ | { transport: 'container-local'; results: ReloadResult[]; hostError: string }
18
+
19
+ export async function requestReloadWithFallback({
20
+ url,
21
+ cwd,
22
+ token,
23
+ scope,
24
+ timeoutMs,
25
+ reload = requestReload,
26
+ reloadViaDockerExec = requestReloadViaDockerExec,
27
+ }: RequestReloadWithFallbackOptions): Promise<RequestReloadWithFallbackResult> {
28
+ try {
29
+ return { transport: 'host', results: await reload({ url, scope, timeoutMs }) }
30
+ } catch (err) {
31
+ if (!(err instanceof ReloadConnectionError) || cwd === undefined || token === undefined) throw err
32
+ return {
33
+ transport: 'container-local',
34
+ results: await reloadViaDockerExec({ cwd, token, scope, timeoutMs }),
35
+ hostError: err.message,
36
+ }
37
+ }
38
+ }
@@ -25,9 +25,29 @@ export type CodexFetchObserverOptions = {
25
25
  ttfbMs?: number
26
26
  // Override the sliding inter-chunk idle deadline applied to the SSE body
27
27
  // reader. Resets on every chunk; if no bytes arrive within this window the
28
- // body stream errors. Default: 300_000 ms, matches `openai/codex`'s Rust CLI
29
- // `DEFAULT_STREAM_IDLE_TIMEOUT_MS`. Set to 0 to disable just this timer.
28
+ // body stream errors. Like the overall deadline, this doubles as a recovery
29
+ // bound: on a silent stall the user waits this long before the retry fires,
30
+ // so it should not exceed the overall ceiling. Default 120_000 ms (was
31
+ // 300_000, which matched `openai/codex`'s Rust CLI but is 5min of dead air
32
+ // before recovery). 120s is loose enough for OpenAI's keepalive-less
33
+ // reasoning pauses (the Responses API sends no SSE heartbeats, so a quiet
34
+ // reasoning window is genuinely byte-silent) while bounded by the overall
35
+ // cap. Set to 0 to disable just this timer.
30
36
  idleMs?: number
37
+ // Override the absolute wall-clock ceiling on a single Codex request,
38
+ // measured from fetch start to body completion. Unlike `idleMs`, it does NOT
39
+ // reset on chunk arrival, so it catches a "slow-trickle" stream that emits
40
+ // bytes inside every idle window yet never reaches a terminal SSE event —
41
+ // the failure mode behind issue #394's multi-minute hang (one observed
42
+ // request occupied 901s before Bun's OS socket deadline fired). On expiry the
43
+ // request is aborted with a retryable error, so this also bounds how long a
44
+ // user waits before the retry fires — keeping it low is a UX requirement, not
45
+ // just a safety net. Default 120_000 ms: across 96 observed requests the
46
+ // slowest *healthy* (completed) one was 45s and p99 was ~30s, with a clean
47
+ // gap up to the 901s hang — so 120s is ~2.7x the healthy max (ample headroom
48
+ // for PoP/TLS outliers) while capping a real hang at ~2min instead of ~15min.
49
+ // Set to 0 to disable just this timer.
50
+ overallMs?: number
31
51
  // Schedule fn for tests. Receives (delayMs, callback) and returns a handle
32
52
  // the wrapper can pass to `clear`. Default: `setTimeout`/`clearTimeout`.
33
53
  scheduler?: TimeoutScheduler
@@ -44,8 +64,10 @@ const ENV_DISABLE_OBSERVER = 'TYPECLAW_CODEX_FETCH_OBSERVER'
44
64
  const ENV_DISABLE_TIMEOUTS = 'TYPECLAW_CODEX_TIMEOUTS'
45
65
  const ENV_TTFB_MS = 'TYPECLAW_CODEX_TTFB_MS'
46
66
  const ENV_IDLE_MS = 'TYPECLAW_CODEX_IDLE_MS'
67
+ const ENV_OVERALL_MS = 'TYPECLAW_CODEX_OVERALL_MS'
47
68
  const DEFAULT_TTFB_MS = 15_000
48
- const DEFAULT_IDLE_MS = 300_000
69
+ const DEFAULT_IDLE_MS = 120_000
70
+ const DEFAULT_OVERALL_MS = 120_000
49
71
  const LOG_PREFIX = '[codex-fetch]'
50
72
 
51
73
  const defaultScheduler: TimeoutScheduler = {
@@ -126,6 +148,7 @@ function readEnvMs(name: string, fallback: number): number {
126
148
 
127
149
  type BodyTapConfig = {
128
150
  idleMs: number
151
+ overallMs: number
129
152
  scheduler: TimeoutScheduler
130
153
  }
131
154
 
@@ -193,17 +216,44 @@ function attachBodyTimingTap(
193
216
 
194
217
  const piped = response.body.pipeThrough(tap, { preventCancel: false })
195
218
 
196
- const idleController = config.idleMs > 0 ? new AbortController() : null
219
+ const idleController = config.idleMs > 0 || config.overallMs > 0 ? new AbortController() : null
197
220
  let idleHandle: unknown = null
198
221
  const armIdleTimer = () => {
199
- if (idleController === null) return
222
+ if (idleController === null || config.idleMs <= 0) return
200
223
  if (idleHandle !== null) config.scheduler.clear(idleHandle)
201
224
  idleHandle = config.scheduler.set(config.idleMs, () => {
202
225
  cause = 'idle_timeout'
203
226
  idleController.abort(new Error(`Codex SSE body idle for ${config.idleMs}ms (typeclaw observer timeout)`))
204
227
  })
205
228
  }
229
+
230
+ // Absolute ceiling on the whole request, armed once and never reset. The
231
+ // budget is measured from `start` (before originalFetch), so the time already
232
+ // spent waiting for headers is subtracted here — otherwise a slow-headers
233
+ // request would get a fresh full `overallMs` for its body on top of the
234
+ // headers wait, doubling the intended ceiling. A non-positive remainder means
235
+ // the budget is already spent, so we schedule at 0 to abort on the next tick.
236
+ // Aborts the shared controller so the existing reader race tears the stream
237
+ // down on the first deadline to fire — idle or overall, whichever comes first.
238
+ let overallHandle: unknown = null
239
+ if (idleController !== null && config.overallMs > 0) {
240
+ const remainingOverallMs = Math.max(0, config.overallMs - (now() - start))
241
+ overallHandle = config.scheduler.set(remainingOverallMs, () => {
242
+ cause = 'overall_timeout'
243
+ idleController.abort(
244
+ new Error(`Codex SSE body exceeded overall deadline of ${config.overallMs}ms (typeclaw observer timeout)`),
245
+ )
246
+ })
247
+ }
248
+ const disarmOverallTimer = () => {
249
+ if (overallHandle !== null) {
250
+ config.scheduler.clear(overallHandle)
251
+ overallHandle = null
252
+ }
253
+ }
254
+
206
255
  const disarmIdleTimer = () => {
256
+ disarmOverallTimer()
207
257
  if (idleHandle !== null) {
208
258
  config.scheduler.clear(idleHandle)
209
259
  idleHandle = null
@@ -295,6 +345,7 @@ export function installCodexFetchObserver(opts: CodexFetchObserverOptions = {}):
295
345
  const timeoutsEnabled = process.env[ENV_DISABLE_TIMEOUTS] !== 'off'
296
346
  const ttfbMs = timeoutsEnabled ? (opts.ttfbMs ?? readEnvMs(ENV_TTFB_MS, DEFAULT_TTFB_MS)) : 0
297
347
  const idleMs = timeoutsEnabled ? (opts.idleMs ?? readEnvMs(ENV_IDLE_MS, DEFAULT_IDLE_MS)) : 0
348
+ const overallMs = timeoutsEnabled ? (opts.overallMs ?? readEnvMs(ENV_OVERALL_MS, DEFAULT_OVERALL_MS)) : 0
298
349
  const originalFetch = globalThis.fetch
299
350
 
300
351
  const wrappedImpl = async (
@@ -352,6 +403,7 @@ export function installCodexFetchObserver(opts: CodexFetchObserverOptions = {}):
352
403
  const requestId = response.headers.get('x-request-id')
353
404
  return attachBodyTimingTap(response, start, headersMs, response.status, retryAfter, requestId, now, logger, {
354
405
  idleMs,
406
+ overallMs,
355
407
  scheduler,
356
408
  })
357
409
  }
package/src/run/index.ts CHANGED
@@ -321,6 +321,7 @@ export async function startAgent({
321
321
  ? { originatingSessionFile: ctx.originatingSessionFile }
322
322
  : {}),
323
323
  handoffOrigin: ctx.handoffOrigin,
324
+ ...(ctx.triggeringAuthorId !== undefined ? { triggeringAuthorId: ctx.triggeringAuthorId } : {}),
324
325
  }
325
326
  : {}),
326
327
  })
@@ -701,6 +702,10 @@ export async function startAgent({
701
702
  console.log(`[plugin] loaded ${summarizeLoaded(pluginsLoaded.loadedPlugins, pluginRegistry)}`)
702
703
  }
703
704
 
705
+ for (const f of pluginsLoaded.failedPlugins) {
706
+ console.warn(`[plugin] DEGRADED: "${f.entry}" disabled (${f.phase}): ${f.error}`)
707
+ }
708
+
704
709
  // Container-side portbroker is instantiated only when the host plumbed a
705
710
  // broker token in via env var. Outside the container (tests, ad-hoc dev
706
711
  // runs), the env var is absent and the broker stays off — same fence as
@@ -220,29 +220,46 @@ export function canBindProcSafely(options?: { bwrapPath?: string }): Promise<boo
220
220
  // leak-block guarantee — it only buys more chances to PROVE it.
221
221
  export const PROC_BIND_RETRY_BACKOFF_MS = [250, 1_000, 2_000, 4_000] as const
222
222
 
223
+ // The retrying resolver returns the SAME three states as the probe, never a
224
+ // boolean: 'safe' selects proc-bind; the two failure states stay DELIBERATELY
225
+ // distinct so the caller reacts differently. 'unsafe' is a DEFINITIVE host fact
226
+ // (a real cross-userns environ leak was observed, or the binary is genuinely
227
+ // absent) — permanent, fail closed, retrying buys nothing. 'inconclusive' means
228
+ // the safety probe never returned a definitive verdict within the backoff budget
229
+ // (a boot-time CPU/IO storm tripping the probe's own timeout) — it proves NOTHING
230
+ // about the host, so the SAME container can recover on a later call once the
231
+ // spike passes. Folding these two into a single boolean `false` is what made a
232
+ // transient boot-storm degrade look permanent: the caller degraded to tmpfs AND
233
+ // told the model "retrying won't help", so a capable host stayed broken until
234
+ // restart.
235
+ //
223
236
  // proc-bind selection must distinguish "definitely unavailable" from "couldn't
224
- // verify right now". A DEFINITIVE verdict is final: 'safe'→true; a real userns
225
- // leak ('unsafe')→false with NO retry. Only an 'inconclusive' verdict (transient
226
- // probe failure that proves nothing about the host) is retried, because degrading
227
- // the bash call to tmpfs over a transient hiccup is what silently broke
237
+ // verify right now". A DEFINITIVE verdict is final and is returned as-is with NO
238
+ // retry: 'safe', or 'unsafe' (a real userns leak). Only an 'inconclusive' verdict (transient probe
239
+ // failure that proves nothing about the host) is retried, because degrading the
240
+ // bash call to tmpfs over a transient hiccup is what silently broke
228
241
  // external-package runs on capable hosts. 'inconclusive' is never cached
229
242
  // (see the cache type), so each retry re-probes from scratch. After the backoff
230
- // budget is exhausted we fail CLOSED — an unverified leak-block is never treated
231
- // as safe. Pure and dependency-injected (probe + sleep) so the retry policy is
232
- // unit-testable without spawning processes; production passes
233
- // getProcBindSafetyVerdict and Bun.sleep.
243
+ // budget is exhausted we return 'inconclusive' — an unverified leak-block is
244
+ // never treated as safe, but the RESULT (a transient unknown, not a definitive
245
+ // 'unsafe') lets the caller offer a retryable degrade. Pure and
246
+ // dependency-injected (probe + sleep) so the retry policy is unit-testable
247
+ // without spawning processes; production passes getProcBindSafetyVerdict and
248
+ // Bun.sleep.
234
249
  export async function resolveProcBindSafetyWithRetry(
235
250
  probe: () => Promise<ProcBindSafetyVerdict>,
236
251
  sleep: (ms: number) => Promise<void>,
237
252
  backoffMs: readonly number[] = PROC_BIND_RETRY_BACKOFF_MS,
238
- ): Promise<boolean> {
253
+ ): Promise<ProcBindSafetyVerdict> {
239
254
  for (let attempt = 0; ; attempt++) {
240
255
  const verdict = await probe()
241
- if (verdict === 'safe') return true
242
- if (verdict === 'unsafe') return false
256
+ if (verdict === 'safe') return 'safe'
257
+ if (verdict === 'unsafe') return 'unsafe'
243
258
 
244
259
  const backoff = backoffMs[attempt]
245
- if (backoff === undefined) return false
260
+ // Budget exhausted: still unverified. Report 'inconclusive' (NOT 'unsafe') so
261
+ // the caller knows this is a retryable unknown, not a definitive host fact.
262
+ if (backoff === undefined) return 'inconclusive'
246
263
  await sleep(backoff)
247
264
  }
248
265
  }
@@ -282,9 +299,14 @@ async function probeProcBind(bwrap: string): Promise<ProcBindProbe> {
282
299
  // marker: that proves the sentinel is dumpable, same-uid, AND that this pid is
283
300
  // OUR sentinel (not a reused pid), so the ONLY thing that can deny the read
284
301
  // from inside the sandbox is the child-userns boundary (rules out a false
285
- // "blocked" from dumpable=0 / uid mismatch). If the parent can't read the
286
- // marker, the sentinel setup is unsound inconclusive, fail closed, no cache.
287
- if (!(await parentReadsSentinelMarker(sentinelPid))) return INCONCLUSIVE
302
+ // "blocked" from dumpable=0 / uid mismatch). The marker can be absent for a
303
+ // moment right after Bun.spawn: the child pid exists before `/usr/bin/env -i
304
+ // SECRET=... /bin/sleep` has exec'd and replaced its environ. Treating that
305
+ // startup race as immediate INCONCLUSIVE made the retry budget collapse into
306
+ // pure backoff time (~7.25s) and produced the first-tool `bunx` degrade even
307
+ // though the same host proved safe on the next call. Wait briefly for the
308
+ // marker before deciding setup is unsound; a real failure still fails closed.
309
+ if (!(await waitForSentinelMarker(sentinelPid))) return INCONCLUSIVE
288
310
 
289
311
  const proc = Bun.spawn(
290
312
  [
@@ -387,6 +409,9 @@ async function probeProcBind(bwrap: string): Promise<ProcBindProbe> {
387
409
  // briefly-saturated box; a genuinely wedged runtime still trips it and degrades.
388
410
  const PROC_BIND_PROBE_TIMEOUT_MS = 12_000
389
411
 
412
+ const PROC_BIND_SENTINEL_READY_TIMEOUT_MS = 1_000
413
+ const PROC_BIND_SENTINEL_READY_POLL_MS = 25
414
+
390
415
  // Designated probe-script exit codes. ONLY these two are a cacheable verdict;
391
416
  // every other code (a setup failure, bwrap startup failure, a signal, 127, …) is
392
417
  // inconclusive and must NOT be cached — see the exit-code interpretation in
@@ -457,6 +482,24 @@ async function parentReadsSentinelMarker(sentinelPid: number): Promise<boolean>
457
482
  }
458
483
  }
459
484
 
485
+ async function waitForSentinelMarker(
486
+ sentinelPid: number,
487
+ readMarker: (pid: number) => Promise<boolean> = parentReadsSentinelMarker,
488
+ sleep: (ms: number) => Promise<void> = (ms) => Bun.sleep(ms),
489
+ timeoutMs: number = PROC_BIND_SENTINEL_READY_TIMEOUT_MS,
490
+ pollMs: number = PROC_BIND_SENTINEL_READY_POLL_MS,
491
+ now: () => number = Date.now,
492
+ ): Promise<boolean> {
493
+ const deadline = now() + timeoutMs
494
+ for (;;) {
495
+ if (await readMarker(sentinelPid)) return true
496
+ if (now() >= deadline) return false
497
+ await sleep(pollMs)
498
+ }
499
+ }
500
+
501
+ export const _waitForSentinelMarkerForTests = waitForSentinelMarker
502
+
460
503
  export function _resetProcBindProbeCacheForTests(): void {
461
504
  procBindProbeCache.clear()
462
505
  procBindProbeInFlight.clear()
@@ -41,3 +41,29 @@ export class SandboxDegradedProcError extends Error {
41
41
  )
42
42
  }
43
43
  }
44
+
45
+ // Distinct from SandboxDegradedProcError: that one is the PERMANENT verdict (a
46
+ // real userns leak, or a host with no usable namespaces — retrying is futile).
47
+ // This one fires when the proc-bind safety probe stayed 'inconclusive' through
48
+ // its whole retry budget — typically a boot-time CPU/IO storm tripping the
49
+ // probe's own timeout. The host is very likely capable; the probe just couldn't
50
+ // prove it RIGHT NOW. Because an 'inconclusive' verdict is never cached, the next
51
+ // bash call re-probes from scratch and usually promotes to proc-bind once the
52
+ // spike passes. So the message tells the model the OPPOSITE of the permanent
53
+ // case: retrying IS the fix. Without this split, a single unlucky boot-storm
54
+ // probe degraded a fully-capable container to tmpfs and told the agent it was a
55
+ // permanent environment limit — so it gave up instead of retrying.
56
+ export class SandboxProcProbeUnverifiedError extends Error {
57
+ override readonly name = 'SandboxProcProbeUnverifiedError'
58
+ constructor() {
59
+ super(
60
+ 'sandbox /proc strategy could not be verified right now: the cap-free ' +
61
+ 'proc-bind safety probe stayed inconclusive (usually transient load on the ' +
62
+ 'host while the container was starting up), so this bun package command ' +
63
+ '(bun install / bun add / bunx / bun run) was held back rather than run ' +
64
+ 'under a broken /proc. This is almost certainly temporary and NOT a problem ' +
65
+ 'with the command or the package: retry the SAME command in a few seconds — ' +
66
+ 'the next attempt re-probes and normally succeeds.',
67
+ )
68
+ }
69
+ }
@@ -27,7 +27,12 @@ export { resolveSandboxSymlinks, type SandboxSymlinkSpec } from './symlinks'
27
27
  export { commandNeedsRealProc, isPackageInstallCommand } from './package-install'
28
28
  export { ensureSessionTmpDir, isUnderTmp, mapVirtualTmpPath, SESSION_TMP_ROOT, sessionTmpDir } from './session-tmp'
29
29
  export { formatCommand, shellQuote } from './quote'
30
- export { SandboxDegradedProcError, SandboxPolicyError, SandboxUnavailableError } from './errors'
30
+ export {
31
+ SandboxDegradedProcError,
32
+ SandboxPolicyError,
33
+ SandboxProcProbeUnverifiedError,
34
+ SandboxUnavailableError,
35
+ } from './errors'
31
36
  export {
32
37
  DEFAULT_SANDBOX_ENV,
33
38
  type SandboxCommandFilter,
@@ -142,8 +142,19 @@ export type SandboxPolicy = {
142
142
  // guard: the container env holds FIREWORKS_API_KEY and GH_TOKEN, and env
143
143
  // inheritance is the single highest-risk exfil path for prompt-injected bash.
144
144
  // HOME points at /tmp because the sandbox mounts /tmp as a fresh tmpfs.
145
+ //
146
+ // BUN_TMPDIR / BUN_INSTALL both point under /tmp because `--clearenv` strips
147
+ // the host's TMPDIR, and bun refuses to run without a writable scratch dir it
148
+ // can discover: `bunx`, `bun add`, and `bun run <pkg-bin>` abort with
149
+ // "Unexpected accessing temporary directory. Please set $BUN_TMPDIR or
150
+ // $BUN_INSTALL". /tmp is always writable inside the sandbox (fresh tmpfs, or
151
+ // the per-session bind that overrides it), so both are safe targets. Without
152
+ // these, every sandboxed bun invocation — the core subagent install path —
153
+ // fails before it starts.
145
154
  export const DEFAULT_SANDBOX_ENV: Record<string, string> = {
146
155
  PATH: '/usr/local/bin:/usr/bin:/bin',
147
156
  HOME: '/tmp',
148
157
  LANG: 'C.UTF-8',
158
+ BUN_TMPDIR: '/tmp',
159
+ BUN_INSTALL: '/tmp/.bun',
149
160
  }
@@ -16,7 +16,7 @@ The runtime reads `typeclaw.json` at container startup. Some fields are picked u
16
16
  - `port` — the TCP port the websocket server binds to inside the container. The TUI on the host stage connects to this. Default `8973`. **Restart-required.**
17
17
  - `model` — a fully-qualified `<provider>/<model-id>` string. The runtime resolves this against the built-in provider registry to decide which API to call for every turn. **Live-reloadable.**
18
18
  - `mounts` — additional host directories the user has chosen to expose to you. Each entry produces a `docker run -v <hostPath>:/agent/mounts/<name>` flag at `typeclaw start` time, so the directory shows up at `mounts/<name>` inside your agent folder. **The launcher reads this; the running container does not.** Editing `mounts` only takes effect on the next `typeclaw start`. **Restart-required.**
19
- - `plugins` — array of plugin package names loaded at server boot. **Restart-required.**
19
+ - `plugins` — array of plugin module specifiers loaded at server boot: npm package names for published plugins, or relative paths for local plugins you are authoring. **Restart-required.**
20
20
  - `alias` — additional names the agent answers to when a channel message contains its name in plain text (no `<@id>` mention). The agent folder's directory name (`basename(agentDir)`) is always implicit; `alias` adds further forms (Latin transliteration, nicknames, Korean particles, etc.). Used by the channel engagement layer alongside the structural mention/reply/dm triggers. **Live-reloadable.**
21
21
  - `channels` — per-adapter engagement triggers and history-prefetch knobs for external messengers (Discord, Slack, Telegram, KakaoTalk), plus the GitHub channel (a webhook-driven adapter that watches repos and reviews PRs — see **GitHub channel** below). Access control lives in `roles`, not here. **Live-reloadable** — edits take effect on the next `reload` without a container restart.
22
22
  - `docker.file` — controls what ships in the autogenerated container image. Two layers: (1) **toggles** for opinionated package installs — `tmux`, `gh`, `python`, `xvfb` default on (`true`); `cjkFonts` defaults to `"auto"` (resolved from host locale at start); `ffmpeg`, `cloudflared`, `claudeCode`, `codexCli` default off (`false`) — set a toggle to `false` to omit, or to a version string like `"2.40.0"` to apt-pin (`python`, `cjkFonts`, `cloudflared`, `xvfb`, `claudeCode`, and `codexCli` are boolean-only). Most toggles install apt packages with BuildKit cache mounts; `cloudflared`, `claudeCode`, and `codexCli` are exceptions — `cloudflared` downloads the pinned GitHub release, `claudeCode` runs Anthropic's official `curl | bash` installer, `codexCli` `bun install`s the `@openai/codex` npm package. (2) **`append`** — extra Dockerfile lines spliced in right before `ENTRYPOINT` for anything the toggles don't cover. The whole Dockerfile is rewritten on every `start` from the typeclaw template. Lives under the `docker` namespace alongside future Docker-related blocks (e.g. `docker.compose`). **Restart-required** (next `typeclaw start` rebuilds the image).
@@ -45,7 +45,7 @@ You yourself cannot run `typeclaw restart` — that is a host-stage command and
45
45
  | `port` | no | integer | 1–65535. Defaults to `8973` (T9 spelling of "TYPE"). Change only if the default collides with something on the user's host. **Restart-required.** |
46
46
  | `model` | no | string | Must be one of the values listed in the **Allowed models** section below. Defaults to `openai/gpt-5.4-nano`. **Live-reloadable.** |
47
47
  | `mounts` | no | array of objects | Host directories bind-mounted into your container. Defaults to `[]` (no host paths exposed). Omitted from scaffolded `typeclaw.json` — add it only when the user wants host paths exposed. See **Mounts** section below. **Restart-required.** |
48
- | `plugins` | no | array of strings | Plugin package names loaded at server boot. Defaults to `[]`. **Restart-required.** Plugin-owned config blocks live alongside as additional top-level keys; see **Plugin config blocks**. |
48
+ | `plugins` | no | array of strings | Plugin module specifiers loaded at server boot: use npm package names for published plugins (for example, `typeclaw-gws-multi-account`) and relative paths only for local plugins you are authoring (for example, `./packages/my-plugin`). Defaults to `[]`. **Restart-required.** Plugin-owned config blocks live alongside as additional top-level keys; see **Plugin config blocks**. |
49
49
  | `alias` | no | array of strings | Additional names the agent answers to in channel engagement, on top of the implicit `basename(agentDir)`. Each entry is a non-empty trimmed string matched case-insensitively as a substring of the inbound text. Defaults to `[]`. Hatching populates this with the agent's chosen name. See **Channels and Alias** below for schema/edit mechanics; the matching behavior lives in the `typeclaw-channels` skill. **Live-reloadable.** |
50
50
  | `channels` | no | object | Per-adapter engagement triggers and history-prefetch knobs for external messengers (plus the `github` webhook channel — see **GitHub channel** below). Defaults to `{}` (no adapters configured). `typeclaw init` scaffolds an empty block per requested adapter (e.g. `"discord-bot": {}`) and the schema fills in defaults. Channel access control lives in `roles` — see the `typeclaw-permissions` skill; engagement behavior lives in `typeclaw-channels`. **Live-reloadable.** See **Channels and Alias** below. |
51
51
  | `portForward` | no | object | Allow/deny policy for the host-stage portbroker that auto-forwards container LISTEN ports to `127.0.0.1` on the host. Defaults to `{ "allow": "*" }` (forward everything). Omitted from scaffolded `typeclaw.json`. **Restart-required.** See **portForward** section below. |
@@ -11,10 +11,10 @@ Your agent folder is a **bun monorepo**. The root `package.json` declares `"work
11
11
 
12
12
  You have two free-write zones at the agent root: `workspace/` and `packages/`. Both are exempt from the non-workspace-write guard, so you can edit them without acknowledging anything, but they relate to git in opposite ways, and picking the wrong one is the most common mistake.
13
13
 
14
- | Zone | Purpose | Tracked in git? | Reusable? |
15
- | ------------ | ------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------- | -------------------------------------------- |
16
- | `workspace/` | One-off scripts, scratch work, throwaway experiments | **No** — entire dir is gitignored | No (the dir itself is invisible to git) |
17
- | `packages/` | Reusable packages, custom plugins, shared utilities, internal libs | **Yes** — every file is tracked and MUST be committed when edited (only `*/node_modules/` ignored inside) | Yes (committed and importable across agents) |
14
+ | Zone | Purpose | Tracked in git? | Reusable? |
15
+ | ------------ | ------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------- | -------------------------------------------- |
16
+ | `workspace/` | One-off scripts, scratch work, throwaway experiments | **No** — entire dir is gitignored | No (the dir itself is invisible to git) |
17
+ | `packages/` | Reusable packages, custom local plugins, shared utilities, internal libs | **Yes** — every file is tracked and MUST be committed when edited (only `*/node_modules/` ignored inside) | Yes (committed and importable across agents) |
18
18
 
19
19
  The two columns to internalize:
20
20
 
@@ -26,7 +26,7 @@ Anything you put in `packages/` MUST land in a commit — see `typeclaw-git`. Th
26
26
  **Decision rule, top to bottom — stop at the first match:**
27
27
 
28
28
  1. **Will another script or another part of the agent folder import this?** → `packages/<name>/`. Even if "another part" is just "tomorrow's me writing a sibling script", a reusable thing belongs here.
29
- 2. **Is this a custom typeclaw plugin** (anything you'd list in `typeclaw.json`'s `plugins`)? → `packages/<plugin-name>/`. Always. Plugins are the canonical packages.
29
+ 2. **Is this a custom local typeclaw plugin you are authoring?** → `packages/<plugin-name>/`. If you are adding an existing or published plugin, keep its npm package specifier in `typeclaw.json#plugins`; do not create or guess a `./packages/...` path.
30
30
  3. **Will the user want to track this in git, see it in PRs, or depend on it from a cron job?** → `packages/<name>/`.
31
31
  4. **Is this throwaway** — a one-shot data transformation, a debug script, a scratch experiment that exists for one task and dies? → `workspace/`.
32
32
  5. **Default if unsure** → `packages/<name>/`. Better to commit something reusable than to lose something useful in the gitignored void.
@@ -97,6 +97,8 @@ To depend on a workspace package from the **agent root** (e.g. so cron `exec` jo
97
97
 
98
98
  ## Custom typeclaw plugins live under `packages/`
99
99
 
100
+ This section is only for plugins you are **authoring locally** in the agent folder. If the user asks to add/install an existing or published plugin, use the plugin's npm package specifier in `typeclaw.json#plugins` (for example, `"typeclaw-gws-multi-account"`) and do **not** fabricate a `./packages/...` path.
101
+
100
102
  If you are writing a typeclaw plugin (anything that uses `definePlugin` from `typeclaw/plugin`), the canonical home is `packages/<plugin-name>/`. The workflow:
101
103
 
102
104
  1. **Author**: `packages/my-plugin/index.ts` exports `definePlugin({ ... })` as default.
@@ -115,6 +115,13 @@ Without `configSchema`, `ctx.config` is `never` and any reference is a type erro
115
115
 
116
116
  The **derived name is the key** for the per-plugin config block at the top level of `typeclaw.json`. Two plugins with the same derived name are a boot error.
117
117
 
118
+ Use the entry format that matches the plugin's source:
119
+
120
+ - **Published npm plugin** → put the npm package specifier in `plugins[]`, e.g. `"typeclaw-gws-multi-account"` or `"typeclaw-plugin-standup-log@1.2.3"`. Do **not** invent a `./packages/...` path for a published package.
121
+ - **Local plugin you are authoring in this agent folder** → put its relative path in `plugins[]`, e.g. `"./packages/my-plugin"`. The path must exist and point at local plugin code.
122
+
123
+ If the user says to add/install an existing plugin by package name, preserve that package name. Only use `./packages/<name>` when you are creating a new local workspace package or wiring up one that already exists in this repo.
124
+
118
125
  ### Local path safety
119
126
 
120
127
  Local plugin paths **must resolve inside `agentDir`**. Absolute paths (`/etc/...`) and parent-traversing paths (`../../foo`) are rejected with:
@@ -125,9 +132,11 @@ plugin path escapes agent directory: <entry> (resolved to <abs-path>)
125
132
 
126
133
  This is why `./plugins/x.ts` works and `/Users/me/x.ts` does not.
127
134
 
128
- ### Recommended location: `packages/<plugin-name>/`
135
+ ### Recommended location for new local plugins: `packages/<plugin-name>/`
136
+
137
+ This section is about plugins you are **authoring locally**. For a published npm plugin, keep the npm package specifier in `plugins[]`; do not create or guess a local path.
129
138
 
130
- The agent folder is a **bun monorepo**, and `packages/` is its workspace root. **Custom plugins go there.** A `./packages/standup-log/` plugin is a real workspace package — bun installs its dependencies, the workspace symlink machinery makes it importable, and it lands in git like any other reusable code. Concretely:
139
+ The agent folder is a **bun monorepo**, and `packages/` is its workspace root. **Custom local plugins go there.** A `./packages/standup-log/` plugin is a real workspace package — bun installs its dependencies, the workspace symlink machinery makes it importable, and it lands in git like any other reusable code. Concretely:
131
140
 
132
141
  ```
133
142
  packages/