npm - typeclaw - Versions diffs - 0.36.1 → 0.36.3 - Mend

typeclaw 0.36.1 → 0.36.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40)

package/package.json +2 -2
package/src/agent/index.ts +11 -0
package/src/agent/plugin-tools.ts +43 -21
package/src/agent/restart/index.ts +6 -0
package/src/agent/restart-handoff/index.ts +10 -0
package/src/agent/system-prompt.ts +6 -0
package/src/agent/tools/restart.ts +9 -0
package/src/bundled-plugins/backup/README.md +11 -2
package/src/bundled-plugins/backup/git-auth.ts +58 -0
package/src/bundled-plugins/backup/index.ts +54 -0
package/src/bundled-plugins/backup/runner.ts +82 -12
package/src/channels/adapters/discord-bot-reactions.ts +1 -0
package/src/channels/adapters/line-attachment.ts +97 -0
package/src/channels/adapters/line-classify.ts +14 -3
package/src/channels/adapters/line.ts +5 -1
package/src/channels/manager.ts +15 -3
package/src/channels/router.ts +67 -16
package/src/cli/hostd.ts +37 -4
package/src/cli/reload.ts +26 -5
package/src/cli/ui.ts +6 -0
package/src/container/index.ts +1 -0
package/src/container/start.ts +6 -0
package/src/init/reconcile-plugin-deps.ts +45 -15
package/src/init/restart-deps-preflight.ts +155 -0
package/src/permissions/permissions.ts +24 -4
package/src/plugin/loader.ts +16 -4
package/src/plugin/manager.ts +175 -71
package/src/reload/client.ts +14 -3
package/src/reload/docker-exec-client.ts +109 -0
package/src/reload/index.ts +7 -1
package/src/reload/recover.ts +38 -0
package/src/run/codex-fetch-observer.ts +57 -5
package/src/run/index.ts +5 -0
package/src/sandbox/availability.ts +58 -15
package/src/sandbox/errors.ts +26 -0
package/src/sandbox/index.ts +6 -1
package/src/sandbox/policy.ts +11 -0
package/src/skills/typeclaw-config/SKILL.md +2 -2
package/src/skills/typeclaw-monorepo/SKILL.md +7 -5
package/src/skills/typeclaw-plugins/SKILL.md +11 -2

package/src/reload/docker-exec-client.ts ADDED Viewed

@@ -0,0 +1,109 @@
+import {
+  CONTAINER_PORT,
+  containerNameFromCwd,
+  defaultDockerExec,
+  sanitizeDockerStderr,
+  type DockerExec,
+  type DockerExecResult,
+} from '@/container'
+import type { ReloadResult } from './types'
+export type RequestReloadViaDockerExecOptions = {
+  cwd: string
+  token: string | null
+  scope?: string
+  timeoutMs?: number
+  exec?: DockerExec
+}
+type DockerExecReloadEnvelope = { ok: true; results: ReloadResult[] } | { ok: false; reason: string }
+const DEFAULT_TIMEOUT_MS = 30_000
+const RELOAD_SCRIPT = String.raw`
+const timeoutMs = Number(process.env.TYPECLAW_RELOAD_TIMEOUT_MS ?? '30000')
+const url = new URL('ws://127.0.0.1:' + (process.env.TYPECLAW_CONTAINER_PORT ?? '8973'))
+if (process.env.TYPECLAW_TUI_TOKEN) url.searchParams.set('token', process.env.TYPECLAW_TUI_TOKEN)
+const ws = new WebSocket(url.toString())
+let settled = false
+const finish = (payload, code) => {
+  if (settled) return
+  settled = true
+  console.log(JSON.stringify(payload))
+  if (ws.readyState === WebSocket.CONNECTING || ws.readyState === WebSocket.OPEN) ws.close()
+  setTimeout(() => process.exit(code), 0)
+}
+const timer = setTimeout(() => finish({ ok: false, reason: 'timed out waiting for container-local reload_result after ' + timeoutMs + 'ms' }, 1), timeoutMs)
+ws.addEventListener('open', () => {
+  const scope = process.env.TYPECLAW_RELOAD_SCOPE
+  ws.send(JSON.stringify(scope ? { type: 'reload', scope } : { type: 'reload' }))
+})
+ws.addEventListener('message', (event) => {
+  const msg = JSON.parse(String(event.data))
+  if (msg.type !== 'reload_result') return
+  clearTimeout(timer)
+  finish({ ok: true, results: msg.results }, 0)
+})
+ws.addEventListener('error', (event) => finish({ ok: false, reason: String(event.message ?? event) }, 1))
+ws.addEventListener('close', () => finish({ ok: false, reason: 'container-local websocket closed before reload_result' }, 1))
+`
+export async function requestReloadViaDockerExec({
+  cwd,
+  token,
+  scope,
+  timeoutMs = DEFAULT_TIMEOUT_MS,
+  exec = defaultDockerExec,
+}: RequestReloadViaDockerExecOptions): Promise<ReloadResult[]> {
+  const envArgs = ['-e', `TYPECLAW_CONTAINER_PORT=${CONTAINER_PORT}`, '-e', `TYPECLAW_RELOAD_TIMEOUT_MS=${timeoutMs}`]
+  if (token !== null) envArgs.push('-e', `TYPECLAW_TUI_TOKEN=${token}`)
+  if (scope !== undefined) envArgs.push('-e', `TYPECLAW_RELOAD_SCOPE=${scope}`)
+  const signal = AbortSignal.timeout(timeoutMs)
+  let result: DockerExecResult
+  try {
+    result = await exec(['exec', ...envArgs, containerNameFromCwd(cwd), 'bun', '-e', RELOAD_SCRIPT], { signal })
+  } catch (err) {
+    if (signal.aborted) throw new Error(`docker exec timed out after ${timeoutMs}ms`)
+    throw err
+  }
+  if (signal.aborted) throw new Error(`docker exec timed out after ${timeoutMs}ms`)
+  if (result.exitCode !== 0) {
+    const envelope = parseEnvelope(result.stdout)
+    if (envelope !== null && !envelope.ok) throw new Error(envelope.reason)
+    const reason =
+      sanitizeDockerStderr(result.stderr) || result.stdout.trim() || `docker exec exited with code ${result.exitCode}`
+    throw new Error(reason)
+  }
+  const envelope = parseEnvelope(result.stdout)
+  if (envelope === null) throw new Error('container-local reload returned invalid JSON')
+  if (!envelope.ok) throw new Error(envelope.reason)
+  return envelope.results
+}
+function parseEnvelope(stdout: string): DockerExecReloadEnvelope | null {
+  const line = stdout
+    .split('\n')
+    .map((entry) => entry.trim())
+    .filter((entry) => entry.length > 0)
+    .at(-1)
+  if (line === undefined) return null
+  try {
+    const parsed: unknown = JSON.parse(line)
+    return isEnvelope(parsed) ? parsed : null
+  } catch {
+    return null
+  }
+}
+function isEnvelope(value: unknown): value is DockerExecReloadEnvelope {
+  if (!isRecord(value) || typeof value.ok !== 'boolean') return false
+  if (value.ok) return Array.isArray(value.results)
+  return typeof value.reason === 'string'
+}
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null
+}

package/src/reload/index.ts CHANGED Viewed

@@ -1,4 +1,10 @@
-export { requestReload, type RequestReloadOptions } from './client'
+export { ReloadConnectionError, requestReload, type RequestReloadOptions } from './client'
+export { requestReloadViaDockerExec, type RequestReloadViaDockerExecOptions } from './docker-exec-client'
 export { formatChannelReloadSummary } from './format'
 export { ReloadRegistry } from './registry'
+export {
+  requestReloadWithFallback,
+  type RequestReloadWithFallbackOptions,
+  type RequestReloadWithFallbackResult,
+} from './recover'
 export type { Reloadable, ReloadAllResult, ReloadResult } from './types'

package/src/reload/recover.ts ADDED Viewed

@@ -0,0 +1,38 @@
+import { ReloadConnectionError, requestReload } from './client'
+import { requestReloadViaDockerExec } from './docker-exec-client'
+import type { ReloadResult } from './types'
+export type RequestReloadWithFallbackOptions = {
+  url: string
+  cwd?: string
+  token?: string | null
+  scope?: string
+  timeoutMs?: number
+  reload?: typeof requestReload
+  reloadViaDockerExec?: typeof requestReloadViaDockerExec
+}
+export type RequestReloadWithFallbackResult =
+  | { transport: 'host'; results: ReloadResult[] }
+  | { transport: 'container-local'; results: ReloadResult[]; hostError: string }
+export async function requestReloadWithFallback({
+  url,
+  cwd,
+  token,
+  scope,
+  timeoutMs,
+  reload = requestReload,
+  reloadViaDockerExec = requestReloadViaDockerExec,
+}: RequestReloadWithFallbackOptions): Promise<RequestReloadWithFallbackResult> {
+  try {
+    return { transport: 'host', results: await reload({ url, scope, timeoutMs }) }
+  } catch (err) {
+    if (!(err instanceof ReloadConnectionError) || cwd === undefined || token === undefined) throw err
+    return {
+      transport: 'container-local',
+      results: await reloadViaDockerExec({ cwd, token, scope, timeoutMs }),
+      hostError: err.message,
+    }
+  }
+}

package/src/run/codex-fetch-observer.ts CHANGED Viewed

@@ -25,9 +25,29 @@ export type CodexFetchObserverOptions = {
   ttfbMs?: number
   // Override the sliding inter-chunk idle deadline applied to the SSE body
   // reader. Resets on every chunk; if no bytes arrive within this window the
-  // body stream errors. Default: 300_000 ms, matches `openai/codex`'s Rust CLI
-  // `DEFAULT_STREAM_IDLE_TIMEOUT_MS`. Set to 0 to disable just this timer.
+  // body stream errors. Like the overall deadline, this doubles as a recovery
+  // bound: on a silent stall the user waits this long before the retry fires,
+  // so it should not exceed the overall ceiling. Default 120_000 ms (was
+  // 300_000, which matched `openai/codex`'s Rust CLI but is 5min of dead air
+  // before recovery). 120s is loose enough for OpenAI's keepalive-less
+  // reasoning pauses (the Responses API sends no SSE heartbeats, so a quiet
+  // reasoning window is genuinely byte-silent) while bounded by the overall
+  // cap. Set to 0 to disable just this timer.
   idleMs?: number
+  // Override the absolute wall-clock ceiling on a single Codex request,
+  // measured from fetch start to body completion. Unlike `idleMs`, it does NOT
+  // reset on chunk arrival, so it catches a "slow-trickle" stream that emits
+  // bytes inside every idle window yet never reaches a terminal SSE event —
+  // the failure mode behind issue #394's multi-minute hang (one observed
+  // request occupied 901s before Bun's OS socket deadline fired). On expiry the
+  // request is aborted with a retryable error, so this also bounds how long a
+  // user waits before the retry fires — keeping it low is a UX requirement, not
+  // just a safety net. Default 120_000 ms: across 96 observed requests the
+  // slowest *healthy* (completed) one was 45s and p99 was ~30s, with a clean
+  // gap up to the 901s hang — so 120s is ~2.7x the healthy max (ample headroom
+  // for PoP/TLS outliers) while capping a real hang at ~2min instead of ~15min.
+  // Set to 0 to disable just this timer.
+  overallMs?: number
   // Schedule fn for tests. Receives (delayMs, callback) and returns a handle
   // the wrapper can pass to `clear`. Default: `setTimeout`/`clearTimeout`.
   scheduler?: TimeoutScheduler
@@ -44,8 +64,10 @@ const ENV_DISABLE_OBSERVER = 'TYPECLAW_CODEX_FETCH_OBSERVER'
 const ENV_DISABLE_TIMEOUTS = 'TYPECLAW_CODEX_TIMEOUTS'
 const ENV_TTFB_MS = 'TYPECLAW_CODEX_TTFB_MS'
 const ENV_IDLE_MS = 'TYPECLAW_CODEX_IDLE_MS'
+const ENV_OVERALL_MS = 'TYPECLAW_CODEX_OVERALL_MS'
 const DEFAULT_TTFB_MS = 15_000
-const DEFAULT_IDLE_MS = 300_000
+const DEFAULT_IDLE_MS = 120_000
+const DEFAULT_OVERALL_MS = 120_000
 const LOG_PREFIX = '[codex-fetch]'
 const defaultScheduler: TimeoutScheduler = {
@@ -126,6 +148,7 @@ function readEnvMs(name: string, fallback: number): number {
 type BodyTapConfig = {
   idleMs: number
+  overallMs: number
   scheduler: TimeoutScheduler
 }
@@ -193,17 +216,44 @@ function attachBodyTimingTap(
   const piped = response.body.pipeThrough(tap, { preventCancel: false })
-  const idleController = config.idleMs > 0 ? new AbortController() : null
+  const idleController = config.idleMs > 0 || config.overallMs > 0 ? new AbortController() : null
   let idleHandle: unknown = null
   const armIdleTimer = () => {
-    if (idleController === null) return
+    if (idleController === null || config.idleMs <= 0) return
     if (idleHandle !== null) config.scheduler.clear(idleHandle)
     idleHandle = config.scheduler.set(config.idleMs, () => {
       cause = 'idle_timeout'
       idleController.abort(new Error(`Codex SSE body idle for ${config.idleMs}ms (typeclaw observer timeout)`))
     })
   }
+  // Absolute ceiling on the whole request, armed once and never reset. The
+  // budget is measured from `start` (before originalFetch), so the time already
+  // spent waiting for headers is subtracted here — otherwise a slow-headers
+  // request would get a fresh full `overallMs` for its body on top of the
+  // headers wait, doubling the intended ceiling. A non-positive remainder means
+  // the budget is already spent, so we schedule at 0 to abort on the next tick.
+  // Aborts the shared controller so the existing reader race tears the stream
+  // down on the first deadline to fire — idle or overall, whichever comes first.
+  let overallHandle: unknown = null
+  if (idleController !== null && config.overallMs > 0) {
+    const remainingOverallMs = Math.max(0, config.overallMs - (now() - start))
+    overallHandle = config.scheduler.set(remainingOverallMs, () => {
+      cause = 'overall_timeout'
+      idleController.abort(
+        new Error(`Codex SSE body exceeded overall deadline of ${config.overallMs}ms (typeclaw observer timeout)`),
+      )
+    })
+  }
+  const disarmOverallTimer = () => {
+    if (overallHandle !== null) {
+      config.scheduler.clear(overallHandle)
+      overallHandle = null
+    }
+  }
   const disarmIdleTimer = () => {
+    disarmOverallTimer()
     if (idleHandle !== null) {
       config.scheduler.clear(idleHandle)
       idleHandle = null
@@ -295,6 +345,7 @@ export function installCodexFetchObserver(opts: CodexFetchObserverOptions = {}):
   const timeoutsEnabled = process.env[ENV_DISABLE_TIMEOUTS] !== 'off'
   const ttfbMs = timeoutsEnabled ? (opts.ttfbMs ?? readEnvMs(ENV_TTFB_MS, DEFAULT_TTFB_MS)) : 0
   const idleMs = timeoutsEnabled ? (opts.idleMs ?? readEnvMs(ENV_IDLE_MS, DEFAULT_IDLE_MS)) : 0
+  const overallMs = timeoutsEnabled ? (opts.overallMs ?? readEnvMs(ENV_OVERALL_MS, DEFAULT_OVERALL_MS)) : 0
   const originalFetch = globalThis.fetch
   const wrappedImpl = async (
@@ -352,6 +403,7 @@ export function installCodexFetchObserver(opts: CodexFetchObserverOptions = {}):
     const requestId = response.headers.get('x-request-id')
     return attachBodyTimingTap(response, start, headersMs, response.status, retryAfter, requestId, now, logger, {
       idleMs,
+      overallMs,
       scheduler,
     })
   }

package/src/run/index.ts CHANGED Viewed

@@ -321,6 +321,7 @@ export async function startAgent({
                 ? { originatingSessionFile: ctx.originatingSessionFile }
                 : {}),
               handoffOrigin: ctx.handoffOrigin,
+              ...(ctx.triggeringAuthorId !== undefined ? { triggeringAuthorId: ctx.triggeringAuthorId } : {}),
             }
           : {}),
       })
@@ -701,6 +702,10 @@ export async function startAgent({
     console.log(`[plugin] loaded ${summarizeLoaded(pluginsLoaded.loadedPlugins, pluginRegistry)}`)
   }
+  for (const f of pluginsLoaded.failedPlugins) {
+    console.warn(`[plugin] DEGRADED: "${f.entry}" disabled (${f.phase}): ${f.error}`)
+  }
   // Container-side portbroker is instantiated only when the host plumbed a
   // broker token in via env var. Outside the container (tests, ad-hoc dev
   // runs), the env var is absent and the broker stays off — same fence as

package/src/sandbox/availability.ts CHANGED Viewed

@@ -220,29 +220,46 @@ export function canBindProcSafely(options?: { bwrapPath?: string }): Promise<boo
 // leak-block guarantee — it only buys more chances to PROVE it.
 export const PROC_BIND_RETRY_BACKOFF_MS = [250, 1_000, 2_000, 4_000] as const
+// The retrying resolver returns the SAME three states as the probe, never a
+// boolean: 'safe' selects proc-bind; the two failure states stay DELIBERATELY
+// distinct so the caller reacts differently. 'unsafe' is a DEFINITIVE host fact
+// (a real cross-userns environ leak was observed, or the binary is genuinely
+// absent) — permanent, fail closed, retrying buys nothing. 'inconclusive' means
+// the safety probe never returned a definitive verdict within the backoff budget
+// (a boot-time CPU/IO storm tripping the probe's own timeout) — it proves NOTHING
+// about the host, so the SAME container can recover on a later call once the
+// spike passes. Folding these two into a single boolean `false` is what made a
+// transient boot-storm degrade look permanent: the caller degraded to tmpfs AND
+// told the model "retrying won't help", so a capable host stayed broken until
+// restart.
+//
 // proc-bind selection must distinguish "definitely unavailable" from "couldn't
-// verify right now". A DEFINITIVE verdict is final: 'safe'→true; a real userns
-// leak ('unsafe')→false with NO retry. Only an 'inconclusive' verdict (transient
-// probe failure that proves nothing about the host) is retried, because degrading
-// the bash call to tmpfs over a transient hiccup is what silently broke
+// verify right now". A DEFINITIVE verdict is final and is returned as-is with NO
+// retry: 'safe', or 'unsafe' (a real userns leak). Only an 'inconclusive'
+// verdict (a transient probe failure that proves nothing about the host) is
+// retried, because degrading the bash call to tmpfs over a transient hiccup is
+// what silently broke
 // external-package runs on capable hosts. 'inconclusive' is never cached
 // (see the cache type), so each retry re-probes from scratch. After the backoff
-// budget is exhausted we fail CLOSED — an unverified leak-block is never treated
-// as safe. Pure and dependency-injected (probe + sleep) so the retry policy is
-// unit-testable without spawning processes; production passes
-// getProcBindSafetyVerdict and Bun.sleep.
+// budget is exhausted we return 'inconclusive' — an unverified leak-block is
+// never treated as safe, but the RESULT (a transient unknown, not a definitive
+// 'unsafe') lets the caller offer a retryable degrade. Pure and
+// dependency-injected (probe + sleep) so the retry policy is unit-testable
+// without spawning processes; production passes getProcBindSafetyVerdict and
+// Bun.sleep.
 export async function resolveProcBindSafetyWithRetry(
   probe: () => Promise<ProcBindSafetyVerdict>,
   sleep: (ms: number) => Promise<void>,
   backoffMs: readonly number[] = PROC_BIND_RETRY_BACKOFF_MS,
-): Promise<boolean> {
+): Promise<ProcBindSafetyVerdict> {
   for (let attempt = 0; ; attempt++) {
     const verdict = await probe()
-    if (verdict === 'safe') return true
-    if (verdict === 'unsafe') return false
+    if (verdict === 'safe') return 'safe'
+    if (verdict === 'unsafe') return 'unsafe'
     const backoff = backoffMs[attempt]
-    if (backoff === undefined) return false
+    // Budget exhausted: still unverified. Report 'inconclusive' (NOT 'unsafe') so
+    // the caller knows this is a retryable unknown, not a definitive host fact.
+    if (backoff === undefined) return 'inconclusive'
     await sleep(backoff)
   }
 }
@@ -282,9 +299,14 @@ async function probeProcBind(bwrap: string): Promise<ProcBindProbe> {
     // marker: that proves the sentinel is dumpable, same-uid, AND that this pid is
     // OUR sentinel (not a reused pid), so the ONLY thing that can deny the read
     // from inside the sandbox is the child-userns boundary (rules out a false
-    // "blocked" from dumpable=0 / uid mismatch). If the parent can't read the
-    // marker, the sentinel setup is unsound — inconclusive, fail closed, no cache.
-    if (!(await parentReadsSentinelMarker(sentinelPid))) return INCONCLUSIVE
+    // "blocked" from dumpable=0 / uid mismatch). The marker can be absent for a
+    // moment right after Bun.spawn: the child pid exists before `/usr/bin/env -i
+    // SECRET=... /bin/sleep` has exec'd and replaced its environ. Treating that
+    // startup race as immediate INCONCLUSIVE made the retry budget collapse into
+    // pure backoff time (~7.25s) and produced the first-tool `bunx` degrade even
+    // though the same host proved safe on the next call. Wait briefly for the
+    // marker before deciding setup is unsound; a real failure still fails closed.
+    if (!(await waitForSentinelMarker(sentinelPid))) return INCONCLUSIVE
     const proc = Bun.spawn(
       [
@@ -387,6 +409,9 @@ async function probeProcBind(bwrap: string): Promise<ProcBindProbe> {
 // briefly-saturated box; a genuinely wedged runtime still trips it and degrades.
 const PROC_BIND_PROBE_TIMEOUT_MS = 12_000
+const PROC_BIND_SENTINEL_READY_TIMEOUT_MS = 1_000
+const PROC_BIND_SENTINEL_READY_POLL_MS = 25
 // Designated probe-script exit codes. ONLY these two are a cacheable verdict;
 // every other code (a setup failure, bwrap startup failure, a signal, 127, …) is
 // inconclusive and must NOT be cached — see the exit-code interpretation in
@@ -457,6 +482,24 @@ async function parentReadsSentinelMarker(sentinelPid: number): Promise<boolean>
   }
 }
+async function waitForSentinelMarker(
+  sentinelPid: number,
+  readMarker: (pid: number) => Promise<boolean> = parentReadsSentinelMarker,
+  sleep: (ms: number) => Promise<void> = (ms) => Bun.sleep(ms),
+  timeoutMs: number = PROC_BIND_SENTINEL_READY_TIMEOUT_MS,
+  pollMs: number = PROC_BIND_SENTINEL_READY_POLL_MS,
+  now: () => number = Date.now,
+): Promise<boolean> {
+  const deadline = now() + timeoutMs
+  for (;;) {
+    if (await readMarker(sentinelPid)) return true
+    if (now() >= deadline) return false
+    await sleep(pollMs)
+  }
+}
+export const _waitForSentinelMarkerForTests = waitForSentinelMarker
 export function _resetProcBindProbeCacheForTests(): void {
   procBindProbeCache.clear()
   procBindProbeInFlight.clear()

package/src/sandbox/errors.ts CHANGED Viewed

@@ -41,3 +41,29 @@ export class SandboxDegradedProcError extends Error {
     )
   }
 }
+// Distinct from SandboxDegradedProcError: that one is the PERMANENT verdict (a
+// real userns leak, or a host with no usable namespaces — retrying is futile).
+// This one fires when the proc-bind safety probe stayed 'inconclusive' through
+// its whole retry budget — typically a boot-time CPU/IO storm tripping the
+// probe's own timeout. The host is very likely capable; the probe just couldn't
+// prove it RIGHT NOW. Because an 'inconclusive' verdict is never cached, the next
+// bash call re-probes from scratch and usually promotes to proc-bind once the
+// spike passes. So the message tells the model the OPPOSITE of the permanent
+// case: retrying IS the fix. Without this split, a single unlucky boot-storm
+// probe degraded a fully-capable container to tmpfs and told the agent it was a
+// permanent environment limit — so it gave up instead of retrying.
+export class SandboxProcProbeUnverifiedError extends Error {
+  override readonly name = 'SandboxProcProbeUnverifiedError'
+  constructor() {
+    super(
+      'sandbox /proc strategy could not be verified right now: the cap-free ' +
+        'proc-bind safety probe stayed inconclusive (usually transient load on the ' +
+        'host while the container was starting up), so this bun package command ' +
+        '(bun install / bun add / bunx / bun run) was held back rather than run ' +
+        'under a broken /proc. This is almost certainly temporary and NOT a problem ' +
+        'with the command or the package: retry the SAME command in a few seconds — ' +
+        'the next attempt re-probes and normally succeeds.',
+    )
+  }
+}

package/src/sandbox/index.ts CHANGED Viewed

@@ -27,7 +27,12 @@ export { resolveSandboxSymlinks, type SandboxSymlinkSpec } from './symlinks'
 export { commandNeedsRealProc, isPackageInstallCommand } from './package-install'
 export { ensureSessionTmpDir, isUnderTmp, mapVirtualTmpPath, SESSION_TMP_ROOT, sessionTmpDir } from './session-tmp'
 export { formatCommand, shellQuote } from './quote'
-export { SandboxDegradedProcError, SandboxPolicyError, SandboxUnavailableError } from './errors'
+export {
+  SandboxDegradedProcError,
+  SandboxPolicyError,
+  SandboxProcProbeUnverifiedError,
+  SandboxUnavailableError,
+} from './errors'
 export {
   DEFAULT_SANDBOX_ENV,
   type SandboxCommandFilter,

package/src/sandbox/policy.ts CHANGED Viewed

@@ -142,8 +142,19 @@ export type SandboxPolicy = {
 // guard: the container env holds FIREWORKS_API_KEY and GH_TOKEN, and env
 // inheritance is the single highest-risk exfil path for prompt-injected bash.
 // HOME points at /tmp because the sandbox mounts /tmp as a fresh tmpfs.
+//
+// BUN_TMPDIR / BUN_INSTALL both point under /tmp because `--clearenv` strips
+// the host's TMPDIR, and bun refuses to run without a writable scratch dir it
+// can discover: `bunx`, `bun add`, and `bun run <pkg-bin>` abort with
+// "Unexpected accessing temporary directory. Please set $BUN_TMPDIR or
+// $BUN_INSTALL". /tmp is always writable inside the sandbox (fresh tmpfs, or
+// the per-session bind that overrides it), so both are safe targets. Without
+// these, every sandboxed bun invocation — the core subagent install path —
+// fails before it starts.
 export const DEFAULT_SANDBOX_ENV: Record<string, string> = {
   PATH: '/usr/local/bin:/usr/bin:/bin',
   HOME: '/tmp',
   LANG: 'C.UTF-8',
+  BUN_TMPDIR: '/tmp',
+  BUN_INSTALL: '/tmp/.bun',
 }

package/src/skills/typeclaw-config/SKILL.md CHANGED Viewed

@@ -16,7 +16,7 @@ The runtime reads `typeclaw.json` at container startup. Some fields are picked u
 - `port` — the TCP port the websocket server binds to inside the container. The TUI on the host stage connects to this. Default `8973`. **Restart-required.**
 - `model` — a fully-qualified `<provider>/<model-id>` string. The runtime resolves this against the built-in provider registry to decide which API to call for every turn. **Live-reloadable.**
 - `mounts` — additional host directories the user has chosen to expose to you. Each entry produces a `docker run -v <hostPath>:/agent/mounts/<name>` flag at `typeclaw start` time, so the directory shows up at `mounts/<name>` inside your agent folder. **The launcher reads this; the running container does not.** Editing `mounts` only takes effect on the next `typeclaw start`. **Restart-required.**
-- `plugins` — array of plugin package names loaded at server boot. **Restart-required.**
+- `plugins` — array of plugin module specifiers loaded at server boot: npm package names for published plugins, or relative paths for local plugins you are authoring. **Restart-required.**
 - `alias` — additional names the agent answers to when a channel message contains its name in plain text (no `<@id>` mention). The agent folder's directory name (`basename(agentDir)`) is always implicit; `alias` adds further forms (Latin transliteration, nicknames, Korean particles, etc.). Used by the channel engagement layer alongside the structural mention/reply/dm triggers. **Live-reloadable.**
 - `channels` — per-adapter engagement triggers and history-prefetch knobs for external messengers (Discord, Slack, Telegram, KakaoTalk), plus the GitHub channel (a webhook-driven adapter that watches repos and reviews PRs — see **GitHub channel** below). Access control lives in `roles`, not here. **Live-reloadable** — edits take effect on the next `reload` without a container restart.
 - `docker.file` — controls what ships in the autogenerated container image. Two layers: (1) **toggles** for opinionated package installs — `tmux`, `gh`, `python`, `xvfb` default on (`true`); `cjkFonts` defaults to `"auto"` (resolved from host locale at start); `ffmpeg`, `cloudflared`, `claudeCode`, `codexCli` default off (`false`) — set a toggle to `false` to omit, or to a version string like `"2.40.0"` to apt-pin (`python`, `cjkFonts`, `cloudflared`, `xvfb`, `claudeCode`, and `codexCli` are boolean-only). Most toggles install apt packages with BuildKit cache mounts; `cloudflared`, `claudeCode`, and `codexCli` are exceptions — `cloudflared` downloads the pinned GitHub release, `claudeCode` runs Anthropic's official `curl | bash` installer, `codexCli` `bun install`s the `@openai/codex` npm package. (2) **`append`** — extra Dockerfile lines spliced in right before `ENTRYPOINT` for anything the toggles don't cover. The whole Dockerfile is rewritten on every `start` from the typeclaw template. Lives under the `docker` namespace alongside future Docker-related blocks (e.g. `docker.compose`). **Restart-required** (next `typeclaw start` rebuilds the image).
@@ -45,7 +45,7 @@ You yourself cannot run `typeclaw restart` — that is a host-stage command and
 | `port`        | no       | integer          | 1–65535. Defaults to `8973` (T9 spelling of "TYPE"). Change only if the default collides with something on the user's host. **Restart-required.**                                                                                                                                                                                                                                                                                                                                                                                                                                                                        |
 | `model`       | no       | string           | Must be one of the values listed in the **Allowed models** section below. Defaults to `openai/gpt-5.4-nano`. **Live-reloadable.**                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        |
 | `mounts`      | no       | array of objects | Host directories bind-mounted into your container. Defaults to `[]` (no host paths exposed). Omitted from scaffolded `typeclaw.json` — add it only when the user wants host paths exposed. See **Mounts** section below. **Restart-required.**                                                                                                                                                                                                                                                                                                                                                                           |
-| `plugins`     | no       | array of strings | Plugin package names loaded at server boot. Defaults to `[]`. **Restart-required.** Plugin-owned config blocks live alongside as additional top-level keys; see **Plugin config blocks**.                                                                                                                                                                                                                                                                                                                                                                                                                                |
+| `plugins`     | no       | array of strings | Plugin module specifiers loaded at server boot: use npm package names for published plugins (for example, `typeclaw-gws-multi-account`) and relative paths only for local plugins you are authoring (for example, `./packages/my-plugin`). Defaults to `[]`. **Restart-required.** Plugin-owned config blocks live alongside as additional top-level keys; see **Plugin config blocks**.                                                                                                                                                                                                                                 |
 | `alias`       | no       | array of strings | Additional names the agent answers to in channel engagement, on top of the implicit `basename(agentDir)`. Each entry is a non-empty trimmed string matched case-insensitively as a substring of the inbound text. Defaults to `[]`. Hatching populates this with the agent's chosen name. See **Channels and Alias** below for schema/edit mechanics; the matching behavior lives in the `typeclaw-channels` skill. **Live-reloadable.**                                                                                                                                                                                 |
 | `channels`    | no       | object           | Per-adapter engagement triggers and history-prefetch knobs for external messengers (plus the `github` webhook channel — see **GitHub channel** below). Defaults to `{}` (no adapters configured). `typeclaw init` scaffolds an empty block per requested adapter (e.g. `"discord-bot": {}`) and the schema fills in defaults. Channel access control lives in `roles` — see the `typeclaw-permissions` skill; engagement behavior lives in `typeclaw-channels`. **Live-reloadable.** See **Channels and Alias** below.                                                                                                   |
 | `portForward` | no       | object           | Allow/deny policy for the host-stage portbroker that auto-forwards container LISTEN ports to `127.0.0.1` on the host. Defaults to `{ "allow": "*" }` (forward everything). Omitted from scaffolded `typeclaw.json`. **Restart-required.** See **portForward** section below.                                                                                                                                                                                                                                                                                                                                             |

package/src/skills/typeclaw-monorepo/SKILL.md CHANGED Viewed

@@ -11,10 +11,10 @@ Your agent folder is a **bun monorepo**. The root `package.json` declares `"work
 You have two free-write zones at the agent root: `workspace/` and `packages/`. Both are exempt from the non-workspace-write guard so you can edit them without acknowledging anything, but their relationship to git is opposite, and picking the wrong one is the most common mistake.
-| Zone         | Purpose                                                            | Tracked in git?                                                                                           | Reusable?                                    |
-| ------------ | ------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------- | -------------------------------------------- |
-| `workspace/` | One-off scripts, scratch work, throwaway experiments               | **No** — entire dir is gitignored                                                                         | No (the dir itself is invisible to git)      |
-| `packages/`  | Reusable packages, custom plugins, shared utilities, internal libs | **Yes** — every file is tracked and MUST be committed when edited (only `*/node_modules/` ignored inside) | Yes (committed and importable across agents) |
+| Zone         | Purpose                                                                  | Tracked in git?                                                                                           | Reusable?                                    |
+| ------------ | ------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------- | -------------------------------------------- |
+| `workspace/` | One-off scripts, scratch work, throwaway experiments                     | **No** — entire dir is gitignored                                                                         | No (the dir itself is invisible to git)      |
+| `packages/`  | Reusable packages, custom local plugins, shared utilities, internal libs | **Yes** — every file is tracked and MUST be committed when edited (only `*/node_modules/` ignored inside) | Yes (committed and importable across agents) |
 The two columns to internalize:
@@ -26,7 +26,7 @@ Anything you put in `packages/` MUST land in a commit — see `typeclaw-git`. Th
 **Decision rule, top to bottom — stop at the first match:**
 1. **Will another script or another part of the agent folder import this?** → `packages/<name>/`. Even if "another part" is just "tomorrow's me writing a sibling script", a reusable thing belongs here.
-2. **Is this a custom typeclaw plugin** (anything you'd list in `typeclaw.json`'s `plugins`)? → `packages/<plugin-name>/`. Always. Plugins are the canonical packages.
+2. **Is this a custom local typeclaw plugin you are authoring?** → `packages/<plugin-name>/`. If you are adding an existing or published plugin, keep its npm package specifier in `typeclaw.json#plugins`; do not create or guess a `./packages/...` path.
 3. **Will the user want to track this in git, see it in PRs, depend on it from a cron job?** → `packages/<name>/`.
 4. **Is this throwaway** — a one-shot data transformation, a debug script, a scratch experiment that exists for one task and dies? → `workspace/`.
 5. **Default if unsure** → `packages/<name>/`. Better to commit something reusable than to lose something useful in the gitignored void.
@@ -97,6 +97,8 @@ To depend on a workspace package from the **agent root** (e.g. so cron `exec` jo
 ## Custom typeclaw plugins live under `packages/`
+This section is only for plugins you are **authoring locally** in the agent folder. If the user asks to add/install an existing or published plugin, use the plugin's npm package specifier in `typeclaw.json#plugins` (for example, `"typeclaw-gws-multi-account"`) and do **not** fabricate a `./packages/...` path.
 If you are writing a typeclaw plugin (anything that uses `definePlugin` from `typeclaw/plugin`), the canonical home is `packages/<plugin-name>/`. The workflow:
 1. **Author**: `packages/my-plugin/index.ts` exports `definePlugin({ ... })` as default.

package/src/skills/typeclaw-plugins/SKILL.md CHANGED Viewed

@@ -115,6 +115,13 @@ Without `configSchema`, `ctx.config` is `never` and any reference is a type erro
 The **derived name is the key** for the per-plugin config block at the top level of `typeclaw.json`. Two plugins with the same derived name are a boot error.
+Use the entry format that matches the plugin's source:
+- **Published npm plugin** → put the npm package specifier in `plugins[]`, e.g. `"typeclaw-gws-multi-account"` or `"typeclaw-plugin-standup-log@1.2.3"`. Do **not** invent a `./packages/...` path for a published package.
+- **Local plugin you are authoring in this agent folder** → put its relative path in `plugins[]`, e.g. `"./packages/my-plugin"`. The path must exist and point at local plugin code.
+If the user says to add/install an existing plugin by package name, preserve that package name. Only use `./packages/<name>` when you are creating a new local workspace package or wiring up one that already exists in this repo.
 ### Local path safety
 Local plugin paths **must resolve inside `agentDir`**. Absolute paths (`/etc/...`) and parent-traversing paths (`../../foo`) are rejected with:
@@ -125,9 +132,11 @@ plugin path escapes agent directory: <entry> (resolved to <abs-path>)
 This is why `./plugins/x.ts` works and `/Users/me/x.ts` does not.
-### Recommended location: `packages/<plugin-name>/`
+### Recommended location for new local plugins: `packages/<plugin-name>/`
+This section is about plugins you are **authoring locally**. For a published npm plugin, keep the npm package specifier in `plugins[]`; do not create or guess a local path.
-The agent folder is a **bun monorepo**, and `packages/` is its workspace root. **Custom plugins go there.** A `./packages/standup-log/` plugin is a real workspace package — bun installs its dependencies, the workspace symlink machinery makes it importable, and it lands in git like any other reusable code. Concretely:
+The agent folder is a **bun monorepo**, and `packages/` is its workspace root. **Custom local plugins go there.** A `./packages/standup-log/` plugin is a real workspace package — bun installs its dependencies, the workspace symlink machinery makes it importable, and it lands in git like any other reusable code. Concretely:
 ```
 packages/