npm - typeclaw - Version diff - 0.29.0 → 0.30.1 - Mend

typeclaw 0.29.0 → 0.30.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (35)

package/package.json +1 -1
package/scripts/verify-realproc-sandbox.sh +58 -0
package/src/agent/index.ts +6 -0
package/src/agent/live-subagents.ts +5 -0
package/src/agent/plugin-tools.ts +79 -10
package/src/agent/subagent-drain.ts +150 -0
package/src/agent/subagents.ts +34 -3
package/src/agent/system-prompt.ts +1 -1
package/src/agent/tools/spawn-subagent.ts +13 -1
package/src/bundled-plugins/bun-hygiene/README.md +12 -11
package/src/bundled-plugins/bun-hygiene/policy.ts +8 -3
package/src/bundled-plugins/github-cli-auth/approve-idempotency.ts +116 -35
package/src/bundled-plugins/github-cli-auth/effective-approval.ts +14 -9
package/src/bundled-plugins/github-cli-auth/index.ts +3 -3
package/src/bundled-plugins/planner/planner.ts +2 -1
package/src/bundled-plugins/researcher/researcher.ts +9 -2
package/src/bundled-plugins/reviewer/reviewer.ts +2 -1
package/src/channels/adapters/discord-bot-format.ts +191 -0
package/src/channels/adapters/discord-bot.ts +2 -1
package/src/channels/adapters/github/inbound.ts +88 -30
package/src/channels/adapters/github/review-state.ts +27 -0
package/src/channels/github-review-claim.ts +15 -3
package/src/channels/outbound-flood-filter.ts +70 -3
package/src/channels/router.ts +53 -0
package/src/compose/discover.ts +5 -1
package/src/config/config.ts +38 -0
package/src/container/start.ts +14 -0
package/src/migrations/index.ts +35 -0
package/src/migrations/secrets-v1-to-v2.ts +344 -0
package/src/run/index.ts +13 -0
package/src/sandbox/availability.ts +12 -0
package/src/sandbox/build.ts +53 -9
package/src/sandbox/index.ts +1 -1
package/src/sandbox/policy.ts +17 -1
package/typeclaw.schema.json +24 -0

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "typeclaw",
-  "version": "0.29.0",
+  "version": "0.30.1",
   "homepage": "https://github.com/typeclaw/typeclaw#readme",
   "bugs": {
     "url": "https://github.com/typeclaw/typeclaw/issues"

package/scripts/verify-realproc-sandbox.sh ADDED

@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+# Manual acceptance check for the sandbox.realProc strategy (src/sandbox/build.ts).
+# Not a unit test: it needs a Linux container with CAP_SYS_ADMIN, which the macOS
+# dev host and standard CI runners cannot provide, so it lives here as an
+# operator-runnable script instead of a skipIf-everywhere test.
+#
+# Proves two properties of the two-phase `unshare --mount-proc -- bwrap` sandbox:
+#   1. An external package runner (bunx) runs to completion (no Bun "NotDir").
+#   2. A secret in a sibling process's environment NEVER appears in any
+#      /proc/*/environ the sandbox can read (PID-namespace scoping holds).
+#
+# Usage: scripts/verify-realproc-sandbox.sh [image]
+#   image defaults to ghcr.io/typeclaw/typeclaw-base:<version-from-package.json>
+set -euo pipefail
+IMAGE="${1:-}"
+if [ -z "$IMAGE" ]; then
+  version="$(node -p "require('./package.json').version" 2>/dev/null || echo latest)"
+  IMAGE="ghcr.io/typeclaw/typeclaw-base:${version}"
+fi
+secret="TYPECLAW_REALPROC_LEAK_CANARY_$$"
+inner='
+echo "=== bunx via real-proc sandbox ==="
+bunx cowsay "real-proc ok" 2>&1 | tail -6
+echo "bunx exit=$?"
+echo "=== visible pids (sandbox should NOT see the canary holder) ==="
+ls /proc | grep -E "^[0-9]+$" | tr "\n" " "; echo
+echo "=== leak scan ==="
+found=0
+for f in /proc/[0-9]*/environ; do
+  if tr "\0" "\n" < "$f" 2>/dev/null | grep -q "CANARY_TOKEN"; then
+    echo "LEAK:$f"; found=1
+  fi
+done
+if [ $found -eq 0 ]; then echo "NO_LEAK_CONFIRMED"; else echo "LEAK_DETECTED"; exit 1; fi
+'
+inner="${inner//CANARY_TOKEN/$secret}"
+# The real-proc argv shape mirrors buildArgv() in src/sandbox/build.ts. Keep in
+# sync if that helper changes.
+runner="
+${secret}_holder() { :; }
+env CANARY=${secret} sleep 120 &
+unshare --pid --fork --mount --mount-proc -- \
+  bwrap --unshare-user --unshare-ipc --unshare-uts --unshare-cgroup \
+        --new-session --die-with-parent --clearenv \
+        --setenv PATH /usr/local/bin:/usr/bin:/bin --setenv HOME /tmp --setenv LANG C.UTF-8 \
+        --ro-bind /usr /usr --ro-bind /etc /etc --dev /dev --tmpfs /tmp \
+        --ro-bind-try /bin /bin --ro-bind-try /sbin /sbin --ro-bind-try /lib /lib --ro-bind-try /lib64 /lib64 \
+        --ro-bind /proc /proc \
+        bash -c '$inner'
+"
+echo "Image: $IMAGE"
+docker run --rm --security-opt seccomp=unconfined --cap-add SYS_ADMIN \
+  -e "CANARY=${secret}" "$IMAGE" bash -c "$runner"

package/src/agent/index.ts CHANGED

@@ -213,6 +213,7 @@ export type CreateSessionOptions = {
   liveSubagentRegistry?: LiveSubagentRegistry
   subagentRegistry?: SubagentRegistry
   createSessionForSubagent?: CreateSessionForSubagent
+  allowBackgroundFromSubagent?: boolean
 }
 export type CreateSessionResult = {
@@ -357,6 +358,7 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
               getOrigin,
               permissions: options.permissions,
               stream: options.stream,
+              allowBackgroundFromSubagent: options.allowBackgroundFromSubagent,
             }),
           ]
         : [
@@ -726,6 +728,7 @@ export function buildSubagentOrchestrationTools(opts: {
   getOrigin: () => SessionOrigin | undefined
   permissions: PermissionService | undefined
   stream: Stream | undefined
+  allowBackgroundFromSubagent?: boolean
 }): ToolDefinition[] {
   if (
     opts.liveRegistry === undefined ||
@@ -745,6 +748,9 @@ export function buildSubagentOrchestrationTools(opts: {
       getOrigin: opts.getOrigin,
       ...(opts.permissions ? { permissions: opts.permissions } : {}),
       ...(opts.stream ? { stream: opts.stream } : {}),
+      ...(opts.allowBackgroundFromSubagent !== undefined
+        ? { allowBackgroundFromSubagent: opts.allowBackgroundFromSubagent }
+        : {}),
     }),
     createSubagentOutputTool({
       liveRegistry: opts.liveRegistry,

package/src/agent/live-subagents.ts CHANGED

@@ -23,6 +23,11 @@ export type LiveSubagent = {
   // subagent_output/subagent_cancel. Absent when no permission service was
   // active at spawn, in which case the cap fails closed.
   spawnedByRole?: string
+  // True when spawned with run_in_background. Only background spawns deliver
+  // their result out-of-band (via the subagent.completed broadcast and the
+  // parent's drain); synchronous spawns return their result inline as the tool
+  // result, so the drain MUST NOT re-prompt for them. See runSubagentDrain.
+  background?: boolean
   startedAt: number
   status: SubagentStatus
   completion?: SubagentCompletion

package/src/agent/plugin-tools.ts CHANGED

@@ -23,6 +23,7 @@ import {
   checkNonWorkspaceWriteGuard,
   checkSkillAuthoringGuard,
 } from '@/bundled-plugins/guard/policy'
+import { config } from '@/config/config'
 import type { PermissionService } from '@/permissions/permissions'
 import type {
   BuiltinToolRef,
@@ -40,6 +41,7 @@ import {
   ensureSessionTmpDir,
   mapVirtualTmpPath,
   resolveHiddenPaths,
+  resolveProcSelfExe,
   resolveProtectedZones,
   resolveWritableZones,
   subtractMasked,
@@ -463,13 +465,24 @@ export function wrapAgentToolAsCustomToolDefinition<TParams extends TSchema, TDe
         await applyBashSandbox(mutableArgs, opts.permissions, liveOrigin, opts.agentDir, opts.sessionId, bashEnvOverlay)
       }
-      if (TMP_REDIRECT_TOOLS.has(tool.name) && opts.permissions !== undefined) {
-        await applyTmpPathRedirect(mutableArgs, opts.permissions, liveOrigin, opts.agentDir, opts.sessionId)
-      }
+      const tmpRedirect =
+        TMP_REDIRECT_TOOLS.has(tool.name) && opts.permissions !== undefined
+          ? await applyTmpPathRedirect(mutableArgs, opts.permissions, liveOrigin, opts.agentDir, opts.sessionId)
+          : undefined
-      const result = await bashEnvStore.run(bashEnvOverlay, () =>
-        tool.execute(toolCallId, mutableArgs as Static<TParams>, signal, onUpdate),
-      )
+      let rawResult: ToolResult
+      try {
+        rawResult = await bashEnvStore.run(bashEnvOverlay, () =>
+          tool.execute(toolCallId, mutableArgs as Static<TParams>, signal, onUpdate),
+        )
+      } catch (error) {
+        // A throwing tool (pi's bash rejects on non-zero exit) must still run
+        // tool.after so cleanup hooks fire — e.g. the github approve guard's
+        // release, whose absence stranded a PR as "already approved" (PR #672).
+        await runToolAfterSafely(opts, tool.name, toolCallId, toErrorResult(error))
+        throw error
+      }
+      const result = tmpRedirect !== undefined ? restoreTmpPathInResult(rawResult, tmpRedirect) : rawResult
       const resolved = loopGate.resolve({ content: result.content as ContentPart[], details: result.details })
       if ('deferredBlock' in resolved) {
         fireLoopAbort(opts.getAbort)
@@ -490,6 +503,26 @@ export function wrapAgentToolAsCustomToolDefinition<TParams extends TSchema, TDe
   })
 }
+function toErrorResult(error: unknown): ToolResult {
+  const message = error instanceof Error ? error.message : String(error)
+  return { content: [{ type: 'text', text: message }], details: { error: message } }
+}
+// The original tool error must always propagate, so a failure inside the
+// after-hook itself is swallowed rather than masking the real cause.
+async function runToolAfterSafely(
+  opts: WrapSystemToolOptions,
+  tool: string,
+  callId: string,
+  result: ToolResult,
+): Promise<void> {
+  try {
+    await opts.hooks.runToolAfter({ tool, sessionId: opts.sessionId, callId, result })
+  } catch {
+    // intentionally ignored: never mask the originating tool error
+  }
+}
 export function defaultBuiltinPiAgentTools(): AgentTool<any, any>[] {
   return [piReadTool, piBashTool, piEditTool, piWriteTool, piGrepTool, piFindTool, piLsTool]
 }
@@ -550,6 +583,17 @@ async function applyBashSandbox(
   // bwrap does --clearenv, so the overlay must be re-introduced via env.set or
   // it would never reach the sandboxed process (the non-sandboxed spawnHook
   // path does not run when the command is rewritten to a bwrap invocation).
+  // 'real-proc' gives a sandboxed JS package runner a working /proc/self/{fd,
+  // maps} so `bunx`/`bun add`/`bun run <pkg>` stop aborting with Bun's NotDir.
+  // Opt-in (default 'tmpfs') because it makes start.ts grant the container
+  // CAP_SYS_ADMIN at boot. Read from the boot-time `config` snapshot, NOT live
+  // getConfig(): sandbox.realProc is restart-required, and the strategy MUST
+  // track the boot-time capability. A `typeclaw reload` that flips realProc to
+  // true would otherwise make this emit `unshare --mount-proc` in a container
+  // booted WITHOUT CAP_SYS_ADMIN, so the mount fails instead of the old tmpfs
+  // strategy holding until restart. `config` never changes on reload.
+  // procSelfExe is only consumed by the 'tmpfs' branch.
+  const realProc = config.sandbox.realProc
   const { commandString } = buildSandboxedCommand(command, {
     mounts: [
       { type: 'ro-bind', source: agentDir, dest: agentDir },
@@ -560,6 +604,8 @@ async function applyBashSandbox(
     protected: protectedZones,
     network: 'inherit',
     cwd: agentDir,
+    proc: realProc ? 'real-proc' : 'tmpfs',
+    procSelfExe: resolveProcSelfExe(),
     ...(envOverlay !== undefined ? { env: { set: envOverlay } } : {}),
   })
   mutableArgs.command = commandString
@@ -579,24 +625,47 @@ const TMP_REDIRECT_TOOLS = new Set(['read', 'write', 'edit', 'grep', 'find', 'ls
 // different files. Rewriting the file tool's on-disk path to the same session
 // backing dir makes every layer resolve /tmp/foo to one file. Unsandboxed roles
 // (empty masks) are left untouched: their bash already shares the real /tmp.
+type TmpRedirect = { original: string; backing: string }
 async function applyTmpPathRedirect(
   mutableArgs: Record<string, unknown>,
   permissions: PermissionService,
   origin: SessionOrigin | undefined,
   agentDir: string,
   sessionId: string,
-): Promise<void> {
+): Promise<TmpRedirect | undefined> {
   const rawPath = mutableArgs.path
-  if (typeof rawPath !== 'string') return
+  if (typeof rawPath !== 'string') return undefined
   const { dirs, files } = resolveHiddenPaths(permissions, origin, agentDir)
-  if (dirs.length === 0 && files.length === 0) return
+  if (dirs.length === 0 && files.length === 0) return undefined
   const backing = mapVirtualTmpPath(agentDir, sessionId, rawPath)
-  if (backing === undefined) return
+  if (backing === undefined || backing === rawPath) return undefined
   await ensureSessionTmpDir(sessionId)
   mutableArgs.path = backing
+  return { original: rawPath, backing }
+}
+// The redirect swaps the model-facing /tmp path for its session backing dir
+// before execution; the file tool then echoes that backing path in its receipt
+// text and details. Reverse it on the way out so the model only ever sees the
+// path it asked for — a leaked backing path is unreachable inside the bwrap
+// bash sandbox, so reusing it in `gh api --input` fails (the PR #672 strand).
+function restoreTmpPathInResult(result: ToolResult, redirect: TmpRedirect): ToolResult {
+  const content = (result.content as ContentPart[]).map((part) =>
+    part.type === 'text' ? { ...part, text: part.text.split(redirect.backing).join(redirect.original) } : part,
+  )
+  const details =
+    isRecord(result.details) && result.details.path === redirect.backing
+      ? { ...result.details, path: redirect.original }
+      : result.details
+  return { content, details }
+}
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null
 }
 function appendLoopWarning(result: ToolResult, message: string): ToolResult {

package/src/agent/subagent-drain.ts ADDED

@@ -0,0 +1,150 @@
+import type { Stream, Unsubscribe } from '@/stream'
+import type { LiveSubagentRegistry } from './live-subagents'
+import { parseSubagentCompletedPayload, renderSubagentCompletionReminder } from './subagent-completion-reminder'
+// Presence of this capability is the single signal that background spawning is
+// permitted from a subagent (see the spawn_subagent guard); absence keeps the
+// subagent a one-shot leaf. It carries everything the drain needs: the shared
+// Stream to listen on, the subagent's own sessionId to filter completions by,
+// and the registry that is the source of truth for child state.
+export type SubagentBackgroundDrain = {
+  stream: Stream
+  sessionId: string
+  liveRegistry: LiveSubagentRegistry
+}
+export type DrainPrompt = (text: string) => Promise<void>
+export type RunSubagentDrainOptions = {
+  drain: SubagentBackgroundDrain
+  prompt: DrainPrompt
+  // Cooperative cancellation: when this returns true the loop stops re-prompting
+  // and returns, letting the caller's timeout/abort path dispose the session.
+  cancelled?: () => boolean
+}
+// Re-prompts a subagent with its children's completion reminders until a fixed
+// point, called after the subagent's initial prompt resolves. The registry is
+// the source of truth; stream broadcasts are only wakeups, so a duplicated or
+// missed broadcast cannot corrupt termination (every iteration re-derives state
+// from the registry). Each child's reminder is delivered at most once (tracked
+// by taskId). Terminates only when no children are running AND none are
+// completed-but-undelivered; a child spawned during a reminder turn reappears as
+// `running` in the next snapshot and keeps the loop alive, so no separate
+// "spawned nothing" flag is needed. The watch MUST have been started before the
+// initial prompt (see `beginSubagentDrainWatch`) to close the lost-wakeup race.
+export async function runSubagentDrain(watch: SubagentDrainWatch, options: RunSubagentDrainOptions): Promise<void> {
+  const { drain, prompt, cancelled } = options
+  const delivered = new Set<string>()
+  try {
+    while (cancelled === undefined || !cancelled()) {
+      const pending = collectPendingReminders(drain, delivered)
+      if (pending.length === 0) {
+        if (!hasRunningChildren(drain)) return
+        // Children still running but none newly completed: wait for the next
+        // wakeup, then re-derive from the registry.
+        const woke = await watch.waitForWakeup()
+        if (!woke) return
+        continue
+      }
+      for (const reminder of pending) {
+        if (cancelled !== undefined && cancelled()) return
+        delivered.add(reminder.taskId)
+        await prompt(reminder.text)
+      }
+    }
+  } finally {
+    watch.stop()
+  }
+}
+type PendingReminder = { taskId: string; text: string }
+function collectPendingReminders(drain: SubagentBackgroundDrain, delivered: Set<string>): PendingReminder[] {
+  const children = drain.liveRegistry.list({ parentSessionId: drain.sessionId })
+  const pending: PendingReminder[] = []
+  for (const child of children) {
+    // Synchronous spawns return their result inline via the tool call; only
+    // background spawns deliver out-of-band and need a drain reminder.
+    if (child.background !== true) continue
+    if (child.status === 'running') continue
+    if (delivered.has(child.taskId)) continue
+    const completion = child.completion
+    const text = renderSubagentCompletionReminder({
+      subagent: child.subagentName,
+      taskId: child.taskId,
+      ok: child.status === 'completed',
+      durationMs: completion?.durationMs ?? 0,
+      ...(completion?.error !== undefined ? { error: completion.error } : {}),
+    })
+    pending.push({ taskId: child.taskId, text })
+  }
+  return pending
+}
+function hasRunningChildren(drain: SubagentBackgroundDrain): boolean {
+  // Only background children gate termination. A sync child still marked running
+  // in the registry settles via its inline tool call, never via a broadcast
+  // wakeup, so waiting on it would hang the drain forever.
+  return drain.liveRegistry
+    .list({ parentSessionId: drain.sessionId })
+    .some((c) => c.background === true && c.status === 'running')
+}
+export type SubagentDrainWatch = {
+  // Resolves true on a child-completion wakeup, false once stopped. A wakeup
+  // that arrives before anyone waits is latched (pendingWake), so a completion
+  // during the subagent's prompt is not lost.
+  waitForWakeup: () => Promise<boolean>
+  stop: () => void
+}
+export function beginSubagentDrainWatch(drain: SubagentBackgroundDrain): SubagentDrainWatch {
+  let stopped = false
+  let pendingWake = false
+  let resolveWaiter: ((woke: boolean) => void) | null = null
+  const wake = (): void => {
+    if (resolveWaiter !== null) {
+      const r = resolveWaiter
+      resolveWaiter = null
+      r(true)
+      return
+    }
+    pendingWake = true
+  }
+  const unsubscribe: Unsubscribe = drain.stream.subscribe({ target: { kind: 'broadcast' } }, (msg) => {
+    const parsed = parseSubagentCompletedPayload(msg.payload)
+    if (parsed === null) return
+    if (parsed.parentSessionId !== drain.sessionId) return
+    wake()
+  })
+  return {
+    waitForWakeup: () =>
+      new Promise<boolean>((resolve) => {
+        if (stopped) {
+          resolve(false)
+          return
+        }
+        if (pendingWake) {
+          pendingWake = false
+          resolve(true)
+          return
+        }
+        resolveWaiter = resolve
+      }),
+    stop: () => {
+      if (stopped) return
+      stopped = true
+      unsubscribe()
+      if (resolveWaiter !== null) {
+        const r = resolveWaiter
+        resolveWaiter = null
+        r(false)
+      }
+    },
+  }
+}

package/src/agent/subagents.ts CHANGED

@@ -7,6 +7,12 @@ import type { Stream, Unsubscribe } from '@/stream'
 import { type AgentSession, createSession } from './index'
 import { subscribeProviderErrors } from './provider-error'
 import type { SessionOrigin } from './session-origin'
+import {
+  beginSubagentDrainWatch,
+  runSubagentDrain,
+  type SubagentBackgroundDrain,
+  type SubagentDrainWatch,
+} from './subagent-drain'
 import { renderTurnTimeAnchor } from './system-prompt'
 import type { ToolResultBudget } from './tool-result-budget'
@@ -62,6 +68,12 @@ export type SubagentShared<P = unknown> = {
   // registry scoping. Default (unset/false) keeps the subagent a leaf — the
   // historical contract for explorer/scout/memory-logger/etc.
   canSpawnSubagents?: boolean
+  // Opt-in: allow this subagent to spawn background children AND drain their
+  // completions back into its own session (requires canSpawnSubagents). Default
+  // (unset/false) keeps background spawns denied from this subagent — it must
+  // use synchronous spawns. Only meaningful when the runtime wires the drain
+  // capability (createSessionForSubagent provides stream+sessionId+liveRegistry).
+  canBackgroundSpawnSubagents?: boolean
   // Wall-clock ceiling on a single spawn, enforced at the orchestration
   // layer (both `dispatchSpawnSubagent` and the stream-driven
   // `SubagentConsumer`). When exceeded, the orchestrator's `await` settles
@@ -116,6 +128,7 @@ export type CreateSessionForSubagentResult = {
   agentDir?: string
   origin?: SessionOrigin
   getTranscriptPath?: () => string | undefined
+  backgroundDrain?: SubagentBackgroundDrain
 }
 export type CreateSessionForSubagentOptions = {
   name?: string
@@ -152,6 +165,7 @@ type NormalizedSubagentSession = {
   agentDir: string | undefined
   origin: SessionOrigin | undefined
   getTranscriptPath: (() => string | undefined) | undefined
+  backgroundDrain: SubagentBackgroundDrain | undefined
 }
 function normalizeSubagentSession(result: AgentSession | CreateSessionForSubagentResult): NormalizedSubagentSession {
@@ -164,6 +178,7 @@ function normalizeSubagentSession(result: AgentSession | CreateSessionForSubagen
       agentDir: result.agentDir,
       origin: result.origin,
       getTranscriptPath: result.getTranscriptPath,
+      backgroundDrain: result.backgroundDrain,
     }
   }
   return {
@@ -174,6 +189,7 @@ function normalizeSubagentSession(result: AgentSession | CreateSessionForSubagen
     agentDir: undefined,
     origin: undefined,
     getTranscriptPath: undefined,
+    backgroundDrain: undefined,
   }
 }
@@ -214,14 +230,16 @@ export async function invokeSubagent(name: string, options: InvokeSubagentOption
   }
   const runSession: RunSession = async (override) => {
-    const { session, dispose, hooks, sessionId, agentDir, origin, getTranscriptPath } = normalizeSubagentSession(
-      await createSessionForSubagent(subagent, sessionOptions),
-    )
+    const { session, dispose, hooks, sessionId, agentDir, origin, getTranscriptPath, backgroundDrain } =
+      normalizeSubagentSession(await createSessionForSubagent(subagent, sessionOptions))
+    let aborted = false
+    let drainWatch: SubagentDrainWatch | undefined
     if (options.onSessionCreated !== undefined) {
       options.onSessionCreated({
         session,
         sessionId,
         abort: async () => {
+          aborted = true
           await session.abort()
         },
       })
@@ -239,6 +257,9 @@ export async function invokeSubagent(name: string, options: InvokeSubagentOption
       if (hooks && turnEvent !== undefined) {
         await hooks.runSessionTurnStart({ ...turnEvent, userPrompt: userPromptForTurn })
       }
+      if (backgroundDrain !== undefined) {
+        drainWatch = beginSubagentDrainWatch(backgroundDrain)
+      }
       try {
         await session.prompt(`${renderTurnTimeAnchor()}\n\n${userPromptForTurn}`)
       } finally {
@@ -246,6 +267,15 @@ export async function invokeSubagent(name: string, options: InvokeSubagentOption
           await hooks.runSessionTurnEnd(turnEvent)
         }
       }
+      if (drainWatch !== undefined && backgroundDrain !== undefined) {
+        await runSubagentDrain(drainWatch, {
+          drain: backgroundDrain,
+          prompt: async (text) => {
+            await session.prompt(`${renderTurnTimeAnchor()}\n\n${text}`)
+          },
+          cancelled: () => aborted,
+        })
+      }
       if (hooks && sessionId !== undefined) {
         await hooks.runSessionIdle({
           sessionId,
@@ -259,6 +289,7 @@ export async function invokeSubagent(name: string, options: InvokeSubagentOption
       if (hooks && sessionId !== undefined) {
         await hooks.runSessionEnd({ sessionId, ...(origin !== undefined ? { origin } : {}) })
       }
+      drainWatch?.stop()
       session.dispose()
       await dispose()
     }

package/src/agent/system-prompt.ts CHANGED

@@ -93,7 +93,7 @@ Delegate focused work to subagents via \`spawn_subagent\`, \`subagent_output\`,
 There are three delegation modes. Pick deliberately.
-**Mode A — Research fan-out.** Need information and the search is broad? Fire 2-5 subagents (usually \`explorer\`/\`scout\`) in parallel with \`run_in_background: true\`, then end your response. A \`<system-reminder>\` lands per completion; call \`subagent_output\` once per task_id to collect (it never blocks) and answer. Match the worker to the depth: a fast or narrow web lookup goes to \`scout\`; a fuzzy question that needs decomposition, many sources, cross-validation, and a synthesized verdict goes to \`researcher\` (don't do that grind inline with \`web_search\` yourself).
+**Mode A — Research fan-out.** Need information and the search is broad? Fire 2-5 subagents (usually \`explorer\`/\`scout\`) in parallel with \`run_in_background: true\`, then end your response. A \`<system-reminder>\` lands per completion; call \`subagent_output\` once per task_id to collect (it never blocks) and answer. Match the worker to the depth: a fast or narrow web lookup goes to \`scout\`; a fuzzy question that needs decomposition, many sources, cross-validation, and a synthesized verdict goes to \`researcher\` (don't do that grind inline with \`web_search\` yourself). When the user *explicitly* says "research"/"investigate" (or equivalent), you MUST spawn \`researcher\` — answering from training memory or a single inline \`web_search\` does not satisfy the request, even if you think you know the answer. (Fanning out \`scout\`/\`explorer\` underneath is fine, but it does not replace \`researcher\`.)
 **Mode B — Delegate-and-converse.** Asked to DO something long-running (>~30s: installs, builds, \`docker\`, scrapes, long test suites, multi-host loops, any noisy "fetch N and synthesize" chain)? Don't run it inline — blocking your own \`bash\` freezes the conversation and stalls the channel typing heartbeat (\`MAX_TYPING_HEARTBEAT_MS\`). Spawn one subagent (\`operator\` for side effects, \`scout\` for a quick web lookup, \`researcher\` for a deep multi-source "fetch N and synthesize" investigation, \`planner\` when a multi-step goal needs a sequenced, risk-aware plan before anyone acts) with \`run_in_background: true\`, acknowledge, and KEEP TALKING. Single fast calls (\`git status\`, one known-endpoint \`curl\`) stay inline. When the completion reminder lands, weave the result in; in a channel session, the completion \`<system-reminder>\` is NOT a user message but plain text is still invisible — Surface the result via \`channel_reply\` (or \`channel_send\`). If you already posted the substantive answer in the spawn turn, prefer \`skip_response({ reason: "result confirms prior reply" })\` over going silent.

package/src/agent/tools/spawn-subagent.ts CHANGED

@@ -42,6 +42,7 @@ export type CreateSpawnSubagentToolOptions = {
   stream?: Stream
   generateTaskId?: () => string
   now?: () => number
+  allowBackgroundFromSubagent?: boolean
 }
 export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions) {
@@ -56,6 +57,7 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
     stream,
     generateTaskId = () => `${SPAWN_TASK_ID_PREFIX}${randomUUID().replace(/-/g, '').slice(0, 12)}`,
     now = () => Date.now(),
+    allowBackgroundFromSubagent,
   } = options
   return defineTool({
@@ -81,7 +83,9 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
           description:
             'When true, the spawn returns immediately with a task_id; the subagent runs in the background and a system-reminder is delivered when it completes. ' +
             'When false (default), the spawn blocks until the subagent finishes and returns its final message synchronously. ' +
-            'Use background mode for long-running tasks where you want to keep the conversation moving (Mode B) or for parallel fan-out (Mode A).',
+            'For PARALLEL fan-out, do NOT use background mode: emit several spawn_subagent calls (sync, the default) in a SINGLE turn — they execute concurrently and all their results return together before your next turn. ' +
+            'Reserve background mode for a long-running task you want to keep the conversation moving alongside (Mode B). ' +
+            'NOTE: background mode from subagents is only available when that subagent is explicitly enabled to drain child results; otherwise use sync spawns batched in one turn instead.',
         }),
       ),
     }),
@@ -105,6 +109,13 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
           `subagent.spawn denied: maximum delegation depth (${MAX_SUBAGENT_DEPTH}) reached; a subagent at this depth cannot spawn further subagents`,
         )
       }
+      if (origin?.kind === 'subagent' && params.run_in_background === true && allowBackgroundFromSubagent !== true) {
+        return errorResult(
+          'subagent.spawn denied: background spawning is not available from a subagent session because the result cannot be delivered after this turn ends. ' +
+            'Retry with run_in_background=false (or omit it) — the synchronous spawn blocks until the child finishes and returns its result into your context, ' +
+            'which is what you need to fold the result into your output.',
+        )
+      }
       const taskId = generateTaskId()
       const subagentName = params.subagent_type
@@ -140,6 +151,7 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
         subagentName,
         parentSessionId,
         ...(spawnedByRole !== undefined ? { spawnedByRole } : {}),
+        background,
         startedAt,
         status: 'running' as const,
         abort: resolvedHandle.abort,

package/src/bundled-plugins/bun-hygiene/README.md CHANGED

@@ -3,7 +3,7 @@
 The bundled bun-hygiene plugin. Registers a `tool.before` hook that blocks two classes of `bash` command:
 1. **Global package installs** — `npm install -g`, `pnpm add -g`, `yarn global add`, `bun add -g`, and their `--global` / bundled-flag variants.
-2. **Non-bun package managers** — any `npm`, `npx`, `pnpm`, `pnpx`, or `yarn` invocation.
+2. **Non-bun install managers** — any `npm`, `pnpm`, or `yarn` invocation. The ephemeral runners `npx` and `pnpx` are **allowed** (alongside `bunx`): they execute a tool once without touching the dependency tree or writing a competing lockfile, so they don't undermine the bun-standardization this guard protects.
 This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]` entry to add. Both guards carry an `acknowledgeGuards` escape hatch (below) for the cases where the agent genuinely needs the blocked command.
@@ -11,16 +11,16 @@ This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]`
 **Global installs don't persist.** The agent folder is bind-mounted at `/agent`; everything else in the container — including `~/.bun`, `~/.npm`, and the global `node_modules` a global install writes to — is ephemeral and wiped on every `typeclaw restart`. An agent that runs `npm install -g some-cli` gets a tool that works for the rest of the session and silently vanishes on the next boot, leading to confusing "command not found" failures that look like regressions. The fix is to either add the dependency to `package.json` (`bun add <pkg>`, which lives in the bind-mounted folder and survives) or run it once without installing (`bunx <pkg>`).
-**The container standardizes on bun.** TypeClaw is Bun-native end to end (see the root README). Mixing in `npm`/`pnpm`/`yarn` produces competing lockfiles and install trees, and `npx` pulls a second package-execution path when `bunx` already covers it. Steering every package-manager call to bun keeps the dependency state coherent.
+**The container standardizes on bun for dependency management.** TypeClaw is Bun-native end to end (see the root README). Mixing in `npm`/`pnpm`/`yarn` installs produces competing lockfiles and install trees, so those are steered to bun. Ephemeral runners (`npx`/`pnpx`/`bunx`) are not install managers — they run a tool once and leave no lockfile or `node_modules` behind — so they're allowed for one-off execution.
 Both guards **block with guidance** rather than silently rewriting the command — the agent sees exactly why the command was rejected and what to run instead, the same UX as the bundled `security` and `guard` policies.
 ## Guards
-| Guard                  | Triggers on                                                                                       | Guidance in the block reason                                               |
-| ---------------------- | ------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------- |
-| `globalInstall`        | `npm`/`pnpm` install/add with `-g`/`--global`, `yarn global add`, `bun add -g` / `bun install -g` | Use `bun add <pkg>` (persists) or `bunx <pkg>` (ephemeral run).            |
-| `nonBunPackageManager` | `npm`, `npx`, `pnpm`, `pnpx`, `yarn` at a command boundary                                        | Use `bun install` / `bun add <pkg>`, and `bunx <pkg>` instead of npx/pnpx. |
+| Guard                  | Triggers on                                                                                       | Guidance in the block reason                                           |
+| ---------------------- | ------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------- |
+| `globalInstall`        | `npm`/`pnpm` install/add with `-g`/`--global`, `yarn global add`, `bun add -g` / `bun install -g` | Use `bun add <pkg>` (persists) or `bunx <pkg>` (ephemeral run).        |
+| `nonBunPackageManager` | `npm`, `pnpm`, `yarn` at a command boundary (`npx`/`pnpx`/`bunx` are allowed)                     | Use `bun install` / `bun add <pkg>`. Ephemeral runners are fine as-is. |
 A global install (e.g. `npm install -g x`) trips **only** `globalInstall`, not both — the global install is the more specific violation, so acknowledging `globalInstall` lets the command through without a second acknowledgement for `nonBunPackageManager`.
@@ -43,9 +43,9 @@ Both guards follow the repo-wide `acknowledgeGuards` convention (shared with the
 For each segment, the guard strips leading **preamble wrappers** (`sudo`, `env`, `command`, `exec`, `nice`, `nohup`, `stdbuf`, `setsid`, `time`, `xargs`, and any `VAR=val` assignment) — including their options, and the argument a flag consumes (`sudo -u nobody`, `nice -n 10`, `env -i`) — to find the real command word, then classifies:
-1. command word is `npm`/`npx`/`pnpm`/`pnpx`/`yarn` (or `bun`) **and** the segment has an install subcommand **and** a global flag → `globalInstall` (for `yarn`, the `global add` sequence must appear adjacent and in command position, so `yarn add global foo` — a local install of a package named `global` — is not misflagged);
-2. command word is a non-bun manager (not via global) → `nonBunPackageManager`;
-3. otherwise → allowed.
+1. command word is `npm`/`pnpm`/`yarn` (or `bun`) **and** the segment has an install subcommand **and** a global flag → `globalInstall` (for `yarn`, the `global add` sequence must appear adjacent and in command position, so `yarn add global foo` — a local install of a package named `global` — is not misflagged);
+2. command word is a non-bun install manager `npm`/`pnpm`/`yarn` (not via global) → `nonBunPackageManager`;
+3. otherwise (including the ephemeral runners `npx`/`pnpx`/`bunx`) → allowed.
 A `globalInstall` verdict on any segment wins over a plain non-bun verdict. This is a command-position detector, not a full shell parser — it doesn't interpret redirections or expansions beyond boundary marking — but it is linear-time and closes the structural gaps a single regex left open.
@@ -70,6 +70,7 @@ Because classification scans a segment's words as a set (after preamble strippin
 ## What is NOT blocked
 - `bun`, `bunx`, `bun run`, `bun add`, `bun install` (local) — the intended package commands. (`bun add -g` / `bun install -g` are still blocked as global installs: bun globals live in `~/.bun`, outside `/agent`, and are wiped on restart.)
+- `npx`, `pnpx` — ephemeral runners, allowed for one-off tool execution (they leave no lockfile or install tree). Even a global-install form passed through them is not blocked, because they don't install into the dependency tree at all.
 - A non-bun manager name appearing as a substring or argument: `my-npm-wrapper`, `./npm`, `cat npm-debug.log`, `git commit -m "drop npm"`, `grep -rn npx src/`, `echo "npm install -g foo"`. Only the **command word** of a segment is classified, so a manager name inside an argument, path, quoted string, or longer token never trips the guard.
 ## Ordering against other bundled plugins
@@ -78,5 +79,5 @@ Registered after `guard` in `src/run/bundled-plugins.ts`. It guards a disjoint s
 ## Tests
-- `policy.test.ts` — pure-function unit tests for the detection logic: every global-install form, every non-bun manager, the allowed-command set (bun/bunx, substrings, paths, quoted text), both bypasses, the global-install-takes-precedence rule, escaped/quoted evasions, leading-assignment preambles, newline-as-separator scoping, falsy `--global=`, option placement, and subshell/substitution detection.
-- `index.test.ts` — composition tests: the plugin registers the `tool.before` hook and wires it to the policy (block on global install, block on npx, allow bunx, honor the bypass).
+- `policy.test.ts` — pure-function unit tests for the detection logic: every global-install form, every non-bun install manager, the ephemeral-runner allowance (`npx`/`pnpx`/`bunx`, including behind preamble wrappers), the allowed-command set (bun/bunx, substrings, paths, quoted text), both bypasses, the global-install-takes-precedence rule, escaped/quoted evasions, leading-assignment preambles, newline-as-separator scoping, falsy `--global=`, option placement, and subshell/substitution detection.
+- `index.test.ts` — composition tests: the plugin registers the `tool.before` hook and wires it to the policy (block on global install, block on `npm install`, allow `bunx`/`npx`, honor the bypass).