npm - typeclaw - Versions diffs - 0.24.0 → 0.25.0 - Mend

typeclaw 0.24.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68)

package/README.md +1 -1
package/package.json +1 -1
package/src/agent/index.ts +42 -5
package/src/agent/llm-replay-sanitizer.ts +120 -0
package/src/agent/loop-guard.ts +34 -0
package/src/agent/multimodal/look-at.ts +1 -1
package/src/agent/plugin-tools.ts +90 -12
package/src/agent/session-origin.ts +30 -0
package/src/agent/subagent-completion-reminder.ts +23 -0
package/src/agent/subagents.ts +31 -2
package/src/agent/system-prompt.ts +1 -1
package/src/agent/tool-not-found-nudge.ts +8 -1
package/src/agent/tools/channel-reply.ts +3 -3
package/src/agent/tools/curl-impersonate.ts +2 -2
package/src/agent/tools/spawn-subagent.ts +19 -2
package/src/agent/tools/subagent-access.ts +40 -5
package/src/agent/tools/subagent-cancel.ts +3 -1
package/src/agent/tools/subagent-output.ts +6 -2
package/src/agent/tools/webfetch/fetch.ts +18 -18
package/src/agent/tools/webfetch/index.ts +1 -1
package/src/agent/tools/webfetch/tool.ts +13 -13
package/src/agent/tools/webfetch/types.ts +1 -1
package/src/agent/tools/websearch.ts +6 -6
package/src/bundled-plugins/backup/index.ts +40 -37
package/src/bundled-plugins/backup/runner.ts +22 -1
package/src/bundled-plugins/github-cli-auth/gh-command.ts +15 -7
package/src/bundled-plugins/guard/policies/non-workspace-write.ts +38 -1
package/src/bundled-plugins/memory/README.md +11 -11
package/src/bundled-plugins/memory/dreaming.ts +5 -0
package/src/bundled-plugins/memory/search-tool.ts +98 -1
package/src/bundled-plugins/operator/operator.ts +5 -1
package/src/bundled-plugins/reviewer/reviewer.ts +18 -9
package/src/bundled-plugins/reviewer/skills/code-review.ts +1 -1
package/src/bundled-plugins/reviewer/skills/general.ts +1 -1
package/src/bundled-plugins/scout/scout.ts +7 -7
package/src/bundled-plugins/security/policies/private-surface-read.ts +2 -2
package/src/bundled-plugins/security/policies/ssrf.ts +3 -3
package/src/bundled-plugins/tool-result-cap/README.md +1 -1
package/src/channels/adapters/github/inbound.ts +11 -0
package/src/channels/adapters/github/webhook-register.ts +32 -27
package/src/channels/router.ts +61 -23
package/src/channels/schema.ts +2 -1
package/src/channels/subagent-completion-bridge.ts +18 -18
package/src/channels/types.ts +1 -1
package/src/cli/inspect-controller.ts +130 -38
package/src/container/start.ts +7 -1
package/src/git/mutex.ts +22 -0
package/src/git/reconcile-ignored.ts +214 -0
package/src/hostd/daemon.ts +26 -1
package/src/hostd/portbroker-manager.ts +7 -0
package/src/init/dockerfile.ts +1 -1
package/src/init/gitignore.ts +25 -16
package/src/inspect/index.ts +31 -4
package/src/inspect/loop.ts +16 -12
package/src/plugin/define.ts +2 -2
package/src/plugin/index.ts +2 -2
package/src/portbroker/hostd-client.ts +36 -13
package/src/run/index.ts +14 -0
package/src/sandbox/build.ts +10 -0
package/src/sandbox/index.ts +9 -1
package/src/sandbox/policy.ts +12 -0
package/src/sandbox/session-tmp.ts +43 -0
package/src/sandbox/writable-zones.ts +103 -3
package/src/server/command-runner.ts +1 -1
package/src/server/index.ts +8 -0
package/src/skills/typeclaw-channel-github/SKILL.md +37 -10
package/src/skills/typeclaw-memory/SKILL.md +3 -1
package/src/tui/format.ts +11 -11

package/README.md CHANGED

@@ -34,7 +34,7 @@ If you're like me, TypeClaw is the right choice. If not, that's fine too.
 - 💬 **Multi-channel** — Slack, Discord, Telegram, KakaoTalk, GitHub webhooks, and a websocket TUI; one agent, many inboxes
 - ⏰ **Cron** — schedule prompts or shell commands; per-job coalescing so slow jobs don't pile up
 - 📚 **Skills on demand** — markdown procedures the agent loads only when relevant; zero token cost until used
-- 🔎 **Web research** — bundled `scout` subagent plus first-class `websearch` and `webfetch` tools (DuckDuckGo via curl-impersonate, Wikipedia)
+- 🔎 **Web research** — bundled `scout` subagent plus first-class `web_search` and `web_fetch` tools (DuckDuckGo via curl-impersonate, Wikipedia)
 - 🛡 **Security guards** — bundled `tool.before` policies catch secret exfil, SSRF, prompt injection, tainted git remotes, and silent privilege escalation (role/cron promotion) before they fire
 - 📊 **Usage, inspect, doctor** — `typeclaw usage` reports token/$ spend per session, model, or day; `typeclaw inspect` replays a session transcript and tails live activity; `typeclaw doctor` diagnoses host, agent folder, and plugin state

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "typeclaw",
-  "version": "0.24.0",
+  "version": "0.25.0",
   "homepage": "https://github.com/typeclaw/typeclaw#readme",
   "bugs": {
     "url": "https://github.com/typeclaw/typeclaw/issues"

package/src/agent/index.ts CHANGED

@@ -35,6 +35,7 @@ import { getAuthFor } from './auth'
 import { createCompactionSettingsManager } from './compaction'
 import { renderGitNudge } from './git-nudge'
 import type { LiveSubagentRegistry } from './live-subagents'
+import { sanitizeMessagesForLlmReplay } from './llm-replay-sanitizer'
 import { applyModelRuntimeOverrides } from './model-overrides'
 import { createChannelLookAtTool, lookAtTool } from './multimodal'
 import {
@@ -72,8 +73,8 @@ import { createStreamSnapshotTool } from './tools/stream-snapshot'
 import { createSubagentCancelTool } from './tools/subagent-cancel'
 import { createSubagentOutputTool } from './tools/subagent-output'
 import { createTodoTools } from './tools/todo'
-import { webfetchTool } from './tools/webfetch'
-import { websearchTool } from './tools/websearch'
+import { webFetchTool } from './tools/webfetch'
+import { webSearchTool } from './tools/websearch'
 export type { SessionOrigin } from './session-origin'
@@ -327,14 +328,33 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
     }
   }
+  // Plugin subagents (operator/reviewer) see ONLY their declared builtins plus
+  // the orchestration tools — never the full main-session tool surface. The
+  // orchestration tools self-omit unless `liveSubagentRegistry`/
+  // `subagentRegistry`/`createSessionForSubagent` are wired (see
+  // buildSubagentOrchestrationTools); `spawn_subagent` enforces MAX_SUBAGENT_DEPTH
+  // at execute time so a depth-capped subagent's spawn fails closed even though
+  // the tool is present.
   const customSystemTools =
     options.customTools !== undefined
       ? options.customTools
       : options.pluginSubagent
-        ? resolvedSubagentBuiltins.toolDefinitions
+        ? [
+            ...resolvedSubagentBuiltins.toolDefinitions,
+            ...buildSubagentOrchestrationTools({
+              liveRegistry: options.liveSubagentRegistry,
+              registry: options.subagentRegistry,
+              createSessionForSubagent: options.createSessionForSubagent,
+              agentDir: options.plugins?.agentDir,
+              parentSessionId: sessionManager.getSessionId(),
+              getOrigin,
+              permissions: options.permissions,
+              stream: options.stream,
+            }),
+          ]
         : [
-            websearchTool,
-            webfetchTool,
+            webSearchTool,
+            webFetchTool,
             lookAtTool,
             ...(options.mcpManager ? buildMcpDispatcherToolDefinitions(options.mcpManager) : []),
             ...(options.reloadRegistry ? [createReloadTool({ registry: options.reloadRegistry })] : []),
@@ -405,6 +425,21 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
     ...(thinkingLevel ? { thinkingLevel } : {}),
   })
+  // Layer the replay sanitizer over pi's convertToLlm so a transcript with an
+  // orphaned toolResult (e.g. a torn-down restart turn) can't wedge the session
+  // with an Anthropic 400 on every replay. Runs on every provider call path
+  // that goes through the agent. Honors pi's contract that convertToLlm must
+  // not throw: on any failure it falls back to the unsanitized output.
+  const innerConvertToLlm = session.agent.convertToLlm
+  session.agent.convertToLlm = async (messages) => {
+    const converted = await innerConvertToLlm(messages)
+    try {
+      return sanitizeMessagesForLlmReplay(converted).messages
+    } catch {
+      return converted
+    }
+  }
   abortHolder.abort = () => {
     if (session.agent.signal?.aborted !== true) session.agent.abort()
   }
@@ -686,11 +721,13 @@ export function buildSubagentOrchestrationTools(opts: {
     createSubagentOutputTool({
       liveRegistry: opts.liveRegistry,
       getOrigin: opts.getOrigin,
+      callerSessionId: opts.parentSessionId,
       ...(opts.permissions ? { permissions: opts.permissions } : {}),
     }),
     createSubagentCancelTool({
       liveRegistry: opts.liveRegistry,
       getOrigin: opts.getOrigin,
+      callerSessionId: opts.parentSessionId,
       ...(opts.permissions ? { permissions: opts.permissions } : {}),
     }),
   ]

package/src/agent/llm-replay-sanitizer.ts ADDED

@@ -0,0 +1,120 @@
+// Defensive projection applied to the LLM message array right before each
+// provider call, layered on top of pi-coding-agent's `convertToLlm`. It exists
+// to un-wedge sessions whose persisted transcript contains a `toolResult` with
+// no live preceding `toolCall` — the exact shape Anthropic rejects with
+// "unexpected `tool_use_id` found in `tool_result` blocks" (HTTP 400).
+//
+// How a transcript gets poisoned: the self-`restart` tool exits the container
+// mid-turn. The assistant turn carrying the restart `toolCall` can land in the
+// JSONL with `stopReason: "error"/"aborted"` (or be torn down), while its
+// `toolResult` is persisted. On replay, pi-ai's provider-side `transformMessages`
+// DROPS error/aborted assistant turns but passes the `toolResult` through
+// unchanged, leaving a true orphan that the API rejects on every subsequent
+// turn — the session is permanently stuck.
+//
+// pi-ai's `transformMessages` already handles the inverse cases (a `toolCall`
+// with no result → synthetic "No result provided" result; error/aborted
+// assistant turns → dropped). The one gap is an orphaned `toolResult`. This
+// sanitizer fills exactly that gap and nothing more.
+//
+// Invariant (local pending-window, NOT a global id union — Anthropic requires
+// tool results to belong to the immediately preceding tool-use turn):
+//   1. Assistant turns with stopReason "error"/"aborted" are dropped here, so
+//      orphan detection sees the same message set the provider will after its
+//      own drop pass. Without this, a result tied to a dropped assistant would
+//      survive us and be orphaned downstream — the original bug.
+//   2. A `toolResult` is kept only if its `toolCallId` was declared by the most
+//      recent kept assistant tool-use turn AND has not already been emitted in
+//      that window. Any user or assistant message closes the window.
+//   3. Missing results are NOT synthesized here — pi-ai's existing pass inserts
+//      the synthetic placeholder, so dropping an orphan that leaves a bare
+//      `toolCall` is safe and self-healing.
+//
+// This is a read-only projection: it never mutates the persisted JSONL, so an
+// already-poisoned session becomes usable without destructive migration.
+import type { Message } from '@mariozechner/pi-ai'
+export type ReplaySanitizerStats = {
+  droppedOrphans: number
+  droppedDuplicates: number
+  droppedErrorAssistants: number
+}
+export type SanitizeResult = {
+  messages: Message[]
+  stats: ReplaySanitizerStats
+}
+function isErroredAssistant(message: Message): boolean {
+  return message.role === 'assistant' && (message.stopReason === 'error' || message.stopReason === 'aborted')
+}
+function toolCallIdsOf(message: Extract<Message, { role: 'assistant' }>): string[] {
+  return message.content
+    .filter((block): block is Extract<typeof block, { type: 'toolCall' }> => block.type === 'toolCall')
+    .map((block) => block.id)
+    .filter((id): id is string => typeof id === 'string' && id.length > 0)
+}
+export function sanitizeMessagesForLlmReplay(messages: Message[]): SanitizeResult {
+  const output: Message[] = []
+  const stats: ReplaySanitizerStats = {
+    droppedOrphans: 0,
+    droppedDuplicates: 0,
+    droppedErrorAssistants: 0,
+  }
+  let pendingToolCallIds = new Set<string>()
+  let emittedResultIds = new Set<string>()
+  const closeWindow = () => {
+    pendingToolCallIds = new Set()
+    emittedResultIds = new Set()
+  }
+  for (const message of messages) {
+    if (message.role === 'assistant') {
+      closeWindow()
+      // Mirror pi-ai's provider-side drop of incomplete turns so orphan
+      // detection matches the message set the provider will actually send.
+      if (isErroredAssistant(message)) {
+        stats.droppedErrorAssistants += 1
+        continue
+      }
+      const callIds = toolCallIdsOf(message)
+      if (callIds.length > 0) pendingToolCallIds = new Set(callIds)
+      output.push(message)
+      continue
+    }
+    if (message.role === 'user') {
+      closeWindow()
+      output.push(message)
+      continue
+    }
+    if (message.role === 'toolResult') {
+      const id = message.toolCallId
+      if (!pendingToolCallIds.has(id)) {
+        // Orphan: true orphan, stale late result, or result for a dropped
+        // error/aborted assistant turn.
+        stats.droppedOrphans += 1
+        continue
+      }
+      if (emittedResultIds.has(id)) {
+        stats.droppedDuplicates += 1
+        continue
+      }
+      emittedResultIds.add(id)
+      output.push(message)
+      continue
+    }
+    output.push(message)
+  }
+  return { messages: output, stats }
+}

package/src/agent/loop-guard.ts CHANGED

@@ -63,6 +63,14 @@ export type LoopGuard = {
   check: (sessionId: string, tool: string, args: unknown) => LoopGuardDecision
   reset: (sessionId: string) => void
   forget: (sessionId: string) => void
+  // Clears only the residue a single tool left behind in a session: its entries
+  // in the windowed history and, if the current consecutive streak belongs to
+  // that tool, the streak itself. Used when a state-change boundary makes a
+  // tool's prior calls irrelevant — e.g. a backgrounded subagent finishing
+  // makes the next `subagent_output` fetch legitimate even though earlier
+  // premature polls poisoned the window. Narrower than `forget`, so an
+  // unrelated tool's accumulating loop on the same session is preserved.
+  forgetTool: (sessionId: string, tool: string) => void
 }
 type SessionState = {
@@ -215,9 +223,35 @@ export function createLoopGuard(options: CreateLoopGuardOptions = {}): LoopGuard
     forget(sessionId) {
       sessions.delete(sessionId)
     },
+    forgetTool(sessionId, tool) {
+      const state = sessions.get(sessionId)
+      if (state === undefined) return
+      const retained: string[] = []
+      for (const sig of state.window) {
+        if (signatureBelongsToTool(sig, tool)) {
+          state.windowWarned.delete(sig)
+        } else {
+          retained.push(sig)
+        }
+      }
+      state.window = retained
+      if (signatureBelongsToTool(state.signature, tool)) {
+        state.signature = ''
+        state.count = 0
+        state.warned = false
+      }
+    },
   }
 }
+// Both signature builders prefix the tool name: exact signatures as `tool:...`
+// and path-coarsened ones as `tool#path:...`. A tool's residue is therefore any
+// signature starting with `tool:` or `tool#`, never a different tool whose name
+// merely shares this one as a prefix (the delimiter rules that out).
+function signatureBelongsToTool(signature: string, tool: string): boolean {
+  return signature.startsWith(`${tool}:`) || signature.startsWith(`${tool}#`)
+}
 function formatWarnMessage(tool: string, count: number): string {
   return (
     `\n\n[loop-guard] You have called \`${tool}\` ${count} times in a row with identical arguments. ` +

package/src/agent/multimodal/look-at.ts CHANGED

@@ -161,7 +161,7 @@ async function runLookAtImages(imageContents: ImageContent[], prompt: string | u
     origin,
     profile: 'vision',
     // Both knobs are required to fully disarm the subagent's tool surface:
-    // `customTools: []` blocks typeclaw's system tools (websearch/webfetch/
+    // `customTools: []` blocks typeclaw's system tools (web_search/web_fetch/
     // look_at/restart/...) — without it, the look_at tool would recurse
     // into itself. `tools: []` blocks pi-coding-agent's defaults
     // (read/bash/edit/write) — without it, a vision model could be talked

package/src/agent/plugin-tools.ts CHANGED

@@ -1,4 +1,5 @@
 import { AsyncLocalStorage } from 'node:async_hooks'
+import { join } from 'node:path'
 import type { AgentTool } from '@mariozechner/pi-agent-core'
 import {
@@ -36,7 +37,10 @@ import type {
 import {
   buildSandboxedCommand,
   ensureBwrapAvailable,
+  ensureSessionTmpDir,
+  mapVirtualTmpPath,
   resolveHiddenPaths,
+  resolveProtectedZones,
   resolveWritableZones,
   subtractMasked,
 } from '@/sandbox'
@@ -44,8 +48,8 @@ import {
 import { createLoopGuard, type LoopGuard } from './loop-guard'
 import { checkImageReadRedirect } from './multimodal/read-redirect'
 import type { SessionOrigin } from './session-origin'
-import { webfetchTool } from './tools/webfetch'
-import { websearchTool } from './tools/websearch'
+import { webFetchTool } from './tools/webfetch'
+import { webSearchTool } from './tools/websearch'
 // Process-wide loop guard. State is keyed by sessionId so concurrent sessions
 // don't interfere; the guard's own LRU bound keeps it from growing without
@@ -112,7 +116,7 @@ const ACKNOWLEDGE_GUARDS_SCHEMA = Type.Optional(
 // name-filter path); the wrapped customTools just replace the implementation
 // underneath so subagent and channel sessions share the same hook coverage.
 type PiAgentToolName = 'read' | 'bash' | 'edit' | 'write' | 'grep' | 'find' | 'ls'
-type TypeclawToolName = 'websearch' | 'webfetch'
+type TypeclawToolName = 'web_search' | 'web_fetch'
 const PI_AGENT_TOOL_MAP: Record<PiAgentToolName, AgentTool<any, any>> = {
   read: piReadTool,
@@ -125,8 +129,8 @@ const PI_AGENT_TOOL_MAP: Record<PiAgentToolName, AgentTool<any, any>> = {
 }
 const TYPECLAW_TOOL_DEFINITION_MAP: Record<TypeclawToolName, ToolDefinition<any, any, any>> = {
-  websearch: websearchTool,
-  webfetch: webfetchTool,
+  web_search: webSearchTool,
+  web_fetch: webFetchTool,
 }
 function isPiAgentToolName(name: string): name is PiAgentToolName {
@@ -458,7 +462,11 @@ export function wrapAgentToolAsCustomToolDefinition<TParams extends TSchema, TDe
       stripGuardAcknowledgements(mutableArgs)
       if (tool.name === 'bash' && opts.permissions !== undefined) {
-        await applyBashSandbox(mutableArgs, opts.permissions, liveOrigin, opts.agentDir, bashEnvOverlay)
+        await applyBashSandbox(mutableArgs, opts.permissions, liveOrigin, opts.agentDir, opts.sessionId, bashEnvOverlay)
+      }
+      if (TMP_REDIRECT_TOOLS.has(tool.name) && opts.permissions !== undefined) {
+        await applyTmpPathRedirect(mutableArgs, opts.permissions, liveOrigin, opts.agentDir, opts.sessionId)
       }
       const result = await bashEnvStore.run(bashEnvOverlay, () =>
@@ -505,6 +513,7 @@ async function applyBashSandbox(
   permissions: PermissionService,
   origin: SessionOrigin | undefined,
   agentDir: string,
+  sessionId: string,
   envOverlay: BashEnvOverlay | undefined,
 ): Promise<void> {
   const command = mutableArgs.command
@@ -514,21 +523,46 @@ async function applyBashSandbox(
   if (dirs.length === 0 && files.length === 0) return
   await ensureBwrapAvailable()
+  // Per-session /tmp: bind this session's scratch dir over the default
+  // --tmpfs /tmp so writes survive across the role's sandboxed bash calls AND
+  // match what the write/edit wrapper redirected a /tmp path to. The bind is
+  // emitted via policy.mounts (after the hardcoded --tmpfs /tmp), so last-op-
+  // wins makes it the live /tmp. Unsandboxed roles (empty masks, returned
+  // above) keep sharing the real container /tmp between write and bash.
+  const sessionTmp = await ensureSessionTmpDir(sessionId)
   // Write-confined jail for low-trust roles: bind the whole project read-only,
   // hide private/secret paths, then re-expose only the free-write scratch zones
-  // RW. Anything else under agentDir (.git/, node_modules/, agentDir root) is
-  // EROFS, so bash cannot sidestep the non-workspace-write guard. Trusted/owner
-  // never reach here (their masks are empty) and keep full unsandboxed access.
-  // subtractMasked drops any writable zone masked for this role so an RW bind
-  // never re-exposes a hidden path (e.g. a guest's masked workspace/).
+  // (workspace + root allowlist + .git) RW. The WORKING TREE outside those zones
+  // (node_modules/, agentDir root, non-allowlisted tracked files) stays EROFS, so
+  // bash cannot sidestep the non-workspace-write guard — and `git checkout` of a
+  // protected worktree path fails at the kernel. .git is RW so members can
+  // commit; .git/hooks + .git/config (and any writable core.hooksPath target)
+  // are re-protected RO (protected, rendered after writable, ensured to exist so
+  // an absent path can't be created+executed) so a hook-plant / core.hooksPath
+  // never becomes code execution in the unsandboxed runtime git ops. Trusted/owner never reach here
+  // (their masks are empty) and keep full unsandboxed access. subtractMasked
+  // drops any writable zone masked for this role so an RW bind never re-exposes a
+  // hidden path (e.g. a guest's masked workspace/).
   const writable = subtractMasked(await resolveWritableZones(agentDir), { dirs, files })
+  // subtractMasked again on the protected set: a protected RO bind renders after
+  // the masks (last-op-wins), so an unfiltered protected path nested under a
+  // masked dir (e.g. a guest's workspace/ when core.hooksPath=workspace/hooks)
+  // would re-expose the hidden real dir. A masked path is already non-writable
+  // for this role, so it needs no protection anyway.
+  const protectedZones = writable.dirs.includes(join(agentDir, '.git'))
+    ? subtractMasked(await resolveProtectedZones(agentDir), { dirs, files })
+    : { dirs: [], files: [] }
   // bwrap does --clearenv, so the overlay must be re-introduced via env.set or
   // it would never reach the sandboxed process (the non-sandboxed spawnHook
   // path does not run when the command is rewritten to a bwrap invocation).
   const { commandString } = buildSandboxedCommand(command, {
-    mounts: [{ type: 'ro-bind', source: agentDir, dest: agentDir }],
+    mounts: [
+      { type: 'ro-bind', source: agentDir, dest: agentDir },
+      { type: 'bind', source: sessionTmp, dest: '/tmp' },
+    ],
     masks: { dirs, files },
     writable,
+    protected: protectedZones,
     network: 'inherit',
     cwd: agentDir,
     ...(envOverlay !== undefined ? { env: { set: envOverlay } } : {}),
@@ -536,11 +570,55 @@ async function applyBashSandbox(
   mutableArgs.command = commandString
 }
+// The builtin file tools that take a single filesystem `path` arg. For a
+// sandboxed role they all run UNSANDBOXED in the main process (only bash is
+// bwrap-wrapped), so each must apply the same /tmp -> session-dir mapping that
+// applyBashSandbox binds for bash — otherwise a `read` of /tmp/foo hits the
+// real container /tmp while sandboxed bash wrote the session backing dir.
+const TMP_REDIRECT_TOOLS = new Set(['read', 'write', 'edit', 'grep', 'find', 'ls'])
+// Sandboxed roles read /tmp through bwrap's per-session bind (applyBashSandbox),
+// but the path-based file tools run unsandboxed against the real container /tmp.
+// Without this redirect a guest/member that touches /tmp/foo through bash (bound
+// to the session dir) and through a file tool (real /tmp) would see two
+// different files. Rewriting the file tool's on-disk path to the same session
+// backing dir makes every layer resolve /tmp/foo to one file. Unsandboxed roles
+// (empty masks) are left untouched: their bash already shares the real /tmp.
+async function applyTmpPathRedirect(
+  mutableArgs: Record<string, unknown>,
+  permissions: PermissionService,
+  origin: SessionOrigin | undefined,
+  agentDir: string,
+  sessionId: string,
+): Promise<void> {
+  const rawPath = mutableArgs.path
+  if (typeof rawPath !== 'string') return
+  const { dirs, files } = resolveHiddenPaths(permissions, origin, agentDir)
+  if (dirs.length === 0 && files.length === 0) return
+  const backing = mapVirtualTmpPath(agentDir, sessionId, rawPath)
+  if (backing === undefined) return
+  await ensureSessionTmpDir(sessionId)
+  mutableArgs.path = backing
+}
 function appendLoopWarning(result: ToolResult, message: string): ToolResult {
   const content: ContentPart[] = [...(result.content as ContentPart[]), { type: 'text', text: message }]
   return { content, details: result.details }
 }
+// Clears one tool's loop-guard residue for a session on the process-wide shared
+// guard. The completion-reminder bridges (channel router + TUI server) call this
+// for `subagent_output` when a backgrounded subagent finishes, so the next fetch
+// the reminder asks for isn't blocked by the window the agent's premature polling
+// poisoned. Exposed as a narrow function rather than the guard itself so callers
+// can't reach `check`/`forget` and widen the blast radius.
+export function forgetSharedLoopGuardTool(sessionId: string, tool: string): void {
+  sharedLoopGuard.forgetTool(sessionId, tool)
+}
 // Test-only seam: swaps the shared loop guard for a fresh instance so tests
 // that reuse sessionIds across cases don't see cross-test streak counts.
 // Production code never calls this; the guard's LRU bound handles

package/src/agent/session-origin.ts CHANGED

@@ -69,6 +69,36 @@ export type SessionOrigin =
       triggeredBy?: SessionOrigin
     }
+// Hard ceiling on the subagent delegation chain. Bounds chain LENGTH, not
+// fan-out breadth: the deepest reachable chain is main (depth 0) →
+// operator/reviewer (depth 1) → nested worker (depth 2). `spawn_subagent`
+// refuses to spawn from a session already at this depth.
+export const MAX_SUBAGENT_DEPTH = 2
+// Counts subagent links from the root by walking the `spawnedByOrigin`
+// ancestry. A non-subagent (or undefined) origin is depth 0; each nested
+// subagent origin adds one. Fails CLOSED on ambiguous ancestry: if a subagent
+// origin has no `spawnedByOrigin` (the serialized path in
+// parseSpawnedByOriginJson drops it), the true depth is unknowable, so we
+// return MAX_SUBAGENT_DEPTH rather than assume it sits at the root — a
+// truncated grandchild must not read as a child and earn an extra spawn. A
+// cyclic chain is bounded by the same cap.
+export function subagentDepth(origin: SessionOrigin | undefined): number {
+  let depth = 0
+  let current: SessionOrigin | undefined = origin
+  while (current !== undefined && current.kind === 'subagent') {
+    depth += 1
+    if (current.spawnedByOrigin === undefined) {
+      return MAX_SUBAGENT_DEPTH
+    }
+    if (depth >= MAX_SUBAGENT_DEPTH) {
+      return depth
+    }
+    current = current.spawnedByOrigin
+  }
+  return depth
+}
 export const PARTICIPANTS_TOP_K = 10
 export const PARTICIPANTS_MAX_AGE_MS = 7 * 24 * 60 * 60 * 1000

package/src/agent/subagent-completion-reminder.ts CHANGED

@@ -59,6 +59,13 @@ export function formatReminderDuration(ms: number): string {
   return `${min}m${sec}s`
 }
+export type SubagentCompletedChannelKey = {
+  adapter: string
+  workspace: string
+  chat: string
+  thread: string | null
+}
 export type SubagentCompletedPayload = {
   taskId: string
   subagent: string
@@ -66,6 +73,11 @@ export type SubagentCompletedPayload = {
   ok: boolean
   durationMs: number
   error?: string
+  // Present when the parent was a channel session. Lets the router fall back
+  // to the live successor session for the same channel key when the parent
+  // rolled over (SESSION_FRESHNESS_TTL_MS) or was idle-evicted while the
+  // subagent ran — otherwise the completion is silently dropped.
+  channelKey?: SubagentCompletedChannelKey
 }
 // Type guard for the `subagent.completed` broadcast payload. Subscribers
@@ -82,9 +94,11 @@ export function parseSubagentCompletedPayload(payload: unknown): SubagentComplet
     ok?: unknown
     durationMs?: unknown
     error?: unknown
+    channelKey?: unknown
   }
   if (p.kind !== 'subagent.completed') return null
   if (typeof p.parentSessionId !== 'string') return null
+  const channelKey = parseChannelKey(p.channelKey)
   return {
     taskId: typeof p.taskId === 'string' ? p.taskId : '<unknown>',
     subagent: typeof p.subagent === 'string' ? p.subagent : 'subagent',
@@ -92,5 +106,14 @@ export function parseSubagentCompletedPayload(payload: unknown): SubagentComplet
     ok: p.ok === true,
     durationMs: typeof p.durationMs === 'number' ? p.durationMs : 0,
     ...(typeof p.error === 'string' ? { error: p.error } : {}),
+    ...(channelKey !== null ? { channelKey } : {}),
   }
 }
+function parseChannelKey(value: unknown): SubagentCompletedChannelKey | null {
+  if (value === null || typeof value !== 'object') return null
+  const k = value as { adapter?: unknown; workspace?: unknown; chat?: unknown; thread?: unknown }
+  if (typeof k.adapter !== 'string' || typeof k.workspace !== 'string' || typeof k.chat !== 'string') return null
+  if (k.thread !== null && typeof k.thread !== 'string') return null
+  return { adapter: k.adapter, workspace: k.workspace, chat: k.chat, thread: k.thread }
+}

package/src/agent/subagents.ts CHANGED

@@ -49,6 +49,12 @@ export type SubagentShared<P = unknown> = {
   toolResultBudget?: ToolResultBudget
   visibility?: 'public' | 'internal'
   requiresSpecificPermission?: boolean
+  // Opt-in: when true, this subagent's session is wired with the orchestration
+  // tools (spawn_subagent/subagent_output/subagent_cancel) so it can delegate
+  // to its own subagents, bounded by MAX_SUBAGENT_DEPTH and caller-owned
+  // registry scoping. Default (unset/false) keeps the subagent a leaf — the
+  // historical contract for explorer/scout/memory-logger/etc.
+  canSpawnSubagents?: boolean
   // Wall-clock ceiling on a single spawn, enforced at the orchestration
   // layer (both `dispatchSpawnSubagent` and the stream-driven
   // `SubagentConsumer`). When exceeded, the orchestrator's `await` settles
@@ -403,13 +409,36 @@ function raceSubagentCompletion(
   })
 }
+// A complete <review>...</review> block. The reviewer's contract is that this
+// block IS its result; same-message preamble/trailing chatter or a later
+// summary turn must not become the captured final message. `[\s\S]` spans
+// newlines (the block is multi-line); non-greedy stops at the first close so an
+// incidental `<review>` literal in reviewed text cannot swallow real content.
+// Global so a message with several blocks yields the last (the revision).
+const REVIEW_BLOCK_RE = /<review>[\s\S]*?<\/review>/g
+function lastReviewBlock(text: string): string | null {
+  const matches = text.match(REVIEW_BLOCK_RE)
+  return matches === null ? null : (matches[matches.length - 1] ?? null)
+}
 function attachFinalMessageCapture(session: AgentSession, onFinalMessage: (msg: string) => void): void {
+  let lastAssistant: string | null = null
+  let lastReview: string | null = null
   try {
     session.subscribe((event: unknown) => {
-      const ev = event as { type?: string; message?: { content?: unknown } }
+      const ev = event as { type?: string; message?: { role?: string; content?: unknown } }
       if (ev?.type !== 'message_end') return
+      // Real assistant messages carry role 'assistant'; older test doubles omit
+      // it. user/toolResult echoes must never overwrite the assistant's answer.
+      const role = ev.message?.role
+      if (role !== undefined && role !== 'assistant') return
       const text = extractFinalMessageText(ev.message?.content)
-      if (text !== null) onFinalMessage(text)
+      if (text === null) return
+      lastAssistant = text
+      const review = lastReviewBlock(text)
+      if (review !== null) lastReview = review
+      onFinalMessage(lastReview ?? lastAssistant)
     })
   } catch {
     // session.subscribe is a stable upstream API; defensive try is for test

package/src/agent/system-prompt.ts CHANGED

@@ -100,7 +100,7 @@ There are three delegation modes. Pick deliberately.
 [REQUEST]: Concrete instructions — what to find/do/produce, what format, what to SKIP.
 \`\`\`
-**Anti-patterns.** Don't fire more than 5 subagents per turn, spawn for a known answer or single-file lookup, poll \`subagent_output\` in a loop (end your turn; the reminder wakes you), or ask a research subagent to make decisions — they find and report, you decide. Subagents cannot recursively spawn subagents.
+**Anti-patterns.** Don't fire more than 5 subagents per turn, spawn for a known answer or single-file lookup, poll \`subagent_output\` in a loop (end your turn; the reminder wakes you), or ask a research subagent to make decisions — they find and report, you decide. Most subagents are leaves; only \`operator\` and \`reviewer\` may delegate one level further, and the chain is hard-capped regardless.
 ## Safety

package/src/agent/tool-not-found-nudge.ts CHANGED

@@ -9,7 +9,7 @@ export type NudgeableSession = {
 const NOT_FOUND_RE = /^Tool (.+?) not found$/
 // Levenshtein distance ceiling for a name to count as "did you mean". A typo
-// like web_search -> websearch is distance 1 (one '_' removed); read_file ->
+// like websearch -> web_search is distance 1 (one '_' inserted); read_file ->
 // read is larger but still a clear prefix relationship. Keeping the ceiling
 // small avoids suggesting an unrelated tool for a genuinely unknown name.
 const MAX_SUGGESTION_DISTANCE = 4
@@ -79,13 +79,20 @@ function firstTextChunk(result: unknown): string | null {
 // normally — unlike a silent alias, this rescue path cannot bypass policy.
 export function attachToolNotFoundNudge(session: NudgeableSession, knownToolNames: readonly string[]): () => void {
   const known = [...new Set(knownToolNames)]
+  // A wedged model re-calls the same wrong name every turn; each steer
+  // spawns a fresh assistant turn that clobbers the subagent's captured
+  // final message (see attachFinalMessageCapture). One reminder per mistake.
+  const nudged = new Set<string>()
   return session.subscribe((event) => {
     const e = event as { type?: unknown; isError?: unknown; result?: unknown }
     if (e?.type !== 'tool_execution_end' || e.isError !== true) return
     const text = firstTextChunk(e.result)
     if (text === null) return
+    const requested = extractNotFoundToolName(text)
+    if (requested === null || nudged.has(requested)) return
     const nudge = buildToolNotFoundNudge(text, known)
     if (nudge === null) return
+    nudged.add(requested)
     void session.steer(nudge)
   })
 }