typeclaw 0.24.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/README.md +1 -1
  2. package/package.json +1 -1
  3. package/src/agent/index.ts +42 -5
  4. package/src/agent/llm-replay-sanitizer.ts +120 -0
  5. package/src/agent/loop-guard.ts +34 -0
  6. package/src/agent/multimodal/look-at.ts +1 -1
  7. package/src/agent/plugin-tools.ts +90 -12
  8. package/src/agent/session-origin.ts +30 -0
  9. package/src/agent/subagent-completion-reminder.ts +23 -0
  10. package/src/agent/subagents.ts +31 -2
  11. package/src/agent/system-prompt.ts +1 -1
  12. package/src/agent/tool-not-found-nudge.ts +8 -1
  13. package/src/agent/tools/channel-reply.ts +3 -3
  14. package/src/agent/tools/curl-impersonate.ts +2 -2
  15. package/src/agent/tools/spawn-subagent.ts +19 -2
  16. package/src/agent/tools/subagent-access.ts +40 -5
  17. package/src/agent/tools/subagent-cancel.ts +3 -1
  18. package/src/agent/tools/subagent-output.ts +6 -2
  19. package/src/agent/tools/webfetch/fetch.ts +18 -18
  20. package/src/agent/tools/webfetch/index.ts +1 -1
  21. package/src/agent/tools/webfetch/tool.ts +13 -13
  22. package/src/agent/tools/webfetch/types.ts +1 -1
  23. package/src/agent/tools/websearch.ts +6 -6
  24. package/src/bundled-plugins/backup/index.ts +40 -37
  25. package/src/bundled-plugins/backup/runner.ts +22 -1
  26. package/src/bundled-plugins/github-cli-auth/gh-command.ts +15 -7
  27. package/src/bundled-plugins/guard/policies/non-workspace-write.ts +38 -1
  28. package/src/bundled-plugins/memory/README.md +11 -11
  29. package/src/bundled-plugins/memory/dreaming.ts +5 -0
  30. package/src/bundled-plugins/memory/search-tool.ts +98 -1
  31. package/src/bundled-plugins/operator/operator.ts +5 -1
  32. package/src/bundled-plugins/reviewer/reviewer.ts +18 -9
  33. package/src/bundled-plugins/reviewer/skills/code-review.ts +1 -1
  34. package/src/bundled-plugins/reviewer/skills/general.ts +1 -1
  35. package/src/bundled-plugins/scout/scout.ts +7 -7
  36. package/src/bundled-plugins/security/policies/private-surface-read.ts +2 -2
  37. package/src/bundled-plugins/security/policies/ssrf.ts +3 -3
  38. package/src/bundled-plugins/tool-result-cap/README.md +1 -1
  39. package/src/channels/adapters/github/inbound.ts +11 -0
  40. package/src/channels/adapters/github/webhook-register.ts +32 -27
  41. package/src/channels/router.ts +61 -23
  42. package/src/channels/schema.ts +2 -1
  43. package/src/channels/subagent-completion-bridge.ts +18 -18
  44. package/src/channels/types.ts +1 -1
  45. package/src/cli/inspect-controller.ts +130 -38
  46. package/src/container/start.ts +7 -1
  47. package/src/git/mutex.ts +22 -0
  48. package/src/git/reconcile-ignored.ts +214 -0
  49. package/src/hostd/daemon.ts +26 -1
  50. package/src/hostd/portbroker-manager.ts +7 -0
  51. package/src/init/dockerfile.ts +1 -1
  52. package/src/init/gitignore.ts +25 -16
  53. package/src/inspect/index.ts +31 -4
  54. package/src/inspect/loop.ts +16 -12
  55. package/src/plugin/define.ts +2 -2
  56. package/src/plugin/index.ts +2 -2
  57. package/src/portbroker/hostd-client.ts +36 -13
  58. package/src/run/index.ts +14 -0
  59. package/src/sandbox/build.ts +10 -0
  60. package/src/sandbox/index.ts +9 -1
  61. package/src/sandbox/policy.ts +12 -0
  62. package/src/sandbox/session-tmp.ts +43 -0
  63. package/src/sandbox/writable-zones.ts +103 -3
  64. package/src/server/command-runner.ts +1 -1
  65. package/src/server/index.ts +8 -0
  66. package/src/skills/typeclaw-channel-github/SKILL.md +37 -10
  67. package/src/skills/typeclaw-memory/SKILL.md +3 -1
  68. package/src/tui/format.ts +11 -11
package/README.md CHANGED
@@ -34,7 +34,7 @@ If you're like me, TypeClaw is the right choice. If not, that's fine too.
34
34
  - 💬 **Multi-channel** — Slack, Discord, Telegram, KakaoTalk, GitHub webhooks, and a websocket TUI; one agent, many inboxes
35
35
  - ⏰ **Cron** — schedule prompts or shell commands; per-job coalescing so slow jobs don't pile up
36
36
  - 📚 **Skills on demand** — markdown procedures the agent loads only when relevant; zero token cost until used
37
- - 🔎 **Web research** — bundled `scout` subagent plus first-class `websearch` and `webfetch` tools (DuckDuckGo via curl-impersonate, Wikipedia)
37
+ - 🔎 **Web research** — bundled `scout` subagent plus first-class `web_search` and `web_fetch` tools (DuckDuckGo via curl-impersonate, Wikipedia)
38
38
  - 🛡 **Security guards** — bundled `tool.before` policies catch secret exfil, SSRF, prompt injection, tainted git remotes, and silent privilege escalation (role/cron promotion) before they fire
39
39
  - 📊 **Usage, inspect, doctor** — `typeclaw usage` reports token/$ spend per session, model, or day; `typeclaw inspect` replays a session transcript and tails live activity; `typeclaw doctor` diagnoses host, agent folder, and plugin state
40
40
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "typeclaw",
3
- "version": "0.24.0",
3
+ "version": "0.25.0",
4
4
  "homepage": "https://github.com/typeclaw/typeclaw#readme",
5
5
  "bugs": {
6
6
  "url": "https://github.com/typeclaw/typeclaw/issues"
@@ -35,6 +35,7 @@ import { getAuthFor } from './auth'
35
35
  import { createCompactionSettingsManager } from './compaction'
36
36
  import { renderGitNudge } from './git-nudge'
37
37
  import type { LiveSubagentRegistry } from './live-subagents'
38
+ import { sanitizeMessagesForLlmReplay } from './llm-replay-sanitizer'
38
39
  import { applyModelRuntimeOverrides } from './model-overrides'
39
40
  import { createChannelLookAtTool, lookAtTool } from './multimodal'
40
41
  import {
@@ -72,8 +73,8 @@ import { createStreamSnapshotTool } from './tools/stream-snapshot'
72
73
  import { createSubagentCancelTool } from './tools/subagent-cancel'
73
74
  import { createSubagentOutputTool } from './tools/subagent-output'
74
75
  import { createTodoTools } from './tools/todo'
75
- import { webfetchTool } from './tools/webfetch'
76
- import { websearchTool } from './tools/websearch'
76
+ import { webFetchTool } from './tools/webfetch'
77
+ import { webSearchTool } from './tools/websearch'
77
78
 
78
79
  export type { SessionOrigin } from './session-origin'
79
80
 
@@ -327,14 +328,33 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
327
328
  }
328
329
  }
329
330
 
331
+ // Plugin subagents (operator/reviewer) see ONLY their declared builtins plus
332
+ // the orchestration tools — never the full main-session tool surface. The
333
+ // orchestration tools self-omit unless `liveSubagentRegistry`/
334
+ // `subagentRegistry`/`createSessionForSubagent` are wired (see
335
+ // buildSubagentOrchestrationTools); `spawn_subagent` enforces MAX_SUBAGENT_DEPTH
336
+ // at execute time so a depth-capped subagent's spawn fails closed even though
337
+ // the tool is present.
330
338
  const customSystemTools =
331
339
  options.customTools !== undefined
332
340
  ? options.customTools
333
341
  : options.pluginSubagent
334
- ? resolvedSubagentBuiltins.toolDefinitions
342
+ ? [
343
+ ...resolvedSubagentBuiltins.toolDefinitions,
344
+ ...buildSubagentOrchestrationTools({
345
+ liveRegistry: options.liveSubagentRegistry,
346
+ registry: options.subagentRegistry,
347
+ createSessionForSubagent: options.createSessionForSubagent,
348
+ agentDir: options.plugins?.agentDir,
349
+ parentSessionId: sessionManager.getSessionId(),
350
+ getOrigin,
351
+ permissions: options.permissions,
352
+ stream: options.stream,
353
+ }),
354
+ ]
335
355
  : [
336
- websearchTool,
337
- webfetchTool,
356
+ webSearchTool,
357
+ webFetchTool,
338
358
  lookAtTool,
339
359
  ...(options.mcpManager ? buildMcpDispatcherToolDefinitions(options.mcpManager) : []),
340
360
  ...(options.reloadRegistry ? [createReloadTool({ registry: options.reloadRegistry })] : []),
@@ -405,6 +425,21 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
405
425
  ...(thinkingLevel ? { thinkingLevel } : {}),
406
426
  })
407
427
 
428
+ // Layer the replay sanitizer over pi's convertToLlm so a transcript with an
429
+ // orphaned toolResult (e.g. a torn-down restart turn) can't wedge the session
430
+ // with an Anthropic 400 on every replay. Runs on every provider call path
431
+ // that goes through the agent. Honors pi's contract that convertToLlm must
432
+ // not throw: on any failure it falls back to the unsanitized output.
433
+ const innerConvertToLlm = session.agent.convertToLlm
434
+ session.agent.convertToLlm = async (messages) => {
435
+ const converted = await innerConvertToLlm(messages)
436
+ try {
437
+ return sanitizeMessagesForLlmReplay(converted).messages
438
+ } catch {
439
+ return converted
440
+ }
441
+ }
442
+
408
443
  abortHolder.abort = () => {
409
444
  if (session.agent.signal?.aborted !== true) session.agent.abort()
410
445
  }
@@ -686,11 +721,13 @@ export function buildSubagentOrchestrationTools(opts: {
686
721
  createSubagentOutputTool({
687
722
  liveRegistry: opts.liveRegistry,
688
723
  getOrigin: opts.getOrigin,
724
+ callerSessionId: opts.parentSessionId,
689
725
  ...(opts.permissions ? { permissions: opts.permissions } : {}),
690
726
  }),
691
727
  createSubagentCancelTool({
692
728
  liveRegistry: opts.liveRegistry,
693
729
  getOrigin: opts.getOrigin,
730
+ callerSessionId: opts.parentSessionId,
694
731
  ...(opts.permissions ? { permissions: opts.permissions } : {}),
695
732
  }),
696
733
  ]
@@ -0,0 +1,120 @@
1
+ // Defensive projection applied to the LLM message array right before each
2
+ // provider call, layered on top of pi-coding-agent's `convertToLlm`. It exists
3
+ // to un-wedge sessions whose persisted transcript contains a `toolResult` with
4
+ // no live preceding `toolCall` — the exact shape Anthropic rejects with
5
+ // "unexpected `tool_use_id` found in `tool_result` blocks" (HTTP 400).
6
+ //
7
+ // How a transcript gets poisoned: the self-`restart` tool exits the container
8
+ // mid-turn. The assistant turn carrying the restart `toolCall` can land in the
9
+ // JSONL with `stopReason: "error"/"aborted"` (or be torn down), while its
10
+ // `toolResult` is persisted. On replay, pi-ai's provider-side `transformMessages`
11
+ // DROPS error/aborted assistant turns but passes the `toolResult` through
12
+ // unchanged, leaving a true orphan that the API rejects on every subsequent
13
+ // turn — the session is permanently stuck.
14
+ //
15
+ // pi-ai's `transformMessages` already handles the inverse cases (a `toolCall`
16
+ // with no result → synthetic "No result provided" result; error/aborted
17
+ // assistant turns → dropped). The one gap is an orphaned `toolResult`. This
18
+ // sanitizer fills exactly that gap and nothing more.
19
+ //
20
+ // Invariant (local pending-window, NOT a global id union — Anthropic requires
21
+ // tool results to belong to the immediately preceding tool-use turn):
22
+ // 1. Assistant turns with stopReason "error"/"aborted" are dropped here, so
23
+ // orphan detection sees the same message set the provider will after its
24
+ // own drop pass. Without this, a result tied to a dropped assistant would
25
+ // survive us and be orphaned downstream — the original bug.
26
+ // 2. A `toolResult` is kept only if its `toolCallId` was declared by the most
27
+ // recent kept assistant tool-use turn AND has not already been emitted in
28
+ // that window. Any user or assistant message closes the window.
29
+ // 3. Missing results are NOT synthesized here — pi-ai's existing pass inserts
30
+ // the synthetic placeholder, so dropping an orphan that leaves a bare
31
+ // `toolCall` is safe and self-healing.
32
+ //
33
+ // This is a read-only projection: it never mutates the persisted JSONL, so an
34
+ // already-poisoned session becomes usable without destructive migration.
35
+
36
+ import type { Message } from '@mariozechner/pi-ai'
37
+
38
/** Counters describing what a single sanitizer pass removed from the message list. */
export type ReplaySanitizerStats = {
  /** `toolResult` messages skipped because the open tool-call window did not declare their id. */
  droppedOrphans: number
  /** `toolResult` messages skipped because the same id was already emitted in the open window. */
  droppedDuplicates: number
  /** Assistant turns skipped because their `stopReason` was "error" or "aborted". */
  droppedErrorAssistants: number
}

/** Result of `sanitizeMessagesForLlmReplay`: the retained messages plus the drop counters. */
export type SanitizeResult = {
  messages: Message[]
  stats: ReplaySanitizerStats
}
48
+
49
/**
 * Tells whether `message` is an assistant turn that ended with
 * `stopReason` "error" or "aborted".
 *
 * @param message Any message from the replay list.
 * @returns `true` only for assistant turns with one of those two stop reasons.
 */
function isErroredAssistant(message: Message): boolean {
  if (message.role !== 'assistant') return false
  const { stopReason } = message
  return stopReason === 'error' || stopReason === 'aborted'
}
52
+
53
/**
 * Collects the ids of the `toolCall` content blocks on an assistant turn, in
 * content order. Blocks whose id is not a non-empty string are skipped.
 *
 * @param message An assistant message.
 * @returns The declared tool-call ids (possibly empty).
 */
function toolCallIdsOf(message: Extract<Message, { role: 'assistant' }>): string[] {
  const ids: string[] = []
  for (const block of message.content) {
    if (block.type !== 'toolCall') continue
    const id: unknown = block.id
    if (typeof id === 'string' && id.length > 0) ids.push(id)
  }
  return ids
}
59
+
60
/**
 * Filters a converted message list so it carries no `toolResult` that lacks a
 * live, immediately preceding tool-call declaration. The input array is not
 * mutated; retained messages keep their identity and relative order.
 *
 * One forward pass applies these rules:
 * - Every assistant or user message closes the current tool-call window.
 * - An assistant turn for which `isErroredAssistant` is true is dropped and
 *   counted; any other assistant turn is kept and opens a new window holding
 *   the ids returned by `toolCallIdsOf`.
 * - A `toolResult` is kept only if its `toolCallId` is in the open window and
 *   has not yet been emitted in that window; otherwise it is counted as an
 *   orphan or a duplicate and skipped.
 * - Missing results are never synthesized here.
 *
 * @param messages Messages in replay order.
 * @returns The retained messages and the per-category drop counters.
 */
export function sanitizeMessagesForLlmReplay(messages: Message[]): SanitizeResult {
  const kept: Message[] = []
  let droppedOrphans = 0
  let droppedDuplicates = 0
  let droppedErrorAssistants = 0

  // Ids declared by the most recent kept assistant tool-use turn, and the
  // subset of those ids that already received a result in this window.
  let openCallIds = new Set<string>()
  let answeredIds = new Set<string>()

  for (const message of messages) {
    switch (message.role) {
      case 'assistant': {
        // Any assistant turn ends the previous window, even one we drop.
        openCallIds = new Set()
        answeredIds = new Set()

        // Mirror the provider-side drop of incomplete turns so orphan
        // detection sees the same message set the provider will send.
        if (isErroredAssistant(message)) {
          droppedErrorAssistants += 1
          break
        }

        openCallIds = new Set(toolCallIdsOf(message))
        kept.push(message)
        break
      }

      case 'user': {
        openCallIds = new Set()
        answeredIds = new Set()
        kept.push(message)
        break
      }

      case 'toolResult': {
        const callId = message.toolCallId
        if (!openCallIds.has(callId)) {
          // True orphan, stale late result, or a result whose assistant turn
          // was dropped above.
          droppedOrphans += 1
        } else if (answeredIds.has(callId)) {
          droppedDuplicates += 1
        } else {
          answeredIds.add(callId)
          kept.push(message)
        }
        break
      }

      default:
        // Any other role passes through untouched and leaves the window open.
        kept.push(message)
    }
  }

  return {
    messages: kept,
    stats: { droppedOrphans, droppedDuplicates, droppedErrorAssistants },
  }
}
@@ -63,6 +63,14 @@ export type LoopGuard = {
63
63
  check: (sessionId: string, tool: string, args: unknown) => LoopGuardDecision
64
64
  reset: (sessionId: string) => void
65
65
  forget: (sessionId: string) => void
66
+ // Clears only the residue a single tool left behind in a session: its entries
67
+ // in the windowed history and, if the current consecutive streak belongs to
68
+ // that tool, the streak itself. Used when a state-change boundary makes a
69
+ // tool's prior calls irrelevant — e.g. a backgrounded subagent finishing
70
+ // makes the next `subagent_output` fetch legitimate even though earlier
71
+ // premature polls poisoned the window. Narrower than `forget`, so an
72
+ // unrelated tool's accumulating loop on the same session is preserved.
73
+ forgetTool: (sessionId: string, tool: string) => void
66
74
  }
67
75
 
68
76
  type SessionState = {
@@ -215,9 +223,35 @@ export function createLoopGuard(options: CreateLoopGuardOptions = {}): LoopGuard
215
223
  forget(sessionId) {
216
224
  sessions.delete(sessionId)
217
225
  },
226
forgetTool(sessionId, tool) {
  // Nothing is recorded for this session, so there is no residue to clear.
  const state = sessions.get(sessionId)
  if (state === undefined) return

  // Rebuild the window without this tool's signatures; each removed
  // signature also loses its windowWarned entry.
  const survivors: string[] = []
  for (const sig of state.window) {
    if (!signatureBelongsToTool(sig, tool)) {
      survivors.push(sig)
      continue
    }
    state.windowWarned.delete(sig)
  }
  state.window = survivors

  // The consecutive streak is reset only when it belongs to this tool, so
  // another tool's streak on the same session is left alone.
  if (!signatureBelongsToTool(state.signature, tool)) return
  state.signature = ''
  state.count = 0
  state.warned = false
},
218
244
  }
219
245
  }
220
246
 
247
/**
 * Reports whether a loop-guard signature was recorded for `tool`.
 *
 * Both signature builders prefix the tool name: exact signatures look like
 * `tool:...` and path-coarsened ones like `tool#path:...`. Requiring the
 * delimiter right after the name means a different tool whose name merely
 * starts with `tool` never matches.
 *
 * @param signature A signature taken from the guard's state.
 * @param tool The tool name to test against.
 * @returns `true` when `signature` starts with `tool:` or `tool#`.
 */
function signatureBelongsToTool(signature: string, tool: string): boolean {
  return [':', '#'].some((delimiter) => signature.startsWith(tool + delimiter))
}
254
+
221
255
  function formatWarnMessage(tool: string, count: number): string {
222
256
  return (
223
257
  `\n\n[loop-guard] You have called \`${tool}\` ${count} times in a row with identical arguments. ` +
@@ -161,7 +161,7 @@ async function runLookAtImages(imageContents: ImageContent[], prompt: string | u
161
161
  origin,
162
162
  profile: 'vision',
163
163
  // Both knobs are required to fully disarm the subagent's tool surface:
164
- // `customTools: []` blocks typeclaw's system tools (websearch/webfetch/
164
+ // `customTools: []` blocks typeclaw's system tools (web_search/web_fetch/
165
165
  // look_at/restart/...) — without it, the look_at tool would recurse
166
166
  // into itself. `tools: []` blocks pi-coding-agent's defaults
167
167
  // (read/bash/edit/write) — without it, a vision model could be talked
@@ -1,4 +1,5 @@
1
1
  import { AsyncLocalStorage } from 'node:async_hooks'
2
+ import { join } from 'node:path'
2
3
 
3
4
  import type { AgentTool } from '@mariozechner/pi-agent-core'
4
5
  import {
@@ -36,7 +37,10 @@ import type {
36
37
  import {
37
38
  buildSandboxedCommand,
38
39
  ensureBwrapAvailable,
40
+ ensureSessionTmpDir,
41
+ mapVirtualTmpPath,
39
42
  resolveHiddenPaths,
43
+ resolveProtectedZones,
40
44
  resolveWritableZones,
41
45
  subtractMasked,
42
46
  } from '@/sandbox'
@@ -44,8 +48,8 @@ import {
44
48
  import { createLoopGuard, type LoopGuard } from './loop-guard'
45
49
  import { checkImageReadRedirect } from './multimodal/read-redirect'
46
50
  import type { SessionOrigin } from './session-origin'
47
- import { webfetchTool } from './tools/webfetch'
48
- import { websearchTool } from './tools/websearch'
51
+ import { webFetchTool } from './tools/webfetch'
52
+ import { webSearchTool } from './tools/websearch'
49
53
 
50
54
  // Process-wide loop guard. State is keyed by sessionId so concurrent sessions
51
55
  // don't interfere; the guard's own LRU bound keeps it from growing without
@@ -112,7 +116,7 @@ const ACKNOWLEDGE_GUARDS_SCHEMA = Type.Optional(
112
116
  // name-filter path); the wrapped customTools just replace the implementation
113
117
  // underneath so subagent and channel sessions share the same hook coverage.
114
118
  type PiAgentToolName = 'read' | 'bash' | 'edit' | 'write' | 'grep' | 'find' | 'ls'
115
- type TypeclawToolName = 'websearch' | 'webfetch'
119
+ type TypeclawToolName = 'web_search' | 'web_fetch'
116
120
 
117
121
  const PI_AGENT_TOOL_MAP: Record<PiAgentToolName, AgentTool<any, any>> = {
118
122
  read: piReadTool,
@@ -125,8 +129,8 @@ const PI_AGENT_TOOL_MAP: Record<PiAgentToolName, AgentTool<any, any>> = {
125
129
  }
126
130
 
127
131
  const TYPECLAW_TOOL_DEFINITION_MAP: Record<TypeclawToolName, ToolDefinition<any, any, any>> = {
128
- websearch: websearchTool,
129
- webfetch: webfetchTool,
132
+ web_search: webSearchTool,
133
+ web_fetch: webFetchTool,
130
134
  }
131
135
 
132
136
  function isPiAgentToolName(name: string): name is PiAgentToolName {
@@ -458,7 +462,11 @@ export function wrapAgentToolAsCustomToolDefinition<TParams extends TSchema, TDe
458
462
  stripGuardAcknowledgements(mutableArgs)
459
463
 
460
464
  if (tool.name === 'bash' && opts.permissions !== undefined) {
461
- await applyBashSandbox(mutableArgs, opts.permissions, liveOrigin, opts.agentDir, bashEnvOverlay)
465
+ await applyBashSandbox(mutableArgs, opts.permissions, liveOrigin, opts.agentDir, opts.sessionId, bashEnvOverlay)
466
+ }
467
+
468
+ if (TMP_REDIRECT_TOOLS.has(tool.name) && opts.permissions !== undefined) {
469
+ await applyTmpPathRedirect(mutableArgs, opts.permissions, liveOrigin, opts.agentDir, opts.sessionId)
462
470
  }
463
471
 
464
472
  const result = await bashEnvStore.run(bashEnvOverlay, () =>
@@ -505,6 +513,7 @@ async function applyBashSandbox(
505
513
  permissions: PermissionService,
506
514
  origin: SessionOrigin | undefined,
507
515
  agentDir: string,
516
+ sessionId: string,
508
517
  envOverlay: BashEnvOverlay | undefined,
509
518
  ): Promise<void> {
510
519
  const command = mutableArgs.command
@@ -514,21 +523,46 @@ async function applyBashSandbox(
514
523
  if (dirs.length === 0 && files.length === 0) return
515
524
 
516
525
  await ensureBwrapAvailable()
526
+ // Per-session /tmp: bind this session's scratch dir over the default
527
+ // --tmpfs /tmp so writes survive across the role's sandboxed bash calls AND
528
+ // match what the write/edit wrapper redirected a /tmp path to. The bind is
529
+ // emitted via policy.mounts (after the hardcoded --tmpfs /tmp), so last-op-
530
+ // wins makes it the live /tmp. Unsandboxed roles (empty masks, returned
531
+ // above) keep sharing the real container /tmp between write and bash.
532
+ const sessionTmp = await ensureSessionTmpDir(sessionId)
517
533
  // Write-confined jail for low-trust roles: bind the whole project read-only,
518
534
  // hide private/secret paths, then re-expose only the free-write scratch zones
519
- // RW. Anything else under agentDir (.git/, node_modules/, agentDir root) is
520
- // EROFS, so bash cannot sidestep the non-workspace-write guard. Trusted/owner
521
- // never reach here (their masks are empty) and keep full unsandboxed access.
522
- // subtractMasked drops any writable zone masked for this role so an RW bind
523
- // never re-exposes a hidden path (e.g. a guest's masked workspace/).
535
+ // (workspace + root allowlist + .git) RW. The WORKING TREE outside those zones
536
+ // (node_modules/, agentDir root, non-allowlisted tracked files) stays EROFS, so
537
+ // bash cannot sidestep the non-workspace-write guard and `git checkout` of a
538
+ // protected worktree path fails at the kernel. .git is RW so members can
539
+ // commit; .git/hooks + .git/config (and any writable core.hooksPath target)
540
+ // are re-protected RO (protected, rendered after writable, ensured to exist so
541
+ // an absent path can't be created+executed) so a hook-plant / core.hooksPath
542
+ // never becomes code execution in the unsandboxed runtime git ops. Trusted/owner never reach here
543
+ // (their masks are empty) and keep full unsandboxed access. subtractMasked
544
+ // drops any writable zone masked for this role so an RW bind never re-exposes a
545
+ // hidden path (e.g. a guest's masked workspace/).
524
546
  const writable = subtractMasked(await resolveWritableZones(agentDir), { dirs, files })
547
+ // subtractMasked again on the protected set: a protected RO bind renders after
548
+ // the masks (last-op-wins), so an unfiltered protected path nested under a
549
+ // masked dir (e.g. a guest's workspace/ when core.hooksPath=workspace/hooks)
550
+ // would re-expose the hidden real dir. A masked path is already non-writable
551
+ // for this role, so it needs no protection anyway.
552
+ const protectedZones = writable.dirs.includes(join(agentDir, '.git'))
553
+ ? subtractMasked(await resolveProtectedZones(agentDir), { dirs, files })
554
+ : { dirs: [], files: [] }
525
555
  // bwrap does --clearenv, so the overlay must be re-introduced via env.set or
526
556
  // it would never reach the sandboxed process (the non-sandboxed spawnHook
527
557
  // path does not run when the command is rewritten to a bwrap invocation).
528
558
  const { commandString } = buildSandboxedCommand(command, {
529
- mounts: [{ type: 'ro-bind', source: agentDir, dest: agentDir }],
559
+ mounts: [
560
+ { type: 'ro-bind', source: agentDir, dest: agentDir },
561
+ { type: 'bind', source: sessionTmp, dest: '/tmp' },
562
+ ],
530
563
  masks: { dirs, files },
531
564
  writable,
565
+ protected: protectedZones,
532
566
  network: 'inherit',
533
567
  cwd: agentDir,
534
568
  ...(envOverlay !== undefined ? { env: { set: envOverlay } } : {}),
@@ -536,11 +570,55 @@ async function applyBashSandbox(
536
570
  mutableArgs.command = commandString
537
571
  }
538
572
 
573
// Names of the builtin file tools that take a single filesystem `path`
// argument. For a sandboxed role these run UNSANDBOXED in the main process
// (only bash is bwrap-wrapped), so each one needs the same /tmp -> session-dir
// mapping that applyBashSandbox binds for bash. Without it, a `read` of
// /tmp/foo would hit the real container /tmp while sandboxed bash wrote the
// session backing dir.
const TMP_REDIRECT_TOOLS = new Set<string>(['read', 'write', 'edit', 'grep', 'find', 'ls'])
579
+
580
/**
 * Rewrites a file tool's `path` argument so that a /tmp path resolves to the
 * session's backing directory.
 *
 * Sandboxed roles see /tmp through bwrap's per-session bind (applyBashSandbox),
 * while the path-based file tools run unsandboxed against the real container
 * /tmp. Without this redirect, /tmp/foo touched through bash and through a
 * file tool would be two different files. Roles with no hidden paths (empty
 * masks) are left untouched: their bash already shares the real /tmp.
 *
 * @param mutableArgs Tool arguments; `path` is replaced in place when redirected.
 * @param permissions Passed to `resolveHiddenPaths` to obtain the role's masks.
 * @param origin Session origin, also passed to `resolveHiddenPaths`.
 * @param agentDir Agent directory, used for both mask resolution and path mapping.
 * @param sessionId Session whose tmp directory backs the redirect.
 */
async function applyTmpPathRedirect(
  mutableArgs: Record<string, unknown>,
  permissions: PermissionService,
  origin: SessionOrigin | undefined,
  agentDir: string,
  sessionId: string,
): Promise<void> {
  const { path: requested } = mutableArgs
  if (typeof requested !== 'string') return

  // Empty masks mean the role is not sandboxed; leave its path alone.
  const hidden = resolveHiddenPaths(permissions, origin, agentDir)
  const hasMasks = hidden.dirs.length > 0 || hidden.files.length > 0
  if (!hasMasks) return

  // `undefined` means mapVirtualTmpPath has no backing location for this path.
  const redirected = mapVirtualTmpPath(agentDir, sessionId, requested)
  if (redirected === undefined) return

  // Ensure the session tmp dir before pointing the tool at a path inside it.
  await ensureSessionTmpDir(sessionId)
  mutableArgs.path = redirected
}
606
+
539
607
  function appendLoopWarning(result: ToolResult, message: string): ToolResult {
540
608
  const content: ContentPart[] = [...(result.content as ContentPart[]), { type: 'text', text: message }]
541
609
  return { content, details: result.details }
542
610
  }
543
611
 
612
/**
 * Clears one tool's loop-guard residue for a session on the process-wide
 * shared guard.
 *
 * The completion-reminder bridges (channel router + TUI server) call this for
 * `subagent_output` when a backgrounded subagent finishes, so the fetch the
 * reminder asks for is not blocked by a window that earlier premature polling
 * poisoned. Only this narrow function is exported, not the guard itself, so
 * callers cannot reach `check`/`forget` and widen the blast radius.
 *
 * @param sessionId Session whose guard state should be trimmed.
 * @param tool Tool name whose residue is removed.
 */
export function forgetSharedLoopGuardTool(sessionId: string, tool: string): void {
  const guard = sharedLoopGuard
  guard.forgetTool(sessionId, tool)
}
621
+
544
622
  // Test-only seam: swaps the shared loop guard for a fresh instance so tests
545
623
  // that reuse sessionIds across cases don't see cross-test streak counts.
546
624
  // Production code never calls this; the guard's LRU bound handles
@@ -69,6 +69,36 @@ export type SessionOrigin =
69
69
  triggeredBy?: SessionOrigin
70
70
  }
71
71
 
72
/**
 * Hard ceiling on the subagent delegation chain. It bounds chain LENGTH, not
 * fan-out breadth: the deepest reachable chain is main (depth 0) →
 * operator/reviewer (depth 1) → nested worker (depth 2). `spawn_subagent`
 * refuses to spawn from a session already at this depth.
 */
export const MAX_SUBAGENT_DEPTH = 2

/**
 * Counts subagent links from the root by walking the `spawnedByOrigin`
 * ancestry.
 *
 * A non-subagent (or undefined) origin is depth 0, and each nested subagent
 * origin adds one. The walk fails CLOSED on ambiguous ancestry: a subagent
 * origin without `spawnedByOrigin` has an unknowable true depth, so
 * MAX_SUBAGENT_DEPTH is returned rather than treating it as a root child. The
 * walk also stops once the cap is reached, which bounds a cyclic chain.
 *
 * @param origin The session origin to measure, if any.
 * @returns A depth between 0 and MAX_SUBAGENT_DEPTH inclusive.
 */
export function subagentDepth(origin: SessionOrigin | undefined): number {
  let links = 0
  for (
    let node: SessionOrigin | undefined = origin;
    node !== undefined && node.kind === 'subagent';
    node = node.spawnedByOrigin
  ) {
    links += 1
    // Truncated ancestry: report the cap instead of guessing a shallower depth.
    if (node.spawnedByOrigin === undefined) return MAX_SUBAGENT_DEPTH
    // Cap reached: stop walking so a long or cyclic chain cannot run on.
    if (links >= MAX_SUBAGENT_DEPTH) return links
  }
  return links
}
101
+
72
102
  export const PARTICIPANTS_TOP_K = 10
73
103
  export const PARTICIPANTS_MAX_AGE_MS = 7 * 24 * 60 * 60 * 1000
74
104
 
@@ -59,6 +59,13 @@ export function formatReminderDuration(ms: number): string {
59
59
  return `${min}m${sec}s`
60
60
  }
61
61
 
62
/**
 * Identifies the channel conversation a subagent's parent session belonged to.
 * `thread` is `null` when there is no thread component.
 */
export type SubagentCompletedChannelKey = {
  adapter: string
  workspace: string
  chat: string
  thread: string | null
}
68
+
62
69
  export type SubagentCompletedPayload = {
63
70
  taskId: string
64
71
  subagent: string
@@ -66,6 +73,11 @@ export type SubagentCompletedPayload = {
66
73
  ok: boolean
67
74
  durationMs: number
68
75
  error?: string
76
+ // Present when the parent was a channel session. Lets the router fall back
77
+ // to the live successor session for the same channel key when the parent
78
+ // rolled over (SESSION_FRESHNESS_TTL_MS) or was idle-evicted while the
79
+ // subagent ran — otherwise the completion is silently dropped.
80
+ channelKey?: SubagentCompletedChannelKey
69
81
  }
70
82
 
71
83
  // Type guard for the `subagent.completed` broadcast payload. Subscribers
@@ -82,9 +94,11 @@ export function parseSubagentCompletedPayload(payload: unknown): SubagentComplet
82
94
  ok?: unknown
83
95
  durationMs?: unknown
84
96
  error?: unknown
97
+ channelKey?: unknown
85
98
  }
86
99
  if (p.kind !== 'subagent.completed') return null
87
100
  if (typeof p.parentSessionId !== 'string') return null
101
+ const channelKey = parseChannelKey(p.channelKey)
88
102
  return {
89
103
  taskId: typeof p.taskId === 'string' ? p.taskId : '<unknown>',
90
104
  subagent: typeof p.subagent === 'string' ? p.subagent : 'subagent',
@@ -92,5 +106,14 @@ export function parseSubagentCompletedPayload(payload: unknown): SubagentComplet
92
106
  ok: p.ok === true,
93
107
  durationMs: typeof p.durationMs === 'number' ? p.durationMs : 0,
94
108
  ...(typeof p.error === 'string' ? { error: p.error } : {}),
109
+ ...(channelKey !== null ? { channelKey } : {}),
95
110
  }
96
111
  }
112
+
113
/**
 * Validates an untyped `channelKey` value from a broadcast payload.
 *
 * The value is accepted only when it is a non-null object whose `adapter`,
 * `workspace` and `chat` are strings and whose `thread` is a string or `null`.
 * A missing (`undefined`) `thread` is rejected. Extra properties are not
 * copied to the result.
 *
 * @param value The raw `channelKey` field.
 * @returns A fresh key object, or `null` when the shape does not match.
 */
function parseChannelKey(value: unknown): SubagentCompletedChannelKey | null {
  if (typeof value !== 'object' || value === null) return null

  const { adapter, workspace, chat, thread } = value as {
    adapter?: unknown
    workspace?: unknown
    chat?: unknown
    thread?: unknown
  }
  if (typeof adapter !== 'string') return null
  if (typeof workspace !== 'string') return null
  if (typeof chat !== 'string') return null
  if (thread !== null && typeof thread !== 'string') return null

  return { adapter, workspace, chat, thread }
}
@@ -49,6 +49,12 @@ export type SubagentShared<P = unknown> = {
49
49
  toolResultBudget?: ToolResultBudget
50
50
  visibility?: 'public' | 'internal'
51
51
  requiresSpecificPermission?: boolean
52
+ // Opt-in: when true, this subagent's session is wired with the orchestration
53
+ // tools (spawn_subagent/subagent_output/subagent_cancel) so it can delegate
54
+ // to its own subagents, bounded by MAX_SUBAGENT_DEPTH and caller-owned
55
+ // registry scoping. Default (unset/false) keeps the subagent a leaf — the
56
+ // historical contract for explorer/scout/memory-logger/etc.
57
+ canSpawnSubagents?: boolean
52
58
  // Wall-clock ceiling on a single spawn, enforced at the orchestration
53
59
  // layer (both `dispatchSpawnSubagent` and the stream-driven
54
60
  // `SubagentConsumer`). When exceeded, the orchestrator's `await` settles
@@ -403,13 +409,36 @@ function raceSubagentCompletion(
403
409
  })
404
410
  }
405
411
 
412
// Matches one complete <review>...</review> block. The reviewer's contract is
// that this block IS its result, so preamble or trailing chatter in the same
// message, or a later summary turn, must not become the captured final message.
//  - `[\s\S]` is used because the block spans multiple lines.
//  - The quantifier is non-greedy, so each match ends at the first closing tag.
//  - The `g` flag lets a message with several blocks be scanned for all of them.
const REVIEW_BLOCK_RE = /<review>[\s\S]*?<\/review>/g

/**
 * Picks the last complete `<review>` block out of a message.
 *
 * @param text - Full text of a message.
 * @returns The final `<review>...</review>` span (tags included), i.e. the
 *   latest revision when several are present, or `null` when there is none.
 */
function lastReviewBlock(text: string): string | null {
  const found = text.match(REVIEW_BLOCK_RE)
  if (found === null) return null
  const tail = found[found.length - 1]
  return tail ?? null
}
424
+
406
425
  function attachFinalMessageCapture(session: AgentSession, onFinalMessage: (msg: string) => void): void {
426
+ let lastAssistant: string | null = null
427
+ let lastReview: string | null = null
407
428
  try {
408
429
  session.subscribe((event: unknown) => {
409
- const ev = event as { type?: string; message?: { content?: unknown } }
430
+ const ev = event as { type?: string; message?: { role?: string; content?: unknown } }
410
431
  if (ev?.type !== 'message_end') return
432
+ // Real assistant messages carry role 'assistant'; older test doubles omit
433
+ // it. user/toolResult echoes must never overwrite the assistant's answer.
434
+ const role = ev.message?.role
435
+ if (role !== undefined && role !== 'assistant') return
411
436
  const text = extractFinalMessageText(ev.message?.content)
412
- if (text !== null) onFinalMessage(text)
437
+ if (text === null) return
438
+ lastAssistant = text
439
+ const review = lastReviewBlock(text)
440
+ if (review !== null) lastReview = review
441
+ onFinalMessage(lastReview ?? lastAssistant)
413
442
  })
414
443
  } catch {
415
444
  // session.subscribe is a stable upstream API; defensive try is for test
@@ -100,7 +100,7 @@ There are three delegation modes. Pick deliberately.
100
100
  [REQUEST]: Concrete instructions — what to find/do/produce, what format, what to SKIP.
101
101
  \`\`\`
102
102
 
103
- **Anti-patterns.** Don't fire more than 5 subagents per turn, spawn for a known answer or single-file lookup, poll \`subagent_output\` in a loop (end your turn; the reminder wakes you), or ask a research subagent to make decisions — they find and report, you decide. Subagents cannot recursively spawn subagents.
103
+ **Anti-patterns.** Don't fire more than 5 subagents per turn, spawn for a known answer or single-file lookup, poll \`subagent_output\` in a loop (end your turn; the reminder wakes you), or ask a research subagent to make decisions — they find and report, you decide. Most subagents are leaves; only \`operator\` and \`reviewer\` may delegate one level further, and the chain is hard-capped regardless.
104
104
 
105
105
  ## Safety
106
106
 
@@ -9,7 +9,7 @@ export type NudgeableSession = {
9
9
  const NOT_FOUND_RE = /^Tool (.+?) not found$/
10
10
 
11
11
  // Levenshtein distance ceiling for a name to count as "did you mean". A typo
12
- // like web_search -> websearch is distance 1 (one '_' removed); read_file ->
12
+ // like websearch -> web_search is distance 1 (one '_' inserted); read_file ->
13
13
  // read is larger but still a clear prefix relationship. Keeping the ceiling
14
14
  // small avoids suggesting an unrelated tool for a genuinely unknown name.
15
15
  const MAX_SUGGESTION_DISTANCE = 4
@@ -79,13 +79,20 @@ function firstTextChunk(result: unknown): string | null {
79
79
  // normally — unlike a silent alias, this rescue path cannot bypass policy.
80
80
/**
 * Subscribes to a session and steers it with a corrective nudge when a tool
 * call fails because the requested tool name does not exist.
 *
 * Each unknown name is nudged at most once per attachment: a wedged model
 * re-calls the same wrong name every turn, and every steer spawns a fresh
 * assistant turn that clobbers the subagent's captured final message.
 *
 * @param session - Session to observe and steer.
 * @param knownToolNames - Tool names available for suggestions; duplicates are
 *   dropped before use.
 * @returns Whatever `session.subscribe` returns (presumably an unsubscribe
 *   callback — confirm against `NudgeableSession`).
 */
export function attachToolNotFoundNudge(session: NudgeableSession, knownToolNames: readonly string[]): () => void {
  const uniqueKnownNames = Array.from(new Set(knownToolNames))
  // Wrong names that have already received their one reminder.
  const alreadyNudged = new Set<string>()

  return session.subscribe((event) => {
    const ev = event as { type?: unknown; isError?: unknown; result?: unknown }

    // Only failed tool executions are of interest.
    const isFailedToolCall = ev?.type === 'tool_execution_end' && ev.isError === true
    if (!isFailedToolCall) return

    const message = firstTextChunk(ev.result)
    if (message === null) return

    const wrongName = extractNotFoundToolName(message)
    if (wrongName === null) return
    if (alreadyNudged.has(wrongName)) return

    const reminder = buildToolNotFoundNudge(message, uniqueKnownNames)
    if (reminder === null) return

    // Record the name only once a nudge is actually going out.
    alreadyNudged.add(wrongName)
    void session.steer(reminder)
  })
}