typeclaw 0.24.0 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/README.md +1 -1
  2. package/package.json +1 -1
  3. package/src/agent/index.ts +42 -5
  4. package/src/agent/llm-replay-sanitizer.ts +120 -0
  5. package/src/agent/loop-guard.ts +34 -0
  6. package/src/agent/multimodal/look-at.ts +1 -1
  7. package/src/agent/plugin-tools.ts +90 -12
  8. package/src/agent/session-origin.ts +58 -5
  9. package/src/agent/subagent-completion-reminder.ts +39 -1
  10. package/src/agent/subagents.ts +31 -2
  11. package/src/agent/system-prompt.ts +1 -1
  12. package/src/agent/tool-not-found-nudge.ts +8 -1
  13. package/src/agent/tools/channel-react.ts +11 -4
  14. package/src/agent/tools/channel-reply.ts +3 -3
  15. package/src/agent/tools/curl-impersonate.ts +2 -2
  16. package/src/agent/tools/spawn-subagent.ts +19 -2
  17. package/src/agent/tools/subagent-access.ts +40 -5
  18. package/src/agent/tools/subagent-cancel.ts +3 -1
  19. package/src/agent/tools/subagent-output.ts +6 -2
  20. package/src/agent/tools/webfetch/fetch.ts +18 -18
  21. package/src/agent/tools/webfetch/index.ts +1 -1
  22. package/src/agent/tools/webfetch/tool.ts +13 -13
  23. package/src/agent/tools/webfetch/types.ts +1 -1
  24. package/src/agent/tools/websearch.ts +6 -6
  25. package/src/bundled-plugins/backup/index.ts +40 -37
  26. package/src/bundled-plugins/backup/runner.ts +22 -1
  27. package/src/bundled-plugins/github-cli-auth/gh-command.ts +15 -7
  28. package/src/bundled-plugins/guard/policies/non-workspace-write.ts +38 -1
  29. package/src/bundled-plugins/memory/README.md +11 -11
  30. package/src/bundled-plugins/memory/dreaming.ts +5 -0
  31. package/src/bundled-plugins/memory/search-tool.ts +98 -1
  32. package/src/bundled-plugins/operator/operator.ts +5 -1
  33. package/src/bundled-plugins/reviewer/reviewer.ts +18 -9
  34. package/src/bundled-plugins/reviewer/skills/code-review.ts +1 -1
  35. package/src/bundled-plugins/reviewer/skills/general.ts +1 -1
  36. package/src/bundled-plugins/scout/scout.ts +7 -7
  37. package/src/bundled-plugins/security/policies/private-surface-read.ts +2 -2
  38. package/src/bundled-plugins/security/policies/ssrf.ts +3 -3
  39. package/src/bundled-plugins/tool-result-cap/README.md +1 -1
  40. package/src/channels/adapters/discord-bot-classify.ts +3 -0
  41. package/src/channels/adapters/discord-bot-reactions.ts +164 -0
  42. package/src/channels/adapters/discord-bot.ts +23 -0
  43. package/src/channels/adapters/github/inbound.ts +19 -4
  44. package/src/channels/adapters/github/webhook-register.ts +32 -27
  45. package/src/channels/adapters/slack-bot-classify.ts +2 -0
  46. package/src/channels/adapters/slack-bot-reactions.ts +167 -0
  47. package/src/channels/adapters/slack-bot.ts +24 -0
  48. package/src/channels/router.ts +63 -23
  49. package/src/channels/schema.ts +43 -1
  50. package/src/channels/subagent-completion-bridge.ts +18 -18
  51. package/src/channels/types.ts +1 -1
  52. package/src/cli/inspect-controller.ts +130 -38
  53. package/src/config/config.ts +43 -2
  54. package/src/container/start.ts +7 -1
  55. package/src/git/mutex.ts +22 -0
  56. package/src/git/reconcile-ignored.ts +214 -0
  57. package/src/hostd/daemon.ts +26 -1
  58. package/src/hostd/portbroker-manager.ts +7 -0
  59. package/src/init/dockerfile.ts +1 -1
  60. package/src/init/gitignore.ts +25 -16
  61. package/src/init/index.ts +3 -3
  62. package/src/inspect/index.ts +31 -4
  63. package/src/inspect/loop.ts +16 -12
  64. package/src/plugin/define.ts +2 -2
  65. package/src/plugin/index.ts +2 -2
  66. package/src/portbroker/hostd-client.ts +36 -13
  67. package/src/run/index.ts +14 -0
  68. package/src/sandbox/build.ts +10 -0
  69. package/src/sandbox/index.ts +9 -1
  70. package/src/sandbox/policy.ts +12 -0
  71. package/src/sandbox/session-tmp.ts +43 -0
  72. package/src/sandbox/writable-zones.ts +103 -3
  73. package/src/server/command-runner.ts +1 -1
  74. package/src/server/index.ts +8 -0
  75. package/src/skills/typeclaw-channel-github/SKILL.md +38 -11
  76. package/src/skills/typeclaw-memory/SKILL.md +3 -1
  77. package/src/tui/format.ts +11 -11
  78. package/typeclaw.schema.json +1 -0
package/README.md CHANGED
@@ -34,7 +34,7 @@ If you're like me, TypeClaw is the right choice. If not, that's fine too.
34
34
  - 💬 **Multi-channel** — Slack, Discord, Telegram, KakaoTalk, GitHub webhooks, and a websocket TUI; one agent, many inboxes
35
35
  - ⏰ **Cron** — schedule prompts or shell commands; per-job coalescing so slow jobs don't pile up
36
36
  - 📚 **Skills on demand** — markdown procedures the agent loads only when relevant; zero token cost until used
37
- - 🔎 **Web research** — bundled `scout` subagent plus first-class `websearch` and `webfetch` tools (DuckDuckGo via curl-impersonate, Wikipedia)
37
+ - 🔎 **Web research** — bundled `scout` subagent plus first-class `web_search` and `web_fetch` tools (DuckDuckGo via curl-impersonate, Wikipedia)
38
38
  - 🛡 **Security guards** — bundled `tool.before` policies catch secret exfil, SSRF, prompt injection, tainted git remotes, and silent privilege escalation (role/cron promotion) before they fire
39
39
  - 📊 **Usage, inspect, doctor** — `typeclaw usage` reports token/$ spend per session, model, or day; `typeclaw inspect` replays a session transcript and tails live activity; `typeclaw doctor` diagnoses host, agent folder, and plugin state
40
40
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "typeclaw",
3
- "version": "0.24.0",
3
+ "version": "0.26.0",
4
4
  "homepage": "https://github.com/typeclaw/typeclaw#readme",
5
5
  "bugs": {
6
6
  "url": "https://github.com/typeclaw/typeclaw/issues"
package/src/agent/index.ts CHANGED
@@ -35,6 +35,7 @@ import { getAuthFor } from './auth'
35
35
  import { createCompactionSettingsManager } from './compaction'
36
36
  import { renderGitNudge } from './git-nudge'
37
37
  import type { LiveSubagentRegistry } from './live-subagents'
38
+ import { sanitizeMessagesForLlmReplay } from './llm-replay-sanitizer'
38
39
  import { applyModelRuntimeOverrides } from './model-overrides'
39
40
  import { createChannelLookAtTool, lookAtTool } from './multimodal'
40
41
  import {
@@ -72,8 +73,8 @@ import { createStreamSnapshotTool } from './tools/stream-snapshot'
72
73
  import { createSubagentCancelTool } from './tools/subagent-cancel'
73
74
  import { createSubagentOutputTool } from './tools/subagent-output'
74
75
  import { createTodoTools } from './tools/todo'
75
- import { webfetchTool } from './tools/webfetch'
76
- import { websearchTool } from './tools/websearch'
76
+ import { webFetchTool } from './tools/webfetch'
77
+ import { webSearchTool } from './tools/websearch'
77
78
 
78
79
  export type { SessionOrigin } from './session-origin'
79
80
 
@@ -327,14 +328,33 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
327
328
  }
328
329
  }
329
330
 
331
+ // Plugin subagents (operator/reviewer) see ONLY their declared builtins plus
332
+ // the orchestration tools — never the full main-session tool surface. The
333
+ // orchestration tools self-omit unless `liveSubagentRegistry`/
334
+ // `subagentRegistry`/`createSessionForSubagent` are wired (see
335
+ // buildSubagentOrchestrationTools); `spawn_subagent` enforces MAX_SUBAGENT_DEPTH
336
+ // at execute time so a depth-capped subagent's spawn fails closed even though
337
+ // the tool is present.
330
338
  const customSystemTools =
331
339
  options.customTools !== undefined
332
340
  ? options.customTools
333
341
  : options.pluginSubagent
334
- ? resolvedSubagentBuiltins.toolDefinitions
342
+ ? [
343
+ ...resolvedSubagentBuiltins.toolDefinitions,
344
+ ...buildSubagentOrchestrationTools({
345
+ liveRegistry: options.liveSubagentRegistry,
346
+ registry: options.subagentRegistry,
347
+ createSessionForSubagent: options.createSessionForSubagent,
348
+ agentDir: options.plugins?.agentDir,
349
+ parentSessionId: sessionManager.getSessionId(),
350
+ getOrigin,
351
+ permissions: options.permissions,
352
+ stream: options.stream,
353
+ }),
354
+ ]
335
355
  : [
336
- websearchTool,
337
- webfetchTool,
356
+ webSearchTool,
357
+ webFetchTool,
338
358
  lookAtTool,
339
359
  ...(options.mcpManager ? buildMcpDispatcherToolDefinitions(options.mcpManager) : []),
340
360
  ...(options.reloadRegistry ? [createReloadTool({ registry: options.reloadRegistry })] : []),
@@ -405,6 +425,21 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
405
425
  ...(thinkingLevel ? { thinkingLevel } : {}),
406
426
  })
407
427
 
428
+ // Layer the replay sanitizer over pi's convertToLlm so a transcript with an
429
+ // orphaned toolResult (e.g. a torn-down restart turn) can't wedge the session
430
+ // with an Anthropic 400 on every replay. Runs on every provider call path
431
+ // that goes through the agent. Honors pi's contract that convertToLlm must
432
+ // not throw: on any failure it falls back to the unsanitized output.
433
+ const innerConvertToLlm = session.agent.convertToLlm
434
+ session.agent.convertToLlm = async (messages) => {
435
+ const converted = await innerConvertToLlm(messages)
436
+ try {
437
+ return sanitizeMessagesForLlmReplay(converted).messages
438
+ } catch {
439
+ return converted
440
+ }
441
+ }
442
+
408
443
  abortHolder.abort = () => {
409
444
  if (session.agent.signal?.aborted !== true) session.agent.abort()
410
445
  }
@@ -686,11 +721,13 @@ export function buildSubagentOrchestrationTools(opts: {
686
721
  createSubagentOutputTool({
687
722
  liveRegistry: opts.liveRegistry,
688
723
  getOrigin: opts.getOrigin,
724
+ callerSessionId: opts.parentSessionId,
689
725
  ...(opts.permissions ? { permissions: opts.permissions } : {}),
690
726
  }),
691
727
  createSubagentCancelTool({
692
728
  liveRegistry: opts.liveRegistry,
693
729
  getOrigin: opts.getOrigin,
730
+ callerSessionId: opts.parentSessionId,
694
731
  ...(opts.permissions ? { permissions: opts.permissions } : {}),
695
732
  }),
696
733
  ]
package/src/agent/llm-replay-sanitizer.ts ADDED
@@ -0,0 +1,120 @@
1
+ // Defensive projection applied to the LLM message array right before each
2
+ // provider call, layered on top of pi-coding-agent's `convertToLlm`. It exists
3
+ // to un-wedge sessions whose persisted transcript contains a `toolResult` with
4
+ // no live preceding `toolCall` — the exact shape Anthropic rejects with
5
+ // "unexpected `tool_use_id` found in `tool_result` blocks" (HTTP 400).
6
+ //
7
+ // How a transcript gets poisoned: the self-`restart` tool exits the container
8
+ // mid-turn. The assistant turn carrying the restart `toolCall` can land in the
9
+ // JSONL with `stopReason: "error"/"aborted"` (or be torn down), while its
10
+ // `toolResult` is persisted. On replay, pi-ai's provider-side `transformMessages`
11
+ // DROPS error/aborted assistant turns but passes the `toolResult` through
12
+ // unchanged, leaving a true orphan that the API rejects on every subsequent
13
+ // turn — the session is permanently stuck.
14
+ //
15
+ // pi-ai's `transformMessages` already handles the inverse cases (a `toolCall`
16
+ // with no result → synthetic "No result provided" result; error/aborted
17
+ // assistant turns → dropped). The one gap is an orphaned `toolResult`. This
18
+ // sanitizer fills exactly that gap and nothing more.
19
+ //
20
+ // Invariant (local pending-window, NOT a global id union — Anthropic requires
21
+ // tool results to belong to the immediately preceding tool-use turn):
22
+ // 1. Assistant turns with stopReason "error"/"aborted" are dropped here, so
23
+ // orphan detection sees the same message set the provider will after its
24
+ // own drop pass. Without this, a result tied to a dropped assistant would
25
+ // survive us and be orphaned downstream — the original bug.
26
+ // 2. A `toolResult` is kept only if its `toolCallId` was declared by the most
27
+ // recent kept assistant tool-use turn AND has not already been emitted in
28
+ // that window. Any user or assistant message closes the window.
29
+ // 3. Missing results are NOT synthesized here — pi-ai's existing pass inserts
30
+ // the synthetic placeholder, so dropping an orphan that leaves a bare
31
+ // `toolCall` is safe and self-healing.
32
+ //
33
+ // This is a read-only projection: it never mutates the persisted JSONL, so an
34
+ // already-poisoned session becomes usable without destructive migration.
35
+
36
+ import type { Message } from '@mariozechner/pi-ai'
37
+
38
+ export type ReplaySanitizerStats = {
39
+ droppedOrphans: number
40
+ droppedDuplicates: number
41
+ droppedErrorAssistants: number
42
+ }
43
+
44
+ export type SanitizeResult = {
45
+ messages: Message[]
46
+ stats: ReplaySanitizerStats
47
+ }
48
+
49
+ function isErroredAssistant(message: Message): boolean {
50
+ return message.role === 'assistant' && (message.stopReason === 'error' || message.stopReason === 'aborted')
51
+ }
52
+
53
+ function toolCallIdsOf(message: Extract<Message, { role: 'assistant' }>): string[] {
54
+ return message.content
55
+ .filter((block): block is Extract<typeof block, { type: 'toolCall' }> => block.type === 'toolCall')
56
+ .map((block) => block.id)
57
+ .filter((id): id is string => typeof id === 'string' && id.length > 0)
58
+ }
59
+
60
+ export function sanitizeMessagesForLlmReplay(messages: Message[]): SanitizeResult {
61
+ const output: Message[] = []
62
+ const stats: ReplaySanitizerStats = {
63
+ droppedOrphans: 0,
64
+ droppedDuplicates: 0,
65
+ droppedErrorAssistants: 0,
66
+ }
67
+
68
+ let pendingToolCallIds = new Set<string>()
69
+ let emittedResultIds = new Set<string>()
70
+
71
+ const closeWindow = () => {
72
+ pendingToolCallIds = new Set()
73
+ emittedResultIds = new Set()
74
+ }
75
+
76
+ for (const message of messages) {
77
+ if (message.role === 'assistant') {
78
+ closeWindow()
79
+
80
+ // Mirror pi-ai's provider-side drop of incomplete turns so orphan
81
+ // detection matches the message set the provider will actually send.
82
+ if (isErroredAssistant(message)) {
83
+ stats.droppedErrorAssistants += 1
84
+ continue
85
+ }
86
+
87
+ const callIds = toolCallIdsOf(message)
88
+ if (callIds.length > 0) pendingToolCallIds = new Set(callIds)
89
+ output.push(message)
90
+ continue
91
+ }
92
+
93
+ if (message.role === 'user') {
94
+ closeWindow()
95
+ output.push(message)
96
+ continue
97
+ }
98
+
99
+ if (message.role === 'toolResult') {
100
+ const id = message.toolCallId
101
+ if (!pendingToolCallIds.has(id)) {
102
+ // Orphan: true orphan, stale late result, or result for a dropped
103
+ // error/aborted assistant turn.
104
+ stats.droppedOrphans += 1
105
+ continue
106
+ }
107
+ if (emittedResultIds.has(id)) {
108
+ stats.droppedDuplicates += 1
109
+ continue
110
+ }
111
+ emittedResultIds.add(id)
112
+ output.push(message)
113
+ continue
114
+ }
115
+
116
+ output.push(message)
117
+ }
118
+
119
+ return { messages: output, stats }
120
+ }
package/src/agent/loop-guard.ts CHANGED
@@ -63,6 +63,14 @@ export type LoopGuard = {
63
63
  check: (sessionId: string, tool: string, args: unknown) => LoopGuardDecision
64
64
  reset: (sessionId: string) => void
65
65
  forget: (sessionId: string) => void
66
+ // Clears only the residue a single tool left behind in a session: its entries
67
+ // in the windowed history and, if the current consecutive streak belongs to
68
+ // that tool, the streak itself. Used when a state-change boundary makes a
69
+ // tool's prior calls irrelevant — e.g. a backgrounded subagent finishing
70
+ // makes the next `subagent_output` fetch legitimate even though earlier
71
+ // premature polls poisoned the window. Narrower than `forget`, so an
72
+ // unrelated tool's accumulating loop on the same session is preserved.
73
+ forgetTool: (sessionId: string, tool: string) => void
66
74
  }
67
75
 
68
76
  type SessionState = {
@@ -215,9 +223,35 @@ export function createLoopGuard(options: CreateLoopGuardOptions = {}): LoopGuard
215
223
  forget(sessionId) {
216
224
  sessions.delete(sessionId)
217
225
  },
226
+ forgetTool(sessionId, tool) {
227
+ const state = sessions.get(sessionId)
228
+ if (state === undefined) return
229
+ const retained: string[] = []
230
+ for (const sig of state.window) {
231
+ if (signatureBelongsToTool(sig, tool)) {
232
+ state.windowWarned.delete(sig)
233
+ } else {
234
+ retained.push(sig)
235
+ }
236
+ }
237
+ state.window = retained
238
+ if (signatureBelongsToTool(state.signature, tool)) {
239
+ state.signature = ''
240
+ state.count = 0
241
+ state.warned = false
242
+ }
243
+ },
218
244
  }
219
245
  }
220
246
 
247
+ // Both signature builders prefix the tool name: exact signatures as `tool:...`
248
+ // and path-coarsened ones as `tool#path:...`. A tool's residue is therefore any
249
+ // signature starting with `tool:` or `tool#`, never a different tool whose name
250
+ // merely shares this one as a prefix (the delimiter rules that out).
251
+ function signatureBelongsToTool(signature: string, tool: string): boolean {
252
+ return signature.startsWith(`${tool}:`) || signature.startsWith(`${tool}#`)
253
+ }
254
+
221
255
  function formatWarnMessage(tool: string, count: number): string {
222
256
  return (
223
257
  `\n\n[loop-guard] You have called \`${tool}\` ${count} times in a row with identical arguments. ` +
package/src/agent/multimodal/look-at.ts CHANGED
@@ -161,7 +161,7 @@ async function runLookAtImages(imageContents: ImageContent[], prompt: string | u
161
161
  origin,
162
162
  profile: 'vision',
163
163
  // Both knobs are required to fully disarm the subagent's tool surface:
164
- // `customTools: []` blocks typeclaw's system tools (websearch/webfetch/
164
+ // `customTools: []` blocks typeclaw's system tools (web_search/web_fetch/
165
165
  // look_at/restart/...) — without it, the look_at tool would recurse
166
166
  // into itself. `tools: []` blocks pi-coding-agent's defaults
167
167
  // (read/bash/edit/write) — without it, a vision model could be talked
package/src/agent/plugin-tools.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import { AsyncLocalStorage } from 'node:async_hooks'
2
+ import { join } from 'node:path'
2
3
 
3
4
  import type { AgentTool } from '@mariozechner/pi-agent-core'
4
5
  import {
@@ -36,7 +37,10 @@ import type {
36
37
  import {
37
38
  buildSandboxedCommand,
38
39
  ensureBwrapAvailable,
40
+ ensureSessionTmpDir,
41
+ mapVirtualTmpPath,
39
42
  resolveHiddenPaths,
43
+ resolveProtectedZones,
40
44
  resolveWritableZones,
41
45
  subtractMasked,
42
46
  } from '@/sandbox'
@@ -44,8 +48,8 @@ import {
44
48
  import { createLoopGuard, type LoopGuard } from './loop-guard'
45
49
  import { checkImageReadRedirect } from './multimodal/read-redirect'
46
50
  import type { SessionOrigin } from './session-origin'
47
- import { webfetchTool } from './tools/webfetch'
48
- import { websearchTool } from './tools/websearch'
51
+ import { webFetchTool } from './tools/webfetch'
52
+ import { webSearchTool } from './tools/websearch'
49
53
 
50
54
  // Process-wide loop guard. State is keyed by sessionId so concurrent sessions
51
55
  // don't interfere; the guard's own LRU bound keeps it from growing without
@@ -112,7 +116,7 @@ const ACKNOWLEDGE_GUARDS_SCHEMA = Type.Optional(
112
116
  // name-filter path); the wrapped customTools just replace the implementation
113
117
  // underneath so subagent and channel sessions share the same hook coverage.
114
118
  type PiAgentToolName = 'read' | 'bash' | 'edit' | 'write' | 'grep' | 'find' | 'ls'
115
- type TypeclawToolName = 'websearch' | 'webfetch'
119
+ type TypeclawToolName = 'web_search' | 'web_fetch'
116
120
 
117
121
  const PI_AGENT_TOOL_MAP: Record<PiAgentToolName, AgentTool<any, any>> = {
118
122
  read: piReadTool,
@@ -125,8 +129,8 @@ const PI_AGENT_TOOL_MAP: Record<PiAgentToolName, AgentTool<any, any>> = {
125
129
  }
126
130
 
127
131
  const TYPECLAW_TOOL_DEFINITION_MAP: Record<TypeclawToolName, ToolDefinition<any, any, any>> = {
128
- websearch: websearchTool,
129
- webfetch: webfetchTool,
132
+ web_search: webSearchTool,
133
+ web_fetch: webFetchTool,
130
134
  }
131
135
 
132
136
  function isPiAgentToolName(name: string): name is PiAgentToolName {
@@ -458,7 +462,11 @@ export function wrapAgentToolAsCustomToolDefinition<TParams extends TSchema, TDe
458
462
  stripGuardAcknowledgements(mutableArgs)
459
463
 
460
464
  if (tool.name === 'bash' && opts.permissions !== undefined) {
461
- await applyBashSandbox(mutableArgs, opts.permissions, liveOrigin, opts.agentDir, bashEnvOverlay)
465
+ await applyBashSandbox(mutableArgs, opts.permissions, liveOrigin, opts.agentDir, opts.sessionId, bashEnvOverlay)
466
+ }
467
+
468
+ if (TMP_REDIRECT_TOOLS.has(tool.name) && opts.permissions !== undefined) {
469
+ await applyTmpPathRedirect(mutableArgs, opts.permissions, liveOrigin, opts.agentDir, opts.sessionId)
462
470
  }
463
471
 
464
472
  const result = await bashEnvStore.run(bashEnvOverlay, () =>
@@ -505,6 +513,7 @@ async function applyBashSandbox(
505
513
  permissions: PermissionService,
506
514
  origin: SessionOrigin | undefined,
507
515
  agentDir: string,
516
+ sessionId: string,
508
517
  envOverlay: BashEnvOverlay | undefined,
509
518
  ): Promise<void> {
510
519
  const command = mutableArgs.command
@@ -514,21 +523,46 @@ async function applyBashSandbox(
514
523
  if (dirs.length === 0 && files.length === 0) return
515
524
 
516
525
  await ensureBwrapAvailable()
526
+ // Per-session /tmp: bind this session's scratch dir over the default
527
+ // --tmpfs /tmp so writes survive across the role's sandboxed bash calls AND
528
+ // match what the write/edit wrapper redirected a /tmp path to. The bind is
529
+ // emitted via policy.mounts (after the hardcoded --tmpfs /tmp), so last-op-
530
+ // wins makes it the live /tmp. Unsandboxed roles (empty masks, returned
531
+ // above) keep sharing the real container /tmp between write and bash.
532
+ const sessionTmp = await ensureSessionTmpDir(sessionId)
517
533
  // Write-confined jail for low-trust roles: bind the whole project read-only,
518
534
  // hide private/secret paths, then re-expose only the free-write scratch zones
519
- // RW. Anything else under agentDir (.git/, node_modules/, agentDir root) is
520
- // EROFS, so bash cannot sidestep the non-workspace-write guard. Trusted/owner
521
- // never reach here (their masks are empty) and keep full unsandboxed access.
522
- // subtractMasked drops any writable zone masked for this role so an RW bind
523
- // never re-exposes a hidden path (e.g. a guest's masked workspace/).
535
+ // (workspace + root allowlist + .git) RW. The WORKING TREE outside those zones
536
+ // (node_modules/, agentDir root, non-allowlisted tracked files) stays EROFS, so
537
+ // bash cannot sidestep the non-workspace-write guard and `git checkout` of a
538
+ // protected worktree path fails at the kernel. .git is RW so members can
539
+ // commit; .git/hooks + .git/config (and any writable core.hooksPath target)
540
+ // are re-protected RO (protected, rendered after writable, ensured to exist so
541
+ // an absent path can't be created+executed) so a hook-plant / core.hooksPath
542
+ // never becomes code execution in the unsandboxed runtime git ops. Trusted/owner never reach here
543
+ // (their masks are empty) and keep full unsandboxed access. subtractMasked
544
+ // drops any writable zone masked for this role so an RW bind never re-exposes a
545
+ // hidden path (e.g. a guest's masked workspace/).
524
546
  const writable = subtractMasked(await resolveWritableZones(agentDir), { dirs, files })
547
+ // subtractMasked again on the protected set: a protected RO bind renders after
548
+ // the masks (last-op-wins), so an unfiltered protected path nested under a
549
+ // masked dir (e.g. a guest's workspace/ when core.hooksPath=workspace/hooks)
550
+ // would re-expose the hidden real dir. A masked path is already non-writable
551
+ // for this role, so it needs no protection anyway.
552
+ const protectedZones = writable.dirs.includes(join(agentDir, '.git'))
553
+ ? subtractMasked(await resolveProtectedZones(agentDir), { dirs, files })
554
+ : { dirs: [], files: [] }
525
555
  // bwrap does --clearenv, so the overlay must be re-introduced via env.set or
526
556
  // it would never reach the sandboxed process (the non-sandboxed spawnHook
527
557
  // path does not run when the command is rewritten to a bwrap invocation).
528
558
  const { commandString } = buildSandboxedCommand(command, {
529
- mounts: [{ type: 'ro-bind', source: agentDir, dest: agentDir }],
559
+ mounts: [
560
+ { type: 'ro-bind', source: agentDir, dest: agentDir },
561
+ { type: 'bind', source: sessionTmp, dest: '/tmp' },
562
+ ],
530
563
  masks: { dirs, files },
531
564
  writable,
565
+ protected: protectedZones,
532
566
  network: 'inherit',
533
567
  cwd: agentDir,
534
568
  ...(envOverlay !== undefined ? { env: { set: envOverlay } } : {}),
@@ -536,11 +570,55 @@ async function applyBashSandbox(
536
570
  mutableArgs.command = commandString
537
571
  }
538
572
 
573
+ // The builtin file tools that take a single filesystem `path` arg. For a
574
+ // sandboxed role they all run UNSANDBOXED in the main process (only bash is
575
+ // bwrap-wrapped), so each must apply the same /tmp -> session-dir mapping that
576
+ // applyBashSandbox binds for bash — otherwise a `read` of /tmp/foo hits the
577
+ // real container /tmp while sandboxed bash wrote the session backing dir.
578
+ const TMP_REDIRECT_TOOLS = new Set(['read', 'write', 'edit', 'grep', 'find', 'ls'])
579
+
580
+ // Sandboxed roles read /tmp through bwrap's per-session bind (applyBashSandbox),
581
+ // but the path-based file tools run unsandboxed against the real container /tmp.
582
+ // Without this redirect a guest/member that touches /tmp/foo through bash (bound
583
+ // to the session dir) and through a file tool (real /tmp) would see two
584
+ // different files. Rewriting the file tool's on-disk path to the same session
585
+ // backing dir makes every layer resolve /tmp/foo to one file. Unsandboxed roles
586
+ // (empty masks) are left untouched: their bash already shares the real /tmp.
587
+ async function applyTmpPathRedirect(
588
+ mutableArgs: Record<string, unknown>,
589
+ permissions: PermissionService,
590
+ origin: SessionOrigin | undefined,
591
+ agentDir: string,
592
+ sessionId: string,
593
+ ): Promise<void> {
594
+ const rawPath = mutableArgs.path
595
+ if (typeof rawPath !== 'string') return
596
+
597
+ const { dirs, files } = resolveHiddenPaths(permissions, origin, agentDir)
598
+ if (dirs.length === 0 && files.length === 0) return
599
+
600
+ const backing = mapVirtualTmpPath(agentDir, sessionId, rawPath)
601
+ if (backing === undefined) return
602
+
603
+ await ensureSessionTmpDir(sessionId)
604
+ mutableArgs.path = backing
605
+ }
606
+
539
607
  function appendLoopWarning(result: ToolResult, message: string): ToolResult {
540
608
  const content: ContentPart[] = [...(result.content as ContentPart[]), { type: 'text', text: message }]
541
609
  return { content, details: result.details }
542
610
  }
543
611
 
612
+ // Clears one tool's loop-guard residue for a session on the process-wide shared
613
+ // guard. The completion-reminder bridges (channel router + TUI server) call this
614
+ // for `subagent_output` when a backgrounded subagent finishes, so the next fetch
615
+ // the reminder asks for isn't blocked by the window the agent's premature polling
616
+ // poisoned. Exposed as a narrow function rather than the guard itself so callers
617
+ // can't reach `check`/`forget` and widen the blast radius.
618
+ export function forgetSharedLoopGuardTool(sessionId: string, tool: string): void {
619
+ sharedLoopGuard.forgetTool(sessionId, tool)
620
+ }
621
+
544
622
  // Test-only seam: swaps the shared loop guard for a fresh instance so tests
545
623
  // that reuse sessionIds across cases don't see cross-test streak counts.
546
624
  // Production code never calls this; the guard's LRU bound handles
package/src/agent/session-origin.ts CHANGED
@@ -69,6 +69,36 @@ export type SessionOrigin =
69
69
  triggeredBy?: SessionOrigin
70
70
  }
71
71
 
72
+ // Hard ceiling on the subagent delegation chain. Bounds chain LENGTH, not
73
+ // fan-out breadth: the deepest reachable chain is main (depth 0) →
74
+ // operator/reviewer (depth 1) → nested worker (depth 2). `spawn_subagent`
75
+ // refuses to spawn from a session already at this depth.
76
+ export const MAX_SUBAGENT_DEPTH = 2
77
+
78
+ // Counts subagent links from the root by walking the `spawnedByOrigin`
79
+ // ancestry. A non-subagent (or undefined) origin is depth 0; each nested
80
+ // subagent origin adds one. Fails CLOSED on ambiguous ancestry: if a subagent
81
+ // origin has no `spawnedByOrigin` (the serialized path in
82
+ // parseSpawnedByOriginJson drops it), the true depth is unknowable, so we
83
+ // return MAX_SUBAGENT_DEPTH rather than assume it sits at the root — a
84
+ // truncated grandchild must not read as a child and earn an extra spawn. A
85
+ // cyclic chain is bounded by the same cap.
86
+ export function subagentDepth(origin: SessionOrigin | undefined): number {
87
+ let depth = 0
88
+ let current: SessionOrigin | undefined = origin
89
+ while (current !== undefined && current.kind === 'subagent') {
90
+ depth += 1
91
+ if (current.spawnedByOrigin === undefined) {
92
+ return MAX_SUBAGENT_DEPTH
93
+ }
94
+ if (depth >= MAX_SUBAGENT_DEPTH) {
95
+ return depth
96
+ }
97
+ current = current.spawnedByOrigin
98
+ }
99
+ return depth
100
+ }
101
+
72
102
  export const PARTICIPANTS_TOP_K = 10
73
103
  export const PARTICIPANTS_MAX_AGE_MS = 7 * 24 * 60 * 60 * 1000
74
104
 
@@ -93,14 +123,20 @@ export const PARTICIPANTS_MAX_AGE_MS = 7 * 24 * 60 * 60 * 1000
93
123
  type PlatformInfo = {
94
124
  displayName: string
95
125
  mentionMode: 'angle-id' | 'at-username' | 'alias'
126
+ // Whether this adapter registers a ReactionCallback, i.e. whether
127
+ // `channel_react` actually does anything here. Gates the proactive-reaction
128
+ // prompt guidance so we never tell a KakaoTalk/Telegram agent to react when
129
+ // the call would no-op. Keep in sync with the adapters that call
130
+ // `router.registerReaction` (github, slack-bot, discord-bot today).
131
+ supportsReactions: boolean
96
132
  }
97
133
 
98
134
  const PLATFORM_INFO: Record<AdapterId, PlatformInfo> = {
99
- 'slack-bot': { displayName: 'Slack', mentionMode: 'angle-id' },
100
- 'discord-bot': { displayName: 'Discord', mentionMode: 'angle-id' },
101
- github: { displayName: 'GitHub', mentionMode: 'at-username' },
102
- 'telegram-bot': { displayName: 'Telegram', mentionMode: 'at-username' },
103
- kakaotalk: { displayName: 'KakaoTalk', mentionMode: 'alias' },
135
+ 'slack-bot': { displayName: 'Slack', mentionMode: 'angle-id', supportsReactions: true },
136
+ 'discord-bot': { displayName: 'Discord', mentionMode: 'angle-id', supportsReactions: true },
137
+ github: { displayName: 'GitHub', mentionMode: 'at-username', supportsReactions: true },
138
+ 'telegram-bot': { displayName: 'Telegram', mentionMode: 'at-username', supportsReactions: false },
139
+ kakaotalk: { displayName: 'KakaoTalk', mentionMode: 'alias', supportsReactions: false },
104
140
  }
105
141
 
106
142
  function getPlatformInfo(adapter: AdapterId): PlatformInfo {
@@ -318,6 +354,23 @@ function renderChannelOrigin(
318
354
  const conversationLine = renderConversationLine(origin)
319
355
  if (conversationLine !== null) lines.push('', conversationLine)
320
356
 
357
+ if (platformInfo.supportsReactions) {
358
+ lines.push(
359
+ '',
360
+ '**React like a teammate would.** You can drop an emoji on the message that',
361
+ 'triggered this turn with `channel_react({ emoji })` — it posts no comment,',
362
+ 'just a reaction. Read the message and pick what genuinely fits its tone:',
363
+ '`+1` to agree or approve, `rocket` for something shipping or exciting,',
364
+ '`tada` to celebrate, `heart` to show appreciation, `laugh` for something',
365
+ 'funny, `eyes` to signal you are looking. Reach for it when a reaction adds',
366
+ 'real warmth or signal — not on every message, and not just because you can.',
367
+ 'A reaction does NOT satisfy the reply obligation below: when the message',
368
+ 'needs a substantive answer, still send it via `channel_reply`. Think of',
369
+ 'reactions as the lightweight, human layer on top of your words, not a',
370
+ 'replacement for them.',
371
+ )
372
+ }
373
+
321
374
  lines.push(
322
375
  '',
323
376
  '**For every user message in this session, you MUST call `channel_reply`',
@@ -16,6 +16,7 @@ export type CompletionReminderArgs = {
16
16
  durationMs: number
17
17
  error?: string
18
18
  channel?: boolean
19
+ adapter?: string
19
20
  }
20
21
 
21
22
  const CHANNEL_REPLY_NUDGE =
@@ -28,9 +29,23 @@ const CHANNEL_REPLY_NUDGE =
28
29
  'can see why the post-completion turn was silent. `NO_REPLY` is the legacy fallback only when ' +
29
30
  '`skip_response` is unavailable.'
30
31
 
32
// GitHub-only addendum to CHANNEL_REPLY_NUDGE. That base nudge pushes every
// completion towards `channel_reply`, and on GitHub a `channel_reply` is an
// ordinary PR comment. For most subagents that is fine, but a finished
// `reviewer` needs a formal review: a verdict posted as a comment leaves the
// PR "awaiting review" with no approval recorded. The reminder has no way to
// know whether the completed turn was a review, so the wording is conditional
// ("If this was a PR review") — it redirects the review case and stays
// harmless for every other completion.
const GITHUB_REVIEW_NUDGE = [
  'If this was a PR review, the verdict is a formal review, not a `channel_reply`:',
  'post it via `gh api -X POST /repos/owner/repo/pulls/<N>/reviews` (APPROVE/REQUEST_CHANGES/COMMENT)',
  'and end the turn with `skip_response`. A `channel_reply` that merely says "Approved" posts a',
  'comment and leaves the PR awaiting review.',
].join(' ')
44
+
31
45
  export function renderSubagentCompletionReminder(args: CompletionReminderArgs): string {
32
46
  const durationStr = formatReminderDuration(args.durationMs)
33
- const channelTail = args.channel === true ? ` ${CHANNEL_REPLY_NUDGE}` : ''
47
+ const githubTail = args.channel === true && args.adapter === 'github' ? ` ${GITHUB_REVIEW_NUDGE}` : ''
48
+ const channelTail = args.channel === true ? ` ${CHANNEL_REPLY_NUDGE}${githubTail}` : ''
34
49
  if (args.ok) {
35
50
  return (
36
51
  `<system-reminder>\n` +
@@ -59,6 +74,13 @@ export function formatReminderDuration(ms: number): string {
59
74
  return `${min}m${sec}s`
60
75
  }
61
76
 
77
/**
 * Identifies the channel a subagent's parent session belonged to. Carried on
 * the `subagent.completed` payload as the optional `channelKey` field.
 * NOTE(review): the exact meaning of `workspace` and `chat` is
 * adapter-specific and not visible here — confirm against the router.
 */
export type SubagentCompletedChannelKey = {
  /** Adapter id as a plain string (for example `github`). */
  adapter: string
  workspace: string
  chat: string
  /** Thread identifier, or `null` when there is none. */
  thread: string | null
}
83
+
62
84
  export type SubagentCompletedPayload = {
63
85
  taskId: string
64
86
  subagent: string
@@ -66,6 +88,11 @@ export type SubagentCompletedPayload = {
66
88
  ok: boolean
67
89
  durationMs: number
68
90
  error?: string
91
+ // Present when the parent was a channel session. Lets the router fall back
92
+ // to the live successor session for the same channel key when the parent
93
+ // rolled over (SESSION_FRESHNESS_TTL_MS) or was idle-evicted while the
94
+ // subagent ran — otherwise the completion is silently dropped.
95
+ channelKey?: SubagentCompletedChannelKey
69
96
  }
70
97
 
71
98
  // Type guard for the `subagent.completed` broadcast payload. Subscribers
@@ -82,9 +109,11 @@ export function parseSubagentCompletedPayload(payload: unknown): SubagentComplet
82
109
  ok?: unknown
83
110
  durationMs?: unknown
84
111
  error?: unknown
112
+ channelKey?: unknown
85
113
  }
86
114
  if (p.kind !== 'subagent.completed') return null
87
115
  if (typeof p.parentSessionId !== 'string') return null
116
+ const channelKey = parseChannelKey(p.channelKey)
88
117
  return {
89
118
  taskId: typeof p.taskId === 'string' ? p.taskId : '<unknown>',
90
119
  subagent: typeof p.subagent === 'string' ? p.subagent : 'subagent',
@@ -92,5 +121,14 @@ export function parseSubagentCompletedPayload(payload: unknown): SubagentComplet
92
121
  ok: p.ok === true,
93
122
  durationMs: typeof p.durationMs === 'number' ? p.durationMs : 0,
94
123
  ...(typeof p.error === 'string' ? { error: p.error } : {}),
124
+ ...(channelKey !== null ? { channelKey } : {}),
95
125
  }
96
126
  }
127
+
128
/**
 * Validates an untrusted value as a channel key.
 *
 * @param value - Anything; typically the `channelKey` field of a broadcast payload.
 * @returns A fresh object holding only the four known fields when `adapter`,
 *   `workspace` and `chat` are strings and `thread` is a string or `null`;
 *   otherwise `null`. A missing (`undefined`) `thread` is rejected.
 */
function parseChannelKey(value: unknown): SubagentCompletedChannelKey | null {
  if (typeof value !== 'object' || value === null) return null

  const { adapter, workspace, chat, thread } = value as {
    adapter?: unknown
    workspace?: unknown
    chat?: unknown
    thread?: unknown
  }

  if (typeof adapter !== 'string') return null
  if (typeof workspace !== 'string') return null
  if (typeof chat !== 'string') return null

  // `thread` must be present: either a string or an explicit null.
  if (typeof thread === 'string' || thread === null) {
    return { adapter, workspace, chat, thread }
  }
  return null
}