typeclaw 0.23.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/README.md +1 -1
  2. package/package.json +1 -1
  3. package/src/agent/index.ts +133 -27
  4. package/src/agent/llm-replay-sanitizer.ts +120 -0
  5. package/src/agent/loop-guard.ts +34 -0
  6. package/src/agent/multimodal/look-at.ts +1 -1
  7. package/src/agent/plugin-tools.ts +122 -8
  8. package/src/agent/restart/index.ts +15 -3
  9. package/src/agent/restart-handoff/index.ts +110 -12
  10. package/src/agent/session-origin.ts +30 -0
  11. package/src/agent/subagent-completion-reminder.ts +26 -1
  12. package/src/agent/subagents.ts +75 -3
  13. package/src/agent/system-prompt.ts +5 -1
  14. package/src/agent/todo/continuation-policy.ts +242 -0
  15. package/src/agent/todo/continuation-state.ts +87 -0
  16. package/src/agent/todo/continuation-wiring.ts +113 -0
  17. package/src/agent/todo/continuation.ts +71 -0
  18. package/src/agent/todo/scope.ts +77 -0
  19. package/src/agent/todo/store.ts +98 -0
  20. package/src/agent/tool-not-found-nudge.ts +126 -0
  21. package/src/agent/tools/channel-reply.ts +51 -0
  22. package/src/agent/tools/curl-impersonate.ts +2 -2
  23. package/src/agent/tools/restart.ts +11 -4
  24. package/src/agent/tools/spawn-subagent.ts +19 -2
  25. package/src/agent/tools/subagent-access.ts +40 -5
  26. package/src/agent/tools/subagent-cancel.ts +3 -1
  27. package/src/agent/tools/subagent-output.ts +6 -2
  28. package/src/agent/tools/todo/index.ts +119 -0
  29. package/src/agent/tools/webfetch/fetch.ts +18 -18
  30. package/src/agent/tools/webfetch/index.ts +1 -1
  31. package/src/agent/tools/webfetch/tool.ts +13 -13
  32. package/src/agent/tools/webfetch/types.ts +1 -1
  33. package/src/agent/tools/websearch.ts +6 -6
  34. package/src/bundled-plugins/backup/index.ts +40 -37
  35. package/src/bundled-plugins/backup/runner.ts +23 -2
  36. package/src/bundled-plugins/github-cli-auth/gh-command.ts +15 -7
  37. package/src/bundled-plugins/guard/policies/non-workspace-write.ts +38 -1
  38. package/src/bundled-plugins/memory/README.md +11 -11
  39. package/src/bundled-plugins/memory/dreaming.ts +5 -0
  40. package/src/bundled-plugins/memory/search-tool.ts +98 -1
  41. package/src/bundled-plugins/operator/operator.ts +5 -1
  42. package/src/bundled-plugins/reviewer/reviewer.ts +32 -9
  43. package/src/bundled-plugins/reviewer/skills/code-review.ts +1 -1
  44. package/src/bundled-plugins/reviewer/skills/general.ts +1 -1
  45. package/src/bundled-plugins/scout/scout.ts +7 -7
  46. package/src/bundled-plugins/security/policies/private-surface-read.ts +2 -2
  47. package/src/bundled-plugins/security/policies/ssrf.ts +3 -3
  48. package/src/bundled-plugins/tool-result-cap/README.md +1 -1
  49. package/src/channels/adapters/discord-bot-reference.ts +78 -0
  50. package/src/channels/adapters/discord-bot.ts +25 -3
  51. package/src/channels/adapters/github/inbound.ts +172 -10
  52. package/src/channels/adapters/github/index.ts +10 -0
  53. package/src/channels/adapters/github/review-thread-resolver.ts +246 -0
  54. package/src/channels/adapters/github/webhook-register.ts +32 -27
  55. package/src/channels/adapters/kakaotalk-classify.ts +67 -6
  56. package/src/channels/adapters/slack-bot-classify.ts +9 -1
  57. package/src/channels/adapters/slack-bot-reference.ts +129 -0
  58. package/src/channels/adapters/slack-bot.ts +67 -8
  59. package/src/channels/manager.ts +8 -2
  60. package/src/channels/router.ts +506 -45
  61. package/src/channels/schema.ts +21 -4
  62. package/src/channels/subagent-completion-bridge.ts +18 -18
  63. package/src/channels/types.ts +69 -1
  64. package/src/cli/inspect-controller.ts +132 -33
  65. package/src/cli/inspect.ts +2 -1
  66. package/src/commands/index.ts +9 -0
  67. package/src/container/start.ts +7 -1
  68. package/src/git/mutex.ts +22 -0
  69. package/src/git/reconcile-ignored.ts +214 -0
  70. package/src/hostd/daemon.ts +26 -1
  71. package/src/hostd/portbroker-manager.ts +7 -0
  72. package/src/init/dockerfile.ts +1 -1
  73. package/src/init/gitignore.ts +28 -16
  74. package/src/inspect/index.ts +53 -4
  75. package/src/inspect/loop.ts +16 -12
  76. package/src/plugin/define.ts +2 -2
  77. package/src/plugin/index.ts +2 -2
  78. package/src/portbroker/hostd-client.ts +36 -13
  79. package/src/run/index.ts +74 -5
  80. package/src/sandbox/build.ts +20 -0
  81. package/src/sandbox/index.ts +10 -0
  82. package/src/sandbox/policy.ts +22 -0
  83. package/src/sandbox/session-tmp.ts +43 -0
  84. package/src/sandbox/writable-zones.ts +178 -0
  85. package/src/server/command-runner.ts +1 -1
  86. package/src/server/index.ts +126 -4
  87. package/src/skills/typeclaw-channel-github/SKILL.md +71 -17
  88. package/src/skills/typeclaw-memory/SKILL.md +3 -1
  89. package/src/tui/format.ts +11 -11
  90. package/typeclaw.schema.json +10 -0
package/README.md CHANGED
@@ -34,7 +34,7 @@ If you're like me, TypeClaw is the right choice. If not, that's fine too.
34
34
  - 💬 **Multi-channel** — Slack, Discord, Telegram, KakaoTalk, GitHub webhooks, and a websocket TUI; one agent, many inboxes
35
35
  - ⏰ **Cron** — schedule prompts or shell commands; per-job coalescing so slow jobs don't pile up
36
36
  - 📚 **Skills on demand** — markdown procedures the agent loads only when relevant; zero token cost until used
37
- - 🔎 **Web research** — bundled `scout` subagent plus first-class `websearch` and `webfetch` tools (DuckDuckGo via curl-impersonate, Wikipedia)
37
+ - 🔎 **Web research** — bundled `scout` subagent plus first-class `web_search` and `web_fetch` tools (DuckDuckGo via curl-impersonate, Wikipedia)
38
38
  - 🛡 **Security guards** — bundled `tool.before` policies catch secret exfil, SSRF, prompt injection, tainted git remotes, and silent privilege escalation (role/cron promotion) before they fire
39
39
  - 📊 **Usage, inspect, doctor** — `typeclaw usage` reports token/$ spend per session, model, or day; `typeclaw inspect` replays a session transcript and tails live activity; `typeclaw doctor` diagnoses host, agent folder, and plugin state
40
40
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "typeclaw",
3
- "version": "0.23.0",
3
+ "version": "0.25.0",
4
4
  "homepage": "https://github.com/typeclaw/typeclaw#readme",
5
5
  "bugs": {
6
6
  "url": "https://github.com/typeclaw/typeclaw/issues"
package/src/agent/index.ts CHANGED
@@ -4,6 +4,7 @@ import { fileURLToPath } from 'node:url'
4
4
 
5
5
  import {
6
6
  createAgentSession,
7
+ createCodingTools,
7
8
  DefaultResourceLoader,
8
9
  defineTool as definePiTool,
9
10
  SessionManager,
@@ -34,6 +35,7 @@ import { getAuthFor } from './auth'
34
35
  import { createCompactionSettingsManager } from './compaction'
35
36
  import { renderGitNudge } from './git-nudge'
36
37
  import type { LiveSubagentRegistry } from './live-subagents'
38
+ import { sanitizeMessagesForLlmReplay } from './llm-replay-sanitizer'
37
39
  import { applyModelRuntimeOverrides } from './model-overrides'
38
40
  import { createChannelLookAtTool, lookAtTool } from './multimodal'
39
41
  import {
@@ -45,11 +47,13 @@ import {
45
47
  zodToToolParameters,
46
48
  } from './plugin-tools'
47
49
  import { createReloadTool } from './reload-tool'
50
+ import type { RestartHandoffOrigin } from './restart-handoff'
48
51
  import { loadSelf } from './self'
49
52
  import { SESSION_META_CUSTOM_TYPE, sessionMetaPayload } from './session-meta'
50
53
  import { renderSessionOrigin, type SessionOrigin, type SessionRoleContext } from './session-origin'
51
54
  import type { CreateSessionForSubagent, SubagentRegistry } from './subagents'
52
55
  import { DEFAULT_SYSTEM_PROMPT, renderRuntimeBlock, SLIM_SYSTEM_PROMPT } from './system-prompt'
56
+ import { attachToolNotFoundNudge } from './tool-not-found-nudge'
53
57
  import {
54
58
  createBudgetState,
55
59
  type ToolResultBudget,
@@ -68,8 +72,9 @@ import { createSpawnSubagentTool } from './tools/spawn-subagent'
68
72
  import { createStreamSnapshotTool } from './tools/stream-snapshot'
69
73
  import { createSubagentCancelTool } from './tools/subagent-cancel'
70
74
  import { createSubagentOutputTool } from './tools/subagent-output'
71
- import { webfetchTool } from './tools/webfetch'
72
- import { websearchTool } from './tools/websearch'
75
+ import { createTodoTools } from './tools/todo'
76
+ import { webFetchTool } from './tools/webfetch'
77
+ import { webSearchTool } from './tools/websearch'
73
78
 
74
79
  export type { SessionOrigin } from './session-origin'
75
80
 
@@ -79,6 +84,13 @@ export { renderTurnRoleAnchor, renderTurnTimeAnchor } from './system-prompt'
79
84
 
80
85
  type AgentSessionTools = NonNullable<Parameters<typeof createAgentSession>[0]>['tools']
81
86
 
87
+ // pi's default active built-in tools when a session declares no `tools:` filter
88
+ // (pi `createAgentSession` falls back to `defaultActiveToolNames`, which is the
89
+ // name set of `codingTools`). Derived from pi's own `createCodingTools()` rather
90
+ // than hardcoded so the list can't silently drift if pi adds/removes/renames a
91
+ // default builtin; `default-pi-builtins match pi's coding tool set` pins it.
92
+ const DEFAULT_PI_BUILTIN_TOOL_NAMES = createCodingTools(process.cwd()).map((t) => t.name)
93
+
82
94
  export type PluginSessionWiring = {
83
95
  registry: PluginRegistry
84
96
  hooks: HookBus
@@ -248,6 +260,13 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
248
260
  const getOrigin: () => SessionOrigin | undefined =
249
261
  options.originRef !== undefined ? () => options.originRef!.current : () => options.origin
250
262
 
263
+ // Holds the session's signal-only abort once `createAgentSession` resolves.
264
+ // Tools are wrapped BEFORE the session exists, so the loop guard reaches the
265
+ // abort through this lazily-resolved getter. See `fireLoopAbort` in
266
+ // plugin-tools.ts for why aborting (not throwing) is what stops the loop.
267
+ const abortHolder: { abort?: () => void } = {}
268
+ const getAbort: () => (() => void) | undefined = () => abortHolder.abort
269
+
251
270
  // Subagent built-in tool refs are dual-routed (see BUILTIN_TOOL_DEFINITION
252
271
  // dual-map in plugin-tools.ts): pi-side coding tools go to `tools:` so they
253
272
  // become the strict base set, typeclaw-side web tools go to `customTools:`.
@@ -259,8 +278,8 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
259
278
  ? resolveBuiltinToolRefs(options.pluginSubagent.toolRefs)
260
279
  : { agentTools: [], toolDefinitions: [] }
261
280
  const pluginCustomTools = options.pluginSubagent
262
- ? wrapSubagentCustomTools(options.pluginSubagent, options.plugins, getOrigin)
263
- : wrapRegistryTools(options.plugins, getOrigin)
281
+ ? wrapSubagentCustomTools(options.pluginSubagent, options.plugins, getOrigin, getAbort)
282
+ : wrapRegistryTools(options.plugins, getOrigin, getAbort)
264
283
 
265
284
  // Per-run budget state for the tool-result byte ceiling. Allocated once per
266
285
  // session creation and threaded into every wrapped tool so they share the
@@ -276,7 +295,7 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
276
295
 
277
296
  const effectiveTools =
278
297
  options.tools ?? (options.pluginSubagent ? (resolvedSubagentBuiltins.agentTools as AgentSessionTools) : undefined)
279
- const hookWrappedTools = wrapSystemAgentTools(effectiveTools, options.plugins, getOrigin)
298
+ const hookWrappedTools = wrapSystemAgentTools(effectiveTools, options.plugins, getOrigin, getAbort)
280
299
  const tools =
281
300
  sessionBudget && sessionBudgetState && hookWrappedTools
282
301
  ? (hookWrappedTools.map((t) =>
@@ -309,14 +328,33 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
309
328
  }
310
329
  }
311
330
 
331
+ // Plugin subagents (operator/reviewer) see ONLY their declared builtins plus
332
+ // the orchestration tools — never the full main-session tool surface. The
333
+ // orchestration tools self-omit unless `liveSubagentRegistry`/
334
+ // `subagentRegistry`/`createSessionForSubagent` are wired (see
335
+ // buildSubagentOrchestrationTools); `spawn_subagent` enforces MAX_SUBAGENT_DEPTH
336
+ // at execute time so a depth-capped subagent's spawn fails closed even though
337
+ // the tool is present.
312
338
  const customSystemTools =
313
339
  options.customTools !== undefined
314
340
  ? options.customTools
315
341
  : options.pluginSubagent
316
- ? resolvedSubagentBuiltins.toolDefinitions
342
+ ? [
343
+ ...resolvedSubagentBuiltins.toolDefinitions,
344
+ ...buildSubagentOrchestrationTools({
345
+ liveRegistry: options.liveSubagentRegistry,
346
+ registry: options.subagentRegistry,
347
+ createSessionForSubagent: options.createSessionForSubagent,
348
+ agentDir: options.plugins?.agentDir,
349
+ parentSessionId: sessionManager.getSessionId(),
350
+ getOrigin,
351
+ permissions: options.permissions,
352
+ stream: options.stream,
353
+ }),
354
+ ]
317
355
  : [
318
- websearchTool,
319
- webfetchTool,
356
+ webSearchTool,
357
+ webFetchTool,
320
358
  lookAtTool,
321
359
  ...(options.mcpManager ? buildMcpDispatcherToolDefinitions(options.mcpManager) : []),
322
360
  ...(options.reloadRegistry ? [createReloadTool({ registry: options.reloadRegistry })] : []),
@@ -348,6 +386,7 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
348
386
  permissions: options.permissions,
349
387
  reloadRoles: options.reloadRoles,
350
388
  }),
389
+ ...buildTodoTools(options.plugins?.agentDir, getOrigin),
351
390
  ]
352
391
  // Hook coverage for pi's builtin coding tools (read/bash/edit/write/grep/
353
392
  // find/ls) — pi 0.67.3 ignores `tools:` for implementation, so the only
@@ -361,10 +400,11 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
361
400
  sessionId: options.plugins.sessionId,
362
401
  hooks: options.plugins.hooks,
363
402
  getOrigin,
403
+ getAbort,
364
404
  ...(options.permissions ? { permissions: options.permissions } : {}),
365
405
  })
366
406
  : []
367
- const wrappedCustomSystemTools = wrapSystemTools(customSystemTools, options.plugins, getOrigin)
407
+ const wrappedCustomSystemTools = wrapSystemTools(customSystemTools, options.plugins, getOrigin, getAbort)
368
408
  const customToolsPreBudget = [...wrappedCustomSystemTools, ...pluginCustomTools, ...builtinPiToolOverrides]
369
409
  const customTools =
370
410
  sessionBudget && sessionBudgetState
@@ -385,25 +425,56 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
385
425
  ...(thinkingLevel ? { thinkingLevel } : {}),
386
426
  })
387
427
 
428
+ // Layer the replay sanitizer over pi's convertToLlm so a transcript with an
429
+ // orphaned toolResult (e.g. a torn-down restart turn) can't wedge the session
430
+ // with an Anthropic 400 on every replay. Runs on every provider call path
431
+ // that goes through the agent. Honors pi's contract that convertToLlm must
432
+ // not throw: on any failure it falls back to the unsanitized output.
433
+ const innerConvertToLlm = session.agent.convertToLlm
434
+ session.agent.convertToLlm = async (messages) => {
435
+ const converted = await innerConvertToLlm(messages)
436
+ try {
437
+ return sanitizeMessagesForLlmReplay(converted).messages
438
+ } catch {
439
+ return converted
440
+ }
441
+ }
442
+
443
+ abortHolder.abort = () => {
444
+ if (session.agent.signal?.aborted !== true) session.agent.abort()
445
+ }
446
+
447
+ // The names the session actually exposes to the model: pi's active base set
448
+ // (the caller's `tools:` filter, or pi's default builtins when unset) union
449
+ // the typeclaw/plugin custom tools. Deliberately EXCLUDES
450
+ // `builtinPiToolOverrides` — those replace builtin implementations by name,
451
+ // they are not additional callable names. This is the single source of truth
452
+ // for both the active-set re-narrowing below and the tool-not-found nudge
453
+ // vocabulary, so the two never drift (a divergence would make the nudge miss
454
+ // real tools or suggest tools the session deliberately did not expose).
455
+ const intendedActiveToolNames = [
456
+ ...new Set([
457
+ ...(tools !== undefined ? tools.map((t) => t.name) : DEFAULT_PI_BUILTIN_TOOL_NAMES),
458
+ ...[...wrappedCustomSystemTools, ...pluginCustomTools].map((t) => t.name),
459
+ ]),
460
+ ]
461
+
388
462
  // Re-narrow the active tool set after `createAgentSession`. pi 0.67.3's
389
463
  // `_refreshToolRegistry` runs with `includeAllExtensionTools: true` and
390
464
  // pushes every customTool name into the active set, which would widen
391
465
  // a subagent's declared `[edit]` to all 7 builtin overrides plus every
392
- // typeclaw custom tool. The intended active set is the names the caller
393
- // would have gotten WITHOUT the builtin overrides: pi's `initialActiveToolNames`
394
- // (derived from `tools:`) union the names from typeclaw/plugin customTools.
395
- // `builtinPiToolOverrides` are implementation overrides, never additions.
466
+ // typeclaw custom tool.
396
467
  if (builtinPiToolOverrides.length > 0) {
397
- const baseActiveNames = tools !== undefined ? tools.map((t) => t.name) : ['read', 'bash', 'edit', 'write']
398
- const customToolActiveNames = [...wrappedCustomSystemTools, ...pluginCustomTools].map((t) => t.name)
399
- const intendedActive = [...new Set([...baseActiveNames, ...customToolActiveNames])]
400
- session.setActiveToolsByName(intendedActive)
468
+ session.setActiveToolsByName(intendedActiveToolNames)
401
469
  }
402
470
 
403
471
  const unsubRestart = subscribeRestartNotice(options.stream, sessionManager)
404
472
 
473
+ const unsubToolNudge = attachToolNotFoundNudge(session, intendedActiveToolNames)
474
+
405
475
  const dispose = async () => {
406
476
  unsubRestart?.()
477
+ unsubToolNudge()
407
478
  if (materializedSkills) await materializedSkills.dispose()
408
479
  }
409
480
  return { session, dispose }
@@ -411,22 +482,39 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
411
482
 
412
483
  // Decides whether the restart tool should write the cross-restart handoff
413
484
  // file (`<agentDir>/.typeclaw/restart-pending.json`) and supplies the agentDir
414
- // + session file path it needs to do so. Returns an empty object — meaning
415
- // "no handoff" — for any session whose origin is not TUI, so a channel-
416
- // originated or cron-originated `restart` call cannot accidentally produce an
417
- // "I'm back" greeting in the next container's first TUI session. See
418
- // issue #291's scoping concerns. Also returns empty when the session is not
419
- // persisted to disk (in-memory sessions have no file the next container could
420
- // reopen).
485
+ // + session file path + origin metadata it needs to do so. Returns an empty
486
+ // object — meaning "no handoff" — for cron/subagent/system origins (no
487
+ // attended session the next boot could resume) and for in-memory sessions
488
+ // (no file to reopen).
489
+ //
490
+ // TUI and channel origins both resume: a TUI restart reattaches to the
491
+ // reconnecting client (websocket open handler), a channel restart reopens the
492
+ // originating chat session on the channel router's boot path. The `origin`
493
+ // discriminator in the handoff is what routes the next boot to the correct
494
+ // subsystem.
421
495
  export function buildRestartHandoffWiring(
422
496
  options: { origin?: SessionOrigin; plugins?: { agentDir: string } },
423
497
  sessionManager: SessionManager,
424
- ): { agentDir?: string; originatingSessionFile?: string } {
425
- if (options.origin?.kind !== 'tui') return {}
498
+ ): { agentDir?: string; originatingSessionFile?: string; handoffOrigin?: RestartHandoffOrigin } {
499
+ const origin = options.origin
500
+ if (origin === undefined) return {}
501
+ const handoffOrigin = restartHandoffOriginFor(origin)
502
+ if (handoffOrigin === null) return {}
426
503
  const agentDir = options.plugins?.agentDir
427
504
  const sessionFile = sessionManager.getSessionFile()
428
505
  if (agentDir === undefined || sessionFile === undefined) return {}
429
- return { agentDir, originatingSessionFile: sessionFile }
506
+ return { agentDir, originatingSessionFile: sessionFile, handoffOrigin }
507
+ }
508
+
509
+ function restartHandoffOriginFor(origin: SessionOrigin): RestartHandoffOrigin | null {
510
+ if (origin.kind === 'tui') return { kind: 'tui' }
511
+ if (origin.kind === 'channel') {
512
+ return {
513
+ kind: 'channel',
514
+ key: { adapter: origin.adapter, workspace: origin.workspace, chat: origin.chat, thread: origin.thread },
515
+ }
516
+ }
517
+ return null
430
518
  }
431
519
 
432
520
  // Subscribes the given session to the in-process broadcast that the `restart`
@@ -633,11 +721,13 @@ export function buildSubagentOrchestrationTools(opts: {
633
721
  createSubagentOutputTool({
634
722
  liveRegistry: opts.liveRegistry,
635
723
  getOrigin: opts.getOrigin,
724
+ callerSessionId: opts.parentSessionId,
636
725
  ...(opts.permissions ? { permissions: opts.permissions } : {}),
637
726
  }),
638
727
  createSubagentCancelTool({
639
728
  liveRegistry: opts.liveRegistry,
640
729
  getOrigin: opts.getOrigin,
730
+ callerSessionId: opts.parentSessionId,
641
731
  ...(opts.permissions ? { permissions: opts.permissions } : {}),
642
732
  }),
643
733
  ]
@@ -662,9 +752,18 @@ export function buildRoleGrantTools(opts: {
662
752
  ]
663
753
  }
664
754
 
755
+ export function buildTodoTools(
756
+ agentDir: string | undefined,
757
+ getOrigin: () => SessionOrigin | undefined,
758
+ ): ToolDefinition[] {
759
+ if (agentDir === undefined) return []
760
+ return createTodoTools({ agentDir, getOrigin })
761
+ }
762
+
665
763
  function wrapRegistryTools(
666
764
  plugins: PluginSessionWiring | undefined,
667
765
  getOrigin: () => SessionOrigin | undefined,
766
+ getAbort: () => (() => void) | undefined,
668
767
  ): ToolDefinition[] {
669
768
  if (!plugins) return []
670
769
  return plugins.registry.tools.map((t: PluginRegisteredTool) =>
@@ -676,6 +775,7 @@ function wrapRegistryTools(
676
775
  logger: t.logger,
677
776
  hooks: plugins.hooks,
678
777
  getOrigin,
778
+ getAbort,
679
779
  }),
680
780
  )
681
781
  }
@@ -684,6 +784,7 @@ function wrapSystemAgentTools(
684
784
  tools: AgentSessionTools | undefined,
685
785
  plugins: PluginSessionWiring | undefined,
686
786
  getOrigin: () => SessionOrigin | undefined,
787
+ getAbort: () => (() => void) | undefined,
687
788
  ): AgentSessionTools | undefined {
688
789
  if (!tools || !hasToolHooks(plugins)) return tools
689
790
  return tools.map((tool) =>
@@ -692,6 +793,7 @@ function wrapSystemAgentTools(
692
793
  sessionId: plugins.sessionId,
693
794
  hooks: plugins.hooks,
694
795
  getOrigin,
796
+ getAbort,
695
797
  }),
696
798
  )
697
799
  }
@@ -700,6 +802,7 @@ function wrapSystemTools(
700
802
  tools: ToolDefinition[],
701
803
  plugins: PluginSessionWiring | undefined,
702
804
  getOrigin: () => SessionOrigin | undefined,
805
+ getAbort: () => (() => void) | undefined,
703
806
  ): ToolDefinition[] {
704
807
  if (!hasToolHooks(plugins)) return tools
705
808
  return tools.map((tool) =>
@@ -708,6 +811,7 @@ function wrapSystemTools(
708
811
  sessionId: plugins.sessionId,
709
812
  hooks: plugins.hooks,
710
813
  getOrigin,
814
+ getAbort,
711
815
  }),
712
816
  )
713
817
  }
@@ -721,6 +825,7 @@ function wrapSubagentCustomTools(
721
825
  selection: PluginSubagentSelection,
722
826
  plugins: PluginSessionWiring | undefined,
723
827
  getOrigin: () => SessionOrigin | undefined,
828
+ getAbort: () => (() => void) | undefined,
724
829
  ): ToolDefinition[] {
725
830
  if (!selection.customTools || !plugins) return []
726
831
  const logger = makePluginLogger(selection.pluginName)
@@ -733,6 +838,7 @@ function wrapSubagentCustomTools(
733
838
  logger,
734
839
  hooks: plugins.hooks,
735
840
  getOrigin,
841
+ getAbort,
736
842
  }),
737
843
  )
738
844
  }
package/src/agent/llm-replay-sanitizer.ts ADDED
@@ -0,0 +1,120 @@
1
+ // Defensive projection applied to the LLM message array right before each
2
+ // provider call, layered on top of pi-coding-agent's `convertToLlm`. It exists
3
+ // to un-wedge sessions whose persisted transcript contains a `toolResult` with
4
+ // no live preceding `toolCall` — the exact shape Anthropic rejects with
5
+ // "unexpected `tool_use_id` found in `tool_result` blocks" (HTTP 400).
6
+ //
7
+ // How a transcript gets poisoned: the self-`restart` tool exits the container
8
+ // mid-turn. The assistant turn carrying the restart `toolCall` can land in the
9
+ // JSONL with `stopReason: "error"/"aborted"` (or be torn down), while its
10
+ // `toolResult` is persisted. On replay, pi-ai's provider-side `transformMessages`
11
+ // DROPS error/aborted assistant turns but passes the `toolResult` through
12
+ // unchanged, leaving a true orphan that the API rejects on every subsequent
13
+ // turn — the session is permanently stuck.
14
+ //
15
+ // pi-ai's `transformMessages` already handles the inverse cases (a `toolCall`
16
+ // with no result → synthetic "No result provided" result; error/aborted
17
+ // assistant turns → dropped). The one gap is an orphaned `toolResult`. This
18
+ // sanitizer fills exactly that gap and nothing more.
19
+ //
20
+ // Invariant (local pending-window, NOT a global id union — Anthropic requires
21
+ // tool results to belong to the immediately preceding tool-use turn):
22
+ // 1. Assistant turns with stopReason "error"/"aborted" are dropped here, so
23
+ // orphan detection sees the same message set the provider will after its
24
+ // own drop pass. Without this, a result tied to a dropped assistant would
25
+ // survive us and be orphaned downstream — the original bug.
26
+ // 2. A `toolResult` is kept only if its `toolCallId` was declared by the most
27
+ // recent kept assistant tool-use turn AND has not already been emitted in
28
+ // that window. Any user or assistant message closes the window.
29
+ // 3. Missing results are NOT synthesized here — pi-ai's existing pass inserts
30
+ // the synthetic placeholder, so dropping an orphan that leaves a bare
31
+ // `toolCall` is safe and self-healing.
32
+ //
33
+ // This is a read-only projection: it never mutates the persisted JSONL, so an
34
+ // already-poisoned session becomes usable without destructive migration.
35
+
36
+ import type { Message } from '@mariozechner/pi-ai'
37
+
38
+ export type ReplaySanitizerStats = {
39
+ droppedOrphans: number
40
+ droppedDuplicates: number
41
+ droppedErrorAssistants: number
42
+ }
43
+
44
+ export type SanitizeResult = {
45
+ messages: Message[]
46
+ stats: ReplaySanitizerStats
47
+ }
48
+
49
+ function isErroredAssistant(message: Message): boolean {
50
+ return message.role === 'assistant' && (message.stopReason === 'error' || message.stopReason === 'aborted')
51
+ }
52
+
53
+ function toolCallIdsOf(message: Extract<Message, { role: 'assistant' }>): string[] {
54
+ return message.content
55
+ .filter((block): block is Extract<typeof block, { type: 'toolCall' }> => block.type === 'toolCall')
56
+ .map((block) => block.id)
57
+ .filter((id): id is string => typeof id === 'string' && id.length > 0)
58
+ }
59
+
60
+ export function sanitizeMessagesForLlmReplay(messages: Message[]): SanitizeResult {
61
+ const output: Message[] = []
62
+ const stats: ReplaySanitizerStats = {
63
+ droppedOrphans: 0,
64
+ droppedDuplicates: 0,
65
+ droppedErrorAssistants: 0,
66
+ }
67
+
68
+ let pendingToolCallIds = new Set<string>()
69
+ let emittedResultIds = new Set<string>()
70
+
71
+ const closeWindow = () => {
72
+ pendingToolCallIds = new Set()
73
+ emittedResultIds = new Set()
74
+ }
75
+
76
+ for (const message of messages) {
77
+ if (message.role === 'assistant') {
78
+ closeWindow()
79
+
80
+ // Mirror pi-ai's provider-side drop of incomplete turns so orphan
81
+ // detection matches the message set the provider will actually send.
82
+ if (isErroredAssistant(message)) {
83
+ stats.droppedErrorAssistants += 1
84
+ continue
85
+ }
86
+
87
+ const callIds = toolCallIdsOf(message)
88
+ if (callIds.length > 0) pendingToolCallIds = new Set(callIds)
89
+ output.push(message)
90
+ continue
91
+ }
92
+
93
+ if (message.role === 'user') {
94
+ closeWindow()
95
+ output.push(message)
96
+ continue
97
+ }
98
+
99
+ if (message.role === 'toolResult') {
100
+ const id = message.toolCallId
101
+ if (!pendingToolCallIds.has(id)) {
102
+ // Orphan: true orphan, stale late result, or result for a dropped
103
+ // error/aborted assistant turn.
104
+ stats.droppedOrphans += 1
105
+ continue
106
+ }
107
+ if (emittedResultIds.has(id)) {
108
+ stats.droppedDuplicates += 1
109
+ continue
110
+ }
111
+ emittedResultIds.add(id)
112
+ output.push(message)
113
+ continue
114
+ }
115
+
116
+ output.push(message)
117
+ }
118
+
119
+ return { messages: output, stats }
120
+ }
package/src/agent/loop-guard.ts CHANGED
@@ -63,6 +63,14 @@ export type LoopGuard = {
63
63
  check: (sessionId: string, tool: string, args: unknown) => LoopGuardDecision
64
64
  reset: (sessionId: string) => void
65
65
  forget: (sessionId: string) => void
66
+ // Clears only the residue a single tool left behind in a session: its entries
67
+ // in the windowed history and, if the current consecutive streak belongs to
68
+ // that tool, the streak itself. Used when a state-change boundary makes a
69
+ // tool's prior calls irrelevant — e.g. a backgrounded subagent finishing
70
+ // makes the next `subagent_output` fetch legitimate even though earlier
71
+ // premature polls poisoned the window. Narrower than `forget`, so an
72
+ // unrelated tool's accumulating loop on the same session is preserved.
73
+ forgetTool: (sessionId: string, tool: string) => void
66
74
  }
67
75
 
68
76
  type SessionState = {
@@ -215,9 +223,35 @@ export function createLoopGuard(options: CreateLoopGuardOptions = {}): LoopGuard
215
223
  forget(sessionId) {
216
224
  sessions.delete(sessionId)
217
225
  },
226
+ forgetTool(sessionId, tool) {
227
+ const state = sessions.get(sessionId)
228
+ if (state === undefined) return
229
+ const retained: string[] = []
230
+ for (const sig of state.window) {
231
+ if (signatureBelongsToTool(sig, tool)) {
232
+ state.windowWarned.delete(sig)
233
+ } else {
234
+ retained.push(sig)
235
+ }
236
+ }
237
+ state.window = retained
238
+ if (signatureBelongsToTool(state.signature, tool)) {
239
+ state.signature = ''
240
+ state.count = 0
241
+ state.warned = false
242
+ }
243
+ },
218
244
  }
219
245
  }
220
246
 
247
+ // Both signature builders prefix the tool name: exact signatures as `tool:...`
248
+ // and path-coarsened ones as `tool#path:...`. A tool's residue is therefore any
249
+ // signature starting with `tool:` or `tool#`, never a different tool whose name
250
+ // merely shares this one as a prefix (the delimiter rules that out).
251
+ function signatureBelongsToTool(signature: string, tool: string): boolean {
252
+ return signature.startsWith(`${tool}:`) || signature.startsWith(`${tool}#`)
253
+ }
254
+
221
255
  function formatWarnMessage(tool: string, count: number): string {
222
256
  return (
223
257
  `\n\n[loop-guard] You have called \`${tool}\` ${count} times in a row with identical arguments. ` +
package/src/agent/multimodal/look-at.ts CHANGED
@@ -161,7 +161,7 @@ async function runLookAtImages(imageContents: ImageContent[], prompt: string | u
161
161
  origin,
162
162
  profile: 'vision',
163
163
  // Both knobs are required to fully disarm the subagent's tool surface:
164
- // `customTools: []` blocks typeclaw's system tools (websearch/webfetch/
164
+ // `customTools: []` blocks typeclaw's system tools (web_search/web_fetch/
165
165
  // look_at/restart/...) — without it, the look_at tool would recurse
166
166
  // into itself. `tools: []` blocks pi-coding-agent's defaults
167
167
  // (read/bash/edit/write) — without it, a vision model could be talked