@vellumai/assistant 0.8.7 → 0.8.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (387)
  1. package/Dockerfile +20 -4
  2. package/docker-entrypoint.sh +4 -2
  3. package/docker-init-apt-root.sh +3 -1
  4. package/docker-kata-apt-env.sh +3 -1
  5. package/docker-kata-runtime-family.sh +12 -0
  6. package/docs/architecture/memory.md +1 -1
  7. package/docs/plugins.md +75 -79
  8. package/examples/plugins/echo/README.md +6 -12
  9. package/examples/plugins/echo/register.ts +0 -41
  10. package/node_modules/@vellumai/skill-host-contracts/src/server-message.ts +3 -3
  11. package/openapi.yaml +3381 -348
  12. package/package.json +1 -1
  13. package/scripts/generate-openapi.ts +68 -41
  14. package/src/__tests__/agent-loop-exit-reason.test.ts +34 -39
  15. package/src/__tests__/agent-loop-provider-error-recording.test.ts +1 -1
  16. package/src/__tests__/agent-loop.test.ts +37 -87
  17. package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +2 -0
  18. package/src/__tests__/annotate-activity-metadata.test.ts +262 -0
  19. package/src/__tests__/annotate-risk-options.test.ts +2 -3
  20. package/src/__tests__/anthropic-provider.test.ts +95 -2
  21. package/src/__tests__/assistant-event-hub.test.ts +25 -0
  22. package/src/__tests__/assistant-events-sse-shed.test.ts +8 -0
  23. package/src/__tests__/{conversation-stream-state.test.ts → assistant-stream-state.test.ts} +252 -91
  24. package/src/__tests__/auth-fallback-events-store.test.ts +116 -0
  25. package/src/__tests__/background-workers-disk-pressure.test.ts +6 -0
  26. package/src/__tests__/btw-routes.test.ts +62 -3
  27. package/src/__tests__/build-persisted-content.test.ts +184 -0
  28. package/src/__tests__/catalog-files.test.ts +1 -1
  29. package/src/__tests__/clawhub-files.test.ts +1 -1
  30. package/src/__tests__/compaction-pipeline.test.ts +1 -1
  31. package/src/__tests__/compaction.benchmark.test.ts +0 -30
  32. package/src/__tests__/config-watcher.test.ts +1 -1
  33. package/src/__tests__/conversation-abort-tool-results.test.ts +57 -19
  34. package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +6 -2
  35. package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +10 -4
  36. package/src/__tests__/conversation-agent-loop-overflow.test.ts +313 -1136
  37. package/src/__tests__/conversation-agent-loop.test.ts +596 -1616
  38. package/src/__tests__/conversation-analysis-routes.test.ts +6 -0
  39. package/src/__tests__/conversation-history-web-search.test.ts +11 -1
  40. package/src/__tests__/conversation-pairing.test.ts +4 -31
  41. package/src/__tests__/conversation-process-app-control-preactivation.test.ts +6 -0
  42. package/src/__tests__/conversation-provider-retry-repair.test.ts +26 -5
  43. package/src/__tests__/conversation-queue.test.ts +2 -0
  44. package/src/__tests__/conversation-routes-disk-view.test.ts +3 -0
  45. package/src/__tests__/conversation-routes-slash-commands.test.ts +6 -5
  46. package/src/__tests__/conversation-runtime-assembly.test.ts +170 -229
  47. package/src/__tests__/conversation-runtime-workspace.test.ts +3 -24
  48. package/src/__tests__/conversation-slash-commands.test.ts +8 -42
  49. package/src/__tests__/conversation-slash-queue.test.ts +6 -1
  50. package/src/__tests__/conversation-surfaces-action-delivery.test.ts +84 -0
  51. package/src/__tests__/conversation-sync-tags.test.ts +27 -15
  52. package/src/__tests__/conversation-title-service.test.ts +135 -2
  53. package/src/__tests__/conversation-workspace-injection.test.ts +6 -1
  54. package/src/__tests__/cross-provider-web-search.test.ts +214 -1
  55. package/src/__tests__/db-schedule-syntax-migration.test.ts +5 -0
  56. package/src/__tests__/dm-persistence.test.ts +5 -1
  57. package/src/__tests__/empty-response-hook.test.ts +304 -0
  58. package/src/__tests__/feature-flag-test-helpers.ts +2 -2
  59. package/src/__tests__/gemini-image-service.test.ts +13 -0
  60. package/src/__tests__/helpers/mock-provider.ts +110 -0
  61. package/src/__tests__/helpers/native-web-search-harness.ts +129 -0
  62. package/src/__tests__/history-repair-hook.test.ts +1 -0
  63. package/src/__tests__/identity-intro-cache.test.ts +12 -100
  64. package/src/__tests__/identity-routes.test.ts +248 -7
  65. package/src/__tests__/inbound-slack-persistence.test.ts +5 -1
  66. package/src/__tests__/injector-background-turn.test.ts +2 -8
  67. package/src/__tests__/injector-chain.test.ts +106 -270
  68. package/src/__tests__/injector-disk-pressure.test.ts +3 -12
  69. package/src/__tests__/injector-document-comments.test.ts +2 -2
  70. package/src/__tests__/injector-pkb-v2-silenced.test.ts +30 -22
  71. package/src/__tests__/injector-v3-suppression.test.ts +31 -37
  72. package/src/__tests__/internal-telemetry-routes.test.ts +109 -0
  73. package/src/__tests__/list-messages-page-latest.test.ts +60 -0
  74. package/src/__tests__/list-messages-tool-merge.test.ts +20 -0
  75. package/src/__tests__/llm-usage-store.test.ts +223 -1
  76. package/src/__tests__/memory-retrieval-hook.test.ts +297 -0
  77. package/src/__tests__/memory-v2-static-injector.test.ts +103 -35
  78. package/src/__tests__/native-web-search.test.ts +191 -0
  79. package/src/__tests__/onboarding-template-contract.test.ts +2 -0
  80. package/src/__tests__/openai-image-service.test.ts +17 -0
  81. package/src/__tests__/openai-provider.test.ts +31 -1
  82. package/src/__tests__/persist-unsendable-image.test.ts +215 -0
  83. package/src/__tests__/persistence-secret-redaction.test.ts +1 -0
  84. package/src/__tests__/pipeline-runner.test.ts +29 -39
  85. package/src/__tests__/pkb-autoinject.test.ts +2 -5
  86. package/src/__tests__/plugin-bootstrap.test.ts +13 -28
  87. package/src/__tests__/plugin-registry.test.ts +0 -27
  88. package/src/__tests__/plugin-types.test.ts +2 -125
  89. package/src/__tests__/process-message-display-content.test.ts +6 -2
  90. package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +5 -1
  91. package/src/__tests__/resolve-trust-class.test.ts +4 -4
  92. package/src/__tests__/runtime-events-sse-reconnect.test.ts +60 -23
  93. package/src/__tests__/schedule-routes.test.ts +603 -2
  94. package/src/__tests__/schedule-store.test.ts +41 -0
  95. package/src/__tests__/schedule-tools.test.ts +35 -0
  96. package/src/__tests__/server-history-render.test.ts +314 -1
  97. package/src/__tests__/skillssh-files.test.ts +1 -1
  98. package/src/__tests__/system-prompt.test.ts +20 -0
  99. package/src/__tests__/task-scheduler.test.ts +162 -1
  100. package/src/__tests__/terminal-tools.test.ts +6 -1
  101. package/src/__tests__/title-generate-hook.test.ts +319 -0
  102. package/src/__tests__/tool-error-hook.test.ts +278 -0
  103. package/src/__tests__/tool-preview-lifecycle.test.ts +468 -5
  104. package/src/__tests__/tool-result-metadata-plumbing.test.ts +1 -0
  105. package/src/__tests__/tool-result-truncate-hook.test.ts +127 -0
  106. package/src/__tests__/tool-result-truncation.test.ts +0 -2
  107. package/src/__tests__/ui-choice-copy-surfaces.test.ts +254 -0
  108. package/src/__tests__/ui-work-result-surface.test.ts +159 -0
  109. package/src/__tests__/usage-routes.test.ts +285 -1
  110. package/src/__tests__/user-plugin-loader.test.ts +2 -2
  111. package/src/__tests__/voice-session-bridge.test.ts +6 -3
  112. package/src/__tests__/web-search-backend-failure.test.ts +166 -0
  113. package/src/agent/loop.ts +346 -442
  114. package/src/api/events/assistant-thinking-delta.ts +33 -0
  115. package/src/api/events/tool-output-chunk.ts +45 -0
  116. package/src/api/events/tool-use-preview-start.ts +32 -0
  117. package/src/api/events/trace-event.ts +69 -0
  118. package/src/api/index.ts +48 -13
  119. package/src/api/responses/conversation-message.ts +368 -0
  120. package/src/avatar/__tests__/avatar-store.test.ts +34 -29
  121. package/src/cli/commands/__tests__/notifications.test.ts +58 -14
  122. package/src/cli/commands/notifications.ts +112 -60
  123. package/src/config/assistant-feature-flags.ts +22 -11
  124. package/src/config/bundled-skills/app-builder/SKILL.md +3 -20
  125. package/src/config/bundled-skills/app-builder/references/examples/README.md +17 -0
  126. package/src/config/bundled-skills/app-builder/references/examples/expense-tracker.md +515 -0
  127. package/src/config/bundled-skills/app-builder/references/examples/focus-timer.md +342 -0
  128. package/src/config/bundled-skills/app-builder/references/examples/habit-tracker.md +490 -0
  129. package/src/config/bundled-skills/document-editor/SKILL.md +1 -1
  130. package/src/config/bundled-skills/messaging/SKILL.md +0 -7
  131. package/src/config/feature-flag-cache.ts +3 -3
  132. package/src/config/feature-flag-registry.json +35 -3
  133. package/src/config/schemas/__tests__/memory-v2.test.ts +1 -0
  134. package/src/config/schemas/__tests__/memory-v3.test.ts +25 -0
  135. package/src/config/schemas/llm.ts +1 -0
  136. package/src/config/schemas/memory-v2.ts +8 -0
  137. package/src/config/schemas/memory-v3.ts +8 -0
  138. package/src/config/schemas/platform.ts +8 -0
  139. package/src/config/seed-inference-profiles.ts +2 -2
  140. package/src/config/skills.ts +13 -0
  141. package/src/context/compactor.ts +1 -1
  142. package/src/context/strip-injections.ts +122 -0
  143. package/src/context/token-estimator.ts +23 -0
  144. package/src/context/tool-result-truncation.ts +0 -23
  145. package/src/context/window-manager.ts +3 -6
  146. package/src/credential-execution/executable-discovery.ts +16 -0
  147. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +6 -0
  148. package/src/daemon/__tests__/inference-profile-notification.test.ts +153 -0
  149. package/src/daemon/__tests__/native-web-search-metadata.test.ts +10 -8
  150. package/src/daemon/assistant-attachments.ts +1 -1
  151. package/src/daemon/config-watcher.ts +2 -2
  152. package/src/daemon/context-overflow-reducer.ts +0 -1
  153. package/src/daemon/conversation-agent-loop-handlers.ts +605 -153
  154. package/src/daemon/conversation-agent-loop.ts +281 -760
  155. package/src/daemon/conversation-history.ts +5 -4
  156. package/src/daemon/conversation-lifecycle.ts +3 -4
  157. package/src/daemon/conversation-messaging.ts +7 -6
  158. package/src/daemon/conversation-process.ts +11 -16
  159. package/src/daemon/conversation-runtime-assembly.ts +130 -347
  160. package/src/daemon/conversation-slash.ts +6 -25
  161. package/src/daemon/conversation-surfaces.ts +222 -4
  162. package/src/daemon/conversation-tool-setup.ts +2 -29
  163. package/src/daemon/conversation.ts +32 -14
  164. package/src/daemon/external-plugins-bootstrap.ts +9 -10
  165. package/src/daemon/handlers/config-a2a.ts +51 -36
  166. package/src/daemon/handlers/config-slack-channel.ts +20 -14
  167. package/src/daemon/handlers/config-telegram.ts +16 -2
  168. package/src/daemon/handlers/shared.ts +156 -84
  169. package/src/daemon/handlers/skills.ts +39 -10
  170. package/src/daemon/lifecycle.ts +4 -0
  171. package/src/daemon/message-types/apps.ts +1 -29
  172. package/src/daemon/message-types/messages.ts +9 -57
  173. package/src/daemon/message-types/skills.ts +2 -0
  174. package/src/daemon/message-types/surfaces.ts +136 -3
  175. package/src/daemon/now-scratchpad.ts +21 -0
  176. package/src/daemon/orphan-reaper.test.ts +210 -0
  177. package/src/daemon/orphan-reaper.ts +240 -0
  178. package/src/daemon/persist-unsendable-image.ts +117 -0
  179. package/src/daemon/process-message.ts +1 -3
  180. package/src/daemon/trace-emitter.ts +6 -4
  181. package/src/daemon/trust-context.ts +19 -0
  182. package/src/daemon/wake-target-adapter.ts +3 -1
  183. package/src/home/home-greeting-cache.ts +24 -1
  184. package/src/ipc/gateway-client.test.ts +2 -2
  185. package/src/ipc/gateway-client.ts +3 -3
  186. package/src/media/gemini-image-service.ts +15 -0
  187. package/src/media/openai-image-service.ts +14 -0
  188. package/src/media/types.ts +34 -0
  189. package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +56 -0
  190. package/src/memory/auth-fallback-events-store.ts +94 -0
  191. package/src/memory/conversation-title-service.ts +65 -41
  192. package/src/memory/db-init.ts +4 -0
  193. package/src/memory/graph/__tests__/conversation-graph-memory-registry.test.ts +119 -0
  194. package/src/memory/graph/conversation-graph-memory.ts +65 -0
  195. package/src/memory/jobs-store.ts +33 -0
  196. package/src/memory/jobs-worker.ts +31 -4
  197. package/src/memory/llm-usage-store.ts +224 -50
  198. package/src/memory/migrations/222-strip-placeholder-sentinels-from-messages.ts +6 -5
  199. package/src/memory/migrations/270-schedule-source-conversation.ts +13 -0
  200. package/src/memory/migrations/271-create-auth-fallback-events.ts +21 -0
  201. package/src/memory/migrations/index.ts +2 -0
  202. package/src/memory/pkb/autoinject.ts +61 -0
  203. package/src/memory/pkb/context.ts +50 -0
  204. package/src/memory/pkb/types.ts +14 -0
  205. package/src/memory/schedule-attribution-sql.ts +104 -0
  206. package/src/memory/schema/infrastructure.ts +16 -0
  207. package/src/memory/usage-grouped-buckets.ts +6 -1
  208. package/src/memory/v2/__tests__/consolidation-job.test.ts +1 -1
  209. package/src/memory/v2/consolidation-job.ts +1 -1
  210. package/src/memory/v3/__tests__/health.test.ts +16 -0
  211. package/src/memory/v3/__tests__/orchestrate.test.ts +45 -9
  212. package/src/memory/v3/__tests__/provider-blocks.test.ts +13 -0
  213. package/src/memory/v3/__tests__/router.test.ts +101 -29
  214. package/src/memory/v3/__tests__/selector.test.ts +93 -27
  215. package/src/memory/v3/__tests__/shadow-plugin.test.ts +23 -5
  216. package/src/memory/v3/health.ts +0 -0
  217. package/src/memory/v3/llm-retry.ts +32 -0
  218. package/src/memory/v3/orchestrate.ts +26 -14
  219. package/src/memory/v3/provider-blocks.ts +15 -5
  220. package/src/memory/v3/router.ts +48 -42
  221. package/src/memory/v3/selector.ts +57 -42
  222. package/src/memory/v3/shadow-plugin.ts +47 -15
  223. package/src/memory/v3/types.ts +8 -0
  224. package/src/notifications/conversation-pairing.ts +8 -15
  225. package/src/notifications/decision-engine.ts +6 -3
  226. package/src/notifications/home-feed-side-effect.ts +12 -1
  227. package/src/permissions/prompter.ts +4 -0
  228. package/src/plugin-api/constants.ts +4 -0
  229. package/src/plugin-api/index.ts +8 -1
  230. package/src/plugin-api/types.ts +151 -1
  231. package/src/plugins/defaults/empty-response/hooks/stop.ts +126 -0
  232. package/src/plugins/defaults/empty-response/register.ts +8 -13
  233. package/src/plugins/defaults/index.ts +1 -15
  234. package/src/plugins/defaults/injectors/register.ts +243 -74
  235. package/src/plugins/defaults/memory-retrieval/hooks/post-compact.ts +91 -0
  236. package/src/plugins/defaults/memory-retrieval/hooks/user-prompt-submit-temp.ts +216 -0
  237. package/src/plugins/defaults/memory-retrieval/injector-chain.ts +35 -0
  238. package/src/plugins/defaults/title-generate/hooks/stop.ts +75 -0
  239. package/src/plugins/defaults/title-generate/hooks/user-prompt-submit.ts +35 -0
  240. package/src/plugins/defaults/title-generate/package.json +1 -1
  241. package/src/plugins/defaults/title-generate/register.ts +18 -18
  242. package/src/plugins/defaults/tool-error/hooks/post-tool-use.ts +118 -0
  243. package/src/plugins/defaults/tool-error/package.json +1 -1
  244. package/src/plugins/defaults/tool-error/register.ts +9 -21
  245. package/src/plugins/defaults/tool-result-truncate/hooks/post-tool-use.ts +32 -0
  246. package/src/plugins/defaults/tool-result-truncate/register.ts +10 -21
  247. package/src/plugins/defaults/tool-result-truncate/terminal.ts +37 -18
  248. package/src/plugins/pipeline.ts +6 -18
  249. package/src/plugins/registry.ts +8 -25
  250. package/src/plugins/types.ts +43 -474
  251. package/src/proactive-artifact/aux-message-injector.ts +3 -3
  252. package/src/proactive-artifact/job.test.ts +7 -12
  253. package/src/prompts/__tests__/system-prompt.test.ts +36 -0
  254. package/src/prompts/templates/BOOTSTRAP-ACTIVATION-RAIL.md +62 -0
  255. package/src/prompts/templates/BOOTSTRAP.md +2 -2
  256. package/src/prompts/templates/system-sections.ts +15 -0
  257. package/src/providers/anthropic/client.ts +37 -29
  258. package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +112 -0
  259. package/src/providers/openai/chat-completions-provider.ts +44 -0
  260. package/src/providers/openrouter/client.ts +1 -0
  261. package/src/providers/placeholder-sentinels.ts +35 -0
  262. package/src/runtime/__tests__/agent-wake.test.ts +5 -1
  263. package/src/runtime/agent-wake.ts +2 -2
  264. package/src/runtime/assistant-event-hub.ts +36 -6
  265. package/src/runtime/{conversation-stream-state.ts → assistant-stream-state.ts} +132 -58
  266. package/src/runtime/http-router.ts +16 -21
  267. package/src/runtime/http-types.ts +16 -70
  268. package/src/runtime/pending-interactions.ts +1 -0
  269. package/src/runtime/routes/__tests__/consolidation-routes.test.ts +265 -2
  270. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +31 -1
  271. package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +6 -2
  272. package/src/runtime/routes/__tests__/tts-routes.test.ts +6 -2
  273. package/src/runtime/routes/app-management-routes.ts +6 -117
  274. package/src/runtime/routes/app-routes.ts +13 -15
  275. package/src/runtime/routes/attachment-routes.ts +26 -15
  276. package/src/runtime/routes/avatar-routes.ts +26 -0
  277. package/src/runtime/routes/btw-routes.ts +29 -23
  278. package/src/runtime/routes/consolidation-routes.ts +120 -20
  279. package/src/runtime/routes/conversation-query-routes.ts +2 -0
  280. package/src/runtime/routes/conversation-routes.ts +358 -184
  281. package/src/runtime/routes/documents-routes.ts +4 -0
  282. package/src/runtime/routes/domain-routes.ts +51 -37
  283. package/src/runtime/routes/epoch-millis-range.ts +34 -0
  284. package/src/runtime/routes/events-routes.ts +28 -34
  285. package/src/runtime/routes/gateway-log-routes.ts +26 -4
  286. package/src/runtime/routes/heartbeat-routes.ts +32 -12
  287. package/src/runtime/routes/identity-intro-cache.ts +11 -34
  288. package/src/runtime/routes/identity-routes.ts +208 -17
  289. package/src/runtime/routes/image-generation-routes.ts +40 -2
  290. package/src/runtime/routes/index.ts +2 -0
  291. package/src/runtime/routes/integrations/a2a.ts +12 -10
  292. package/src/runtime/routes/integrations/slack/__tests__/channel.test.ts +16 -0
  293. package/src/runtime/routes/integrations/slack/channel.ts +4 -0
  294. package/src/runtime/routes/integrations/slack/share.ts +27 -6
  295. package/src/runtime/routes/integrations/telegram.ts +6 -0
  296. package/src/runtime/routes/integrations/twilio.ts +42 -0
  297. package/src/runtime/routes/internal-telemetry-routes.ts +88 -0
  298. package/src/runtime/routes/log-export-routes.ts +8 -0
  299. package/src/runtime/routes/memory-v2-routes.ts +15 -8
  300. package/src/runtime/routes/memory-v3-routes.ts +50 -28
  301. package/src/runtime/routes/oauth-apps.ts +66 -12
  302. package/src/runtime/routes/oauth-providers.ts +44 -5
  303. package/src/runtime/routes/platform-routes.ts +81 -5
  304. package/src/runtime/routes/playground/__tests__/force-compact.test.ts +6 -4
  305. package/src/runtime/routes/playground/force-compact.ts +1 -1
  306. package/src/runtime/routes/rename-conversation-routes.ts +5 -0
  307. package/src/runtime/routes/schedule-routes.ts +152 -42
  308. package/src/runtime/routes/secret-routes.ts +14 -2
  309. package/src/runtime/routes/skills-routes.ts +43 -14
  310. package/src/runtime/routes/tool-call-confirmation-enrichment.test.ts +161 -0
  311. package/src/runtime/routes/tool-call-confirmation-enrichment.ts +107 -0
  312. package/src/runtime/routes/trust-rules-routes.ts +26 -2
  313. package/src/runtime/routes/tts-routes.ts +35 -0
  314. package/src/runtime/routes/types.ts +66 -8
  315. package/src/runtime/routes/usage-routes.ts +47 -39
  316. package/src/runtime/routes/webhook-routes.ts +41 -2
  317. package/src/runtime/routes/workspace-routes.ts +4 -0
  318. package/src/runtime/services/__tests__/analyze-conversation.test.ts +6 -0
  319. package/src/runtime/services/analyze-conversation.ts +2 -2
  320. package/src/schedule/schedule-store.ts +20 -1
  321. package/src/schedule/schedule-usage-store.ts +83 -0
  322. package/src/schedule/scheduler.ts +12 -5
  323. package/src/skills/catalog-files.ts +2 -2
  324. package/src/skills/catalog-install.ts +3 -0
  325. package/src/skills/categories-cache.ts +118 -0
  326. package/src/skills/clawhub-files.ts +1 -2
  327. package/src/skills/skillssh-files.ts +1 -2
  328. package/src/telemetry/types.ts +29 -1
  329. package/src/telemetry/usage-telemetry-reporter.test.ts +112 -3
  330. package/src/telemetry/usage-telemetry-reporter.ts +57 -2
  331. package/src/tools/executor.ts +1 -53
  332. package/src/tools/network/__tests__/web-search-metadata.test.ts +7 -1
  333. package/src/tools/network/__tests__/web-search.test.ts +11 -3
  334. package/src/tools/network/web-search-error.test.ts +248 -0
  335. package/src/tools/network/web-search-error.ts +267 -0
  336. package/src/tools/network/web-search.ts +207 -48
  337. package/src/tools/schedule/create.ts +2 -0
  338. package/src/tools/terminal/safe-env.ts +10 -1
  339. package/src/tools/ui-surface/definitions.ts +9 -1
  340. package/src/tts/__tests__/provider-catalog-consistency.test.ts +85 -1
  341. package/src/tts/provider-catalog.ts +76 -1
  342. package/src/util/mutex.ts +47 -0
  343. package/src/workspace/git-service.ts +1 -42
  344. package/src/workspace/migrations/095-bump-heartbeat-interval-30m-to-60m.ts +51 -0
  345. package/src/workspace/migrations/096-reduce-quality-profile-effort.ts +72 -0
  346. package/src/workspace/migrations/097-enable-adaptive-thinking-managed-profiles.ts +93 -0
  347. package/src/workspace/migrations/registry.ts +6 -0
  348. package/src/__tests__/bootstrap-turn-cleanup.test.ts +0 -44
  349. package/src/__tests__/empty-response-pipeline.test.ts +0 -423
  350. package/src/__tests__/llm-call-pipeline.test.ts +0 -287
  351. package/src/__tests__/memory-retrieval-pipeline.test.ts +0 -418
  352. package/src/__tests__/persistence-pipeline.test.ts +0 -503
  353. package/src/__tests__/title-generate-pipeline.test.ts +0 -211
  354. package/src/__tests__/token-estimate-pipeline.test.ts +0 -479
  355. package/src/__tests__/tool-error-pipeline.test.ts +0 -241
  356. package/src/__tests__/tool-execute-pipeline.test.ts +0 -417
  357. package/src/__tests__/tool-result-truncate-pipeline.test.ts +0 -341
  358. package/src/daemon/bootstrap-turn-cleanup.ts +0 -45
  359. package/src/gallery/default-gallery.ts +0 -1359
  360. package/src/gallery/gallery-manifest.ts +0 -28
  361. package/src/home/feature-gate.ts +0 -22
  362. package/src/plugins/defaults/empty-response/middlewares/emptyResponse.ts +0 -22
  363. package/src/plugins/defaults/empty-response/terminal.ts +0 -106
  364. package/src/plugins/defaults/injectors/package.json +0 -15
  365. package/src/plugins/defaults/llm-call/middlewares/llmCall.ts +0 -17
  366. package/src/plugins/defaults/llm-call/package.json +0 -15
  367. package/src/plugins/defaults/llm-call/register.ts +0 -45
  368. package/src/plugins/defaults/memory-retrieval/middlewares/memoryRetrieval.ts +0 -17
  369. package/src/plugins/defaults/memory-retrieval/package.json +0 -15
  370. package/src/plugins/defaults/memory-retrieval/register.ts +0 -181
  371. package/src/plugins/defaults/persistence/middlewares/persistence.ts +0 -19
  372. package/src/plugins/defaults/persistence/package.json +0 -15
  373. package/src/plugins/defaults/persistence/register.ts +0 -38
  374. package/src/plugins/defaults/persistence/terminal.ts +0 -83
  375. package/src/plugins/defaults/title-generate/terminal.ts +0 -31
  376. package/src/plugins/defaults/token-estimate/middlewares/tokenEstimate.ts +0 -23
  377. package/src/plugins/defaults/token-estimate/package.json +0 -15
  378. package/src/plugins/defaults/token-estimate/register.ts +0 -34
  379. package/src/plugins/defaults/token-estimate/terminal.ts +0 -40
  380. package/src/plugins/defaults/tool-error/middlewares/toolError.ts +0 -21
  381. package/src/plugins/defaults/tool-error/terminal.ts +0 -47
  382. package/src/plugins/defaults/tool-execute/middlewares/toolExecute.ts +0 -23
  383. package/src/plugins/defaults/tool-execute/package.json +0 -15
  384. package/src/plugins/defaults/tool-execute/register.ts +0 -49
  385. package/src/plugins/defaults/tool-result-truncate/middlewares/toolResultTruncate.ts +0 -23
  386. package/src/plugins/defaults/tool-result-truncate/types.ts +0 -22
  387. package/src/skills/category-inference.ts +0 -111
package/src/agent/loop.ts CHANGED
@@ -1,37 +1,25 @@
1
1
  import * as Sentry from "@sentry/node";
2
2
 
3
3
  import type { LLMCallSite } from "../config/schemas/llm.js";
4
+ import { stripInjectionsForCompaction } from "../context/strip-injections.js";
4
5
  import {
5
6
  estimatePromptTokensRaw,
7
+ estimatePromptTokensWithTools,
6
8
  estimateToolsTokens,
7
9
  getCalibrationProviderKey,
8
10
  } from "../context/token-estimator.js";
9
- import { calculateMaxToolResultChars } from "../context/tool-result-truncation.js";
10
11
  import type { ContextWindowResult } from "../context/window-manager.js";
11
12
  import type { ToolActivityMetadata } from "../daemon/message-types/web-activity.js";
13
+ import { HOOKS } from "../plugin-api/constants.js";
14
+ import type { PostToolUseContext, StopContext } from "../plugin-api/types.js";
12
15
  import { defaultCompactionTerminal } from "../plugins/defaults/compaction/terminal.js";
13
- import { defaultEmptyResponseTerminal } from "../plugins/defaults/empty-response/terminal.js";
14
- import { defaultTokenEstimateTerminal } from "../plugins/defaults/token-estimate/terminal.js";
15
- import { defaultToolErrorTerminal } from "../plugins/defaults/tool-error/terminal.js";
16
- import { defaultToolResultTruncateTerminal } from "../plugins/defaults/tool-result-truncate/terminal.js";
17
- import type {
18
- ToolResultTruncateArgs,
19
- ToolResultTruncateResult,
20
- } from "../plugins/defaults/tool-result-truncate/types.js";
21
- import { DEFAULT_TIMEOUTS, runPipeline } from "../plugins/pipeline.js";
16
+ import type { PostCompactionHookInput } from "../plugins/defaults/memory-retrieval/hooks/post-compact.js";
17
+ import { DEFAULT_TIMEOUTS, runHook, runPipeline } from "../plugins/pipeline.js";
22
18
  import { getMiddlewaresFor } from "../plugins/registry.js";
23
19
  import type {
24
20
  CompactionArgs,
25
21
  CompactionCircuitEvent,
26
22
  CompactionResult,
27
- EmptyResponseArgs,
28
- EmptyResponseDecision,
29
- EstimateArgs,
30
- EstimateResult,
31
- LLMCallArgs,
32
- LLMCallResult,
33
- ToolErrorArgs,
34
- ToolErrorDecision,
35
23
  TurnContext,
36
24
  } from "../plugins/types.js";
37
25
  import { PluginTimeoutError } from "../plugins/types.js";
@@ -40,6 +28,8 @@ import type {
40
28
  ContentBlock,
41
29
  Message,
42
30
  Provider,
31
+ ProviderResponse,
32
+ SendMessageOptions,
43
33
  ToolDefinition,
44
34
  ToolResultContent,
45
35
  } from "../providers/types.js";
@@ -48,7 +38,7 @@ import {
48
38
  applyStreamingSubstitution,
49
39
  applySubstitutions,
50
40
  } from "../tools/sensitive-output-placeholders.js";
51
- import { AssistantError, ErrorCode, ProviderError } from "../util/errors.js";
41
+ import { ProviderError } from "../util/errors.js";
52
42
  import { getLogger } from "../util/logger.js";
53
43
  import { isRetryableNetworkError } from "../util/retry.js";
54
44
  import { CompactionCircuit } from "./compaction-circuit.js";
@@ -95,17 +85,28 @@ export type ExitReason = "handoff" | "budget";
95
85
 
96
86
  export type CheckpointDecision = "continue" | ExitReason;
97
87
 
98
- /**
99
- * Result of {@link AgentLoop.run}.
100
- *
101
- * `exitReason` carries the reason the loop paused at a checkpoint so the
102
- * orchestrator reads the loop's own signal instead of inferring it from
103
- * callback side-effects. It is `null` whenever the loop reached a terminal
104
- * stop (completion, error, abort, or a tool-requested yield-to-user).
105
- */
88
+ /** Result of {@link AgentLoop.run}. */
106
89
  export interface AgentLoopRunResult {
90
+ /** Full conversation history after the run, including everything appended this run. */
107
91
  history: Message[];
92
+ /**
93
+ * Reason the loop paused at a checkpoint, or `null` on a terminal stop
94
+ * (completion, error, abort, or a tool-requested yield-to-user).
95
+ */
108
96
  exitReason: ExitReason | null;
97
+ /**
98
+ * Whether the loop produced at least one new assistant message this run —
99
+ * the forward-progress signal for the ordering-error retry gate and the
100
+ * overflow convergence fold (immune to in-loop compaction shrinking history
101
+ * below a pre-run length).
102
+ */
103
+ appendedNewMessages: boolean;
104
+ /**
105
+ * Slice of `history` appended this run, measured from the loop's input or
106
+ * from the compacted base when it compacts in place. The loop owns this
107
+ * boundary, so it cannot desync the way an externally-held index can.
108
+ */
109
+ newMessages: Message[];
109
110
  }
110
111
 
111
112
  /**
@@ -129,8 +130,6 @@ export interface AgentLoopRunResult {
129
130
  export type AgentLoopExitReason =
130
131
  /** `if (signal?.aborted) break;` at the top of the loop. */
131
132
  | "aborted_pre_call"
132
- /** Empty assistant response after the configured retry budget. */
133
- | "empty_response_exhausted"
134
133
  /** Assistant message has no tool-use blocks (or no tool executor). */
135
134
  | "no_tool_calls"
136
135
  /** Signal aborted while building the user-side tool-results message. */
@@ -209,6 +208,14 @@ export type AgentEvent =
209
208
  approvalReason?: string;
210
209
  riskThreshold?: string;
211
210
  activityMetadata?: ToolActivityMetadata;
211
+ /**
212
+ * Set when the loop synthesizes this result for a tool_use that never
213
+ * executed (a "Cancelled by user" block on abort). The daemon still
214
+ * captures it into `pendingToolResults` and forwards it to the client,
215
+ * but skips the side effects that assume the tool ran — marking the
216
+ * workspace dirty and emitting a post-tool "thinking" activity state.
217
+ */
218
+ cancelled?: boolean;
212
219
  }
213
220
  | { type: "tool_use_preview_start"; toolUseId: string; toolName: string }
214
221
  | {
@@ -243,7 +250,7 @@ export type AgentEvent =
243
250
  | { type: "error"; error: Error }
244
251
  | {
245
252
  /**
246
- * Emitted when the `llmCall` pipeline throws — i.e. the provider
253
+ * Emitted when the provider call throws — i.e. the provider
247
254
  * rejected the request before returning a usable response. Carries
248
255
  * the loop-level raw request we attempted to send (messages, tools,
249
256
  * system prompt, provider-agnostic config) plus the thrown error.
@@ -295,6 +302,42 @@ export type AgentEvent =
295
302
  */
296
303
  type: "context_compacting";
297
304
  }
305
+ | {
306
+ /**
307
+ * Emitted after the loop's inline mid-loop compaction pipeline runs,
308
+ * immediately before re-injection — whether or not the pipeline actually
309
+ * compacted. The daemon's event dispatcher always commits `basis` (the
310
+ * stripped pre-compaction history) as the conversation's durable message
311
+ * state, so re-injection ({@link MidLoopCompaction.reinject}) re-applies
312
+ * injections onto the stripped base rather than stacking on top of the
313
+ * still-injected messages. When `result.compacted` is set it
314
+ * additionally commits the durable compaction result (DB-record fields,
315
+ * graph-memory side effects, SSE) and flips the per-turn re-injection
316
+ * guards on the handler state.
317
+ *
318
+ * Treated as a critical event: a failed durable commit re-throws so the
319
+ * turn aborts rather than re-injecting against half-applied state.
320
+ *
321
+ * `basis` is the stripped pre-compaction history the summary was built
322
+ * from; the dispatcher uses it to project Slack provenance onto the
323
+ * compacted result.
324
+ */
325
+ type: "compaction_completed";
326
+ result: ContextWindowResult;
327
+ basis: Message[];
328
+ }
329
+ | {
330
+ /**
331
+ * Emitted right after the loop strips runtime injections from the
332
+ * running history, before the compaction pipeline runs. The daemon's
333
+ * event dispatcher records the history-stripped marker — a Conversation
334
+ * DB-record field read back at load time to strip embedded injection
335
+ * prefixes from pre-strip messages. Best-effort: a transient marker
336
+ * write must not abort the turn, so unlike `compaction_completed` this
337
+ * event is not treated as critical.
338
+ */
339
+ type: "history_stripped";
340
+ }
298
341
  /**
299
342
  * Circuit-breaker transitions emitted when auto-compaction is paused
300
343
  * (`compaction_circuit_open`, after three consecutive summary-LLM
@@ -324,8 +367,7 @@ const DEFAULT_CONFIG: AgentLoopConfig = {
324
367
  minTurnIntervalMs: 150,
325
368
  };
326
369
 
327
- const MAX_CONSECUTIVE_ERROR_NUDGES = 3;
328
- const MAX_EMPTY_RESPONSE_RETRIES = 1;
370
+ const MAX_STOP_CONTINUE_RETRIES = 1;
329
371
  const MAX_TOKENS_STOP_REASONS = new Set([
330
372
  "length",
331
373
  "max_output_tokens",
@@ -346,12 +388,11 @@ export function isMaxTokensStopReason(
346
388
  * {@link AgentLoop.run}); this helper is the fallback used only by unit
347
389
  * tests that construct `AgentLoop` directly without an orchestrator.
348
390
  *
349
- * When the orchestrator-supplied context is present, {@link resolveLoopTurnContext}
350
- * is used instead of this helper so the pipeline sees the real
351
- * `conversationId`, trust, and `contextWindowManager`. In the fallback path
352
- * the returned context is still useful for pipeline logging: `requestId`
353
- * surfaces in every structured record, and `turnIndex` reflects the
354
- * current tool-use iteration.
391
+ * When the orchestrator-supplied context is present it is used directly so the
392
+ * pipeline sees the real `conversationId`, trust, and `contextWindowManager`.
393
+ * In the fallback path the returned context is still useful for pipeline
394
+ * logging: `requestId` surfaces in every structured record, and `turnIndex`
395
+ * reflects the current tool-use iteration.
355
396
  */
356
397
  function buildLoopTurnContext(
357
398
  requestId: string | undefined,
@@ -371,29 +412,6 @@ function buildLoopTurnContext(
371
412
  };
372
413
  }
373
414
 
374
- /**
375
- * Produce a `TurnContext` for a pipeline call inside {@link AgentLoop.run}.
376
- *
377
- * When the orchestrator supplied a `turnContext`, clone it and overwrite
378
- * `requestId` + `turnIndex` with the loop-scoped values so plugin log
379
- * records correctly attribute the call to the current tool-use iteration
380
- * while preserving the real `conversationId`, trust context, and
381
- * `contextWindowManager` the orchestrator assembled for the turn. Without
382
- * an orchestrator context (unit tests that instantiate `AgentLoop` with no
383
- * `turnContext`), fall back to {@link buildLoopTurnContext}'s synthesized
384
- * placeholder.
385
- */
386
- function resolveLoopTurnContext(
387
- base: TurnContext | undefined,
388
- requestId: string | undefined,
389
- turnIndex: number,
390
- ): TurnContext {
391
- if (base) {
392
- return { ...base, requestId: requestId ?? base.requestId, turnIndex };
393
- }
394
- return buildLoopTurnContext(requestId, turnIndex);
395
- }
396
-
397
415
  /**
398
416
  * User-config HTTP status codes that should never page the on-call: billing
399
417
  * exhaustion (402), invalid credentials (401), and forbidden/plan-gated (403).
@@ -437,28 +455,25 @@ export interface ResolvedSystemPrompt {
437
455
  }
438
456
 
439
457
  /**
440
- * Orchestrator-supplied hooks the loop invokes when the mid-loop budget gate
458
+ * Orchestrator-supplied hook the loop invokes when the mid-loop budget gate
441
459
  * trips and inline compaction runs. The loop owns the trigger, the
442
460
  * `compaction` pipeline call, the result interpretation (circuit-breaker
443
- * bookkeeping + the exhaustion decision), and the inline continue; these hooks
444
- * bridge the durable / injection state the loop is intentionally blind to.
445
- * Durable persistence ({@link applyResult}) and re-injection
446
- * ({@link reinject}) remain orchestrator-supplied for now and are expected to
447
- * move into the loop in a future change.
461
+ * bookkeeping + the exhaustion decision), and the inline continue; this hook
462
+ * bridges the injection state the loop is intentionally blind to. Durable
463
+ * persistence is signalled out-of-band via the `history_stripped` (marker)
464
+ * and `compaction_completed` (basis commit + successful summary) {@link
465
+ * AgentEvent}s; the {@link MidLoopCompaction.postCompactionHook} is
466
+ * orchestrator-supplied, and its inputs migrate loop-ward as the loop
467
+ * subsumes the re-injection ceremony.
448
468
  */
449
469
  export interface MidLoopCompaction {
450
- /** Strip runtime injections, commit stripped messages, and resolve pipeline options. */
451
- prepare: (history: Message[]) => {
452
- rawHistory: Message[];
453
- options: CompactionArgs["options"];
454
- };
455
- /** Commit a successful compaction result to durable state. */
456
- applyResult: (
457
- result: ContextWindowResult,
458
- rawHistory: Message[],
459
- ) => Promise<void>;
460
- /** Re-apply runtime injections and return the history to continue from. */
461
- reinject: () => Promise<Message[]>;
470
+ /**
471
+ * Re-apply runtime injections onto the post-compaction history and return
472
+ * the history to continue from. The loop supplies its own working state via
473
+ * {@link PostCompactionHookInput} so the hook re-injects from that rather
474
+ * than reading it back from orchestrator state.
475
+ */
476
+ postCompactionHook: (input: PostCompactionHookInput) => Promise<Message[]>;
462
477
  }
463
478
 
464
479
  export interface AgentLoopRunOptions {
@@ -518,21 +533,12 @@ export interface AgentLoopRunOptions {
518
533
 
519
534
  /**
520
535
  * Callback shape the loop uses to execute a tool invocation.
521
- *
522
- * The trailing `turnContext` is optional so in-process tests that wire the
523
- * callback without an orchestrator keep working. Production sites (the
524
- * `Conversation`'s `createToolExecutor`) forward the supplied context into
525
- * `ToolExecutor.execute` so the `toolExecute` pipeline sees the orchestrator's
526
- * real conversation identity/trust/contextWindowManager instead of the
527
- * synthesized placeholder `ToolExecutor` would otherwise build from the
528
- * `ToolContext` alone.
529
536
  */
530
537
  export type LoopToolExecutor = (
531
538
  name: string,
532
539
  input: Record<string, unknown>,
533
540
  onOutput?: (chunk: string) => void,
534
541
  toolUseId?: string,
535
- turnContext?: TurnContext,
536
542
  ) => Promise<{
537
543
  content: string;
538
544
  isError: boolean;
@@ -624,10 +630,9 @@ export class AgentLoop {
624
630
  * Resolve the tool definitions sent to the provider for the given turn.
625
631
  *
626
632
  * Mirrors the logic of {@link getToolTokenBudget} but returns the tool
627
- * array itself — callers that need to thread the tool set into a plugin
628
- * pipeline (e.g. `tokenEstimate`, where the pipeline's args include
629
- * `tools`) use this rather than re-implementing the dynamic-vs-static
630
- * resolver fork.
633
+ * array itself — callers that need to thread the tool set into the token
634
+ * estimate (`estimatePromptTokensWithTools`, whose args include `tools`)
635
+ * use this rather than re-implementing the dynamic-vs-static resolver fork.
631
636
  */
632
637
  getResolvedTools(history?: Message[]): ToolDefinition[] {
633
638
  return history && this.resolveTools
@@ -648,28 +653,15 @@ export class AgentLoop {
648
653
  }
649
654
 
650
655
  /**
651
- * Estimate total prompt tokens for `history` via the `tokenEstimate`
652
- * pipeline. Args are shallow-frozen so a mutating middleware cannot strip
653
- * context from the loop's live `history`.
656
+ * Calibrated prompt-token estimate for `history`, including the
657
+ * resolved-tool budget for the turn.
654
658
  */
655
- private estimateTokens(
656
- history: Message[],
657
- turnContext: TurnContext,
658
- ): Promise<EstimateResult> {
659
- return runPipeline<EstimateArgs, EstimateResult>(
660
- "tokenEstimate",
661
- getMiddlewaresFor("tokenEstimate"),
662
- defaultTokenEstimateTerminal,
663
- {
664
- history: Object.freeze([...history]) as Message[],
665
- systemPrompt: this.systemPrompt,
666
- tools: Object.freeze([
667
- ...this.getResolvedTools(history),
668
- ]) as ToolDefinition[],
669
- providerName: getCalibrationProviderKey(this.provider),
670
- },
671
- turnContext,
672
- DEFAULT_TIMEOUTS.tokenEstimate,
659
+ private estimateTokens(history: Message[]): number {
660
+ return estimatePromptTokensWithTools(
661
+ history,
662
+ this.systemPrompt,
663
+ this.getResolvedTools(history),
664
+ getCalibrationProviderKey(this.provider),
673
665
  );
674
666
  }
675
667
 
@@ -720,16 +712,36 @@ export class AgentLoop {
720
712
  compaction: MidLoopCompaction,
721
713
  signal: AbortSignal | undefined,
722
714
  onEvent: (event: AgentEvent) => void | Promise<void>,
715
+ overrideProfile: string | null,
723
716
  ): Promise<Message[] | null> {
724
717
  await onEvent({ type: "context_compacting" });
725
- const { rawHistory, options } = compaction.prepare(history);
718
+ // Strip runtime injections so the compactor summarizes the raw persistent
719
+ // messages.
720
+ const rawHistory = stripInjectionsForCompaction(history);
721
+ // Record the history-stripped marker right after stripping, before the
722
+ // pipeline runs.
723
+ await onEvent({ type: "history_stripped" });
726
724
  let result: CompactionResult;
727
725
  try {
728
726
  result = await runPipeline<CompactionArgs, CompactionResult>(
729
727
  "compaction",
730
728
  getMiddlewaresFor("compaction"),
731
729
  (args) => defaultCompactionTerminal(args, turnContext),
732
- { messages: rawHistory, signal, options },
730
+ // The mid-loop budget gate is reached only when this turn decides to
731
+ // compact in place, so `force` the pipeline past its auto-threshold
732
+ // check. `actorTrustClass` comes from the turn context (the actor whose
733
+ // turn triggered compaction) so the compactor's image manifest excludes
734
+ // guardian-only attachments for untrusted actors. `overrideProfile` is
735
+ // the turn's resolved inference-profile override for the summary call.
736
+ {
737
+ messages: rawHistory,
738
+ signal,
739
+ options: {
740
+ force: true,
741
+ actorTrustClass: turnContext.trust.trustClass,
742
+ overrideProfile,
743
+ },
744
+ },
733
745
  turnContext,
734
746
  DEFAULT_TIMEOUTS.compaction,
735
747
  );
@@ -745,7 +757,7 @@ export class AgentLoop {
745
757
  // plugin consumers don't import the window manager; the loop ran the
746
758
  // pipeline, so it interprets the concrete result here.
747
759
  const compactResult = result as ContextWindowResult;
748
- // `force: true` bypasses the cooldown/threshold gates, but early returns
760
+ // `force: true` bypasses the auto-threshold gate, but early returns
749
761
  // for "no eligible messages" / "insufficient messages" still leave
750
762
  // `summaryFailed` undefined. Only record an outcome when the summary LLM
751
763
  // actually ran.
@@ -756,13 +768,25 @@ export class AgentLoop {
756
768
  onEvent,
757
769
  );
758
770
  }
759
- if (compactResult.compacted) {
760
- await compaction.applyResult(compactResult, rawHistory);
761
- }
771
+ // Emit unconditionally: the dispatcher commits the stripped `basis` as the
772
+ // durable message base whether or not the pipeline compacted (re-injection
773
+ // reads it), and runs the durable compaction commit only when
774
+ // `result.compacted`.
775
+ await onEvent({
776
+ type: "compaction_completed",
777
+ result: compactResult,
778
+ basis: rawHistory,
779
+ });
762
780
  if (compactResult.exhausted ?? false) {
763
781
  return null;
764
782
  }
765
- return compaction.reinject();
783
+ // Re-inject onto the same base the `compaction_completed` dispatch commits:
784
+ // the compacted messages when the pipeline compacted, the stripped
785
+ // pre-compaction history otherwise.
786
+ return compaction.postCompactionHook({
787
+ history: compactResult.compacted ? compactResult.messages : rawHistory,
788
+ turnContext,
789
+ });
766
790
  }
767
791
 
768
792
  async run(
@@ -783,26 +807,36 @@ export class AgentLoop {
783
807
  mutableLatestUserMessage,
784
808
  } = options ?? {};
785
809
  let history = [...messages];
810
+ // Index into `history` where this run's appended output begins. It starts
811
+ // after the input and resets to the compacted base whenever the loop
812
+ // compacts in place, so `history.slice(newMessagesStart)` is always exactly
813
+ // what the loop produced since the last (re-injected) base.
814
+ let newMessagesStart = history.length;
786
815
  let producedVisibleTextThisRun = false;
787
816
  let toolUseTurns = 0;
788
- let consecutiveErrorTurns = 0;
789
- let emptyResponseRetries = 0;
817
+ let stopContinueRetries = 0;
790
818
  let lastLlmCallTime = 0;
791
819
  let exitReason: ExitReason | null = null;
820
+ let appendedNewMessages = false;
792
821
  const rlog = requestId ? log.child({ requestId }) : log;
793
822
 
823
+ // Resolve the inference-profile override that applies right now. The
824
+ // optional resolver lets a turn observe a confirmed mid-turn profile switch
825
+ // before the next model call; absent a resolver the turn-start value holds.
826
+ const resolveEffectiveOverrideProfile = (): string | undefined =>
827
+ resolveOverrideProfile ? resolveOverrideProfile() : overrideProfile;
828
+
794
829
  // Per-run substitution map for sensitive output placeholders.
795
830
  // Bindings are accumulated from tool results; placeholders are
796
831
  // resolved in streamed deltas and final assistant message text.
797
832
  const substitutionMap = new Map<string, string>();
798
833
  let streamingPending = "";
799
834
 
800
- // Idempotency guard for `emitExit`. Used so the throw path in the
801
- // empty-response branch can stamp its reason ("empty_response_exhausted")
802
- // before throwing — the catch handler that observes the rethrow will
803
- // then attempt to stamp "error" and harmlessly no-op, preserving the
804
- // more specific reason. Also defends against accidental future
805
- // double-emits if a new break site is added without checking this.
835
+ // Idempotency guard for `emitExit`: the first reason stamped wins. A break
836
+ // site that stamps a specific reason before unwinding into the catch
837
+ // handler keeps that reason instead of the generic "error", and the guard
838
+ // also defends against accidental double-emits if a new break site is
839
+ // added without checking this.
806
840
  let exitReasonEmitted = false;
807
841
  const emitExit = async (reason: AgentLoopExitReason): Promise<void> => {
808
842
  if (exitReasonEmitted) return;
@@ -923,12 +957,8 @@ export class AgentLoop {
923
957
  // `activeProfile` and any call-site named profile. Threading it on
924
958
  // every send (rather than once at construction) keeps subagents that
925
959
  // share an `AgentLoop` instance but ought to inherit a different
926
- // profile correct — and matches how `callSite` is plumbed. The
927
- // optional resolver lets a turn observe an explicitly confirmed
928
- // profile-session switch before the next model call.
929
- const effectiveOverrideProfile = resolveOverrideProfile
930
- ? resolveOverrideProfile()
931
- : overrideProfile;
960
+ // profile correct — and matches how `callSite` is plumbed.
961
+ const effectiveOverrideProfile = resolveEffectiveOverrideProfile();
932
962
  if (effectiveOverrideProfile) {
933
963
  providerConfig.overrideProfile = effectiveOverrideProfile;
934
964
  }
@@ -974,95 +1004,76 @@ export class AgentLoop {
974
1004
  stripOldMediaBlocks(history),
975
1005
  );
976
1006
 
977
- // Wrap the provider call in the `llmCall` pipeline so middleware
978
- // contributed by plugins may observe, rewrite, short-circuit, or
979
- // post-process every LLM request. The terminal below is the real
980
- // `provider.sendMessage(...)` call; middleware reach it by calling
981
- // `next(args)`. The default `defaultLlmCallPlugin` contributes a
982
- // passthrough middleware that forwards to `next(args)` — it
983
- // registers at module load and sits at the outermost onion layer,
984
- // so it must yield to keep user-registered `llmCall` middleware
985
- // reachable. Timeout is `null` (`DEFAULT_TIMEOUTS.llmCall`) — the
986
- // provider layer already enforces its own HTTP-level budgets.
987
- //
988
- // The `onEvent` wrapping is kept inside `args.options` so substitution
989
- // and streaming behavior exactly match the pre-pipeline call site.
990
- const llmCallArgs: LLMCallArgs = {
991
- provider: this.provider,
992
- messages: providerHistory,
993
- options: {
994
- tools: currentTools.length > 0 ? currentTools : undefined,
995
- systemPrompt: turnSystemPrompt,
996
- config: providerConfig,
997
- onEvent: (event) => {
998
- if (event.type === "text_delta") {
999
- // Apply sensitive-output placeholder substitution (chunk-safe)
1000
- if (substitutionMap.size > 0) {
1001
- const combined = streamingPending + event.text;
1002
- const { emit, pending } = applyStreamingSubstitution(
1003
- combined,
1004
- substitutionMap,
1005
- );
1006
- streamingPending = pending;
1007
- if (emit.length > 0) {
1008
- onEvent({ type: "text_delta", text: emit });
1009
- }
1010
- } else {
1011
- onEvent({ type: "text_delta", text: event.text });
1007
+ // The `onEvent` wrapping below applies sensitive-output placeholder
1008
+ // substitution to streamed text while forwarding every other event
1009
+ // type through unchanged.
1010
+ const providerOptions: SendMessageOptions = {
1011
+ tools: currentTools.length > 0 ? currentTools : undefined,
1012
+ systemPrompt: turnSystemPrompt,
1013
+ config: providerConfig,
1014
+ onEvent: (event) => {
1015
+ if (event.type === "text_delta") {
1016
+ // Apply sensitive-output placeholder substitution (chunk-safe)
1017
+ if (substitutionMap.size > 0) {
1018
+ const combined = streamingPending + event.text;
1019
+ const { emit, pending } = applyStreamingSubstitution(
1020
+ combined,
1021
+ substitutionMap,
1022
+ );
1023
+ streamingPending = pending;
1024
+ if (emit.length > 0) {
1025
+ onEvent({ type: "text_delta", text: emit });
1012
1026
  }
1013
- } else if (event.type === "thinking_delta") {
1014
- onEvent({ type: "thinking_delta", thinking: event.thinking });
1015
- } else if (event.type === "tool_use_preview_start") {
1016
- onEvent({
1017
- type: "tool_use_preview_start",
1018
- toolUseId: event.toolUseId,
1019
- toolName: event.toolName,
1020
- });
1021
- } else if (event.type === "input_json_delta") {
1022
- onEvent({
1023
- type: "input_json_delta",
1024
- toolName: event.toolName,
1025
- toolUseId: event.toolUseId,
1026
- accumulatedJson: event.accumulatedJson,
1027
- });
1028
- } else if (event.type === "server_tool_start") {
1029
- onEvent({
1030
- type: "server_tool_start",
1031
- name: event.name,
1032
- toolUseId: event.toolUseId,
1033
- input: event.input,
1034
- });
1035
- } else if (event.type === "server_tool_complete") {
1036
- onEvent({
1037
- type: "server_tool_complete",
1038
- toolUseId: event.toolUseId,
1039
- isError: event.isError,
1040
- ...(event.content ? { content: event.content } : {}),
1041
- ...(event.resolvedInput
1042
- ? { resolvedInput: event.resolvedInput }
1043
- : {}),
1044
- ...(event.errorCode ? { errorCode: event.errorCode } : {}),
1045
- ...(event.errorMessage
1046
- ? { errorMessage: event.errorMessage }
1047
- : {}),
1048
- });
1027
+ } else {
1028
+ onEvent({ type: "text_delta", text: event.text });
1049
1029
  }
1050
- },
1051
- signal,
1030
+ } else if (event.type === "thinking_delta") {
1031
+ onEvent({ type: "thinking_delta", thinking: event.thinking });
1032
+ } else if (event.type === "tool_use_preview_start") {
1033
+ onEvent({
1034
+ type: "tool_use_preview_start",
1035
+ toolUseId: event.toolUseId,
1036
+ toolName: event.toolName,
1037
+ });
1038
+ } else if (event.type === "input_json_delta") {
1039
+ onEvent({
1040
+ type: "input_json_delta",
1041
+ toolName: event.toolName,
1042
+ toolUseId: event.toolUseId,
1043
+ accumulatedJson: event.accumulatedJson,
1044
+ });
1045
+ } else if (event.type === "server_tool_start") {
1046
+ onEvent({
1047
+ type: "server_tool_start",
1048
+ name: event.name,
1049
+ toolUseId: event.toolUseId,
1050
+ input: event.input,
1051
+ });
1052
+ } else if (event.type === "server_tool_complete") {
1053
+ onEvent({
1054
+ type: "server_tool_complete",
1055
+ toolUseId: event.toolUseId,
1056
+ isError: event.isError,
1057
+ ...(event.content ? { content: event.content } : {}),
1058
+ ...(event.resolvedInput
1059
+ ? { resolvedInput: event.resolvedInput }
1060
+ : {}),
1061
+ ...(event.errorCode ? { errorCode: event.errorCode } : {}),
1062
+ ...(event.errorMessage
1063
+ ? { errorMessage: event.errorMessage }
1064
+ : {}),
1065
+ });
1066
+ }
1052
1067
  },
1068
+ signal,
1053
1069
  };
1054
1070
 
1055
- // Per-turn pipeline context. When the orchestrator threaded a full
1056
- // `turnContext` into `run()`, use it (overwriting `turnIndex` with
1057
- // the loop-scoped tool-use iteration) so middleware sees the real
1058
- // conversation identity, trust, and `contextWindowManager`. The
1059
- // synthesized fallback is only reached by standalone unit-test
1060
- // instantiations that never plumb a context through.
1061
- const turnCtx = resolveLoopTurnContext(
1062
- turnContext,
1063
- requestId,
1064
- toolUseTurns,
1065
- );
1071
+ // Per-turn pipeline context. Real call sites thread a full
1072
+ // `turnContext` into `run()` and it is used directly; standalone
1073
+ // unit-test instantiations that never plumb a context through fall
1074
+ // back to a synthesized placeholder scoped to the tool-use iteration.
1075
+ const turnCtx =
1076
+ turnContext ?? buildLoopTurnContext(requestId, toolUseTurns);
1066
1077
 
1067
1078
  // Announce the LLM-call boundary so downstream handlers (the
1068
1079
  // daemon's persistence pipeline) can reserve an empty assistant row
@@ -1085,15 +1096,11 @@ export class AgentLoop {
1085
1096
  // `llm_request_logs` row, then re-throw so the existing outer catch
1086
1097
  // continues to handle abort sync, Sentry capture, the `error` event,
1087
1098
  // and the loop break unchanged.
1088
- let response: LLMCallResult;
1099
+ let response: ProviderResponse;
1089
1100
  try {
1090
- response = await runPipeline<LLMCallArgs, LLMCallResult>(
1091
- "llmCall",
1092
- getMiddlewaresFor("llmCall"),
1093
- (args) => args.provider.sendMessage(args.messages, args.options),
1094
- llmCallArgs,
1095
- turnCtx,
1096
- DEFAULT_TIMEOUTS.llmCall,
1101
+ response = await this.provider.sendMessage(
1102
+ providerHistory,
1103
+ providerOptions,
1097
1104
  );
1098
1105
  } catch (llmCallError) {
1099
1106
  // Skip recording on abort — the user cancelled the request and
@@ -1111,10 +1118,10 @@ export class AgentLoop {
1111
1118
  // misrepresent both.
1112
1119
  const rawRequest = {
1113
1120
  provider: this.provider.name,
1114
- messages: llmCallArgs.messages,
1115
- tools: llmCallArgs.options?.tools,
1116
- systemPrompt: llmCallArgs.options?.systemPrompt,
1117
- config: llmCallArgs.options?.config,
1121
+ messages: providerHistory,
1122
+ tools: providerOptions.tools,
1123
+ systemPrompt: providerOptions.systemPrompt,
1124
+ config: providerOptions.config,
1118
1125
  };
1119
1126
  onEvent({
1120
1127
  type: "provider_error",
@@ -1203,6 +1210,7 @@ export class AgentLoop {
1203
1210
  "LLM response reached output token limit",
1204
1211
  );
1205
1212
  history.push(safeAssistantMessage);
1213
+ appendedNewMessages = true;
1206
1214
  await onEvent({
1207
1215
  type: "max_tokens_reached",
1208
1216
  stopReason: response.stopReason,
@@ -1215,126 +1223,65 @@ export class AgentLoop {
1215
1223
  break;
1216
1224
  }
1217
1225
 
1218
- // Detect empty responses: no user-visible text and no tool calls.
1219
- // This can happen when the model fails to produce output after
1220
- // receiving a large tool result. Retry once with a nudge before
1221
- // the message is persisted.
1222
- //
1223
- // Only nudge when the model hasn't already delivered text to the user
1224
- // earlier in this tool-use chain. If a prior assistant turn in history
1225
- // contained visible text (e.g. the model said its piece before calling
1226
- // a side-effect tool like `remember`), an empty follow-up is the model
1227
- // correctly ending its turn — nudging would mislead it into thinking
1228
- // its earlier text didn't land and cause a verbatim re-send.
1229
- //
1230
- // Note: we check ANY prior assistant turn from this run()
1231
- // invocation, not just the most recent one. In multi-step tool-use
1232
- // chains (say-something → call-tool → call-another-tool → end),
1233
- // the "say-something" text lives on an earlier assistant turn while
1234
- // the most recent assistant turn is a pure tool_use with no text.
1235
- // Restricting the check to the most recent assistant turn would
1236
- // falsely nudge in that case and trigger a duplicate re-send of
1237
- // text the user already saw.
1238
- //
1239
- // Scope the scan to messages appended during this run() call only.
1240
- // Assistant text from prior conversation turns (earlier run()
1241
- // invocations passed in via `messages`) must NOT suppress the
1242
- // nudge — those turns completed long ago and have no bearing on
1243
- // whether the current tool-use chain has delivered text yet.
1244
- //
1245
- // The actual decision (nudge vs. accept vs. error) is delegated to
1246
- // the `emptyResponse` plugin pipeline. The pipeline returns a
1247
- // decision; the loop carries out the side-effect (pushing the nudge
1248
- // or surfacing the error). See `plugins/defaults/empty-response/register.ts`
1249
- // for the default decision logic.
1226
+ // The model's "stop" moment: a response with no tool calls is about to
1227
+ // yield to the user. The `stop` hook (below) decides whether to accept
1228
+ // the turn or re-query with a follow-up; `priorAssistantHadVisibleText`
1229
+ // gates the ops log for the post-tool empty case.
1250
1230
  const hasVisibleText = response.content.some(
1251
1231
  (block) => block.type === "text" && block.text.trim().length > 0,
1252
1232
  );
1253
- // Track whether the model produced visible text earlier in this
1254
- // run() invocation. Run-scoped rather than derived from `history` so
1255
- // it survives inline compaction rewriting the message array: an empty
1256
- // completion after a compaction must not be nudged into re-sending
1257
- // text the user already saw.
1258
1233
  const priorAssistantHadVisibleText = producedVisibleTextThisRun;
1259
1234
  if (hasVisibleText) {
1260
1235
  producedVisibleTextThisRun = true;
1261
1236
  }
1262
1237
 
1263
- const emptyResponseArgs: EmptyResponseArgs = {
1264
- responseContent: response.content,
1265
- toolUseBlocksLength: toolUseBlocks.length,
1266
- toolUseTurns,
1267
- emptyResponseRetries,
1268
- maxEmptyResponseRetries: MAX_EMPTY_RESPONSE_RETRIES,
1269
- priorAssistantHadVisibleText,
1270
- stopReason: response.stopReason,
1271
- };
1272
- const emptyResponseCtx = resolveLoopTurnContext(
1273
- turnContext,
1274
- requestId,
1275
- toolUseTurns,
1276
- );
1277
- const emptyResponseDecision: EmptyResponseDecision = await runPipeline(
1278
- "emptyResponse",
1279
- getMiddlewaresFor("emptyResponse"),
1280
- async (args) => defaultEmptyResponseTerminal(args),
1281
- emptyResponseArgs,
1282
- emptyResponseCtx,
1283
- DEFAULT_TIMEOUTS.emptyResponse,
1284
- );
1285
-
1286
- if (emptyResponseDecision.action === "nudge") {
1287
- // Fall back to the canonical nudge text if the plugin returned
1288
- // `action: "nudge"` but forgot `nudgeText`. Keeps a misbehaving
1289
- // plugin from silently breaking the loop invariant that the
1290
- // model sees a coherent prompt.
1291
- const nudgeText =
1292
- emptyResponseDecision.nudgeText ??
1293
- "<system_notice>Your previous response was empty. You must respond to the user with a summary of what you found or did. Do not use any tools — just respond with text.</system_notice>";
1294
- emptyResponseRetries++;
1295
- rlog.warn(
1296
- { turn: toolUseTurns, retry: emptyResponseRetries },
1297
- "Model returned empty response after tool results — retrying",
1298
- );
1299
- history.push({
1300
- role: "user",
1301
- content: [{ type: "text", text: nudgeText }],
1302
- });
1303
- continue;
1304
- }
1305
-
1306
- if (emptyResponseDecision.action === "error") {
1307
- rlog.error(
1308
- { turn: toolUseTurns, retries: emptyResponseRetries },
1309
- "emptyResponse pipeline requested error surface",
1310
- );
1311
- // Stamp the specific exit reason *before* throwing. The catch
1312
- // handler below will see the rethrown error and attempt to stamp
1313
- // "error" — guarded by `exitReasonEmitted`, that becomes a no-op
1314
- // and the more specific reason wins.
1315
- await emitExit("empty_response_exhausted");
1316
- throw new AssistantError(
1317
- "Model returned empty response after tool results",
1318
- ErrorCode.INTERNAL_ERROR,
1319
- );
1320
- }
1238
+ if (toolUseBlocks.length === 0) {
1239
+ // The model stopped requesting tools — the run's stop boundary. The
1240
+ // `stop` hook decides whether to let the turn end or re-query with a
1241
+ // follow-up turn. It receives the full history and, when it asks to
1242
+ // continue, appends the follow-up turn itself.
1243
+ const stopCtx: StopContext = {
1244
+ conversationId: turnCtx.conversationId,
1245
+ messages: [...history],
1246
+ responseContent: response.content,
1247
+ stopReason: response.stopReason,
1248
+ decision: "stop",
1249
+ logger: rlog,
1250
+ };
1251
+ const finalStopCtx = await runHook(HOOKS.STOP, stopCtx);
1252
+
1253
+ if (finalStopCtx.decision === "continue") {
1254
+ // The loop owns the retry budget: a hook always asks to continue
1255
+ // when a nudge is warranted, and the loop stops anyway once the
1256
+ // budget is spent. This bounds the hook-driven re-query loop.
1257
+ if (stopContinueRetries < MAX_STOP_CONTINUE_RETRIES) {
1258
+ stopContinueRetries++;
1259
+ rlog.warn(
1260
+ { turn: toolUseTurns, retry: stopContinueRetries },
1261
+ "Model returned empty response after tool results — retrying",
1262
+ );
1263
+ history = finalStopCtx.messages;
1264
+ continue;
1265
+ }
1321
1266
 
1322
- // action === "accept" — fall through. Emit a dedicated log line for
1323
- // the specific "empty turn after tool results, retries exhausted"
1324
- // case so ops dashboards that grep on this line keep working.
1325
- if (
1326
- !hasVisibleText &&
1327
- toolUseBlocks.length === 0 &&
1328
- toolUseTurns > 0 &&
1329
- !priorAssistantHadVisibleText
1330
- ) {
1331
- rlog.error(
1332
- { turn: toolUseTurns, retries: emptyResponseRetries },
1333
- "Model returned empty response after tool results — retries exhausted",
1334
- );
1267
+ // Budget spent — accept the empty turn. Emit a dedicated log line
1268
+ // for the post-tool empty case so ops dashboards that grep on it
1269
+ // keep working.
1270
+ if (
1271
+ !hasVisibleText &&
1272
+ toolUseTurns > 0 &&
1273
+ !priorAssistantHadVisibleText
1274
+ ) {
1275
+ rlog.error(
1276
+ { turn: toolUseTurns, retries: stopContinueRetries },
1277
+ "Model returned empty response after tool results — retries exhausted",
1278
+ );
1279
+ }
1280
+ }
1335
1281
  }
1336
1282
 
1337
1283
  history.push(assistantMessage);
1284
+ appendedNewMessages = true;
1338
1285
 
1339
1286
  await onEvent({ type: "message_complete", message: assistantMessage });
1340
1287
 
@@ -1364,6 +1311,15 @@ export class AgentLoop {
1364
1311
  }),
1365
1312
  );
1366
1313
  history.push({ role: "user", content: cancelledBlocks });
1314
+ for (const toolUse of toolUseBlocks) {
1315
+ await onEvent({
1316
+ type: "tool_result",
1317
+ toolUseId: toolUse.id,
1318
+ content: "Cancelled by user",
1319
+ isError: true,
1320
+ cancelled: true,
1321
+ });
1322
+ }
1367
1323
  await emitExit("aborted_post_response");
1368
1324
  break;
1369
1325
  }
@@ -1393,14 +1349,6 @@ export class AgentLoop {
1393
1349
  });
1394
1350
  },
1395
1351
  toolUse.id,
1396
- // Forward the loop's resolved `TurnContext` through the
1397
- // executor callback so `ToolExecutor.execute` can thread the
1398
- // real orchestrator context into the `toolExecute` pipeline.
1399
- // Standalone tests that don't wire a `turnContext` into
1400
- // `AgentLoop.run()` pass `undefined` here and the executor
1401
- // falls back to the synthesized placeholder — preserving the
1402
- // existing unit-test behavior.
1403
- turnCtx,
1404
1352
  );
1405
1353
 
1406
1354
  return { toolUse, result };
@@ -1464,60 +1412,39 @@ export class AgentLoop {
1464
1412
  }),
1465
1413
  );
1466
1414
 
1467
- // Pre-emptively truncate oversized tool results to prevent context
1468
- // overflow. The work is delegated to the `toolResultTruncate`
1469
- // plugin pipeline so downstream plugins can swap in a smarter
1470
- // truncation strategy (e.g. a summariser) while the default
1471
- // middleware preserves the historical tail-drop behaviour.
1415
+ // Run the `post-tool-use` hook once per tool result, after the tool
1416
+ // returns and before the result joins the provider-bound history.
1417
+ // The default tool-result-truncate plugin tail-drops oversized output
1418
+ // to fit the context window; user hooks can swap in a smarter strategy
1419
+ // (e.g. a summariser) or observe results for side effects.
1472
1420
  const contextWindowTokens =
1473
1421
  resolveContextWindow?.().maxInputTokens ??
1474
1422
  this.config.maxInputTokens ??
1475
1423
  180_000;
1476
- const maxChars = calculateMaxToolResultChars(contextWindowTokens);
1477
- const truncateMiddlewares = getMiddlewaresFor("toolResultTruncate");
1478
1424
 
1479
- let truncatedCount = 0;
1480
- const truncatedBlocks: ContentBlock[] = [];
1425
+ const resultBlocks: ContentBlock[] = [];
1426
+ const additionalContextBlocks: ContentBlock[] = [];
1481
1427
  for (const block of rawResultBlocks) {
1482
1428
  if (block.type !== "tool_result") {
1483
- truncatedBlocks.push(block);
1484
- continue;
1485
- }
1486
- const toolBlock = block as ToolResultContent;
1487
- if (
1488
- typeof toolBlock.content !== "string" ||
1489
- toolBlock.content.length <= maxChars
1490
- ) {
1491
- truncatedBlocks.push(block);
1429
+ resultBlocks.push(block);
1492
1430
  continue;
1493
1431
  }
1494
- const pipelineResult = await runPipeline<
1495
- ToolResultTruncateArgs,
1496
- ToolResultTruncateResult
1497
- >(
1498
- "toolResultTruncate",
1499
- truncateMiddlewares,
1500
- async (args) => defaultToolResultTruncateTerminal(args),
1501
- { content: toolBlock.content, maxChars },
1502
- turnCtx,
1503
- DEFAULT_TIMEOUTS.toolResultTruncate,
1504
- );
1505
- if (pipelineResult.truncated) {
1506
- truncatedCount++;
1507
- truncatedBlocks.push({
1508
- ...toolBlock,
1509
- content: pipelineResult.content,
1432
+ const postToolUseCtx: PostToolUseContext = {
1433
+ conversationId: turnCtx.conversationId,
1434
+ toolResponse: block as ToolResultContent,
1435
+ messages: history,
1436
+ maxInputTokens: contextWindowTokens,
1437
+ logger: rlog,
1438
+ };
1439
+ const finalCtx = await runHook(HOOKS.POST_TOOL_USE, postToolUseCtx);
1440
+ resultBlocks.push(finalCtx.toolResponse);
1441
+ if (finalCtx.additionalContext !== undefined) {
1442
+ additionalContextBlocks.push({
1443
+ type: "text",
1444
+ text: finalCtx.additionalContext,
1510
1445
  });
1511
- } else {
1512
- truncatedBlocks.push(block);
1513
1446
  }
1514
1447
  }
1515
- const resultBlocks = truncatedBlocks;
1516
- if (truncatedCount > 0) {
1517
- log.warn(
1518
- `Truncated ${truncatedCount} oversized tool result(s) to prevent context overflow`,
1519
- );
1520
- }
1521
1448
 
1522
1449
  // Emit tool_result events AFTER truncation so downstream consumers
1523
1450
  // (e.g. session persistence) receive the truncated content.
@@ -1569,54 +1496,15 @@ export class AgentLoop {
1569
1496
 
1570
1497
  toolUseTurns++;
1571
1498
 
1572
- // When any tool returned an error, nudge the LLM to retry with
1573
- // corrected parameters instead of ending its turn. Skip the nudge
1574
- // after MAX_CONSECUTIVE_ERROR_NUDGES consecutive error turns
1575
- // (the error is likely unrecoverable at that point). The nudge
1576
- // decision is delegated to the `toolError` plugin pipeline so user
1577
- // plugins can change the text, observe the event, or suppress it.
1578
- const hasToolError = toolResults.some(({ result }) => result.isError);
1579
- if (hasToolError) {
1580
- consecutiveErrorTurns++;
1581
- } else {
1582
- consecutiveErrorTurns = 0;
1583
- }
1584
- const toolErrorArgs: ToolErrorArgs = {
1585
- hasToolError,
1586
- consecutiveErrorTurns,
1587
- maxConsecutiveErrorNudges: MAX_CONSECUTIVE_ERROR_NUDGES,
1588
- };
1589
- const toolErrorCtx: TurnContext = resolveLoopTurnContext(
1590
- turnContext,
1591
- requestId,
1592
- toolUseTurns - 1,
1593
- );
1594
- const toolErrorDecision = await runPipeline<
1595
- ToolErrorArgs,
1596
- ToolErrorDecision
1597
- >(
1598
- "toolError",
1599
- getMiddlewaresFor("toolError"),
1600
- // Terminal: the canonical nudge decision. The default plugin's
1601
- // middleware is a passthrough (so later-registered user plugins
1602
- // aren't shadowed), so this terminal is what actually produces
1603
- // the decision when no user plugin overrides it. Wiring the
1604
- // decision here also ensures the nudge fires for direct
1605
- // AgentLoop callers (tests, benchmarks) that skip
1606
- // `bootstrapPlugins()` and therefore never register the default.
1607
- async (args) => defaultToolErrorTerminal(args),
1608
- toolErrorArgs,
1609
- toolErrorCtx,
1610
- DEFAULT_TIMEOUTS.toolError,
1611
- );
1612
- if (toolErrorDecision.action === "nudge") {
1613
- resultBlocks.push({
1614
- type: "text",
1615
- text: toolErrorDecision.nudgeText,
1616
- });
1617
- }
1499
+ // Append any guidance a post-tool-use hook surfaced via
1500
+ // `additionalContext` (e.g. tool-error retry coaching) as separate
1501
+ // blocks. They join the provider-bound history below but were not part
1502
+ // of the tool_result events emitted above, so the model sees the
1503
+ // guidance while the client-facing and persisted tool output stay the
1504
+ // tool's actual result.
1505
+ resultBlocks.push(...additionalContextBlocks);
1618
1506
 
1619
- // Add tool results as a user message and continue the loop
1507
+ // Add tool results as a user message and continue the loop.
1620
1508
  history.push({ role: "user", content: resultBlocks });
1621
1509
 
1622
1510
  // Invoke checkpoint callback after tool results are in history.
@@ -1659,7 +1547,7 @@ export class AgentLoop {
1659
1547
  );
1660
1548
  const midLoopThreshold =
1661
1549
  preflightBudget * MID_LOOP_YIELD_THRESHOLD_RATIO;
1662
- const estimated = await this.estimateTokens(history, turnCtx);
1550
+ const estimated = this.estimateTokens(history);
1663
1551
  if (estimated > midLoopThreshold) {
1664
1552
  if (compaction) {
1665
1553
  rlog.info(
@@ -1672,9 +1560,13 @@ export class AgentLoop {
1672
1560
  compaction,
1673
1561
  signal,
1674
1562
  onEvent,
1563
+ resolveEffectiveOverrideProfile() ?? null,
1675
1564
  );
1676
1565
  if (compacted) {
1677
1566
  history = compacted;
1567
+ // The compacted, re-injected array is the new base; output
1568
+ // produced after this point is what the orchestrator persists.
1569
+ newMessagesStart = history.length;
1678
1570
  continue;
1679
1571
  }
1680
1572
  }
@@ -1701,6 +1593,15 @@ export class AgentLoop {
1701
1593
  }),
1702
1594
  );
1703
1595
  history.push({ role: "user", content: cancelledBlocks });
1596
+ for (const toolUse of toolUseBlocks) {
1597
+ await onEvent({
1598
+ type: "tool_result",
1599
+ toolUseId: toolUse.id,
1600
+ content: "Cancelled by user",
1601
+ isError: true,
1602
+ cancelled: true,
1603
+ });
1604
+ }
1704
1605
  }
1705
1606
  await emitExit("aborted_via_error");
1706
1607
  break;
@@ -1714,11 +1615,9 @@ export class AgentLoop {
1714
1615
  Sentry.captureException(err);
1715
1616
  }
1716
1617
  onEvent({ type: "error", error: err });
1717
- // Catch-block fallback. If the rethrow came from the
1718
- // empty-response throw path above, `emitExit("error")` no-ops
1719
- // because `emitExit("empty_response_exhausted")` already ran
1720
- // before the throw. Otherwise, this is the genuine
1721
- // unhandled-error exit.
1618
+ // Catch-block fallback. A break site that stamped a more specific
1619
+ // reason before unwinding here keeps it; the guard makes this a no-op.
1620
+ // Otherwise this is the genuine unhandled-error exit.
1722
1621
  await emitExit("error");
1723
1622
  break;
1724
1623
  }
@@ -1733,7 +1632,12 @@ export class AgentLoop {
1733
1632
  "Agent loop exited",
1734
1633
  );
1735
1634
 
1736
- return { history, exitReason };
1635
+ return {
1636
+ history,
1637
+ exitReason,
1638
+ appendedNewMessages,
1639
+ newMessages: history.slice(newMessagesStart),
1640
+ };
1737
1641
  }
1738
1642
  }
1739
1643