@vellumai/assistant 0.8.7 → 0.8.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (387)
  1. package/Dockerfile +20 -4
  2. package/docker-entrypoint.sh +4 -2
  3. package/docker-init-apt-root.sh +3 -1
  4. package/docker-kata-apt-env.sh +3 -1
  5. package/docker-kata-runtime-family.sh +12 -0
  6. package/docs/architecture/memory.md +1 -1
  7. package/docs/plugins.md +75 -79
  8. package/examples/plugins/echo/README.md +6 -12
  9. package/examples/plugins/echo/register.ts +0 -41
  10. package/node_modules/@vellumai/skill-host-contracts/src/server-message.ts +3 -3
  11. package/openapi.yaml +3381 -348
  12. package/package.json +1 -1
  13. package/scripts/generate-openapi.ts +68 -41
  14. package/src/__tests__/agent-loop-exit-reason.test.ts +34 -39
  15. package/src/__tests__/agent-loop-provider-error-recording.test.ts +1 -1
  16. package/src/__tests__/agent-loop.test.ts +37 -87
  17. package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +2 -0
  18. package/src/__tests__/annotate-activity-metadata.test.ts +262 -0
  19. package/src/__tests__/annotate-risk-options.test.ts +2 -3
  20. package/src/__tests__/anthropic-provider.test.ts +95 -2
  21. package/src/__tests__/assistant-event-hub.test.ts +25 -0
  22. package/src/__tests__/assistant-events-sse-shed.test.ts +8 -0
  23. package/src/__tests__/{conversation-stream-state.test.ts → assistant-stream-state.test.ts} +252 -91
  24. package/src/__tests__/auth-fallback-events-store.test.ts +116 -0
  25. package/src/__tests__/background-workers-disk-pressure.test.ts +6 -0
  26. package/src/__tests__/btw-routes.test.ts +62 -3
  27. package/src/__tests__/build-persisted-content.test.ts +184 -0
  28. package/src/__tests__/catalog-files.test.ts +1 -1
  29. package/src/__tests__/clawhub-files.test.ts +1 -1
  30. package/src/__tests__/compaction-pipeline.test.ts +1 -1
  31. package/src/__tests__/compaction.benchmark.test.ts +0 -30
  32. package/src/__tests__/config-watcher.test.ts +1 -1
  33. package/src/__tests__/conversation-abort-tool-results.test.ts +57 -19
  34. package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +6 -2
  35. package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +10 -4
  36. package/src/__tests__/conversation-agent-loop-overflow.test.ts +313 -1136
  37. package/src/__tests__/conversation-agent-loop.test.ts +596 -1616
  38. package/src/__tests__/conversation-analysis-routes.test.ts +6 -0
  39. package/src/__tests__/conversation-history-web-search.test.ts +11 -1
  40. package/src/__tests__/conversation-pairing.test.ts +4 -31
  41. package/src/__tests__/conversation-process-app-control-preactivation.test.ts +6 -0
  42. package/src/__tests__/conversation-provider-retry-repair.test.ts +26 -5
  43. package/src/__tests__/conversation-queue.test.ts +2 -0
  44. package/src/__tests__/conversation-routes-disk-view.test.ts +3 -0
  45. package/src/__tests__/conversation-routes-slash-commands.test.ts +6 -5
  46. package/src/__tests__/conversation-runtime-assembly.test.ts +170 -229
  47. package/src/__tests__/conversation-runtime-workspace.test.ts +3 -24
  48. package/src/__tests__/conversation-slash-commands.test.ts +8 -42
  49. package/src/__tests__/conversation-slash-queue.test.ts +6 -1
  50. package/src/__tests__/conversation-surfaces-action-delivery.test.ts +84 -0
  51. package/src/__tests__/conversation-sync-tags.test.ts +27 -15
  52. package/src/__tests__/conversation-title-service.test.ts +135 -2
  53. package/src/__tests__/conversation-workspace-injection.test.ts +6 -1
  54. package/src/__tests__/cross-provider-web-search.test.ts +214 -1
  55. package/src/__tests__/db-schedule-syntax-migration.test.ts +5 -0
  56. package/src/__tests__/dm-persistence.test.ts +5 -1
  57. package/src/__tests__/empty-response-hook.test.ts +304 -0
  58. package/src/__tests__/feature-flag-test-helpers.ts +2 -2
  59. package/src/__tests__/gemini-image-service.test.ts +13 -0
  60. package/src/__tests__/helpers/mock-provider.ts +110 -0
  61. package/src/__tests__/helpers/native-web-search-harness.ts +129 -0
  62. package/src/__tests__/history-repair-hook.test.ts +1 -0
  63. package/src/__tests__/identity-intro-cache.test.ts +12 -100
  64. package/src/__tests__/identity-routes.test.ts +248 -7
  65. package/src/__tests__/inbound-slack-persistence.test.ts +5 -1
  66. package/src/__tests__/injector-background-turn.test.ts +2 -8
  67. package/src/__tests__/injector-chain.test.ts +106 -270
  68. package/src/__tests__/injector-disk-pressure.test.ts +3 -12
  69. package/src/__tests__/injector-document-comments.test.ts +2 -2
  70. package/src/__tests__/injector-pkb-v2-silenced.test.ts +30 -22
  71. package/src/__tests__/injector-v3-suppression.test.ts +31 -37
  72. package/src/__tests__/internal-telemetry-routes.test.ts +109 -0
  73. package/src/__tests__/list-messages-page-latest.test.ts +60 -0
  74. package/src/__tests__/list-messages-tool-merge.test.ts +20 -0
  75. package/src/__tests__/llm-usage-store.test.ts +223 -1
  76. package/src/__tests__/memory-retrieval-hook.test.ts +297 -0
  77. package/src/__tests__/memory-v2-static-injector.test.ts +103 -35
  78. package/src/__tests__/native-web-search.test.ts +191 -0
  79. package/src/__tests__/onboarding-template-contract.test.ts +2 -0
  80. package/src/__tests__/openai-image-service.test.ts +17 -0
  81. package/src/__tests__/openai-provider.test.ts +31 -1
  82. package/src/__tests__/persist-unsendable-image.test.ts +215 -0
  83. package/src/__tests__/persistence-secret-redaction.test.ts +1 -0
  84. package/src/__tests__/pipeline-runner.test.ts +29 -39
  85. package/src/__tests__/pkb-autoinject.test.ts +2 -5
  86. package/src/__tests__/plugin-bootstrap.test.ts +13 -28
  87. package/src/__tests__/plugin-registry.test.ts +0 -27
  88. package/src/__tests__/plugin-types.test.ts +2 -125
  89. package/src/__tests__/process-message-display-content.test.ts +6 -2
  90. package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +5 -1
  91. package/src/__tests__/resolve-trust-class.test.ts +4 -4
  92. package/src/__tests__/runtime-events-sse-reconnect.test.ts +60 -23
  93. package/src/__tests__/schedule-routes.test.ts +603 -2
  94. package/src/__tests__/schedule-store.test.ts +41 -0
  95. package/src/__tests__/schedule-tools.test.ts +35 -0
  96. package/src/__tests__/server-history-render.test.ts +314 -1
  97. package/src/__tests__/skillssh-files.test.ts +1 -1
  98. package/src/__tests__/system-prompt.test.ts +20 -0
  99. package/src/__tests__/task-scheduler.test.ts +162 -1
  100. package/src/__tests__/terminal-tools.test.ts +6 -1
  101. package/src/__tests__/title-generate-hook.test.ts +319 -0
  102. package/src/__tests__/tool-error-hook.test.ts +278 -0
  103. package/src/__tests__/tool-preview-lifecycle.test.ts +468 -5
  104. package/src/__tests__/tool-result-metadata-plumbing.test.ts +1 -0
  105. package/src/__tests__/tool-result-truncate-hook.test.ts +127 -0
  106. package/src/__tests__/tool-result-truncation.test.ts +0 -2
  107. package/src/__tests__/ui-choice-copy-surfaces.test.ts +254 -0
  108. package/src/__tests__/ui-work-result-surface.test.ts +159 -0
  109. package/src/__tests__/usage-routes.test.ts +285 -1
  110. package/src/__tests__/user-plugin-loader.test.ts +2 -2
  111. package/src/__tests__/voice-session-bridge.test.ts +6 -3
  112. package/src/__tests__/web-search-backend-failure.test.ts +166 -0
  113. package/src/agent/loop.ts +346 -442
  114. package/src/api/events/assistant-thinking-delta.ts +33 -0
  115. package/src/api/events/tool-output-chunk.ts +45 -0
  116. package/src/api/events/tool-use-preview-start.ts +32 -0
  117. package/src/api/events/trace-event.ts +69 -0
  118. package/src/api/index.ts +48 -13
  119. package/src/api/responses/conversation-message.ts +368 -0
  120. package/src/avatar/__tests__/avatar-store.test.ts +34 -29
  121. package/src/cli/commands/__tests__/notifications.test.ts +58 -14
  122. package/src/cli/commands/notifications.ts +112 -60
  123. package/src/config/assistant-feature-flags.ts +22 -11
  124. package/src/config/bundled-skills/app-builder/SKILL.md +3 -20
  125. package/src/config/bundled-skills/app-builder/references/examples/README.md +17 -0
  126. package/src/config/bundled-skills/app-builder/references/examples/expense-tracker.md +515 -0
  127. package/src/config/bundled-skills/app-builder/references/examples/focus-timer.md +342 -0
  128. package/src/config/bundled-skills/app-builder/references/examples/habit-tracker.md +490 -0
  129. package/src/config/bundled-skills/document-editor/SKILL.md +1 -1
  130. package/src/config/bundled-skills/messaging/SKILL.md +0 -7
  131. package/src/config/feature-flag-cache.ts +3 -3
  132. package/src/config/feature-flag-registry.json +35 -3
  133. package/src/config/schemas/__tests__/memory-v2.test.ts +1 -0
  134. package/src/config/schemas/__tests__/memory-v3.test.ts +25 -0
  135. package/src/config/schemas/llm.ts +1 -0
  136. package/src/config/schemas/memory-v2.ts +8 -0
  137. package/src/config/schemas/memory-v3.ts +8 -0
  138. package/src/config/schemas/platform.ts +8 -0
  139. package/src/config/seed-inference-profiles.ts +2 -2
  140. package/src/config/skills.ts +13 -0
  141. package/src/context/compactor.ts +1 -1
  142. package/src/context/strip-injections.ts +122 -0
  143. package/src/context/token-estimator.ts +23 -0
  144. package/src/context/tool-result-truncation.ts +0 -23
  145. package/src/context/window-manager.ts +3 -6
  146. package/src/credential-execution/executable-discovery.ts +16 -0
  147. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +6 -0
  148. package/src/daemon/__tests__/inference-profile-notification.test.ts +153 -0
  149. package/src/daemon/__tests__/native-web-search-metadata.test.ts +10 -8
  150. package/src/daemon/assistant-attachments.ts +1 -1
  151. package/src/daemon/config-watcher.ts +2 -2
  152. package/src/daemon/context-overflow-reducer.ts +0 -1
  153. package/src/daemon/conversation-agent-loop-handlers.ts +605 -153
  154. package/src/daemon/conversation-agent-loop.ts +281 -760
  155. package/src/daemon/conversation-history.ts +5 -4
  156. package/src/daemon/conversation-lifecycle.ts +3 -4
  157. package/src/daemon/conversation-messaging.ts +7 -6
  158. package/src/daemon/conversation-process.ts +11 -16
  159. package/src/daemon/conversation-runtime-assembly.ts +130 -347
  160. package/src/daemon/conversation-slash.ts +6 -25
  161. package/src/daemon/conversation-surfaces.ts +222 -4
  162. package/src/daemon/conversation-tool-setup.ts +2 -29
  163. package/src/daemon/conversation.ts +32 -14
  164. package/src/daemon/external-plugins-bootstrap.ts +9 -10
  165. package/src/daemon/handlers/config-a2a.ts +51 -36
  166. package/src/daemon/handlers/config-slack-channel.ts +20 -14
  167. package/src/daemon/handlers/config-telegram.ts +16 -2
  168. package/src/daemon/handlers/shared.ts +156 -84
  169. package/src/daemon/handlers/skills.ts +39 -10
  170. package/src/daemon/lifecycle.ts +4 -0
  171. package/src/daemon/message-types/apps.ts +1 -29
  172. package/src/daemon/message-types/messages.ts +9 -57
  173. package/src/daemon/message-types/skills.ts +2 -0
  174. package/src/daemon/message-types/surfaces.ts +136 -3
  175. package/src/daemon/now-scratchpad.ts +21 -0
  176. package/src/daemon/orphan-reaper.test.ts +210 -0
  177. package/src/daemon/orphan-reaper.ts +240 -0
  178. package/src/daemon/persist-unsendable-image.ts +117 -0
  179. package/src/daemon/process-message.ts +1 -3
  180. package/src/daemon/trace-emitter.ts +6 -4
  181. package/src/daemon/trust-context.ts +19 -0
  182. package/src/daemon/wake-target-adapter.ts +3 -1
  183. package/src/home/home-greeting-cache.ts +24 -1
  184. package/src/ipc/gateway-client.test.ts +2 -2
  185. package/src/ipc/gateway-client.ts +3 -3
  186. package/src/media/gemini-image-service.ts +15 -0
  187. package/src/media/openai-image-service.ts +14 -0
  188. package/src/media/types.ts +34 -0
  189. package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +56 -0
  190. package/src/memory/auth-fallback-events-store.ts +94 -0
  191. package/src/memory/conversation-title-service.ts +65 -41
  192. package/src/memory/db-init.ts +4 -0
  193. package/src/memory/graph/__tests__/conversation-graph-memory-registry.test.ts +119 -0
  194. package/src/memory/graph/conversation-graph-memory.ts +65 -0
  195. package/src/memory/jobs-store.ts +33 -0
  196. package/src/memory/jobs-worker.ts +31 -4
  197. package/src/memory/llm-usage-store.ts +224 -50
  198. package/src/memory/migrations/222-strip-placeholder-sentinels-from-messages.ts +6 -5
  199. package/src/memory/migrations/270-schedule-source-conversation.ts +13 -0
  200. package/src/memory/migrations/271-create-auth-fallback-events.ts +21 -0
  201. package/src/memory/migrations/index.ts +2 -0
  202. package/src/memory/pkb/autoinject.ts +61 -0
  203. package/src/memory/pkb/context.ts +50 -0
  204. package/src/memory/pkb/types.ts +14 -0
  205. package/src/memory/schedule-attribution-sql.ts +104 -0
  206. package/src/memory/schema/infrastructure.ts +16 -0
  207. package/src/memory/usage-grouped-buckets.ts +6 -1
  208. package/src/memory/v2/__tests__/consolidation-job.test.ts +1 -1
  209. package/src/memory/v2/consolidation-job.ts +1 -1
  210. package/src/memory/v3/__tests__/health.test.ts +16 -0
  211. package/src/memory/v3/__tests__/orchestrate.test.ts +45 -9
  212. package/src/memory/v3/__tests__/provider-blocks.test.ts +13 -0
  213. package/src/memory/v3/__tests__/router.test.ts +101 -29
  214. package/src/memory/v3/__tests__/selector.test.ts +93 -27
  215. package/src/memory/v3/__tests__/shadow-plugin.test.ts +23 -5
  216. package/src/memory/v3/health.ts +0 -0
  217. package/src/memory/v3/llm-retry.ts +32 -0
  218. package/src/memory/v3/orchestrate.ts +26 -14
  219. package/src/memory/v3/provider-blocks.ts +15 -5
  220. package/src/memory/v3/router.ts +48 -42
  221. package/src/memory/v3/selector.ts +57 -42
  222. package/src/memory/v3/shadow-plugin.ts +47 -15
  223. package/src/memory/v3/types.ts +8 -0
  224. package/src/notifications/conversation-pairing.ts +8 -15
  225. package/src/notifications/decision-engine.ts +6 -3
  226. package/src/notifications/home-feed-side-effect.ts +12 -1
  227. package/src/permissions/prompter.ts +4 -0
  228. package/src/plugin-api/constants.ts +4 -0
  229. package/src/plugin-api/index.ts +8 -1
  230. package/src/plugin-api/types.ts +151 -1
  231. package/src/plugins/defaults/empty-response/hooks/stop.ts +126 -0
  232. package/src/plugins/defaults/empty-response/register.ts +8 -13
  233. package/src/plugins/defaults/index.ts +1 -15
  234. package/src/plugins/defaults/injectors/register.ts +243 -74
  235. package/src/plugins/defaults/memory-retrieval/hooks/post-compact.ts +91 -0
  236. package/src/plugins/defaults/memory-retrieval/hooks/user-prompt-submit-temp.ts +216 -0
  237. package/src/plugins/defaults/memory-retrieval/injector-chain.ts +35 -0
  238. package/src/plugins/defaults/title-generate/hooks/stop.ts +75 -0
  239. package/src/plugins/defaults/title-generate/hooks/user-prompt-submit.ts +35 -0
  240. package/src/plugins/defaults/title-generate/package.json +1 -1
  241. package/src/plugins/defaults/title-generate/register.ts +18 -18
  242. package/src/plugins/defaults/tool-error/hooks/post-tool-use.ts +118 -0
  243. package/src/plugins/defaults/tool-error/package.json +1 -1
  244. package/src/plugins/defaults/tool-error/register.ts +9 -21
  245. package/src/plugins/defaults/tool-result-truncate/hooks/post-tool-use.ts +32 -0
  246. package/src/plugins/defaults/tool-result-truncate/register.ts +10 -21
  247. package/src/plugins/defaults/tool-result-truncate/terminal.ts +37 -18
  248. package/src/plugins/pipeline.ts +6 -18
  249. package/src/plugins/registry.ts +8 -25
  250. package/src/plugins/types.ts +43 -474
  251. package/src/proactive-artifact/aux-message-injector.ts +3 -3
  252. package/src/proactive-artifact/job.test.ts +7 -12
  253. package/src/prompts/__tests__/system-prompt.test.ts +36 -0
  254. package/src/prompts/templates/BOOTSTRAP-ACTIVATION-RAIL.md +62 -0
  255. package/src/prompts/templates/BOOTSTRAP.md +2 -2
  256. package/src/prompts/templates/system-sections.ts +15 -0
  257. package/src/providers/anthropic/client.ts +37 -29
  258. package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +112 -0
  259. package/src/providers/openai/chat-completions-provider.ts +44 -0
  260. package/src/providers/openrouter/client.ts +1 -0
  261. package/src/providers/placeholder-sentinels.ts +35 -0
  262. package/src/runtime/__tests__/agent-wake.test.ts +5 -1
  263. package/src/runtime/agent-wake.ts +2 -2
  264. package/src/runtime/assistant-event-hub.ts +36 -6
  265. package/src/runtime/{conversation-stream-state.ts → assistant-stream-state.ts} +132 -58
  266. package/src/runtime/http-router.ts +16 -21
  267. package/src/runtime/http-types.ts +16 -70
  268. package/src/runtime/pending-interactions.ts +1 -0
  269. package/src/runtime/routes/__tests__/consolidation-routes.test.ts +265 -2
  270. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +31 -1
  271. package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +6 -2
  272. package/src/runtime/routes/__tests__/tts-routes.test.ts +6 -2
  273. package/src/runtime/routes/app-management-routes.ts +6 -117
  274. package/src/runtime/routes/app-routes.ts +13 -15
  275. package/src/runtime/routes/attachment-routes.ts +26 -15
  276. package/src/runtime/routes/avatar-routes.ts +26 -0
  277. package/src/runtime/routes/btw-routes.ts +29 -23
  278. package/src/runtime/routes/consolidation-routes.ts +120 -20
  279. package/src/runtime/routes/conversation-query-routes.ts +2 -0
  280. package/src/runtime/routes/conversation-routes.ts +358 -184
  281. package/src/runtime/routes/documents-routes.ts +4 -0
  282. package/src/runtime/routes/domain-routes.ts +51 -37
  283. package/src/runtime/routes/epoch-millis-range.ts +34 -0
  284. package/src/runtime/routes/events-routes.ts +28 -34
  285. package/src/runtime/routes/gateway-log-routes.ts +26 -4
  286. package/src/runtime/routes/heartbeat-routes.ts +32 -12
  287. package/src/runtime/routes/identity-intro-cache.ts +11 -34
  288. package/src/runtime/routes/identity-routes.ts +208 -17
  289. package/src/runtime/routes/image-generation-routes.ts +40 -2
  290. package/src/runtime/routes/index.ts +2 -0
  291. package/src/runtime/routes/integrations/a2a.ts +12 -10
  292. package/src/runtime/routes/integrations/slack/__tests__/channel.test.ts +16 -0
  293. package/src/runtime/routes/integrations/slack/channel.ts +4 -0
  294. package/src/runtime/routes/integrations/slack/share.ts +27 -6
  295. package/src/runtime/routes/integrations/telegram.ts +6 -0
  296. package/src/runtime/routes/integrations/twilio.ts +42 -0
  297. package/src/runtime/routes/internal-telemetry-routes.ts +88 -0
  298. package/src/runtime/routes/log-export-routes.ts +8 -0
  299. package/src/runtime/routes/memory-v2-routes.ts +15 -8
  300. package/src/runtime/routes/memory-v3-routes.ts +50 -28
  301. package/src/runtime/routes/oauth-apps.ts +66 -12
  302. package/src/runtime/routes/oauth-providers.ts +44 -5
  303. package/src/runtime/routes/platform-routes.ts +81 -5
  304. package/src/runtime/routes/playground/__tests__/force-compact.test.ts +6 -4
  305. package/src/runtime/routes/playground/force-compact.ts +1 -1
  306. package/src/runtime/routes/rename-conversation-routes.ts +5 -0
  307. package/src/runtime/routes/schedule-routes.ts +152 -42
  308. package/src/runtime/routes/secret-routes.ts +14 -2
  309. package/src/runtime/routes/skills-routes.ts +43 -14
  310. package/src/runtime/routes/tool-call-confirmation-enrichment.test.ts +161 -0
  311. package/src/runtime/routes/tool-call-confirmation-enrichment.ts +107 -0
  312. package/src/runtime/routes/trust-rules-routes.ts +26 -2
  313. package/src/runtime/routes/tts-routes.ts +35 -0
  314. package/src/runtime/routes/types.ts +66 -8
  315. package/src/runtime/routes/usage-routes.ts +47 -39
  316. package/src/runtime/routes/webhook-routes.ts +41 -2
  317. package/src/runtime/routes/workspace-routes.ts +4 -0
  318. package/src/runtime/services/__tests__/analyze-conversation.test.ts +6 -0
  319. package/src/runtime/services/analyze-conversation.ts +2 -2
  320. package/src/schedule/schedule-store.ts +20 -1
  321. package/src/schedule/schedule-usage-store.ts +83 -0
  322. package/src/schedule/scheduler.ts +12 -5
  323. package/src/skills/catalog-files.ts +2 -2
  324. package/src/skills/catalog-install.ts +3 -0
  325. package/src/skills/categories-cache.ts +118 -0
  326. package/src/skills/clawhub-files.ts +1 -2
  327. package/src/skills/skillssh-files.ts +1 -2
  328. package/src/telemetry/types.ts +29 -1
  329. package/src/telemetry/usage-telemetry-reporter.test.ts +112 -3
  330. package/src/telemetry/usage-telemetry-reporter.ts +57 -2
  331. package/src/tools/executor.ts +1 -53
  332. package/src/tools/network/__tests__/web-search-metadata.test.ts +7 -1
  333. package/src/tools/network/__tests__/web-search.test.ts +11 -3
  334. package/src/tools/network/web-search-error.test.ts +248 -0
  335. package/src/tools/network/web-search-error.ts +267 -0
  336. package/src/tools/network/web-search.ts +207 -48
  337. package/src/tools/schedule/create.ts +2 -0
  338. package/src/tools/terminal/safe-env.ts +10 -1
  339. package/src/tools/ui-surface/definitions.ts +9 -1
  340. package/src/tts/__tests__/provider-catalog-consistency.test.ts +85 -1
  341. package/src/tts/provider-catalog.ts +76 -1
  342. package/src/util/mutex.ts +47 -0
  343. package/src/workspace/git-service.ts +1 -42
  344. package/src/workspace/migrations/095-bump-heartbeat-interval-30m-to-60m.ts +51 -0
  345. package/src/workspace/migrations/096-reduce-quality-profile-effort.ts +72 -0
  346. package/src/workspace/migrations/097-enable-adaptive-thinking-managed-profiles.ts +93 -0
  347. package/src/workspace/migrations/registry.ts +6 -0
  348. package/src/__tests__/bootstrap-turn-cleanup.test.ts +0 -44
  349. package/src/__tests__/empty-response-pipeline.test.ts +0 -423
  350. package/src/__tests__/llm-call-pipeline.test.ts +0 -287
  351. package/src/__tests__/memory-retrieval-pipeline.test.ts +0 -418
  352. package/src/__tests__/persistence-pipeline.test.ts +0 -503
  353. package/src/__tests__/title-generate-pipeline.test.ts +0 -211
  354. package/src/__tests__/token-estimate-pipeline.test.ts +0 -479
  355. package/src/__tests__/tool-error-pipeline.test.ts +0 -241
  356. package/src/__tests__/tool-execute-pipeline.test.ts +0 -417
  357. package/src/__tests__/tool-result-truncate-pipeline.test.ts +0 -341
  358. package/src/daemon/bootstrap-turn-cleanup.ts +0 -45
  359. package/src/gallery/default-gallery.ts +0 -1359
  360. package/src/gallery/gallery-manifest.ts +0 -28
  361. package/src/home/feature-gate.ts +0 -22
  362. package/src/plugins/defaults/empty-response/middlewares/emptyResponse.ts +0 -22
  363. package/src/plugins/defaults/empty-response/terminal.ts +0 -106
  364. package/src/plugins/defaults/injectors/package.json +0 -15
  365. package/src/plugins/defaults/llm-call/middlewares/llmCall.ts +0 -17
  366. package/src/plugins/defaults/llm-call/package.json +0 -15
  367. package/src/plugins/defaults/llm-call/register.ts +0 -45
  368. package/src/plugins/defaults/memory-retrieval/middlewares/memoryRetrieval.ts +0 -17
  369. package/src/plugins/defaults/memory-retrieval/package.json +0 -15
  370. package/src/plugins/defaults/memory-retrieval/register.ts +0 -181
  371. package/src/plugins/defaults/persistence/middlewares/persistence.ts +0 -19
  372. package/src/plugins/defaults/persistence/package.json +0 -15
  373. package/src/plugins/defaults/persistence/register.ts +0 -38
  374. package/src/plugins/defaults/persistence/terminal.ts +0 -83
  375. package/src/plugins/defaults/title-generate/terminal.ts +0 -31
  376. package/src/plugins/defaults/token-estimate/middlewares/tokenEstimate.ts +0 -23
  377. package/src/plugins/defaults/token-estimate/package.json +0 -15
  378. package/src/plugins/defaults/token-estimate/register.ts +0 -34
  379. package/src/plugins/defaults/token-estimate/terminal.ts +0 -40
  380. package/src/plugins/defaults/tool-error/middlewares/toolError.ts +0 -21
  381. package/src/plugins/defaults/tool-error/terminal.ts +0 -47
  382. package/src/plugins/defaults/tool-execute/middlewares/toolExecute.ts +0 -23
  383. package/src/plugins/defaults/tool-execute/package.json +0 -15
  384. package/src/plugins/defaults/tool-execute/register.ts +0 -49
  385. package/src/plugins/defaults/tool-result-truncate/middlewares/toolResultTruncate.ts +0 -23
  386. package/src/plugins/defaults/tool-result-truncate/types.ts +0 -22
  387. package/src/skills/category-inference.ts +0 -111
@@ -17,6 +17,7 @@ import type {
17
17
  import { getConfig } from "../config/loader.js";
18
18
  import { recordEstimate } from "../context/estimator-calibration.js";
19
19
  import { getCalibrationProviderKey } from "../context/token-estimator.js";
20
+ import type { ContextWindowResult } from "../context/window-manager.js";
20
21
  import { projectAssistantMessage } from "../memory/conversation-attention-store.js";
21
22
  import {
22
23
  deleteMessageById,
@@ -24,8 +25,12 @@ import {
24
25
  getMessageById,
25
26
  messageMetadataSchema,
26
27
  provenanceFromTrustContext,
28
+ reserveMessage,
29
+ setConversationHistoryStrippedAt,
30
+ setLastNotifiedInferenceProfile,
27
31
  updateMessageContent,
28
32
  } from "../memory/conversation-crud.js";
33
+ import { syncMessageToDisk } from "../memory/conversation-disk-view.js";
29
34
  import { indexMessageNow } from "../memory/indexer.js";
30
35
  import {
31
36
  backfillMessageIdOnLogs,
@@ -41,20 +46,24 @@ import {
41
46
  type SlackMessageMetadata,
42
47
  writeSlackMetadata,
43
48
  } from "../messaging/providers/slack/message-metadata.js";
44
- import { defaultPersistenceTerminal } from "../plugins/defaults/persistence/terminal.js";
45
- import { DEFAULT_TIMEOUTS, runPipeline } from "../plugins/pipeline.js";
46
- import { getMiddlewaresFor } from "../plugins/registry.js";
47
49
  import type {
48
- PersistArgs,
49
- PersistReserveResult,
50
- PersistResult,
51
- TurnContext,
52
- } from "../plugins/types.js";
53
- import type { ContentBlock, ImageContent } from "../providers/types.js";
50
+ ContentBlock,
51
+ ImageContent,
52
+ Message,
53
+ } from "../providers/types.js";
54
54
  import { isContextOverflowError } from "../providers/types.js";
55
+ import {
56
+ getCurrentSeq,
57
+ recordPersistedSeq,
58
+ } from "../runtime/assistant-stream-state.js";
55
59
  import { publishSyncInvalidation } from "../runtime/sync/sync-publisher.js";
56
60
  import { redactSecrets } from "../security/secret-scanner.js";
57
61
  import { extractDomain } from "../tools/network/domain-normalize.js";
62
+ import {
63
+ classifyWebSearchFailure,
64
+ logWebSearchBackendFailure,
65
+ WEB_SEARCH_BACKEND_FAILURE_MESSAGE,
66
+ } from "../tools/network/web-search-error.js";
58
67
  import {
59
68
  buildPricingUsage,
60
69
  resolveStructuredPricing,
@@ -87,45 +96,36 @@ import type {
87
96
  } from "./message-protocol.js";
88
97
  import { conversationMetadataSyncTag } from "./message-types/sync.js";
89
98
  import type {
99
+ ToolActivityMetadata,
90
100
  WebSearchMetadata,
91
101
  WebSearchResultItem,
92
102
  } from "./message-types/web-activity.js";
93
- import { FALLBACK_TURN_TRUST } from "./trust-context.js";
94
103
 
95
104
  const log = getLogger("agent-loop-handlers");
96
105
 
106
+ /**
107
+ * Persist the history-stripped marker after the loop strips runtime injections
108
+ * for compaction / overflow recovery. The marker is a durability hint, not
109
+ * turn-critical state — a transient SQLite write failure (SQLITE_BUSY,
110
+ * disk-full, read-only FS) must not abort the turn, so failures log a warning
111
+ * and continue.
112
+ */
113
+ export function markHistoryStrippedBestEffort(conversationId: string): void {
114
+ try {
115
+ setConversationHistoryStrippedAt(conversationId, Date.now());
116
+ } catch (err) {
117
+ log.warn(
118
+ { err, conversationId },
119
+ "Failed to persist history-stripped marker after compaction strip (non-fatal)",
120
+ );
121
+ }
122
+ }
123
+
97
124
  // ── Partial-persistence tunables ─────────────────────────────────────
98
125
  // Debounce for mid-turn `updateContent` writes from text deltas.
99
126
  // Indexer + projector still fire ONLY at `handleMessageComplete`.
100
127
  const PARTIAL_PERSIST_DEBOUNCE_MS = 1000;
101
128
 
102
- /**
103
- * Build a {@link TurnContext} from the handler's deps for pipeline logging
104
- * and plugin attribution.
105
- *
106
- * Reads `turnIndex` from `deps.ctx.turnCount` — the orchestrator-owned
107
- * per-turn counter that is stable for the entire duration of a single
108
- * `runAgentLoopImpl` invocation. The handlers fire after the orchestrator
109
- * has completed its in-turn pipeline work but before `ctx.turnCount++` runs
110
- * in the outer `finally` block, so this value always reflects the turn the
111
- * handler's event belongs to. Trust pulls from the per-turn snapshot first,
112
- * then the conversation-level context, then the canonical `unknown`
113
- * fallback so the required field stays populated for edge cases (fresh
114
- * conversations before the trust resolver runs, heartbeat turns that never
115
- * bind an actor).
116
- */
117
- function buildHandlerTurnContext(deps: EventHandlerDeps): TurnContext {
118
- return {
119
- requestId: deps.reqId,
120
- conversationId: deps.ctx.conversationId,
121
- turnIndex: deps.ctx.turnCount,
122
- trust:
123
- deps.ctx.currentTurnTrustContext ??
124
- deps.ctx.trustContext ??
125
- FALLBACK_TURN_TRUST,
126
- };
127
- }
128
-
129
129
  // ── Types ────────────────────────────────────────────────────────────
130
130
 
131
131
  export interface PendingToolResult {
@@ -137,6 +137,16 @@ export interface PendingToolResult {
137
137
  /** Mutable state shared across event handlers within a single agent loop run. */
138
138
  export interface EventHandlerState {
139
139
  llmCallStartedEmitted: boolean;
140
+ /**
141
+ * Profile key whose `model_profile` notice has been assembled into the turn
142
+ * context but not yet marked notified. Set when the turn injects the notice,
143
+ * and consumed the first time the model actually receives that context — i.e.
144
+ * on the first `message_complete`. Persisting on delivery (rather than inline
145
+ * before the provider call) means a cancelled or failed turn re-sends the
146
+ * notice next turn instead of silently marking the profile notified without
147
+ * the model ever having seen it.
148
+ */
149
+ pendingNotifiedInferenceProfile: string | null;
140
150
  pendingDirectiveDisplayBuffer: string;
141
151
  firstAssistantText: string;
142
152
  /** Most recent resolved provider for the current exchange's usage accounting. */
@@ -186,6 +196,15 @@ export interface EventHandlerState {
186
196
  */
187
197
  assistantRowAwaitingFinalization: boolean;
188
198
  readonly pendingToolResults: Map<string, PendingToolResult>;
199
+ /**
200
+ * Reservation of the grouped `user` tool-result row for the current batch,
201
+ * resolving to the row id. Shared across the concurrent `handleToolResult`
202
+ * calls of one parallel-tool batch so they reserve exactly one row and write
203
+ * into it as sibling results land. `undefined` until the first result of a
204
+ * batch triggers a reservation (reset on a failed reservation so the next
205
+ * arrival can retry) and again after the batch is finalized.
206
+ */
207
+ pendingToolResultRowReservation: Promise<string> | undefined;
189
208
  readonly persistedToolUseIds: Set<string>;
190
209
  readonly accumulatedDirectives: DirectiveRequest[];
191
210
  readonly accumulatedToolContentBlocks: ContentBlock[];
@@ -238,6 +257,13 @@ export interface EventHandlerState {
238
257
  riskDirectoryScopeOptions?: Array<{ scope: string; label: string }>;
239
258
  }
240
259
  >;
260
+ /**
261
+ * Structured tool activity (web_search / web_fetch) keyed by tool_use_id,
262
+ * captured when a result lands so it can be persisted on the tool's content
263
+ * block and survive a history reopen. Populated for both external provider
264
+ * tools (in handleToolResult) and native server tools (server_tool_complete).
265
+ */
266
+ readonly toolActivityMetadata: Map<string, ToolActivityMetadata>;
241
267
  /** tool_use_ids emitted in the current turn (populated in handleToolUse, cleared after annotation). */
242
268
  currentTurnToolUseIds: string[];
243
269
  /** Wall-clock time (ms since epoch) when the agent loop turn started, used as the display timestamp for assistant messages. */
@@ -246,12 +272,47 @@ export interface EventHandlerState {
246
272
  readonly serverToolStartedAt: Map<string, number>;
247
273
  /** Original input from server_tool_start, keyed by tool_use_id, so the complete handler can read the query. */
248
274
  readonly serverToolInputs: Map<string, Record<string, unknown>>;
275
+ /** Request ids for which a user-facing web_search backend-failure notice was already surfaced this turn (dedup noisy repeats). Keyed by request id; each turn has a fresh request id, so this grows at most one entry per turn. */
276
+ readonly webSearchBackendFailureNotified: Set<string>;
249
277
  /** Active debounce timer for partial persistence; `undefined` when idle. */
250
278
  pendingPartialFlushTimer: ReturnType<typeof setTimeout> | undefined;
251
279
  /** In-flight partial flush write awaited at finalize to avoid overwrite races. */
252
280
  pendingPartialFlushPromise: Promise<void> | undefined;
253
- /** Running mirror of the in-flight assistant message's content. */
281
+ /**
282
+ * Running mirror of the in-flight assistant message's streamed content
283
+ * (text and thinking), flushed to the assistant row on the partial-persist
284
+ * debounce so a mid-turn snapshot reflects what the user is watching live.
285
+ */
254
286
  currentMessageContent: ContentBlock[];
287
+ /**
288
+ * `seq` of the most recent streamed content delta mirrored into
289
+ * `currentMessageContent`. Recorded as the conversation's persisted `seq`
290
+ * after each flush commits (the debounced partial flushes and the
291
+ * `message_complete` finalize), so the snapshot's advertised `seq` tracks
292
+ * exactly the streamed content the durable row holds. `undefined` until the
293
+ * first content delta of the in-flight message. Because every streamed
294
+ * content type rides the same mirror-and-flush path, this single field
295
+ * never claims content a flush has not yet written.
296
+ */
297
+ lastPersistedContentSeq: number | undefined;
298
+ /**
299
+ * Whether the workspace top-level block should be (re)injected on this
300
+ * turn. Compaction's prepare phase strips the workspace / NOW.md / PKB
301
+ * blocks off the tail, so it is set after any successful compaction to
302
+ * force the workspace overview back in. On an ordinary turn the block is
303
+ * already present in history, so it defaults `false` to avoid burning
304
+ * tokens re-injecting it redundantly.
305
+ */
306
+ shouldInjectWorkspace: boolean;
307
+ /**
308
+ * Whether the reducer has compacted `ctx.messages`, gating the Slack
309
+ * chronological-transcript override on re-injection. The captured
310
+ * transcript is the full persisted history; blindly replaying it after
311
+ * compaction would overwrite the reduced messages and undo compaction, so
312
+ * once this is `true` the override falls back to the reduced
313
+ * `ctx.messages`.
314
+ */
315
+ reducerCompacted: boolean;
255
316
  }
256
317
 
257
318
  /** Immutable context shared across event handlers within a single agent loop run. */
@@ -265,6 +326,18 @@ export interface EventHandlerDeps {
265
326
  readonly rlog: pino.Logger;
266
327
  readonly turnChannelContext: TurnChannelContext;
267
328
  readonly turnInterfaceContext: TurnInterfaceContext;
329
+ /**
330
+ * Commit a successful inline compaction to durable state. Invoked from the
331
+ * `compaction_completed` dispatch case (when `result.compacted`) with the
332
+ * loop's compaction result and the stripped pre-compaction `basis`. Supplied
333
+ * by the orchestrator because the body writes Conversation DB-record fields,
334
+ * projects Slack provenance, and emits transport the loop is intentionally
335
+ * blind to.
336
+ */
337
+ readonly applyCompaction: (
338
+ result: ContextWindowResult,
339
+ basis: Message[],
340
+ ) => Promise<void>;
268
341
  }
269
342
 
270
343
  // ── Factory ──────────────────────────────────────────────────────────
@@ -272,6 +345,7 @@ export interface EventHandlerDeps {
272
345
  export function createEventHandlerState(): EventHandlerState {
273
346
  return {
274
347
  llmCallStartedEmitted: false,
348
+ pendingNotifiedInferenceProfile: null,
275
349
  pendingDirectiveDisplayBuffer: "",
276
350
  firstAssistantText: "",
277
351
  exchangeProviderName: undefined,
@@ -292,6 +366,7 @@ export function createEventHandlerState(): EventHandlerState {
292
366
  lastAssistantMessageId: undefined,
293
367
  assistantRowAwaitingFinalization: false,
294
368
  pendingToolResults: new Map(),
369
+ pendingToolResultRowReservation: undefined,
295
370
  persistedToolUseIds: new Set(),
296
371
  accumulatedDirectives: [],
297
372
  accumulatedToolContentBlocks: [],
@@ -307,22 +382,28 @@ export function createEventHandlerState(): EventHandlerState {
307
382
  requestIdToToolUseId: new Map(),
308
383
  toolConfirmationOutcomes: new Map(),
309
384
  toolRiskOutcomes: new Map(),
385
+ toolActivityMetadata: new Map(),
310
386
  currentTurnToolUseIds: [],
311
387
  turnStartedAt: Date.now(),
312
388
  serverToolStartedAt: new Map(),
313
389
  serverToolInputs: new Map(),
390
+ webSearchBackendFailureNotified: new Set(),
314
391
  pendingPartialFlushTimer: undefined,
315
392
  pendingPartialFlushPromise: undefined,
316
393
  currentMessageContent: [],
394
+ lastPersistedContentSeq: undefined,
395
+ shouldInjectWorkspace: false,
396
+ reducerCompacted: false,
317
397
  };
318
398
  }
319
399
 
320
400
  // ── Partial-persistence helpers ──────────────────────────────────────
321
401
 
322
402
  /** Canonical persisted-content build: clean → append surfaces → redact. */
323
- function buildPersistedAssistantContent(
403
+ export function buildPersistedAssistantContent(
324
404
  rawBlocks: readonly ContentBlock[],
325
405
  surfaces: readonly AssistantSurface[],
406
+ activityByToolUseId?: ReadonlyMap<string, ToolActivityMetadata>,
326
407
  ): ContentBlock[] {
327
408
  const { cleanedContent } = cleanAssistantContent(rawBlocks);
328
409
  const cleaned = cleanedContent as ContentBlock[];
@@ -345,6 +426,18 @@ function buildPersistedAssistantContent(
345
426
  const tb = block as Extract<ContentBlock, { type: "text" }>;
346
427
  return { ...tb, text: redactSecrets(tb.text) };
347
428
  }
429
+ // Native server tools (Anthropic web_search) resolve mid-stream — their
430
+ // `server_tool_complete` fires before `message_complete` — so the captured
431
+ // activity is available at persist time. Stamp it on the server_tool_use
432
+ // block here so the web-search card survives a history reopen. External
433
+ // tool_use activity arrives only with the later tool_result, so it is
434
+ // stamped in `annotatePersistedAssistantMessage` instead.
435
+ if (block.type === "server_tool_use" && activityByToolUseId) {
436
+ const activity = activityByToolUseId.get(block.id);
437
+ if (activity) {
438
+ return { ...block, _activityMetadata: activity } as ContentBlock;
439
+ }
440
+ }
348
441
  return block;
349
442
  });
350
443
  }
@@ -363,6 +456,30 @@ function appendTextToCurrentMessage(
363
456
  }
364
457
  }
365
458
 
459
+ /**
460
+ * Append a streamed thinking chunk to `state.currentMessageContent`, fusing
461
+ * into the tail thinking block. The streamed delta carries no provider
462
+ * `signature` (that arrives only when the block closes), so the mirrored block
463
+ * holds an empty one; `message_complete` overwrites the row with the
464
+ * authoritative signed content before it is ever sent back to a provider.
465
+ */
466
+ function appendThinkingToCurrentMessage(
467
+ state: EventHandlerState,
468
+ thinking: string,
469
+ ): void {
470
+ if (thinking.length === 0) return;
471
+ const tail = state.currentMessageContent.at(-1);
472
+ if (tail && tail.type === "thinking") {
473
+ tail.thinking = tail.thinking + thinking;
474
+ } else {
475
+ state.currentMessageContent.push({
476
+ type: "thinking",
477
+ thinking,
478
+ signature: "",
479
+ });
480
+ }
481
+ }
482
+
366
483
  /** Reset partial-persist accumulator and any pending flush state. Idempotent. */
367
484
  function resetPartialPersistAccumulator(state: EventHandlerState): void {
368
485
  if (state.pendingPartialFlushTimer !== undefined) {
@@ -370,10 +487,11 @@ function resetPartialPersistAccumulator(state: EventHandlerState): void {
370
487
  state.pendingPartialFlushTimer = undefined;
371
488
  }
372
489
  state.currentMessageContent = [];
490
+ state.lastPersistedContentSeq = undefined;
373
491
  state.pendingPartialFlushPromise = undefined;
374
492
  }
375
493
 
376
- /** Flush `state.currentMessageContent` to the row via the persistence pipeline. */
494
+ /** Flush `state.currentMessageContent` to the persisted assistant row. */
377
495
  async function flushAccumulatedContent(
378
496
  state: EventHandlerState,
379
497
  deps: EventHandlerDeps,
@@ -382,22 +500,24 @@ async function flushAccumulatedContent(
382
500
  if (messageId === undefined) return;
383
501
  if (state.currentMessageContent.length === 0) return;
384
502
 
385
- const built = buildPersistedAssistantContent(state.currentMessageContent, []);
503
+ const built = buildPersistedAssistantContent(
504
+ state.currentMessageContent,
505
+ [],
506
+ state.toolActivityMetadata,
507
+ );
386
508
  const contentJson = JSON.stringify(built);
509
+ // Pair the seq with the exact content snapshot taken above: deltas that
510
+ // arrive while the write is in flight bump `lastPersistedContentSeq`
511
+ // again, but they are not part of this write.
512
+ const flushedSeq = state.lastPersistedContentSeq;
387
513
 
388
514
  try {
389
- await runPipeline<PersistArgs, PersistResult>(
390
- "persistence",
391
- getMiddlewaresFor("persistence"),
392
- defaultPersistenceTerminal,
393
- {
394
- op: "updateContent",
395
- messageId,
396
- content: contentJson,
397
- },
398
- buildHandlerTurnContext(deps),
399
- DEFAULT_TIMEOUTS.persistence,
400
- );
515
+ updateMessageContent(messageId, contentJson);
516
+ // Record only after the write commits, so the snapshot seq never
517
+ // claims content that is not yet durable.
518
+ if (flushedSeq != null) {
519
+ recordPersistedSeq(deps.ctx.conversationId, flushedSeq);
520
+ }
401
521
  } catch (err) {
402
522
  deps.rlog.warn(
403
523
  { err, messageId },
@@ -633,12 +753,6 @@ export async function handleLlmCallStarted(
633
753
  // the `assistantRowAwaitingFinalization` flag — `handleMessageComplete`
634
754
  // clears it after the successful `updateContent`, so the previous call's
635
755
  // committed row is never touched here.
636
- //
637
- // Direct `deleteMessageById` (not via the `persistence` pipeline) is
638
- // intentional: a never-finalized reservation has no segments, no
639
- // attachments, and no observable history — undoing it isn't a real
640
- // persistence event for plugins to react to, so routing through the
641
- // pipeline would only widen the mock surface for no observability win.
642
756
  if (state.assistantRowAwaitingFinalization && state.lastAssistantMessageId) {
643
757
  try {
644
758
  deleteMessageById(state.lastAssistantMessageId);
@@ -652,20 +766,12 @@ export async function handleLlmCallStarted(
652
766
  }
653
767
 
654
768
  const metadata = buildAssistantChannelMetadata(state, deps);
655
- const reserveResult = (await runPipeline<PersistArgs, PersistResult>(
656
- "persistence",
657
- getMiddlewaresFor("persistence"),
658
- defaultPersistenceTerminal,
659
- {
660
- op: "reserve",
661
- conversationId: deps.ctx.conversationId,
662
- role: "assistant",
663
- metadata,
664
- },
665
- buildHandlerTurnContext(deps),
666
- DEFAULT_TIMEOUTS.persistence,
667
- )) as PersistReserveResult;
668
- state.lastAssistantMessageId = reserveResult.message.id;
769
+ const reservedRow = await reserveMessage(
770
+ deps.ctx.conversationId,
771
+ "assistant",
772
+ metadata,
773
+ );
774
+ state.lastAssistantMessageId = reservedRow.id;
669
775
  state.assistantRowAwaitingFinalization = true;
670
776
  // Fresh row → fresh accumulator. If an earlier (failed) LLM call
671
777
  // within the same run left partial state behind, the
@@ -675,7 +781,7 @@ export async function handleLlmCallStarted(
675
781
  resetPartialPersistAccumulator(state);
676
782
  deps.onEvent({
677
783
  type: "assistant_turn_start",
678
- messageId: reserveResult.message.id,
784
+ messageId: reservedRow.id,
679
785
  conversationId: deps.ctx.conversationId,
680
786
  });
681
787
  }
@@ -711,6 +817,11 @@ function handleTextDelta(
711
817
  // Mirror the drained delta into state.currentMessageContent so partial
712
818
  // flushes mid-turn see the same content the user is watching live.
713
819
  appendTextToCurrentMessage(state, drained.emitText);
820
+ // The hub stamps `seq` synchronously on the delta emitted above, so
821
+ // `getCurrentSeq()` here is that delta's seq -- the position the
822
+ // mirrored content now reflects. A partial flush snapshots this to
823
+ // record how far the durable rows track the live stream.
824
+ state.lastPersistedContentSeq = getCurrentSeq();
714
825
  schedulePartialFlush(state, deps);
715
826
  }
716
827
  }
@@ -746,6 +857,14 @@ function handleThinkingDelta(
746
857
  conversationId: deps.ctx.conversationId,
747
858
  messageId: state.lastAssistantMessageId,
748
859
  });
860
+ // Mirror thinking into the same running view as text so the debounced
861
+ // partial flush persists it mid-turn -- long reasoning streams survive a
862
+ // refresh that outlives the SSE replay window, exactly as long answers do.
863
+ appendThinkingToCurrentMessage(state, event.thinking);
864
+ // The hub stamps `seq` synchronously on the delta emitted above, so
865
+ // `getCurrentSeq()` is that delta's position in the mirrored content.
866
+ state.lastPersistedContentSeq = getCurrentSeq();
867
+ schedulePartialFlush(state, deps);
749
868
  }
750
869
 
751
870
  export function handleToolUse(
@@ -773,6 +892,14 @@ export function handleToolUse(
773
892
  toolUseId: event.id,
774
893
  messageId: state.lastAssistantMessageId,
775
894
  });
895
+ // `message_complete` always precedes tool events (see handleMessageComplete),
896
+ // so this tool_use block is already durable in the assistant row. The
897
+ // `tool_use_start` emitted just above is therefore the newest stamped event
898
+ // whose content the `/messages` snapshot already reflects -- advance the
899
+ // persisted seq to it. Without this the snapshot would advertise a seq below
900
+ // an event it already incorporates, and a client applying `seq > snapshot.seq`
901
+ // would replay this tool start.
902
+ recordPersistedSeq(deps.ctx.conversationId, getCurrentSeq());
776
903
  }
777
904
 
778
905
  export function handleToolUsePreviewStart(
@@ -890,11 +1017,244 @@ export function handleInputJsonDelta(
890
1017
  });
891
1018
  }
892
1019
 
893
- export function handleToolResult(
1020
+ /**
1021
+ * Build the persisted `tool_result` content blocks for the buffered results,
1022
+ * redacting secrets from both the flat content and any structured blocks. All
1023
+ * results of one assistant turn share a single `user` row (the shape providers
1024
+ * expect for tool_result-in-user-turn).
1025
+ */
1026
+ function buildToolResultBlocks(
1027
+ pending: ReadonlyMap<string, PendingToolResult>,
1028
+ ) {
1029
+ return Array.from(pending.entries()).map(([toolUseId, result]) => ({
1030
+ type: "tool_result",
1031
+ tool_use_id: toolUseId,
1032
+ content: redactSecrets(result.content),
1033
+ is_error: result.isError,
1034
+ ...(result.contentBlocks
1035
+ ? {
1036
+ contentBlocks: result.contentBlocks.map((block) =>
1037
+ block.type === "text"
1038
+ ? { ...block, text: redactSecrets(block.text) }
1039
+ : block,
1040
+ ),
1041
+ }
1042
+ : {}),
1043
+ }));
1044
+ }
1045
+
1046
+ /**
1047
+ * Channel/interface provenance metadata for the grouped tool-result row,
1048
+ * stamped from the turn context so the row carries the same provenance the
1049
+ * snapshot reflects from the moment it lands in SQLite.
1050
+ */
1051
+ function buildToolResultMetadata(
1052
+ deps: EventHandlerDeps,
1053
+ ): Record<string, unknown> {
1054
+ return {
1055
+ ...provenanceFromTrustContext(deps.ctx.trustContext),
1056
+ userMessageChannel: deps.turnChannelContext.userMessageChannel,
1057
+ assistantMessageChannel: deps.turnChannelContext.assistantMessageChannel,
1058
+ userMessageInterface: deps.turnInterfaceContext.userMessageInterface,
1059
+ assistantMessageInterface:
1060
+ deps.turnInterfaceContext.assistantMessageInterface,
1061
+ };
1062
+ }
1063
+
1064
+ /**
1065
+ * Reserve the grouped `user` tool-result row for the current batch exactly
1066
+ * once. Parallel tool results are dispatched without awaiting (`agent/loop.ts`
1067
+ * emits each `tool_result` synchronously), so concurrent `handleToolResult`
1068
+ * calls can reach this before the first reservation resolves; sharing one
1069
+ * in-flight reservation promise keeps the whole batch in a single row. A
1070
+ * failed reservation resets the promise so the next caller can retry rather
1071
+ * than inheriting a settled rejection.
1072
+ */
1073
+ function ensureToolResultRowReserved(
1074
+ state: EventHandlerState,
1075
+ conversationId: string,
1076
+ metadata: Record<string, unknown>,
1077
+ ): Promise<string> {
1078
+ if (state.pendingToolResultRowReservation === undefined) {
1079
+ state.pendingToolResultRowReservation = reserveMessage(
1080
+ conversationId,
1081
+ "user",
1082
+ metadata,
1083
+ )
1084
+ .then((reserved) => reserved.id)
1085
+ .catch((err) => {
1086
+ state.pendingToolResultRowReservation = undefined;
1087
+ throw err;
1088
+ });
1089
+ }
1090
+ return state.pendingToolResultRowReservation;
1091
+ }
1092
+
1093
+ /**
1094
+ * Persist the buffered tool results into their grouped `user` row as each
1095
+ * result arrives, so a long-running tool's output survives a refresh that
1096
+ * outlives the SSE replay window. The row is reserved once per batch and
1097
+ * rewritten in place as sibling parallel results land, keeping all
1098
+ * `tool_result` blocks of one turn in a single message. `seq` is the position
1099
+ * stamped on the triggering `tool_result` event, captured by the caller before
1100
+ * any await so it reflects exactly the content now durable in the row.
1101
+ * Indexing and the buffer drain are deferred to `finalizePendingToolResultRow`.
1102
+ */
1103
+ async function persistPendingToolResultRow(
1104
+ state: EventHandlerState,
1105
+ deps: EventHandlerDeps,
1106
+ seq: number,
1107
+ ): Promise<void> {
1108
+ if (state.pendingToolResults.size === 0) return;
1109
+ const rowId = await ensureToolResultRowReserved(
1110
+ state,
1111
+ deps.ctx.conversationId,
1112
+ buildToolResultMetadata(deps),
1113
+ );
1114
+ // Serialize the content after the reservation resolves so the last of the
1115
+ // concurrent writers reflects the fullest batch.
1116
+ updateMessageContent(
1117
+ rowId,
1118
+ JSON.stringify(buildToolResultBlocks(state.pendingToolResults)),
1119
+ );
1120
+ recordPersistedSeq(deps.ctx.conversationId, seq);
1121
+ const conv = getConversation(deps.ctx.conversationId);
1122
+ if (conv != null) {
1123
+ syncMessageToDisk(deps.ctx.conversationId, rowId, conv.createdAt);
1124
+ }
1125
+ }
1126
+
1127
+ /**
1128
+ * Finalize the grouped tool-result row at a turn/loop boundary: ensure the row
1129
+ * is reserved (a fallback for the case where every on-arrival write failed),
1130
+ * rewrite it to the full batch, sync it to disk, index it for memory recall,
1131
+ * and clear the batch state. Shared by `message_complete` and the orchestrator
1132
+ * loop-exit flush so an aborted or yielded turn finalizes the same reserved row
1133
+ * instead of writing a duplicate.
1134
+ */
1135
+ export async function finalizePendingToolResultRow(
1136
+ state: EventHandlerState,
1137
+ conversationId: string,
1138
+ metadata: Record<string, unknown>,
1139
+ rlog: pino.Logger,
1140
+ ): Promise<void> {
1141
+ if (state.pendingToolResults.size === 0) return;
1142
+ const rowId = await ensureToolResultRowReserved(
1143
+ state,
1144
+ conversationId,
1145
+ metadata,
1146
+ );
1147
+ const contentJson = JSON.stringify(
1148
+ buildToolResultBlocks(state.pendingToolResults),
1149
+ );
1150
+ updateMessageContent(rowId, contentJson);
1151
+ // Sync the row to the JSONL disk view so it stays in lockstep with the DB.
1152
+ // `getConversation` returns `ConversationRow | null`, so `!= null` gates on a
1153
+ // real row (skipping the sync when the conversation was not found rather than
1154
+ // asking the disk-view to resolve a missing id).
1155
+ const conv = getConversation(conversationId);
1156
+ if (conv != null) {
1157
+ syncMessageToDisk(conversationId, rowId, conv.createdAt);
1158
+ }
1159
+ // `reserveMessage` + `updateMessageContent` are CRUD-only, so index the
1160
+ // finalized tool-result content explicitly here (mirroring the assistant-row
1161
+ // finalize) once it is durable. Non-fatal: a memory hiccup must not escalate
1162
+ // a successful turn into a throw.
1163
+ const row = getMessageById(rowId, conversationId);
1164
+ if (row) {
1165
+ let provenanceTrustClass:
1166
+ | "guardian"
1167
+ | "trusted_contact"
1168
+ | "unknown"
1169
+ | undefined;
1170
+ let automated: boolean | undefined;
1171
+ if (row.metadata) {
1172
+ try {
1173
+ const parsedMeta = messageMetadataSchema.safeParse(
1174
+ JSON.parse(row.metadata),
1175
+ );
1176
+ if (parsedMeta.success) {
1177
+ provenanceTrustClass = parsedMeta.data.provenanceTrustClass;
1178
+ automated = parsedMeta.data.automated;
1179
+ }
1180
+ } catch {
1181
+ // Malformed metadata JSON — index with undefined provenance fields.
1182
+ }
1183
+ }
1184
+ try {
1185
+ await indexMessageNow(
1186
+ {
1187
+ messageId: rowId,
1188
+ conversationId,
1189
+ role: "user",
1190
+ content: contentJson,
1191
+ createdAt: row.createdAt,
1192
+ scopeId: "default",
1193
+ provenanceTrustClass,
1194
+ automated,
1195
+ },
1196
+ getConfig().memory,
1197
+ );
1198
+ } catch (err) {
1199
+ rlog.warn(
1200
+ { err, conversationId, messageId: rowId },
1201
+ "Failed to index tool-result message for memory (non-fatal)",
1202
+ );
1203
+ }
1204
+ }
1205
+ for (const id of state.pendingToolResults.keys()) {
1206
+ state.persistedToolUseIds.add(id);
1207
+ }
1208
+ state.pendingToolResults.clear();
1209
+ state.pendingToolResultRowReservation = undefined;
1210
+ }
1211
+
1212
+ export async function handleToolResult(
894
1213
  state: EventHandlerState,
895
1214
  deps: EventHandlerDeps,
896
1215
  event: Extract<AgentEvent, { type: "tool_result" }>,
897
- ): void {
1216
+ ): Promise<void> {
1217
+ // A synthesized cancellation (the tool never executed) is captured for
1218
+ // persistence and forwarded to the client like any result, but skips every
1219
+ // side effect that assumes the tool ran. A real result already captured or
1220
+ // persisted for the same tool wins, so only fill genuine gaps.
1221
+ if (event.cancelled) {
1222
+ if (
1223
+ state.pendingToolResults.has(event.toolUseId) ||
1224
+ state.persistedToolUseIds.has(event.toolUseId)
1225
+ ) {
1226
+ return;
1227
+ }
1228
+ state.pendingToolResults.set(event.toolUseId, {
1229
+ content: event.content,
1230
+ isError: event.isError,
1231
+ });
1232
+ state.currentToolUseId = undefined;
1233
+ deps.onEvent({
1234
+ type: "tool_result",
1235
+ toolName: "",
1236
+ result: event.content,
1237
+ isError: event.isError,
1238
+ conversationId: deps.ctx.conversationId,
1239
+ messageId: state.lastAssistantMessageId,
1240
+ toolUseId: event.toolUseId,
1241
+ });
1242
+ // Capture the seq synchronously (before the persist await) so it reflects
1243
+ // the just-stamped tool_result event, then persist on arrival. A failure
1244
+ // here is non-fatal: the buffered result is still drained at
1245
+ // `message_complete`.
1246
+ const cancelledSeq = getCurrentSeq();
1247
+ try {
1248
+ await persistPendingToolResultRow(state, deps, cancelledSeq);
1249
+ } catch (err) {
1250
+ log.warn(
1251
+ { err, conversationId: deps.ctx.conversationId },
1252
+ "Failed to persist cancelled tool result on arrival (non-fatal; retried at message_complete)",
1253
+ );
1254
+ }
1255
+ return;
1256
+ }
1257
+
898
1258
  const imageBlocks = event.contentBlocks?.filter(
899
1259
  (b): b is ImageContent => b.type === "image",
900
1260
  );
@@ -945,6 +1305,13 @@ export function handleToolResult(
945
1305
  });
946
1306
  }
947
1307
 
1308
+ // Capture tool activity (web_search / web_fetch) so it can be persisted on
1309
+ // the tool_use block and the activity card survives a history reopen,
1310
+ // matching the live tool_result event's activityMetadata.
1311
+ if (event.activityMetadata) {
1312
+ state.toolActivityMetadata.set(event.toolUseId, event.activityMetadata);
1313
+ }
1314
+
948
1315
  const toolName = state.toolUseIdToName.get(event.toolUseId);
949
1316
  if (toolName === "file_write" || toolName === "bash") {
950
1317
  deps.ctx.markWorkspaceTopLevelDirty();
@@ -1026,6 +1393,20 @@ export function handleToolResult(
1026
1393
  riskThreshold: event.riskThreshold,
1027
1394
  activityMetadata: event.activityMetadata,
1028
1395
  });
1396
+
1397
+ // Capture the seq synchronously (before the persist await) so it reflects the
1398
+ // just-stamped tool_result event, then persist the grouped row on arrival. A
1399
+ // failure here is non-fatal: the buffered result is still drained at
1400
+ // `message_complete`.
1401
+ const resultSeq = getCurrentSeq();
1402
+ try {
1403
+ await persistPendingToolResultRow(state, deps, resultSeq);
1404
+ } catch (err) {
1405
+ log.warn(
1406
+ { err, conversationId: deps.ctx.conversationId },
1407
+ "Failed to persist tool result on arrival (non-fatal; retried at message_complete)",
1408
+ );
1409
+ }
1029
1410
  }
1030
1411
 
1031
1412
  /**
@@ -1097,6 +1478,16 @@ function annotatePersistedAssistantMessage(
1097
1478
  rec._riskDirectoryScopeOptions = risk.riskDirectoryScopeOptions;
1098
1479
  modified = true;
1099
1480
  }
1481
+ // External provider tools (brave/perplexity/tavily) + web_fetch produce
1482
+ // their activity only when the tool_result lands, after message_complete
1483
+ // has already persisted this block — so it is stamped here. Native
1484
+ // server_tool_use activity is stamped earlier, at persist time, in
1485
+ // `buildPersistedAssistantContent`.
1486
+ const activity = state.toolActivityMetadata.get(id);
1487
+ if (activity) {
1488
+ rec._activityMetadata = activity;
1489
+ modified = true;
1490
+ }
1100
1491
  }
1101
1492
  }
1102
1493
 
@@ -1259,6 +1650,18 @@ export async function handleMessageComplete(
1259
1650
  deps: EventHandlerDeps,
1260
1651
  event: Extract<AgentEvent, { type: "message_complete" }>,
1261
1652
  ): Promise<void> {
1653
+ // The model has now received the turn context, so persist any pending
1654
+ // inference-profile-change notification. Guarded by the pending slot so it
1655
+ // fires once per turn; a turn that fails before reaching delivery leaves the
1656
+ // slot unconsumed and re-sends the notice next turn.
1657
+ if (state.pendingNotifiedInferenceProfile != null) {
1658
+ setLastNotifiedInferenceProfile(
1659
+ deps.ctx.conversationId,
1660
+ state.pendingNotifiedInferenceProfile,
1661
+ );
1662
+ state.pendingNotifiedInferenceProfile = null;
1663
+ }
1664
+
1262
1665
  // Reset per-turn tool tracking for the new turn.
1263
1666
  state.currentTurnToolUseIds = [];
1264
1667
 
@@ -1298,62 +1701,16 @@ export async function handleMessageComplete(
1298
1701
  state.pendingDirectiveDisplayBuffer = "";
1299
1702
  }
1300
1703
 
1301
- // Persist pending tool results
1302
- if (state.pendingToolResults.size > 0) {
1303
- const toolResultBlocks = Array.from(state.pendingToolResults.entries()).map(
1304
- ([toolUseId, result]) => ({
1305
- type: "tool_result",
1306
- tool_use_id: toolUseId,
1307
- content: redactSecrets(result.content),
1308
- is_error: result.isError,
1309
- ...(result.contentBlocks
1310
- ? {
1311
- contentBlocks: result.contentBlocks.map((block) =>
1312
- block.type === "text"
1313
- ? { ...block, text: redactSecrets(block.text) }
1314
- : block,
1315
- ),
1316
- }
1317
- : {}),
1318
- }),
1319
- );
1320
- const toolResultMetadata = {
1321
- ...provenanceFromTrustContext(deps.ctx.trustContext),
1322
- userMessageChannel: deps.turnChannelContext.userMessageChannel,
1323
- assistantMessageChannel: deps.turnChannelContext.assistantMessageChannel,
1324
- userMessageInterface: deps.turnInterfaceContext.userMessageInterface,
1325
- assistantMessageInterface:
1326
- deps.turnInterfaceContext.assistantMessageInterface,
1327
- };
1328
- // Route the add + disk-view sync through the `persistence` pipeline so
1329
- // plugins can observe or override both operations together. The default
1330
- // plugin's terminal performs the add and, when `syncToDisk` is true,
1331
- // immediately calls `syncMessageToDisk` against the just-persisted row.
1332
- // `getConversation` returns `ConversationRow | null`, so `!= null`
1333
- // gates on a real row (skipping the sync when the conversation was
1334
- // not found rather than asking the disk-view to resolve a missing id).
1335
- const convForToolResult = getConversation(deps.ctx.conversationId);
1336
- await runPipeline<PersistArgs, PersistResult>(
1337
- "persistence",
1338
- getMiddlewaresFor("persistence"),
1339
- defaultPersistenceTerminal,
1340
- {
1341
- op: "add",
1342
- conversationId: deps.ctx.conversationId,
1343
- role: "user",
1344
- content: JSON.stringify(toolResultBlocks),
1345
- metadata: toolResultMetadata,
1346
- syncToDisk: convForToolResult != null,
1347
- createdAtMs: convForToolResult?.createdAt,
1348
- },
1349
- buildHandlerTurnContext(deps),
1350
- DEFAULT_TIMEOUTS.persistence,
1351
- );
1352
- for (const id of state.pendingToolResults.keys()) {
1353
- state.persistedToolUseIds.add(id);
1354
- }
1355
- state.pendingToolResults.clear();
1356
- }
1704
+ // Finalize the grouped tool-result row. Each result was persisted into this
1705
+ // row as it arrived (`persistPendingToolResultRow`); this rewrites it to the
1706
+ // full batch (covering the case where a mid-arrival write failed), indexes it
1707
+ // for memory recall, and clears the batch state.
1708
+ await finalizePendingToolResultRow(
1709
+ state,
1710
+ deps.ctx.conversationId,
1711
+ buildToolResultMetadata(deps),
1712
+ deps.rlog,
1713
+ );
1357
1714
 
1358
1715
  // Accumulate directives + warnings from the assistant content for
1359
1716
  // downstream attachment processing. `cleanAssistantContent` is also
@@ -1391,6 +1748,7 @@ export async function handleMessageComplete(
1391
1748
  const contentForPersistence = buildPersistedAssistantContent(
1392
1749
  event.message.content as ContentBlock[],
1393
1750
  deps.ctx.currentTurnSurfaces,
1751
+ state.toolActivityMetadata,
1394
1752
  );
1395
1753
 
1396
1754
  // The row was reserved at `llm_call_started` (with channel metadata
@@ -1407,28 +1765,30 @@ export async function handleMessageComplete(
1407
1765
  );
1408
1766
  }
1409
1767
  const contentJson = JSON.stringify(contentForPersistence);
1410
- await runPipeline<PersistArgs, PersistResult>(
1411
- "persistence",
1412
- getMiddlewaresFor("persistence"),
1413
- defaultPersistenceTerminal,
1414
- {
1415
- op: "updateContent",
1416
- messageId: assistantMessageId,
1417
- content: contentJson,
1418
- },
1419
- buildHandlerTurnContext(deps),
1420
- DEFAULT_TIMEOUTS.persistence,
1421
- );
1768
+ updateMessageContent(assistantMessageId, contentJson);
1422
1769
  state.assistantRowAwaitingFinalization = false;
1770
+ // The assistant row now holds the authoritative content (text + thinking +
1771
+ // tool_use blocks from `event.message`), and any drained tool-result rows
1772
+ // are durable. `lastPersistedContentSeq` is the last streamed text/thinking
1773
+ // delta's seq -- the highest stamped content event this row reflects -- so
1774
+ // recording it is honest. A drained tool result was stamped earlier in the
1775
+ // turn, so this seq already covers it; a call that streams no content (a
1776
+ // pure tool call) advances instead via `tool_use_start`. `recordPersistedSeq`
1777
+ // clamps monotonically, so a lower value here never regresses the seq.
1778
+ if (state.lastPersistedContentSeq != null) {
1779
+ recordPersistedSeq(deps.ctx.conversationId, state.lastPersistedContentSeq);
1780
+ }
1423
1781
  // Reset the partial-persist mirror so subsequent calls in this turn
1424
1782
  // start with an empty running view.
1425
1783
  state.currentMessageContent = [];
1784
+ state.lastPersistedContentSeq = undefined;
1426
1785
 
1427
- // ── Indexing + attention projection (restored from the pre-B3 `add` path) ──
1786
+ // ── Indexing + attention projection ──
1428
1787
  // `reserveMessage` + `updateMessageContent` are CRUD-only: they don't run
1429
- // the memory indexer or the attention-cursor projector. The pre-B3 path
1430
- // wrote the row via `addMessage`, which ran both as side-effects of the
1431
- // insert. Calling them here keeps the assistant row's external state
1788
+ // the memory indexer or the attention-cursor projector (unlike `addMessage`,
1789
+ // which runs both as side-effects of the insert). Because the assistant row
1790
+ // is reserved empty and finalized via `updateMessageContent`, both must be
1791
+ // invoked explicitly here to keep the assistant row's external state
1432
1792
  // (Qdrant segments, conversation attention cursor) in lockstep with the
1433
1793
  // finalized content. Both are non-fatal — a memory hiccup must not
1434
1794
  // escalate a successful generation into a turn-level throw. Indexing
@@ -1759,7 +2119,7 @@ export async function dispatchAgentEvent(
1759
2119
  handleInputJsonDelta(state, deps, event);
1760
2120
  break;
1761
2121
  case "tool_result":
1762
- handleToolResult(state, deps, event);
2122
+ await handleToolResult(state, deps, event);
1763
2123
  break;
1764
2124
  case "server_tool_start": {
1765
2125
  const query =
@@ -1828,9 +2188,65 @@ export async function dispatchAgentEvent(
1828
2188
  // for them would mis-label the provider and ship empty results.
1829
2189
  const isAnthropicNative = deps.ctx.provider.name === "anthropic";
1830
2190
 
1831
- const errorMessage = event.isError
1832
- ? (event.errorMessage ?? event.errorCode ?? "Search failed")
1833
- : undefined;
2191
+ // Classify provider failures through the shared normalizer so the same
2192
+ // friendly copy propagates to every client via WebSearchMetadata, while
2193
+ // the raw provider detail stays in telemetry only (ATL-727).
2194
+ const classification = classifyWebSearchFailure({
2195
+ errorCode: event.errorCode,
2196
+ error: event.errorMessage,
2197
+ isError: event.isError,
2198
+ hasResults: results.length > 0,
2199
+ });
2200
+
2201
+ let errorMessage: string | undefined;
2202
+ let fallbackShown = false;
2203
+ if (event.isError) {
2204
+ // A genuine backend failure OR an unclassifiable, message-less native
2205
+ // failure (e.g. `isError:true` with no `error_code`) both surface the
2206
+ // friendly backend copy: a terse "Search failed" placeholder is the
2207
+ // confusing copy this normalization exists to eliminate (ATL-727).
2208
+ // Recoverable categories that carry a real user message
2209
+ // (query_too_long, max_uses_exceeded) keep their own copy.
2210
+ const useBackendCopy =
2211
+ classification.isBackendFailure || !classification.userMessage;
2212
+ if (useBackendCopy) {
2213
+ // Dedup the user-facing friendly notice per turn (request id) so a
2214
+ // burst of failures surfaces at most one full notice. The raw
2215
+ // provider error is preserved on every failure via telemetry below.
2216
+ const alreadyNotified = state.webSearchBackendFailureNotified.has(
2217
+ deps.reqId,
2218
+ );
2219
+ if (alreadyNotified) {
2220
+ errorMessage = "Search is still having trouble.";
2221
+ } else {
2222
+ state.webSearchBackendFailureNotified.add(deps.reqId);
2223
+ errorMessage = WEB_SEARCH_BACKEND_FAILURE_MESSAGE;
2224
+ fallbackShown = true;
2225
+ }
2226
+
2227
+ // Backend-failure telemetry (provider outages / rate limits) must
2228
+ // fire only for genuine backend classifications so it does not
2229
+ // count recoverable input/quota errors — or a message-less unknown
2230
+ // failure that merely borrows the friendly copy — as provider
2231
+ // outages.
2232
+ if (classification.isBackendFailure) {
2233
+ logWebSearchBackendFailure(deps.rlog, {
2234
+ provider: isAnthropicNative
2235
+ ? "anthropic-native"
2236
+ : deps.ctx.provider.name,
2237
+ requestId: deps.reqId,
2238
+ errorCategory: classification.category,
2239
+ rawDetail: classification.rawDetail,
2240
+ fallbackShown,
2241
+ queryLength: query.length,
2242
+ });
2243
+ }
2244
+ } else {
2245
+ // Recoverable, non-backend categories with their own user-facing
2246
+ // copy (query_too_long, max_uses_exceeded) keep that message.
2247
+ errorMessage = classification.userMessage;
2248
+ }
2249
+ }
1834
2250
 
1835
2251
  const metadata: WebSearchMetadata | undefined = isAnthropicNative
1836
2252
  ? {
@@ -1847,6 +2263,14 @@ export async function dispatchAgentEvent(
1847
2263
  .map((r) => `${r.title}\n${r.url}`)
1848
2264
  .join("\n\n");
1849
2265
 
2266
+ // Capture activity so it persists on the server_tool_use block and the
2267
+ // web-search card survives a history reopen, matching the live event.
2268
+ if (metadata) {
2269
+ state.toolActivityMetadata.set(event.toolUseId, {
2270
+ webSearch: metadata,
2271
+ });
2272
+ }
2273
+
1850
2274
  deps.onEvent({
1851
2275
  type: "tool_result",
1852
2276
  toolName: "web_search",
@@ -1873,6 +2297,31 @@ export async function dispatchAgentEvent(
1873
2297
  // banner.
1874
2298
  deps.onEvent(event);
1875
2299
  break;
2300
+ case "compaction_completed":
2301
+ // Always commit the loop-stripped `basis` as the durable message base
2302
+ // so re-injection re-applies onto the stripped history even when the
2303
+ // pipeline ran but did not compact. When it did compact, commit the
2304
+ // durable result (DB-record fields, Slack provenance, SSE) — which
2305
+ // overwrites `ctx.messages` with the compacted history — and flip the
2306
+ // per-turn re-injection guards the orchestrator reads. This runs
2307
+ // before the loop's `reinject` hook (the loop awaits this dispatch),
2308
+ // so the guards are set in time. A failed durable commit re-throws
2309
+ // below to abort the turn rather than re-injecting against
2310
+ // half-applied state.
2311
+ deps.ctx.messages = event.basis;
2312
+ if (event.result.compacted) {
2313
+ await deps.applyCompaction(event.result, event.basis);
2314
+ state.reducerCompacted = true;
2315
+ state.shouldInjectWorkspace = true;
2316
+ }
2317
+ break;
2318
+ case "history_stripped":
2319
+ // Record the history-stripped DB marker right after the loop strips
2320
+ // injections (before the pipeline). Best-effort: a transient marker
2321
+ // write must not abort the turn, so unlike `compaction_completed` this
2322
+ // is not on the re-throw allowlist below.
2323
+ markHistoryStrippedBestEffort(deps.ctx.conversationId);
2324
+ break;
1876
2325
  case "error":
1877
2326
  handleError(state, deps, event);
1878
2327
  break;
@@ -1925,10 +2374,13 @@ export async function dispatchAgentEvent(
1925
2374
  // - message_complete: persists assistant message to DB, sets state flags
1926
2375
  // - error: sets recovery flags (contextTooLargeDetected, orderingErrorDetected)
1927
2376
  // - usage: records token accounting
2377
+ // - compaction_completed: durable compaction commit; aborting the turn is
2378
+ // safer than re-injecting against a half-applied compaction
1928
2379
  if (
1929
2380
  event.type === "message_complete" ||
1930
2381
  event.type === "error" ||
1931
- event.type === "usage"
2382
+ event.type === "usage" ||
2383
+ event.type === "compaction_completed"
1932
2384
  ) {
1933
2385
  throw err;
1934
2386
  }