@vellumai/assistant 0.8.7 → 0.8.8

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (387)
  1. package/Dockerfile +20 -4
  2. package/docker-entrypoint.sh +4 -2
  3. package/docker-init-apt-root.sh +3 -1
  4. package/docker-kata-apt-env.sh +3 -1
  5. package/docker-kata-runtime-family.sh +12 -0
  6. package/docs/architecture/memory.md +1 -1
  7. package/docs/plugins.md +75 -79
  8. package/examples/plugins/echo/README.md +6 -12
  9. package/examples/plugins/echo/register.ts +0 -41
  10. package/node_modules/@vellumai/skill-host-contracts/src/server-message.ts +3 -3
  11. package/openapi.yaml +3381 -348
  12. package/package.json +1 -1
  13. package/scripts/generate-openapi.ts +68 -41
  14. package/src/__tests__/agent-loop-exit-reason.test.ts +34 -39
  15. package/src/__tests__/agent-loop-provider-error-recording.test.ts +1 -1
  16. package/src/__tests__/agent-loop.test.ts +37 -87
  17. package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +2 -0
  18. package/src/__tests__/annotate-activity-metadata.test.ts +262 -0
  19. package/src/__tests__/annotate-risk-options.test.ts +2 -3
  20. package/src/__tests__/anthropic-provider.test.ts +95 -2
  21. package/src/__tests__/assistant-event-hub.test.ts +25 -0
  22. package/src/__tests__/assistant-events-sse-shed.test.ts +8 -0
  23. package/src/__tests__/{conversation-stream-state.test.ts → assistant-stream-state.test.ts} +252 -91
  24. package/src/__tests__/auth-fallback-events-store.test.ts +116 -0
  25. package/src/__tests__/background-workers-disk-pressure.test.ts +6 -0
  26. package/src/__tests__/btw-routes.test.ts +62 -3
  27. package/src/__tests__/build-persisted-content.test.ts +184 -0
  28. package/src/__tests__/catalog-files.test.ts +1 -1
  29. package/src/__tests__/clawhub-files.test.ts +1 -1
  30. package/src/__tests__/compaction-pipeline.test.ts +1 -1
  31. package/src/__tests__/compaction.benchmark.test.ts +0 -30
  32. package/src/__tests__/config-watcher.test.ts +1 -1
  33. package/src/__tests__/conversation-abort-tool-results.test.ts +57 -19
  34. package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +6 -2
  35. package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +10 -4
  36. package/src/__tests__/conversation-agent-loop-overflow.test.ts +313 -1136
  37. package/src/__tests__/conversation-agent-loop.test.ts +596 -1616
  38. package/src/__tests__/conversation-analysis-routes.test.ts +6 -0
  39. package/src/__tests__/conversation-history-web-search.test.ts +11 -1
  40. package/src/__tests__/conversation-pairing.test.ts +4 -31
  41. package/src/__tests__/conversation-process-app-control-preactivation.test.ts +6 -0
  42. package/src/__tests__/conversation-provider-retry-repair.test.ts +26 -5
  43. package/src/__tests__/conversation-queue.test.ts +2 -0
  44. package/src/__tests__/conversation-routes-disk-view.test.ts +3 -0
  45. package/src/__tests__/conversation-routes-slash-commands.test.ts +6 -5
  46. package/src/__tests__/conversation-runtime-assembly.test.ts +170 -229
  47. package/src/__tests__/conversation-runtime-workspace.test.ts +3 -24
  48. package/src/__tests__/conversation-slash-commands.test.ts +8 -42
  49. package/src/__tests__/conversation-slash-queue.test.ts +6 -1
  50. package/src/__tests__/conversation-surfaces-action-delivery.test.ts +84 -0
  51. package/src/__tests__/conversation-sync-tags.test.ts +27 -15
  52. package/src/__tests__/conversation-title-service.test.ts +135 -2
  53. package/src/__tests__/conversation-workspace-injection.test.ts +6 -1
  54. package/src/__tests__/cross-provider-web-search.test.ts +214 -1
  55. package/src/__tests__/db-schedule-syntax-migration.test.ts +5 -0
  56. package/src/__tests__/dm-persistence.test.ts +5 -1
  57. package/src/__tests__/empty-response-hook.test.ts +304 -0
  58. package/src/__tests__/feature-flag-test-helpers.ts +2 -2
  59. package/src/__tests__/gemini-image-service.test.ts +13 -0
  60. package/src/__tests__/helpers/mock-provider.ts +110 -0
  61. package/src/__tests__/helpers/native-web-search-harness.ts +129 -0
  62. package/src/__tests__/history-repair-hook.test.ts +1 -0
  63. package/src/__tests__/identity-intro-cache.test.ts +12 -100
  64. package/src/__tests__/identity-routes.test.ts +248 -7
  65. package/src/__tests__/inbound-slack-persistence.test.ts +5 -1
  66. package/src/__tests__/injector-background-turn.test.ts +2 -8
  67. package/src/__tests__/injector-chain.test.ts +106 -270
  68. package/src/__tests__/injector-disk-pressure.test.ts +3 -12
  69. package/src/__tests__/injector-document-comments.test.ts +2 -2
  70. package/src/__tests__/injector-pkb-v2-silenced.test.ts +30 -22
  71. package/src/__tests__/injector-v3-suppression.test.ts +31 -37
  72. package/src/__tests__/internal-telemetry-routes.test.ts +109 -0
  73. package/src/__tests__/list-messages-page-latest.test.ts +60 -0
  74. package/src/__tests__/list-messages-tool-merge.test.ts +20 -0
  75. package/src/__tests__/llm-usage-store.test.ts +223 -1
  76. package/src/__tests__/memory-retrieval-hook.test.ts +297 -0
  77. package/src/__tests__/memory-v2-static-injector.test.ts +103 -35
  78. package/src/__tests__/native-web-search.test.ts +191 -0
  79. package/src/__tests__/onboarding-template-contract.test.ts +2 -0
  80. package/src/__tests__/openai-image-service.test.ts +17 -0
  81. package/src/__tests__/openai-provider.test.ts +31 -1
  82. package/src/__tests__/persist-unsendable-image.test.ts +215 -0
  83. package/src/__tests__/persistence-secret-redaction.test.ts +1 -0
  84. package/src/__tests__/pipeline-runner.test.ts +29 -39
  85. package/src/__tests__/pkb-autoinject.test.ts +2 -5
  86. package/src/__tests__/plugin-bootstrap.test.ts +13 -28
  87. package/src/__tests__/plugin-registry.test.ts +0 -27
  88. package/src/__tests__/plugin-types.test.ts +2 -125
  89. package/src/__tests__/process-message-display-content.test.ts +6 -2
  90. package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +5 -1
  91. package/src/__tests__/resolve-trust-class.test.ts +4 -4
  92. package/src/__tests__/runtime-events-sse-reconnect.test.ts +60 -23
  93. package/src/__tests__/schedule-routes.test.ts +603 -2
  94. package/src/__tests__/schedule-store.test.ts +41 -0
  95. package/src/__tests__/schedule-tools.test.ts +35 -0
  96. package/src/__tests__/server-history-render.test.ts +314 -1
  97. package/src/__tests__/skillssh-files.test.ts +1 -1
  98. package/src/__tests__/system-prompt.test.ts +20 -0
  99. package/src/__tests__/task-scheduler.test.ts +162 -1
  100. package/src/__tests__/terminal-tools.test.ts +6 -1
  101. package/src/__tests__/title-generate-hook.test.ts +319 -0
  102. package/src/__tests__/tool-error-hook.test.ts +278 -0
  103. package/src/__tests__/tool-preview-lifecycle.test.ts +468 -5
  104. package/src/__tests__/tool-result-metadata-plumbing.test.ts +1 -0
  105. package/src/__tests__/tool-result-truncate-hook.test.ts +127 -0
  106. package/src/__tests__/tool-result-truncation.test.ts +0 -2
  107. package/src/__tests__/ui-choice-copy-surfaces.test.ts +254 -0
  108. package/src/__tests__/ui-work-result-surface.test.ts +159 -0
  109. package/src/__tests__/usage-routes.test.ts +285 -1
  110. package/src/__tests__/user-plugin-loader.test.ts +2 -2
  111. package/src/__tests__/voice-session-bridge.test.ts +6 -3
  112. package/src/__tests__/web-search-backend-failure.test.ts +166 -0
  113. package/src/agent/loop.ts +346 -442
  114. package/src/api/events/assistant-thinking-delta.ts +33 -0
  115. package/src/api/events/tool-output-chunk.ts +45 -0
  116. package/src/api/events/tool-use-preview-start.ts +32 -0
  117. package/src/api/events/trace-event.ts +69 -0
  118. package/src/api/index.ts +48 -13
  119. package/src/api/responses/conversation-message.ts +368 -0
  120. package/src/avatar/__tests__/avatar-store.test.ts +34 -29
  121. package/src/cli/commands/__tests__/notifications.test.ts +58 -14
  122. package/src/cli/commands/notifications.ts +112 -60
  123. package/src/config/assistant-feature-flags.ts +22 -11
  124. package/src/config/bundled-skills/app-builder/SKILL.md +3 -20
  125. package/src/config/bundled-skills/app-builder/references/examples/README.md +17 -0
  126. package/src/config/bundled-skills/app-builder/references/examples/expense-tracker.md +515 -0
  127. package/src/config/bundled-skills/app-builder/references/examples/focus-timer.md +342 -0
  128. package/src/config/bundled-skills/app-builder/references/examples/habit-tracker.md +490 -0
  129. package/src/config/bundled-skills/document-editor/SKILL.md +1 -1
  130. package/src/config/bundled-skills/messaging/SKILL.md +0 -7
  131. package/src/config/feature-flag-cache.ts +3 -3
  132. package/src/config/feature-flag-registry.json +35 -3
  133. package/src/config/schemas/__tests__/memory-v2.test.ts +1 -0
  134. package/src/config/schemas/__tests__/memory-v3.test.ts +25 -0
  135. package/src/config/schemas/llm.ts +1 -0
  136. package/src/config/schemas/memory-v2.ts +8 -0
  137. package/src/config/schemas/memory-v3.ts +8 -0
  138. package/src/config/schemas/platform.ts +8 -0
  139. package/src/config/seed-inference-profiles.ts +2 -2
  140. package/src/config/skills.ts +13 -0
  141. package/src/context/compactor.ts +1 -1
  142. package/src/context/strip-injections.ts +122 -0
  143. package/src/context/token-estimator.ts +23 -0
  144. package/src/context/tool-result-truncation.ts +0 -23
  145. package/src/context/window-manager.ts +3 -6
  146. package/src/credential-execution/executable-discovery.ts +16 -0
  147. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +6 -0
  148. package/src/daemon/__tests__/inference-profile-notification.test.ts +153 -0
  149. package/src/daemon/__tests__/native-web-search-metadata.test.ts +10 -8
  150. package/src/daemon/assistant-attachments.ts +1 -1
  151. package/src/daemon/config-watcher.ts +2 -2
  152. package/src/daemon/context-overflow-reducer.ts +0 -1
  153. package/src/daemon/conversation-agent-loop-handlers.ts +605 -153
  154. package/src/daemon/conversation-agent-loop.ts +281 -760
  155. package/src/daemon/conversation-history.ts +5 -4
  156. package/src/daemon/conversation-lifecycle.ts +3 -4
  157. package/src/daemon/conversation-messaging.ts +7 -6
  158. package/src/daemon/conversation-process.ts +11 -16
  159. package/src/daemon/conversation-runtime-assembly.ts +130 -347
  160. package/src/daemon/conversation-slash.ts +6 -25
  161. package/src/daemon/conversation-surfaces.ts +222 -4
  162. package/src/daemon/conversation-tool-setup.ts +2 -29
  163. package/src/daemon/conversation.ts +32 -14
  164. package/src/daemon/external-plugins-bootstrap.ts +9 -10
  165. package/src/daemon/handlers/config-a2a.ts +51 -36
  166. package/src/daemon/handlers/config-slack-channel.ts +20 -14
  167. package/src/daemon/handlers/config-telegram.ts +16 -2
  168. package/src/daemon/handlers/shared.ts +156 -84
  169. package/src/daemon/handlers/skills.ts +39 -10
  170. package/src/daemon/lifecycle.ts +4 -0
  171. package/src/daemon/message-types/apps.ts +1 -29
  172. package/src/daemon/message-types/messages.ts +9 -57
  173. package/src/daemon/message-types/skills.ts +2 -0
  174. package/src/daemon/message-types/surfaces.ts +136 -3
  175. package/src/daemon/now-scratchpad.ts +21 -0
  176. package/src/daemon/orphan-reaper.test.ts +210 -0
  177. package/src/daemon/orphan-reaper.ts +240 -0
  178. package/src/daemon/persist-unsendable-image.ts +117 -0
  179. package/src/daemon/process-message.ts +1 -3
  180. package/src/daemon/trace-emitter.ts +6 -4
  181. package/src/daemon/trust-context.ts +19 -0
  182. package/src/daemon/wake-target-adapter.ts +3 -1
  183. package/src/home/home-greeting-cache.ts +24 -1
  184. package/src/ipc/gateway-client.test.ts +2 -2
  185. package/src/ipc/gateway-client.ts +3 -3
  186. package/src/media/gemini-image-service.ts +15 -0
  187. package/src/media/openai-image-service.ts +14 -0
  188. package/src/media/types.ts +34 -0
  189. package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +56 -0
  190. package/src/memory/auth-fallback-events-store.ts +94 -0
  191. package/src/memory/conversation-title-service.ts +65 -41
  192. package/src/memory/db-init.ts +4 -0
  193. package/src/memory/graph/__tests__/conversation-graph-memory-registry.test.ts +119 -0
  194. package/src/memory/graph/conversation-graph-memory.ts +65 -0
  195. package/src/memory/jobs-store.ts +33 -0
  196. package/src/memory/jobs-worker.ts +31 -4
  197. package/src/memory/llm-usage-store.ts +224 -50
  198. package/src/memory/migrations/222-strip-placeholder-sentinels-from-messages.ts +6 -5
  199. package/src/memory/migrations/270-schedule-source-conversation.ts +13 -0
  200. package/src/memory/migrations/271-create-auth-fallback-events.ts +21 -0
  201. package/src/memory/migrations/index.ts +2 -0
  202. package/src/memory/pkb/autoinject.ts +61 -0
  203. package/src/memory/pkb/context.ts +50 -0
  204. package/src/memory/pkb/types.ts +14 -0
  205. package/src/memory/schedule-attribution-sql.ts +104 -0
  206. package/src/memory/schema/infrastructure.ts +16 -0
  207. package/src/memory/usage-grouped-buckets.ts +6 -1
  208. package/src/memory/v2/__tests__/consolidation-job.test.ts +1 -1
  209. package/src/memory/v2/consolidation-job.ts +1 -1
  210. package/src/memory/v3/__tests__/health.test.ts +16 -0
  211. package/src/memory/v3/__tests__/orchestrate.test.ts +45 -9
  212. package/src/memory/v3/__tests__/provider-blocks.test.ts +13 -0
  213. package/src/memory/v3/__tests__/router.test.ts +101 -29
  214. package/src/memory/v3/__tests__/selector.test.ts +93 -27
  215. package/src/memory/v3/__tests__/shadow-plugin.test.ts +23 -5
  216. package/src/memory/v3/health.ts +0 -0
  217. package/src/memory/v3/llm-retry.ts +32 -0
  218. package/src/memory/v3/orchestrate.ts +26 -14
  219. package/src/memory/v3/provider-blocks.ts +15 -5
  220. package/src/memory/v3/router.ts +48 -42
  221. package/src/memory/v3/selector.ts +57 -42
  222. package/src/memory/v3/shadow-plugin.ts +47 -15
  223. package/src/memory/v3/types.ts +8 -0
  224. package/src/notifications/conversation-pairing.ts +8 -15
  225. package/src/notifications/decision-engine.ts +6 -3
  226. package/src/notifications/home-feed-side-effect.ts +12 -1
  227. package/src/permissions/prompter.ts +4 -0
  228. package/src/plugin-api/constants.ts +4 -0
  229. package/src/plugin-api/index.ts +8 -1
  230. package/src/plugin-api/types.ts +151 -1
  231. package/src/plugins/defaults/empty-response/hooks/stop.ts +126 -0
  232. package/src/plugins/defaults/empty-response/register.ts +8 -13
  233. package/src/plugins/defaults/index.ts +1 -15
  234. package/src/plugins/defaults/injectors/register.ts +243 -74
  235. package/src/plugins/defaults/memory-retrieval/hooks/post-compact.ts +91 -0
  236. package/src/plugins/defaults/memory-retrieval/hooks/user-prompt-submit-temp.ts +216 -0
  237. package/src/plugins/defaults/memory-retrieval/injector-chain.ts +35 -0
  238. package/src/plugins/defaults/title-generate/hooks/stop.ts +75 -0
  239. package/src/plugins/defaults/title-generate/hooks/user-prompt-submit.ts +35 -0
  240. package/src/plugins/defaults/title-generate/package.json +1 -1
  241. package/src/plugins/defaults/title-generate/register.ts +18 -18
  242. package/src/plugins/defaults/tool-error/hooks/post-tool-use.ts +118 -0
  243. package/src/plugins/defaults/tool-error/package.json +1 -1
  244. package/src/plugins/defaults/tool-error/register.ts +9 -21
  245. package/src/plugins/defaults/tool-result-truncate/hooks/post-tool-use.ts +32 -0
  246. package/src/plugins/defaults/tool-result-truncate/register.ts +10 -21
  247. package/src/plugins/defaults/tool-result-truncate/terminal.ts +37 -18
  248. package/src/plugins/pipeline.ts +6 -18
  249. package/src/plugins/registry.ts +8 -25
  250. package/src/plugins/types.ts +43 -474
  251. package/src/proactive-artifact/aux-message-injector.ts +3 -3
  252. package/src/proactive-artifact/job.test.ts +7 -12
  253. package/src/prompts/__tests__/system-prompt.test.ts +36 -0
  254. package/src/prompts/templates/BOOTSTRAP-ACTIVATION-RAIL.md +62 -0
  255. package/src/prompts/templates/BOOTSTRAP.md +2 -2
  256. package/src/prompts/templates/system-sections.ts +15 -0
  257. package/src/providers/anthropic/client.ts +37 -29
  258. package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +112 -0
  259. package/src/providers/openai/chat-completions-provider.ts +44 -0
  260. package/src/providers/openrouter/client.ts +1 -0
  261. package/src/providers/placeholder-sentinels.ts +35 -0
  262. package/src/runtime/__tests__/agent-wake.test.ts +5 -1
  263. package/src/runtime/agent-wake.ts +2 -2
  264. package/src/runtime/assistant-event-hub.ts +36 -6
  265. package/src/runtime/{conversation-stream-state.ts → assistant-stream-state.ts} +132 -58
  266. package/src/runtime/http-router.ts +16 -21
  267. package/src/runtime/http-types.ts +16 -70
  268. package/src/runtime/pending-interactions.ts +1 -0
  269. package/src/runtime/routes/__tests__/consolidation-routes.test.ts +265 -2
  270. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +31 -1
  271. package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +6 -2
  272. package/src/runtime/routes/__tests__/tts-routes.test.ts +6 -2
  273. package/src/runtime/routes/app-management-routes.ts +6 -117
  274. package/src/runtime/routes/app-routes.ts +13 -15
  275. package/src/runtime/routes/attachment-routes.ts +26 -15
  276. package/src/runtime/routes/avatar-routes.ts +26 -0
  277. package/src/runtime/routes/btw-routes.ts +29 -23
  278. package/src/runtime/routes/consolidation-routes.ts +120 -20
  279. package/src/runtime/routes/conversation-query-routes.ts +2 -0
  280. package/src/runtime/routes/conversation-routes.ts +358 -184
  281. package/src/runtime/routes/documents-routes.ts +4 -0
  282. package/src/runtime/routes/domain-routes.ts +51 -37
  283. package/src/runtime/routes/epoch-millis-range.ts +34 -0
  284. package/src/runtime/routes/events-routes.ts +28 -34
  285. package/src/runtime/routes/gateway-log-routes.ts +26 -4
  286. package/src/runtime/routes/heartbeat-routes.ts +32 -12
  287. package/src/runtime/routes/identity-intro-cache.ts +11 -34
  288. package/src/runtime/routes/identity-routes.ts +208 -17
  289. package/src/runtime/routes/image-generation-routes.ts +40 -2
  290. package/src/runtime/routes/index.ts +2 -0
  291. package/src/runtime/routes/integrations/a2a.ts +12 -10
  292. package/src/runtime/routes/integrations/slack/__tests__/channel.test.ts +16 -0
  293. package/src/runtime/routes/integrations/slack/channel.ts +4 -0
  294. package/src/runtime/routes/integrations/slack/share.ts +27 -6
  295. package/src/runtime/routes/integrations/telegram.ts +6 -0
  296. package/src/runtime/routes/integrations/twilio.ts +42 -0
  297. package/src/runtime/routes/internal-telemetry-routes.ts +88 -0
  298. package/src/runtime/routes/log-export-routes.ts +8 -0
  299. package/src/runtime/routes/memory-v2-routes.ts +15 -8
  300. package/src/runtime/routes/memory-v3-routes.ts +50 -28
  301. package/src/runtime/routes/oauth-apps.ts +66 -12
  302. package/src/runtime/routes/oauth-providers.ts +44 -5
  303. package/src/runtime/routes/platform-routes.ts +81 -5
  304. package/src/runtime/routes/playground/__tests__/force-compact.test.ts +6 -4
  305. package/src/runtime/routes/playground/force-compact.ts +1 -1
  306. package/src/runtime/routes/rename-conversation-routes.ts +5 -0
  307. package/src/runtime/routes/schedule-routes.ts +152 -42
  308. package/src/runtime/routes/secret-routes.ts +14 -2
  309. package/src/runtime/routes/skills-routes.ts +43 -14
  310. package/src/runtime/routes/tool-call-confirmation-enrichment.test.ts +161 -0
  311. package/src/runtime/routes/tool-call-confirmation-enrichment.ts +107 -0
  312. package/src/runtime/routes/trust-rules-routes.ts +26 -2
  313. package/src/runtime/routes/tts-routes.ts +35 -0
  314. package/src/runtime/routes/types.ts +66 -8
  315. package/src/runtime/routes/usage-routes.ts +47 -39
  316. package/src/runtime/routes/webhook-routes.ts +41 -2
  317. package/src/runtime/routes/workspace-routes.ts +4 -0
  318. package/src/runtime/services/__tests__/analyze-conversation.test.ts +6 -0
  319. package/src/runtime/services/analyze-conversation.ts +2 -2
  320. package/src/schedule/schedule-store.ts +20 -1
  321. package/src/schedule/schedule-usage-store.ts +83 -0
  322. package/src/schedule/scheduler.ts +12 -5
  323. package/src/skills/catalog-files.ts +2 -2
  324. package/src/skills/catalog-install.ts +3 -0
  325. package/src/skills/categories-cache.ts +118 -0
  326. package/src/skills/clawhub-files.ts +1 -2
  327. package/src/skills/skillssh-files.ts +1 -2
  328. package/src/telemetry/types.ts +29 -1
  329. package/src/telemetry/usage-telemetry-reporter.test.ts +112 -3
  330. package/src/telemetry/usage-telemetry-reporter.ts +57 -2
  331. package/src/tools/executor.ts +1 -53
  332. package/src/tools/network/__tests__/web-search-metadata.test.ts +7 -1
  333. package/src/tools/network/__tests__/web-search.test.ts +11 -3
  334. package/src/tools/network/web-search-error.test.ts +248 -0
  335. package/src/tools/network/web-search-error.ts +267 -0
  336. package/src/tools/network/web-search.ts +207 -48
  337. package/src/tools/schedule/create.ts +2 -0
  338. package/src/tools/terminal/safe-env.ts +10 -1
  339. package/src/tools/ui-surface/definitions.ts +9 -1
  340. package/src/tts/__tests__/provider-catalog-consistency.test.ts +85 -1
  341. package/src/tts/provider-catalog.ts +76 -1
  342. package/src/util/mutex.ts +47 -0
  343. package/src/workspace/git-service.ts +1 -42
  344. package/src/workspace/migrations/095-bump-heartbeat-interval-30m-to-60m.ts +51 -0
  345. package/src/workspace/migrations/096-reduce-quality-profile-effort.ts +72 -0
  346. package/src/workspace/migrations/097-enable-adaptive-thinking-managed-profiles.ts +93 -0
  347. package/src/workspace/migrations/registry.ts +6 -0
  348. package/src/__tests__/bootstrap-turn-cleanup.test.ts +0 -44
  349. package/src/__tests__/empty-response-pipeline.test.ts +0 -423
  350. package/src/__tests__/llm-call-pipeline.test.ts +0 -287
  351. package/src/__tests__/memory-retrieval-pipeline.test.ts +0 -418
  352. package/src/__tests__/persistence-pipeline.test.ts +0 -503
  353. package/src/__tests__/title-generate-pipeline.test.ts +0 -211
  354. package/src/__tests__/token-estimate-pipeline.test.ts +0 -479
  355. package/src/__tests__/tool-error-pipeline.test.ts +0 -241
  356. package/src/__tests__/tool-execute-pipeline.test.ts +0 -417
  357. package/src/__tests__/tool-result-truncate-pipeline.test.ts +0 -341
  358. package/src/daemon/bootstrap-turn-cleanup.ts +0 -45
  359. package/src/gallery/default-gallery.ts +0 -1359
  360. package/src/gallery/gallery-manifest.ts +0 -28
  361. package/src/home/feature-gate.ts +0 -22
  362. package/src/plugins/defaults/empty-response/middlewares/emptyResponse.ts +0 -22
  363. package/src/plugins/defaults/empty-response/terminal.ts +0 -106
  364. package/src/plugins/defaults/injectors/package.json +0 -15
  365. package/src/plugins/defaults/llm-call/middlewares/llmCall.ts +0 -17
  366. package/src/plugins/defaults/llm-call/package.json +0 -15
  367. package/src/plugins/defaults/llm-call/register.ts +0 -45
  368. package/src/plugins/defaults/memory-retrieval/middlewares/memoryRetrieval.ts +0 -17
  369. package/src/plugins/defaults/memory-retrieval/package.json +0 -15
  370. package/src/plugins/defaults/memory-retrieval/register.ts +0 -181
  371. package/src/plugins/defaults/persistence/middlewares/persistence.ts +0 -19
  372. package/src/plugins/defaults/persistence/package.json +0 -15
  373. package/src/plugins/defaults/persistence/register.ts +0 -38
  374. package/src/plugins/defaults/persistence/terminal.ts +0 -83
  375. package/src/plugins/defaults/title-generate/terminal.ts +0 -31
  376. package/src/plugins/defaults/token-estimate/middlewares/tokenEstimate.ts +0 -23
  377. package/src/plugins/defaults/token-estimate/package.json +0 -15
  378. package/src/plugins/defaults/token-estimate/register.ts +0 -34
  379. package/src/plugins/defaults/token-estimate/terminal.ts +0 -40
  380. package/src/plugins/defaults/tool-error/middlewares/toolError.ts +0 -21
  381. package/src/plugins/defaults/tool-error/terminal.ts +0 -47
  382. package/src/plugins/defaults/tool-execute/middlewares/toolExecute.ts +0 -23
  383. package/src/plugins/defaults/tool-execute/package.json +0 -15
  384. package/src/plugins/defaults/tool-execute/register.ts +0 -49
  385. package/src/plugins/defaults/tool-result-truncate/middlewares/toolResultTruncate.ts +0 -23
  386. package/src/plugins/defaults/tool-result-truncate/types.ts +0 -22
  387. package/src/skills/category-inference.ts +0 -111
@@ -7,8 +7,6 @@
7
7
  * runAgentLoop method here via the AgentLoopConversationContext interface.
8
8
  */
9
9
 
10
- import { join } from "node:path";
11
-
12
10
  import { v4 as uuid } from "uuid";
13
11
 
14
12
  import { optimizeImageForTransport } from "../agent/image-optimize.js";
@@ -46,10 +44,10 @@ import {
46
44
  } from "../context/post-turn-tool-result-truncation.js";
47
45
  import {
48
46
  estimatePromptTokens,
47
+ estimatePromptTokensWithTools,
49
48
  getCalibrationProviderKey,
50
49
  } from "../context/token-estimator.js";
51
50
  import type { ContextWindowManager } from "../context/window-manager.js";
52
- import { getDocumentsForConversation } from "../documents/document-store.js";
53
51
  import type { ToolProfiler } from "../events/tool-profiling-listener.js";
54
52
  import { writeRelationshipState } from "../home/relationship-state-writer.js";
55
53
  import {
@@ -57,9 +55,9 @@ import {
57
55
  setSentryConversationContext,
58
56
  } from "../instrument.js";
59
57
  import { commitAppTurnChanges } from "../memory/app-git-service.js";
60
- import { getApp, listAppFiles, resolveAppDir } from "../memory/app-store.js";
61
58
  import { enqueueAutoAnalysisOnCompaction } from "../memory/auto-analysis-enqueue.js";
62
59
  import {
60
+ addMessage,
63
61
  deleteMessageById,
64
62
  getConversation,
65
63
  getConversationOriginChannel,
@@ -68,77 +66,48 @@ import {
68
66
  getLastUserTimestampBefore,
69
67
  getMessageById,
70
68
  provenanceFromTrustContext,
71
- setConversationHistoryStrippedAt,
72
- setLastNotifiedInferenceProfile,
73
69
  updateConversationContextWindow,
74
70
  updateConversationSlackContextWatermark,
71
+ updateMessageMetadata,
75
72
  } from "../memory/conversation-crud.js";
76
73
  import { getResolvedConversationDirPath } from "../memory/conversation-directories.js";
77
74
  import { syncMessageToDisk } from "../memory/conversation-disk-view.js";
78
- import {
79
- isReplaceableTitle,
80
- queueRegenerateConversationTitle,
81
- } from "../memory/conversation-title-service.js";
75
+ import { isReplaceableTitle } from "../memory/conversation-title-service.js";
82
76
  import { isBackgroundConversationType } from "../memory/conversation-types.js";
83
77
  import type { ConversationGraphMemory } from "../memory/graph/conversation-graph-memory.js";
84
78
  import {
85
79
  backfillMessageIdOnLogs,
86
80
  recordSyntheticAgentErrorMessageLog,
87
81
  } from "../memory/llm-request-log-store.js";
88
- import { recordMemoryRecallLog } from "../memory/memory-recall-log-store.js";
89
82
  import { enqueueMemoryRetrospectiveOnCompaction } from "../memory/memory-retrospective-enqueue.js";
90
- import { PKB_WORKSPACE_SCOPE } from "../memory/pkb/types.js";
91
- import type { QdrantSparseVector } from "../memory/qdrant-client.js";
92
- import {
93
- readMemoryV2StaticContent,
94
- shouldExposePersonalMemory,
95
- } from "../memory/v2/static-context.js";
96
83
  import type { PermissionPrompter } from "../permissions/prompter.js";
97
84
  import { HOOKS } from "../plugin-api/constants.js";
98
85
  import type { UserPromptSubmitContext } from "../plugin-api/types.js";
99
86
  import { defaultCompactionTerminal } from "../plugins/defaults/compaction/terminal.js";
100
87
  import { deepRepairHistory } from "../plugins/defaults/history-repair/terminal.js";
101
- import {
102
- asDefaultGraphPayload,
103
- type DefaultMemoryRetrievalDeps,
104
- type GraphMemoryPayload,
105
- runDefaultMemoryRetrieval,
106
- } from "../plugins/defaults/memory-retrieval/register.js";
107
- import { defaultPersistenceTerminal } from "../plugins/defaults/persistence/terminal.js";
108
- import { defaultTitleGenerateTerminal } from "../plugins/defaults/title-generate/terminal.js";
109
- import { defaultTokenEstimateTerminal } from "../plugins/defaults/token-estimate/terminal.js";
88
+ import postCompactReinject from "../plugins/defaults/memory-retrieval/hooks/post-compact.js";
89
+ import userPromptSubmitMemoryRetrieval, {
90
+ type MemoryRetrievalHookContext,
91
+ } from "../plugins/defaults/memory-retrieval/hooks/user-prompt-submit-temp.js";
110
92
  import { DEFAULT_TIMEOUTS, runHook, runPipeline } from "../plugins/pipeline.js";
111
93
  import { getMiddlewaresFor } from "../plugins/registry.js";
112
94
  import type {
113
95
  CompactionArgs,
114
96
  CompactionResult,
115
- EstimateArgs,
116
- EstimateResult,
117
- MemoryArgs,
118
- MemoryResult,
119
97
  OverflowReduceArgs,
120
98
  OverflowReduceResult,
121
- PersistAddResult,
122
- PersistArgs,
123
- PersistResult,
124
99
  TurnContext as PluginTurnContext,
125
100
  } from "../plugins/types.js";
126
101
  import { PluginExecutionError, PluginTimeoutError } from "../plugins/types.js";
127
- import type {
128
- ContentBlock,
129
- Message,
130
- ToolDefinition,
131
- } from "../providers/types.js";
102
+ import type { ContentBlock, Message } from "../providers/types.js";
132
103
  import type { Provider } from "../providers/types.js";
133
104
  import { resolveActorTrust } from "../runtime/actor-trust-resolver.js";
134
105
  import { broadcastMessage } from "../runtime/assistant-event-hub.js";
135
106
  import { DAEMON_INTERNAL_ASSISTANT_ID } from "../runtime/assistant-scope.js";
136
107
  import { publishConversationMessagesChanged } from "../runtime/sync/resource-sync-events.js";
137
- import { redactSecrets } from "../security/secret-scanner.js";
138
108
  import { getSubagentManager } from "../subagent/index.js";
139
109
  import type { UsageActor } from "../usage/actors.js";
140
110
  import { getLogger } from "../util/logger.js";
141
- import { getWorkspaceDir } from "../util/platform.js";
142
111
  import { timeAgo } from "../util/time.js";
143
112
  import { truncate } from "../util/truncate.js";
144
113
  import { getWorkspaceGitService } from "../workspace/git-service.js";
@@ -147,7 +116,6 @@ import {
147
116
  type AssistantAttachmentDraft,
148
117
  cleanAssistantContent,
149
118
  } from "./assistant-attachments.js";
150
- import { cleanupBootstrapAfterTurnThreshold } from "./bootstrap-turn-cleanup.js";
151
119
  import { resolveOverflowAction } from "./context-overflow-policy.js";
152
120
  import {
153
121
  createInitialReducerState,
@@ -158,6 +126,8 @@ import {
158
126
  createEventHandlerState,
159
127
  dispatchAgentEvent,
160
128
  type EventHandlerDeps,
129
+ finalizePendingToolResultRow,
130
+ markHistoryStrippedBestEffort,
161
131
  } from "./conversation-agent-loop-handlers.js";
162
132
  import {
163
133
  approveHostAttachmentRead,
@@ -173,17 +143,17 @@ import { raceWithTimeout } from "./conversation-media-retry.js";
173
143
  import type { MessageQueue } from "./conversation-queue-manager.js";
174
144
  import type { QueueDrainReason } from "./conversation-queue-manager.js";
175
145
  import type {
176
- ActiveSurfaceContext,
177
146
  ChannelCapabilities,
178
147
  InboundActorContext,
179
148
  InjectionMode,
180
149
  } from "./conversation-runtime-assembly.js";
181
150
  import {
182
151
  applyRuntimeInjections,
152
+ buildActiveDocuments,
153
+ buildActiveSurfaceContext,
183
154
  buildSubagentStatusBlock,
184
155
  buildUnifiedTurnContextBlock,
185
- findLastInjectedNowContent,
186
- getPkbAutoInjectList,
156
+ buildWorkspaceTopLevelContext,
187
157
  getSlackCompactionWatermarkForPrefix,
188
158
  inboundActorContextFromTrust,
189
159
  inboundActorContextFromTrustContext,
@@ -194,7 +164,6 @@ import {
194
164
  } from "./conversation-runtime-assembly.js";
195
165
  import type { SkillProjectionCache } from "./conversation-skill-tools.js";
196
166
  import { markSurfaceCompleted } from "./conversation-surfaces.js";
197
- import { resolveTrustClass } from "./conversation-tool-setup.js";
198
167
  import { recordUsage } from "./conversation-usage.js";
199
168
  import {
200
169
  formatTurnTimestamp,
@@ -203,45 +172,23 @@ import {
203
172
  import { getDiskPressureStatus } from "./disk-pressure-guard.js";
204
173
  import { classifyDiskPressureTurnPolicy } from "./disk-pressure-policy.js";
205
174
  import type {
206
- DynamicPageSurfaceData,
207
175
  ServerMessage,
208
176
  SurfaceData,
209
177
  SurfaceType,
210
178
  UsageStats,
211
179
  } from "./message-protocol.js";
212
- import type { MemoryRecalled } from "./message-types/memory.js";
213
180
  import type { ConfirmationStateChanged } from "./message-types/messages.js";
214
- import { conversationMetadataSyncTag } from "./message-types/sync.js";
215
181
  import { parseActualTokensFromError } from "./parse-actual-tokens-from-error.js";
182
+ import {
183
+ persistUnsendableImageDowngrades,
184
+ UNSENDABLE_IMAGE_NOTE,
185
+ } from "./persist-unsendable-image.js";
216
186
  import type { TraceEmitter } from "./trace-emitter.js";
217
- import type { TrustContext } from "./trust-context.js";
187
+ import { resolveTrustClass, type TrustContext } from "./trust-context.js";
218
188
  import { stripHistoricalWebSearchResults } from "./web-search-history.js";
219
189
 
220
190
  const log = getLogger("conversation-agent-loop");
221
191
 
222
- /**
223
- * Best-effort persistence of the history-stripped marker after an
224
- * injection-strip event (compaction / overflow recovery). The marker is a
225
- * durability hint, not turn-critical state — a transient SQLite write failure
226
- * (SQLITE_BUSY, disk-full, read-only FS) must not abort the turn. Logs a
227
- * warning and continues on failure, preserving the long-standing non-fatal
228
- * contract for this metadata write.
229
- */
230
- function markHistoryStrippedBestEffort(
231
- conversationId: string,
232
- strippedAt: number,
233
- logger: ReturnType<typeof getLogger>,
234
- ): void {
235
- try {
236
- setConversationHistoryStrippedAt(conversationId, strippedAt);
237
- } catch (err) {
238
- logger.warn(
239
- { err },
240
- "Failed to persist history-stripped marker after compaction strip (non-fatal)",
241
- );
242
- }
243
- }
244
-
245
192
  const DISK_PRESSURE_ERROR_CODE = "DISK_SPACE_CRITICAL" as const;
246
193
  const DISK_PRESSURE_ERROR_CATEGORY = "disk_pressure";
247
194
 
@@ -322,9 +269,23 @@ function buildPluginTurnContext(
322
269
  turnIndex: ctx.turnCount,
323
270
  trust,
324
271
  contextWindowManager: ctx.contextWindowManager,
272
+ callSite: ctx.currentCallSite,
325
273
  };
326
274
  }
327
275
 
276
+ /**
277
+ * Trust class of the actor whose turn is in progress, for the compactor's
278
+ * image manifest filter. Prefers the turn-start snapshot
279
+ * ({@link AgentLoopConversationContext.currentTurnTrustContext}) over the live
280
+ * trust context so compaction running in a later tool iteration can't pick up
281
+ * a concurrent request's actor.
282
+ */
283
+ function resolveTurnActorTrustClass(
284
+ ctx: AgentLoopConversationContext,
285
+ ): TrustContext["trustClass"] | undefined {
286
+ return (ctx.currentTurnTrustContext ?? ctx.trustContext)?.trustClass;
287
+ }
288
+
328
289
  // ── Context Interface ────────────────────────────────────────────────
329
290
 
330
291
  /**
@@ -352,9 +313,18 @@ export interface AssistantSurface {
352
313
  export interface AgentLoopConversationContext {
353
314
  readonly conversationId: string;
354
315
  messages: Message[];
355
- processing: boolean;
316
+ isProcessing(): boolean;
317
+ setProcessing(value: boolean): void;
356
318
  abortController: AbortController | null;
357
319
  currentRequestId?: string;
320
+ /**
321
+ * The {@link LLMCallSite} of the in-flight turn, set at turn start from
322
+ * `options?.callSite ?? "mainAgent"`. Read by {@link buildPluginTurnContext}
323
+ * so pipeline/injector plugins can tell the main reply apart from
324
+ * background agent-loop work (compaction, subagents, …) on this same
325
+ * conversation. Per-turn mutable, mirroring {@link currentRequestId}.
326
+ */
327
+ currentCallSite?: LLMCallSite;
358
328
 
359
329
  readonly agentLoop: AgentLoop;
360
330
  readonly provider: Provider;
@@ -561,6 +531,13 @@ export async function runAgentLoopImpl(
561
531
  });
562
532
  let yieldedForHandoff = false;
563
533
  let yieldedForBudget = false;
534
+ // Whether the most recent agent-loop run produced at least one new assistant
535
+ // message — the loop's own forward-progress signal, used by the ordering
536
+ // retry gate and the overflow convergence fold.
537
+ let lastRunAppendedNewMessages = false;
538
+ // The messages the most recent agent-loop run appended on top of its base —
539
+ // the loop's own new-output boundary, persisted as this turn's new messages.
540
+ let lastRunNewMessages: Message[] = [];
564
541
  let pendingCheckpointYield: "budget" | "handoff" | null = null;
565
542
  // Captured when the auto_compress_latest_turn rerun yields at the mid-loop
566
543
  // budget checkpoint. SSE emission happens immediately at the detection site;
@@ -579,6 +556,9 @@ export async function runAgentLoopImpl(
579
556
  // `resolveCallSiteConfig`, picking up any user overrides under
580
557
  // `llm.callSites.mainAgent` (falling back to `llm.default` when absent).
581
558
  const turnCallSite: LLMCallSite = options?.callSite ?? "mainAgent";
559
+ // Expose the turn's call site to plugin pipeline/injector contexts (read by
560
+ // buildPluginTurnContext) so plugins can scope behaviour to the main reply.
561
+ ctx.currentCallSite = turnCallSite;
582
562
 
583
563
  // Read the conversation row once for both the override-profile derivation
584
564
  // below and the title-replaceability check at turn start. Later reads in
@@ -898,55 +878,6 @@ export async function runAgentLoopImpl(
898
878
  }
899
879
  }
900
880
 
901
- // Generate title early — the user message alone is sufficient context.
902
- // Firing before the main LLM call removes the delay of waiting for the
903
- // full assistant response. The second-pass regeneration at turn 3 will
904
- // refine the title with more context.
905
- // No abort signal — title generation should complete even if the user
906
- // cancels the response, since the user message is already persisted.
907
- // Deferred via setTimeout so the main agent loop LLM call enqueues
908
- // first, avoiding rate-limit slot contention on strict configs.
909
- if (isReplaceableTitle(turnStartConversation?.title ?? null)) {
910
- // TurnContext routed through the canonical builder so the pipeline's
911
- // log record reports the same `conversationId`/`turnIndex` shape as
912
- // every other slot in this turn. Title generation does not depend on
913
- // the context-window manager attached by the builder, but sharing the
914
- // builder keeps the invariant enforced in one place.
915
- const titlePipelineCtx = buildPluginTurnContext(ctx, reqId);
916
- const titleArgs = {
917
- conversationId: ctx.conversationId,
918
- provider: ctx.provider,
919
- userMessage: options?.titleText ?? content,
920
- onTitleUpdated: (title: string) => {
921
- onEvent({
922
- type: "conversation_title_updated",
923
- conversationId: ctx.conversationId,
924
- title,
925
- });
926
- onEvent({
927
- type: "sync_changed",
928
- tags: [conversationMetadataSyncTag(ctx.conversationId)],
929
- });
930
- },
931
- };
932
- setTimeout(() => {
933
- runPipeline(
934
- "titleGenerate",
935
- getMiddlewaresFor("titleGenerate"),
936
- defaultTitleGenerateTerminal,
937
- titleArgs,
938
- titlePipelineCtx,
939
- DEFAULT_TIMEOUTS.titleGenerate,
940
- ).catch((err) => {
941
- // Fire-and-forget — keep previous non-propagating semantics.
942
- // queueGenerateConversationTitle already swallows internal
943
- // errors; this catch covers pipeline-layer errors (timeouts,
944
- // middleware throws) without surfacing them to the agent loop.
945
- rlog.warn({ err }, "titleGenerate pipeline failed (non-fatal)");
946
- });
947
- }, 0);
948
- }
949
-
950
881
  const isFirstMessage = ctx.messages.length === 1;
951
882
  // Promote a pending post-compaction re-inject signal (e.g. from `/compact`)
952
883
  // into `compactedThisTurn` so NOW.md / PKB / v2 static blocks land on this
@@ -954,7 +885,7 @@ export async function runAgentLoopImpl(
954
885
  // so this fires exactly once per `/compact` event.
955
886
  const consumedPostCompactReinject = ctx.pendingPostCompactReinject;
956
887
  ctx.pendingPostCompactReinject = false;
957
- let shouldInjectWorkspace = isFirstMessage || consumedPostCompactReinject;
888
+ state.shouldInjectWorkspace = isFirstMessage || consumedPostCompactReinject;
958
889
  let compactedThisTurn = consumedPostCompactReinject;
959
890
  let slackCompactedThisTurn = false;
960
891
  const isSlackConversation = ctx.channelCapabilities?.channel === "slack";
@@ -1099,12 +1030,9 @@ export async function runAgentLoopImpl(
1099
1030
  });
1100
1031
  }
1101
1032
  const compactionOptions = {
1102
- lastCompactedAt: ctx.contextCompactedAt ?? undefined,
1103
1033
  precomputedEstimate: compactCheck.estimatedTokens,
1104
- conversationOriginChannel:
1105
- getConversationOriginChannel(ctx.conversationId) ?? undefined,
1106
1034
  overrideProfile: resolveCurrentOverrideProfile() ?? null,
1107
- actorTrustClass: ctx.trustContext?.trustClass,
1035
+ actorTrustClass: resolveTurnActorTrustClass(ctx),
1108
1036
  };
1109
1037
  let compacted: Awaited<
1110
1038
  ReturnType<typeof ctx.contextWindowManager.maybeCompact>
@@ -1150,7 +1078,7 @@ export async function runAgentLoopImpl(
1150
1078
  }
1151
1079
  // Only track circuit-breaker state when a summary LLM call actually ran.
1152
1080
  // `summaryFailed` is `undefined` on early returns (compaction disabled,
1153
- // below threshold, cooldown active, no eligible messages, truncation-only
1081
+ // below threshold, no eligible messages, truncation-only
1154
1082
  // path) — treating those as "successful" compactions would silently reset
1155
1083
  // the 3-strike counter and break the invariant.
1156
1084
  if (compacted && compacted.summaryFailed !== undefined) {
@@ -1165,7 +1093,7 @@ export async function runAgentLoopImpl(
1165
1093
  compacted,
1166
1094
  messagesForStartOfTurnCompaction,
1167
1095
  );
1168
- shouldInjectWorkspace = true;
1096
+ state.shouldInjectWorkspace = true;
1169
1097
  if (compacted.compactedPersistedMessages > 0) {
1170
1098
  compactedThisTurn = true;
1171
1099
  }
@@ -1203,213 +1131,10 @@ export async function runAgentLoopImpl(
1203
1131
  }
1204
1132
  };
1205
1133
 
1206
- let runMessages = ctx.messages;
1207
-
1208
- // Memory retrieval pipeline fetches PKB, NOW.md, and memory-graph
1209
- // outputs through a single `memoryRetrieval` pipeline. Plugins may
1210
- // replace the terminal behavior by registering a middleware that
1211
- // short-circuits with its own `MemoryResult`; the default terminal
1212
- // below runs `runDefaultMemoryRetrieval` which reproduces the prior
1213
- // in-lined behavior (PKB/NOW reads + gated graph call).
1214
- const isTrustedActor = resolveTrustClass(ctx.trustContext) === "guardian";
1215
- // Canonical builder — pulls trust from per-turn snapshot, then
1216
- // conversation-level, then the synthetic fallback. Memory retrieval
1217
- // does not need the context-window handle the builder attaches, but
1218
- // keeping every call site on one helper is load-bearing for log
1219
- // coherence across pipeline slots.
1220
- const memoryPluginTurnCtx = buildPluginTurnContext(ctx, reqId);
1221
- const memoryArgs: MemoryArgs = {
1222
- conversationId: ctx.conversationId,
1223
- trustContext: ctx.trustContext,
1224
- turnIndex: ctx.turnCount,
1225
- // Pass the abort signal via `args` (not `deps`) so the pipeline
1226
- // runner's `linkAbortSignal` can swap it for a signal linked to the
1227
- // pipeline's internal controller — on a plugin-set timeout or
1228
- // external cancel, the linked signal aborts and `prepareMemory`
1229
- // stops mutating graph state / emitting events after the pipeline
1230
- // has already errored.
1231
- signal: abortController.signal,
1232
- };
1233
- const memoryDeps: DefaultMemoryRetrievalDeps = {
1234
- messages: ctx.messages,
1235
- graphMemory: ctx.graphMemory,
1236
- config: getConfig(),
1237
- onEvent,
1238
- isTrustedActor,
1239
- };
1240
- const memoryResult: MemoryResult = await runPipeline(
1241
- "memoryRetrieval",
1242
- getMiddlewaresFor("memoryRetrieval"),
1243
- (args) => runDefaultMemoryRetrieval(args, memoryDeps),
1244
- memoryArgs,
1245
- memoryPluginTurnCtx,
1246
- DEFAULT_TIMEOUTS.memoryRetrieval,
1247
- );
1248
-
1249
- // Consume the memory-graph block when the default retriever emitted
1250
- // one. Custom plugins that substitute their own blocks without the
1251
- // default discriminator are expected to handle their own side effects
1252
- // (event emission, metric persistence) inside their middleware; this
1253
- // block short-circuits to the original no-op behavior in that case.
1254
- const defaultGraphPayload: GraphMemoryPayload | null =
1255
- asDefaultGraphPayload(memoryResult.memoryGraphBlocks);
1256
- let pkbQueryVector: number[] | undefined;
1257
- let pkbSparseVector: QdrantSparseVector | undefined;
1258
- if (defaultGraphPayload) {
1259
- const graphResult = defaultGraphPayload.result;
1260
- runMessages = graphResult.runMessages;
1261
- // Select dense+sparse as a matched pair so RRF fusion combines two
1262
- // signals aligned to the same query text:
1263
- // 1. Context-load with a user query: user-query dense + user-query
1264
- // sparse — the cleanest pairing.
1265
- // 2. Otherwise (context-load without a user query, or per-turn):
1266
- // whatever `queryVector` / `sparseVector` the retriever produced,
1267
- // which are themselves co-aligned (both summary-derived in
1268
- // context-load, both user-last-message-derived in per-turn).
1269
- // Never pair a user-query dense with a summary-aligned sparse.
1270
- if (graphResult.userQueryVector) {
1271
- pkbQueryVector = graphResult.userQueryVector;
1272
- pkbSparseVector = graphResult.userQuerySparseVector;
1273
- } else {
1274
- pkbQueryVector = graphResult.queryVector;
1275
- pkbSparseVector = graphResult.sparseVector;
1276
- }
1277
-
1278
- // Persist the injected block text in message metadata so it survives
1279
- // conversation reloads (eviction, restart, fork). loadFromDb re-injects
1280
- // from metadata. Routed through the `persistence` pipeline so plugins
1281
- // can observe or override metadata updates alongside add/delete.
1282
- if (graphResult.injectedBlockText) {
1283
- try {
1284
- await runPipeline<PersistArgs, PersistResult>(
1285
- "persistence",
1286
- getMiddlewaresFor("persistence"),
1287
- defaultPersistenceTerminal,
1288
- {
1289
- op: "update",
1290
- messageId: userMessageId,
1291
- updates: {
1292
- memoryInjectedBlock: graphResult.injectedBlockText,
1293
- },
1294
- },
1295
- buildPluginTurnContext(ctx, reqId),
1296
- DEFAULT_TIMEOUTS.persistence,
1297
- );
1298
- } catch (err) {
1299
- rlog.warn(
1300
- { err },
1301
- "Failed to persist memory injection to metadata (non-fatal)",
1302
- );
1303
- }
1304
- }
1305
-
1306
- const m = graphResult.metrics;
1307
-
1308
- try {
1309
- recordMemoryRecallLog({
1310
- conversationId: ctx.conversationId,
1311
- enabled: true,
1312
- degraded: false,
1313
- provider: m?.embeddingProvider ?? undefined,
1314
- model: m?.embeddingModel ?? undefined,
1315
- semanticHits: m?.semanticHits ?? 0,
1316
- mergedCount: m?.mergedCount ?? 0,
1317
- selectedCount: m?.selectedCount ?? 0,
1318
- tier1Count: m?.tier1Count ?? 0,
1319
- tier2Count: m?.tier2Count ?? 0,
1320
- hybridSearchLatencyMs: m?.hybridSearchLatencyMs ?? 0,
1321
- sparseVectorUsed: m?.sparseVectorUsed ?? false,
1322
- injectedTokens: graphResult.injectedTokens,
1323
- latencyMs: graphResult.latencyMs,
1324
- topCandidatesJson: (m?.topCandidates ?? []).map((c) => ({
1325
- key: c.nodeId,
1326
- type: c.type,
1327
- kind: "graph",
1328
- finalScore: c.score,
1329
- semantic: c.semanticSimilarity,
1330
- recency: c.recencyBoost,
1331
- })),
1332
- injectedText: graphResult.injectedBlockText ?? undefined,
1333
- reason: `graph:${graphResult.mode}`,
1334
- queryContext: m?.queryContext ?? undefined,
1335
- });
1336
- } catch (err) {
1337
- log.warn({ err }, "Failed to persist memory recall log (non-fatal)");
1338
- }
1339
-
1340
- if (m) {
1341
- const memoryRecalledEvent: MemoryRecalled = {
1342
- type: "memory_recalled",
1343
- provider: m.embeddingProvider ?? "unknown",
1344
- model: m.embeddingModel ?? "unknown",
1345
- semanticHits: m.semanticHits,
1346
- mergedCount: m.mergedCount,
1347
- selectedCount: m.selectedCount,
1348
- tier1Count: m.tier1Count,
1349
- tier2Count: m.tier2Count,
1350
- hybridSearchLatencyMs: m.hybridSearchLatencyMs,
1351
- sparseVectorUsed: m.sparseVectorUsed,
1352
- injectedTokens: graphResult.injectedTokens,
1353
- latencyMs: graphResult.latencyMs,
1354
- topCandidates: m.topCandidates.map((c) => ({
1355
- key: c.nodeId,
1356
- type: c.type,
1357
- kind: "graph",
1358
- finalScore: c.score,
1359
- semantic: c.semanticSimilarity,
1360
- recency: c.recencyBoost,
1361
- })),
1362
- };
1363
- onEvent(memoryRecalledEvent);
1364
- }
1365
- }
1366
-
1367
- // Build active surface context
1368
- let activeSurface: ActiveSurfaceContext | null = null;
1369
- if (ctx.currentActiveSurfaceId) {
1370
- const stored = ctx.surfaceState.get(ctx.currentActiveSurfaceId);
1371
- if (stored && stored.surfaceType === "dynamic_page") {
1372
- const data = stored.data as DynamicPageSurfaceData;
1373
- activeSurface = {
1374
- surfaceId: ctx.currentActiveSurfaceId,
1375
- html: data.html,
1376
- currentPage: ctx.currentPage,
1377
- };
1378
- if (data.appId) {
1379
- const app = getApp(data.appId);
1380
- if (app) {
1381
- activeSurface.appId = app.id;
1382
- activeSurface.appName = app.name;
1383
- activeSurface.appDirName = resolveAppDir(app.id).dirName;
1384
- activeSurface.appSchemaJson = app.schemaJson;
1385
- activeSurface.appFiles = listAppFiles(app.id);
1386
- if (app.pages && Object.keys(app.pages).length > 0) {
1387
- activeSurface.appPages = app.pages;
1388
- }
1389
- }
1390
- }
1391
- }
1392
- }
1393
-
1394
- // Query active documents for this conversation so the injector chain
1395
- // can surface them to the assistant (prevents duplicate document_create
1396
- // calls when existing documents should be targeted with document_update).
1397
- const conversationDocs = getDocumentsForConversation(ctx.conversationId);
1398
- const activeDocuments =
1399
- conversationDocs.length > 0
1400
- ? conversationDocs.map((d) => ({
1401
- surfaceId: d.surfaceId,
1402
- title: d.title,
1403
- wordCount: d.wordCount,
1404
- updatedAt: d.updatedAt,
1405
- }))
1406
- : null;
1407
-
1408
- ctx.refreshWorkspaceTopLevelContextIfNeeded();
1409
-
1410
- // Compute fresh turn timestamp for date grounding.
1411
- // Absolute "now" is always anchored to assistant host clock, while local
1412
- // date semantics prefer configured user timezone, then device timezones.
1134
+ // Resolve the turn's timezone cascade up front. It depends only on config
1135
+ // and the inbound request — never on retrieval output — so it can be
1136
+ // settled before context assembly. Local date semantics prefer the
1137
+ // configured user timezone, then device timezones, then the host clock.
1413
1138
  const hostTimeZone = Intl.DateTimeFormat().resolvedOptions().timeZone;
1414
1139
  const timezoneContext = resolveTurnTimezoneContext({
1415
1140
  configuredUserTimeZone: config.ui.userTimezone ?? null,
@@ -1417,9 +1142,6 @@ export async function runAgentLoopImpl(
1417
1142
  detectedTimezone: config.ui.detectedTimezone ?? null,
1418
1143
  hostTimeZone,
1419
1144
  });
1420
- const timestamp = formatTurnTimestamp({
1421
- timeZone: timezoneContext.effectiveTimezone,
1422
- });
1423
1145
 
1424
1146
  // Resolve the inbound actor context for the unified <turn_context> block.
1425
1147
  // When the conversation carries enough identity info, use the unified
@@ -1443,8 +1165,10 @@ export async function runAgentLoopImpl(
1443
1165
  }
1444
1166
  }
1445
1167
 
1446
- // Build unified turn context block that replaces the separate temporal,
1447
- // channel, interface, and actor context blocks.
1168
+ // Resolve the channel/interface labels and the guardian flag for this
1169
+ // turn. These derive only from the captured turn context and the resolved
1170
+ // actor trust class — never from retrieval — so they settle before context
1171
+ // assembly.
1448
1172
  const interfaceName =
1449
1173
  capturedTurnInterfaceContext.userMessageInterface ?? undefined;
1450
1174
  const channelName =
@@ -1489,9 +1213,54 @@ export async function runAgentLoopImpl(
1489
1213
  });
1490
1214
  const label = profileEntry?.label ?? effectiveProfileKey;
1491
1215
  modelProfileStr = resolved.model ? `${label} (${resolved.model})` : label;
1492
- setLastNotifiedInferenceProfile(ctx.conversationId, effectiveProfileKey);
1216
+ // Record the notification for persistence on delivery rather than here:
1217
+ // the model only "learns" the profile once it receives this turn
1218
+ // context, signalled by the first `message_complete`. Persisting inline
1219
+ // would mark the profile notified even if the turn is cancelled or fails
1220
+ // before the model ever sees the notice.
1221
+ state.pendingNotifiedInferenceProfile = effectiveProfileKey;
1493
1222
  }
1494
1223
 
1224
+ // Memory retrieval — fetches PKB, NOW.md, and memory-graph outputs and
1225
+ // persists the retrieval's own side effects (injected-block metadata,
1226
+ // recall log, `memory_recalled` event). Runs at the early "prompt
1227
+ // submitted, before context assembly" moment because its outputs feed the
1228
+ // injection and overflow-reduction transforms below. It is shaped as the
1229
+ // `user-prompt-submit-temp` hook handler but invoked directly for now: it
1230
+ // must run early, while the canonical late `user-prompt-submit` hook
1231
+ // (history repair, title) runs after those transforms, so the two cannot
1232
+ // share a fire site until compaction is cleared from the gap between them.
1233
+ const isTrustedActor = resolveTrustClass(ctx.trustContext) === "guardian";
1234
+ const memoryCtx: MemoryRetrievalHookContext = {
1235
+ graphMemory: ctx.graphMemory,
1236
+ config: getConfig(),
1237
+ onEvent,
1238
+ isTrustedActor,
1239
+ conversationId: ctx.conversationId,
1240
+ userMessageId,
1241
+ logger: rlog,
1242
+ // An external cancel aborts `prepareMemory` instead of letting it run
1243
+ // to completion after the turn has already been torn down.
1244
+ signal: abortController.signal,
1245
+ latestMessages: ctx.messages,
1246
+ };
1247
+ await userPromptSubmitMemoryRetrieval(memoryCtx);
1248
+
1249
+ // The retriever owns its side effects (injected-block metadata, recall
1250
+ // log, `memory_recalled` event) and records the dense/sparse PKB query
1251
+ // pair on the graph handle for the PKB-reminder injector to read back; the
1252
+ // loop only reuses the injected message list downstream.
1253
+ let runMessages = memoryCtx.latestMessages;
1254
+
1255
+ // Capture wall-clock "now" at its point of use, after the blocking memory
1256
+ // retrieval, so the injected `<turn_context>` timestamp reflects current
1257
+ // time rather than the moment the turn began.
1258
+ const timestamp = formatTurnTimestamp({
1259
+ timeZone: timezoneContext.effectiveTimezone,
1260
+ });
1261
+
1262
+ // Build unified turn context block that replaces the separate temporal,
1263
+ // channel, interface, and actor context blocks.
1495
1264
  const baseTurnContext = {
1496
1265
  timestamp,
1497
1266
  interfaceName,
@@ -1513,64 +1282,6 @@ export async function runAgentLoopImpl(
1513
1282
 
1514
1283
  // The `remember` tool handles scratchpad-style memory writes directly to the graph.
1515
1284
 
1516
- // Personal-memory trust gate: PKB, NOW.md, and v2 static blocks all
1517
- // hold private user content. Block exposure to non-guardian actors
1518
- // arriving over a remote channel; internal/local flows pass through.
1519
- // See `shouldExposePersonalMemory` for the threat model.
1520
- const personalMemoryAllowed = shouldExposePersonalMemory({
1521
- sourceChannel: ctx.trustContext?.sourceChannel,
1522
- isTrustedActor,
1523
- });
1524
-
1525
- // Inject NOW.md and PKB content only on the first turn (or after
1526
- // compaction re-strips them). Old injections persist in history and
1527
- // are never stripped on normal turns — this preserves the cached prefix.
1528
- // PKB/NOW content is sourced from the `memoryRetrieval` pipeline above
1529
- // so plugins can override either source without touching the agent loop.
1530
- // NOW.md injection can be disabled via `memory.retrieval.scratchpadInjection.enabled`.
1531
- const scratchpadInjectionEnabled =
1532
- getConfig().memory.retrieval.scratchpadInjection.enabled;
1533
- const currentNowContent =
1534
- personalMemoryAllowed && scratchpadInjectionEnabled
1535
- ? memoryResult.nowContent
1536
- : null;
1537
- const shouldInjectNowAndPkb = isFirstMessage || compactedThisTurn;
1538
- const nowScratchpad = shouldInjectNowAndPkb ? currentNowContent : null;
1539
-
1540
- const currentPkbContent = personalMemoryAllowed
1541
- ? memoryResult.pkbContent
1542
- : null;
1543
- const pkbContext = shouldInjectNowAndPkb ? currentPkbContent : null;
1544
- const pkbActive = currentPkbContent !== null;
1545
-
1546
- // V2 static memory block (essentials/threads/recent/buffer).
1547
- // `currentMemoryV2Static` is the trust-gated content reused by every
1548
- // re-injection path — it stays non-null on non-full-mode turns so
1549
- // that mid-turn reducer compaction (which strips the prior `<info>`
1550
- // block) can restore the freshest content. `memoryV2Static` is the
1551
- // first-turn / post-compaction cadence-gated value for initial
1552
- // injection only. `readMemoryV2StaticContent` self-gates on the v2
1553
- // flag + config and returns null when v2 is off.
1554
- const currentMemoryV2Static = personalMemoryAllowed
1555
- ? readMemoryV2StaticContent()
1556
- : null;
1557
- const memoryV2Static = shouldInjectNowAndPkb ? currentMemoryV2Static : null;
1558
-
1559
- // PKB relevance-hint inputs. Resolved once per turn and reused across
1560
- // re-injections so post-compaction rebuilds pick up fresh hints against
1561
- // the updated conversation history.
1562
- const pkbRoot = pkbActive ? join(getWorkspaceDir(), "pkb") : undefined;
1563
- const pkbAutoInjectList = pkbRoot
1564
- ? getPkbAutoInjectList(pkbRoot)
1565
- : undefined;
1566
- // Pass `ctx` directly — `PkbContextConversation` is structural and
1567
- // `getInContextPkbPaths` re-reads `conversation.messages` on each call,
1568
- // so post-compaction re-injects see the updated history.
1569
- const pkbConversation = pkbActive ? ctx : undefined;
1570
- // PKB points live under a single workspace sentinel scope.
1571
- // See `PKB_WORKSPACE_SCOPE` for why.
1572
- const pkbScopeId = pkbActive ? PKB_WORKSPACE_SCOPE : undefined;
1573
-
1574
1285
  // Subagent status injection — gives the parent LLM visibility into active/completed children.
1575
1286
  // Skipped when this conversation IS a subagent (no nesting) or has no children.
1576
1287
  const subagentStatusBlock = ctx.isSubagent
@@ -1625,21 +1336,14 @@ export async function runAgentLoopImpl(
1625
1336
  )
1626
1337
  : null;
1627
1338
 
1628
- // Guards the chronological-transcript override on re-injection after
1629
- // the reducer compacts `ctx.messages`. The captured transcript is the
1630
- // full persisted history; blindly replaying it on every re-inject would
1631
- // overwrite the reducer's compacted messages and undo compaction. Flip
1632
- // to `true` after any compaction so subsequent re-injections fall back
1633
- // to the reduced `ctx.messages`.
1634
- let reducerCompacted = compactedThisTurn;
1635
-
1636
- // memory-v3-live: route the turn's `<memory>` block to the v3 injector.
1637
- // When on, runtime assembly suppresses v2's `<memory>` injection (only
1638
- // when the v3 injector actually produced a block — otherwise v2 stays as a
1639
- // fallback) and the provider anchors its long-TTL cache breakpoint on the
1640
- // most recent STABLE user message, since the latest user message now
1641
- // carries the volatile per-turn memory block. Flag off → bit-for-bit
1642
- // identical to today's v2 path.
1339
+ state.reducerCompacted = compactedThisTurn;
1340
+
1341
+ // memory-v3-live: when on, the provider anchors its long-TTL cache
1342
+ // breakpoint on the most recent STABLE user message, since the latest user
1343
+ // message now carries the volatile per-turn `<memory>` block the v3
1344
+ // injector emits. The matching v2-suppression strip is owned by
1345
+ // `applyRuntimeInjections`, which reads the same flag itself. Flag off →
1346
+ // bit-for-bit identical to today's v2 path.
1643
1347
  const memoryV3Live = isAssistantFeatureFlagEnabled(
1644
1348
  "memory-v3-live",
1645
1349
  getConfig(),
@@ -1647,27 +1351,24 @@ export async function runAgentLoopImpl(
1647
1351
 
1648
1352
  // Shared injection options — reused whenever we need to re-inject after reduction.
1649
1353
  const injectionOpts = {
1650
- suppressV2MemoryForV3: memoryV3Live,
1651
1354
  diskPressureContext,
1652
- activeSurface,
1653
- activeDocuments,
1654
- workspaceTopLevelContext: shouldInjectWorkspace
1655
- ? ctx.workspaceTopLevelContext
1656
- : null,
1355
+ // Resolved from the conversation's surface state here, where the
1356
+ // runtime injector is the only consumer of the active-surface block.
1357
+ activeSurface: buildActiveSurfaceContext({
1358
+ currentActiveSurfaceId: ctx.currentActiveSurfaceId,
1359
+ currentPage: ctx.currentPage,
1360
+ surfaceState: ctx.surfaceState,
1361
+ }),
1362
+ // Resolved here, where the runtime injector is the only consumer of the
1363
+ // active-documents block.
1364
+ activeDocuments: buildActiveDocuments(ctx.conversationId),
1365
+ workspaceTopLevelContext: buildWorkspaceTopLevelContext(
1366
+ ctx,
1367
+ state.shouldInjectWorkspace,
1368
+ ),
1657
1369
  channelCapabilities: ctx.channelCapabilities ?? null,
1658
1370
  channelCommandContext: ctx.commandIntent ?? null,
1659
1371
  unifiedTurnContext: unifiedTurnContextStr,
1660
- pkbContext,
1661
- pkbActive,
1662
- pkbQueryVector,
1663
- pkbSparseVector,
1664
- pkbScopeId,
1665
- pkbConversation,
1666
- pkbAutoInjectList,
1667
- pkbRoot,
1668
- pkbWorkingDir: pkbActive ? ctx.workingDir : undefined,
1669
- memoryV2Static,
1670
- nowScratchpad,
1671
1372
  voiceCallControlPrompt: ctx.voiceCallControlPrompt ?? null,
1672
1373
  transportHints: ctx.transportHints ?? null,
1673
1374
  isNonInteractive: !isInteractiveResolved,
@@ -1689,7 +1390,7 @@ export async function runAgentLoopImpl(
1689
1390
 
1690
1391
  const injection = await applyRuntimeInjections(runMessages, {
1691
1392
  ...injectionOpts,
1692
- slackChronologicalMessages: reducerCompacted
1393
+ slackChronologicalMessages: state.reducerCompacted
1693
1394
  ? null
1694
1395
  : injectionOpts.slackChronologicalMessages,
1695
1396
  mode: currentInjectionMode,
@@ -1735,18 +1436,7 @@ export async function runAgentLoopImpl(
1735
1436
  metadataUpdates.memoryV2StaticBlock =
1736
1437
  injection.blocks.memoryV2StaticBlock;
1737
1438
  }
1738
- await runPipeline<PersistArgs, PersistResult>(
1739
- "persistence",
1740
- getMiddlewaresFor("persistence"),
1741
- defaultPersistenceTerminal,
1742
- {
1743
- op: "update",
1744
- messageId: userMessageId,
1745
- updates: metadataUpdates,
1746
- },
1747
- buildPluginTurnContext(ctx, reqId),
1748
- DEFAULT_TIMEOUTS.persistence,
1749
- );
1439
+ updateMessageMetadata(userMessageId, metadataUpdates);
1750
1440
  } catch (err) {
1751
1441
  rlog.warn({ err }, "Failed to persist injection metadata (non-fatal)");
1752
1442
  }
@@ -1762,51 +1452,18 @@ export async function runAgentLoopImpl(
1762
1452
  let reducerState: ReducerState | undefined;
1763
1453
 
1764
1454
  const toolTokenBudget = ctx.agentLoop.getToolTokenBudget(runMessages);
1765
- // Canonical calibration key — passed to the `tokenEstimate` pipeline for
1766
- // every preflight/mid-loop estimate, the overflow reducer config, and the
1767
- // convergence-path `estimatePromptTokens` call. Matches the key recorded
1768
- // by `handleUsage` for wrapper providers (OpenRouter routing to
1769
- // Anthropic → key is `"anthropic"`).
1455
+ // Canonical calibration key — used by the preflight estimate, the
1456
+ // overflow reducer config, and the convergence-path `estimatePromptTokens`
1457
+ // call. Matches the key recorded by `handleUsage` for wrapper providers
1458
+ // (for OpenRouter routing to Anthropic, the key is `"anthropic"`).
1770
1459
  const estimationProviderName = getCalibrationProviderKey(ctx.provider);
1771
1460
 
1772
- // Shared `TurnContext` for every `tokenEstimate` pipeline invocation in
1773
- // this turn. The pipeline is the extension point for plugins that want
1774
- // to substitute an alternate estimator (e.g. provider-native tokenization)
1775
- // without touching orchestrator code.
1776
- //
1777
- // Routed through the canonical builder — `turnIndex` is `ctx.turnCount`,
1778
- // trust cascades through per-turn/conversation-level/fallback, and the
1779
- // context-window handle rides along so any middleware that wants to
1780
- // reuse the manager (e.g. to compute compaction-aware estimates) can.
1781
- const pipelineTurnCtx = buildPluginTurnContext(ctx, reqId);
1782
-
1783
- const runTokenEstimatePipeline = (
1784
- history: Message[],
1785
- ): Promise<EstimateResult> =>
1786
- runPipeline<EstimateArgs, EstimateResult>(
1787
- "tokenEstimate",
1788
- getMiddlewaresFor("tokenEstimate"),
1789
- defaultTokenEstimateTerminal,
1790
- {
1791
- // Shallow-frozen copies so a misbehaving middleware that mutates
1792
- // `args.history` or `args.tools` in place (e.g. trims the array
1793
- // before calling next) can't silently strip prompt context from
1794
- // the orchestrator's live `runMessages` / resolved-tools arrays.
1795
- // TypeScript `readonly` on `EstimateArgs` does not prevent
1796
- // `push`/`splice` at runtime; the frozen wrapper throws in strict
1797
- // mode and isolates any mutation attempts from the call-site state.
1798
- history: Object.freeze([...history]) as Message[],
1799
- systemPrompt: ctx.systemPrompt,
1800
- tools: Object.freeze([
1801
- ...ctx.agentLoop.getResolvedTools(history),
1802
- ]) as ToolDefinition[],
1803
- providerName: estimationProviderName,
1804
- },
1805
- pipelineTurnCtx,
1806
- DEFAULT_TIMEOUTS.tokenEstimate,
1807
- );
1808
-
1809
- const preflightTokens = await runTokenEstimatePipeline(runMessages);
1461
+ const preflightTokens = estimatePromptTokensWithTools(
1462
+ runMessages,
1463
+ ctx.systemPrompt,
1464
+ ctx.agentLoop.getResolvedTools(runMessages),
1465
+ estimationProviderName,
1466
+ );
1810
1467
 
1811
1468
  if (overflowRecovery.enabled && preflightTokens > preflightBudget) {
1812
1469
  rlog.warn(
@@ -1869,7 +1526,7 @@ export async function runAgentLoopImpl(
1869
1526
  options: {
1870
1527
  ...(opts ?? {}),
1871
1528
  overrideProfile: resolveCurrentOverrideProfile() ?? null,
1872
- actorTrustClass: ctx.trustContext?.trustClass,
1529
+ actorTrustClass: resolveTurnActorTrustClass(ctx),
1873
1530
  },
1874
1531
  },
1875
1532
  buildPluginTurnContext(ctx, reqId),
@@ -1932,7 +1589,7 @@ export async function runAgentLoopImpl(
1932
1589
  }
1933
1590
  if (result.compacted) {
1934
1591
  await applySuccessfulCompaction(result, compactedBasis);
1935
- shouldInjectWorkspace = true;
1592
+ state.shouldInjectWorkspace = true;
1936
1593
  }
1937
1594
  },
1938
1595
  reinjectForMode: async (
@@ -1951,19 +1608,20 @@ export async function runAgentLoopImpl(
1951
1608
  // injection assembly on the same turn.
1952
1609
  ctx.messages = reducedMessages;
1953
1610
 
1954
- // When THIS iteration compacted, it stripped existing NOW.md /
1955
- // PKB blocks — so we re-inject current content. A later iteration
1956
- // that only truncates or downgrades must NOT re-force PKB/NOW,
1611
+ // When THIS iteration compacted, it stripped the existing
1612
+ // memory-static block — so we re-inject current content. A later
1613
+ // iteration that only truncates or downgrades must NOT re-force it,
1957
1614
  // or each round would grow the token count.
1958
1615
  // Gate: only the iteration that actually compacted re-injects.
1616
+ // (The `<knowledge_base>`, NOW.md, and v2 static `<info>` blocks
1617
+ // self-gate inside their injectors on whether they are already
1618
+ // present in `reducedMessages`.)
1959
1619
  const injection = await applyRuntimeInjections(reducedMessages, {
1960
1620
  ...injectionOpts,
1961
- ...(stepCompacted && { pkbContext: currentPkbContent }),
1962
- ...(stepCompacted && { memoryV2Static: currentMemoryV2Static }),
1963
- ...(stepCompacted && { nowScratchpad: currentNowContent }),
1964
- workspaceTopLevelContext: shouldInjectWorkspace
1965
- ? ctx.workspaceTopLevelContext
1966
- : null,
1621
+ workspaceTopLevelContext: buildWorkspaceTopLevelContext(
1622
+ ctx,
1623
+ state.shouldInjectWorkspace,
1624
+ ),
1967
1625
  // Once ANY iteration has compacted `ctx.messages`, the captured
1968
1626
  // `slackChronologicalMessages` snapshot (built from the full
1969
1627
  // persisted transcript) would overwrite the compacted history
@@ -2018,12 +1676,10 @@ export async function runAgentLoopImpl(
2018
1676
  currentInjectionMode = overflowResult.injectionMode;
2019
1677
  reducerState = overflowResult.reducerState;
2020
1678
  if (overflowResult.reducerCompacted) {
2021
- reducerCompacted = true;
1679
+ state.reducerCompacted = true;
2022
1680
  }
2023
1681
  }
2024
1682
 
2025
- let preRepairMessages = runMessages;
2026
-
2027
1683
  // Replace historical web_search_tool_result blocks with text summaries.
2028
1684
  // The opaque `encrypted_content` tokens Anthropic attaches to each result
2029
1685
  // expire / are route-scoped; replaying a stale token is rejected with
@@ -2046,13 +1702,12 @@ export async function runAgentLoopImpl(
2046
1702
  // context with a fresh array; `runHook` forwards whichever the chain
2047
1703
  // settles on. Order is plugin registration order.
2048
1704
  //
2049
- // Fires BEFORE `preRunHistoryLength` is captured so the boundary
2050
- // between pre-existing and hook-emitted messages consumed by the
2051
- // ordering-error retry gate, the post-run reconcile loop, and the
2052
- // new-message extraction for persistence — reflects exactly what
2053
- // `agentLoop.run` receives.
1705
+ // Fires BEFORE the agent loop runs so the hook-emitted messages are part
1706
+ // of the loop's input; the loop then reports its own appended output via
1707
+ // `AgentLoopRunResult.newMessages`, which is what persistence consumes.
2054
1708
  const userPromptCtx: UserPromptSubmitContext = {
2055
1709
  conversationId: ctx.conversationId,
1710
+ prompt: options?.titleText ?? content,
2056
1711
  originalMessages: ctx.messages,
2057
1712
  latestMessages: runMessages,
2058
1713
  logger: rlog,
@@ -2063,8 +1718,6 @@ export async function runAgentLoopImpl(
2063
1718
  );
2064
1719
  runMessages = finalUserPromptCtx.latestMessages;
2065
1720
 
2066
- let preRunHistoryLength = runMessages.length;
2067
-
2068
1721
  const shouldGenerateTitle = isReplaceableTitle(
2069
1722
  getConversation(ctx.conversationId)?.title ?? null,
2070
1723
  );
@@ -2078,6 +1731,7 @@ export async function runAgentLoopImpl(
2078
1731
  rlog,
2079
1732
  turnChannelContext: capturedTurnChannelContext,
2080
1733
  turnInterfaceContext: capturedTurnInterfaceContext,
1734
+ applyCompaction: applySuccessfulCompaction,
2081
1735
  };
2082
1736
  const eventHandler = (event: AgentEvent): Promise<void> =>
2083
1737
  dispatchAgentEvent(state, deps, event);
@@ -2097,82 +1751,44 @@ export async function runAgentLoopImpl(
2097
1751
  rlog.info({ callSite: turnCallSite }, "Starting agent loop run");
2098
1752
 
2099
1753
  // Thread the orchestrator's canonical per-turn context into the agent
2100
- // loop so its internal pipeline invocations (llmCall, emptyResponse,
2101
- // toolError, toolResultTruncate, toolExecute) see the real
2102
- // conversation identity / trust / contextWindowManager instead of the
2103
- // synthesized `"agent-loop"` placeholder. The loop clones this value
1754
+ // loop so its internal pipeline invocations (e.g. compaction) see the
1755
+ // real conversation identity / trust / contextWindowManager instead of
1756
+ // the synthesized `"agent-loop"` placeholder. The loop clones this value
2104
1757
  // and overwrites `turnIndex` with its own tool-use iteration counter.
2105
1758
  const loopTurnCtx = buildPluginTurnContext(ctx, reqId);
2106
1759
 
2107
- // Hooks for the loop-owned mid-loop compaction. The agent loop owns the
1760
+ // Hook for the loop-owned mid-loop compaction. The agent loop owns the
2108
1761
  // trigger (its budget gate), the `compaction` pipeline call, the result
2109
1762
  // interpretation (circuit-breaker bookkeeping + the exhaustion decision),
2110
- // and the inline continue; these callbacks bridge the durable / injection
2111
- // state the loop is intentionally blind to. Durable persistence and
2112
- // re-injection stay orchestrator-supplied for now.
1763
+ // and the inline continue; this callback bridges the injection state the
1764
+ // loop is intentionally blind to. Durable persistence is signalled via
1765
+ // events; re-injection stays orchestrator-supplied for now.
2113
1766
  const midLoopCompaction: MidLoopCompaction = {
2114
- prepare: (history) => {
2115
- // Strip injected context so the compactor summarizes the raw
2116
- // persistent messages, and commit the stripped set to durable state.
2117
- const rawHistory = stripInjectionsForCompaction(history);
2118
- ctx.messages = rawHistory;
2119
- markHistoryStrippedBestEffort(ctx.conversationId, Date.now(), rlog);
2120
- return {
2121
- rawHistory,
2122
- options: {
2123
- lastCompactedAt: ctx.contextCompactedAt ?? undefined,
2124
- force: true,
2125
- targetInputTokensOverride:
2126
- resolveCurrentContextBudget().preflightBudget,
2127
- conversationOriginChannel:
2128
- getConversationOriginChannel(ctx.conversationId) ?? undefined,
2129
- overrideProfile: resolveCurrentOverrideProfile() ?? null,
2130
- actorTrustClass: ctx.trustContext?.trustClass,
2131
- },
2132
- };
2133
- },
2134
- applyResult: async (result, rawHistory) => {
2135
- await applySuccessfulCompaction(result, rawHistory);
2136
- reducerCompacted = true;
2137
- shouldInjectWorkspace = true;
2138
- },
2139
- reinject: async () => {
1767
+ postCompactionHook: async ({ history, turnContext }) => {
2140
1768
  // stripInjectionsForCompaction() unconditionally removed the existing
2141
- // NOW.md block, so re-inject the current content regardless of whether
2142
- // compaction actually ran.
2143
- const injection = await applyRuntimeInjections(ctx.messages, {
1769
+ // memory-static block, so re-inject the current content regardless of
1770
+ // whether compaction actually ran. The `<knowledge_base>`, NOW.md, and
1771
+ // v2 static `<info>` blocks self-gate inside their injectors on block
1772
+ // presence.
1773
+ const injection = await postCompactReinject({
2144
1774
  ...injectionOpts,
2145
- pkbContext: currentPkbContent,
2146
- memoryV2Static: currentMemoryV2Static,
2147
- nowScratchpad: currentNowContent,
2148
- workspaceTopLevelContext: shouldInjectWorkspace
2149
- ? ctx.workspaceTopLevelContext
2150
- : null,
1775
+ workspaceTopLevelContext: buildWorkspaceTopLevelContext(
1776
+ ctx,
1777
+ state.shouldInjectWorkspace,
1778
+ ),
2151
1779
  // Suppress the chronological-transcript snapshot once the reducer
2152
1780
  // has collapsed `ctx.messages`; the captured snapshot reflects the
2153
1781
  // full persisted transcript and would overwrite compaction.
2154
- slackChronologicalMessages: reducerCompacted
1782
+ slackChronologicalMessages: state.reducerCompacted
2155
1783
  ? null
2156
1784
  : injectionOpts.slackChronologicalMessages,
2157
1785
  mode: currentInjectionMode,
2158
- turnContext: buildPluginTurnContext(ctx, reqId),
1786
+ turnContext,
1787
+ history,
1788
+ isTrustedActor,
1789
+ logger: rlog,
2159
1790
  });
2160
- runMessages = injection.messages;
2161
- if (isTrustedActor && currentInjectionMode !== "minimal") {
2162
- ctx.graphMemory.retrackCachedNodes();
2163
- }
2164
- const midLoopCompactStrip =
2165
- stripHistoricalWebSearchResults(runMessages);
2166
- if (midLoopCompactStrip.stats.blocksStripped > 0) {
2167
- rlog.info(
2168
- { phase: "mid-loop-compact", ...midLoopCompactStrip.stats },
2169
- "Converted historical web_search_tool_result blocks to text summaries",
2170
- );
2171
- runMessages = midLoopCompactStrip.messages;
2172
- }
2173
- preRepairMessages = runMessages;
2174
- preRunHistoryLength = runMessages.length;
2175
- return runMessages;
1791
+ return injection.messages;
2176
1792
  },
2177
1793
  };
2178
1794
 
@@ -2188,10 +1804,8 @@ export async function runAgentLoopImpl(
2188
1804
  msgs: Message[],
2189
1805
  compaction?: MidLoopCompaction,
2190
1806
  ): Promise<Message[]> => {
2191
- const { history, exitReason } = await ctx.agentLoop.run(
2192
- msgs,
2193
- eventHandler,
2194
- {
1807
+ const { history, exitReason, appendedNewMessages, newMessages } =
1808
+ await ctx.agentLoop.run(msgs, eventHandler, {
2195
1809
  signal: abortController.signal,
2196
1810
  requestId: reqId,
2197
1811
  onCheckpoint,
@@ -2205,8 +1819,9 @@ export async function runAgentLoopImpl(
2205
1819
  // `<memory>` block, so anchor the provider's long-TTL cache breakpoint
2206
1820
  // on the most recent stable message instead.
2207
1821
  mutableLatestUserMessage: memoryV3Live,
2208
- },
2209
- );
1822
+ });
1823
+ lastRunAppendedNewMessages = appendedNewMessages;
1824
+ lastRunNewMessages = newMessages;
2210
1825
  if (exitReason === "handoff") {
2211
1826
  yieldedForHandoff = true;
2212
1827
  pendingCheckpointYield = "handoff";
@@ -2244,10 +1859,7 @@ export async function runAgentLoopImpl(
2244
1859
  }
2245
1860
 
2246
1861
  // One-shot ordering error retry
2247
- if (
2248
- state.orderingErrorDetected &&
2249
- updatedHistory.length === preRunHistoryLength
2250
- ) {
1862
+ if (state.orderingErrorDetected && !lastRunAppendedNewMessages) {
2251
1863
  rlog.warn(
2252
1864
  { phase: "retry" },
2253
1865
  "Provider ordering error detected, attempting one-shot deep-repair retry",
@@ -2261,12 +1873,10 @@ export async function runAgentLoopImpl(
2261
1873
  // `user-prompt-submit` hook (the default history-repair plugin runs
2262
1874
  // `repairHistory` there); widening that surface to deep-repair is
2263
1875
  // intentionally deferred until there's a concrete plugin-level use case.
2264
- const retryRepair = deepRepairHistory(runMessages);
1876
+ const retryRepair = deepRepairHistory(updatedHistory);
2265
1877
  runMessages = retryRepair.messages;
2266
1878
  const retryStrip = stripHistoricalWebSearchResults(runMessages);
2267
1879
  runMessages = retryStrip.messages;
2268
- preRepairMessages = runMessages;
2269
- preRunHistoryLength = runMessages.length;
2270
1880
  state.orderingErrorDetected = false;
2271
1881
  state.deferredOrderingError = null;
2272
1882
 
@@ -2319,15 +1929,29 @@ export async function runAgentLoopImpl(
2319
1929
  }
2320
1930
  // Can't resize — replace with a text annotation so the model
2321
1931
  // can explain the situation rather than silently dropping context
2322
- return [
2323
- {
2324
- type: "text" as const,
2325
- text: "(An image was attached but could not be sent — its dimensions exceed the provider limit and automatic resize was not available. Please resize the image and try again.)",
2326
- },
2327
- ];
1932
+ return [{ type: "text" as const, text: UNSENDABLE_IMAGE_NOTE }];
2328
1933
  }),
2329
1934
  };
2330
1935
  });
1936
+ // The transform above only mutates ctx.messages for the current retry.
1937
+ // Persist the downgrade for images that can never be sent so the rejected
1938
+ // upload doesn't rehydrate from the DB and resurface on later turns. This
1939
+ // is cleanup for future turns, so a persistence failure must never abort
1940
+ // the retry that is about to run — log it and continue.
1941
+ try {
1942
+ const rewritten = persistUnsendableImageDowngrades(ctx.conversationId);
1943
+ if (rewritten > 0) {
1944
+ rlog.info(
1945
+ { phase: "image-recovery", rewritten },
1946
+ "Persisted unsendable-image downgrades so they cannot resurface",
1947
+ );
1948
+ }
1949
+ } catch (err) {
1950
+ rlog.warn(
1951
+ { phase: "image-recovery", err },
1952
+ "Failed to persist unsendable-image downgrade; continuing with in-memory recovery",
1953
+ );
1954
+ }
2331
1955
  runMessages = ctx.messages;
2332
1956
  updatedHistory = await runAgentLoop(runMessages);
2333
1957
  if (state.imageTooLargeDetected) {
@@ -2356,19 +1980,9 @@ export async function runAgentLoopImpl(
2356
1980
  // limit), incorporate those new messages into ctx.messages so the
2357
1981
  // convergence loop operates on the full (larger) history.
2358
1982
  if (state.contextTooLargeDetected) {
2359
- // Detect whether ctx.messages currently lacks NOW.md so we know if
2360
- // it needs to be re-injected. Mid-loop compaction (line ~1067) may
2361
- // have already stripped injections before escalating here, so we
2362
- // check actual message state rather than tracking mutation sites.
2363
- let convergenceStripped =
2364
- findLastInjectedNowContent(ctx.messages) === null;
2365
-
2366
- if (updatedHistory.length > preRunHistoryLength) {
1983
+ if (lastRunAppendedNewMessages) {
2367
1984
  ctx.messages = stripInjectionsForCompaction(updatedHistory);
2368
- markHistoryStrippedBestEffort(ctx.conversationId, Date.now(), rlog);
2369
- convergenceStripped = true;
2370
- preRepairMessages = updatedHistory;
2371
- preRunHistoryLength = updatedHistory.length;
1985
+ markHistoryStrippedBestEffort(ctx.conversationId);
2372
1986
  }
2373
1987
  if (!reducerState) {
2374
1988
  reducerState = createInitialReducerState();
@@ -2457,7 +2071,7 @@ export async function runAgentLoopImpl(
2457
2071
  }
2458
2072
  if (emergencyResult.compacted) {
2459
2073
  await applySuccessfulCompaction(emergencyResult, ctx.messages);
2460
- shouldInjectWorkspace = true;
2074
+ state.shouldInjectWorkspace = true;
2461
2075
  }
2462
2076
  // Clear the overflow flag and re-run the agent loop with
2463
2077
  // the compacted context.
@@ -2508,7 +2122,7 @@ export async function runAgentLoopImpl(
2508
2122
  ctx.contextWindowManager.maybeCompact(msgs, signal!, {
2509
2123
  ...(opts ?? {}),
2510
2124
  overrideProfile: resolveCurrentOverrideProfile() ?? null,
2511
- actorTrustClass: ctx.trustContext?.trustClass,
2125
+ actorTrustClass: resolveTurnActorTrustClass(ctx),
2512
2126
  }),
2513
2127
  abortController.signal,
2514
2128
  );
@@ -2537,22 +2151,22 @@ export async function runAgentLoopImpl(
2537
2151
  step.compactionResult,
2538
2152
  convergenceCompactionBasis,
2539
2153
  );
2540
- shouldInjectWorkspace = true;
2541
- reducerCompacted = true;
2154
+ state.shouldInjectWorkspace = true;
2155
+ state.reducerCompacted = true;
2542
2156
  }
2543
2157
 
2544
- // Only re-inject NOW.md when ctx.messages was actually stripped;
2545
- // otherwise the existing NOW.md block is still present and
2546
- // re-injecting would duplicate it.
2158
+ // Only re-inject the memory-static block when ctx.messages was
2159
+ // actually stripped; otherwise the existing block is still present and
2160
+ // re-injecting would duplicate it. (The `<knowledge_base>` and NOW.md
2161
+ // blocks self-gate inside their injectors on whether they are already
2162
+ // present in `ctx.messages`.)
2547
2163
  const injection = await applyRuntimeInjections(ctx.messages, {
2548
2164
  ...injectionOpts,
2549
- pkbContext: currentPkbContent,
2550
- memoryV2Static: convergenceStripped ? currentMemoryV2Static : null,
2551
- nowScratchpad: convergenceStripped ? currentNowContent : null,
2552
- workspaceTopLevelContext: shouldInjectWorkspace
2553
- ? ctx.workspaceTopLevelContext
2554
- : null,
2555
- slackChronologicalMessages: reducerCompacted
2165
+ workspaceTopLevelContext: buildWorkspaceTopLevelContext(
2166
+ ctx,
2167
+ state.shouldInjectWorkspace,
2168
+ ),
2169
+ slackChronologicalMessages: state.reducerCompacted
2556
2170
  ? null
2557
2171
  : injectionOpts.slackChronologicalMessages,
2558
2172
  mode: currentInjectionMode,
@@ -2570,8 +2184,6 @@ export async function runAgentLoopImpl(
2570
2184
  );
2571
2185
  runMessages = convergenceStrip.messages;
2572
2186
  }
2573
- preRepairMessages = runMessages;
2574
- preRunHistoryLength = runMessages.length;
2575
2187
  state.contextTooLargeDetected = false;
2576
2188
  yieldedForBudget = false;
2577
2189
 
@@ -2594,12 +2206,9 @@ export async function runAgentLoopImpl(
2594
2206
  // Fold rerun progress into ctx.messages so the next reducer
2595
2207
  // tier operates on up-to-date history instead of stale
2596
2208
  // pre-rerun messages.
2597
- if (updatedHistory.length > preRunHistoryLength) {
2209
+ if (lastRunAppendedNewMessages) {
2598
2210
  ctx.messages = stripInjectionsForCompaction(updatedHistory);
2599
- markHistoryStrippedBestEffort(ctx.conversationId, Date.now(), rlog);
2600
- convergenceStripped = true;
2601
- preRepairMessages = updatedHistory;
2602
- preRunHistoryLength = updatedHistory.length;
2211
+ markHistoryStrippedBestEffort(ctx.conversationId);
2603
2212
  }
2604
2213
  }
2605
2214
  }
@@ -2638,10 +2247,8 @@ export async function runAgentLoopImpl(
2638
2247
  messages: ctx.messages,
2639
2248
  signal: abortController.signal,
2640
2249
  options: {
2641
- lastCompactedAt: ctx.contextCompactedAt ?? undefined,
2642
2250
  force: true,
2643
2251
  minKeepRecentUserTurns: 0,
2644
- targetInputTokensOverride: correctedTarget,
2645
2252
  overrideProfile: resolveCurrentOverrideProfile() ?? null,
2646
2253
  },
2647
2254
  },
@@ -2671,7 +2278,7 @@ export async function runAgentLoopImpl(
2671
2278
  }
2672
2279
  }
2673
2280
  // Only track when the summary LLM actually ran; `force: true`
2674
- // bypasses the cooldown but not the early-return paths.
2281
+ // bypasses the auto-threshold gate but not the early-return paths.
2675
2282
  if (
2676
2283
  emergencyCompact &&
2677
2284
  emergencyCompact.summaryFailed !== undefined
@@ -2684,21 +2291,22 @@ export async function runAgentLoopImpl(
2684
2291
  }
2685
2292
  if (emergencyCompact?.compacted) {
2686
2293
  await applySuccessfulCompaction(emergencyCompact, ctx.messages);
2687
- reducerCompacted = true;
2688
- shouldInjectWorkspace = true;
2294
+ state.reducerCompacted = true;
2295
+ state.shouldInjectWorkspace = true;
2689
2296
  }
2690
2297
 
2691
- // Only re-inject NOW.md when ctx.messages was actually stripped;
2692
- // otherwise the existing block is still present.
2298
+ // Only re-inject the memory-static block when ctx.messages was
2299
+ // actually stripped; otherwise the existing block is still present.
2300
+ // (The `<knowledge_base>`, NOW.md, and v2 static `<info>` blocks
2301
+ // self-gate inside their injectors on whether they are already
2302
+ // present in `ctx.messages`.)
2693
2303
  const injection = await applyRuntimeInjections(ctx.messages, {
2694
2304
  ...injectionOpts,
2695
- pkbContext: currentPkbContent,
2696
- memoryV2Static: convergenceStripped ? currentMemoryV2Static : null,
2697
- nowScratchpad: convergenceStripped ? currentNowContent : null,
2698
- workspaceTopLevelContext: shouldInjectWorkspace
2699
- ? ctx.workspaceTopLevelContext
2700
- : null,
2701
- slackChronologicalMessages: reducerCompacted
2305
+ workspaceTopLevelContext: buildWorkspaceTopLevelContext(
2306
+ ctx,
2307
+ state.shouldInjectWorkspace,
2308
+ ),
2309
+ slackChronologicalMessages: state.reducerCompacted
2702
2310
  ? null
2703
2311
  : injectionOpts.slackChronologicalMessages,
2704
2312
  mode: currentInjectionMode,
@@ -2716,8 +2324,6 @@ export async function runAgentLoopImpl(
2716
2324
  );
2717
2325
  runMessages = fallbackStrip.messages;
2718
2326
  }
2719
- preRepairMessages = runMessages;
2720
- preRunHistoryLength = runMessages.length;
2721
2327
  state.contextTooLargeDetected = false;
2722
2328
 
2723
2329
  updatedHistory = await runAgentLoop(runMessages);
@@ -2771,44 +2377,11 @@ export async function runAgentLoopImpl(
2771
2377
  onEvent(buildConversationErrorMessage(ctx.conversationId, classified));
2772
2378
  }
2773
2379
 
2774
- // Reconcile synthesized cancellation tool_results
2775
- for (let i = preRunHistoryLength; i < updatedHistory.length; i++) {
2776
- const msg = updatedHistory[i];
2777
- if (msg.role === "user") {
2778
- for (const block of msg.content) {
2779
- if (
2780
- block.type === "tool_result" &&
2781
- !state.pendingToolResults.has(block.tool_use_id) &&
2782
- !state.persistedToolUseIds.has(block.tool_use_id)
2783
- ) {
2784
- state.pendingToolResults.set(block.tool_use_id, {
2785
- content: block.content,
2786
- isError: block.is_error ?? false,
2787
- });
2788
- }
2789
- }
2790
- }
2791
- }
2792
-
2793
- // Flush remaining tool results
2380
+ // Flush remaining tool results. On a normal turn these drain at the next
2381
+ // `message_complete`; an aborted or yielded loop exits with them still
2382
+ // buffered, so finalize the (possibly already on-arrival-reserved) grouped
2383
+ // row here rather than writing a duplicate.
2794
2384
  if (state.pendingToolResults.size > 0) {
2795
- const toolResultBlocks = Array.from(
2796
- state.pendingToolResults.entries(),
2797
- ).map(([toolUseId, result]) => ({
2798
- type: "tool_result",
2799
- tool_use_id: toolUseId,
2800
- content: redactSecrets(result.content),
2801
- is_error: result.isError,
2802
- ...(result.contentBlocks
2803
- ? {
2804
- contentBlocks: result.contentBlocks.map((block) =>
2805
- block.type === "text"
2806
- ? { ...block, text: redactSecrets(block.text) }
2807
- : block,
2808
- ),
2809
- }
2810
- : {}),
2811
- }));
2812
2385
  const toolResultMetadata = {
2813
2386
  ...provenanceFromTrustContext(ctx.trustContext),
2814
2387
  userMessageChannel: capturedTurnChannelContext.userMessageChannel,
@@ -2818,21 +2391,12 @@ export async function runAgentLoopImpl(
2818
2391
  assistantMessageInterface:
2819
2392
  capturedTurnInterfaceContext.assistantMessageInterface,
2820
2393
  };
2821
- await runPipeline<PersistArgs, PersistResult>(
2822
- "persistence",
2823
- getMiddlewaresFor("persistence"),
2824
- defaultPersistenceTerminal,
2825
- {
2826
- op: "add",
2827
- conversationId: ctx.conversationId,
2828
- role: "user",
2829
- content: JSON.stringify(toolResultBlocks),
2830
- metadata: toolResultMetadata,
2831
- },
2832
- buildPluginTurnContext(ctx, reqId),
2833
- DEFAULT_TIMEOUTS.persistence,
2394
+ await finalizePendingToolResultRow(
2395
+ state,
2396
+ ctx.conversationId,
2397
+ toolResultMetadata,
2398
+ rlog,
2834
2399
  );
2835
- state.pendingToolResults.clear();
2836
2400
  }
2837
2401
 
2838
2402
  // Persist the budget_yield_unrecovered notice now that any pending
@@ -2856,24 +2420,13 @@ export async function runAgentLoopImpl(
2856
2420
  };
2857
2421
  let yieldNoticePersistedId: string | null = null;
2858
2422
  try {
2859
- const yieldPersistResult = (await runPipeline<
2860
- PersistArgs,
2861
- PersistResult
2862
- >(
2863
- "persistence",
2864
- getMiddlewaresFor("persistence"),
2865
- defaultPersistenceTerminal,
2866
- {
2867
- op: "add",
2868
- conversationId: ctx.conversationId,
2869
- role: "assistant",
2870
- content: JSON.stringify(yieldNoticeMessage.content),
2871
- metadata: yieldNoticeMetadata,
2872
- },
2873
- buildPluginTurnContext(ctx, reqId),
2874
- DEFAULT_TIMEOUTS.persistence,
2875
- )) as PersistAddResult;
2876
- yieldNoticePersistedId = yieldPersistResult.message.id;
2423
+ const yieldRow = await addMessage(
2424
+ ctx.conversationId,
2425
+ "assistant",
2426
+ JSON.stringify(yieldNoticeMessage.content),
2427
+ { metadata: yieldNoticeMetadata },
2428
+ );
2429
+ yieldNoticePersistedId = yieldRow.id;
2877
2430
  } catch (err) {
2878
2431
  // Non-fatal — a DB hiccup must not escalate a budget-yield exit into
2879
2432
  // a turn-level throw. The live SSE event was already emitted, so the
@@ -2929,7 +2482,7 @@ export async function runAgentLoopImpl(
2929
2482
  }
2930
2483
 
2931
2484
  // Reconstruct history
2932
- const newMessages = updatedHistory.slice(preRunHistoryLength).map((msg) => {
2485
+ const newMessages = lastRunNewMessages.map((msg) => {
2933
2486
  if (msg.role !== "assistant") return msg;
2934
2487
  const { cleanedContent } = cleanAssistantContent(msg.content);
2935
2488
  const cleanedBlocks = cleanedContent as ContentBlock[];
@@ -2960,10 +2513,6 @@ export async function runAgentLoopImpl(
2960
2513
  state.assistantRowAwaitingFinalization &&
2961
2514
  state.lastAssistantMessageId
2962
2515
  ) {
2963
- // Direct `deleteMessageById` (not via the `persistence` pipeline):
2964
- // see the same rationale on the matching cleanup in
2965
- // `handleLlmCallStarted` — an unfinalized reservation has no
2966
- // observable history for plugins.
2967
2516
  try {
2968
2517
  deleteMessageById(state.lastAssistantMessageId);
2969
2518
  } catch (err) {
@@ -2985,20 +2534,12 @@ export async function runAgentLoopImpl(
2985
2534
  const errorAssistantMessage = createAssistantMessage(
2986
2535
  state.providerErrorUserMessage,
2987
2536
  );
2988
- const errorPersistResult = (await runPipeline<PersistArgs, PersistResult>(
2989
- "persistence",
2990
- getMiddlewaresFor("persistence"),
2991
- defaultPersistenceTerminal,
2992
- {
2993
- op: "add",
2994
- conversationId: ctx.conversationId,
2995
- role: "assistant",
2996
- content: JSON.stringify(errorAssistantMessage.content),
2997
- metadata: errChannelMeta,
2998
- },
2999
- buildPluginTurnContext(ctx, reqId),
3000
- DEFAULT_TIMEOUTS.persistence,
3001
- )) as PersistAddResult;
2537
+ const errorRow = await addMessage(
2538
+ ctx.conversationId,
2539
+ "assistant",
2540
+ JSON.stringify(errorAssistantMessage.content),
2541
+ { metadata: errChannelMeta },
2542
+ );
3002
2543
  persistedErrorAssistantMessage = true;
3003
2544
  // Repoint `lastAssistantMessageId` at the synthetic error row so the
3004
2545
  // post-loop sync, attachment resolution, and `message_complete`/
@@ -3007,7 +2548,7 @@ export async function runAgentLoopImpl(
3007
2548
  // above. Mark finalization complete so the next LLM call in this run
3008
2549
  // (or a downstream handler) doesn't try to clean up an id that
3009
2550
  // already corresponds to a finalized row.
3010
- state.lastAssistantMessageId = errorPersistResult.message.id;
2551
+ state.lastAssistantMessageId = errorRow.id;
3011
2552
  state.assistantRowAwaitingFinalization = false;
3012
2553
  newMessages.push(errorAssistantMessage);
3013
2554
  // Pipe the just-assigned message id into any orphaned LLM request log
@@ -3021,10 +2562,7 @@ export async function runAgentLoopImpl(
3021
2562
  // other conversations cannot collide. Non-fatal — a DB hiccup must
3022
2563
  // not escalate a provider rejection into a turn-level throw.
3023
2564
  try {
3024
- backfillMessageIdOnLogs(
3025
- ctx.conversationId,
3026
- errorPersistResult.message.id,
3027
- );
2565
+ backfillMessageIdOnLogs(ctx.conversationId, errorRow.id);
3028
2566
  } catch (err) {
3029
2567
  rlog.warn(
3030
2568
  { err },
@@ -3037,7 +2575,16 @@ export async function runAgentLoopImpl(
3037
2575
  // would create a duplicate plain-text bubble below the alert card.
3038
2576
  }
3039
2577
 
3040
- let restoredHistory = [...preRepairMessages, ...newMessages];
2578
+ // Base persisted into `ctx.messages` is the loop's own returned history
2579
+ // (minus the tail it appended this run), with the cleaned `newMessages`
2580
+ // re-appended on top. Sourcing the base from the loop keeps it in lockstep
2581
+ // with any in-loop compaction without the orchestrator maintaining a
2582
+ // parallel snapshot across re-entry sites.
2583
+ const loopBase = updatedHistory.slice(
2584
+ 0,
2585
+ updatedHistory.length - lastRunNewMessages.length,
2586
+ );
2587
+ let restoredHistory = [...loopBase, ...newMessages];
3041
2588
 
3042
2589
  // Post-turn tool result truncation: save large results to disk and
3043
2590
  // replace in-context content with a prefix/suffix stub + file pointer.
@@ -3229,30 +2776,6 @@ export async function runAgentLoopImpl(
3229
2776
  publishLoopMessagesChanged();
3230
2777
  }
3231
2778
  }
3232
-
3233
- // Second title pass: after 3 completed turns, re-generate the title
3234
- // using the last 3 messages for better context. Only fires when the
3235
- // current title was auto-generated (isAutoTitle = 1) and the user
3236
- // has not opted out via `conversations.skipAutoRetitling`.
3237
- if (ctx.turnCount === 2 && !getConfig().conversations.skipAutoRetitling) {
3238
- // turnCount is 0-indexed, incremented in finally; 2 = about to become 3rd turn
3239
- queueRegenerateConversationTitle({
3240
- conversationId: ctx.conversationId,
3241
- provider: ctx.provider,
3242
- onTitleUpdated: (title) => {
3243
- onEvent({
3244
- type: "conversation_title_updated",
3245
- conversationId: ctx.conversationId,
3246
- title,
3247
- });
3248
- onEvent({
3249
- type: "sync_changed",
3250
- tags: [conversationMetadataSyncTag(ctx.conversationId)],
3251
- });
3252
- },
3253
- signal: abortController.signal,
3254
- });
3255
- }
3256
2779
  } catch (err) {
3257
2780
  const errorCtx = {
3258
2781
  phase: "agent_loop" as const,
@@ -3312,8 +2835,6 @@ export async function runAgentLoopImpl(
3312
2835
  }
3313
2836
  } finally {
3314
2837
  if (turnStarted) {
3315
- cleanupBootstrapAfterTurnThreshold(ctx.conversationId);
3316
-
3317
2838
  ctx.turnCount++;
3318
2839
  const config = getConfig();
3319
2840
  const maxWait = config.workspaceGit?.turnCommitMaxWaitMs ?? 4000;
@@ -3351,7 +2872,7 @@ export async function runAgentLoopImpl(
3351
2872
  ctx.profiler.emitSummary(ctx.traceEmitter, reqId);
3352
2873
 
3353
2874
  ctx.abortController = null;
3354
- ctx.processing = false;
2875
+ ctx.setProcessing(false);
3355
2876
  ctx.onConfirmationOutcome = undefined;
3356
2877
  ctx.surfaceActionRequestIds.delete(ctx.currentRequestId ?? "");
3357
2878
  ctx.approvedViaPromptThisTurn = false;
@@ -3498,7 +3019,7 @@ export async function applyCompactionResult(
3498
3019
  result.summaryText,
3499
3020
  ctx.contextCompactedMessageCount,
3500
3021
  );
3501
- markHistoryStrippedBestEffort(ctx.conversationId, compactedAt, log);
3022
+ markHistoryStrippedBestEffort(ctx.conversationId);
3502
3023
  if (options.slackContextCompactionWatermarkTs) {
3503
3024
  updateConversationSlackContextWatermark(
3504
3025
  ctx.conversationId,