@vellumai/assistant 0.8.7 → 0.8.8-dev.202606052332.17fc8ea

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (570)
  1. package/Dockerfile +20 -4
  2. package/bun.lock +2 -2
  3. package/docker-entrypoint.sh +4 -2
  4. package/docker-init-apt-root.sh +3 -1
  5. package/docker-kata-apt-env.sh +3 -1
  6. package/docker-kata-runtime-family.sh +12 -0
  7. package/docs/architecture/memory.md +1 -1
  8. package/examples/plugins/echo/README.md +61 -66
  9. package/examples/plugins/echo/hooks/post-tool-use.ts +18 -0
  10. package/examples/plugins/echo/hooks/stop.ts +16 -0
  11. package/examples/plugins/echo/hooks/user-prompt-submit.ts +18 -0
  12. package/examples/plugins/echo/package.json +1 -2
  13. package/examples/plugins/echo/src/emit.ts +19 -0
  14. package/node_modules/@vellumai/skill-host-contracts/src/server-message.ts +3 -3
  15. package/node_modules/@vellumai/skill-host-contracts/src/skill-host.ts +7 -6
  16. package/openapi.yaml +3378 -335
  17. package/package.json +2 -2
  18. package/scripts/generate-openapi.ts +68 -41
  19. package/src/__tests__/agent-loop-exit-reason.test.ts +35 -93
  20. package/src/__tests__/agent-loop-provider-error-recording.test.ts +1 -1
  21. package/src/__tests__/agent-loop.test.ts +37 -87
  22. package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +2 -0
  23. package/src/__tests__/annotate-activity-metadata.test.ts +262 -0
  24. package/src/__tests__/annotate-risk-options.test.ts +2 -3
  25. package/src/__tests__/anthropic-provider.test.ts +95 -2
  26. package/src/__tests__/app-control-flow.test.ts +1 -1
  27. package/src/__tests__/app-dir-path-guard.test.ts +1 -0
  28. package/src/__tests__/approval-routes-http.test.ts +4 -1
  29. package/src/__tests__/assistant-event-hub.test.ts +25 -0
  30. package/src/__tests__/assistant-events-sse-shed.test.ts +8 -0
  31. package/src/__tests__/{conversation-stream-state.test.ts → assistant-stream-state.test.ts} +252 -91
  32. package/src/__tests__/auth-fallback-events-store.test.ts +116 -0
  33. package/src/__tests__/background-workers-disk-pressure.test.ts +6 -0
  34. package/src/__tests__/btw-routes.test.ts +62 -3
  35. package/src/__tests__/build-persisted-content.test.ts +184 -0
  36. package/src/__tests__/catalog-files.test.ts +1 -1
  37. package/src/__tests__/channel-approval-routes.test.ts +1 -1
  38. package/src/__tests__/channel-approvals.test.ts +1 -1
  39. package/src/__tests__/clawhub-files.test.ts +1 -1
  40. package/src/__tests__/compaction-circuit.test.ts +258 -0
  41. package/src/__tests__/compaction-direct.test.ts +132 -0
  42. package/src/__tests__/compaction.benchmark.test.ts +0 -30
  43. package/src/__tests__/config-watcher.test.ts +1 -1
  44. package/src/__tests__/conversation-abort-tool-results.test.ts +57 -19
  45. package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +6 -5
  46. package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +10 -7
  47. package/src/__tests__/conversation-agent-loop-overflow.test.ts +316 -1143
  48. package/src/__tests__/conversation-agent-loop.test.ts +638 -1655
  49. package/src/__tests__/conversation-analysis-routes.test.ts +6 -0
  50. package/src/__tests__/conversation-clean-command.test.ts +5 -2
  51. package/src/__tests__/conversation-history-web-search.test.ts +11 -1
  52. package/src/__tests__/conversation-pairing.test.ts +4 -31
  53. package/src/__tests__/conversation-process-app-control-preactivation.test.ts +6 -0
  54. package/src/__tests__/conversation-provider-retry-repair.test.ts +30 -10
  55. package/src/__tests__/conversation-queue.test.ts +2 -0
  56. package/src/__tests__/conversation-routes-disk-view.test.ts +3 -0
  57. package/src/__tests__/conversation-routes-slash-commands.test.ts +6 -5
  58. package/src/__tests__/conversation-runtime-assembly.test.ts +310 -300
  59. package/src/__tests__/conversation-runtime-workspace.test.ts +105 -45
  60. package/src/__tests__/conversation-slash-commands.test.ts +8 -42
  61. package/src/__tests__/conversation-slash-queue.test.ts +6 -1
  62. package/src/__tests__/conversation-starter-routes.test.ts +14 -6
  63. package/src/__tests__/conversation-surfaces-action-delivery.test.ts +84 -0
  64. package/src/__tests__/conversation-sync-tags.test.ts +27 -15
  65. package/src/__tests__/conversation-title-service.test.ts +135 -2
  66. package/src/__tests__/conversation-workspace-cache-state.test.ts +17 -16
  67. package/src/__tests__/conversation-workspace-injection.test.ts +67 -2
  68. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +7 -6
  69. package/src/__tests__/conversations-import-system-filter.test.ts +101 -0
  70. package/src/__tests__/cross-provider-web-search.test.ts +214 -1
  71. package/src/__tests__/db-acp-history.test.ts +101 -0
  72. package/src/__tests__/db-schedule-syntax-migration.test.ts +5 -0
  73. package/src/__tests__/dm-persistence.test.ts +5 -1
  74. package/src/__tests__/dynamic-page-surface.test.ts +31 -0
  75. package/src/__tests__/empty-response-hook.test.ts +304 -0
  76. package/src/__tests__/feature-flag-test-helpers.ts +2 -2
  77. package/src/__tests__/file-write-tool.test.ts +63 -0
  78. package/src/__tests__/gateway-only-guard.test.ts +12 -2
  79. package/src/__tests__/gemini-image-service.test.ts +13 -0
  80. package/src/__tests__/guardian-grant-minting.test.ts +1 -1
  81. package/src/__tests__/guardian-routing-invariants.test.ts +2 -4
  82. package/src/__tests__/handlers-user-message-approval-consumption.test.ts +1 -1
  83. package/src/__tests__/heartbeat-disk-pressure.test.ts +1 -0
  84. package/src/__tests__/heartbeat-service.test.ts +1 -0
  85. package/src/__tests__/helpers/mock-provider.ts +110 -0
  86. package/src/__tests__/helpers/native-web-search-harness.ts +129 -0
  87. package/src/__tests__/history-repair-hook.test.ts +1 -0
  88. package/src/__tests__/host-app-control-routes.test.ts +1 -1
  89. package/src/__tests__/host-cu-routes-targeted.test.ts +3 -3
  90. package/src/__tests__/identity-intro-cache.test.ts +12 -100
  91. package/src/__tests__/identity-routes.test.ts +248 -7
  92. package/src/__tests__/inbound-slack-persistence.test.ts +5 -1
  93. package/src/__tests__/injector-background-turn.test.ts +3 -9
  94. package/src/__tests__/injector-chain.test.ts +139 -275
  95. package/src/__tests__/injector-disk-pressure.test.ts +75 -41
  96. package/src/__tests__/injector-document-comments.test.ts +3 -3
  97. package/src/__tests__/injector-pkb-v2-silenced.test.ts +30 -22
  98. package/src/__tests__/injector-v3-suppression.test.ts +31 -37
  99. package/src/__tests__/internal-telemetry-routes.test.ts +109 -0
  100. package/src/__tests__/list-messages-hidden-metadata.test.ts +38 -0
  101. package/src/__tests__/list-messages-page-latest.test.ts +60 -0
  102. package/src/__tests__/list-messages-tool-merge.test.ts +20 -0
  103. package/src/__tests__/llm-usage-store.test.ts +223 -1
  104. package/src/__tests__/memory-retrieval-hook.test.ts +297 -0
  105. package/src/__tests__/memory-v2-static-injector.test.ts +103 -35
  106. package/src/__tests__/native-web-search.test.ts +191 -0
  107. package/src/__tests__/onboarding-template-contract.test.ts +2 -0
  108. package/src/__tests__/openai-image-service.test.ts +17 -0
  109. package/src/__tests__/openai-provider.test.ts +31 -1
  110. package/src/__tests__/{overflow-reduce-pipeline.test.ts → overflow-reduction-loop.test.ts} +64 -284
  111. package/src/__tests__/persist-unsendable-image.test.ts +215 -0
  112. package/src/__tests__/persistence-secret-redaction.test.ts +1 -0
  113. package/src/__tests__/pkb-autoinject.test.ts +2 -5
  114. package/src/__tests__/plugin-api-shim.test.ts +3 -6
  115. package/src/__tests__/plugin-bootstrap.test.ts +14 -40
  116. package/src/__tests__/plugin-registry.test.ts +3 -76
  117. package/src/__tests__/plugin-types.test.ts +0 -193
  118. package/src/__tests__/process-message-display-content.test.ts +6 -2
  119. package/src/__tests__/reaction-persistence.test.ts +1 -1
  120. package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +5 -1
  121. package/src/__tests__/resolve-trust-class.test.ts +4 -4
  122. package/src/__tests__/runtime-events-sse-reconnect.test.ts +60 -23
  123. package/src/__tests__/schedule-routes.test.ts +603 -2
  124. package/src/__tests__/schedule-store.test.ts +41 -0
  125. package/src/__tests__/schedule-tools.test.ts +35 -0
  126. package/src/__tests__/send-endpoint-busy.test.ts +4 -1
  127. package/src/__tests__/server-history-render.test.ts +314 -1
  128. package/src/__tests__/skill-feature-flags-integration.test.ts +33 -0
  129. package/src/__tests__/skillssh-files.test.ts +1 -1
  130. package/src/__tests__/subagent-call-site-routing.test.ts +1 -1
  131. package/src/__tests__/subagent-fork-notifications.test.ts +1 -3
  132. package/src/__tests__/subagent-fork-spawn.test.ts +1 -1
  133. package/src/__tests__/subagent-manager-notify.test.ts +1 -3
  134. package/src/__tests__/subagent-notify-parent.test.ts +1 -3
  135. package/src/__tests__/subagent-spawn-tool-fork.test.ts +1 -1
  136. package/src/__tests__/system-prompt.test.ts +20 -0
  137. package/src/__tests__/task-scheduler.test.ts +162 -1
  138. package/src/__tests__/terminal-tools.test.ts +6 -1
  139. package/src/__tests__/title-generate-hook.test.ts +319 -0
  140. package/src/__tests__/tool-error-hook.test.ts +278 -0
  141. package/src/__tests__/tool-preview-lifecycle.test.ts +468 -5
  142. package/src/__tests__/tool-result-metadata-plumbing.test.ts +1 -0
  143. package/src/__tests__/tool-result-truncate-hook.test.ts +127 -0
  144. package/src/__tests__/tool-result-truncation.test.ts +0 -2
  145. package/src/__tests__/ui-choice-copy-surfaces.test.ts +254 -0
  146. package/src/__tests__/ui-work-result-surface.test.ts +159 -0
  147. package/src/__tests__/usage-routes.test.ts +285 -1
  148. package/src/__tests__/user-plugin-loader.test.ts +54 -286
  149. package/src/__tests__/voice-session-bridge.test.ts +6 -3
  150. package/src/__tests__/web-search-backend-failure.test.ts +166 -0
  151. package/src/acp/__tests__/agent-process.test.ts +161 -0
  152. package/src/acp/__tests__/client-handler.test.ts +40 -0
  153. package/src/acp/__tests__/helpers/acp-history-db.ts +82 -0
  154. package/src/acp/__tests__/helpers/exec-file-stub.ts +101 -0
  155. package/src/acp/__tests__/prepare-agent-env.test.ts +137 -0
  156. package/src/acp/__tests__/session-manager-persistence.test.ts +95 -28
  157. package/src/acp/__tests__/session-manager-resume.test.ts +736 -0
  158. package/src/acp/agent-process.ts +61 -1
  159. package/src/acp/auto-install.test.ts +196 -0
  160. package/src/acp/auto-install.ts +177 -0
  161. package/src/acp/client-handler.ts +31 -0
  162. package/src/acp/feature-gate.test.ts +48 -0
  163. package/src/acp/feature-gate.ts +34 -0
  164. package/src/acp/prepare-agent-env.ts +83 -29
  165. package/src/acp/resolve-agent.test.ts +320 -7
  166. package/src/acp/resolve-agent.ts +182 -18
  167. package/src/acp/resume-hint.ts +25 -0
  168. package/src/acp/session-manager.ts +495 -73
  169. package/src/acp/types.ts +8 -0
  170. package/src/agent/compaction-circuit.ts +60 -102
  171. package/src/agent/loop.ts +362 -485
  172. package/src/api/events/assistant-thinking-delta.ts +33 -0
  173. package/src/api/events/tool-output-chunk.ts +45 -0
  174. package/src/api/events/tool-use-preview-start.ts +32 -0
  175. package/src/api/events/trace-event.ts +69 -0
  176. package/src/api/index.ts +48 -13
  177. package/src/api/responses/conversation-message.ts +374 -0
  178. package/src/approvals/guardian-request-resolvers.ts +1 -1
  179. package/src/avatar/__tests__/avatar-store.test.ts +34 -29
  180. package/src/background-wake/next-wake.ts +1 -0
  181. package/src/cli/commands/__tests__/notifications.test.ts +58 -14
  182. package/src/cli/commands/notifications.ts +112 -60
  183. package/src/config/__tests__/feature-flag-registry-guard.test.ts +2 -2
  184. package/src/config/acp-defaults.test.ts +10 -0
  185. package/src/config/acp-defaults.ts +6 -0
  186. package/src/config/assistant-feature-flags.ts +22 -11
  187. package/src/config/bundled-skills/acp/SKILL.md +83 -31
  188. package/src/config/bundled-skills/acp/TOOLS.json +4 -4
  189. package/src/config/bundled-skills/app-builder/SKILL.md +224 -398
  190. package/src/config/bundled-skills/app-builder/TOOLS.json +29 -0
  191. package/src/config/bundled-skills/app-builder/references/DESIGN_SYSTEM.md +48 -0
  192. package/src/config/bundled-skills/app-builder/references/RESPONSIVE.md +57 -0
  193. package/src/config/bundled-skills/app-builder/references/SLIDES.md +38 -0
  194. package/src/config/bundled-skills/app-builder/references/examples/README.md +17 -0
  195. package/src/config/bundled-skills/app-builder/references/examples/expense-tracker.md +515 -0
  196. package/src/config/bundled-skills/app-builder/references/examples/focus-timer.md +342 -0
  197. package/src/config/bundled-skills/app-builder/references/examples/habit-tracker.md +490 -0
  198. package/src/config/bundled-skills/app-builder/tools/app-list.ts +62 -0
  199. package/src/config/bundled-skills/document-editor/SKILL.md +28 -23
  200. package/src/config/bundled-skills/document-editor/TOOLS.json +1 -1
  201. package/src/config/bundled-skills/messaging/SKILL.md +0 -7
  202. package/src/config/bundled-tool-registry.ts +2 -0
  203. package/src/config/feature-flag-cache.ts +3 -3
  204. package/src/config/feature-flag-registry.json +48 -7
  205. package/src/config/schemas/__tests__/memory-v2.test.ts +1 -0
  206. package/src/config/schemas/__tests__/memory-v3.test.ts +25 -0
  207. package/src/config/schemas/heartbeat.ts +9 -0
  208. package/src/config/schemas/llm.ts +1 -0
  209. package/src/config/schemas/memory-v2.ts +8 -0
  210. package/src/config/schemas/memory-v3.ts +8 -0
  211. package/src/config/schemas/platform.ts +8 -0
  212. package/src/config/seed-inference-profiles.ts +2 -2
  213. package/src/config/skills.ts +13 -0
  214. package/src/context/compactor.ts +1 -1
  215. package/src/context/strip-injections.ts +128 -0
  216. package/src/context/token-estimator.ts +23 -0
  217. package/src/context/tool-result-truncation.ts +0 -23
  218. package/src/context/window-manager.ts +5 -7
  219. package/src/credential-execution/executable-discovery.ts +16 -0
  220. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +6 -0
  221. package/src/daemon/__tests__/inference-profile-notification.test.ts +153 -0
  222. package/src/daemon/__tests__/native-web-search-metadata.test.ts +10 -8
  223. package/src/daemon/assistant-attachments.ts +1 -1
  224. package/src/daemon/config-watcher.ts +2 -2
  225. package/src/daemon/context-overflow-reducer.ts +0 -1
  226. package/src/daemon/conversation-agent-loop-handlers.ts +594 -153
  227. package/src/daemon/conversation-agent-loop.ts +301 -997
  228. package/src/daemon/conversation-history.ts +5 -4
  229. package/src/daemon/conversation-lifecycle.ts +3 -4
  230. package/src/daemon/conversation-messaging.ts +7 -6
  231. package/src/daemon/conversation-process.ts +11 -16
  232. package/src/daemon/conversation-registry.ts +159 -0
  233. package/src/daemon/conversation-runtime-assembly.ts +218 -398
  234. package/src/daemon/conversation-slash.ts +6 -25
  235. package/src/daemon/conversation-store.ts +9 -90
  236. package/src/daemon/conversation-surfaces.ts +222 -4
  237. package/src/daemon/conversation-tool-setup.ts +2 -29
  238. package/src/daemon/conversation-workspace.ts +17 -0
  239. package/src/daemon/conversation.ts +32 -20
  240. package/src/daemon/external-plugins-bootstrap.ts +17 -18
  241. package/src/daemon/handlers/config-a2a.ts +51 -36
  242. package/src/daemon/handlers/config-slack-channel.ts +20 -14
  243. package/src/daemon/handlers/config-telegram.ts +16 -2
  244. package/src/daemon/handlers/conversations.ts +3 -1
  245. package/src/daemon/handlers/shared.ts +156 -84
  246. package/src/daemon/handlers/skills.ts +42 -10
  247. package/src/daemon/lifecycle.ts +25 -0
  248. package/src/daemon/message-types/apps.ts +1 -29
  249. package/src/daemon/message-types/messages.ts +9 -57
  250. package/src/daemon/message-types/skills.ts +2 -0
  251. package/src/daemon/message-types/surfaces.ts +136 -3
  252. package/src/daemon/now-scratchpad.ts +21 -0
  253. package/src/daemon/orphan-reaper.test.ts +210 -0
  254. package/src/daemon/orphan-reaper.ts +240 -0
  255. package/src/daemon/overflow-reduction-loop.ts +230 -0
  256. package/src/daemon/persist-unsendable-image.ts +117 -0
  257. package/src/daemon/process-message.ts +1 -3
  258. package/src/daemon/server.ts +2 -0
  259. package/src/daemon/trace-emitter.ts +6 -4
  260. package/src/daemon/trust-context.ts +19 -0
  261. package/src/daemon/wake-target-adapter.ts +3 -1
  262. package/src/heartbeat/__tests__/heartbeat-service.test.ts +3 -0
  263. package/src/heartbeat/heartbeat-run-store.ts +23 -1
  264. package/src/heartbeat/heartbeat-service.ts +26 -0
  265. package/src/home/home-greeting-cache.ts +24 -1
  266. package/src/ipc/__tests__/browser-ipc.test.ts +1 -1
  267. package/src/ipc/__tests__/ui-request-route.test.ts +3 -3
  268. package/src/ipc/gateway-client.test.ts +2 -2
  269. package/src/ipc/gateway-client.ts +3 -3
  270. package/src/ipc/skill-routes/__tests__/memory.test.ts +15 -0
  271. package/src/ipc/skill-routes/memory.ts +4 -2
  272. package/src/media/gemini-image-service.ts +15 -0
  273. package/src/media/openai-image-service.ts +14 -0
  274. package/src/media/types.ts +34 -0
  275. package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +56 -0
  276. package/src/memory/auth-fallback-events-store.ts +94 -0
  277. package/src/memory/conversation-starter-checkpoints.ts +1 -0
  278. package/src/memory/conversation-title-service.ts +65 -41
  279. package/src/memory/db-init.ts +6 -0
  280. package/src/memory/graph/__tests__/conversation-graph-memory-registry.test.ts +119 -0
  281. package/src/memory/graph/conversation-graph-memory.ts +65 -0
  282. package/src/memory/job-handlers/conversation-starters.ts +13 -2
  283. package/src/memory/jobs-store.ts +33 -0
  284. package/src/memory/jobs-worker.ts +32 -5
  285. package/src/memory/llm-usage-store.ts +224 -50
  286. package/src/memory/migrations/222-strip-placeholder-sentinels-from-messages.ts +6 -5
  287. package/src/memory/migrations/270-schedule-source-conversation.ts +13 -0
  288. package/src/memory/migrations/271-create-auth-fallback-events.ts +21 -0
  289. package/src/memory/migrations/272-acp-session-history-cwd.ts +36 -0
  290. package/src/memory/migrations/index.ts +3 -0
  291. package/src/memory/pkb/autoinject.ts +61 -0
  292. package/src/memory/pkb/context.ts +50 -0
  293. package/src/memory/pkb/types.ts +14 -0
  294. package/src/memory/schedule-attribution-sql.ts +104 -0
  295. package/src/memory/schema/acp.ts +4 -0
  296. package/src/memory/schema/infrastructure.ts +16 -0
  297. package/src/memory/usage-grouped-buckets.ts +6 -1
  298. package/src/memory/v2/__tests__/consolidation-job.test.ts +4 -4
  299. package/src/memory/v2/consolidation-job.ts +14 -5
  300. package/src/notifications/conversation-pairing.ts +8 -15
  301. package/src/notifications/decision-engine.ts +6 -3
  302. package/src/notifications/home-feed-side-effect.ts +12 -1
  303. package/src/permissions/prompter.ts +4 -0
  304. package/src/plugin-api/constants.ts +4 -0
  305. package/src/plugin-api/index.ts +7 -5
  306. package/src/plugin-api/types.ts +151 -1
  307. package/src/plugins/defaults/compaction/compact.ts +59 -0
  308. package/src/plugins/defaults/compaction/package.json +1 -1
  309. package/src/plugins/defaults/compaction/register.ts +8 -19
  310. package/src/plugins/defaults/empty-response/hooks/stop.ts +126 -0
  311. package/src/plugins/defaults/empty-response/register.ts +8 -13
  312. package/src/plugins/defaults/index.ts +2 -18
  313. package/src/plugins/defaults/memory-retrieval/hooks/post-compact.ts +95 -0
  314. package/src/plugins/defaults/memory-retrieval/hooks/user-prompt-submit-temp.ts +216 -0
  315. package/src/plugins/defaults/memory-retrieval/injector-chain.ts +35 -0
  316. package/src/plugins/defaults/{injectors/register.ts → memory-retrieval/injectors.ts} +288 -81
  317. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/assign.test.ts +4 -4
  318. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/health.test.ts +16 -0
  319. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/live-integration.test.ts +4 -4
  320. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/maintain-job.test.ts +5 -5
  321. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/orchestrate.test.ts +48 -12
  322. package/src/plugins/defaults/memory-v3-shadow/__tests__/provider-blocks.test.ts +13 -0
  323. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/reconcile.test.ts +2 -2
  324. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/render-injection.test.ts +1 -1
  325. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/router.test.ts +104 -32
  326. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/selection-log-store.test.ts +8 -8
  327. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/selector.test.ts +96 -30
  328. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/shadow-plugin.test.ts +34 -16
  329. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/assign.ts +5 -5
  330. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/capabilities.ts +2 -2
  331. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/health.ts +0 -0
  332. package/src/plugins/defaults/memory-v3-shadow/hooks/post-compact.ts +14 -0
  333. package/src/plugins/defaults/memory-v3-shadow/hooks/user-prompt-submit.ts +19 -0
  334. package/src/plugins/defaults/memory-v3-shadow/injector.ts +75 -0
  335. package/src/plugins/defaults/memory-v3-shadow/llm-retry.ts +32 -0
  336. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/maintain-job.ts +8 -8
  337. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/orchestrate.ts +26 -14
  338. package/src/plugins/defaults/{llm-call → memory-v3-shadow}/package.json +2 -2
  339. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/page-content.ts +2 -2
  340. package/src/plugins/defaults/memory-v3-shadow/provider-blocks.ts +26 -0
  341. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/reconcile.ts +3 -3
  342. package/src/plugins/defaults/memory-v3-shadow/register.ts +26 -0
  343. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/render-injection.ts +1 -1
  344. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/router.ts +51 -45
  345. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/selection-log-store.ts +4 -4
  346. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/selector.ts +61 -46
  347. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/shadow-plugin.ts +69 -99
  348. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/tree.ts +1 -1
  349. package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/types.ts +8 -0
  350. package/src/plugins/defaults/title-generate/hooks/stop.ts +75 -0
  351. package/src/plugins/defaults/title-generate/hooks/user-prompt-submit.ts +35 -0
  352. package/src/plugins/defaults/title-generate/package.json +1 -1
  353. package/src/plugins/defaults/title-generate/register.ts +18 -18
  354. package/src/plugins/defaults/tool-error/hooks/post-tool-use.ts +118 -0
  355. package/src/plugins/defaults/tool-error/package.json +1 -1
  356. package/src/plugins/defaults/tool-error/register.ts +9 -21
  357. package/src/plugins/defaults/tool-result-truncate/hooks/post-tool-use.ts +32 -0
  358. package/src/plugins/defaults/tool-result-truncate/register.ts +10 -21
  359. package/src/plugins/defaults/tool-result-truncate/terminal.ts +37 -18
  360. package/src/plugins/external-api.ts +2 -2
  361. package/src/plugins/pipeline.ts +6 -305
  362. package/src/plugins/registry.ts +10 -55
  363. package/src/plugins/types.ts +62 -797
  364. package/src/plugins/user-loader.ts +30 -127
  365. package/src/proactive-artifact/aux-message-injector.ts +4 -4
  366. package/src/proactive-artifact/job.test.ts +8 -13
  367. package/src/prompts/__tests__/system-prompt.test.ts +42 -0
  368. package/src/prompts/templates/BOOTSTRAP-ACTIVATION-RAIL.md +64 -0
  369. package/src/prompts/templates/BOOTSTRAP.md +2 -2
  370. package/src/prompts/templates/system-sections.ts +15 -0
  371. package/src/providers/anthropic/client.ts +37 -29
  372. package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +112 -0
  373. package/src/providers/openai/chat-completions-provider.ts +44 -0
  374. package/src/providers/openrouter/client.ts +1 -0
  375. package/src/providers/placeholder-sentinels.ts +35 -0
  376. package/src/runtime/__tests__/agent-wake.test.ts +10 -6
  377. package/src/runtime/__tests__/interactive-ui.test.ts +1 -1
  378. package/src/runtime/agent-wake.ts +2 -5
  379. package/src/runtime/assistant-event-hub.ts +37 -7
  380. package/src/runtime/{conversation-stream-state.ts → assistant-stream-state.ts} +132 -58
  381. package/src/runtime/channel-approvals.ts +1 -1
  382. package/src/runtime/http-router.ts +16 -21
  383. package/src/runtime/http-types.ts +16 -70
  384. package/src/runtime/interactive-ui.ts +1 -1
  385. package/src/runtime/pending-interactions.ts +1 -0
  386. package/src/runtime/routes/__tests__/acp-routes.test.ts +283 -55
  387. package/src/runtime/routes/__tests__/consolidation-routes.test.ts +265 -2
  388. package/src/runtime/routes/__tests__/conversation-list-routes.test.ts +1 -1
  389. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +31 -1
  390. package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +6 -2
  391. package/src/runtime/routes/__tests__/surface-action-routes.test.ts +5 -4
  392. package/src/runtime/routes/__tests__/surface-content-routes.test.ts +4 -1
  393. package/src/runtime/routes/__tests__/tts-routes.test.ts +6 -2
  394. package/src/runtime/routes/acp-routes.test.ts +89 -25
  395. package/src/runtime/routes/acp-routes.ts +81 -29
  396. package/src/runtime/routes/app-management-routes.ts +6 -117
  397. package/src/runtime/routes/app-routes.ts +13 -15
  398. package/src/runtime/routes/approval-routes.ts +1 -1
  399. package/src/runtime/routes/attachment-routes.ts +26 -15
  400. package/src/runtime/routes/avatar-routes.ts +26 -0
  401. package/src/runtime/routes/browser-routes.ts +1 -1
  402. package/src/runtime/routes/browser-tabs-routes.ts +6 -10
  403. package/src/runtime/routes/btw-routes.ts +29 -23
  404. package/src/runtime/routes/consolidation-routes.ts +120 -20
  405. package/src/runtime/routes/conversation-cli-routes.ts +1 -1
  406. package/src/runtime/routes/conversation-list-routes.ts +1 -1
  407. package/src/runtime/routes/conversation-query-routes.ts +3 -1
  408. package/src/runtime/routes/conversation-routes.ts +372 -185
  409. package/src/runtime/routes/conversation-starter-routes.ts +13 -7
  410. package/src/runtime/routes/conversations-import-routes.ts +24 -7
  411. package/src/runtime/routes/documents-routes.ts +4 -0
  412. package/src/runtime/routes/domain-routes.ts +51 -37
  413. package/src/runtime/routes/epoch-millis-range.ts +34 -0
  414. package/src/runtime/routes/events-routes.ts +28 -34
  415. package/src/runtime/routes/gateway-log-routes.ts +26 -4
  416. package/src/runtime/routes/heartbeat-routes.ts +32 -12
  417. package/src/runtime/routes/host-app-control-routes.ts +1 -1
  418. package/src/runtime/routes/host-cu-routes.ts +1 -1
  419. package/src/runtime/routes/identity-intro-cache.ts +11 -34
  420. package/src/runtime/routes/identity-routes.ts +224 -18
  421. package/src/runtime/routes/image-generation-routes.ts +40 -2
  422. package/src/runtime/routes/inbound-message-handler.ts +1 -1
  423. package/src/runtime/routes/index.ts +2 -0
  424. package/src/runtime/routes/integrations/a2a.ts +12 -10
  425. package/src/runtime/routes/integrations/slack/__tests__/channel.test.ts +16 -0
  426. package/src/runtime/routes/integrations/slack/channel.ts +4 -0
  427. package/src/runtime/routes/integrations/slack/share.ts +27 -6
  428. package/src/runtime/routes/integrations/telegram.ts +6 -0
  429. package/src/runtime/routes/integrations/twilio.ts +42 -0
  430. package/src/runtime/routes/internal-telemetry-routes.ts +88 -0
  431. package/src/runtime/routes/log-export-routes.ts +8 -0
  432. package/src/runtime/routes/memory-v2-routes.ts +15 -8
  433. package/src/runtime/routes/memory-v3-routes.ts +66 -34
  434. package/src/runtime/routes/oauth-apps.ts +66 -12
  435. package/src/runtime/routes/oauth-providers.ts +44 -5
  436. package/src/runtime/routes/platform-routes.ts +81 -5
  437. package/src/runtime/routes/playground/__tests__/force-compact.test.ts +6 -4
  438. package/src/runtime/routes/playground/force-compact.ts +1 -1
  439. package/src/runtime/routes/playground/helpers.ts +1 -1
  440. package/src/runtime/routes/rename-conversation-routes.ts +5 -0
  441. package/src/runtime/routes/schedule-routes.ts +152 -42
  442. package/src/runtime/routes/secret-routes.ts +14 -2
  443. package/src/runtime/routes/skills-routes.ts +43 -14
  444. package/src/runtime/routes/surface-conversation-resolver.ts +4 -3
  445. package/src/runtime/routes/tool-call-confirmation-enrichment.test.ts +161 -0
  446. package/src/runtime/routes/tool-call-confirmation-enrichment.ts +107 -0
  447. package/src/runtime/routes/trust-rules-routes.ts +26 -2
  448. package/src/runtime/routes/tts-routes.ts +35 -0
  449. package/src/runtime/routes/types.ts +66 -8
  450. package/src/runtime/routes/usage-routes.ts +47 -39
  451. package/src/runtime/routes/webhook-routes.ts +41 -2
  452. package/src/runtime/routes/work-items-routes.ts +2 -4
  453. package/src/runtime/routes/workspace-routes.ts +4 -0
  454. package/src/runtime/services/__tests__/analyze-conversation.test.ts +6 -0
  455. package/src/runtime/services/analyze-conversation.ts +2 -2
  456. package/src/runtime/services/conversation-serializer.ts +1 -1
  457. package/src/schedule/schedule-store.ts +20 -1
  458. package/src/schedule/schedule-usage-store.ts +83 -0
  459. package/src/schedule/scheduler.ts +12 -5
  460. package/src/signals/cancel.ts +2 -4
  461. package/src/skills/catalog-files.ts +2 -2
  462. package/src/skills/catalog-install.ts +3 -0
  463. package/src/skills/categories-cache.ts +118 -0
  464. package/src/skills/clawhub-files.ts +1 -2
  465. package/src/skills/skillssh-files.ts +1 -2
  466. package/src/subagent/manager.ts +17 -5
  467. package/src/telemetry/types.ts +29 -1
  468. package/src/telemetry/usage-telemetry-reporter.test.ts +112 -3
  469. package/src/telemetry/usage-telemetry-reporter.ts +57 -2
  470. package/src/tools/acp/context.ts +20 -0
  471. package/src/tools/acp/list-agents.test.ts +7 -1
  472. package/src/tools/acp/spawn.test.ts +158 -55
  473. package/src/tools/acp/spawn.ts +47 -72
  474. package/src/tools/acp/steer.test.ts +105 -8
  475. package/src/tools/acp/steer.ts +48 -17
  476. package/src/tools/apps/executors.ts +13 -8
  477. package/src/tools/executor.ts +1 -53
  478. package/src/tools/filesystem/write.ts +34 -0
  479. package/src/tools/network/__tests__/web-search-metadata.test.ts +7 -1
  480. package/src/tools/network/__tests__/web-search.test.ts +11 -3
  481. package/src/tools/network/web-search-error.test.ts +248 -0
  482. package/src/tools/network/web-search-error.ts +267 -0
  483. package/src/tools/network/web-search.ts +207 -48
  484. package/src/tools/schedule/create.ts +2 -0
  485. package/src/tools/subagent/spawn.ts +2 -4
  486. package/src/tools/terminal/safe-env.ts +10 -1
  487. package/src/tools/ui-surface/definitions.ts +34 -5
  488. package/src/tts/__tests__/provider-catalog-consistency.test.ts +85 -1
  489. package/src/tts/provider-catalog.ts +76 -1
  490. package/src/util/mutex.ts +47 -0
  491. package/src/workspace/git-service.ts +1 -42
  492. package/src/workspace/migrations/051-seed-conversation-summarization-callsite.ts +4 -5
  493. package/src/workspace/migrations/095-bump-heartbeat-interval-30m-to-60m.ts +51 -0
  494. package/src/workspace/migrations/096-reduce-quality-profile-effort.ts +72 -0
  495. package/src/workspace/migrations/097-enable-adaptive-thinking-managed-profiles.ts +117 -0
  496. package/src/workspace/migrations/registry.ts +6 -0
  497. package/docs/plugins.md +0 -836
  498. package/examples/plugins/echo/register.ts +0 -184
  499. package/src/__tests__/bootstrap-turn-cleanup.test.ts +0 -44
  500. package/src/__tests__/circuit-breaker-pipeline.test.ts +0 -405
  501. package/src/__tests__/compaction-pipeline.test.ts +0 -210
  502. package/src/__tests__/compaction-timeout-recovery.test.ts +0 -251
  503. package/src/__tests__/empty-response-pipeline.test.ts +0 -423
  504. package/src/__tests__/llm-call-pipeline.test.ts +0 -287
  505. package/src/__tests__/memory-retrieval-pipeline.test.ts +0 -418
  506. package/src/__tests__/persistence-pipeline.test.ts +0 -503
  507. package/src/__tests__/pipeline-runner.test.ts +0 -564
  508. package/src/__tests__/title-generate-pipeline.test.ts +0 -211
  509. package/src/__tests__/token-estimate-pipeline.test.ts +0 -479
  510. package/src/__tests__/tool-error-pipeline.test.ts +0 -241
  511. package/src/__tests__/tool-execute-pipeline.test.ts +0 -417
  512. package/src/__tests__/tool-result-truncate-pipeline.test.ts +0 -341
  513. package/src/daemon/bootstrap-turn-cleanup.ts +0 -45
  514. package/src/gallery/default-gallery.ts +0 -1359
  515. package/src/gallery/gallery-manifest.ts +0 -28
  516. package/src/home/feature-gate.ts +0 -22
  517. package/src/memory/v3/provider-blocks.ts +0 -16
  518. package/src/plugins/defaults/circuit-breaker/middlewares/circuitBreaker.ts +0 -93
  519. package/src/plugins/defaults/circuit-breaker/package.json +0 -15
  520. package/src/plugins/defaults/circuit-breaker/register.ts +0 -39
  521. package/src/plugins/defaults/compaction/middlewares/compaction.ts +0 -25
  522. package/src/plugins/defaults/compaction/terminal.ts +0 -73
  523. package/src/plugins/defaults/empty-response/middlewares/emptyResponse.ts +0 -22
  524. package/src/plugins/defaults/empty-response/terminal.ts +0 -106
  525. package/src/plugins/defaults/injectors/package.json +0 -15
  526. package/src/plugins/defaults/llm-call/middlewares/llmCall.ts +0 -17
  527. package/src/plugins/defaults/llm-call/register.ts +0 -45
  528. package/src/plugins/defaults/memory-retrieval/middlewares/memoryRetrieval.ts +0 -17
  529. package/src/plugins/defaults/memory-retrieval/package.json +0 -15
  530. package/src/plugins/defaults/memory-retrieval/register.ts +0 -181
  531. package/src/plugins/defaults/overflow-reduce/middlewares/overflowReduce.ts +0 -126
  532. package/src/plugins/defaults/overflow-reduce/package.json +0 -15
  533. package/src/plugins/defaults/overflow-reduce/register.ts +0 -42
  534. package/src/plugins/defaults/persistence/middlewares/persistence.ts +0 -19
  535. package/src/plugins/defaults/persistence/package.json +0 -15
  536. package/src/plugins/defaults/persistence/register.ts +0 -38
  537. package/src/plugins/defaults/persistence/terminal.ts +0 -83
  538. package/src/plugins/defaults/title-generate/terminal.ts +0 -31
  539. package/src/plugins/defaults/token-estimate/middlewares/tokenEstimate.ts +0 -23
  540. package/src/plugins/defaults/token-estimate/package.json +0 -15
  541. package/src/plugins/defaults/token-estimate/register.ts +0 -34
  542. package/src/plugins/defaults/token-estimate/terminal.ts +0 -40
  543. package/src/plugins/defaults/tool-error/middlewares/toolError.ts +0 -21
  544. package/src/plugins/defaults/tool-error/terminal.ts +0 -47
  545. package/src/plugins/defaults/tool-execute/middlewares/toolExecute.ts +0 -23
  546. package/src/plugins/defaults/tool-execute/package.json +0 -15
  547. package/src/plugins/defaults/tool-execute/register.ts +0 -49
  548. package/src/plugins/defaults/tool-result-truncate/middlewares/toolResultTruncate.ts +0 -23
  549. package/src/plugins/defaults/tool-result-truncate/types.ts +0 -22
  550. package/src/skills/category-inference.ts +0 -111
  551. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/capabilities.test.ts +0 -0
  552. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/core.test.ts +0 -0
  553. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/fixtures/eval-turns.json +0 -0
  554. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/fixtures/live-turns.json +0 -0
  555. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/needle.test.ts +0 -0
  556. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/snapshot.test.ts +0 -0
  557. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/tree.test.ts +0 -0
  558. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/types.test.ts +0 -0
  559. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/working-set-eviction.test.ts +0 -0
  560. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/__tests__/working-set-skeleton.test.ts +0 -0
  561. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/core.ts +0 -0
  562. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/README.md +0 -0
  563. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/assignments.json +0 -0
  564. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/core.json +0 -0
  565. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/leaves/domain-a/topic-x.md +0 -0
  566. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/leaves/domain-a/topic-y.md +0 -0
  567. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/data/leaves/domain-b/topic-z.md +0 -0
  568. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/needle.ts +0 -0
  569. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/snapshot.ts +0 -0
  570. /package/src/{memory/v3 → plugins/defaults/memory-v3-shadow}/working-set.ts +0 -0
@@ -7,8 +7,6 @@
7
7
  * runAgentLoop method here via the AgentLoopConversationContext interface.
8
8
  */
9
9
 
10
- import { join } from "node:path";
11
-
12
10
  import { v4 as uuid } from "uuid";
13
11
 
14
12
  import { optimizeImageForTransport } from "../agent/image-optimize.js";
@@ -46,10 +44,13 @@ import {
46
44
  } from "../context/post-turn-tool-result-truncation.js";
47
45
  import {
48
46
  estimatePromptTokens,
47
+ estimatePromptTokensWithTools,
49
48
  getCalibrationProviderKey,
50
49
  } from "../context/token-estimator.js";
51
- import type { ContextWindowManager } from "../context/window-manager.js";
52
- import { getDocumentsForConversation } from "../documents/document-store.js";
50
+ import type {
51
+ ContextWindowCompactOptions,
52
+ ContextWindowManager,
53
+ } from "../context/window-manager.js";
53
54
  import type { ToolProfiler } from "../events/tool-profiling-listener.js";
54
55
  import { writeRelationshipState } from "../home/relationship-state-writer.js";
55
56
  import {
@@ -57,9 +58,9 @@ import {
57
58
  setSentryConversationContext,
58
59
  } from "../instrument.js";
59
60
  import { commitAppTurnChanges } from "../memory/app-git-service.js";
60
- import { getApp, listAppFiles, resolveAppDir } from "../memory/app-store.js";
61
61
  import { enqueueAutoAnalysisOnCompaction } from "../memory/auto-analysis-enqueue.js";
62
62
  import {
63
+ addMessage,
63
64
  deleteMessageById,
64
65
  getConversation,
65
66
  getConversationOriginChannel,
@@ -68,77 +69,40 @@ import {
68
69
  getLastUserTimestampBefore,
69
70
  getMessageById,
70
71
  provenanceFromTrustContext,
71
- setConversationHistoryStrippedAt,
72
- setLastNotifiedInferenceProfile,
73
72
  updateConversationContextWindow,
74
73
  updateConversationSlackContextWatermark,
74
+ updateMessageMetadata,
75
75
  } from "../memory/conversation-crud.js";
76
76
  import { getResolvedConversationDirPath } from "../memory/conversation-directories.js";
77
77
  import { syncMessageToDisk } from "../memory/conversation-disk-view.js";
78
- import {
79
- isReplaceableTitle,
80
- queueRegenerateConversationTitle,
81
- } from "../memory/conversation-title-service.js";
78
+ import { isReplaceableTitle } from "../memory/conversation-title-service.js";
82
79
  import { isBackgroundConversationType } from "../memory/conversation-types.js";
83
80
  import type { ConversationGraphMemory } from "../memory/graph/conversation-graph-memory.js";
84
81
  import {
85
82
  backfillMessageIdOnLogs,
86
83
  recordSyntheticAgentErrorMessageLog,
87
84
  } from "../memory/llm-request-log-store.js";
88
- import { recordMemoryRecallLog } from "../memory/memory-recall-log-store.js";
89
85
  import { enqueueMemoryRetrospectiveOnCompaction } from "../memory/memory-retrospective-enqueue.js";
90
- import { PKB_WORKSPACE_SCOPE } from "../memory/pkb/types.js";
91
- import type { QdrantSparseVector } from "../memory/qdrant-client.js";
92
- import {
93
- readMemoryV2StaticContent,
94
- shouldExposePersonalMemory,
95
- } from "../memory/v2/static-context.js";
96
86
  import type { PermissionPrompter } from "../permissions/prompter.js";
97
87
  import { HOOKS } from "../plugin-api/constants.js";
98
88
  import type { UserPromptSubmitContext } from "../plugin-api/types.js";
99
- import { defaultCompactionTerminal } from "../plugins/defaults/compaction/terminal.js";
89
+ import { defaultCompact } from "../plugins/defaults/compaction/compact.js";
100
90
  import { deepRepairHistory } from "../plugins/defaults/history-repair/terminal.js";
101
- import {
102
- asDefaultGraphPayload,
103
- type DefaultMemoryRetrievalDeps,
104
- type GraphMemoryPayload,
105
- runDefaultMemoryRetrieval,
106
- } from "../plugins/defaults/memory-retrieval/register.js";
107
- import { defaultPersistenceTerminal } from "../plugins/defaults/persistence/terminal.js";
108
- import { defaultTitleGenerateTerminal } from "../plugins/defaults/title-generate/terminal.js";
109
- import { defaultTokenEstimateTerminal } from "../plugins/defaults/token-estimate/terminal.js";
110
- import { DEFAULT_TIMEOUTS, runHook, runPipeline } from "../plugins/pipeline.js";
111
- import { getMiddlewaresFor } from "../plugins/registry.js";
112
- import type {
113
- CompactionArgs,
114
- CompactionResult,
115
- EstimateArgs,
116
- EstimateResult,
117
- MemoryArgs,
118
- MemoryResult,
119
- OverflowReduceArgs,
120
- OverflowReduceResult,
121
- PersistAddResult,
122
- PersistArgs,
123
- PersistResult,
124
- TurnContext as PluginTurnContext,
125
- } from "../plugins/types.js";
126
- import { PluginExecutionError, PluginTimeoutError } from "../plugins/types.js";
127
- import type {
128
- ContentBlock,
129
- Message,
130
- ToolDefinition,
131
- } from "../providers/types.js";
91
+ import postCompactReinject from "../plugins/defaults/memory-retrieval/hooks/post-compact.js";
92
+ import userPromptSubmitMemoryRetrieval, {
93
+ type MemoryRetrievalHookContext,
94
+ } from "../plugins/defaults/memory-retrieval/hooks/user-prompt-submit-temp.js";
95
+ import { runHook } from "../plugins/pipeline.js";
96
+ import type { TurnContext as PluginTurnContext } from "../plugins/types.js";
97
+ import type { ContentBlock, Message } from "../providers/types.js";
132
98
  import type { Provider } from "../providers/types.js";
133
99
  import { resolveActorTrust } from "../runtime/actor-trust-resolver.js";
134
100
  import { broadcastMessage } from "../runtime/assistant-event-hub.js";
135
101
  import { DAEMON_INTERNAL_ASSISTANT_ID } from "../runtime/assistant-scope.js";
136
102
  import { publishConversationMessagesChanged } from "../runtime/sync/resource-sync-events.js";
137
- import { redactSecrets } from "../security/secret-scanner.js";
138
103
  import { getSubagentManager } from "../subagent/index.js";
139
104
  import type { UsageActor } from "../usage/actors.js";
140
105
  import { getLogger } from "../util/logger.js";
141
- import { getWorkspaceDir } from "../util/platform.js";
142
106
  import { timeAgo } from "../util/time.js";
143
107
  import { truncate } from "../util/truncate.js";
144
108
  import { getWorkspaceGitService } from "../workspace/git-service.js";
@@ -147,7 +111,6 @@ import {
147
111
  type AssistantAttachmentDraft,
148
112
  cleanAssistantContent,
149
113
  } from "./assistant-attachments.js";
150
- import { cleanupBootstrapAfterTurnThreshold } from "./bootstrap-turn-cleanup.js";
151
114
  import { resolveOverflowAction } from "./context-overflow-policy.js";
152
115
  import {
153
116
  createInitialReducerState,
@@ -158,6 +121,8 @@ import {
158
121
  createEventHandlerState,
159
122
  dispatchAgentEvent,
160
123
  type EventHandlerDeps,
124
+ finalizePendingToolResultRow,
125
+ markHistoryStrippedBestEffort,
161
126
  } from "./conversation-agent-loop-handlers.js";
162
127
  import {
163
128
  approveHostAttachmentRead,
@@ -173,7 +138,6 @@ import { raceWithTimeout } from "./conversation-media-retry.js";
173
138
  import type { MessageQueue } from "./conversation-queue-manager.js";
174
139
  import type { QueueDrainReason } from "./conversation-queue-manager.js";
175
140
  import type {
176
- ActiveSurfaceContext,
177
141
  ChannelCapabilities,
178
142
  InboundActorContext,
179
143
  InjectionMode,
@@ -182,8 +146,6 @@ import {
182
146
  applyRuntimeInjections,
183
147
  buildSubagentStatusBlock,
184
148
  buildUnifiedTurnContextBlock,
185
- findLastInjectedNowContent,
186
- getPkbAutoInjectList,
187
149
  getSlackCompactionWatermarkForPrefix,
188
150
  inboundActorContextFromTrust,
189
151
  inboundActorContextFromTrustContext,
@@ -194,7 +156,6 @@ import {
194
156
  } from "./conversation-runtime-assembly.js";
195
157
  import type { SkillProjectionCache } from "./conversation-skill-tools.js";
196
158
  import { markSurfaceCompleted } from "./conversation-surfaces.js";
197
- import { resolveTrustClass } from "./conversation-tool-setup.js";
198
159
  import { recordUsage } from "./conversation-usage.js";
199
160
  import {
200
161
  formatTurnTimestamp,
@@ -203,45 +164,27 @@ import {
203
164
  import { getDiskPressureStatus } from "./disk-pressure-guard.js";
204
165
  import { classifyDiskPressureTurnPolicy } from "./disk-pressure-policy.js";
205
166
  import type {
206
- DynamicPageSurfaceData,
207
167
  ServerMessage,
208
168
  SurfaceData,
209
169
  SurfaceType,
210
170
  UsageStats,
211
171
  } from "./message-protocol.js";
212
- import type { MemoryRecalled } from "./message-types/memory.js";
213
172
  import type { ConfirmationStateChanged } from "./message-types/messages.js";
214
- import { conversationMetadataSyncTag } from "./message-types/sync.js";
173
+ import {
174
+ type OverflowReduceArgs,
175
+ runOverflowReductionLoop,
176
+ } from "./overflow-reduction-loop.js";
215
177
  import { parseActualTokensFromError } from "./parse-actual-tokens-from-error.js";
178
+ import {
179
+ persistUnsendableImageDowngrades,
180
+ UNSENDABLE_IMAGE_NOTE,
181
+ } from "./persist-unsendable-image.js";
216
182
  import type { TraceEmitter } from "./trace-emitter.js";
217
- import type { TrustContext } from "./trust-context.js";
183
+ import { resolveTrustClass, type TrustContext } from "./trust-context.js";
218
184
  import { stripHistoricalWebSearchResults } from "./web-search-history.js";
219
185
 
220
186
  const log = getLogger("conversation-agent-loop");
221
187
 
222
- /**
223
- * Best-effort persistence of the history-stripped marker after an
224
- * injection-strip event (compaction / overflow recovery). The marker is a
225
- * durability hint, not turn-critical state — a transient SQLite write failure
226
- * (SQLITE_BUSY, disk-full, read-only FS) must not abort the turn. Logs a
227
- * warning and continues on failure, preserving the long-standing non-fatal
228
- * contract for this metadata write.
229
- */
230
- function markHistoryStrippedBestEffort(
231
- conversationId: string,
232
- strippedAt: number,
233
- logger: ReturnType<typeof getLogger>,
234
- ): void {
235
- try {
236
- setConversationHistoryStrippedAt(conversationId, strippedAt);
237
- } catch (err) {
238
- logger.warn(
239
- { err },
240
- "Failed to persist history-stripped marker after compaction strip (non-fatal)",
241
- );
242
- }
243
- }
244
-
245
188
  const DISK_PRESSURE_ERROR_CODE = "DISK_SPACE_CRITICAL" as const;
246
189
  const DISK_PRESSURE_ERROR_CATEGORY = "disk_pressure";
247
190
 
@@ -270,12 +213,12 @@ function formatDiskPressureBlockedMessage(): string {
270
213
  // ── Plugin pipeline helpers ──────────────────────────────────────────
271
214
  //
272
215
  // Canonical {@link PluginTurnContext} builder threaded into every
273
- // `runPipeline` call inside `runAgentLoopImpl`. The orchestrator composes
216
+ // `runHook` call inside `runAgentLoopImpl`. The orchestrator composes
274
217
  // the context on demand at each call site from ambient state rather than
275
218
  // carrying a persistent `TurnContext` instance across the turn.
276
219
 
277
220
  /**
278
- * Synthetic fallback trust context used when the orchestrator fires a pipeline
221
+ * Synthetic fallback trust context used when the orchestrator fires a hook
279
222
  * before the per-turn trust snapshot has been captured (e.g. invocations that
280
223
  * bypass `processMessage` / `drainQueue`). We bias to `unknown` rather than
281
224
  * `guardian` so a missing snapshot cannot accidentally grant elevated trust
@@ -287,14 +230,14 @@ const FALLBACK_TURN_TRUST: TrustContext = {
287
230
  };
288
231
 
289
232
  /**
290
- * Build the {@link TurnContext} passed to {@link runPipeline}.
233
+ * Build the {@link TurnContext} passed to {@link runHook}.
291
234
  *
292
- * Canonical source of truth for every pipeline call site inside the agent
293
- * loop. Every `runPipeline` invocation in `runAgentLoopImpl` (and in the
235
+ * Canonical source of truth for every hook call site inside the agent
236
+ * loop. Every `runHook` invocation in `runAgentLoopImpl` (and in the
294
237
  * handlers that share its ambient state) must route through this helper
295
238
  * rather than constructing a `TurnContext` literal inline — this keeps
296
239
  * `turnIndex`, trust resolution, and the `contextWindowManager` attachment
297
- * consistent across pipeline slots, which in turn keeps structured logs
240
+ * consistent across hooks, which in turn keeps structured logs
298
241
  * filtered by `conversationId`/`turnIndex` coherent across hooks.
299
242
  *
300
243
  * Behavior:
@@ -306,9 +249,9 @@ const FALLBACK_TURN_TRUST: TrustContext = {
306
249
  * level context, then {@link FALLBACK_TURN_TRUST}. The cascade matches
307
250
  * the one inside the orchestrator's inline injection assembly so
308
251
  * hooks read the same trust class the runtime sees.
309
- * - `contextWindowManager` is attached unconditionally. Pipelines that
310
- * don't need it can ignore it; the default compaction plugin reads it
311
- * via the typed optional field on `TurnContext`.
252
+ * - `contextWindowManager` is attached unconditionally. Hooks that
253
+ * don't need it can ignore it; it remains available via the typed
254
+ * optional field on `TurnContext`.
312
255
  */
313
256
  function buildPluginTurnContext(
314
257
  ctx: AgentLoopConversationContext,
@@ -322,9 +265,23 @@ function buildPluginTurnContext(
322
265
  turnIndex: ctx.turnCount,
323
266
  trust,
324
267
  contextWindowManager: ctx.contextWindowManager,
268
+ callSite: ctx.currentCallSite,
325
269
  };
326
270
  }
327
271
 
272
+ /**
273
+ * Trust class of the actor whose turn is in progress, for the compactor's
274
+ * image manifest filter. Prefers the turn-start snapshot
275
+ * ({@link AgentLoopConversationContext.currentTurnTrustContext}) over the live
276
+ * trust context so compaction running in a later tool iteration can't pick up
277
+ * a concurrent request's actor.
278
+ */
279
+ function resolveTurnActorTrustClass(
280
+ ctx: AgentLoopConversationContext,
281
+ ): TrustContext["trustClass"] | undefined {
282
+ return (ctx.currentTurnTrustContext ?? ctx.trustContext)?.trustClass;
283
+ }
284
+
328
285
  // ── Context Interface ────────────────────────────────────────────────
329
286
 
330
287
  /**
@@ -352,9 +309,18 @@ export interface AssistantSurface {
352
309
  export interface AgentLoopConversationContext {
353
310
  readonly conversationId: string;
354
311
  messages: Message[];
355
- processing: boolean;
312
+ isProcessing(): boolean;
313
+ setProcessing(value: boolean): void;
356
314
  abortController: AbortController | null;
357
315
  currentRequestId?: string;
316
+ /**
317
+ * The {@link LLMCallSite} of the in-flight turn, set at turn start from
318
+ * `options?.callSite ?? "mainAgent"`. Read by {@link buildPluginTurnContext}
319
+ * so pipeline/injector plugins can tell the main reply apart from
320
+ * background agent-loop work (compaction, subagents, …) on this same
321
+ * conversation. Per-turn mutable, mirroring {@link currentRequestId}.
322
+ */
323
+ currentCallSite?: LLMCallSite;
358
324
 
359
325
  readonly agentLoop: AgentLoop;
360
326
  readonly provider: Provider;
@@ -397,8 +363,6 @@ export interface AgentLoopConversationContext {
397
363
  currentTurnSurfaces: AssistantSurface[];
398
364
 
399
365
  workingDir: string;
400
- workspaceTopLevelContext: string | null;
401
- workspaceTopLevelDirty: boolean;
402
366
  channelCapabilities?: ChannelCapabilities;
403
367
  /** Per-turn snapshot of trustContext, frozen at message-processing start. */
404
368
  currentTurnTrustContext?: TrustContext;
@@ -424,8 +388,6 @@ export interface AgentLoopConversationContext {
424
388
  /** Task-run scope for the current turn. Cleared at turn end so queued/drained turns don't inherit it. */
425
389
  taskRunId?: string;
426
390
  assistantId?: string;
427
- voiceCallControlPrompt?: string;
428
- transportHints?: string[];
429
391
  clientTimezone?: string;
430
392
 
431
393
  readonly coreToolNames: Set<string>;
@@ -500,7 +462,6 @@ export interface AgentLoopConversationContext {
500
462
  getWorkspaceGitService?: (workspaceDir: string) => GitServiceInitializer;
501
463
  commitTurnChanges?: typeof commitTurnChanges;
502
464
 
503
- refreshWorkspaceTopLevelContextIfNeeded(): void;
504
465
  markWorkspaceTopLevelDirty(): void;
505
466
  getQueueDepth(): number;
506
467
  hasQueuedMessages(): boolean;
@@ -561,6 +522,13 @@ export async function runAgentLoopImpl(
561
522
  });
562
523
  let yieldedForHandoff = false;
563
524
  let yieldedForBudget = false;
525
+ // Whether the most recent agent-loop run produced at least one new assistant
526
+ // message — the loop's own forward-progress signal, used by the ordering
527
+ // retry gate and the overflow convergence fold.
528
+ let lastRunAppendedNewMessages = false;
529
+ // The messages the most recent agent-loop run appended on top of its base —
530
+ // the loop's own new-output boundary, persisted as this turn's new messages.
531
+ let lastRunNewMessages: Message[] = [];
564
532
  let pendingCheckpointYield: "budget" | "handoff" | null = null;
565
533
  // Captured when the auto_compress_latest_turn rerun yields at the mid-loop
566
534
  // budget checkpoint. SSE emission happens immediately at the detection site;
@@ -579,6 +547,9 @@ export async function runAgentLoopImpl(
579
547
  // `resolveCallSiteConfig`, picking up any user overrides under
580
548
  // `llm.callSites.mainAgent` (falling back to `llm.default` when absent).
581
549
  const turnCallSite: LLMCallSite = options?.callSite ?? "mainAgent";
550
+ // Expose the turn's call site to plugin pipeline/injector contexts (read by
551
+ // buildPluginTurnContext) so plugins can scope behaviour to the main reply.
552
+ ctx.currentCallSite = turnCallSite;
582
553
 
583
554
  // Read the conversation row once for both the override-profile derivation
584
555
  // below and the title-replaceability check at turn start. Later reads in
@@ -792,10 +763,6 @@ export async function runAgentLoopImpl(
792
763
  : null,
793
764
  },
794
765
  );
795
- const diskPressureContext =
796
- diskPressureDecision.action === "allow-cleanup-mode"
797
- ? { cleanupModeActive: true }
798
- : null;
799
766
  ctx.diskPressureCleanupModeActive =
800
767
  diskPressureDecision.action === "allow-cleanup-mode";
801
768
 
@@ -898,55 +865,6 @@ export async function runAgentLoopImpl(
898
865
  }
899
866
  }
900
867
 
901
- // Generate title early — the user message alone is sufficient context.
902
- // Firing before the main LLM call removes the delay of waiting for the
903
- // full assistant response. The second-pass regeneration at turn 3 will
904
- // refine the title with more context.
905
- // No abort signal — title generation should complete even if the user
906
- // cancels the response, since the user message is already persisted.
907
- // Deferred via setTimeout so the main agent loop LLM call enqueues
908
- // first, avoiding rate-limit slot contention on strict configs.
909
- if (isReplaceableTitle(turnStartConversation?.title ?? null)) {
910
- // TurnContext routed through the canonical builder so the pipeline's
911
- // log record reports the same `conversationId`/`turnIndex` shape as
912
- // every other slot in this turn. Title generation does not depend on
913
- // the context-window manager attached by the builder, but sharing the
914
- // builder keeps the invariant enforced in one place.
915
- const titlePipelineCtx = buildPluginTurnContext(ctx, reqId);
916
- const titleArgs = {
917
- conversationId: ctx.conversationId,
918
- provider: ctx.provider,
919
- userMessage: options?.titleText ?? content,
920
- onTitleUpdated: (title: string) => {
921
- onEvent({
922
- type: "conversation_title_updated",
923
- conversationId: ctx.conversationId,
924
- title,
925
- });
926
- onEvent({
927
- type: "sync_changed",
928
- tags: [conversationMetadataSyncTag(ctx.conversationId)],
929
- });
930
- },
931
- };
932
- setTimeout(() => {
933
- runPipeline(
934
- "titleGenerate",
935
- getMiddlewaresFor("titleGenerate"),
936
- defaultTitleGenerateTerminal,
937
- titleArgs,
938
- titlePipelineCtx,
939
- DEFAULT_TIMEOUTS.titleGenerate,
940
- ).catch((err) => {
941
- // Fire-and-forget — keep previous non-propagating semantics.
942
- // queueGenerateConversationTitle already swallows internal
943
- // errors; this catch covers pipeline-layer errors (timeouts,
944
- // middleware throws) without surfacing them to the agent loop.
945
- rlog.warn({ err }, "titleGenerate pipeline failed (non-fatal)");
946
- });
947
- }, 0);
948
- }
949
-
950
868
  const isFirstMessage = ctx.messages.length === 1;
951
869
  // Promote a pending post-compaction re-inject signal (e.g. from `/compact`)
952
870
  // into `compactedThisTurn` so NOW.md / PKB / v2 static blocks land on this
@@ -954,7 +872,6 @@ export async function runAgentLoopImpl(
954
872
  // so this fires exactly once per `/compact` event.
955
873
  const consumedPostCompactReinject = ctx.pendingPostCompactReinject;
956
874
  ctx.pendingPostCompactReinject = false;
957
- let shouldInjectWorkspace = isFirstMessage || consumedPostCompactReinject;
958
875
  let compactedThisTurn = consumedPostCompactReinject;
959
876
  let slackCompactedThisTurn = false;
960
877
  const isSlackConversation = ctx.channelCapabilities?.channel === "slack";
@@ -1092,70 +1009,32 @@ export async function runAgentLoopImpl(
1092
1009
  // Skip auto-compaction while the circuit breaker is open. Force paths
1093
1010
  // and user-initiated /compact bypass this check.
1094
1011
  const autoCompactAllowed =
1095
- !(await ctx.agentLoop.compactionCircuit.isOpen(ctx));
1012
+ !(await ctx.agentLoop.compactionCircuit.isOpen());
1096
1013
  if (compactCheck.needed && autoCompactAllowed) {
1097
1014
  ctx.emitActivityState("thinking", "context_compacting", {
1098
1015
  requestId: reqId,
1099
1016
  });
1100
1017
  }
1101
- const compactionOptions = {
1102
- lastCompactedAt: ctx.contextCompactedAt ?? undefined,
1103
- precomputedEstimate: compactCheck.estimatedTokens,
1104
- conversationOriginChannel:
1105
- getConversationOriginChannel(ctx.conversationId) ?? undefined,
1106
- overrideProfile: resolveCurrentOverrideProfile() ?? null,
1107
- actorTrustClass: ctx.trustContext?.trustClass,
1108
- };
1109
1018
  let compacted: Awaited<
1110
1019
  ReturnType<typeof ctx.contextWindowManager.maybeCompact>
1111
1020
  > | null = null;
1112
1021
  if (autoCompactAllowed) {
1113
- try {
1114
- compacted = (await runPipeline<CompactionArgs, CompactionResult>(
1115
- "compaction",
1116
- getMiddlewaresFor("compaction"),
1117
- (args) =>
1118
- defaultCompactionTerminal(args, buildPluginTurnContext(ctx, reqId)),
1119
- {
1120
- messages: messagesForStartOfTurnCompaction,
1121
- signal: abortController.signal,
1122
- options: compactionOptions,
1123
- },
1124
- buildPluginTurnContext(ctx, reqId),
1125
- DEFAULT_TIMEOUTS.compaction,
1126
- )) as Awaited<ReturnType<typeof ctx.contextWindowManager.maybeCompact>>;
1127
- } catch (err) {
1128
- if (err instanceof PluginTimeoutError) {
1129
- // Pipeline exceeded its budget. Record the failure so the circuit
1130
- // breaker tracks consecutive timeouts (it trips after three),
1131
- // then degrade gracefully by skipping compaction this turn —
1132
- // the turn proceeds with the un-compacted history rather than
1133
- // hard-failing. The inner summary call has been aborted by the
1134
- // runner's signal-linking, so updateSummary's local fallback
1135
- // also ran before this catch block is reached.
1136
- rlog.warn(
1137
- { err, phase: "start-of-turn-compaction" },
1138
- "Compaction pipeline timed out — skipping compaction this turn",
1139
- );
1140
- await ctx.agentLoop.compactionCircuit.recordOutcome(
1141
- ctx,
1142
- true,
1143
- onEvent,
1144
- );
1145
- compacted = null;
1146
- } else {
1147
- throw err;
1148
- }
1149
- }
1022
+ compacted = await defaultCompact({
1023
+ manager: ctx.contextWindowManager,
1024
+ messages: messagesForStartOfTurnCompaction,
1025
+ signal: abortController.signal,
1026
+ precomputedEstimate: compactCheck.estimatedTokens,
1027
+ overrideProfile: resolveCurrentOverrideProfile() ?? null,
1028
+ actorTrustClass: resolveTurnActorTrustClass(ctx),
1029
+ });
1150
1030
  }
1151
1031
  // Only track circuit-breaker state when a summary LLM call actually ran.
1152
1032
  // `summaryFailed` is `undefined` on early returns (compaction disabled,
1153
- // below threshold, cooldown active, no eligible messages, truncation-only
1033
+ // below threshold, no eligible messages, truncation-only
1154
1034
  // path) — treating those as "successful" compactions would silently reset
1155
1035
  // the 3-strike counter and break the invariant.
1156
1036
  if (compacted && compacted.summaryFailed !== undefined) {
1157
1037
  await ctx.agentLoop.compactionCircuit.recordOutcome(
1158
- ctx,
1159
1038
  compacted.summaryFailed,
1160
1039
  onEvent,
1161
1040
  );
@@ -1165,7 +1044,6 @@ export async function runAgentLoopImpl(
1165
1044
  compacted,
1166
1045
  messagesForStartOfTurnCompaction,
1167
1046
  );
1168
- shouldInjectWorkspace = true;
1169
1047
  if (compacted.compactedPersistedMessages > 0) {
1170
1048
  compactedThisTurn = true;
1171
1049
  }
@@ -1203,213 +1081,10 @@ export async function runAgentLoopImpl(
1203
1081
  }
1204
1082
  };
1205
1083
 
1206
- let runMessages = ctx.messages;
1207
-
1208
- // Memory retrieval pipeline fetches PKB, NOW.md, and memory-graph
1209
- // outputs through a single `memoryRetrieval` pipeline. Plugins may
1210
- // replace the terminal behavior by registering a middleware that
1211
- // short-circuits with its own `MemoryResult`; the default terminal
1212
- // below runs `runDefaultMemoryRetrieval` which reproduces the prior
1213
- // in-lined behavior (PKB/NOW reads + gated graph call).
1214
- const isTrustedActor = resolveTrustClass(ctx.trustContext) === "guardian";
1215
- // Canonical builder — pulls trust from per-turn snapshot, then
1216
- // conversation-level, then the synthetic fallback. Memory retrieval
1217
- // does not need the context-window handle the builder attaches, but
1218
- // keeping every call site on one helper is load-bearing for log
1219
- // coherence across pipeline slots.
1220
- const memoryPluginTurnCtx = buildPluginTurnContext(ctx, reqId);
1221
- const memoryArgs: MemoryArgs = {
1222
- conversationId: ctx.conversationId,
1223
- trustContext: ctx.trustContext,
1224
- turnIndex: ctx.turnCount,
1225
- // Pass the abort signal via `args` (not `deps`) so the pipeline
1226
- // runner's `linkAbortSignal` can swap it for a signal linked to the
1227
- // pipeline's internal controller — on a plugin-set timeout or
1228
- // external cancel, the linked signal aborts and `prepareMemory`
1229
- // stops mutating graph state / emitting events after the pipeline
1230
- // has already errored.
1231
- signal: abortController.signal,
1232
- };
1233
- const memoryDeps: DefaultMemoryRetrievalDeps = {
1234
- messages: ctx.messages,
1235
- graphMemory: ctx.graphMemory,
1236
- config: getConfig(),
1237
- onEvent,
1238
- isTrustedActor,
1239
- };
1240
- const memoryResult: MemoryResult = await runPipeline(
1241
- "memoryRetrieval",
1242
- getMiddlewaresFor("memoryRetrieval"),
1243
- (args) => runDefaultMemoryRetrieval(args, memoryDeps),
1244
- memoryArgs,
1245
- memoryPluginTurnCtx,
1246
- DEFAULT_TIMEOUTS.memoryRetrieval,
1247
- );
1248
-
1249
- // Consume the memory-graph block when the default retriever emitted
1250
- // one. Custom plugins that substitute their own blocks without the
1251
- // default discriminator are expected to handle their own side effects
1252
- // (event emission, metric persistence) inside their middleware; this
1253
- // block short-circuits to the original no-op behavior in that case.
1254
- const defaultGraphPayload: GraphMemoryPayload | null =
1255
- asDefaultGraphPayload(memoryResult.memoryGraphBlocks);
1256
- let pkbQueryVector: number[] | undefined;
1257
- let pkbSparseVector: QdrantSparseVector | undefined;
1258
- if (defaultGraphPayload) {
1259
- const graphResult = defaultGraphPayload.result;
1260
- runMessages = graphResult.runMessages;
1261
- // Select dense+sparse as a matched pair so RRF fusion combines two
1262
- // signals aligned to the same query text:
1263
- // 1. Context-load with a user query: user-query dense + user-query
1264
- // sparse — the cleanest pairing.
1265
- // 2. Otherwise (context-load without a user query, or per-turn):
1266
- // whatever `queryVector` / `sparseVector` the retriever produced,
1267
- // which are themselves co-aligned (both summary-derived in
1268
- // context-load, both user-last-message-derived in per-turn).
1269
- // Never pair a user-query dense with a summary-aligned sparse.
1270
- if (graphResult.userQueryVector) {
1271
- pkbQueryVector = graphResult.userQueryVector;
1272
- pkbSparseVector = graphResult.userQuerySparseVector;
1273
- } else {
1274
- pkbQueryVector = graphResult.queryVector;
1275
- pkbSparseVector = graphResult.sparseVector;
1276
- }
1277
-
1278
- // Persist the injected block text in message metadata so it survives
1279
- // conversation reloads (eviction, restart, fork). loadFromDb re-injects
1280
- // from metadata. Routed through the `persistence` pipeline so plugins
1281
- // can observe or override metadata updates alongside add/delete.
1282
- if (graphResult.injectedBlockText) {
1283
- try {
1284
- await runPipeline<PersistArgs, PersistResult>(
1285
- "persistence",
1286
- getMiddlewaresFor("persistence"),
1287
- defaultPersistenceTerminal,
1288
- {
1289
- op: "update",
1290
- messageId: userMessageId,
1291
- updates: {
1292
- memoryInjectedBlock: graphResult.injectedBlockText,
1293
- },
1294
- },
1295
- buildPluginTurnContext(ctx, reqId),
1296
- DEFAULT_TIMEOUTS.persistence,
1297
- );
1298
- } catch (err) {
1299
- rlog.warn(
1300
- { err },
1301
- "Failed to persist memory injection to metadata (non-fatal)",
1302
- );
1303
- }
1304
- }
1305
-
1306
- const m = graphResult.metrics;
1307
-
1308
- try {
1309
- recordMemoryRecallLog({
1310
- conversationId: ctx.conversationId,
1311
- enabled: true,
1312
- degraded: false,
1313
- provider: m?.embeddingProvider ?? undefined,
1314
- model: m?.embeddingModel ?? undefined,
1315
- semanticHits: m?.semanticHits ?? 0,
1316
- mergedCount: m?.mergedCount ?? 0,
1317
- selectedCount: m?.selectedCount ?? 0,
1318
- tier1Count: m?.tier1Count ?? 0,
1319
- tier2Count: m?.tier2Count ?? 0,
1320
- hybridSearchLatencyMs: m?.hybridSearchLatencyMs ?? 0,
1321
- sparseVectorUsed: m?.sparseVectorUsed ?? false,
1322
- injectedTokens: graphResult.injectedTokens,
1323
- latencyMs: graphResult.latencyMs,
1324
- topCandidatesJson: (m?.topCandidates ?? []).map((c) => ({
1325
- key: c.nodeId,
1326
- type: c.type,
1327
- kind: "graph",
1328
- finalScore: c.score,
1329
- semantic: c.semanticSimilarity,
1330
- recency: c.recencyBoost,
1331
- })),
1332
- injectedText: graphResult.injectedBlockText ?? undefined,
1333
- reason: `graph:${graphResult.mode}`,
1334
- queryContext: m?.queryContext ?? undefined,
1335
- });
1336
- } catch (err) {
1337
- log.warn({ err }, "Failed to persist memory recall log (non-fatal)");
1338
- }
1339
-
1340
- if (m) {
1341
- const memoryRecalledEvent: MemoryRecalled = {
1342
- type: "memory_recalled",
1343
- provider: m.embeddingProvider ?? "unknown",
1344
- model: m.embeddingModel ?? "unknown",
1345
- semanticHits: m.semanticHits,
1346
- mergedCount: m.mergedCount,
1347
- selectedCount: m.selectedCount,
1348
- tier1Count: m.tier1Count,
1349
- tier2Count: m.tier2Count,
1350
- hybridSearchLatencyMs: m.hybridSearchLatencyMs,
1351
- sparseVectorUsed: m.sparseVectorUsed,
1352
- injectedTokens: graphResult.injectedTokens,
1353
- latencyMs: graphResult.latencyMs,
1354
- topCandidates: m.topCandidates.map((c) => ({
1355
- key: c.nodeId,
1356
- type: c.type,
1357
- kind: "graph",
1358
- finalScore: c.score,
1359
- semantic: c.semanticSimilarity,
1360
- recency: c.recencyBoost,
1361
- })),
1362
- };
1363
- onEvent(memoryRecalledEvent);
1364
- }
1365
- }
1366
-
1367
- // Build active surface context
1368
- let activeSurface: ActiveSurfaceContext | null = null;
1369
- if (ctx.currentActiveSurfaceId) {
1370
- const stored = ctx.surfaceState.get(ctx.currentActiveSurfaceId);
1371
- if (stored && stored.surfaceType === "dynamic_page") {
1372
- const data = stored.data as DynamicPageSurfaceData;
1373
- activeSurface = {
1374
- surfaceId: ctx.currentActiveSurfaceId,
1375
- html: data.html,
1376
- currentPage: ctx.currentPage,
1377
- };
1378
- if (data.appId) {
1379
- const app = getApp(data.appId);
1380
- if (app) {
1381
- activeSurface.appId = app.id;
1382
- activeSurface.appName = app.name;
1383
- activeSurface.appDirName = resolveAppDir(app.id).dirName;
1384
- activeSurface.appSchemaJson = app.schemaJson;
1385
- activeSurface.appFiles = listAppFiles(app.id);
1386
- if (app.pages && Object.keys(app.pages).length > 0) {
1387
- activeSurface.appPages = app.pages;
1388
- }
1389
- }
1390
- }
1391
- }
1392
- }
1393
-
1394
- // Query active documents for this conversation so the injector chain
1395
- // can surface them to the assistant (prevents duplicate document_create
1396
- // calls when existing documents should be targeted with document_update).
1397
- const conversationDocs = getDocumentsForConversation(ctx.conversationId);
1398
- const activeDocuments =
1399
- conversationDocs.length > 0
1400
- ? conversationDocs.map((d) => ({
1401
- surfaceId: d.surfaceId,
1402
- title: d.title,
1403
- wordCount: d.wordCount,
1404
- updatedAt: d.updatedAt,
1405
- }))
1406
- : null;
1407
-
1408
- ctx.refreshWorkspaceTopLevelContextIfNeeded();
1409
-
1410
- // Compute fresh turn timestamp for date grounding.
1411
- // Absolute "now" is always anchored to assistant host clock, while local
1412
- // date semantics prefer configured user timezone, then device timezones.
1084
+ // Resolve the turn's timezone cascade up front. It depends only on config
1085
+ // and the inbound request — never on retrieval output — so it can be
1086
+ // settled before context assembly. Local date semantics prefer the
1087
+ // configured user timezone, then device timezones, then the host clock.
1413
1088
  const hostTimeZone = Intl.DateTimeFormat().resolvedOptions().timeZone;
1414
1089
  const timezoneContext = resolveTurnTimezoneContext({
1415
1090
  configuredUserTimeZone: config.ui.userTimezone ?? null,
@@ -1417,9 +1092,6 @@ export async function runAgentLoopImpl(
1417
1092
  detectedTimezone: config.ui.detectedTimezone ?? null,
1418
1093
  hostTimeZone,
1419
1094
  });
1420
- const timestamp = formatTurnTimestamp({
1421
- timeZone: timezoneContext.effectiveTimezone,
1422
- });
1423
1095
 
1424
1096
  // Resolve the inbound actor context for the unified <turn_context> block.
1425
1097
  // When the conversation carries enough identity info, use the unified
@@ -1443,8 +1115,10 @@ export async function runAgentLoopImpl(
1443
1115
  }
1444
1116
  }
1445
1117
 
1446
- // Build unified turn context block that replaces the separate temporal,
1447
- // channel, interface, and actor context blocks.
1118
+ // Resolve the channel/interface labels and the guardian flag for this
1119
+ // turn. These derive only from the captured turn context and the resolved
1120
+ // actor trust class — never from retrieval — so they settle before context
1121
+ // assembly.
1448
1122
  const interfaceName =
1449
1123
  capturedTurnInterfaceContext.userMessageInterface ?? undefined;
1450
1124
  const channelName =
@@ -1489,9 +1163,54 @@ export async function runAgentLoopImpl(
1489
1163
  });
1490
1164
  const label = profileEntry?.label ?? effectiveProfileKey;
1491
1165
  modelProfileStr = resolved.model ? `${label} (${resolved.model})` : label;
1492
- setLastNotifiedInferenceProfile(ctx.conversationId, effectiveProfileKey);
1166
+ // Record the notification for persistence on delivery rather than here:
1167
+ // the model only "learns" the profile once it receives this turn
1168
+ // context, signalled by the first `message_complete`. Persisting inline
1169
+ // would mark the profile notified even if the turn is cancelled or fails
1170
+ // before the model ever sees the notice.
1171
+ state.pendingNotifiedInferenceProfile = effectiveProfileKey;
1493
1172
  }
1494
1173
 
1174
+ // Memory retrieval — fetches PKB, NOW.md, and memory-graph outputs and
1175
+ // persists the retrieval's own side effects (injected-block metadata,
1176
+ // recall log, `memory_recalled` event). Runs at the early "prompt
1177
+ // submitted, before context assembly" moment because its outputs feed the
1178
+ // injection and overflow-reduction transforms below. It is shaped as the
1179
+ // `user-prompt-submit-temp` hook handler but invoked directly for now: it
1180
+ // must run early, while the canonical late `user-prompt-submit` hook
1181
+ // (history repair, title) runs after those transforms, so the two cannot
1182
+ // share a fire site until compaction is cleared from the gap between them.
1183
+ const isTrustedActor = resolveTrustClass(ctx.trustContext) === "guardian";
1184
+ const memoryCtx: MemoryRetrievalHookContext = {
1185
+ graphMemory: ctx.graphMemory,
1186
+ config: getConfig(),
1187
+ onEvent,
1188
+ isTrustedActor,
1189
+ conversationId: ctx.conversationId,
1190
+ userMessageId,
1191
+ logger: rlog,
1192
+ // An external cancel aborts `prepareMemory` instead of letting it run
1193
+ // to completion after the turn has already been torn down.
1194
+ signal: abortController.signal,
1195
+ latestMessages: ctx.messages,
1196
+ };
1197
+ await userPromptSubmitMemoryRetrieval(memoryCtx);
1198
+
1199
+ // The retriever owns its side effects (injected-block metadata, recall
1200
+ // log, `memory_recalled` event) and records the dense/sparse PKB query
1201
+ // pair on the graph handle for the PKB-reminder injector to read back; the
1202
+ // loop only reuses the injected message list downstream.
1203
+ let runMessages = memoryCtx.latestMessages;
1204
+
1205
+ // Capture wall-clock "now" at its point of use, after the blocking memory
1206
+ // retrieval, so the injected `<turn_context>` timestamp reflects current
1207
+ // time rather than the moment the turn began.
1208
+ const timestamp = formatTurnTimestamp({
1209
+ timeZone: timezoneContext.effectiveTimezone,
1210
+ });
1211
+
1212
+ // Build unified turn context block that replaces the separate temporal,
1213
+ // channel, interface, and actor context blocks.
1495
1214
  const baseTurnContext = {
1496
1215
  timestamp,
1497
1216
  interfaceName,
@@ -1513,64 +1232,6 @@ export async function runAgentLoopImpl(
1513
1232
 
1514
1233
  // The `remember` tool handles scratchpad-style memory writes directly to the graph.
1515
1234
 
1516
- // Personal-memory trust gate: PKB, NOW.md, and v2 static blocks all
1517
- // hold private user content. Block exposure to non-guardian actors
1518
- // arriving over a remote channel; internal/local flows pass through.
1519
- // See `shouldExposePersonalMemory` for the threat model.
1520
- const personalMemoryAllowed = shouldExposePersonalMemory({
1521
- sourceChannel: ctx.trustContext?.sourceChannel,
1522
- isTrustedActor,
1523
- });
1524
-
1525
- // Inject NOW.md and PKB content only on the first turn (or after
1526
- // compaction re-strips them). Old injections persist in history and
1527
- // are never stripped on normal turns — this preserves the cached prefix.
1528
- // PKB/NOW content is sourced from the `memoryRetrieval` pipeline above
1529
- // so plugins can override either source without touching the agent loop.
1530
- // NOW.md injection can be disabled via `memory.retrieval.scratchpadInjection.enabled`.
1531
- const scratchpadInjectionEnabled =
1532
- getConfig().memory.retrieval.scratchpadInjection.enabled;
1533
- const currentNowContent =
1534
- personalMemoryAllowed && scratchpadInjectionEnabled
1535
- ? memoryResult.nowContent
1536
- : null;
1537
- const shouldInjectNowAndPkb = isFirstMessage || compactedThisTurn;
1538
- const nowScratchpad = shouldInjectNowAndPkb ? currentNowContent : null;
1539
-
1540
- const currentPkbContent = personalMemoryAllowed
1541
- ? memoryResult.pkbContent
1542
- : null;
1543
- const pkbContext = shouldInjectNowAndPkb ? currentPkbContent : null;
1544
- const pkbActive = currentPkbContent !== null;
1545
-
1546
- // V2 static memory block (essentials/threads/recent/buffer).
1547
- // `currentMemoryV2Static` is the trust-gated content reused by every
1548
- // re-injection path — it stays non-null on non-full-mode turns so
1549
- // that mid-turn reducer compaction (which strips the prior `<info>`
1550
- // block) can restore the freshest content. `memoryV2Static` is the
1551
- // first-turn / post-compaction cadence-gated value for initial
1552
- // injection only. `readMemoryV2StaticContent` self-gates on the v2
1553
- // flag + config and returns null when v2 is off.
1554
- const currentMemoryV2Static = personalMemoryAllowed
1555
- ? readMemoryV2StaticContent()
1556
- : null;
1557
- const memoryV2Static = shouldInjectNowAndPkb ? currentMemoryV2Static : null;
1558
-
1559
- // PKB relevance-hint inputs. Resolved once per turn and reused across
1560
- // re-injections so post-compaction rebuilds pick up fresh hints against
1561
- // the updated conversation history.
1562
- const pkbRoot = pkbActive ? join(getWorkspaceDir(), "pkb") : undefined;
1563
- const pkbAutoInjectList = pkbRoot
1564
- ? getPkbAutoInjectList(pkbRoot)
1565
- : undefined;
1566
- // Pass `ctx` directly — `PkbContextConversation` is structural and
1567
- // `getInContextPkbPaths` re-reads `conversation.messages` on each call,
1568
- // so post-compaction re-injects see the updated history.
1569
- const pkbConversation = pkbActive ? ctx : undefined;
1570
- // PKB points live under a single workspace sentinel scope.
1571
- // See `PKB_WORKSPACE_SCOPE` for why.
1572
- const pkbScopeId = pkbActive ? PKB_WORKSPACE_SCOPE : undefined;
1573
-
1574
1235
  // Subagent status injection — gives the parent LLM visibility into active/completed children.
1575
1236
  // Skipped when this conversation IS a subagent (no nesting) or has no children.
1576
1237
  const subagentStatusBlock = ctx.isSubagent
@@ -1625,21 +1286,14 @@ export async function runAgentLoopImpl(
1625
1286
  )
1626
1287
  : null;
1627
1288
 
1628
- // Guards the chronological-transcript override on re-injection after
1629
- // the reducer compacts `ctx.messages`. The captured transcript is the
1630
- // full persisted history; blindly replaying it on every re-inject would
1631
- // overwrite the reducer's compacted messages and undo compaction. Flip
1632
- // to `true` after any compaction so subsequent re-injections fall back
1633
- // to the reduced `ctx.messages`.
1634
- let reducerCompacted = compactedThisTurn;
1635
-
1636
- // memory-v3-live: route the turn's `<memory>` block to the v3 injector.
1637
- // When on, runtime assembly suppresses v2's `<memory>` injection (only
1638
- // when the v3 injector actually produced a block — otherwise v2 stays as a
1639
- // fallback) and the provider anchors its long-TTL cache breakpoint on the
1640
- // most recent STABLE user message, since the latest user message now
1641
- // carries the volatile per-turn memory block. Flag off → bit-for-bit
1642
- // identical to today's v2 path.
1289
+ state.reducerCompacted = compactedThisTurn;
1290
+
1291
+ // memory-v3-live: when on, the provider anchors its long-TTL cache
1292
+ // breakpoint on the most recent STABLE user message, since the latest user
1293
+ // message now carries the volatile per-turn `<memory>` block the v3
1294
+ // injector emits. The matching v2-suppression strip is owned by
1295
+ // `applyRuntimeInjections`, which reads the same flag itself. Flag off →
1296
+ // bit-for-bit identical to today's v2 path.
1643
1297
  const memoryV3Live = isAssistantFeatureFlagEnabled(
1644
1298
  "memory-v3-live",
1645
1299
  getConfig(),
@@ -1647,29 +1301,7 @@ export async function runAgentLoopImpl(
1647
1301
 
1648
1302
  // Shared injection options — reused whenever we need to re-inject after reduction.
1649
1303
  const injectionOpts = {
1650
- suppressV2MemoryForV3: memoryV3Live,
1651
- diskPressureContext,
1652
- activeSurface,
1653
- activeDocuments,
1654
- workspaceTopLevelContext: shouldInjectWorkspace
1655
- ? ctx.workspaceTopLevelContext
1656
- : null,
1657
- channelCapabilities: ctx.channelCapabilities ?? null,
1658
- channelCommandContext: ctx.commandIntent ?? null,
1659
1304
  unifiedTurnContext: unifiedTurnContextStr,
1660
- pkbContext,
1661
- pkbActive,
1662
- pkbQueryVector,
1663
- pkbSparseVector,
1664
- pkbScopeId,
1665
- pkbConversation,
1666
- pkbAutoInjectList,
1667
- pkbRoot,
1668
- pkbWorkingDir: pkbActive ? ctx.workingDir : undefined,
1669
- memoryV2Static,
1670
- nowScratchpad,
1671
- voiceCallControlPrompt: ctx.voiceCallControlPrompt ?? null,
1672
- transportHints: ctx.transportHints ?? null,
1673
1305
  isNonInteractive: !isInteractiveResolved,
1674
1306
  isBackgroundConversation: isBackgroundConversationType(
1675
1307
  turnStartConversation?.conversationType,
@@ -1689,7 +1321,7 @@ export async function runAgentLoopImpl(
1689
1321
 
1690
1322
  const injection = await applyRuntimeInjections(runMessages, {
1691
1323
  ...injectionOpts,
1692
- slackChronologicalMessages: reducerCompacted
1324
+ slackChronologicalMessages: state.reducerCompacted
1693
1325
  ? null
1694
1326
  : injectionOpts.slackChronologicalMessages,
1695
1327
  mode: currentInjectionMode,
@@ -1735,18 +1367,7 @@ export async function runAgentLoopImpl(
1735
1367
  metadataUpdates.memoryV2StaticBlock =
1736
1368
  injection.blocks.memoryV2StaticBlock;
1737
1369
  }
1738
- await runPipeline<PersistArgs, PersistResult>(
1739
- "persistence",
1740
- getMiddlewaresFor("persistence"),
1741
- defaultPersistenceTerminal,
1742
- {
1743
- op: "update",
1744
- messageId: userMessageId,
1745
- updates: metadataUpdates,
1746
- },
1747
- buildPluginTurnContext(ctx, reqId),
1748
- DEFAULT_TIMEOUTS.persistence,
1749
- );
1370
+ updateMessageMetadata(userMessageId, metadataUpdates);
1750
1371
  } catch (err) {
1751
1372
  rlog.warn({ err }, "Failed to persist injection metadata (non-fatal)");
1752
1373
  }
@@ -1762,51 +1383,18 @@ export async function runAgentLoopImpl(
1762
1383
  let reducerState: ReducerState | undefined;
1763
1384
 
1764
1385
  const toolTokenBudget = ctx.agentLoop.getToolTokenBudget(runMessages);
1765
- // Canonical calibration key — passed to the `tokenEstimate` pipeline for
1766
- // every preflight/mid-loop estimate, the overflow reducer config, and the
1767
- // convergence-path `estimatePromptTokens` call. Matches the key recorded
1768
- // by `handleUsage` for wrapper providers (OpenRouter routing to
1769
- // Anthropic → key is `"anthropic"`).
1386
+ // Canonical calibration key — used by the preflight estimate, the
1387
+ // overflow reducer config, and the convergence-path `estimatePromptTokens`
1388
+ // call. Matches the key recorded by `handleUsage` for wrapper providers
1389
+ // (OpenRouter routing to Anthropic → key is `"anthropic"`).
1770
1390
  const estimationProviderName = getCalibrationProviderKey(ctx.provider);
1771
1391
 
1772
- // Shared `TurnContext` for every `tokenEstimate` pipeline invocation in
1773
- // this turn. The pipeline is the extension point for plugins that want
1774
- // to substitute an alternate estimator (e.g. provider-native tokenization)
1775
- // without touching orchestrator code.
1776
- //
1777
- // Routed through the canonical builder — `turnIndex` is `ctx.turnCount`,
1778
- // trust cascades through per-turn/conversation-level/fallback, and the
1779
- // context-window handle rides along so any middleware that wants to
1780
- // reuse the manager (e.g. to compute compaction-aware estimates) can.
1781
- const pipelineTurnCtx = buildPluginTurnContext(ctx, reqId);
1782
-
1783
- const runTokenEstimatePipeline = (
1784
- history: Message[],
1785
- ): Promise<EstimateResult> =>
1786
- runPipeline<EstimateArgs, EstimateResult>(
1787
- "tokenEstimate",
1788
- getMiddlewaresFor("tokenEstimate"),
1789
- defaultTokenEstimateTerminal,
1790
- {
1791
- // Shallow-frozen copies so a misbehaving middleware that mutates
1792
- // `args.history` or `args.tools` in place (e.g. trims the array
1793
- // before calling next) can't silently strip prompt context from
1794
- // the orchestrator's live `runMessages` / resolved-tools arrays.
1795
- // TypeScript `readonly` on `EstimateArgs` does not prevent
1796
- // `push`/`splice` at runtime; the frozen wrapper throws in strict
1797
- // mode and isolates any mutation attempts from the call-site state.
1798
- history: Object.freeze([...history]) as Message[],
1799
- systemPrompt: ctx.systemPrompt,
1800
- tools: Object.freeze([
1801
- ...ctx.agentLoop.getResolvedTools(history),
1802
- ]) as ToolDefinition[],
1803
- providerName: estimationProviderName,
1804
- },
1805
- pipelineTurnCtx,
1806
- DEFAULT_TIMEOUTS.tokenEstimate,
1807
- );
1808
-
1809
- const preflightTokens = await runTokenEstimatePipeline(runMessages);
1392
+ const preflightTokens = estimatePromptTokensWithTools(
1393
+ runMessages,
1394
+ ctx.systemPrompt,
1395
+ ctx.agentLoop.getResolvedTools(runMessages),
1396
+ estimationProviderName,
1397
+ );
1810
1398
 
1811
1399
  if (overflowRecovery.enabled && preflightTokens > preflightBudget) {
1812
1400
  rlog.warn(
@@ -1818,16 +1406,12 @@ export async function runAgentLoopImpl(
1818
1406
  "Preflight budget exceeded — running overflow reducer before provider call",
1819
1407
  );
1820
1408
 
1821
- // Overflow reduction runs through the plugin pipeline. The default
1822
- // middleware (`default-overflow-reduce`, registered at bootstrap)
1823
- // contains the historical tier loop forced compaction → tool-result
1824
- // truncation media stubbing injection downgrade — plus the
1825
- // re-inject/re-estimate convergence check. The callbacks below are
1826
- // the orchestrator-specific side effects that the plugin coordinates
1827
- // per iteration (activity emission, compaction application, runtime
1828
- // injection reassembly, token re-estimation). Registered plugins that
1829
- // wrap the `overflowReduce` slot see each iteration through their own
1830
- // middleware `next` callback.
1409
+ // `runOverflowReductionLoop` drives the tier loop — forced compaction →
1410
+ // tool-result truncation → media stubbing → injection downgrade — plus
1411
+ // the re-inject/re-estimate convergence check. The callbacks below are
1412
+ // the orchestrator-specific side effects it coordinates per iteration
1413
+ // (activity emission, compaction application, runtime injection
1414
+ // reassembly, token re-estimation).
1831
1415
  const messagesForPreflightOverflowReduction =
1832
1416
  slackChronologicalContext?.messages ?? ctx.messages;
1833
1417
  const overflowArgs: OverflowReduceArgs = {
@@ -1841,72 +1425,18 @@ export async function runAgentLoopImpl(
1841
1425
  maxAttempts: resolveCurrentContextBudget().overflowRecovery.maxAttempts,
1842
1426
  abortSignal: abortController.signal,
1843
1427
  compactFn: async (msgs, signal, opts) => {
1844
- // Route the reducer's forced-compaction tier through the
1845
- // `compaction` pipeline so registered plugins observe these
1846
- // invocations. Without this, custom compaction middleware only
1847
- // sees the three orchestrator-owned call sites and misses the
1848
- // reducer-initiated forced compactions entirely.
1849
- //
1850
- // Pipeline timeouts must be caught locally — a `PluginTimeoutError`
1851
- // bubbling out of here would abort the overflow-reducer tier loop
1852
- // entirely, skipping fallback tiers (tool-result truncation, media
1853
- // stubbing, injection downgrade) and bypassing circuit-breaker
1854
- // bookkeeping. On timeout, record the failure and return a
1855
- // `compacted: false` result so the reducer falls through to the
1856
- // next tier.
1857
- try {
1858
- return (await runPipeline<CompactionArgs, CompactionResult>(
1859
- "compaction",
1860
- getMiddlewaresFor("compaction"),
1861
- (args) =>
1862
- defaultCompactionTerminal(
1863
- args,
1864
- buildPluginTurnContext(ctx, reqId),
1865
- ),
1866
- {
1867
- messages: msgs,
1868
- signal,
1869
- options: {
1870
- ...(opts ?? {}),
1871
- overrideProfile: resolveCurrentOverrideProfile() ?? null,
1872
- actorTrustClass: ctx.trustContext?.trustClass,
1873
- },
1874
- },
1875
- buildPluginTurnContext(ctx, reqId),
1876
- DEFAULT_TIMEOUTS.compaction,
1877
- )) as Awaited<
1878
- ReturnType<typeof ctx.contextWindowManager.maybeCompact>
1879
- >;
1880
- } catch (err) {
1881
- if (err instanceof PluginTimeoutError) {
1882
- rlog.warn(
1883
- { err, phase: "overflow-reducer-forced-compaction" },
1884
- "Compaction pipeline timed out — falling through to next reducer tier",
1885
- );
1886
- await ctx.agentLoop.compactionCircuit.recordOutcome(
1887
- ctx,
1888
- true,
1889
- onEvent,
1890
- );
1891
- return {
1892
- messages: msgs,
1893
- compacted: false,
1894
- previousEstimatedInputTokens: 0,
1895
- estimatedInputTokens: 0,
1896
- maxInputTokens: 0,
1897
- thresholdTokens: 0,
1898
- compactedMessages: 0,
1899
- compactedPersistedMessages: 0,
1900
- summaryCalls: 0,
1901
- summaryInputTokens: 0,
1902
- summaryOutputTokens: 0,
1903
- summaryModel: "",
1904
- summaryText: "",
1905
- reason: "compaction pipeline timed out",
1906
- };
1907
- }
1908
- throw err;
1909
- }
1428
+ // Delegate the reducer's forced-compaction tier to the default
1429
+ // compaction plugin, overlaying the turn's resolved inference
1430
+ // profile and actor trust class onto the reducer-supplied options.
1431
+ const reducerOptions = (opts ?? {}) as ContextWindowCompactOptions;
1432
+ return defaultCompact({
1433
+ manager: ctx.contextWindowManager,
1434
+ messages: msgs,
1435
+ signal,
1436
+ ...reducerOptions,
1437
+ overrideProfile: resolveCurrentOverrideProfile() ?? null,
1438
+ actorTrustClass: resolveTurnActorTrustClass(ctx),
1439
+ });
1910
1440
  },
1911
1441
  emitActivityState: () => {
1912
1442
  ctx.emitActivityState("thinking", "context_compacting", {
@@ -1925,14 +1455,12 @@ export async function runAgentLoopImpl(
1925
1455
  // breaker.
1926
1456
  if (result.summaryFailed !== undefined) {
1927
1457
  await ctx.agentLoop.compactionCircuit.recordOutcome(
1928
- ctx,
1929
1458
  result.summaryFailed,
1930
1459
  onEvent,
1931
1460
  );
1932
1461
  }
1933
1462
  if (result.compacted) {
1934
1463
  await applySuccessfulCompaction(result, compactedBasis);
1935
- shouldInjectWorkspace = true;
1936
1464
  }
1937
1465
  },
1938
1466
  reinjectForMode: async (
@@ -1943,27 +1471,25 @@ export async function runAgentLoopImpl(
1943
1471
  ) => {
1944
1472
  // Mirror the pre-PR-23 behavior: `ctx.messages` must track the
1945
1473
  // reducer's latest output before re-injection runs, because other
1946
- // sites consulted through `injectionOpts` (`workspaceTopLevelContext`,
1947
- // slack history, etc.) depend on it and `applyCompactionResult`
1948
- // only updates `ctx.messages` on a compaction tier. Assigning here
1474
+ // sites consulted through `injectionOpts` (slack history, etc.) and
1475
+ // the injectors' own message-presence scans depend on it, and
1476
+ // `applyCompactionResult` only updates `ctx.messages` on a
1477
+ // compaction tier. Assigning here
1949
1478
  // keeps non-compaction tiers (tool-result truncation, media
1950
1479
  // stubbing, injection downgrade) observable to downstream
1951
1480
  // injection assembly on the same turn.
1952
1481
  ctx.messages = reducedMessages;
1953
1482
 
1954
- // When THIS iteration compacted, it stripped existing NOW.md /
1955
- // PKB blocks — so we re-inject current content. A later iteration
1956
- // that only truncates or downgrades must NOT re-force PKB/NOW,
1483
+ // When THIS iteration compacted, it stripped the existing
1484
+ // memory-static block — so we re-inject current content. A later
1485
+ // iteration that only truncates or downgrades must NOT re-force it,
1957
1486
  // or each round would grow the token count.
1958
1487
  // Gate: only the iteration that actually compacted re-injects.
1488
+ // (The `<knowledge_base>`, NOW.md, and v2 static `<info>` blocks
1489
+ // self-gate inside their injectors on whether they are already
1490
+ // present in `reducedMessages`.)
1959
1491
  const injection = await applyRuntimeInjections(reducedMessages, {
1960
1492
  ...injectionOpts,
1961
- ...(stepCompacted && { pkbContext: currentPkbContent }),
1962
- ...(stepCompacted && { memoryV2Static: currentMemoryV2Static }),
1963
- ...(stepCompacted && { nowScratchpad: currentNowContent }),
1964
- workspaceTopLevelContext: shouldInjectWorkspace
1965
- ? ctx.workspaceTopLevelContext
1966
- : null,
1967
1493
  // Once ANY iteration has compacted `ctx.messages`, the captured
1968
1494
  // `slackChronologicalMessages` snapshot (built from the full
1969
1495
  // persisted transcript) would overwrite the compacted history
@@ -1989,41 +1515,17 @@ export async function runAgentLoopImpl(
1989
1515
  }),
1990
1516
  };
1991
1517
 
1992
- const overflowResult = await runPipeline<
1993
- OverflowReduceArgs,
1994
- OverflowReduceResult
1995
- >(
1996
- "overflowReduce",
1997
- getMiddlewaresFor("overflowReduce"),
1998
- // Terminal — only reached when every registered middleware calls
1999
- // `next` and delegates past the innermost layer. The default plugin
2000
- // is a terminal itself (it doesn't call `next`), so in practice
2001
- // this fallback fires only when the default has been explicitly
2002
- // deregistered (tests) and no user plugin replaces it. Strict-fail
2003
- // semantics: throw so the missing terminal surfaces as a visible
2004
- // error instead of silently returning the history untouched.
2005
- async () => {
2006
- throw new PluginExecutionError(
2007
- "overflowReduce pipeline has no terminal handler — every reducer middleware called next() without providing a replacement",
2008
- "overflowReduce",
2009
- );
2010
- },
2011
- overflowArgs,
2012
- buildPluginTurnContext(ctx, reqId),
2013
- DEFAULT_TIMEOUTS.overflowReduce,
2014
- );
1518
+ const overflowResult = await runOverflowReductionLoop(overflowArgs);
2015
1519
 
2016
1520
  ctx.messages = overflowResult.messages;
2017
1521
  runMessages = overflowResult.runMessages;
2018
1522
  currentInjectionMode = overflowResult.injectionMode;
2019
1523
  reducerState = overflowResult.reducerState;
2020
1524
  if (overflowResult.reducerCompacted) {
2021
- reducerCompacted = true;
1525
+ state.reducerCompacted = true;
2022
1526
  }
2023
1527
  }
2024
1528
 
2025
- let preRepairMessages = runMessages;
2026
-
2027
1529
  // Replace historical web_search_tool_result blocks with text summaries.
2028
1530
  // The opaque `encrypted_content` tokens Anthropic attaches to each result
2029
1531
  // expire / are route-scoped; replaying a stale token is rejected with
@@ -2046,13 +1548,12 @@ export async function runAgentLoopImpl(
2046
1548
  // context with a fresh array; `runHook` forwards whichever the chain
2047
1549
  // settles on. Order is plugin registration order.
2048
1550
  //
2049
- // Fires BEFORE `preRunHistoryLength` is captured so the boundary
2050
- // between pre-existing and hook-emitted messages consumed by the
2051
- // ordering-error retry gate, the post-run reconcile loop, and the
2052
- // new-message extraction for persistence — reflects exactly what
2053
- // `agentLoop.run` receives.
1551
+ // Fires BEFORE the agent loop runs so the hook-emitted messages are part
1552
+ // of the loop's input; the loop then reports its own appended output via
1553
+ // `AgentLoopRunResult.newMessages`, which is what persistence consumes.
2054
1554
  const userPromptCtx: UserPromptSubmitContext = {
2055
1555
  conversationId: ctx.conversationId,
1556
+ prompt: options?.titleText ?? content,
2056
1557
  originalMessages: ctx.messages,
2057
1558
  latestMessages: runMessages,
2058
1559
  logger: rlog,
@@ -2063,8 +1564,6 @@ export async function runAgentLoopImpl(
2063
1564
  );
2064
1565
  runMessages = finalUserPromptCtx.latestMessages;
2065
1566
 
2066
- let preRunHistoryLength = runMessages.length;
2067
-
2068
1567
  const shouldGenerateTitle = isReplaceableTitle(
2069
1568
  getConversation(ctx.conversationId)?.title ?? null,
2070
1569
  );
@@ -2078,6 +1577,7 @@ export async function runAgentLoopImpl(
2078
1577
  rlog,
2079
1578
  turnChannelContext: capturedTurnChannelContext,
2080
1579
  turnInterfaceContext: capturedTurnInterfaceContext,
1580
+ applyCompaction: applySuccessfulCompaction,
2081
1581
  };
2082
1582
  const eventHandler = (event: AgentEvent): Promise<void> =>
2083
1583
  dispatchAgentEvent(state, deps, event);
@@ -2097,82 +1597,39 @@ export async function runAgentLoopImpl(
2097
1597
  rlog.info({ callSite: turnCallSite }, "Starting agent loop run");
2098
1598
 
2099
1599
  // Thread the orchestrator's canonical per-turn context into the agent
2100
- // loop so its internal pipeline invocations (llmCall, emptyResponse,
2101
- // toolError, toolResultTruncate, toolExecute) see the real
2102
- // conversation identity / trust / contextWindowManager instead of the
2103
- // synthesized `"agent-loop"` placeholder. The loop clones this value
1600
+ // loop so its internal pipeline invocations (e.g. compaction) see the
1601
+ // real conversation identity / trust / contextWindowManager instead of
1602
+ // the synthesized `"agent-loop"` placeholder. The loop clones this value
2104
1603
  // and overwrites `turnIndex` with its own tool-use iteration counter.
2105
1604
  const loopTurnCtx = buildPluginTurnContext(ctx, reqId);
2106
1605
 
2107
- // Hooks for the loop-owned mid-loop compaction. The agent loop owns the
1606
+ // Hook for the loop-owned mid-loop compaction. The agent loop owns the
2108
1607
  // trigger (its budget gate), the `compaction` pipeline call, the result
2109
1608
  // interpretation (circuit-breaker bookkeeping + the exhaustion decision),
2110
- // and the inline continue; these callbacks bridge the durable / injection
2111
- // state the loop is intentionally blind to. Durable persistence and
2112
- // re-injection stay orchestrator-supplied for now.
1609
+ // and the inline continue; this callback bridges the injection state the
1610
+ // loop is intentionally blind to. Durable persistence is signalled via
1611
+ // events; re-injection stays orchestrator-supplied for now.
2113
1612
  const midLoopCompaction: MidLoopCompaction = {
2114
- prepare: (history) => {
2115
- // Strip injected context so the compactor summarizes the raw
2116
- // persistent messages, and commit the stripped set to durable state.
2117
- const rawHistory = stripInjectionsForCompaction(history);
2118
- ctx.messages = rawHistory;
2119
- markHistoryStrippedBestEffort(ctx.conversationId, Date.now(), rlog);
2120
- return {
2121
- rawHistory,
2122
- options: {
2123
- lastCompactedAt: ctx.contextCompactedAt ?? undefined,
2124
- force: true,
2125
- targetInputTokensOverride:
2126
- resolveCurrentContextBudget().preflightBudget,
2127
- conversationOriginChannel:
2128
- getConversationOriginChannel(ctx.conversationId) ?? undefined,
2129
- overrideProfile: resolveCurrentOverrideProfile() ?? null,
2130
- actorTrustClass: ctx.trustContext?.trustClass,
2131
- },
2132
- };
2133
- },
2134
- applyResult: async (result, rawHistory) => {
2135
- await applySuccessfulCompaction(result, rawHistory);
2136
- reducerCompacted = true;
2137
- shouldInjectWorkspace = true;
2138
- },
2139
- reinject: async () => {
1613
+ postCompactionHook: async ({ history, turnContext }) => {
2140
1614
  // stripInjectionsForCompaction() unconditionally removed the existing
2141
- // NOW.md block, so re-inject the current content regardless of whether
2142
- // compaction actually ran.
2143
- const injection = await applyRuntimeInjections(ctx.messages, {
1615
+ // memory-static block, so re-inject the current content regardless of
1616
+ // whether compaction actually ran. The `<knowledge_base>`, NOW.md, and
1617
+ // v2 static `<info>` blocks self-gate inside their injectors on block
1618
+ // presence.
1619
+ const injection = await postCompactReinject({
2144
1620
  ...injectionOpts,
2145
- pkbContext: currentPkbContent,
2146
- memoryV2Static: currentMemoryV2Static,
2147
- nowScratchpad: currentNowContent,
2148
- workspaceTopLevelContext: shouldInjectWorkspace
2149
- ? ctx.workspaceTopLevelContext
2150
- : null,
2151
1621
  // Suppress the chronological-transcript snapshot once the reducer
2152
1622
  // has collapsed `ctx.messages`; the captured snapshot reflects the
2153
1623
  // full persisted transcript and would overwrite compaction.
2154
- slackChronologicalMessages: reducerCompacted
1624
+ slackChronologicalMessages: state.reducerCompacted
2155
1625
  ? null
2156
1626
  : injectionOpts.slackChronologicalMessages,
2157
1627
  mode: currentInjectionMode,
2158
- turnContext: buildPluginTurnContext(ctx, reqId),
1628
+ turnContext,
1629
+ history,
1630
+ logger: rlog,
2159
1631
  });
2160
- runMessages = injection.messages;
2161
- if (isTrustedActor && currentInjectionMode !== "minimal") {
2162
- ctx.graphMemory.retrackCachedNodes();
2163
- }
2164
- const midLoopCompactStrip =
2165
- stripHistoricalWebSearchResults(runMessages);
2166
- if (midLoopCompactStrip.stats.blocksStripped > 0) {
2167
- rlog.info(
2168
- { phase: "mid-loop-compact", ...midLoopCompactStrip.stats },
2169
- "Converted historical web_search_tool_result blocks to text summaries",
2170
- );
2171
- runMessages = midLoopCompactStrip.messages;
2172
- }
2173
- preRepairMessages = runMessages;
2174
- preRunHistoryLength = runMessages.length;
2175
- return runMessages;
1632
+ return injection.messages;
2176
1633
  },
2177
1634
  };
2178
1635
 
@@ -2188,10 +1645,8 @@ export async function runAgentLoopImpl(
2188
1645
  msgs: Message[],
2189
1646
  compaction?: MidLoopCompaction,
2190
1647
  ): Promise<Message[]> => {
2191
- const { history, exitReason } = await ctx.agentLoop.run(
2192
- msgs,
2193
- eventHandler,
2194
- {
1648
+ const { history, exitReason, appendedNewMessages, newMessages } =
1649
+ await ctx.agentLoop.run(msgs, eventHandler, {
2195
1650
  signal: abortController.signal,
2196
1651
  requestId: reqId,
2197
1652
  onCheckpoint,
@@ -2205,8 +1660,9 @@ export async function runAgentLoopImpl(
2205
1660
  // `<memory>` block, so anchor the provider's long-TTL cache breakpoint
2206
1661
  // on the most recent stable message instead.
2207
1662
  mutableLatestUserMessage: memoryV3Live,
2208
- },
2209
- );
1663
+ });
1664
+ lastRunAppendedNewMessages = appendedNewMessages;
1665
+ lastRunNewMessages = newMessages;
2210
1666
  if (exitReason === "handoff") {
2211
1667
  yieldedForHandoff = true;
2212
1668
  pendingCheckpointYield = "handoff";
@@ -2244,10 +1700,7 @@ export async function runAgentLoopImpl(
2244
1700
  }
2245
1701
 
2246
1702
  // One-shot ordering error retry
2247
- if (
2248
- state.orderingErrorDetected &&
2249
- updatedHistory.length === preRunHistoryLength
2250
- ) {
1703
+ if (state.orderingErrorDetected && !lastRunAppendedNewMessages) {
2251
1704
  rlog.warn(
2252
1705
  { phase: "retry" },
2253
1706
  "Provider ordering error detected, attempting one-shot deep-repair retry",
@@ -2261,12 +1714,10 @@ export async function runAgentLoopImpl(
2261
1714
  // `user-prompt-submit` hook (the default history-repair plugin runs
2262
1715
  // `repairHistory` there); widening that surface to deep-repair is
2263
1716
  // intentionally deferred until there's a concrete plugin-level use case.
2264
- const retryRepair = deepRepairHistory(runMessages);
1717
+ const retryRepair = deepRepairHistory(updatedHistory);
2265
1718
  runMessages = retryRepair.messages;
2266
1719
  const retryStrip = stripHistoricalWebSearchResults(runMessages);
2267
1720
  runMessages = retryStrip.messages;
2268
- preRepairMessages = runMessages;
2269
- preRunHistoryLength = runMessages.length;
2270
1721
  state.orderingErrorDetected = false;
2271
1722
  state.deferredOrderingError = null;
2272
1723
 
@@ -2319,15 +1770,29 @@ export async function runAgentLoopImpl(
2319
1770
  }
2320
1771
  // Can't resize — replace with a text annotation so the model
2321
1772
  // can explain the situation rather than silently dropping context
2322
- return [
2323
- {
2324
- type: "text" as const,
2325
- text: "(An image was attached but could not be sent — its dimensions exceed the provider limit and automatic resize was not available. Please resize the image and try again.)",
2326
- },
2327
- ];
1773
+ return [{ type: "text" as const, text: UNSENDABLE_IMAGE_NOTE }];
2328
1774
  }),
2329
1775
  };
2330
1776
  });
1777
+ // The transform above only mutates ctx.messages for the current retry.
1778
+ // Persist the downgrade for images that can never be sent so the rejected
1779
+ // upload doesn't rehydrate from the DB and resurface on later turns. This
1780
+ // is cleanup for future turns, so a persistence failure must never abort
1781
+ // the retry that is about to run — log it and continue.
1782
+ try {
1783
+ const rewritten = persistUnsendableImageDowngrades(ctx.conversationId);
1784
+ if (rewritten > 0) {
1785
+ rlog.info(
1786
+ { phase: "image-recovery", rewritten },
1787
+ "Persisted unsendable-image downgrades so they cannot resurface",
1788
+ );
1789
+ }
1790
+ } catch (err) {
1791
+ rlog.warn(
1792
+ { phase: "image-recovery", err },
1793
+ "Failed to persist unsendable-image downgrade; continuing with in-memory recovery",
1794
+ );
1795
+ }
2331
1796
  runMessages = ctx.messages;
2332
1797
  updatedHistory = await runAgentLoop(runMessages);
2333
1798
  if (state.imageTooLargeDetected) {
@@ -2356,19 +1821,9 @@ export async function runAgentLoopImpl(
2356
1821
  // limit), incorporate those new messages into ctx.messages so the
2357
1822
  // convergence loop operates on the full (larger) history.
2358
1823
  if (state.contextTooLargeDetected) {
2359
- // Detect whether ctx.messages currently lacks NOW.md so we know if
2360
- // it needs to be re-injected. Mid-loop compaction (line ~1067) may
2361
- // have already stripped injections before escalating here, so we
2362
- // check actual message state rather than tracking mutation sites.
2363
- let convergenceStripped =
2364
- findLastInjectedNowContent(ctx.messages) === null;
2365
-
2366
- if (updatedHistory.length > preRunHistoryLength) {
1824
+ if (lastRunAppendedNewMessages) {
2367
1825
  ctx.messages = stripInjectionsForCompaction(updatedHistory);
2368
- markHistoryStrippedBestEffort(ctx.conversationId, Date.now(), rlog);
2369
- convergenceStripped = true;
2370
- preRepairMessages = updatedHistory;
2371
- preRunHistoryLength = updatedHistory.length;
1826
+ markHistoryStrippedBestEffort(ctx.conversationId);
2372
1827
  }
2373
1828
  if (!reducerState) {
2374
1829
  reducerState = createInitialReducerState();
@@ -2450,14 +1905,12 @@ export async function runAgentLoopImpl(
2450
1905
  );
2451
1906
  if (emergencyResult.summaryFailed !== undefined) {
2452
1907
  await ctx.agentLoop.compactionCircuit.recordOutcome(
2453
- ctx,
2454
1908
  emergencyResult.summaryFailed,
2455
1909
  onEvent,
2456
1910
  );
2457
1911
  }
2458
1912
  if (emergencyResult.compacted) {
2459
1913
  await applySuccessfulCompaction(emergencyResult, ctx.messages);
2460
- shouldInjectWorkspace = true;
2461
1914
  }
2462
1915
  // Clear the overflow flag and re-run the agent loop with
2463
1916
  // the compacted context.
@@ -2508,7 +1961,7 @@ export async function runAgentLoopImpl(
2508
1961
  ctx.contextWindowManager.maybeCompact(msgs, signal!, {
2509
1962
  ...(opts ?? {}),
2510
1963
  overrideProfile: resolveCurrentOverrideProfile() ?? null,
2511
- actorTrustClass: ctx.trustContext?.trustClass,
1964
+ actorTrustClass: resolveTurnActorTrustClass(ctx),
2512
1965
  }),
2513
1966
  abortController.signal,
2514
1967
  );
@@ -2526,7 +1979,6 @@ export async function runAgentLoopImpl(
2526
1979
  step.compactionResult.summaryFailed !== undefined
2527
1980
  ) {
2528
1981
  await ctx.agentLoop.compactionCircuit.recordOutcome(
2529
- ctx,
2530
1982
  step.compactionResult.summaryFailed,
2531
1983
  onEvent,
2532
1984
  );
@@ -2537,22 +1989,17 @@ export async function runAgentLoopImpl(
2537
1989
  step.compactionResult,
2538
1990
  convergenceCompactionBasis,
2539
1991
  );
2540
- shouldInjectWorkspace = true;
2541
- reducerCompacted = true;
1992
+ state.reducerCompacted = true;
2542
1993
  }
2543
1994
 
2544
- // Only re-inject NOW.md when ctx.messages was actually stripped;
2545
- // otherwise the existing NOW.md block is still present and
2546
- // re-injecting would duplicate it.
1995
+ // Only re-inject the memory-static block when ctx.messages was
1996
+ // actually stripped; otherwise the existing block is still present and
1997
+ // re-injecting would duplicate it. (The `<knowledge_base>` and NOW.md
1998
+ // blocks self-gate inside their injectors on whether they are already
1999
+ // present in `ctx.messages`.)
2547
2000
  const injection = await applyRuntimeInjections(ctx.messages, {
2548
2001
  ...injectionOpts,
2549
- pkbContext: currentPkbContent,
2550
- memoryV2Static: convergenceStripped ? currentMemoryV2Static : null,
2551
- nowScratchpad: convergenceStripped ? currentNowContent : null,
2552
- workspaceTopLevelContext: shouldInjectWorkspace
2553
- ? ctx.workspaceTopLevelContext
2554
- : null,
2555
- slackChronologicalMessages: reducerCompacted
2002
+ slackChronologicalMessages: state.reducerCompacted
2556
2003
  ? null
2557
2004
  : injectionOpts.slackChronologicalMessages,
2558
2005
  mode: currentInjectionMode,
@@ -2570,8 +2017,6 @@ export async function runAgentLoopImpl(
2570
2017
  );
2571
2018
  runMessages = convergenceStrip.messages;
2572
2019
  }
2573
- preRepairMessages = runMessages;
2574
- preRunHistoryLength = runMessages.length;
2575
2020
  state.contextTooLargeDetected = false;
2576
2021
  yieldedForBudget = false;
2577
2022
 
@@ -2594,12 +2039,9 @@ export async function runAgentLoopImpl(
2594
2039
  // Fold rerun progress into ctx.messages so the next reducer
2595
2040
  // tier operates on up-to-date history instead of stale
2596
2041
  // pre-rerun messages.
2597
- if (updatedHistory.length > preRunHistoryLength) {
2042
+ if (lastRunAppendedNewMessages) {
2598
2043
  ctx.messages = stripInjectionsForCompaction(updatedHistory);
2599
- markHistoryStrippedBestEffort(ctx.conversationId, Date.now(), rlog);
2600
- convergenceStripped = true;
2601
- preRepairMessages = updatedHistory;
2602
- preRunHistoryLength = updatedHistory.length;
2044
+ markHistoryStrippedBestEffort(ctx.conversationId);
2603
2045
  }
2604
2046
  }
2605
2047
  }
@@ -2619,86 +2061,35 @@ export async function runAgentLoopImpl(
2619
2061
  ctx.emitActivityState("thinking", "context_compacting", {
2620
2062
  requestId: reqId,
2621
2063
  });
2622
- let emergencyCompact: Awaited<
2623
- ReturnType<typeof ctx.contextWindowManager.maybeCompact>
2624
- > | null = null;
2625
- try {
2626
- emergencyCompact = (await runPipeline<
2627
- CompactionArgs,
2628
- CompactionResult
2629
- >(
2630
- "compaction",
2631
- getMiddlewaresFor("compaction"),
2632
- (args) =>
2633
- defaultCompactionTerminal(
2634
- args,
2635
- buildPluginTurnContext(ctx, reqId),
2636
- ),
2637
- {
2638
- messages: ctx.messages,
2639
- signal: abortController.signal,
2640
- options: {
2641
- lastCompactedAt: ctx.contextCompactedAt ?? undefined,
2642
- force: true,
2643
- minKeepRecentUserTurns: 0,
2644
- targetInputTokensOverride: correctedTarget,
2645
- overrideProfile: resolveCurrentOverrideProfile() ?? null,
2646
- },
2647
- },
2648
- buildPluginTurnContext(ctx, reqId),
2649
- DEFAULT_TIMEOUTS.compaction,
2650
- )) as Awaited<
2651
- ReturnType<typeof ctx.contextWindowManager.maybeCompact>
2652
- >;
2653
- } catch (err) {
2654
- if (err instanceof PluginTimeoutError) {
2655
- // Emergency compaction timed out. Record the circuit-breaker
2656
- // failure and fall through to the graceful-error path below
2657
- // (the unsuccessful-compaction fallback) rather than hard-
2658
- // failing the turn.
2659
- rlog.warn(
2660
- { err, phase: "emergency-compaction" },
2661
- "Emergency compaction pipeline timed out — continuing with overflow fallback",
2662
- );
2663
- await ctx.agentLoop.compactionCircuit.recordOutcome(
2664
- ctx,
2665
- true,
2666
- onEvent,
2667
- );
2668
- emergencyCompact = null;
2669
- } else {
2670
- throw err;
2671
- }
2672
- }
2064
+ const emergencyCompact = await defaultCompact({
2065
+ manager: ctx.contextWindowManager,
2066
+ messages: ctx.messages,
2067
+ signal: abortController.signal,
2068
+ force: true,
2069
+ minKeepRecentUserTurns: 0,
2070
+ overrideProfile: resolveCurrentOverrideProfile() ?? null,
2071
+ });
2673
2072
  // Only track when the summary LLM actually ran; `force: true`
2674
- // bypasses the cooldown but not the early-return paths.
2675
- if (
2676
- emergencyCompact &&
2677
- emergencyCompact.summaryFailed !== undefined
2678
- ) {
2073
+ // bypasses the auto-threshold gate but not the early-return paths.
2074
+ if (emergencyCompact.summaryFailed !== undefined) {
2679
2075
  await ctx.agentLoop.compactionCircuit.recordOutcome(
2680
- ctx,
2681
2076
  emergencyCompact.summaryFailed,
2682
2077
  onEvent,
2683
2078
  );
2684
2079
  }
2685
- if (emergencyCompact?.compacted) {
2080
+ if (emergencyCompact.compacted) {
2686
2081
  await applySuccessfulCompaction(emergencyCompact, ctx.messages);
2687
- reducerCompacted = true;
2688
- shouldInjectWorkspace = true;
2082
+ state.reducerCompacted = true;
2689
2083
  }
2690
2084
 
2691
- // Only re-inject NOW.md when ctx.messages was actually stripped;
2692
- // otherwise the existing block is still present.
2085
+ // Only re-inject the memory-static block when ctx.messages was
2086
+ // actually stripped; otherwise the existing block is still present.
2087
+ // (The `<knowledge_base>`, NOW.md, and v2 static `<info>` blocks
2088
+ // self-gate inside their injectors on whether they are already
2089
+ // present in `ctx.messages`.)
2693
2090
  const injection = await applyRuntimeInjections(ctx.messages, {
2694
2091
  ...injectionOpts,
2695
- pkbContext: currentPkbContent,
2696
- memoryV2Static: convergenceStripped ? currentMemoryV2Static : null,
2697
- nowScratchpad: convergenceStripped ? currentNowContent : null,
2698
- workspaceTopLevelContext: shouldInjectWorkspace
2699
- ? ctx.workspaceTopLevelContext
2700
- : null,
2701
- slackChronologicalMessages: reducerCompacted
2092
+ slackChronologicalMessages: state.reducerCompacted
2702
2093
  ? null
2703
2094
  : injectionOpts.slackChronologicalMessages,
2704
2095
  mode: currentInjectionMode,
@@ -2716,8 +2107,6 @@ export async function runAgentLoopImpl(
2716
2107
  );
2717
2108
  runMessages = fallbackStrip.messages;
2718
2109
  }
2719
- preRepairMessages = runMessages;
2720
- preRunHistoryLength = runMessages.length;
2721
2110
  state.contextTooLargeDetected = false;
2722
2111
 
2723
2112
  updatedHistory = await runAgentLoop(runMessages);
@@ -2771,44 +2160,11 @@ export async function runAgentLoopImpl(
2771
2160
  onEvent(buildConversationErrorMessage(ctx.conversationId, classified));
2772
2161
  }
2773
2162
 
2774
- // Reconcile synthesized cancellation tool_results
2775
- for (let i = preRunHistoryLength; i < updatedHistory.length; i++) {
2776
- const msg = updatedHistory[i];
2777
- if (msg.role === "user") {
2778
- for (const block of msg.content) {
2779
- if (
2780
- block.type === "tool_result" &&
2781
- !state.pendingToolResults.has(block.tool_use_id) &&
2782
- !state.persistedToolUseIds.has(block.tool_use_id)
2783
- ) {
2784
- state.pendingToolResults.set(block.tool_use_id, {
2785
- content: block.content,
2786
- isError: block.is_error ?? false,
2787
- });
2788
- }
2789
- }
2790
- }
2791
- }
2792
-
2793
- // Flush remaining tool results
2163
+ // Flush remaining tool results. On a normal turn these drain at the next
2164
+ // `message_complete`; an aborted or yielded loop exits with them still
2165
+ // buffered, so finalize the (possibly already on-arrival-reserved) grouped
2166
+ // row here rather than writing a duplicate.
2794
2167
  if (state.pendingToolResults.size > 0) {
2795
- const toolResultBlocks = Array.from(
2796
- state.pendingToolResults.entries(),
2797
- ).map(([toolUseId, result]) => ({
2798
- type: "tool_result",
2799
- tool_use_id: toolUseId,
2800
- content: redactSecrets(result.content),
2801
- is_error: result.isError,
2802
- ...(result.contentBlocks
2803
- ? {
2804
- contentBlocks: result.contentBlocks.map((block) =>
2805
- block.type === "text"
2806
- ? { ...block, text: redactSecrets(block.text) }
2807
- : block,
2808
- ),
2809
- }
2810
- : {}),
2811
- }));
2812
2168
  const toolResultMetadata = {
2813
2169
  ...provenanceFromTrustContext(ctx.trustContext),
2814
2170
  userMessageChannel: capturedTurnChannelContext.userMessageChannel,
@@ -2818,21 +2174,12 @@ export async function runAgentLoopImpl(
2818
2174
  assistantMessageInterface:
2819
2175
  capturedTurnInterfaceContext.assistantMessageInterface,
2820
2176
  };
2821
- await runPipeline<PersistArgs, PersistResult>(
2822
- "persistence",
2823
- getMiddlewaresFor("persistence"),
2824
- defaultPersistenceTerminal,
2825
- {
2826
- op: "add",
2827
- conversationId: ctx.conversationId,
2828
- role: "user",
2829
- content: JSON.stringify(toolResultBlocks),
2830
- metadata: toolResultMetadata,
2831
- },
2832
- buildPluginTurnContext(ctx, reqId),
2833
- DEFAULT_TIMEOUTS.persistence,
2177
+ await finalizePendingToolResultRow(
2178
+ state,
2179
+ ctx.conversationId,
2180
+ toolResultMetadata,
2181
+ rlog,
2834
2182
  );
2835
- state.pendingToolResults.clear();
2836
2183
  }
2837
2184
 
2838
2185
  // Persist the budget_yield_unrecovered notice now that any pending
@@ -2856,24 +2203,13 @@ export async function runAgentLoopImpl(
2856
2203
  };
2857
2204
  let yieldNoticePersistedId: string | null = null;
2858
2205
  try {
2859
- const yieldPersistResult = (await runPipeline<
2860
- PersistArgs,
2861
- PersistResult
2862
- >(
2863
- "persistence",
2864
- getMiddlewaresFor("persistence"),
2865
- defaultPersistenceTerminal,
2866
- {
2867
- op: "add",
2868
- conversationId: ctx.conversationId,
2869
- role: "assistant",
2870
- content: JSON.stringify(yieldNoticeMessage.content),
2871
- metadata: yieldNoticeMetadata,
2872
- },
2873
- buildPluginTurnContext(ctx, reqId),
2874
- DEFAULT_TIMEOUTS.persistence,
2875
- )) as PersistAddResult;
2876
- yieldNoticePersistedId = yieldPersistResult.message.id;
2206
+ const yieldRow = await addMessage(
2207
+ ctx.conversationId,
2208
+ "assistant",
2209
+ JSON.stringify(yieldNoticeMessage.content),
2210
+ { metadata: yieldNoticeMetadata },
2211
+ );
2212
+ yieldNoticePersistedId = yieldRow.id;
2877
2213
  } catch (err) {
2878
2214
  // Non-fatal — a DB hiccup must not escalate a budget-yield exit into
2879
2215
  // a turn-level throw. The live SSE event was already emitted, so the
@@ -2929,7 +2265,7 @@ export async function runAgentLoopImpl(
2929
2265
  }
2930
2266
 
2931
2267
  // Reconstruct history
2932
- const newMessages = updatedHistory.slice(preRunHistoryLength).map((msg) => {
2268
+ const newMessages = lastRunNewMessages.map((msg) => {
2933
2269
  if (msg.role !== "assistant") return msg;
2934
2270
  const { cleanedContent } = cleanAssistantContent(msg.content);
2935
2271
  const cleanedBlocks = cleanedContent as ContentBlock[];
@@ -2960,10 +2296,6 @@ export async function runAgentLoopImpl(
2960
2296
  state.assistantRowAwaitingFinalization &&
2961
2297
  state.lastAssistantMessageId
2962
2298
  ) {
2963
- // Direct `deleteMessageById` (not via the `persistence` pipeline):
2964
- // see the same rationale on the matching cleanup in
2965
- // `handleLlmCallStarted` — an unfinalized reservation has no
2966
- // observable history for plugins.
2967
2299
  try {
2968
2300
  deleteMessageById(state.lastAssistantMessageId);
2969
2301
  } catch (err) {
@@ -2985,20 +2317,12 @@ export async function runAgentLoopImpl(
2985
2317
  const errorAssistantMessage = createAssistantMessage(
2986
2318
  state.providerErrorUserMessage,
2987
2319
  );
2988
- const errorPersistResult = (await runPipeline<PersistArgs, PersistResult>(
2989
- "persistence",
2990
- getMiddlewaresFor("persistence"),
2991
- defaultPersistenceTerminal,
2992
- {
2993
- op: "add",
2994
- conversationId: ctx.conversationId,
2995
- role: "assistant",
2996
- content: JSON.stringify(errorAssistantMessage.content),
2997
- metadata: errChannelMeta,
2998
- },
2999
- buildPluginTurnContext(ctx, reqId),
3000
- DEFAULT_TIMEOUTS.persistence,
3001
- )) as PersistAddResult;
2320
+ const errorRow = await addMessage(
2321
+ ctx.conversationId,
2322
+ "assistant",
2323
+ JSON.stringify(errorAssistantMessage.content),
2324
+ { metadata: errChannelMeta },
2325
+ );
3002
2326
  persistedErrorAssistantMessage = true;
3003
2327
  // Repoint `lastAssistantMessageId` at the synthetic error row so the
3004
2328
  // post-loop sync, attachment resolution, and `message_complete`/
@@ -3007,7 +2331,7 @@ export async function runAgentLoopImpl(
3007
2331
  // above. Mark finalization complete so the next LLM call in this run
3008
2332
  // (or a downstream handler) doesn't try to clean up an id that
3009
2333
  // already corresponds to a finalized row.
3010
- state.lastAssistantMessageId = errorPersistResult.message.id;
2334
+ state.lastAssistantMessageId = errorRow.id;
3011
2335
  state.assistantRowAwaitingFinalization = false;
3012
2336
  newMessages.push(errorAssistantMessage);
3013
2337
  // Pipe the just-assigned message id into any orphaned LLM request log
@@ -3021,10 +2345,7 @@ export async function runAgentLoopImpl(
3021
2345
  // other conversations cannot collide. Non-fatal — a DB hiccup must
3022
2346
  // not escalate a provider rejection into a turn-level throw.
3023
2347
  try {
3024
- backfillMessageIdOnLogs(
3025
- ctx.conversationId,
3026
- errorPersistResult.message.id,
3027
- );
2348
+ backfillMessageIdOnLogs(ctx.conversationId, errorRow.id);
3028
2349
  } catch (err) {
3029
2350
  rlog.warn(
3030
2351
  { err },
@@ -3037,7 +2358,16 @@ export async function runAgentLoopImpl(
3037
2358
  // would create a duplicate plain-text bubble below the alert card.
3038
2359
  }
3039
2360
 
3040
- let restoredHistory = [...preRepairMessages, ...newMessages];
2361
+ // Base persisted into `ctx.messages` is the loop's own returned history
2362
+ // (minus the tail it appended this run), with the cleaned `newMessages`
2363
+ // re-appended on top. Sourcing the base from the loop keeps it in lockstep
2364
+ // with any in-loop compaction without the orchestrator maintaining a
2365
+ // parallel snapshot across re-entry sites.
2366
+ const loopBase = updatedHistory.slice(
2367
+ 0,
2368
+ updatedHistory.length - lastRunNewMessages.length,
2369
+ );
2370
+ let restoredHistory = [...loopBase, ...newMessages];
3041
2371
 
3042
2372
  // Post-turn tool result truncation: save large results to disk and
3043
2373
  // replace in-context content with a prefix/suffix stub + file pointer.
@@ -3229,30 +2559,6 @@ export async function runAgentLoopImpl(
3229
2559
  publishLoopMessagesChanged();
3230
2560
  }
3231
2561
  }
3232
-
3233
- // Second title pass: after 3 completed turns, re-generate the title
3234
- // using the last 3 messages for better context. Only fires when the
3235
- // current title was auto-generated (isAutoTitle = 1) and the user
3236
- // has not opted out via `conversations.skipAutoRetitling`.
3237
- if (ctx.turnCount === 2 && !getConfig().conversations.skipAutoRetitling) {
3238
- // turnCount is 0-indexed, incremented in finally; 2 = about to become 3rd turn
3239
- queueRegenerateConversationTitle({
3240
- conversationId: ctx.conversationId,
3241
- provider: ctx.provider,
3242
- onTitleUpdated: (title) => {
3243
- onEvent({
3244
- type: "conversation_title_updated",
3245
- conversationId: ctx.conversationId,
3246
- title,
3247
- });
3248
- onEvent({
3249
- type: "sync_changed",
3250
- tags: [conversationMetadataSyncTag(ctx.conversationId)],
3251
- });
3252
- },
3253
- signal: abortController.signal,
3254
- });
3255
- }
3256
2562
  } catch (err) {
3257
2563
  const errorCtx = {
3258
2564
  phase: "agent_loop" as const,
@@ -3312,8 +2618,6 @@ export async function runAgentLoopImpl(
3312
2618
  }
3313
2619
  } finally {
3314
2620
  if (turnStarted) {
3315
- cleanupBootstrapAfterTurnThreshold(ctx.conversationId);
3316
-
3317
2621
  ctx.turnCount++;
3318
2622
  const config = getConfig();
3319
2623
  const maxWait = config.workspaceGit?.turnCommitMaxWaitMs ?? 4000;
@@ -3351,7 +2655,7 @@ export async function runAgentLoopImpl(
3351
2655
  ctx.profiler.emitSummary(ctx.traceEmitter, reqId);
3352
2656
 
3353
2657
  ctx.abortController = null;
3354
- ctx.processing = false;
2658
+ ctx.setProcessing(false);
3355
2659
  ctx.onConfirmationOutcome = undefined;
3356
2660
  ctx.surfaceActionRequestIds.delete(ctx.currentRequestId ?? "");
3357
2661
  ctx.approvedViaPromptThisTurn = false;
@@ -3498,7 +2802,7 @@ export async function applyCompactionResult(
3498
2802
  result.summaryText,
3499
2803
  ctx.contextCompactedMessageCount,
3500
2804
  );
3501
- markHistoryStrippedBestEffort(ctx.conversationId, compactedAt, log);
2805
+ markHistoryStrippedBestEffort(ctx.conversationId);
3502
2806
  if (options.slackContextCompactionWatermarkTs) {
3503
2807
  updateConversationSlackContextWatermark(
3504
2808
  ctx.conversationId,