@vellumai/assistant 0.6.5 → 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (443)
  1. package/AGENTS.md +9 -1
  2. package/ARCHITECTURE.md +15 -17
  3. package/Dockerfile +6 -4
  4. package/__tests__/permissions/gateway-threshold-reader.test.ts +283 -0
  5. package/docs/architecture/integrations.md +32 -39
  6. package/docs/architecture/memory.md +25 -30
  7. package/docs/architecture/security.md +7 -6
  8. package/docs/browser-use-architecture-phase2.md +63 -20
  9. package/docs/plugins.md +761 -0
  10. package/examples/plugins/echo/README.md +132 -0
  11. package/examples/plugins/echo/package.json +17 -0
  12. package/examples/plugins/echo/register.ts +187 -0
  13. package/node_modules/@vellumai/egress-proxy/src/types.ts +19 -0
  14. package/openapi.yaml +212 -68
  15. package/package.json +1 -1
  16. package/src/__tests__/app-compiler.test.ts +57 -0
  17. package/src/__tests__/approval-cascade.test.ts +7 -2
  18. package/src/__tests__/auto-analysis-end-to-end.test.ts +1 -0
  19. package/src/__tests__/avatar-generator.test.ts +4 -2
  20. package/src/__tests__/bundled-asset.test.ts +6 -6
  21. package/src/__tests__/catalog-cache.test.ts +69 -0
  22. package/src/__tests__/checker.test.ts +459 -171
  23. package/src/__tests__/circuit-breaker-pipeline.test.ts +406 -0
  24. package/src/__tests__/compaction-events.test.ts +501 -0
  25. package/src/__tests__/compaction-pipeline.test.ts +210 -0
  26. package/src/__tests__/compaction-strip-metadata-clear.test.ts +181 -0
  27. package/src/__tests__/compaction-timeout-recovery.test.ts +262 -0
  28. package/src/__tests__/config-model-image-provider.test.ts +110 -0
  29. package/src/__tests__/config-schema.test.ts +22 -9
  30. package/src/__tests__/config-watcher-cleanup-throttle.test.ts +0 -4
  31. package/src/__tests__/contacts-tools.test.ts +26 -0
  32. package/src/__tests__/context-overflow-policy.test.ts +7 -7
  33. package/src/__tests__/context-window-manager.test.ts +355 -4
  34. package/src/__tests__/conversation-abort-tool-results.test.ts +4 -1
  35. package/src/__tests__/conversation-agent-loop-overflow.test.ts +26 -30
  36. package/src/__tests__/conversation-agent-loop.test.ts +30 -141
  37. package/src/__tests__/conversation-confirmation-signals.test.ts +6 -1
  38. package/src/__tests__/conversation-history-web-search.test.ts +1 -0
  39. package/src/__tests__/conversation-init.benchmark.test.ts +2 -16
  40. package/src/__tests__/conversation-pairing.test.ts +174 -10
  41. package/src/__tests__/conversation-pre-run-repair.test.ts +4 -1
  42. package/src/__tests__/conversation-process-callsite.test.ts +3 -0
  43. package/src/__tests__/conversation-provider-retry-repair.test.ts +16 -7
  44. package/src/__tests__/conversation-queue.test.ts +29 -14
  45. package/src/__tests__/conversation-routes-disk-view.test.ts +7 -6
  46. package/src/__tests__/conversation-runtime-assembly.test.ts +155 -110
  47. package/src/__tests__/conversation-runtime-workspace.test.ts +23 -38
  48. package/src/__tests__/conversation-seed-composer.test.ts +2 -2
  49. package/src/__tests__/conversation-slash-queue.test.ts +7 -2
  50. package/src/__tests__/conversation-slash-unknown.test.ts +25 -2
  51. package/src/__tests__/conversation-speed-override.test.ts +6 -1
  52. package/src/__tests__/conversation-title-service.test.ts +116 -0
  53. package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +41 -2
  54. package/src/__tests__/conversation-usage.test.ts +1 -1
  55. package/src/__tests__/conversation-workspace-cache-state.test.ts +4 -1
  56. package/src/__tests__/conversation-workspace-injection.test.ts +3 -0
  57. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +4 -1
  58. package/src/__tests__/credential-health-service.test.ts +78 -9
  59. package/src/__tests__/credential-security-invariants.test.ts +2 -2
  60. package/src/__tests__/db-schedule-syntax-migration.test.ts +1 -0
  61. package/src/__tests__/empty-response-pipeline.test.ts +305 -0
  62. package/src/__tests__/extension-id-sync-guard.test.ts +3 -3
  63. package/src/__tests__/first-greeting.test.ts +247 -5
  64. package/src/__tests__/headless-browser-mode.test.ts +57 -0
  65. package/src/__tests__/history-repair-pipeline.test.ts +399 -0
  66. package/src/__tests__/host-browser-e2e-cloud.test.ts +307 -0
  67. package/src/__tests__/host-browser-e2e-self-hosted.test.ts +3 -3
  68. package/src/__tests__/host-proxy-interface.test.ts +36 -2
  69. package/src/__tests__/image-credentials.test.ts +137 -0
  70. package/src/__tests__/image-service-dispatcher.test.ts +186 -0
  71. package/src/__tests__/injector-chain.test.ts +526 -0
  72. package/src/__tests__/intent-routing.test.ts +0 -26
  73. package/src/__tests__/llm-call-pipeline.test.ts +285 -0
  74. package/src/__tests__/llm-schema.test.ts +1 -1
  75. package/src/__tests__/media-generate-image.test.ts +119 -13
  76. package/src/__tests__/memory-retrieval-pipeline.test.ts +401 -0
  77. package/src/__tests__/memory-upsert-concurrency.test.ts +1 -0
  78. package/src/__tests__/migration-import-from-url.test.ts +5 -68
  79. package/src/__tests__/model-intents.test.ts +4 -2
  80. package/src/__tests__/notification-broadcaster.test.ts +3 -3
  81. package/src/__tests__/notification-decision-strategy.test.ts +0 -11
  82. package/src/__tests__/notification-schedule-notify-dedup.test.ts +108 -0
  83. package/src/__tests__/oauth-apps-routes.test.ts +1 -1
  84. package/src/__tests__/oauth-cli.test.ts +14 -12
  85. package/src/__tests__/oauth-connect-orchestrator.test.ts +4 -13
  86. package/src/__tests__/oauth-provider-serializer.test.ts +6 -4
  87. package/src/__tests__/oauth-provider-visibility.test.ts +3 -5
  88. package/src/__tests__/oauth-providers-routes.test.ts +3 -2
  89. package/src/__tests__/oauth-store.test.ts +41 -76
  90. package/src/__tests__/onboarding-template-contract.test.ts +16 -64
  91. package/src/__tests__/openai-image-service.test.ts +368 -0
  92. package/src/__tests__/overflow-reduce-pipeline.test.ts +676 -0
  93. package/src/__tests__/permission-checker-host-gate.test.ts +0 -24
  94. package/src/__tests__/persist-onboarding-artifacts.test.ts +266 -0
  95. package/src/__tests__/persistence-pipeline.test.ts +377 -0
  96. package/src/__tests__/pipeline-runner.test.ts +565 -0
  97. package/src/__tests__/platform.test.ts +5 -2
  98. package/src/__tests__/plugin-bootstrap.test.ts +483 -0
  99. package/src/__tests__/plugin-registry.test.ts +273 -0
  100. package/src/__tests__/plugin-route-contribution.test.ts +288 -0
  101. package/src/__tests__/plugin-skill-contribution.test.ts +367 -0
  102. package/src/__tests__/plugin-tool-contribution.test.ts +286 -0
  103. package/src/__tests__/plugin-types.test.ts +320 -0
  104. package/src/__tests__/pricing.test.ts +44 -12
  105. package/src/__tests__/proxy-approval-callback.test.ts +69 -8
  106. package/src/__tests__/reaction-persistence.test.ts +1 -0
  107. package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +1 -0
  108. package/src/__tests__/registry.test.ts +0 -2
  109. package/src/__tests__/schedule-routes.test.ts +131 -1
  110. package/src/__tests__/scheduler-recurrence.test.ts +14 -70
  111. package/src/__tests__/scheduler-reuse-conversation.test.ts +10 -50
  112. package/src/__tests__/secret-detection-handler.test.ts +0 -10
  113. package/src/__tests__/shell-identity.test.ts +0 -134
  114. package/src/__tests__/suggestion-routes.test.ts +103 -4
  115. package/src/__tests__/task-memory-cleanup.test.ts +1 -0
  116. package/src/__tests__/task-scheduler.test.ts +3 -15
  117. package/src/__tests__/test-preload.ts +11 -0
  118. package/src/__tests__/title-generate-pipeline.test.ts +224 -0
  119. package/src/__tests__/token-estimate-pipeline.test.ts +431 -0
  120. package/src/__tests__/tool-error-pipeline.test.ts +244 -0
  121. package/src/__tests__/tool-execute-pipeline.test.ts +431 -0
  122. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -6
  123. package/src/__tests__/tool-executor-shell-integration.test.ts +7 -10
  124. package/src/__tests__/tool-executor.test.ts +141 -0
  125. package/src/__tests__/tool-result-truncate-pipeline.test.ts +356 -0
  126. package/src/__tests__/tool-result-truncation.test.ts +0 -110
  127. package/src/__tests__/user-plugin-loader.test.ts +191 -0
  128. package/src/__tests__/workspace-migration-046-seed-conversation-starters-callsite.test.ts +185 -0
  129. package/src/__tests__/workspace-migration-049-release-notes-default-sonnet.test.ts +100 -0
  130. package/src/__tests__/workspace-migration-050-seed-main-agent-opus-callsite.test.ts +171 -0
  131. package/src/__tests__/workspace-migration-051-seed-conversation-summarization-callsite.test.ts +252 -0
  132. package/src/__tests__/workspace-migration-remove-hooks.test.ts +99 -0
  133. package/src/__tests__/workspace-policy.test.ts +21 -3
  134. package/src/agent/loop.ts +340 -102
  135. package/src/approvals/__tests__/guardian-feed-event.test.ts +304 -0
  136. package/src/approvals/guardian-request-resolvers.ts +80 -0
  137. package/src/backup/__tests__/backup-worker.test.ts +2 -13
  138. package/src/backup/backup-worker.ts +3 -15
  139. package/src/bundler/app-compiler.ts +84 -1
  140. package/src/calls/call-state.ts +2 -2
  141. package/src/channels/__tests__/types.test.ts +3 -3
  142. package/src/channels/types.ts +6 -4
  143. package/src/cli/__tests__/notifications.test.ts +87 -211
  144. package/src/cli/commands/__tests__/backup.test.ts +1 -1
  145. package/src/cli/commands/__tests__/image-generation.test.ts +255 -35
  146. package/src/cli/commands/__tests__/inference-send.test.ts +12 -0
  147. package/src/cli/commands/__tests__/tts-synthesize.test.ts +12 -0
  148. package/src/cli/commands/backup.ts +2 -2
  149. package/src/cli/commands/clients.ts +138 -0
  150. package/src/cli/commands/completions.ts +2 -9
  151. package/src/cli/commands/conversations.ts +55 -7
  152. package/src/cli/commands/image-generation.ts +33 -34
  153. package/src/cli/commands/notifications.ts +68 -103
  154. package/src/cli/commands/oauth/__tests__/providers-register.test.ts +1 -1
  155. package/src/cli/commands/oauth/__tests__/providers-update.test.ts +1 -1
  156. package/src/cli/commands/oauth/connect.ts +2 -2
  157. package/src/cli/commands/oauth/providers.ts +176 -8
  158. package/src/cli/commands/oauth/status.ts +46 -36
  159. package/src/cli/commands/skills.ts +3 -4
  160. package/src/cli/program.ts +25 -29
  161. package/src/config/__tests__/backup-schema.test.ts +7 -2
  162. package/src/config/bundled-skills/app-builder/SKILL.md +2 -2
  163. package/src/config/bundled-skills/app-builder/references/WIDGETS.md +10 -10
  164. package/src/config/bundled-skills/contacts/tools/contact-merge.ts +66 -87
  165. package/src/config/bundled-skills/contacts/tools/contact-search.ts +28 -51
  166. package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +22 -40
  167. package/src/config/bundled-skills/image-studio/SKILL.md +2 -1
  168. package/src/config/bundled-skills/image-studio/TOOLS.json +2 -1
  169. package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +23 -39
  170. package/src/config/bundled-skills/messaging/SKILL.md +3 -3
  171. package/src/config/bundled-skills/messaging/tools/__tests__/messaging-feed-events.test.ts +207 -0
  172. package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +12 -0
  173. package/src/config/bundled-skills/messaging/tools/messaging-send.ts +58 -0
  174. package/src/config/bundled-skills/schedule/SKILL.md +8 -3
  175. package/src/config/bundled-skills/schedule/TOOLS.json +15 -7
  176. package/src/config/bundled-skills/schedule/references/SCRIPT_MODE_PATTERNS.md +59 -0
  177. package/src/config/bundled-tool-registry.ts +0 -15
  178. package/src/config/feature-flag-registry.json +17 -1
  179. package/src/config/schema.ts +19 -0
  180. package/src/config/schemas/backup.ts +1 -1
  181. package/src/config/schemas/conversations.ts +16 -0
  182. package/src/config/schemas/llm.ts +2 -3
  183. package/src/config/schemas/security.ts +6 -6
  184. package/src/config/schemas/tts.ts +11 -0
  185. package/src/config/skill-state.ts +6 -2
  186. package/src/config/skills.ts +94 -5
  187. package/src/context/__tests__/compact-prompt.test.ts +27 -9
  188. package/src/context/prompts/compact.md +26 -12
  189. package/src/context/tool-result-truncation.ts +3 -63
  190. package/src/context/window-manager.ts +190 -16
  191. package/src/credential-health/credential-health-service.ts +19 -6
  192. package/src/daemon/__tests__/conversation-feed-event.test.ts +317 -0
  193. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +4 -12
  194. package/src/daemon/__tests__/conversation-tool-setup.test.ts +14 -15
  195. package/src/daemon/config-watcher.ts +0 -2
  196. package/src/daemon/context-overflow-policy.ts +4 -13
  197. package/src/daemon/conversation-agent-loop-handlers.ts +83 -22
  198. package/src/daemon/conversation-agent-loop.ts +984 -683
  199. package/src/daemon/conversation-history.ts +10 -19
  200. package/src/daemon/conversation-lifecycle.ts +37 -19
  201. package/src/daemon/conversation-notifiers.ts +2 -110
  202. package/src/daemon/conversation-process.ts +14 -7
  203. package/src/daemon/conversation-runtime-assembly.ts +532 -411
  204. package/src/daemon/conversation-tool-setup.ts +41 -4
  205. package/src/daemon/conversation.ts +80 -35
  206. package/src/daemon/external-plugins-bootstrap.ts +478 -0
  207. package/src/daemon/first-greeting.ts +191 -14
  208. package/src/daemon/handlers/config-model.ts +11 -0
  209. package/src/daemon/handlers/skills.ts +5 -1
  210. package/src/daemon/lifecycle.ts +33 -68
  211. package/src/daemon/message-types/computer-use.ts +2 -34
  212. package/src/daemon/message-types/conversations.ts +49 -0
  213. package/src/daemon/message-types/messages.ts +12 -0
  214. package/src/daemon/server.ts +5 -3
  215. package/src/daemon/shutdown-handlers.ts +2 -12
  216. package/src/daemon/tool-side-effects.ts +14 -56
  217. package/src/heartbeat/__tests__/heartbeat-feed-event.test.ts +160 -0
  218. package/src/heartbeat/heartbeat-service.ts +24 -1
  219. package/src/home/__tests__/feed-population-integration.test.ts +312 -0
  220. package/src/home/emit-feed-event.ts +7 -0
  221. package/src/home/feed-types.ts +41 -2
  222. package/src/home/rewrite-command-preview.ts +66 -0
  223. package/src/ipc/__tests__/socket-path.test.ts +11 -50
  224. package/src/ipc/cli-client.ts +1 -1
  225. package/src/ipc/cli-server.ts +3 -3
  226. package/src/ipc/gateway-client.ts +4 -1
  227. package/src/ipc/routes/browser-context.ts +2 -0
  228. package/src/ipc/routes/browser.ts +1 -0
  229. package/src/ipc/routes/get-contact.ts +16 -0
  230. package/src/ipc/routes/index.ts +14 -0
  231. package/src/ipc/routes/list-clients.ts +31 -0
  232. package/src/ipc/routes/merge-contacts.ts +17 -0
  233. package/src/ipc/routes/notification.ts +133 -0
  234. package/src/ipc/routes/rename-conversation.ts +59 -0
  235. package/src/ipc/routes/search-contacts.ts +19 -0
  236. package/src/ipc/routes/upsert-contact.ts +25 -0
  237. package/src/ipc/socket-path.ts +14 -38
  238. package/src/media/app-icon-generator.ts +23 -46
  239. package/src/media/avatar-router.ts +26 -41
  240. package/src/media/gemini-image-service.ts +8 -41
  241. package/src/media/image-credentials.ts +73 -0
  242. package/src/media/image-service.ts +85 -0
  243. package/src/media/openai-image-service.ts +131 -0
  244. package/src/media/types.ts +46 -0
  245. package/src/memory/conversation-crud.ts +48 -18
  246. package/src/memory/conversation-queries.ts +57 -4
  247. package/src/memory/conversation-title-service.ts +25 -0
  248. package/src/memory/db-init.ts +8 -0
  249. package/src/memory/embedding-gemini.test.ts +41 -2
  250. package/src/memory/embedding-gemini.ts +6 -1
  251. package/src/memory/graph/bootstrap.test.ts +282 -0
  252. package/src/memory/graph/bootstrap.ts +8 -5
  253. package/src/memory/graph/extraction.ts +10 -2
  254. package/src/memory/graph/graph-search.test.ts +1 -0
  255. package/src/memory/graph/inspect.ts +2 -2
  256. package/src/memory/graph/retriever.ts +10 -3
  257. package/src/memory/migrations/041-approval-prompt-ts-tracker.ts +26 -0
  258. package/src/memory/migrations/149-oauth-tables.ts +1 -0
  259. package/src/memory/migrations/223-schedule-script-column.ts +11 -0
  260. package/src/memory/migrations/224-oauth-providers-managed-service-is-paid.ts +24 -0
  261. package/src/memory/migrations/225-oauth-providers-available-scopes.ts +13 -0
  262. package/src/memory/migrations/index.ts +4 -0
  263. package/src/memory/pkb/pkb-index.test.ts +1 -0
  264. package/src/memory/pkb/pkb-reconcile.test.ts +1 -0
  265. package/src/memory/pkb/pkb-search.test.ts +65 -4
  266. package/src/memory/pkb/pkb-search.ts +40 -18
  267. package/src/memory/qdrant-client.test.ts +60 -0
  268. package/src/memory/qdrant-client.ts +25 -0
  269. package/src/memory/schema/infrastructure.ts +1 -0
  270. package/src/memory/schema/oauth.ts +4 -1
  271. package/src/messaging/providers/slack/render-transcript.test.ts +77 -29
  272. package/src/messaging/providers/slack/render-transcript.ts +58 -0
  273. package/src/notifications/conversation-pairing.ts +78 -19
  274. package/src/notifications/copy-composer.ts +0 -5
  275. package/src/notifications/emit-signal.ts +1 -1
  276. package/src/notifications/signal.ts +1 -2
  277. package/src/oauth/AGENTS.md +1 -1
  278. package/src/oauth/__tests__/identity-verifier.test.ts +2 -1
  279. package/src/oauth/connect-orchestrator.ts +8 -34
  280. package/src/oauth/connect-types.ts +6 -10
  281. package/src/oauth/manual-token-connection.ts +23 -0
  282. package/src/oauth/oauth-store.ts +30 -14
  283. package/src/oauth/provider-serializer.ts +6 -1
  284. package/src/oauth/seed-providers.ts +56 -108
  285. package/src/outbound-proxy/http-forwarder.ts +9 -0
  286. package/src/permissions/approval-policy.test.ts +293 -18
  287. package/src/permissions/approval-policy.ts +110 -58
  288. package/src/permissions/arg-parser.test.ts +161 -0
  289. package/src/permissions/arg-parser.ts +141 -0
  290. package/src/permissions/bash-risk-classifier.test.ts +414 -2
  291. package/src/permissions/bash-risk-classifier.ts +303 -60
  292. package/src/permissions/checker.ts +157 -29
  293. package/src/permissions/command-registry.test.ts +239 -0
  294. package/src/permissions/command-registry.ts +234 -54
  295. package/src/permissions/defaults.ts +5 -4
  296. package/src/permissions/gateway-threshold-reader.ts +196 -0
  297. package/src/permissions/prompter.ts +4 -0
  298. package/src/permissions/risk-types.ts +61 -4
  299. package/src/permissions/schedule-risk-classifier.test.ts +129 -0
  300. package/src/permissions/schedule-risk-classifier.ts +85 -0
  301. package/src/permissions/shell-identity.ts +2 -42
  302. package/src/permissions/types.ts +2 -0
  303. package/src/permissions/workspace-policy.ts +8 -3
  304. package/src/plugins/defaults/circuit-breaker.ts +146 -0
  305. package/src/plugins/defaults/compaction.ts +145 -0
  306. package/src/plugins/defaults/empty-response.ts +126 -0
  307. package/src/plugins/defaults/history-repair.ts +85 -0
  308. package/src/plugins/defaults/index.ts +116 -0
  309. package/src/plugins/defaults/injectors.ts +491 -0
  310. package/src/plugins/defaults/llm-call.ts +82 -0
  311. package/src/plugins/defaults/memory-retrieval.ts +226 -0
  312. package/src/plugins/defaults/overflow-reduce.ts +181 -0
  313. package/src/plugins/defaults/persistence.ts +129 -0
  314. package/src/plugins/defaults/title-generate.ts +95 -0
  315. package/src/plugins/defaults/token-estimate.ts +104 -0
  316. package/src/plugins/defaults/tool-error.ts +126 -0
  317. package/src/plugins/defaults/tool-execute.ts +89 -0
  318. package/src/plugins/defaults/tool-result-truncate.ts +88 -0
  319. package/src/plugins/pipeline.ts +316 -0
  320. package/src/plugins/plugin-skill-contributions.ts +292 -0
  321. package/src/plugins/registry.ts +241 -0
  322. package/src/plugins/types.ts +1134 -0
  323. package/src/plugins/user-loader.ts +177 -0
  324. package/src/prompts/templates/BOOTSTRAP.md +27 -77
  325. package/src/providers/model-catalog.ts +52 -29
  326. package/src/providers/model-intents.ts +1 -1
  327. package/src/providers/openrouter/client.ts +5 -1
  328. package/src/providers/speech-to-text/deepgram-realtime.test.ts +61 -0
  329. package/src/providers/speech-to-text/deepgram-realtime.ts +57 -0
  330. package/src/providers/speech-to-text/xai-realtime.test.ts +72 -4
  331. package/src/providers/speech-to-text/xai-realtime.ts +39 -14
  332. package/src/runtime/AGENTS.md +25 -16
  333. package/src/runtime/__tests__/browser-extension-pair-routes.test.ts +3 -3
  334. package/src/runtime/__tests__/client-registry.test.ts +293 -0
  335. package/src/runtime/client-registry.ts +261 -0
  336. package/src/runtime/http-server.ts +77 -8
  337. package/src/runtime/http-types.ts +0 -2
  338. package/src/runtime/migrations/vbundle-builder.ts +1 -22
  339. package/src/runtime/routes/approval-prompt-ts-tracker.ts +51 -31
  340. package/src/runtime/routes/approval-routes.ts +17 -0
  341. package/src/runtime/routes/browser-extension-pair-routes.ts +27 -8
  342. package/src/runtime/routes/conversation-routes.ts +223 -116
  343. package/src/runtime/routes/inbound-message-handler.ts +88 -13
  344. package/src/runtime/routes/memory-item-routes.test.ts +1 -0
  345. package/src/runtime/routes/migration-routes.ts +0 -3
  346. package/src/runtime/routes/playground/__tests__/force-compact.test.ts +284 -0
  347. package/src/runtime/routes/playground/__tests__/guard.test.ts +80 -0
  348. package/src/runtime/routes/playground/__tests__/inject-failures.test.ts +294 -0
  349. package/src/runtime/routes/playground/__tests__/reset-circuit.test.ts +271 -0
  350. package/src/runtime/routes/playground/__tests__/seed-conversation.test.ts +202 -0
  351. package/src/runtime/routes/playground/__tests__/seeded-conversations.test.ts +309 -0
  352. package/src/runtime/routes/playground/__tests__/state.test.ts +224 -0
  353. package/src/runtime/routes/playground/conversation-not-found.ts +29 -0
  354. package/src/runtime/routes/playground/deps.ts +56 -0
  355. package/src/runtime/routes/playground/force-compact.ts +73 -0
  356. package/src/runtime/routes/playground/guard.ts +37 -0
  357. package/src/runtime/routes/playground/index.ts +28 -0
  358. package/src/runtime/routes/playground/inject-failures.ts +159 -0
  359. package/src/runtime/routes/playground/reset-circuit.ts +115 -0
  360. package/src/runtime/routes/playground/seed-conversation.ts +139 -0
  361. package/src/runtime/routes/playground/seeded-conversations.ts +78 -0
  362. package/src/runtime/routes/playground/state.ts +78 -0
  363. package/src/runtime/routes/schedule-routes.ts +89 -8
  364. package/src/runtime/skill-route-registry.ts +75 -15
  365. package/src/schedule/run-script.ts +68 -0
  366. package/src/schedule/schedule-store.ts +7 -1
  367. package/src/schedule/scheduler.ts +48 -8
  368. package/src/skills/catalog-cache.ts +12 -5
  369. package/src/tools/browser/__tests__/browser-status.test.ts +189 -0
  370. package/src/tools/browser/browser-execution.ts +88 -19
  371. package/src/tools/browser/cdp-client/__tests__/extension-cdp-client.test.ts +230 -0
  372. package/src/tools/browser/cdp-client/__tests__/factory.test.ts +146 -3
  373. package/src/tools/browser/cdp-client/extension-cdp-client.ts +54 -3
  374. package/src/tools/browser/cdp-client/factory.ts +15 -4
  375. package/src/tools/executor.ts +126 -74
  376. package/src/tools/network/script-proxy/session-manager.ts +37 -1
  377. package/src/tools/permission-checker.ts +98 -49
  378. package/src/tools/policy-context.ts +4 -0
  379. package/src/tools/registry.ts +140 -3
  380. package/src/tools/schedule/create.ts +23 -8
  381. package/src/tools/schedule/update.ts +3 -1
  382. package/src/tools/secret-detection-handler.ts +0 -51
  383. package/src/tools/system/avatar-generator.ts +6 -2
  384. package/src/tools/types.ts +28 -2
  385. package/src/util/platform.ts +7 -2
  386. package/src/util/pricing.ts +26 -3
  387. package/src/workspace/migrations/006-services-config.ts +2 -4
  388. package/src/workspace/migrations/022-move-hooks-to-workspace.ts +2 -3
  389. package/src/workspace/migrations/041-backfill-google-gmail-settings-scope.ts +3 -4
  390. package/src/workspace/migrations/046-seed-conversation-starters-callsite.ts +108 -0
  391. package/src/workspace/migrations/047-remove-watch-callsites.ts +54 -0
  392. package/src/workspace/migrations/048-remove-workspace-hooks.ts +81 -0
  393. package/src/workspace/migrations/049-release-notes-default-sonnet.ts +80 -0
  394. package/src/workspace/migrations/050-seed-main-agent-opus-callsite.ts +86 -0
  395. package/src/workspace/migrations/051-seed-conversation-summarization-callsite.ts +128 -0
  396. package/src/workspace/migrations/registry.ts +12 -0
  397. package/tsconfig.json +1 -1
  398. package/hook-templates/debug-prompt-logger/hook.json +0 -7
  399. package/hook-templates/debug-prompt-logger/run.sh +0 -66
  400. package/src/__tests__/compaction-circuit-breaker.test.ts +0 -336
  401. package/src/__tests__/context-overflow-approval.test.ts +0 -156
  402. package/src/__tests__/hooks-blocking.test.ts +0 -178
  403. package/src/__tests__/hooks-cli.test.ts +0 -182
  404. package/src/__tests__/hooks-config.test.ts +0 -108
  405. package/src/__tests__/hooks-discovery.test.ts +0 -211
  406. package/src/__tests__/hooks-integration.test.ts +0 -196
  407. package/src/__tests__/hooks-manager.test.ts +0 -226
  408. package/src/__tests__/hooks-runner.test.ts +0 -175
  409. package/src/__tests__/hooks-settings.test.ts +0 -160
  410. package/src/__tests__/hooks-templates.test.ts +0 -169
  411. package/src/__tests__/hooks-ts-runner.test.ts +0 -170
  412. package/src/__tests__/hooks-watch.test.ts +0 -112
  413. package/src/__tests__/notification-schedule-dedup.test.ts +0 -213
  414. package/src/__tests__/oauth-scope-policy.test.ts +0 -180
  415. package/src/__tests__/send-notification-tool.test.ts +0 -83
  416. package/src/cli/commands/shotgun.ts +0 -266
  417. package/src/config/bundled-skills/conversations/SKILL.md +0 -20
  418. package/src/config/bundled-skills/conversations/TOOLS.json +0 -23
  419. package/src/config/bundled-skills/conversations/tools/rename-conversation.ts +0 -88
  420. package/src/config/bundled-skills/heartbeat/SKILL.md +0 -43
  421. package/src/config/bundled-skills/notifications/SKILL.md +0 -40
  422. package/src/config/bundled-skills/notifications/TOOLS.json +0 -80
  423. package/src/config/bundled-skills/notifications/tools/send-notification.ts +0 -152
  424. package/src/config/bundled-skills/notifications/tools/shared.ts +0 -13
  425. package/src/config/bundled-skills/screen-watch/SKILL.md +0 -27
  426. package/src/config/bundled-skills/screen-watch/TOOLS.json +0 -35
  427. package/src/config/bundled-skills/screen-watch/tools/start-screen-watch.ts +0 -12
  428. package/src/config/bundled-skills/skills-catalog/SKILL.md +0 -84
  429. package/src/daemon/context-overflow-approval.ts +0 -52
  430. package/src/daemon/watch-handler.ts +0 -399
  431. package/src/hooks/cli.ts +0 -253
  432. package/src/hooks/config.ts +0 -100
  433. package/src/hooks/discovery.ts +0 -135
  434. package/src/hooks/manager.ts +0 -179
  435. package/src/hooks/runner.ts +0 -117
  436. package/src/hooks/templates.ts +0 -77
  437. package/src/hooks/types.ts +0 -75
  438. package/src/oauth/scope-policy.ts +0 -89
  439. package/src/runtime/gateway-internal-client.ts +0 -94
  440. package/src/runtime/routes/watch-routes.ts +0 -156
  441. package/src/signals/shotgun.ts +0 -203
  442. package/src/tools/watch/screen-watch.ts +0 -144
  443. package/src/tools/watch/watch-state.ts +0 -142
@@ -37,8 +37,8 @@ import {
37
37
  } from "../context/token-estimator.js";
38
38
  import type { ContextWindowManager } from "../context/window-manager.js";
39
39
  import type { ToolProfiler } from "../events/tool-profiling-listener.js";
40
+ import { emitFeedEvent } from "../home/emit-feed-event.js";
40
41
  import { writeRelationshipState } from "../home/relationship-state-writer.js";
41
- import { getHookManager } from "../hooks/manager.js";
42
42
  import {
43
43
  clearSentryConversationContext,
44
44
  setSentryConversationContext,
@@ -47,9 +47,7 @@ import { commitAppTurnChanges } from "../memory/app-git-service.js";
47
47
  import { getApp, listAppFiles, resolveAppDir } from "../memory/app-store.js";
48
48
  import { enqueueAutoAnalysisOnCompaction } from "../memory/auto-analysis-enqueue.js";
49
49
  import {
50
- addMessage,
51
- clearPkbSystemReminderMetadataForConversation,
52
- deleteMessageById,
50
+ clearStrippedInjectionMetadataForConversation,
53
51
  getConversation,
54
52
  getConversationOriginChannel,
55
53
  getConversationOriginInterface,
@@ -57,22 +55,53 @@ import {
57
55
  getMessageById,
58
56
  provenanceFromTrustContext,
59
57
  updateConversationContextWindow,
60
- updateConversationTitle,
61
- updateMessageMetadata,
62
58
  } from "../memory/conversation-crud.js";
63
59
  import { getResolvedConversationDirPath } from "../memory/conversation-directories.js";
64
60
  import { syncMessageToDisk } from "../memory/conversation-disk-view.js";
65
61
  import {
66
62
  isReplaceableTitle,
67
- queueGenerateConversationTitle,
68
63
  queueRegenerateConversationTitle,
69
- UNTITLED_FALLBACK,
70
64
  } from "../memory/conversation-title-service.js";
71
65
  import type { ConversationGraphMemory } from "../memory/graph/conversation-graph-memory.js";
72
66
  import { recordMemoryRecallLog } from "../memory/memory-recall-log-store.js";
73
67
  import { PKB_WORKSPACE_SCOPE } from "../memory/pkb/types.js";
74
68
  import type { PermissionPrompter } from "../permissions/prompter.js";
75
- import type { ContentBlock, Message } from "../providers/types.js";
69
+ import { defaultCompactionTerminal } from "../plugins/defaults/compaction.js";
70
+ import { defaultHistoryRepairTerminal } from "../plugins/defaults/history-repair.js";
71
+ import {
72
+ asDefaultGraphPayload,
73
+ type DefaultMemoryRetrievalDeps,
74
+ type GraphMemoryPayload,
75
+ runDefaultMemoryRetrieval,
76
+ } from "../plugins/defaults/memory-retrieval.js";
77
+ import { defaultPersistenceTerminal } from "../plugins/defaults/persistence.js";
78
+ import { defaultTitleGenerateTerminal } from "../plugins/defaults/title-generate.js";
79
+ import { defaultTokenEstimateTerminal } from "../plugins/defaults/token-estimate.js";
80
+ import { DEFAULT_TIMEOUTS, runPipeline } from "../plugins/pipeline.js";
81
+ import { getMiddlewaresFor } from "../plugins/registry.js";
82
+ import type {
83
+ CircuitBreakerArgs,
84
+ CircuitBreakerResult,
85
+ CompactionArgs,
86
+ CompactionResult,
87
+ EstimateArgs,
88
+ EstimateResult,
89
+ HistoryRepairArgs,
90
+ HistoryRepairResult,
91
+ MemoryArgs,
92
+ MemoryResult,
93
+ OverflowReduceArgs,
94
+ OverflowReduceResult,
95
+ PersistArgs,
96
+ PersistResult,
97
+ TurnContext as PluginTurnContext,
98
+ } from "../plugins/types.js";
99
+ import { PluginExecutionError, PluginTimeoutError } from "../plugins/types.js";
100
+ import type {
101
+ ContentBlock,
102
+ Message,
103
+ ToolDefinition,
104
+ } from "../providers/types.js";
76
105
  import type { Provider } from "../providers/types.js";
77
106
  import { resolveActorTrust } from "../runtime/actor-trust-resolver.js";
78
107
  import { DAEMON_INTERNAL_ASSISTANT_ID } from "../runtime/assistant-scope.js";
@@ -88,7 +117,6 @@ import {
88
117
  type AssistantAttachmentDraft,
89
118
  cleanAssistantContent,
90
119
  } from "./assistant-attachments.js";
91
- import { requestCompressionApproval } from "./context-overflow-approval.js";
92
120
  import { resolveOverflowAction } from "./context-overflow-policy.js";
93
121
  import {
94
122
  createInitialReducerState,
@@ -129,8 +157,6 @@ import {
129
157
  inboundActorContextFromTrustContext,
130
158
  loadSlackActiveThreadFocusBlock,
131
159
  loadSlackChronologicalMessages,
132
- readNowScratchpad,
133
- readPkbContext,
134
160
  stripInjectionsForCompaction,
135
161
  } from "./conversation-runtime-assembly.js";
136
162
  import type { SkillProjectionCache } from "./conversation-skill-tools.js";
@@ -138,7 +164,7 @@ import { markSurfaceCompleted } from "./conversation-surfaces.js";
138
164
  import { resolveTrustClass } from "./conversation-tool-setup.js";
139
165
  import { recordUsage } from "./conversation-usage.js";
140
166
  import { formatTurnTimestamp } from "./date-context.js";
141
- import { deepRepairHistory, repairHistory } from "./history-repair.js";
167
+ import { deepRepairHistory } from "./history-repair.js";
142
168
  import type {
143
169
  DynamicPageSurfaceData,
144
170
  ServerMessage,
@@ -171,77 +197,210 @@ type GitServiceInitializer = {
171
197
  ensureInitialized(): Promise<void>;
172
198
  };
173
199
 
174
- // ── Compaction circuit-breaker constants ────────────────────────────
200
+ // ── Compaction circuit-breaker pipeline helpers ─────────────────────
201
+ //
202
+ // The circuit-breaker behavior (3 consecutive summary-LLM failures trips a
203
+ // 1-hour cooldown) is now implemented by the `circuitBreaker` plugin
204
+ // pipeline. The default plugin (`plugins/defaults/circuit-breaker.ts`)
205
+ // replicates the legacy threshold/cooldown constants and event-emission
206
+ // semantics exactly — it operates on the `consecutiveCompactionFailures` /
207
+ // `compactionCircuitOpenUntil` fields the conversation still owns so the
208
+ // dev-only playground routes (`POST /playground/reset-compaction-circuit`,
209
+ // `POST /playground/inject-compaction-failures`) continue to read and
210
+ // mutate those fields directly.
175
211
  //
176
- // The circuit opens after `COMPACTION_CIRCUIT_FAILURE_THRESHOLD` consecutive
177
- // summary-LLM failures and stays open for `COMPACTION_CIRCUIT_COOLDOWN_MS`
178
- // before auto-compaction is allowed to retry. User-initiated compaction
179
- // (`force: true`) bypasses the breaker regardless of its state.
180
- const COMPACTION_CIRCUIT_FAILURE_THRESHOLD = 3;
181
- const COMPACTION_CIRCUIT_COOLDOWN_MS = 60 * 60 * 1000; // 1 hour
212
+ // The helpers below build the pipeline inputs and invoke the runner. They
213
+ // are the sole entry points the rest of the daemon uses to query or update
214
+ // the compaction circuit.
215
+
216
+ /** Circuit-breaker key for a specific conversation's compaction pipeline. */
217
+ function compactionCircuitKey(conversationId: string): string {
218
+ return `compaction:${conversationId}`;
219
+ }
182
220
 
183
221
  /**
184
- * Check whether the compaction circuit breaker is currently open for the
185
- * given context. The breaker auto-closes once `compactionCircuitOpenUntil`
186
- * has elapsed.
222
+ * Build the minimal {@link TurnContext} the pipeline runner requires. Called
223
+ * both from inside the agent loop (where turn identifiers are available) and
224
+ * from non-turn invocations like `Conversation.forceCompact` (which falls
225
+ * back to stable placeholders so the runner's log records still carry the
226
+ * conversation identifier).
187
227
  */
188
- export function isCompactionCircuitOpen(ctx: {
189
- compactionCircuitOpenUntil: number | null;
190
- }): boolean {
191
- return (
192
- ctx.compactionCircuitOpenUntil !== null &&
193
- Date.now() < ctx.compactionCircuitOpenUntil
194
- );
228
+ function buildCircuitTurnContext(ctx: {
229
+ readonly conversationId: string;
230
+ currentRequestId?: string;
231
+ currentTurnTrustContext?: TrustContext;
232
+ trustContext?: TrustContext;
233
+ turnCount: number;
234
+ }): PluginTurnContext {
235
+ const trust: TrustContext =
236
+ ctx.currentTurnTrustContext ?? ctx.trustContext ?? FALLBACK_TURN_TRUST;
237
+ return {
238
+ requestId: ctx.currentRequestId ?? "circuit-breaker",
239
+ conversationId: ctx.conversationId,
240
+ turnIndex: ctx.turnCount,
241
+ trust,
242
+ };
195
243
  }
196
244
 
197
245
  /**
198
- * Track the outcome of a `maybeCompact()` call against the circuit breaker.
246
+ * Run the `circuitBreaker` pipeline for the compaction circuit on this
247
+ * conversation. When `outcome` is provided, state is updated (and transition
248
+ * events emit via `onEvent`); when omitted the call is query-only.
199
249
  *
200
- * - When the summary LLM call failed (local fallback covered the result),
201
- * increment the consecutive-failure counter. If the counter reaches the
202
- * threshold, open the circuit for the cooldown window and emit
203
- * `compaction_circuit_open` so clients can surface a notice.
204
- * - When the call did not fail, reset the counter and clear any open circuit.
250
+ * Returns the post-call decision from the pipeline. Callers gate auto-paths
251
+ * on `!result.open` and admit forced paths regardless of the decision.
252
+ */
253
+ async function runCompactionCircuitPipeline(
254
+ ctx: {
255
+ readonly conversationId: string;
256
+ consecutiveCompactionFailures: number;
257
+ compactionCircuitOpenUntil: number | null;
258
+ currentRequestId?: string;
259
+ currentTurnTrustContext?: TrustContext;
260
+ trustContext?: TrustContext;
261
+ turnCount: number;
262
+ },
263
+ args: {
264
+ outcome?: "success" | "failure";
265
+ onEvent?: (msg: ServerMessage) => void;
266
+ },
267
+ ): Promise<CircuitBreakerResult> {
268
+ const turnContext = buildCircuitTurnContext(ctx);
269
+ return runPipeline<CircuitBreakerArgs, CircuitBreakerResult>(
270
+ "circuitBreaker",
271
+ getMiddlewaresFor("circuitBreaker"),
272
+ async (terminalArgs) => {
273
+ // No plugin in the chain produced a decision. This should be
274
+ // unreachable in production because the default plugin registers a
275
+ // `circuitBreaker` middleware that always returns a decision, but we
276
+ // defensively derive the state here so test setups that intentionally
277
+ // omit the default plugin still get a sensible response.
278
+ const openUntil = terminalArgs.state.compactionCircuitOpenUntil;
279
+ const now = Date.now();
280
+ if (openUntil !== null && now < openUntil) {
281
+ return { open: true, cooldownRemainingMs: openUntil - now };
282
+ }
283
+ return { open: false };
284
+ },
285
+ {
286
+ key: compactionCircuitKey(ctx.conversationId),
287
+ // Pass the ctx directly as the mutable state container. The
288
+ // `CircuitBreakerArgs.state` shape deliberately matches the subset of
289
+ // fields the conversation owns so plugins mutate the same object the
290
+ // playground routes read and write.
291
+ state: ctx,
292
+ ...(args.outcome !== undefined ? { outcome: args.outcome } : {}),
293
+ ...(args.onEvent ? { onEvent: args.onEvent } : {}),
294
+ },
295
+ turnContext,
296
+ DEFAULT_TIMEOUTS.circuitBreaker,
297
+ );
298
+ }
299
+
300
+ /**
301
+ * Query-only: is the compaction circuit breaker currently open for this
302
+ * conversation? Thin wrapper around {@link runCompactionCircuitPipeline}
303
+ * with no outcome. Async because the pipeline runner is async, but the
304
+ * default plugin resolves synchronously on its microtask.
305
+ */
306
+ export async function isCompactionCircuitOpen(ctx: {
307
+ readonly conversationId: string;
308
+ consecutiveCompactionFailures: number;
309
+ compactionCircuitOpenUntil: number | null;
310
+ currentRequestId?: string;
311
+ currentTurnTrustContext?: TrustContext;
312
+ trustContext?: TrustContext;
313
+ turnCount: number;
314
+ }): Promise<boolean> {
315
+ const decision = await runCompactionCircuitPipeline(ctx, {});
316
+ return decision.open;
317
+ }
318
+
319
+ /**
320
+ * Update the compaction circuit breaker with the outcome of a `maybeCompact`
321
+ * call and emit any transition event. A `summaryFailed` value of `undefined`
322
+ * means the summary LLM never ran (early return) — callers must guard with
323
+ * `summaryFailed !== undefined` before invoking this helper so early-return
324
+ * paths don't silently reset the 3-strike counter.
205
325
  *
206
- * This is called by every `maybeCompact()` site (including forced ones),
207
- * because a run of three failures is a provider-health signal regardless of
208
- * whether the caller bypassed the breaker.
326
+ * The default plugin handles threshold-based tripping and cooldown reset;
327
+ * see `plugins/defaults/circuit-breaker.ts` for the canonical semantics.
209
328
  */
210
- export function trackCompactionOutcome(
329
+ export async function trackCompactionOutcome(
211
330
  ctx: {
331
+ readonly conversationId: string;
212
332
  consecutiveCompactionFailures: number;
213
333
  compactionCircuitOpenUntil: number | null;
334
+ currentRequestId?: string;
335
+ currentTurnTrustContext?: TrustContext;
336
+ trustContext?: TrustContext;
337
+ turnCount: number;
214
338
  },
215
- summaryFailed: boolean | undefined,
339
+ summaryFailed: boolean,
216
340
  onEvent: (msg: ServerMessage) => void,
217
- ): void {
218
- if (summaryFailed) {
219
- ctx.consecutiveCompactionFailures += 1;
220
- // Treat a stale/expired open-until timestamp the same as null so a new
221
- // 3-strike window can re-open the circuit after the prior cooldown
222
- // elapses. Without this the second trip would no-op because
223
- // `compactionCircuitOpenUntil` remains set to a past timestamp even
224
- // though `isCompactionCircuitOpen()` correctly reports closed.
225
- const circuitDormant =
226
- ctx.compactionCircuitOpenUntil === null ||
227
- Date.now() >= ctx.compactionCircuitOpenUntil;
228
- if (
229
- ctx.consecutiveCompactionFailures >=
230
- COMPACTION_CIRCUIT_FAILURE_THRESHOLD &&
231
- circuitDormant
232
- ) {
233
- const openUntil = Date.now() + COMPACTION_CIRCUIT_COOLDOWN_MS;
234
- ctx.compactionCircuitOpenUntil = openUntil;
235
- onEvent({
236
- type: "compaction_circuit_open",
237
- reason: "3_consecutive_failures",
238
- openUntil,
239
- });
240
- }
241
- } else {
242
- ctx.consecutiveCompactionFailures = 0;
243
- ctx.compactionCircuitOpenUntil = null;
244
- }
341
+ ): Promise<void> {
342
+ await runCompactionCircuitPipeline(ctx, {
343
+ outcome: summaryFailed ? "failure" : "success",
344
+ onEvent,
345
+ });
346
+ }
347
+
348
+ // ── Plugin pipeline helpers ──────────────────────────────────────────
349
+ //
350
+ // Canonical {@link PluginTurnContext} builder threaded into every
351
+ // `runPipeline` call inside `runAgentLoopImpl`. The orchestrator composes
352
+ // the context on demand at each call site from ambient state rather than
353
+ // carrying a persistent `TurnContext` instance across the turn.
354
+
355
+ /**
356
+ * Synthetic fallback trust context used when the orchestrator fires a pipeline
357
+ * before the per-turn trust snapshot has been captured (e.g. invocations that
358
+ * bypass `processMessage` / `drainQueue`). We bias to `unknown` rather than
359
+ * `guardian` so a missing snapshot cannot accidentally grant elevated trust
360
+ * to a custom plugin reading `ctx.trust`.
361
+ */
362
+ export const FALLBACK_TURN_TRUST: TrustContext = {
363
+ sourceChannel: "vellum",
364
+ trustClass: "unknown",
365
+ };
366
+
367
+ /**
368
+ * Build the {@link TurnContext} passed to {@link runPipeline}.
369
+ *
370
+ * Canonical source of truth for every pipeline call site inside the agent
371
+ * loop. Every `runPipeline` invocation in `runAgentLoopImpl` (and in the
372
+ * handlers that share its ambient state) must route through this helper
373
+ * rather than constructing a `TurnContext` literal inline — this keeps
374
+ * `turnIndex`, trust resolution, and the `contextWindowManager` attachment
375
+ * consistent across pipeline slots, which in turn keeps structured logs
376
+ * filtered by `conversationId`/`turnIndex` coherent across slots.
377
+ *
378
+ * Behavior:
379
+ * - `turnIndex` is always `ctx.turnCount` — the orchestrator-owned
380
+ * 0-based turn counter. Reading from a single source avoids the
381
+ * earlier inconsistency (`ctx.turnCount`, `ctx.messages.length - 1`,
382
+ * `ctx.messages.length`, and `0` were all used for the same turn).
383
+ * - Trust pulls from the per-turn snapshot first, then the conversation-
384
+ * level context, then {@link FALLBACK_TURN_TRUST}. The cascade matches
385
+ * the one inside the orchestrator's inline injection assembly so
386
+ * middleware reads the same trust class the runtime sees.
387
+ * - `contextWindowManager` is attached unconditionally. Pipelines that
388
+ * don't need it can ignore it; the default compaction plugin reads it
389
+ * via the typed optional field on `TurnContext`.
390
+ */
391
+ export function buildPluginTurnContext(
392
+ ctx: AgentLoopConversationContext,
393
+ requestId: string,
394
+ ): PluginTurnContext {
395
+ const trust =
396
+ ctx.currentTurnTrustContext ?? ctx.trustContext ?? FALLBACK_TURN_TRUST;
397
+ return {
398
+ requestId,
399
+ conversationId: ctx.conversationId,
400
+ turnIndex: ctx.turnCount,
401
+ trust,
402
+ contextWindowManager: ctx.contextWindowManager,
403
+ };
245
404
  }
246
405
 
247
406
  // ── Context Interface ────────────────────────────────────────────────
@@ -404,7 +563,6 @@ export async function runAgentLoopImpl(
404
563
  userMessageId: string,
405
564
  onEvent: (msg: ServerMessage) => void,
406
565
  options?: {
407
- skipPreMessageRollback?: boolean;
408
566
  isInteractive?: boolean;
409
567
  isUserMessage?: boolean;
410
568
  titleText?: string;
@@ -528,40 +686,10 @@ export async function runAgentLoopImpl(
528
686
  }
529
687
  }
530
688
 
531
- const preMessageResult = await getHookManager().trigger("pre-message", {
532
- conversationId: ctx.conversationId,
533
- messagePreview: truncate(content, 200, ""),
534
- });
535
-
536
- if (preMessageResult.blocked) {
537
- if (!options?.skipPreMessageRollback) {
538
- ctx.messages.pop();
539
- deleteMessageById(userMessageId);
540
- }
541
- // Replace loading placeholder so the conversation isn't stuck as "Generating title..."
542
- const currentConv = getConversation(ctx.conversationId);
543
- if (
544
- isReplaceableTitle(currentConv?.title ?? null) &&
545
- currentConv?.title !== UNTITLED_FALLBACK
546
- ) {
547
- updateConversationTitle(ctx.conversationId, UNTITLED_FALLBACK);
548
- onEvent({
549
- type: "conversation_title_updated",
550
- conversationId: ctx.conversationId,
551
- title: UNTITLED_FALLBACK,
552
- });
553
- }
554
- onEvent({
555
- type: "error",
556
- message: `Message blocked by hook "${preMessageResult.blockedBy}"`,
557
- });
558
- return;
559
- }
560
-
561
689
  // Generate title early — the user message alone is sufficient context.
562
- // Firing after hook gating but before the main LLM call removes the
563
- // delay of waiting for the full assistant response. The second-pass
564
- // regeneration at turn 3 will refine the title with more context.
690
+ // Firing before the main LLM call removes the delay of waiting for the
691
+ // full assistant response. The second-pass regeneration at turn 3 will
692
+ // refine the title with more context.
565
693
  // No abort signal — title generation should complete even if the user
566
694
  // cancels the response, since the user message is already persisted.
567
695
  // Deferred via setTimeout so the main agent loop LLM call enqueues
@@ -569,18 +697,38 @@ export async function runAgentLoopImpl(
569
697
  if (
570
698
  isReplaceableTitle(getConversation(ctx.conversationId)?.title ?? null)
571
699
  ) {
700
+ // TurnContext routed through the canonical builder so the pipeline's
701
+ // log record reports the same `conversationId`/`turnIndex` shape as
702
+ // every other slot in this turn. Title generation does not depend on
703
+ // the context-window manager attached by the builder, but sharing the
704
+ // builder keeps the invariant enforced in one place.
705
+ const titlePipelineCtx = buildPluginTurnContext(ctx, reqId);
706
+ const titleArgs = {
707
+ conversationId: ctx.conversationId,
708
+ provider: ctx.provider,
709
+ userMessage: options?.titleText ?? content,
710
+ onTitleUpdated: (title: string) => {
711
+ onEvent({
712
+ type: "conversation_title_updated",
713
+ conversationId: ctx.conversationId,
714
+ title,
715
+ });
716
+ },
717
+ };
572
718
  setTimeout(() => {
573
- queueGenerateConversationTitle({
574
- conversationId: ctx.conversationId,
575
- provider: ctx.provider,
576
- userMessage: options?.titleText ?? content,
577
- onTitleUpdated: (title) => {
578
- onEvent({
579
- type: "conversation_title_updated",
580
- conversationId: ctx.conversationId,
581
- title,
582
- });
583
- },
719
+ runPipeline(
720
+ "titleGenerate",
721
+ getMiddlewaresFor("titleGenerate"),
722
+ defaultTitleGenerateTerminal,
723
+ titleArgs,
724
+ titlePipelineCtx,
725
+ DEFAULT_TIMEOUTS.titleGenerate,
726
+ ).catch((err) => {
727
+ // Fire-and-forget — keep previous non-propagating semantics.
728
+ // queueGenerateConversationTitle already swallows internal
729
+ // errors; this catch covers pipeline-layer errors (timeouts,
730
+ // middleware throws) without surfacing them to the agent loop.
731
+ rlog.warn({ err }, "titleGenerate pipeline failed (non-fatal)");
584
732
  });
585
733
  }, 0);
586
734
  }
@@ -592,7 +740,7 @@ export async function runAgentLoopImpl(
592
740
  const compactCheck = ctx.contextWindowManager.shouldCompact(ctx.messages);
593
741
  // Skip auto-compaction while the circuit breaker is open. Force paths
594
742
  // and user-initiated /compact bypass this check.
595
- const autoCompactAllowed = !isCompactionCircuitOpen(ctx);
743
+ const autoCompactAllowed = !(await isCompactionCircuitOpen(ctx));
596
744
  if (compactCheck.needed && autoCompactAllowed) {
597
745
  ctx.emitActivityState(
598
746
  "thinking",
@@ -601,69 +749,59 @@ export async function runAgentLoopImpl(
601
749
  reqId,
602
750
  );
603
751
  }
604
- const compacted = autoCompactAllowed
605
- ? await ctx.contextWindowManager.maybeCompact(
606
- ctx.messages,
607
- abortController.signal,
752
+ let compacted: Awaited<
753
+ ReturnType<typeof ctx.contextWindowManager.maybeCompact>
754
+ > | null = null;
755
+ if (autoCompactAllowed) {
756
+ try {
757
+ compacted = (await runPipeline<CompactionArgs, CompactionResult>(
758
+ "compaction",
759
+ getMiddlewaresFor("compaction"),
760
+ (args) =>
761
+ defaultCompactionTerminal(args, buildPluginTurnContext(ctx, reqId)),
608
762
  {
609
- lastCompactedAt: ctx.contextCompactedAt ?? undefined,
610
- precomputedEstimate: compactCheck.estimatedTokens,
611
- conversationOriginChannel:
612
- getConversationOriginChannel(ctx.conversationId) ?? undefined,
763
+ messages: ctx.messages,
764
+ signal: abortController.signal,
765
+ options: {
766
+ lastCompactedAt: ctx.contextCompactedAt ?? undefined,
767
+ precomputedEstimate: compactCheck.estimatedTokens,
768
+ conversationOriginChannel:
769
+ getConversationOriginChannel(ctx.conversationId) ?? undefined,
770
+ },
613
771
  },
614
- )
615
- : null;
772
+ buildPluginTurnContext(ctx, reqId),
773
+ DEFAULT_TIMEOUTS.compaction,
774
+ )) as Awaited<ReturnType<typeof ctx.contextWindowManager.maybeCompact>>;
775
+ } catch (err) {
776
+ if (err instanceof PluginTimeoutError) {
777
+ // Pipeline exceeded its budget. Record the failure so the circuit
778
+ // breaker tracks consecutive timeouts (it trips after three),
779
+ // then degrade gracefully by skipping compaction this turn —
780
+ // the turn proceeds with the un-compacted history rather than
781
+ // hard-failing. The inner summary call has been aborted by the
782
+ // runner's signal-linking, so updateSummary's local fallback
783
+ // also ran before this catch block is reached.
784
+ rlog.warn(
785
+ { err, phase: "start-of-turn-compaction" },
786
+ "Compaction pipeline timed out — skipping compaction this turn",
787
+ );
788
+ await trackCompactionOutcome(ctx, true, onEvent);
789
+ compacted = null;
790
+ } else {
791
+ throw err;
792
+ }
793
+ }
794
+ }
616
795
  // Only track circuit-breaker state when a summary LLM call actually ran.
617
796
  // `summaryFailed` is `undefined` on early returns (compaction disabled,
618
797
  // below threshold, cooldown active, no eligible messages, truncation-only
619
798
  // path) — treating those as "successful" compactions would silently reset
620
799
  // the 3-strike counter and break the invariant.
621
800
  if (compacted && compacted.summaryFailed !== undefined) {
622
- trackCompactionOutcome(ctx, compacted.summaryFailed, onEvent);
801
+ await trackCompactionOutcome(ctx, compacted.summaryFailed, onEvent);
623
802
  }
624
803
  if (compacted?.compacted) {
625
- ctx.messages = compacted.messages;
626
- ctx.contextCompactedMessageCount += compacted.compactedPersistedMessages;
627
- ctx.contextCompactedAt = Date.now();
628
- // Notify memory graph that compaction happened — triggers full context
629
- // reload on the next turn to replenish lost memory context.
630
- ctx.graphMemory.onCompacted(compacted.compactedPersistedMessages);
631
- updateConversationContextWindow(
632
- ctx.conversationId,
633
- compacted.summaryText,
634
- ctx.contextCompactedMessageCount,
635
- );
636
- // Fire auto-analysis on compaction so the reflective agent can
637
- // crystallize anything worth remembering before the context window
638
- // narrows further.
639
- enqueueAutoAnalysisOnCompaction(
640
- ctx.conversationId,
641
- ctx.trustContext?.trustClass,
642
- );
643
- onEvent({
644
- type: "context_compacted",
645
- previousEstimatedInputTokens: compacted.previousEstimatedInputTokens,
646
- estimatedInputTokens: compacted.estimatedInputTokens,
647
- maxInputTokens: compacted.maxInputTokens,
648
- thresholdTokens: compacted.thresholdTokens,
649
- compactedMessages: compacted.compactedMessages,
650
- summaryCalls: compacted.summaryCalls,
651
- summaryInputTokens: compacted.summaryInputTokens,
652
- summaryOutputTokens: compacted.summaryOutputTokens,
653
- summaryModel: compacted.summaryModel,
654
- });
655
- emitUsage(
656
- ctx,
657
- compacted.summaryInputTokens,
658
- compacted.summaryOutputTokens,
659
- compacted.summaryModel,
660
- onEvent,
661
- "context_compactor",
662
- reqId,
663
- compacted.summaryCacheCreationInputTokens ?? 0,
664
- compacted.summaryCacheReadInputTokens ?? 0,
665
- collapseRawResponses(compacted.summaryRawResponses),
666
- );
804
+ applyCompactionResult(ctx, compacted, onEvent, reqId);
667
805
  shouldInjectWorkspace = true;
668
806
  if (compacted.compactedPersistedMessages > 0) {
669
807
  compactedThisTurn = true;
@@ -711,21 +849,60 @@ export async function runAgentLoopImpl(
711
849
 
712
850
  let runMessages = ctx.messages;
713
851
 
714
- // Memory graph retrieval — dispatches to context-load / per-turn based on
715
- // conversation state. Keep the query vector around so the PKB reminder
716
- // can reuse it for relevance-hint search (see `applyRuntimeInjections`).
852
+ // Memory retrieval pipeline fetches PKB, NOW.md, and memory-graph
853
+ // outputs through a single `memoryRetrieval` pipeline. Plugins may
854
+ // replace the terminal behavior by registering a middleware that
855
+ // short-circuits with its own `MemoryResult`; the default terminal
856
+ // below runs `runDefaultMemoryRetrieval` which reproduces the prior
857
+ // in-lined behavior (PKB/NOW reads + gated graph call).
858
+ const isTrustedActor = resolveTrustClass(ctx.trustContext) === "guardian";
859
+ // Canonical builder — pulls trust from per-turn snapshot, then
860
+ // conversation-level, then the synthetic fallback. Memory retrieval
861
+ // does not need the context-window handle the builder attaches, but
862
+ // keeping every call site on one helper is load-bearing for log
863
+ // coherence across pipeline slots.
864
+ const memoryPluginTurnCtx = buildPluginTurnContext(ctx, reqId);
865
+ const memoryArgs: MemoryArgs = {
866
+ conversationId: ctx.conversationId,
867
+ trustContext: ctx.trustContext,
868
+ turnIndex: ctx.turnCount,
869
+ // Pass the abort signal via `args` (not `deps`) so the pipeline
870
+ // runner's `linkAbortSignal` can swap it for a signal linked to the
871
+ // pipeline's internal controller — on a plugin-set timeout or
872
+ // external cancel, the linked signal aborts and `prepareMemory`
873
+ // stops mutating graph state / emitting events after the pipeline
874
+ // has already errored.
875
+ signal: abortController.signal,
876
+ };
877
+ const memoryDeps: DefaultMemoryRetrievalDeps = {
878
+ messages: ctx.messages,
879
+ graphMemory: ctx.graphMemory,
880
+ config: getConfig(),
881
+ onEvent,
882
+ isTrustedActor,
883
+ };
884
+ const memoryResult: MemoryResult = await runPipeline(
885
+ "memoryRetrieval",
886
+ getMiddlewaresFor("memoryRetrieval"),
887
+ (args) => runDefaultMemoryRetrieval(args, memoryDeps),
888
+ memoryArgs,
889
+ memoryPluginTurnCtx,
890
+ DEFAULT_TIMEOUTS.memoryRetrieval,
891
+ );
892
+
893
+ // Consume the memory-graph block when the default retriever emitted
894
+ // one. Custom plugins that substitute their own blocks without the
895
+ // default discriminator are expected to handle their own side effects
896
+ // (event emission, metric persistence) inside their middleware; this
897
+ // block short-circuits to the original no-op behavior in that case.
898
+ const defaultGraphPayload: GraphMemoryPayload | null =
899
+ asDefaultGraphPayload(memoryResult.memoryGraphBlocks);
717
900
  let pkbQueryVector: number[] | undefined;
718
901
  let pkbSparseVector:
719
902
  | import("../memory/qdrant-client.js").QdrantSparseVector
720
903
  | undefined;
721
- const isTrustedActor = resolveTrustClass(ctx.trustContext) === "guardian";
722
- if (isTrustedActor) {
723
- const graphResult = await ctx.graphMemory.prepareMemory(
724
- ctx.messages,
725
- getConfig(),
726
- abortController.signal,
727
- onEvent,
728
- );
904
+ if (defaultGraphPayload) {
905
+ const graphResult = defaultGraphPayload.result;
729
906
  runMessages = graphResult.runMessages;
730
907
  // Select dense+sparse as a matched pair so RRF fusion combines two
731
908
  // signals aligned to the same query text:
@@ -746,12 +923,24 @@ export async function runAgentLoopImpl(
746
923
 
747
924
  // Persist the injected block text in message metadata so it survives
748
925
  // conversation reloads (eviction, restart, fork). loadFromDb re-injects
749
- // from metadata.
926
+ // from metadata. Routed through the `persistence` pipeline so plugins
927
+ // can observe or override metadata updates alongside add/delete.
750
928
  if (graphResult.injectedBlockText) {
751
929
  try {
752
- updateMessageMetadata(userMessageId, {
753
- memoryInjectedBlock: graphResult.injectedBlockText,
754
- });
930
+ await runPipeline<PersistArgs, PersistResult>(
931
+ "persistence",
932
+ getMiddlewaresFor("persistence"),
933
+ defaultPersistenceTerminal,
934
+ {
935
+ op: "update",
936
+ messageId: userMessageId,
937
+ updates: {
938
+ memoryInjectedBlock: graphResult.injectedBlockText,
939
+ },
940
+ },
941
+ buildPluginTurnContext(ctx, reqId),
942
+ DEFAULT_TIMEOUTS.persistence,
943
+ );
755
944
  } catch (err) {
756
945
  rlog.warn(
757
946
  { err },
@@ -933,11 +1122,13 @@ export async function runAgentLoopImpl(
933
1122
  // Inject NOW.md and PKB content only on the first turn (or after
934
1123
  // compaction re-strips them). Old injections persist in history and
935
1124
  // are never stripped on normal turns — this preserves the cached prefix.
936
- const currentNowContent = readNowScratchpad();
1125
+ // PKB/NOW content is sourced from the `memoryRetrieval` pipeline above
1126
+ // so plugins can override either source without touching the agent loop.
1127
+ const currentNowContent = memoryResult.nowContent;
937
1128
  const shouldInjectNowAndPkb = isFirstMessage || compactedThisTurn;
938
1129
  const nowScratchpad = shouldInjectNowAndPkb ? currentNowContent : null;
939
1130
 
940
- const currentPkbContent = readPkbContext();
1131
+ const currentPkbContent = memoryResult.pkbContent;
941
1132
  const pkbContext = shouldInjectNowAndPkb ? currentPkbContent : null;
942
1133
  const pkbActive = currentPkbContent !== null;
943
1134
 
@@ -1030,12 +1221,19 @@ export async function runAgentLoopImpl(
1030
1221
 
1031
1222
  let currentInjectionMode: InjectionMode = "full";
1032
1223
 
1224
+ // Canonical per-turn TurnContext forwarded to the injector chain. The
1225
+ // per-turn injection inputs are built inside `applyRuntimeInjections`
1226
+ // from the `injectionOpts` bag; we only need to hand in identity +
1227
+ // trust here so third-party injectors see the real turn metadata.
1228
+ const injectionTurnCtx = buildPluginTurnContext(ctx, reqId);
1229
+
1033
1230
  const injection = await applyRuntimeInjections(runMessages, {
1034
1231
  ...injectionOpts,
1035
1232
  slackChronologicalMessages: reducerCompacted
1036
1233
  ? null
1037
1234
  : injectionOpts.slackChronologicalMessages,
1038
1235
  mode: currentInjectionMode,
1236
+ turnContext: injectionTurnCtx,
1039
1237
  });
1040
1238
  runMessages = injection.messages;
1041
1239
 
@@ -1043,11 +1241,14 @@ export async function runAgentLoopImpl(
1043
1241
  // reloads (eviction, restart, fork). loadFromDb re-injects from metadata.
1044
1242
  // Only the first call site persists — the overflow-recovery re-entry sites
1045
1243
  // send identical bytes and the tail row may not correspond to
1046
- // `userMessageId`. Both blocks are written in a single call to avoid
1244
+ // `userMessageId`. All blocks are written in a single call to avoid
1047
1245
  // doubling SQLite SELECT+UPDATE work on every turn.
1048
1246
  if (
1049
1247
  injection.blocks.unifiedTurnContext ||
1050
- injection.blocks.pkbSystemReminder
1248
+ injection.blocks.pkbSystemReminder ||
1249
+ injection.blocks.workspaceBlock ||
1250
+ injection.blocks.nowScratchpadBlock ||
1251
+ injection.blocks.pkbContextBlock
1051
1252
  ) {
1052
1253
  try {
1053
1254
  const metadataUpdates: Record<string, unknown> = {};
@@ -1059,7 +1260,28 @@ export async function runAgentLoopImpl(
1059
1260
  metadataUpdates.pkbSystemReminderBlock =
1060
1261
  injection.blocks.pkbSystemReminder;
1061
1262
  }
1062
- updateMessageMetadata(userMessageId, metadataUpdates);
1263
+ if (injection.blocks.workspaceBlock) {
1264
+ metadataUpdates.workspaceBlock = injection.blocks.workspaceBlock;
1265
+ }
1266
+ if (injection.blocks.nowScratchpadBlock) {
1267
+ metadataUpdates.nowScratchpadBlock =
1268
+ injection.blocks.nowScratchpadBlock;
1269
+ }
1270
+ if (injection.blocks.pkbContextBlock) {
1271
+ metadataUpdates.pkbContextBlock = injection.blocks.pkbContextBlock;
1272
+ }
1273
+ await runPipeline<PersistArgs, PersistResult>(
1274
+ "persistence",
1275
+ getMiddlewaresFor("persistence"),
1276
+ defaultPersistenceTerminal,
1277
+ {
1278
+ op: "update",
1279
+ messageId: userMessageId,
1280
+ updates: metadataUpdates,
1281
+ },
1282
+ buildPluginTurnContext(ctx, reqId),
1283
+ DEFAULT_TIMEOUTS.persistence,
1284
+ );
1063
1285
  } catch (err) {
1064
1286
  rlog.warn({ err }, "Failed to persist injection metadata (non-fatal)");
1065
1287
  }
@@ -1082,18 +1304,51 @@ export async function runAgentLoopImpl(
1082
1304
  let reducerState: ReducerState | undefined;
1083
1305
 
1084
1306
  const toolTokenBudget = ctx.agentLoop.getToolTokenBudget(runMessages);
1085
- // Canonical calibration key used at every `estimatePromptTokens` site in
1086
- // this function. Matches the key recorded by `handleUsage` for wrapper
1087
- // providers (OpenRouter routing to Anthropic → key is `"anthropic"`).
1307
+ // Canonical calibration key passed to the `tokenEstimate` pipeline for
1308
+ // every preflight/mid-loop estimate, the overflow reducer config, and the
1309
+ // convergence-path `estimatePromptTokens` call. Matches the key recorded
1310
+ // by `handleUsage` for wrapper providers (OpenRouter routing to
1311
+ // Anthropic → key is `"anthropic"`).
1088
1312
  const estimationProviderName = getCalibrationProviderKey(ctx.provider);
1089
- const preflightTokens = estimatePromptTokens(
1090
- runMessages,
1091
- ctx.systemPrompt,
1092
- {
1093
- providerName: estimationProviderName,
1094
- toolTokenBudget,
1095
- },
1096
- );
1313
+
1314
+ // Shared `TurnContext` for every `tokenEstimate` pipeline invocation in
1315
+ // this turn. The pipeline is the extension point for plugins that want
1316
+ // to substitute an alternate estimator (e.g. provider-native tokenization)
1317
+ // without touching orchestrator code.
1318
+ //
1319
+ // Routed through the canonical builder — `turnIndex` is `ctx.turnCount`,
1320
+ // trust cascades through per-turn/conversation-level/fallback, and the
1321
+ // context-window handle rides along so any middleware that wants to
1322
+ // reuse the manager (e.g. to compute compaction-aware estimates) can.
1323
+ const pipelineTurnCtx = buildPluginTurnContext(ctx, reqId);
1324
+
1325
+ const runTokenEstimatePipeline = (
1326
+ history: Message[],
1327
+ ): Promise<EstimateResult> =>
1328
+ runPipeline<EstimateArgs, EstimateResult>(
1329
+ "tokenEstimate",
1330
+ getMiddlewaresFor("tokenEstimate"),
1331
+ defaultTokenEstimateTerminal,
1332
+ {
1333
+ // Shallow-frozen copies so a misbehaving middleware that mutates
1334
+ // `args.history` or `args.tools` in place (e.g. trims the array
1335
+ // before calling next) can't silently strip prompt context from
1336
+ // the orchestrator's live `runMessages` / resolved-tools arrays.
1337
+ // TypeScript `readonly` on `EstimateArgs` does not prevent
1338
+ // `push`/`splice` at runtime; the frozen wrapper throws in strict
1339
+ // mode and isolates any mutation attempts from the call-site state.
1340
+ history: Object.freeze([...history]) as Message[],
1341
+ systemPrompt: ctx.systemPrompt,
1342
+ tools: Object.freeze([
1343
+ ...ctx.agentLoop.getResolvedTools(history),
1344
+ ]) as ToolDefinition[],
1345
+ providerName: estimationProviderName,
1346
+ },
1347
+ pipelineTurnCtx,
1348
+ DEFAULT_TIMEOUTS.tokenEstimate,
1349
+ );
1350
+
1351
+ const preflightTokens = await runTokenEstimatePipeline(runMessages);
1097
1352
 
1098
1353
  if (overflowRecovery.enabled && preflightTokens > preflightBudget) {
1099
1354
  rlog.warn(
@@ -1105,157 +1360,198 @@ export async function runAgentLoopImpl(
1105
1360
  "Preflight budget exceeded — running overflow reducer before provider call",
1106
1361
  );
1107
1362
 
1108
- reducerState = createInitialReducerState();
1109
- let preflightAttempts = 0;
1110
-
1111
- while (
1112
- preflightAttempts < overflowRecovery.maxAttempts &&
1113
- !reducerState.exhausted
1114
- ) {
1115
- preflightAttempts++;
1116
- ctx.emitActivityState(
1117
- "thinking",
1118
- "context_compacting",
1119
- "assistant_turn",
1120
- reqId,
1121
- );
1122
- const step = await reduceContextOverflow(
1123
- ctx.messages,
1124
- {
1125
- providerName: estimationProviderName,
1126
- systemPrompt: ctx.systemPrompt,
1127
- contextWindow: config.llm.default.contextWindow,
1128
- targetTokens: preflightBudget,
1129
- toolTokenBudget,
1130
- },
1131
- reducerState,
1132
- (msgs, signal, opts) =>
1133
- ctx.contextWindowManager.maybeCompact(msgs, signal!, opts),
1134
- abortController.signal,
1135
- );
1136
-
1137
- reducerState = step.state;
1138
- ctx.messages = step.messages;
1139
- currentInjectionMode = step.state.injectionMode;
1140
-
1141
- // Track circuit-breaker state whenever the reducer invoked compaction.
1142
- // The reducer's forced_compaction tier uses force:true, so it bypasses
1143
- // the open-circuit check, but we still want failure tracking to detect
1144
- // a run of broken summaries and clear the counter on success. Only
1145
- // track when the summary LLM actually ran — `summaryFailed === undefined`
1146
- // indicates an early return (no eligible messages, truncation-only
1147
- // path, etc.) that shouldn't influence the breaker.
1148
- if (
1149
- step.compactionResult &&
1150
- step.compactionResult.summaryFailed !== undefined
1151
- ) {
1152
- trackCompactionOutcome(
1153
- ctx,
1154
- step.compactionResult.summaryFailed,
1155
- onEvent,
1156
- );
1157
- }
1158
-
1159
- if (step.compactionResult?.compacted) {
1160
- ctx.contextCompactedMessageCount +=
1161
- step.compactionResult.compactedPersistedMessages;
1162
- ctx.contextCompactedAt = Date.now();
1163
- updateConversationContextWindow(
1164
- ctx.conversationId,
1165
- step.compactionResult.summaryText,
1166
- ctx.contextCompactedMessageCount,
1167
- );
1168
- // Fire auto-analysis on compaction — see forceCompact() for rationale.
1169
- enqueueAutoAnalysisOnCompaction(
1170
- ctx.conversationId,
1171
- ctx.trustContext?.trustClass,
1172
- );
1173
- onEvent({
1174
- type: "context_compacted",
1175
- previousEstimatedInputTokens:
1176
- step.compactionResult.previousEstimatedInputTokens,
1177
- estimatedInputTokens: step.compactionResult.estimatedInputTokens,
1178
- maxInputTokens: step.compactionResult.maxInputTokens,
1179
- thresholdTokens: step.compactionResult.thresholdTokens,
1180
- compactedMessages: step.compactionResult.compactedMessages,
1181
- summaryCalls: step.compactionResult.summaryCalls,
1182
- summaryInputTokens: step.compactionResult.summaryInputTokens,
1183
- summaryOutputTokens: step.compactionResult.summaryOutputTokens,
1184
- summaryModel: step.compactionResult.summaryModel,
1185
- });
1186
- emitUsage(
1187
- ctx,
1188
- step.compactionResult.summaryInputTokens,
1189
- step.compactionResult.summaryOutputTokens,
1190
- step.compactionResult.summaryModel,
1191
- onEvent,
1192
- "context_compactor",
1363
+ // Overflow reduction runs through the plugin pipeline. The default
1364
+ // middleware (`default-overflow-reduce`, registered at bootstrap)
1365
+ // contains the historical tier loop — forced compaction → tool-result
1366
+ // truncation → media stubbing → injection downgrade — plus the
1367
+ // re-inject/re-estimate convergence check. The callbacks below are
1368
+ // the orchestrator-specific side effects that the plugin coordinates
1369
+ // per iteration (activity emission, compaction application, runtime
1370
+ // injection reassembly, token re-estimation). Registered plugins that
1371
+ // wrap the `overflowReduce` slot see each iteration through their own
1372
+ // middleware `next` callback.
1373
+ const overflowArgs: OverflowReduceArgs = {
1374
+ messages: ctx.messages,
1375
+ runMessages,
1376
+ systemPrompt: ctx.systemPrompt,
1377
+ providerName: estimationProviderName,
1378
+ contextWindow: config.llm.default.contextWindow,
1379
+ preflightBudget,
1380
+ toolTokenBudget,
1381
+ maxAttempts: overflowRecovery.maxAttempts,
1382
+ abortSignal: abortController.signal,
1383
+ compactFn: async (msgs, signal, opts) =>
1384
+ // Route the reducer's forced-compaction tier through the
1385
+ // `compaction` pipeline so registered plugins observe these
1386
+ // invocations. Without this, custom compaction middleware only
1387
+ // sees the three orchestrator-owned call sites and misses the
1388
+ // reducer-initiated forced compactions entirely.
1389
+ (await runPipeline<CompactionArgs, CompactionResult>(
1390
+ "compaction",
1391
+ getMiddlewaresFor("compaction"),
1392
+ (args) =>
1393
+ defaultCompactionTerminal(
1394
+ args,
1395
+ buildPluginTurnContext(ctx, reqId),
1396
+ ),
1397
+ {
1398
+ messages: msgs,
1399
+ signal,
1400
+ options: opts,
1401
+ },
1402
+ buildPluginTurnContext(ctx, reqId),
1403
+ DEFAULT_TIMEOUTS.compaction,
1404
+ )) as Awaited<
1405
+ ReturnType<typeof ctx.contextWindowManager.maybeCompact>
1406
+ >,
1407
+ emitActivityState: () => {
1408
+ ctx.emitActivityState(
1409
+ "thinking",
1410
+ "context_compacting",
1411
+ "assistant_turn",
1193
1412
  reqId,
1194
- step.compactionResult.summaryCacheCreationInputTokens ?? 0,
1195
- step.compactionResult.summaryCacheReadInputTokens ?? 0,
1196
- collapseRawResponses(step.compactionResult.summaryRawResponses),
1197
- );
1198
- ctx.graphMemory.onCompacted(
1199
- step.compactionResult.compactedPersistedMessages,
1200
1413
  );
1201
- shouldInjectWorkspace = true;
1202
- reducerCompacted = true;
1203
- }
1204
-
1205
- // Re-inject with potentially downgraded injection mode.
1206
- // When compaction ran it strips existing NOW.md / PKB blocks, so we
1207
- // must re-inject the current content. Otherwise rely on the deduplicated
1208
- // value from injectionOpts to avoid duplicate injection.
1209
- const injection = await applyRuntimeInjections(ctx.messages, {
1210
- ...injectionOpts,
1211
- ...(step.compactionResult?.compacted && {
1212
- pkbContext: currentPkbContent,
1213
- }),
1214
- ...(step.compactionResult?.compacted && {
1215
- nowScratchpad: currentNowContent,
1216
- }),
1217
- workspaceTopLevelContext: shouldInjectWorkspace
1218
- ? ctx.workspaceTopLevelContext
1219
- : null,
1220
- // Once the reducer has compacted `ctx.messages`, the captured
1221
- // `slackChronologicalMessages` snapshot (built from the full
1222
- // persisted transcript) would overwrite the compacted history
1223
- // and undo compaction. Suppress the override from here on.
1224
- slackChronologicalMessages: reducerCompacted
1225
- ? null
1226
- : injectionOpts.slackChronologicalMessages,
1227
- mode: currentInjectionMode,
1228
- });
1229
- runMessages = injection.messages;
1230
- if (isTrustedActor && currentInjectionMode !== "minimal") {
1231
- const memResult = ctx.graphMemory.reinjectCachedMemory(runMessages);
1232
- runMessages = memResult.runMessages;
1233
- }
1234
-
1235
- // Re-estimate with injections included — step.estimatedTokens was
1236
- // computed on bare history (ctx.messages) and doesn't account for
1237
- // tokens added by runtime injections.
1238
- const postInjectionTokens = estimatePromptTokens(
1239
- runMessages,
1240
- ctx.systemPrompt,
1241
- {
1414
+ },
1415
+ onCompactionResult: async (result) => {
1416
+ // Track circuit-breaker state whenever the reducer invoked
1417
+ // compaction. The reducer's forced_compaction tier uses
1418
+ // force:true, so it bypasses the open-circuit check, but we
1419
+ // still want failure tracking to detect a run of broken
1420
+ // summaries and clear the counter on success. Only track when
1421
+ // the summary LLM actually ran — `summaryFailed === undefined`
1422
+ // indicates an early return (no eligible messages,
1423
+ // truncation-only path, etc.) that shouldn't influence the
1424
+ // breaker.
1425
+ if (result.summaryFailed !== undefined) {
1426
+ await trackCompactionOutcome(ctx, result.summaryFailed, onEvent);
1427
+ }
1428
+ if (result.compacted) {
1429
+ applyCompactionResult(ctx, result, onEvent, reqId);
1430
+ shouldInjectWorkspace = true;
1431
+ }
1432
+ },
1433
+ reinjectForMode: async (
1434
+ reducedMessages,
1435
+ mode,
1436
+ stepCompacted,
1437
+ accumulatedCompacted,
1438
+ ) => {
1439
+ // Mirror the pre-PR-23 behavior: `ctx.messages` must track the
1440
+ // reducer's latest output before re-injection runs, because other
1441
+ // sites consulted through `injectionOpts` (`workspaceTopLevelContext`,
1442
+ // slack history, etc.) depend on it and `applyCompactionResult`
1443
+ // only updates `ctx.messages` on a compaction tier. Assigning here
1444
+ // keeps non-compaction tiers (tool-result truncation, media
1445
+ // stubbing, injection downgrade) observable to downstream
1446
+ // injection assembly on the same turn.
1447
+ ctx.messages = reducedMessages;
1448
+
1449
+ // When THIS iteration compacted, it stripped existing NOW.md /
1450
+ // PKB blocks so we re-inject current content. A later iteration
1451
+ // that only truncates or downgrades must NOT re-force PKB/NOW,
1452
+ // or each round would grow the token count. Matches the
1453
+ // pre-PR-23 per-iteration `step.compactionResult?.compacted` gate.
1454
+ const injection = await applyRuntimeInjections(reducedMessages, {
1455
+ ...injectionOpts,
1456
+ ...(stepCompacted && { pkbContext: currentPkbContent }),
1457
+ ...(stepCompacted && { nowScratchpad: currentNowContent }),
1458
+ workspaceTopLevelContext: shouldInjectWorkspace
1459
+ ? ctx.workspaceTopLevelContext
1460
+ : null,
1461
+ // Once ANY iteration has compacted `ctx.messages`, the captured
1462
+ // `slackChronologicalMessages` snapshot (built from the full
1463
+ // persisted transcript) would overwrite the compacted history
1464
+ // and undo compaction. Suppress the override from here on —
1465
+ // sticky across subsequent non-compacting iterations.
1466
+ slackChronologicalMessages: accumulatedCompacted
1467
+ ? null
1468
+ : injectionOpts.slackChronologicalMessages,
1469
+ mode,
1470
+ turnContext: buildPluginTurnContext(ctx, reqId),
1471
+ });
1472
+ let next = injection.messages;
1473
+ if (isTrustedActor && mode !== "minimal") {
1474
+ const memResult = ctx.graphMemory.reinjectCachedMemory(next);
1475
+ next = memResult.runMessages;
1476
+ }
1477
+ return next;
1478
+ },
1479
+ estimatePostInjection: (runMsgs) =>
1480
+ estimatePromptTokens(runMsgs, ctx.systemPrompt, {
1242
1481
  providerName: estimationProviderName,
1243
1482
  toolTokenBudget,
1244
- },
1245
- );
1483
+ }),
1484
+ };
1485
+
1486
+ const overflowResult = await runPipeline<
1487
+ OverflowReduceArgs,
1488
+ OverflowReduceResult
1489
+ >(
1490
+ "overflowReduce",
1491
+ getMiddlewaresFor("overflowReduce"),
1492
+ // Terminal — only reached when every registered middleware calls
1493
+ // `next` and delegates past the innermost layer. The default plugin
1494
+ // is a terminal itself (it doesn't call `next`), so in practice
1495
+ // this fallback fires only when the default has been explicitly
1496
+ // deregistered (tests) and no user plugin replaces it. Strict-fail
1497
+ // semantics: throw so the missing terminal surfaces as a visible
1498
+ // error instead of silently returning the history untouched.
1499
+ async () => {
1500
+ throw new PluginExecutionError(
1501
+ "overflowReduce pipeline has no terminal handler — every reducer middleware called next() without providing a replacement",
1502
+ "overflowReduce",
1503
+ );
1504
+ },
1505
+ overflowArgs,
1506
+ buildPluginTurnContext(ctx, reqId),
1507
+ DEFAULT_TIMEOUTS.overflowReduce,
1508
+ );
1246
1509
 
1247
- if (postInjectionTokens <= preflightBudget) break;
1510
+ ctx.messages = overflowResult.messages;
1511
+ runMessages = overflowResult.runMessages;
1512
+ currentInjectionMode = overflowResult.injectionMode;
1513
+ reducerState = overflowResult.reducerState;
1514
+ if (overflowResult.reducerCompacted) {
1515
+ reducerCompacted = true;
1248
1516
  }
1249
1517
  }
1250
1518
 
1251
- // Pre-run repair
1519
+ // Pre-run repair — routed through the `historyRepair` plugin pipeline so
1520
+ // plugins can observe or override repair behavior. The default plugin's
1521
+ // middleware is a passthrough; the actual repair runs in the terminal
1522
+ // (`defaultHistoryRepairTerminal`).
1252
1523
  let preRepairMessages = runMessages;
1253
- const preRunRepair = repairHistory(runMessages);
1524
+ let preRunRepair: HistoryRepairResult | null = null;
1525
+ try {
1526
+ preRunRepair = await runPipeline<HistoryRepairArgs, HistoryRepairResult>(
1527
+ "historyRepair",
1528
+ getMiddlewaresFor("historyRepair"),
1529
+ async (args) => defaultHistoryRepairTerminal(args),
1530
+ { history: runMessages, provider: ctx.provider.name },
1531
+ buildPluginTurnContext(ctx, reqId),
1532
+ DEFAULT_TIMEOUTS.historyRepair,
1533
+ );
1534
+ } catch (err) {
1535
+ if (err instanceof PluginTimeoutError) {
1536
+ // Pipeline exceeded its budget — likely a misbehaving third-party
1537
+ // middleware. Degrade gracefully by proceeding with the un-repaired
1538
+ // history rather than turn-fatal-erroring; un-repaired history is
1539
+ // strictly better than no turn at all, and the provider call itself
1540
+ // will still error visibly if the drift is unrecoverable.
1541
+ rlog.warn(
1542
+ { err, phase: "pre_run" },
1543
+ "historyRepair pipeline timed out — proceeding with un-repaired history",
1544
+ );
1545
+ } else {
1546
+ throw err;
1547
+ }
1548
+ }
1254
1549
  if (
1255
- preRunRepair.stats.assistantToolResultsMigrated > 0 ||
1256
- preRunRepair.stats.missingToolResultsInserted > 0 ||
1257
- preRunRepair.stats.orphanToolResultsDowngraded > 0 ||
1258
- preRunRepair.stats.consecutiveSameRoleMerged > 0
1550
+ preRunRepair !== null &&
1551
+ (preRunRepair.stats.assistantToolResultsMigrated > 0 ||
1552
+ preRunRepair.stats.missingToolResultsInserted > 0 ||
1553
+ preRunRepair.stats.orphanToolResultsDowngraded > 0 ||
1554
+ preRunRepair.stats.consecutiveSameRoleMerged > 0)
1259
1555
  ) {
1260
1556
  rlog.warn(
1261
1557
  { phase: "pre_run", ...preRunRepair.stats },
@@ -1299,7 +1595,9 @@ export async function runAgentLoopImpl(
1299
1595
 
1300
1596
  let yieldedForBudget = false;
1301
1597
 
1302
- const onCheckpoint = (checkpoint: CheckpointInfo): CheckpointDecision => {
1598
+ const onCheckpoint = async (
1599
+ checkpoint: CheckpointInfo,
1600
+ ): Promise<CheckpointDecision> => {
1303
1601
  state.currentTurnToolNames = [];
1304
1602
 
1305
1603
  if (ctx.canHandoffAtCheckpoint()) {
@@ -1312,14 +1610,7 @@ export async function runAgentLoopImpl(
1312
1610
  // conversation-agent-loop run compaction before the provider rejects.
1313
1611
  if (overflowRecovery.enabled) {
1314
1612
  const midLoopThreshold = preflightBudget * 0.85;
1315
- const estimated = estimatePromptTokens(
1316
- checkpoint.history,
1317
- ctx.systemPrompt,
1318
- {
1319
- providerName: estimationProviderName,
1320
- toolTokenBudget,
1321
- },
1322
- );
1613
+ const estimated = await runTokenEstimatePipeline(checkpoint.history);
1323
1614
  if (estimated > midLoopThreshold) {
1324
1615
  rlog.warn(
1325
1616
  { phase: "mid-loop", estimated, threshold: midLoopThreshold },
@@ -1335,10 +1626,16 @@ export async function runAgentLoopImpl(
1335
1626
 
1336
1627
  turnStarted = true;
1337
1628
 
1338
- let denyCompressionMessage: Message | null = null;
1339
-
1340
1629
  rlog.info({ callSite: turnCallSite }, "Starting agent loop run");
1341
1630
 
1631
+ // Thread the orchestrator's canonical per-turn context into the agent
1632
+ // loop so its internal pipeline invocations (llmCall, emptyResponse,
1633
+ // toolError, toolResultTruncate, toolExecute) see the real
1634
+ // conversation identity / trust / contextWindowManager instead of the
1635
+ // synthesized `"agent-loop"` placeholder. The loop clones this value
1636
+ // and overwrites `turnIndex` with its own tool-use iteration counter.
1637
+ const loopTurnCtx = buildPluginTurnContext(ctx, reqId);
1638
+
1342
1639
  let updatedHistory = await ctx.agentLoop.run(
1343
1640
  runMessages,
1344
1641
  eventHandler,
@@ -1346,6 +1643,7 @@ export async function runAgentLoopImpl(
1346
1643
  reqId,
1347
1644
  onCheckpoint,
1348
1645
  turnCallSite,
1646
+ loopTurnCtx,
1349
1647
  );
1350
1648
 
1351
1649
  rlog.info(
@@ -1379,11 +1677,11 @@ export async function runAgentLoopImpl(
1379
1677
  const rawHistory = stripInjectionsForCompaction(updatedHistory);
1380
1678
  ctx.messages = rawHistory;
1381
1679
  try {
1382
- clearPkbSystemReminderMetadataForConversation(ctx.conversationId);
1680
+ clearStrippedInjectionMetadataForConversation(ctx.conversationId);
1383
1681
  } catch (err) {
1384
1682
  rlog.warn(
1385
1683
  { err },
1386
- "Failed to clear pkbSystemReminderBlock metadata after compaction strip (non-fatal)",
1684
+ "Failed to clear stripped-injection metadata after compaction strip (non-fatal)",
1387
1685
  );
1388
1686
  }
1389
1687
 
@@ -1394,65 +1692,61 @@ export async function runAgentLoopImpl(
1394
1692
  reqId,
1395
1693
  "Compacting context",
1396
1694
  );
1397
- const midLoopCompact = await ctx.contextWindowManager.maybeCompact(
1398
- ctx.messages,
1399
- abortController.signal,
1400
- {
1401
- lastCompactedAt: ctx.contextCompactedAt ?? undefined,
1402
- force: true,
1403
- targetInputTokensOverride: preflightBudget,
1404
- conversationOriginChannel:
1405
- getConversationOriginChannel(ctx.conversationId) ?? undefined,
1406
- },
1407
- );
1695
+ let midLoopCompact: Awaited<
1696
+ ReturnType<typeof ctx.contextWindowManager.maybeCompact>
1697
+ >;
1698
+ try {
1699
+ midLoopCompact = (await runPipeline<CompactionArgs, CompactionResult>(
1700
+ "compaction",
1701
+ getMiddlewaresFor("compaction"),
1702
+ (args) =>
1703
+ defaultCompactionTerminal(args, buildPluginTurnContext(ctx, reqId)),
1704
+ {
1705
+ messages: ctx.messages,
1706
+ signal: abortController.signal,
1707
+ options: {
1708
+ lastCompactedAt: ctx.contextCompactedAt ?? undefined,
1709
+ force: true,
1710
+ targetInputTokensOverride: preflightBudget,
1711
+ conversationOriginChannel:
1712
+ getConversationOriginChannel(ctx.conversationId) ?? undefined,
1713
+ },
1714
+ },
1715
+ buildPluginTurnContext(ctx, reqId),
1716
+ DEFAULT_TIMEOUTS.compaction,
1717
+ )) as Awaited<ReturnType<typeof ctx.contextWindowManager.maybeCompact>>;
1718
+ } catch (err) {
1719
+ if (err instanceof PluginTimeoutError) {
1720
+ // Mid-loop compaction timed out. Record the failure for the
1721
+ // circuit breaker and escalate to the convergence loop's more
1722
+ // aggressive reducer tiers (tool-result truncation, media
1723
+ // stubbing, injection downgrade) by flipping the overflow flag
1724
+ // and breaking out of the mid-loop retry. The existing
1725
+ // "exhausted all attempts" block further down handles the
1726
+ // escalation.
1727
+ rlog.warn(
1728
+ { err, phase: "mid-loop-compact" },
1729
+ "Compaction pipeline timed out — escalating to convergence loop",
1730
+ );
1731
+ await trackCompactionOutcome(ctx, true, onEvent);
1732
+ state.contextTooLargeDetected = true;
1733
+ break;
1734
+ }
1735
+ throw err;
1736
+ }
1408
1737
  // `force: true` bypasses the cooldown/threshold gates but early returns
1409
1738
  // for "no eligible messages" / "insufficient messages" still leave
1410
1739
  // `summaryFailed` undefined. Only track when the summary LLM actually ran.
1411
1740
  if (midLoopCompact.summaryFailed !== undefined) {
1412
- trackCompactionOutcome(ctx, midLoopCompact.summaryFailed, onEvent);
1413
- }
1414
- if (midLoopCompact.compacted) {
1415
- ctx.messages = midLoopCompact.messages;
1416
- reducerCompacted = true;
1417
- ctx.contextCompactedMessageCount +=
1418
- midLoopCompact.compactedPersistedMessages;
1419
- ctx.contextCompactedAt = Date.now();
1420
- updateConversationContextWindow(
1421
- ctx.conversationId,
1422
- midLoopCompact.summaryText,
1423
- ctx.contextCompactedMessageCount,
1424
- );
1425
- // Fire auto-analysis on compaction — see forceCompact() for rationale.
1426
- enqueueAutoAnalysisOnCompaction(
1427
- ctx.conversationId,
1428
- ctx.trustContext?.trustClass,
1429
- );
1430
- onEvent({
1431
- type: "context_compacted",
1432
- previousEstimatedInputTokens:
1433
- midLoopCompact.previousEstimatedInputTokens,
1434
- estimatedInputTokens: midLoopCompact.estimatedInputTokens,
1435
- maxInputTokens: midLoopCompact.maxInputTokens,
1436
- thresholdTokens: midLoopCompact.thresholdTokens,
1437
- compactedMessages: midLoopCompact.compactedMessages,
1438
- summaryCalls: midLoopCompact.summaryCalls,
1439
- summaryInputTokens: midLoopCompact.summaryInputTokens,
1440
- summaryOutputTokens: midLoopCompact.summaryOutputTokens,
1441
- summaryModel: midLoopCompact.summaryModel,
1442
- });
1443
- emitUsage(
1741
+ await trackCompactionOutcome(
1444
1742
  ctx,
1445
- midLoopCompact.summaryInputTokens,
1446
- midLoopCompact.summaryOutputTokens,
1447
- midLoopCompact.summaryModel,
1743
+ midLoopCompact.summaryFailed,
1448
1744
  onEvent,
1449
- "context_compactor",
1450
- reqId,
1451
- midLoopCompact.summaryCacheCreationInputTokens ?? 0,
1452
- midLoopCompact.summaryCacheReadInputTokens ?? 0,
1453
- collapseRawResponses(midLoopCompact.summaryRawResponses),
1454
1745
  );
1455
- ctx.graphMemory.onCompacted(midLoopCompact.compactedPersistedMessages);
1746
+ }
1747
+ if (midLoopCompact.compacted) {
1748
+ applyCompactionResult(ctx, midLoopCompact, onEvent, reqId);
1749
+ reducerCompacted = true;
1456
1750
  shouldInjectWorkspace = true;
1457
1751
  }
1458
1752
 
@@ -1474,6 +1768,7 @@ export async function runAgentLoopImpl(
1474
1768
  ? null
1475
1769
  : injectionOpts.slackChronologicalMessages,
1476
1770
  mode: currentInjectionMode,
1771
+ turnContext: buildPluginTurnContext(ctx, reqId),
1477
1772
  });
1478
1773
  runMessages = injection.messages;
1479
1774
  if (isTrustedActor && currentInjectionMode !== "minimal") {
@@ -1497,6 +1792,7 @@ export async function runAgentLoopImpl(
1497
1792
  reqId,
1498
1793
  onCheckpoint,
1499
1794
  turnCallSite,
1795
+ loopTurnCtx,
1500
1796
  );
1501
1797
  }
1502
1798
 
@@ -1526,6 +1822,15 @@ export async function runAgentLoopImpl(
1526
1822
  { phase: "retry" },
1527
1823
  "Provider ordering error detected, attempting one-shot deep-repair retry",
1528
1824
  );
1825
+ // Design note: deep-repair intentionally bypasses the `historyRepair`
1826
+ // plugin pipeline. Deep-repair is a recovery-only path triggered by a
1827
+ // provider ordering error — it must be deterministic and unaffected by
1828
+ // user middleware that might have caused (or be unable to recover from)
1829
+ // the original drift. Plugins can already observe / override the
1830
+ // pre-run repair via the `historyRepair` pipeline above; widening that
1831
+ // surface to deep-repair is intentionally deferred until there's a
1832
+ // concrete plugin-level use case. Do not route this call through
1833
+ // `runPipeline` without first revisiting that contract.
1529
1834
  const retryRepair = deepRepairHistory(runMessages);
1530
1835
  runMessages = retryRepair.messages;
1531
1836
  const retryStrip = stripHistoricalWebSearchResults(runMessages);
@@ -1542,6 +1847,7 @@ export async function runAgentLoopImpl(
1542
1847
  reqId,
1543
1848
  onCheckpoint,
1544
1849
  turnCallSite,
1850
+ loopTurnCtx,
1545
1851
  );
1546
1852
 
1547
1853
  if (state.orderingErrorDetected) {
@@ -1555,8 +1861,7 @@ export async function runAgentLoopImpl(
1555
1861
  // ── Bounded context overflow convergence loop ──────────────────
1556
1862
  // When the provider rejects with context-too-large, iterate through
1557
1863
  // reducer tiers (forced compaction, tool-result truncation, media
1558
- // stubbing, injection downgrade) with optional approval gating for
1559
- // interactive latest-turn compression.
1864
+ // stubbing, injection downgrade).
1560
1865
  //
1561
1866
  // When progress was made (agent added messages before hitting the
1562
1867
  // limit), incorporate those new messages into ctx.messages so the
@@ -1572,11 +1877,11 @@ export async function runAgentLoopImpl(
1572
1877
  if (updatedHistory.length > preRunHistoryLength) {
1573
1878
  ctx.messages = stripInjectionsForCompaction(updatedHistory);
1574
1879
  try {
1575
- clearPkbSystemReminderMetadataForConversation(ctx.conversationId);
1880
+ clearStrippedInjectionMetadataForConversation(ctx.conversationId);
1576
1881
  } catch (err) {
1577
1882
  rlog.warn(
1578
1883
  { err },
1579
- "Failed to clear pkbSystemReminderBlock metadata after compaction strip (non-fatal)",
1884
+ "Failed to clear stripped-injection metadata after compaction strip (non-fatal)",
1580
1885
  );
1581
1886
  }
1582
1887
  convergenceStripped = true;
@@ -1675,7 +1980,7 @@ export async function runAgentLoopImpl(
1675
1980
  step.compactionResult &&
1676
1981
  step.compactionResult.summaryFailed !== undefined
1677
1982
  ) {
1678
- trackCompactionOutcome(
1983
+ await trackCompactionOutcome(
1679
1984
  ctx,
1680
1985
  step.compactionResult.summaryFailed,
1681
1986
  onEvent,
@@ -1683,47 +1988,7 @@ export async function runAgentLoopImpl(
1683
1988
  }
1684
1989
 
1685
1990
  if (step.compactionResult?.compacted) {
1686
- ctx.contextCompactedMessageCount +=
1687
- step.compactionResult.compactedPersistedMessages;
1688
- ctx.contextCompactedAt = Date.now();
1689
- updateConversationContextWindow(
1690
- ctx.conversationId,
1691
- step.compactionResult.summaryText,
1692
- ctx.contextCompactedMessageCount,
1693
- );
1694
- // Fire auto-analysis on compaction — see forceCompact() for rationale.
1695
- enqueueAutoAnalysisOnCompaction(
1696
- ctx.conversationId,
1697
- ctx.trustContext?.trustClass,
1698
- );
1699
- onEvent({
1700
- type: "context_compacted",
1701
- previousEstimatedInputTokens:
1702
- step.compactionResult.previousEstimatedInputTokens,
1703
- estimatedInputTokens: step.compactionResult.estimatedInputTokens,
1704
- maxInputTokens: step.compactionResult.maxInputTokens,
1705
- thresholdTokens: step.compactionResult.thresholdTokens,
1706
- compactedMessages: step.compactionResult.compactedMessages,
1707
- summaryCalls: step.compactionResult.summaryCalls,
1708
- summaryInputTokens: step.compactionResult.summaryInputTokens,
1709
- summaryOutputTokens: step.compactionResult.summaryOutputTokens,
1710
- summaryModel: step.compactionResult.summaryModel,
1711
- });
1712
- emitUsage(
1713
- ctx,
1714
- step.compactionResult.summaryInputTokens,
1715
- step.compactionResult.summaryOutputTokens,
1716
- step.compactionResult.summaryModel,
1717
- onEvent,
1718
- "context_compactor",
1719
- reqId,
1720
- step.compactionResult.summaryCacheCreationInputTokens ?? 0,
1721
- step.compactionResult.summaryCacheReadInputTokens ?? 0,
1722
- collapseRawResponses(step.compactionResult.summaryRawResponses),
1723
- );
1724
- ctx.graphMemory.onCompacted(
1725
- step.compactionResult.compactedPersistedMessages,
1726
- );
1991
+ applyCompactionResult(ctx, step.compactionResult, onEvent, reqId);
1727
1992
  shouldInjectWorkspace = true;
1728
1993
  reducerCompacted = true;
1729
1994
  }
@@ -1742,6 +2007,7 @@ export async function runAgentLoopImpl(
1742
2007
  ? null
1743
2008
  : injectionOpts.slackChronologicalMessages,
1744
2009
  mode: currentInjectionMode,
2010
+ turnContext: buildPluginTurnContext(ctx, reqId),
1745
2011
  });
1746
2012
  runMessages = injection.messages;
1747
2013
  if (isTrustedActor && currentInjectionMode !== "minimal") {
@@ -1767,6 +2033,7 @@ export async function runAgentLoopImpl(
1767
2033
  reqId,
1768
2034
  onCheckpoint,
1769
2035
  turnCallSite,
2036
+ loopTurnCtx,
1770
2037
  );
1771
2038
 
1772
2039
  // If the rerun still yields at checkpoint, the turn is still
@@ -1789,11 +2056,11 @@ export async function runAgentLoopImpl(
1789
2056
  if (updatedHistory.length > preRunHistoryLength) {
1790
2057
  ctx.messages = stripInjectionsForCompaction(updatedHistory);
1791
2058
  try {
1792
- clearPkbSystemReminderMetadataForConversation(ctx.conversationId);
2059
+ clearStrippedInjectionMetadataForConversation(ctx.conversationId);
1793
2060
  } catch (err) {
1794
2061
  rlog.warn(
1795
2062
  { err },
1796
- "Failed to clear pkbSystemReminderBlock metadata after compaction strip (non-fatal)",
2063
+ "Failed to clear stripped-injection metadata after compaction strip (non-fatal)",
1797
2064
  );
1798
2065
  }
1799
2066
  convergenceStripped = true;
@@ -1805,231 +2072,83 @@ export async function runAgentLoopImpl(
1805
2072
 
1806
2073
  // All reducer tiers exhausted but provider still rejects —
1807
2074
  // consult the overflow policy for latest-turn compression.
1808
- // Emergency compaction is deferred to the policy-gated paths below
1809
- // so that `request_user_approval` sessions collect consent first.
2075
+ // The policy either auto-compresses the latest turn or falls
2076
+ // through to the final graceful-error fallback below.
1810
2077
  if (state.contextTooLargeDetected) {
1811
2078
  const action = resolveOverflowAction({
1812
2079
  overflowRecovery,
1813
2080
  isInteractive: isInteractiveResolved,
1814
2081
  });
1815
2082
 
1816
- if (action === "request_user_approval") {
1817
- const approval = await requestCompressionApproval(ctx.prompter, {
1818
- signal: abortController.signal,
1819
- });
1820
-
1821
- if (approval.approved) {
1822
- // User approved — force emergency compaction with aggressive settings
1823
- const emergencyCompact =
1824
- await ctx.contextWindowManager.maybeCompact(
1825
- ctx.messages,
1826
- abortController.signal,
1827
- {
2083
+ if (action === "auto_compress_latest_turn") {
2084
+ // Auto-compress without asking — users opt out via the "drop" policy.
2085
+ ctx.emitActivityState(
2086
+ "thinking",
2087
+ "context_compacting",
2088
+ "assistant_turn",
2089
+ reqId,
2090
+ );
2091
+ let emergencyCompact: Awaited<
2092
+ ReturnType<typeof ctx.contextWindowManager.maybeCompact>
2093
+ > | null = null;
2094
+ try {
2095
+ emergencyCompact = (await runPipeline<
2096
+ CompactionArgs,
2097
+ CompactionResult
2098
+ >(
2099
+ "compaction",
2100
+ getMiddlewaresFor("compaction"),
2101
+ (args) =>
2102
+ defaultCompactionTerminal(
2103
+ args,
2104
+ buildPluginTurnContext(ctx, reqId),
2105
+ ),
2106
+ {
2107
+ messages: ctx.messages,
2108
+ signal: abortController.signal,
2109
+ options: {
1828
2110
  lastCompactedAt: ctx.contextCompactedAt ?? undefined,
1829
2111
  force: true,
1830
2112
  minKeepRecentUserTurns: 0,
1831
2113
  targetInputTokensOverride: correctedTarget,
1832
2114
  },
2115
+ },
2116
+ buildPluginTurnContext(ctx, reqId),
2117
+ DEFAULT_TIMEOUTS.compaction,
2118
+ )) as Awaited<
2119
+ ReturnType<typeof ctx.contextWindowManager.maybeCompact>
2120
+ >;
2121
+ } catch (err) {
2122
+ if (err instanceof PluginTimeoutError) {
2123
+ // Emergency compaction timed out. Record the circuit-breaker
2124
+ // failure and fall through to the graceful-error path below
2125
+ // (the unsuccessful-compaction fallback) rather than hard-
2126
+ // failing the turn.
2127
+ rlog.warn(
2128
+ { err, phase: "emergency-compaction" },
2129
+ "Emergency compaction pipeline timed out — continuing with overflow fallback",
1833
2130
  );
1834
- // Only track when the summary LLM actually ran; `force: true`
1835
- // bypasses the cooldown but not the early-return paths.
1836
- if (emergencyCompact.summaryFailed !== undefined) {
1837
- trackCompactionOutcome(
1838
- ctx,
1839
- emergencyCompact.summaryFailed,
1840
- onEvent,
1841
- );
1842
- }
1843
- if (emergencyCompact.compacted) {
1844
- ctx.messages = emergencyCompact.messages;
1845
- reducerCompacted = true;
1846
- ctx.contextCompactedMessageCount +=
1847
- emergencyCompact.compactedPersistedMessages;
1848
- ctx.contextCompactedAt = Date.now();
1849
- updateConversationContextWindow(
1850
- ctx.conversationId,
1851
- emergencyCompact.summaryText,
1852
- ctx.contextCompactedMessageCount,
1853
- );
1854
- // Fire auto-analysis on compaction — see forceCompact() for rationale.
1855
- enqueueAutoAnalysisOnCompaction(
1856
- ctx.conversationId,
1857
- ctx.trustContext?.trustClass,
1858
- );
1859
- onEvent({
1860
- type: "context_compacted",
1861
- previousEstimatedInputTokens:
1862
- emergencyCompact.previousEstimatedInputTokens,
1863
- estimatedInputTokens: emergencyCompact.estimatedInputTokens,
1864
- maxInputTokens: emergencyCompact.maxInputTokens,
1865
- thresholdTokens: emergencyCompact.thresholdTokens,
1866
- compactedMessages: emergencyCompact.compactedMessages,
1867
- summaryCalls: emergencyCompact.summaryCalls,
1868
- summaryInputTokens: emergencyCompact.summaryInputTokens,
1869
- summaryOutputTokens: emergencyCompact.summaryOutputTokens,
1870
- summaryModel: emergencyCompact.summaryModel,
1871
- });
1872
- emitUsage(
1873
- ctx,
1874
- emergencyCompact.summaryInputTokens,
1875
- emergencyCompact.summaryOutputTokens,
1876
- emergencyCompact.summaryModel,
1877
- onEvent,
1878
- "context_compactor",
1879
- reqId,
1880
- emergencyCompact.summaryCacheCreationInputTokens ?? 0,
1881
- emergencyCompact.summaryCacheReadInputTokens ?? 0,
1882
- collapseRawResponses(emergencyCompact.summaryRawResponses),
1883
- );
1884
- ctx.graphMemory.onCompacted(
1885
- emergencyCompact.compactedPersistedMessages,
1886
- );
1887
- shouldInjectWorkspace = true;
1888
- }
1889
-
1890
- // Only re-inject NOW.md when ctx.messages was actually stripped;
1891
- // otherwise the existing block is still present.
1892
- const injection = await applyRuntimeInjections(ctx.messages, {
1893
- ...injectionOpts,
1894
- pkbContext: currentPkbContent,
1895
- nowScratchpad: convergenceStripped ? currentNowContent : null,
1896
- workspaceTopLevelContext: shouldInjectWorkspace
1897
- ? ctx.workspaceTopLevelContext
1898
- : null,
1899
- slackChronologicalMessages: reducerCompacted
1900
- ? null
1901
- : injectionOpts.slackChronologicalMessages,
1902
- mode: currentInjectionMode,
1903
- });
1904
- runMessages = injection.messages;
1905
- if (isTrustedActor && currentInjectionMode !== "minimal") {
1906
- ctx.graphMemory.retrackCachedNodes();
1907
- }
1908
- const emergencyStrip = stripHistoricalWebSearchResults(runMessages);
1909
- if (emergencyStrip.stats.blocksStripped > 0) {
1910
- rlog.info(
1911
- { phase: "emergency_compact", ...emergencyStrip.stats },
1912
- "Converted historical web_search_tool_result blocks to text summaries",
1913
- );
1914
- runMessages = emergencyStrip.messages;
2131
+ await trackCompactionOutcome(ctx, true, onEvent);
2132
+ emergencyCompact = null;
2133
+ } else {
2134
+ throw err;
1915
2135
  }
1916
- preRepairMessages = runMessages;
1917
- preRunHistoryLength = runMessages.length;
1918
- state.contextTooLargeDetected = false;
1919
-
1920
- updatedHistory = await ctx.agentLoop.run(
1921
- runMessages,
1922
- eventHandler,
1923
- abortController.signal,
1924
- reqId,
1925
- onCheckpoint,
1926
- turnCallSite,
1927
- );
1928
- } else {
1929
- // User denied compression — emit a graceful assistant explanation
1930
- // instead of a conversation_error, and end the turn cleanly.
1931
- state.contextTooLargeDetected = false;
1932
- const denyText =
1933
- "The conversation has grown too long for the model to process, " +
1934
- "and compression was declined. Please start a new conversation " +
1935
- "or manually shorten the conversation to continue.";
1936
- const loopChannelMeta = {
1937
- ...provenanceFromTrustContext(ctx.trustContext),
1938
- userMessageChannel: capturedTurnChannelContext.userMessageChannel,
1939
- assistantMessageChannel:
1940
- capturedTurnChannelContext.assistantMessageChannel,
1941
- userMessageInterface:
1942
- capturedTurnInterfaceContext.userMessageInterface,
1943
- assistantMessageInterface:
1944
- capturedTurnInterfaceContext.assistantMessageInterface,
1945
- };
1946
- const denyMessage = createAssistantMessage(denyText);
1947
- await addMessage(
1948
- ctx.conversationId,
1949
- "assistant",
1950
- JSON.stringify(denyMessage.content),
1951
- loopChannelMeta,
1952
- );
1953
- denyCompressionMessage = denyMessage;
1954
- onEvent({
1955
- type: "assistant_text_delta",
1956
- text: denyText,
1957
- conversationId: ctx.conversationId,
1958
- });
1959
- // Prevent the final error fallback from firing
1960
- state.providerErrorUserMessage = null;
1961
2136
  }
1962
- } else if (action === "auto_compress_latest_turn") {
1963
- // Non-interactive — auto-compress without asking
1964
- ctx.emitActivityState(
1965
- "thinking",
1966
- "context_compacting",
1967
- "assistant_turn",
1968
- reqId,
1969
- );
1970
- const emergencyCompact = await ctx.contextWindowManager.maybeCompact(
1971
- ctx.messages,
1972
- abortController.signal,
1973
- {
1974
- lastCompactedAt: ctx.contextCompactedAt ?? undefined,
1975
- force: true,
1976
- minKeepRecentUserTurns: 0,
1977
- targetInputTokensOverride: correctedTarget,
1978
- },
1979
- );
1980
2137
  // Only track when the summary LLM actually ran; `force: true`
1981
2138
  // bypasses the cooldown but not the early-return paths.
1982
- if (emergencyCompact.summaryFailed !== undefined) {
1983
- trackCompactionOutcome(
2139
+ if (
2140
+ emergencyCompact &&
2141
+ emergencyCompact.summaryFailed !== undefined
2142
+ ) {
2143
+ await trackCompactionOutcome(
1984
2144
  ctx,
1985
2145
  emergencyCompact.summaryFailed,
1986
2146
  onEvent,
1987
2147
  );
1988
2148
  }
1989
- if (emergencyCompact.compacted) {
1990
- ctx.messages = emergencyCompact.messages;
2149
+ if (emergencyCompact?.compacted) {
2150
+ applyCompactionResult(ctx, emergencyCompact, onEvent, reqId);
1991
2151
  reducerCompacted = true;
1992
- ctx.contextCompactedMessageCount +=
1993
- emergencyCompact.compactedPersistedMessages;
1994
- ctx.contextCompactedAt = Date.now();
1995
- updateConversationContextWindow(
1996
- ctx.conversationId,
1997
- emergencyCompact.summaryText,
1998
- ctx.contextCompactedMessageCount,
1999
- );
2000
- // Fire auto-analysis on compaction — see forceCompact() for rationale.
2001
- enqueueAutoAnalysisOnCompaction(
2002
- ctx.conversationId,
2003
- ctx.trustContext?.trustClass,
2004
- );
2005
- onEvent({
2006
- type: "context_compacted",
2007
- previousEstimatedInputTokens:
2008
- emergencyCompact.previousEstimatedInputTokens,
2009
- estimatedInputTokens: emergencyCompact.estimatedInputTokens,
2010
- maxInputTokens: emergencyCompact.maxInputTokens,
2011
- thresholdTokens: emergencyCompact.thresholdTokens,
2012
- compactedMessages: emergencyCompact.compactedMessages,
2013
- summaryCalls: emergencyCompact.summaryCalls,
2014
- summaryInputTokens: emergencyCompact.summaryInputTokens,
2015
- summaryOutputTokens: emergencyCompact.summaryOutputTokens,
2016
- summaryModel: emergencyCompact.summaryModel,
2017
- });
2018
- emitUsage(
2019
- ctx,
2020
- emergencyCompact.summaryInputTokens,
2021
- emergencyCompact.summaryOutputTokens,
2022
- emergencyCompact.summaryModel,
2023
- onEvent,
2024
- "context_compactor",
2025
- reqId,
2026
- emergencyCompact.summaryCacheCreationInputTokens ?? 0,
2027
- emergencyCompact.summaryCacheReadInputTokens ?? 0,
2028
- collapseRawResponses(emergencyCompact.summaryRawResponses),
2029
- );
2030
- ctx.graphMemory.onCompacted(
2031
- emergencyCompact.compactedPersistedMessages,
2032
- );
2033
2152
  shouldInjectWorkspace = true;
2034
2153
  }
2035
2154
 
@@ -2046,6 +2165,7 @@ export async function runAgentLoopImpl(
2046
2165
  ? null
2047
2166
  : injectionOpts.slackChronologicalMessages,
2048
2167
  mode: currentInjectionMode,
2168
+ turnContext: buildPluginTurnContext(ctx, reqId),
2049
2169
  });
2050
2170
  runMessages = injection.messages;
2051
2171
  if (isTrustedActor && currentInjectionMode !== "minimal") {
@@ -2070,6 +2190,7 @@ export async function runAgentLoopImpl(
2070
2190
  reqId,
2071
2191
  onCheckpoint,
2072
2192
  turnCallSite,
2193
+ loopTurnCtx,
2073
2194
  );
2074
2195
  }
2075
2196
  // action === "fail_gracefully" falls through to the final error below
@@ -2134,11 +2255,19 @@ export async function runAgentLoopImpl(
2134
2255
  assistantMessageInterface:
2135
2256
  capturedTurnInterfaceContext.assistantMessageInterface,
2136
2257
  };
2137
- await addMessage(
2138
- ctx.conversationId,
2139
- "user",
2140
- JSON.stringify(toolResultBlocks),
2141
- toolResultMetadata,
2258
+ await runPipeline<PersistArgs, PersistResult>(
2259
+ "persistence",
2260
+ getMiddlewaresFor("persistence"),
2261
+ defaultPersistenceTerminal,
2262
+ {
2263
+ op: "add",
2264
+ conversationId: ctx.conversationId,
2265
+ role: "user",
2266
+ content: JSON.stringify(toolResultBlocks),
2267
+ metadata: toolResultMetadata,
2268
+ },
2269
+ buildPluginTurnContext(ctx, reqId),
2270
+ DEFAULT_TIMEOUTS.persistence,
2142
2271
  );
2143
2272
  state.pendingToolResults.clear();
2144
2273
  }
@@ -2151,10 +2280,6 @@ export async function runAgentLoopImpl(
2151
2280
  return { ...msg, content: cleanedBlocks };
2152
2281
  });
2153
2282
 
2154
- if (denyCompressionMessage) {
2155
- newMessages.push(denyCompressionMessage);
2156
- }
2157
-
2158
2283
  const hasAssistantResponse = newMessages.some(
2159
2284
  (msg) => msg.role === "assistant",
2160
2285
  );
@@ -2176,11 +2301,19 @@ export async function runAgentLoopImpl(
2176
2301
  const errorAssistantMessage = createAssistantMessage(
2177
2302
  state.providerErrorUserMessage,
2178
2303
  );
2179
- await addMessage(
2180
- ctx.conversationId,
2181
- "assistant",
2182
- JSON.stringify(errorAssistantMessage.content),
2183
- errChannelMeta,
2304
+ await runPipeline<PersistArgs, PersistResult>(
2305
+ "persistence",
2306
+ getMiddlewaresFor("persistence"),
2307
+ defaultPersistenceTerminal,
2308
+ {
2309
+ op: "add",
2310
+ conversationId: ctx.conversationId,
2311
+ role: "assistant",
2312
+ content: JSON.stringify(errorAssistantMessage.content),
2313
+ metadata: errChannelMeta,
2314
+ },
2315
+ buildPluginTurnContext(ctx, reqId),
2316
+ DEFAULT_TIMEOUTS.persistence,
2184
2317
  );
2185
2318
  newMessages.push(errorAssistantMessage);
2186
2319
  // Do NOT send assistant_text_delta here — handleProviderError already
@@ -2248,10 +2381,6 @@ export async function runAgentLoopImpl(
2248
2381
  },
2249
2382
  );
2250
2383
 
2251
- void getHookManager().trigger("post-message", {
2252
- conversationId: ctx.conversationId,
2253
- });
2254
-
2255
2384
  const syncLastAssistantMessageToDisk = (): void => {
2256
2385
  if (!state.lastAssistantMessageId) return;
2257
2386
  const convForDisk = getConversation(ctx.conversationId);
@@ -2368,13 +2497,65 @@ export async function runAgentLoopImpl(
2368
2497
  ? { messageId: state.lastAssistantMessageId }
2369
2498
  : {}),
2370
2499
  });
2500
+
2501
+ // Emit a home-feed event for background/scheduled conversation completions.
2502
+ // Scoped to message_complete only (not cancelled/handoff), wrapped in
2503
+ // try-catch so malformed message content can never propagate errors.
2504
+ try {
2505
+ const conv = getConversation(ctx.conversationId);
2506
+ if (
2507
+ conv &&
2508
+ (conv.conversationType === "background" ||
2509
+ conv.conversationType === "scheduled")
2510
+ ) {
2511
+ const lastMsg = state.lastAssistantMessageId
2512
+ ? getMessageById(state.lastAssistantMessageId, ctx.conversationId)
2513
+ : undefined;
2514
+ let summary: string;
2515
+ if (lastMsg) {
2516
+ const parsed: unknown = JSON.parse(lastMsg.content);
2517
+ if (typeof parsed === "string") {
2518
+ summary = parsed.slice(0, 200);
2519
+ } else if (Array.isArray(parsed)) {
2520
+ const textBlock = parsed.find(
2521
+ (b: { type?: string }) => b.type === "text",
2522
+ );
2523
+ summary =
2524
+ typeof textBlock?.text === "string"
2525
+ ? textBlock.text.slice(0, 200)
2526
+ : (conv.title ?? "Background task completed.");
2527
+ } else {
2528
+ summary = conv.title ?? "Background task completed.";
2529
+ }
2530
+ } else {
2531
+ summary = conv.title ?? "Background task completed.";
2532
+ }
2533
+ void emitFeedEvent({
2534
+ source: "assistant",
2535
+ title: conv.title ?? "Background Task",
2536
+ summary,
2537
+ dedupKey: `bg-conv:${ctx.conversationId}`,
2538
+ }).catch((err) => {
2539
+ log.warn(
2540
+ { err, conversationId: ctx.conversationId },
2541
+ "Failed to emit background conversation feed event",
2542
+ );
2543
+ });
2544
+ }
2545
+ } catch (feedErr) {
2546
+ log.warn(
2547
+ { err: feedErr, conversationId: ctx.conversationId },
2548
+ "Failed to build home-feed event for background conversation",
2549
+ );
2550
+ }
2371
2551
  }
2372
2552
  }
2373
2553
 
2374
2554
  // Second title pass: after 3 completed turns, re-generate the title
2375
2555
  // using the last 3 messages for better context. Only fires when the
2376
- // current title was auto-generated (isAutoTitle = 1).
2377
- if (ctx.turnCount === 2) {
2556
+ // current title was auto-generated (isAutoTitle = 1) and the user
2557
+ // has not opted out via `conversations.skipAutoRetitling`.
2558
+ if (ctx.turnCount === 2 && !getConfig().conversations.skipAutoRetitling) {
2378
2559
  // turnCount is 0-indexed, incremented in finally; 2 = about to become 3rd turn
2379
2560
  queueRegenerateConversationTitle({
2380
2561
  conversationId: ctx.conversationId,
@@ -2427,12 +2608,6 @@ export async function runAgentLoopImpl(
2427
2608
  });
2428
2609
  onEvent({ type: "error", message: classified.userMessage });
2429
2610
  onEvent(buildConversationErrorMessage(ctx.conversationId, classified));
2430
- void getHookManager().trigger("on-error", {
2431
- error: err instanceof Error ? err.name : "Error",
2432
- message,
2433
- stack: err instanceof Error ? err.stack : undefined,
2434
- conversationId: ctx.conversationId,
2435
- });
2436
2611
  }
2437
2612
  } finally {
2438
2613
  if (turnStarted) {
@@ -2542,7 +2717,133 @@ function emitUsage(
2542
2717
  );
2543
2718
  }
2544
2719
 
2545
- function collapseRawResponses(rawResponses?: unknown[]): unknown | undefined {
2720
+ /**
2721
+ * Minimal context shape consumed by `applyCompactionResult`. Both
2722
+ * `AgentLoopConversationContext` and `Conversation` satisfy this via structural
2723
+ * typing, so the helper can back both the 5 agent-loop auto-compaction sites
2724
+ * and the single `forceCompact` user-initiated site.
2725
+ */
2726
+ export interface CompactionApplyContext {
2727
+ readonly conversationId: string;
2728
+ messages: Message[];
2729
+ contextCompactedMessageCount: number;
2730
+ contextCompactedAt: number | null;
2731
+ readonly graphMemory: ConversationGraphMemory;
2732
+ readonly provider: Provider;
2733
+ usageStats: UsageStats;
2734
+ trustContext?: TrustContext;
2735
+ }
2736
+
2737
+ /**
2738
+ * Applies a successful `ContextWindowResult` to a conversation: updates the
2739
+ * in-memory message buffer and compaction counters, notifies the graph memory
2740
+ * and conversation-summary store, enqueues auto-analysis, emits the
2741
+ * `context_compacted` event, and records a `context_compactor` usage event.
2742
+ *
2743
+ * The emitted `usage_update` intentionally omits `contextWindow` — the
2744
+ * `context_compacted` event already carries the fresh
2745
+ * `estimatedInputTokens` / `maxInputTokens` and is the single source of
2746
+ * truth for the UI indicator after compaction. Emitting both caused a
2747
+ * redundant SwiftUI invalidation on every compaction.
2748
+ */
2749
+ export function applyCompactionResult(
2750
+ ctx: CompactionApplyContext,
2751
+ result: {
2752
+ messages: Message[];
2753
+ compactedPersistedMessages: number;
2754
+ previousEstimatedInputTokens: number;
2755
+ estimatedInputTokens: number;
2756
+ maxInputTokens: number;
2757
+ thresholdTokens: number;
2758
+ compactedMessages: number;
2759
+ summaryCalls: number;
2760
+ summaryInputTokens: number;
2761
+ summaryOutputTokens: number;
2762
+ summaryModel: string;
2763
+ summaryText: string;
2764
+ summaryCacheCreationInputTokens?: number;
2765
+ summaryCacheReadInputTokens?: number;
2766
+ summaryRawResponses?: unknown[];
2767
+ },
2768
+ onEvent: (msg: ServerMessage) => void,
2769
+ reqId: string | null,
2770
+ ): void {
2771
+ ctx.messages = result.messages;
2772
+ ctx.contextCompactedMessageCount += result.compactedPersistedMessages;
2773
+ ctx.contextCompactedAt = Date.now();
2774
+ ctx.graphMemory.onCompacted(result.compactedPersistedMessages);
2775
+ updateConversationContextWindow(
2776
+ ctx.conversationId,
2777
+ result.summaryText,
2778
+ ctx.contextCompactedMessageCount,
2779
+ );
2780
+ enqueueAutoAnalysisOnCompaction(
2781
+ ctx.conversationId,
2782
+ ctx.trustContext?.trustClass,
2783
+ );
2784
+ const summarySignals = computeSummaryQualitySignals(result.summaryText);
2785
+ onEvent({
2786
+ type: "context_compacted",
2787
+ conversationId: ctx.conversationId,
2788
+ previousEstimatedInputTokens: result.previousEstimatedInputTokens,
2789
+ estimatedInputTokens: result.estimatedInputTokens,
2790
+ maxInputTokens: result.maxInputTokens,
2791
+ thresholdTokens: result.thresholdTokens,
2792
+ compactedMessages: result.compactedMessages,
2793
+ summaryCalls: result.summaryCalls,
2794
+ summaryInputTokens: result.summaryInputTokens,
2795
+ summaryOutputTokens: result.summaryOutputTokens,
2796
+ summaryModel: result.summaryModel,
2797
+ summaryCharCount: summarySignals.charCount,
2798
+ summaryHeaderCount: summarySignals.headerCount,
2799
+ summaryHadMemoryEcho: summarySignals.hadMemoryEcho,
2800
+ });
2801
+ emitUsage(
2802
+ ctx,
2803
+ result.summaryInputTokens,
2804
+ result.summaryOutputTokens,
2805
+ result.summaryModel,
2806
+ onEvent,
2807
+ "context_compactor",
2808
+ reqId,
2809
+ result.summaryCacheCreationInputTokens ?? 0,
2810
+ result.summaryCacheReadInputTokens ?? 0,
2811
+ collapseRawResponses(result.summaryRawResponses),
2812
+ undefined /* providerName */,
2813
+ 1 /* llmCallCount */,
2814
+ );
2815
+ }
2816
+
2817
+ export function collapseRawResponses(
2818
+ rawResponses?: unknown[],
2819
+ ): unknown | undefined {
2546
2820
  if (!rawResponses || rawResponses.length === 0) return undefined;
2547
2821
  return rawResponses.length === 1 ? rawResponses[0] : rawResponses;
2548
2822
  }
2823
+
2824
+ /**
2825
+ * Matches any runtime-injection tag that should never appear inside a
2826
+ * generated summary. If the regex hits, either the compaction strip logic
2827
+ * failed to drop an injected block from the summarizer input, or the
2828
+ * summarizer invented tag-like text on its own — both are quality bugs
2829
+ * worth surfacing via telemetry.
2830
+ */
2831
+ const SUMMARY_MEMORY_ECHO_PATTERN =
2832
+ /<(?:memory|memory_context|memory_image|turn_context|workspace|workspace_top_level|knowledge_base|pkb|system_reminder|now_scratchpad|NOW\.md|active_thread|active_subagents|active_workspace|active_dynamic_page|channel_capabilities|transport_hints|system_notice|non_interactive_context|temporal_context|guardian_context|inbound_actor_context|channel_turn_context|interface_turn_context|channel_command_context|voice_call_control)\b/i;
2833
+
2834
+ /**
2835
+ * Compute light-weight quality signals for a compaction summary. Emitted
2836
+ * on every `context_compacted` event so regressions (short outputs,
2837
+ * header collapse, memory-injection leakage) are visible without having
2838
+ * to read the summary text from the DB.
2839
+ */
2840
+ export function computeSummaryQualitySignals(summaryText: string): {
2841
+ charCount: number;
2842
+ headerCount: number;
2843
+ hadMemoryEcho: boolean;
2844
+ } {
2845
+ const charCount = summaryText.length;
2846
+ const headerCount = (summaryText.match(/^## /gm) ?? []).length;
2847
+ const hadMemoryEcho = SUMMARY_MEMORY_ECHO_PATTERN.test(summaryText);
2848
+ return { charCount, headerCount, hadMemoryEcho };
2849
+ }