@vellumai/assistant 0.8.1 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (630)
  1. package/ARCHITECTURE.md +13 -19
  2. package/Dockerfile +75 -1
  3. package/bun.lock +11 -1
  4. package/docker-entrypoint.sh +17 -0
  5. package/docker-init-apt-root.sh +167 -0
  6. package/docker-kata-apt-env.sh +39 -0
  7. package/docs/plugins.md +88 -47
  8. package/docs/skills.md +9 -7
  9. package/examples/plugins/echo/README.md +27 -27
  10. package/examples/plugins/echo/package.json +3 -0
  11. package/examples/plugins/echo/register.ts +31 -31
  12. package/node_modules/@vellumai/slack-text/src/index.test.ts +114 -14
  13. package/node_modules/@vellumai/slack-text/src/index.ts +82 -18
  14. package/openapi.yaml +642 -5
  15. package/package.json +3 -1
  16. package/scripts/generate-openapi.ts +83 -10
  17. package/scripts/sync-llm-catalog.ts +2 -2
  18. package/scripts/sync-web-search-catalog.ts +47 -25
  19. package/src/__tests__/agent-image-optimize.test.ts +11 -3
  20. package/src/__tests__/agent-loop-exit-reason.test.ts +272 -0
  21. package/src/__tests__/agent-loop-provider-error-recording.test.ts +195 -0
  22. package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +131 -0
  23. package/src/__tests__/anthropic-provider.test.ts +45 -0
  24. package/src/__tests__/app-builder-tool-scripts.test.ts +9 -3
  25. package/src/__tests__/app-executors.test.ts +220 -4
  26. package/src/__tests__/auto-analysis-end-to-end.test.ts +35 -0
  27. package/src/__tests__/bundled-asset.test.ts +6 -6
  28. package/src/__tests__/channel-availability-routes.test.ts +206 -0
  29. package/src/__tests__/channel-delivery-store.test.ts +289 -1
  30. package/src/__tests__/circuit-breaker-pipeline.test.ts +0 -1
  31. package/src/__tests__/clawhub.test.ts +75 -16
  32. package/src/__tests__/compactor-tail-resolution.test.ts +147 -0
  33. package/src/__tests__/config-get-vision-flag.test.ts +136 -0
  34. package/src/__tests__/config-loader-backfill.test.ts +115 -18
  35. package/src/__tests__/config-schema.test.ts +21 -0
  36. package/src/__tests__/config-set-route.test.ts +80 -0
  37. package/src/__tests__/config-sounds-sync.test.ts +97 -0
  38. package/src/__tests__/config-watcher-skill-reseed.test.ts +453 -0
  39. package/src/__tests__/context-search-conversations-source.test.ts +117 -2
  40. package/src/__tests__/context-search-memory-v2-source.test.ts +0 -1
  41. package/src/__tests__/context-search-workspace-source.test.ts +7 -0
  42. package/src/__tests__/context-token-estimator.test.ts +31 -65
  43. package/src/__tests__/conversation-abort-tool-results.test.ts +4 -1
  44. package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +1 -0
  45. package/src/__tests__/conversation-agent-loop-overflow.test.ts +92 -92
  46. package/src/__tests__/conversation-agent-loop.test.ts +59 -1
  47. package/src/__tests__/conversation-error.test.ts +42 -3
  48. package/src/__tests__/conversation-fork-crud.test.ts +82 -0
  49. package/src/__tests__/conversation-inference-profile-route.test.ts +40 -4
  50. package/src/__tests__/conversation-lifecycle.test.ts +173 -0
  51. package/src/__tests__/conversation-media-retry.test.ts +19 -8
  52. package/src/__tests__/conversation-message-sync-tags.test.ts +97 -0
  53. package/src/__tests__/conversation-pairing.test.ts +54 -0
  54. package/src/__tests__/conversation-process-callsite.test.ts +4 -1
  55. package/src/__tests__/conversation-provider-retry-repair.test.ts +5 -1
  56. package/src/__tests__/conversation-queue.test.ts +4 -1
  57. package/src/__tests__/conversation-runtime-assembly.test.ts +102 -13
  58. package/src/__tests__/conversation-slash-queue.test.ts +59 -1
  59. package/src/__tests__/conversation-slash-unknown.test.ts +4 -1
  60. package/src/__tests__/conversation-surfaces-table-action.test.ts +360 -0
  61. package/src/__tests__/conversation-sync-tags.test.ts +235 -0
  62. package/src/__tests__/conversation-workspace-injection.test.ts +5 -1
  63. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +5 -1
  64. package/src/__tests__/credential-security-invariants.test.ts +3 -2
  65. package/src/__tests__/date-context.test.ts +45 -0
  66. package/src/__tests__/db-slack-external-content-normalization.test.ts +301 -0
  67. package/src/__tests__/delete-managed-skill-tool.test.ts +55 -13
  68. package/src/__tests__/disk-pressure-tools.test.ts +1 -0
  69. package/src/__tests__/dm-backfill.test.ts +121 -10
  70. package/src/__tests__/document-tool-security.test.ts +258 -0
  71. package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +0 -1
  72. package/src/__tests__/edit-propagation.test.ts +33 -0
  73. package/src/__tests__/empty-response-pipeline.test.ts +0 -4
  74. package/src/__tests__/external-plugin-loader.test.ts +151 -55
  75. package/src/__tests__/filing-service.test.ts +140 -0
  76. package/src/__tests__/get-skill-detail-audit.test.ts +0 -4
  77. package/src/__tests__/guardian-action-no-hardcoded-copy.test.ts +0 -1
  78. package/src/__tests__/guardian-dispatch.test.ts +1 -0
  79. package/src/__tests__/handlers-skills-memory-v2-reseed.test.ts +43 -62
  80. package/src/__tests__/heartbeat-service.test.ts +24 -164
  81. package/src/__tests__/helpers/channel-test-adapter.ts +0 -2
  82. package/src/__tests__/helpers/tar-fixtures.ts +39 -0
  83. package/src/__tests__/helpers/wait-for.ts +21 -0
  84. package/src/__tests__/history-repair-pipeline.test.ts +0 -3
  85. package/src/__tests__/history-repair.test.ts +73 -0
  86. package/src/__tests__/host-app-control-proxy.test.ts +507 -10
  87. package/src/__tests__/host-proxy-preactivation.test.ts +200 -13
  88. package/src/__tests__/image-credentials.test.ts +1 -1
  89. package/src/__tests__/inbound-slack-persistence.test.ts +2 -0
  90. package/src/__tests__/inference-no-mode-boot-e2e.test.ts +1 -1
  91. package/src/__tests__/inference-profile-reaper.test.ts +4 -2
  92. package/src/__tests__/inference-profile-session-handler.test.ts +18 -6
  93. package/src/__tests__/inference-profile-session-ipc.test.ts +17 -5
  94. package/src/__tests__/injector-background-turn.test.ts +153 -0
  95. package/src/__tests__/injector-chain.test.ts +15 -8
  96. package/src/__tests__/install-skill-routing.test.ts +155 -37
  97. package/src/__tests__/lifecycle-memory-v2-seed.test.ts +99 -3
  98. package/src/__tests__/list-messages-page-latest.test.ts +55 -0
  99. package/src/__tests__/llm-call-pipeline.test.ts +0 -3
  100. package/src/__tests__/llm-callsite-catalog.test.ts +25 -0
  101. package/src/__tests__/llm-catalog-parity.test.ts +58 -13
  102. package/src/__tests__/llm-request-log-agent-loop-exit-reason.test.ts +116 -0
  103. package/src/__tests__/llm-request-log-error-payload.test.ts +138 -0
  104. package/src/__tests__/llm-request-log-source-clickhouse.test.ts +36 -0
  105. package/src/__tests__/llm-request-log-source-factory.test.ts +29 -53
  106. package/src/__tests__/llm-resolver.test.ts +255 -2
  107. package/src/__tests__/llm-usage-store.test.ts +114 -0
  108. package/src/__tests__/managed-profile-guard.test.ts +41 -29
  109. package/src/__tests__/managed-skill-lifecycle.test.ts +109 -18
  110. package/src/__tests__/managed-store.test.ts +84 -192
  111. package/src/__tests__/media-generate-image.test.ts +1 -1
  112. package/src/__tests__/memory-retrieval-pipeline.test.ts +0 -2
  113. package/src/__tests__/messages-after-tiebreaker.test.ts +122 -0
  114. package/src/__tests__/notification-decision-fallback.test.ts +0 -91
  115. package/src/__tests__/notification-decision-strategy.test.ts +14 -31
  116. package/src/__tests__/notification-deep-link.test.ts +15 -0
  117. package/src/__tests__/notification-guardian-path.test.ts +1 -2
  118. package/src/__tests__/notification-platform-adapter.test.ts +5 -4
  119. package/src/__tests__/notification-telegram-adapter.test.ts +1 -0
  120. package/src/__tests__/notification-vellum-adapter.test.ts +113 -0
  121. package/src/__tests__/oauth-commands-routes.test.ts +168 -16
  122. package/src/__tests__/oauth-provider-profiles.test.ts +9 -0
  123. package/src/__tests__/openai-provider.test.ts +242 -3
  124. package/src/__tests__/openai-responses-cutover-guard.test.ts +17 -9
  125. package/src/__tests__/openrouter-provider-only.test.ts +51 -3
  126. package/src/__tests__/openrouter-token-estimation.test.ts +34 -25
  127. package/src/__tests__/overflow-reduce-pipeline.test.ts +0 -2
  128. package/src/__tests__/persistence-pipeline.test.ts +0 -2
  129. package/src/__tests__/{managed-proxy-context.test.ts → platform-proxy-context.test.ts} +7 -2
  130. package/src/__tests__/platform.test.ts +2 -0
  131. package/src/__tests__/plugin-api-shim.test.ts +125 -0
  132. package/src/__tests__/plugin-bootstrap.test.ts +10 -36
  133. package/src/__tests__/plugin-external-api.test.ts +68 -0
  134. package/src/__tests__/plugin-registry.test.ts +0 -77
  135. package/src/__tests__/plugin-route-contribution.test.ts +0 -1
  136. package/src/__tests__/plugin-skill-contribution.test.ts +0 -2
  137. package/src/__tests__/plugin-tool-contribution.test.ts +16 -15
  138. package/src/__tests__/plugin-types.test.ts +3 -13
  139. package/src/__tests__/process-message-background-slack.test.ts +8 -1
  140. package/src/__tests__/process-message-display-content.test.ts +421 -0
  141. package/src/__tests__/provider-catalog-visibility.test.ts +158 -0
  142. package/src/__tests__/provider-error-scenarios.test.ts +111 -0
  143. package/src/__tests__/{provider-managed-proxy-integration.test.ts → provider-platform-proxy-integration.test.ts} +33 -31
  144. package/src/__tests__/scaffold-managed-skill-tool.test.ts +65 -13
  145. package/src/__tests__/schedule-routes.test.ts +50 -3
  146. package/src/__tests__/schedule-store.test.ts +94 -0
  147. package/src/__tests__/scheduler-reuse-conversation.test.ts +54 -7
  148. package/src/__tests__/schema-transforms.test.ts +20 -0
  149. package/src/__tests__/search-skills-unified.test.ts +0 -5
  150. package/src/__tests__/{secret-routes-managed-proxy.test.ts → secret-routes-platform-proxy.test.ts} +1 -1
  151. package/src/__tests__/server-history-render.test.ts +43 -0
  152. package/src/__tests__/skill-load-feature-flag.test.ts +0 -12
  153. package/src/__tests__/skill-load-tool.test.ts +27 -89
  154. package/src/__tests__/skill-memory.test.ts +23 -3
  155. package/src/__tests__/skills-file-content-endpoint.test.ts +9 -38
  156. package/src/__tests__/skills-files-catalog-fallback.test.ts +0 -3
  157. package/src/__tests__/skills-install-extract.test.ts +49 -38
  158. package/src/__tests__/skills-install-staging.test.ts +159 -0
  159. package/src/__tests__/skills-uninstall.test.ts +9 -41
  160. package/src/__tests__/skills.test.ts +51 -58
  161. package/src/__tests__/slack-channel-config.test.ts +9 -0
  162. package/src/__tests__/subagent-tool-filtering.test.ts +50 -0
  163. package/src/__tests__/system-prompt.test.ts +670 -63
  164. package/src/__tests__/terminal-tools.test.ts +28 -1
  165. package/src/__tests__/thread-backfill.test.ts +557 -27
  166. package/src/__tests__/title-generate-pipeline.test.ts +0 -13
  167. package/src/__tests__/token-estimate-pipeline.test.ts +0 -3
  168. package/src/__tests__/tool-error-pipeline.test.ts +0 -3
  169. package/src/__tests__/tool-execute-pipeline.test.ts +0 -5
  170. package/src/__tests__/tool-executor-lifecycle-events.test.ts +1 -1
  171. package/src/__tests__/tool-executor.test.ts +16 -4
  172. package/src/__tests__/tool-result-truncate-pipeline.test.ts +0 -12
  173. package/src/__tests__/turn-events-store.test.ts +256 -0
  174. package/src/__tests__/twilio-routes.test.ts +4 -0
  175. package/src/__tests__/user-plugin-loader.test.ts +0 -7
  176. package/src/__tests__/voice-session-bridge.test.ts +198 -0
  177. package/src/__tests__/web-search-catalog-parity.test.ts +32 -10
  178. package/src/__tests__/workspace-migration-057-repair-stale-gemini-model-ids.test.ts +115 -3
  179. package/src/__tests__/workspace-migration-072-seed-reply-suggestion-callsite.test.ts +50 -0
  180. package/src/__tests__/workspace-migration-073-repair-recall-callsite-empty-profile.test.ts +153 -0
  181. package/src/__tests__/workspace-migration-085-memory-v2-bm25-b-reembed-disabled-v2-pages.test.ts +220 -0
  182. package/src/__tests__/workspace-migration-086-revert-stale-gemini-mis-rewrites.test.ts +269 -0
  183. package/src/__tests__/workspace-migration-087-memory-router-balanced-profile.test.ts +228 -0
  184. package/src/__tests__/workspace-migration-remove-legacy-skills-index.test.ts +309 -0
  185. package/src/__tests__/workspace-migrations-runner.test.ts +111 -3
  186. package/src/a2a/__tests__/agent-card.test.ts +98 -0
  187. package/src/a2a/__tests__/e2e-a2a-channel.test.ts +597 -0
  188. package/src/a2a/__tests__/protocol-helpers.test.ts +113 -0
  189. package/src/a2a/__tests__/task-store.test.ts +246 -0
  190. package/src/a2a/agent-card.ts +58 -0
  191. package/src/a2a/feature-gate.ts +8 -0
  192. package/src/a2a/protocol-constants.ts +21 -0
  193. package/src/a2a/protocol-errors.ts +50 -0
  194. package/src/a2a/protocol-types.ts +162 -0
  195. package/src/a2a/task-store.ts +168 -0
  196. package/src/acp/resolve-agent.ts +1 -1
  197. package/src/agent/image-optimize.ts +13 -5
  198. package/src/agent/loop.ts +167 -18
  199. package/src/calls/voice-session-bridge.ts +61 -42
  200. package/src/channels/config.ts +9 -0
  201. package/src/channels/types.ts +122 -0
  202. package/src/cli/__tests__/unknown-command.test.ts +24 -0
  203. package/src/cli/commands/__tests__/changelog.test.ts +304 -319
  204. package/src/cli/{__tests__ → commands/__tests__}/notifications.test.ts +201 -28
  205. package/src/cli/commands/__tests__/schedules.test.ts +960 -0
  206. package/src/cli/commands/changelog.ts +106 -42
  207. package/src/cli/commands/conversations.ts +102 -17
  208. package/src/cli/commands/default-action.ts +10 -53
  209. package/src/cli/commands/notifications.ts +388 -346
  210. package/src/cli/commands/plugins.ts +252 -0
  211. package/src/cli/commands/schedules.ts +683 -0
  212. package/src/cli/commands/telemetry.ts +40 -0
  213. package/src/cli/lib/__tests__/cli-colors.test.ts +48 -0
  214. package/src/cli/lib/__tests__/confirm-prompt.test.ts +159 -0
  215. package/src/cli/lib/__tests__/install-from-github.test.ts +355 -0
  216. package/src/cli/lib/__tests__/list-installed-plugins.test.ts +154 -0
  217. package/src/cli/lib/__tests__/search-plugins.test.ts +261 -0
  218. package/src/cli/lib/__tests__/uninstall-plugin.test.ts +124 -0
  219. package/src/cli/lib/__tests__/unknown-command.test.ts +106 -0
  220. package/src/cli/lib/cli-colors.ts +12 -0
  221. package/src/cli/lib/confirm-prompt.ts +79 -0
  222. package/src/cli/lib/install-from-github.ts +303 -0
  223. package/src/cli/lib/list-installed-plugins.ts +137 -0
  224. package/src/cli/lib/search-plugins.ts +163 -0
  225. package/src/cli/lib/uninstall-plugin.ts +82 -0
  226. package/src/cli/lib/unknown-command.ts +111 -0
  227. package/src/cli/program.ts +52 -2
  228. package/src/config/assistant-feature-flags.ts +24 -54
  229. package/src/config/bundled-skills/app-builder/SKILL.md +140 -22
  230. package/src/config/bundled-skills/app-builder/TOOLS.json +7 -0
  231. package/src/config/bundled-skills/computer-use/TOOLS.json +15 -52
  232. package/src/config/bundled-skills/document/SKILL.md +23 -3
  233. package/src/config/bundled-skills/document/TOOLS.json +53 -0
  234. package/src/config/bundled-skills/document/tools/document-delete.ts +12 -0
  235. package/src/config/bundled-skills/document/tools/document-list.ts +12 -0
  236. package/src/config/bundled-skills/document/tools/document-read.ts +12 -0
  237. package/src/config/bundled-skills/phone-calls/SKILL.md +1 -1
  238. package/src/config/bundled-skills/skill-management/SKILL.md +2 -2
  239. package/src/config/bundled-skills/skill-management/TOOLS.json +7 -7
  240. package/src/config/bundled-tool-registry.ts +6 -0
  241. package/src/config/call-site-defaults.ts +105 -0
  242. package/src/config/feature-flag-registry.json +41 -9
  243. package/src/config/llm-resolver.ts +52 -1
  244. package/src/config/loader.ts +64 -38
  245. package/src/config/schema.ts +9 -10
  246. package/src/config/schemas/__tests__/llm-request-logs.test.ts +36 -0
  247. package/src/config/schemas/__tests__/memory-v2.test.ts +3 -3
  248. package/src/config/schemas/channels.ts +17 -0
  249. package/src/config/schemas/compaction.ts +28 -0
  250. package/src/config/schemas/conversations.ts +10 -0
  251. package/src/config/schemas/heartbeat.ts +23 -0
  252. package/src/config/schemas/llm-request-logs.ts +31 -7
  253. package/src/config/schemas/llm.ts +1 -0
  254. package/src/config/schemas/memory-retrieval.ts +18 -0
  255. package/src/config/schemas/memory-retrospective.ts +1 -1
  256. package/src/config/schemas/memory-v2.ts +4 -4
  257. package/src/config/schemas/memory.ts +3 -1
  258. package/src/config/schemas/tools.ts +14 -0
  259. package/src/config/seed-inference-profiles.ts +99 -29
  260. package/src/config/skills.ts +3 -96
  261. package/src/context/compactor.ts +1107 -0
  262. package/src/context/token-estimator.ts +34 -36
  263. package/src/context/window-manager.ts +197 -1520
  264. package/src/credential-execution/managed-catalog.ts +37 -0
  265. package/src/credential-health/credential-health-service.ts +280 -19
  266. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +33 -18
  267. package/src/daemon/__tests__/conversation-tool-setup-exclude.test.ts +138 -0
  268. package/src/daemon/__tests__/conversation-tool-setup.test.ts +74 -0
  269. package/src/daemon/approval-generators.ts +8 -6
  270. package/src/daemon/config-watcher.ts +94 -31
  271. package/src/daemon/conversation-agent-loop-handlers.ts +78 -0
  272. package/src/daemon/conversation-agent-loop.ts +198 -11
  273. package/src/daemon/conversation-error.ts +171 -37
  274. package/src/daemon/conversation-lifecycle.ts +53 -40
  275. package/src/daemon/conversation-messaging.ts +25 -6
  276. package/src/daemon/conversation-process.ts +49 -12
  277. package/src/daemon/conversation-runtime-assembly.ts +25 -1
  278. package/src/daemon/conversation-slash.ts +12 -5
  279. package/src/daemon/conversation-store.ts +11 -4
  280. package/src/daemon/conversation-tool-setup.ts +39 -7
  281. package/src/daemon/conversation.ts +33 -8
  282. package/src/daemon/date-context.ts +40 -0
  283. package/src/daemon/external-plugins-bootstrap.ts +217 -181
  284. package/src/daemon/first-greeting.ts +22 -2
  285. package/src/daemon/guardian-action-generators.ts +1 -125
  286. package/src/daemon/handlers/__tests__/config-a2a-complete.test.ts +248 -0
  287. package/src/daemon/handlers/__tests__/config-a2a-invite.test.ts +154 -0
  288. package/src/daemon/handlers/__tests__/config-a2a-redeem.test.ts +133 -0
  289. package/src/daemon/handlers/__tests__/config-a2a.test.ts +95 -0
  290. package/src/daemon/handlers/config-a2a.ts +289 -0
  291. package/src/daemon/handlers/config-model.ts +6 -5
  292. package/src/daemon/handlers/config-slack-channel.ts +15 -3
  293. package/src/daemon/handlers/conversations.ts +1 -0
  294. package/src/daemon/handlers/shared.ts +14 -5
  295. package/src/daemon/handlers/skills.ts +111 -108
  296. package/src/daemon/history-repair.ts +28 -1
  297. package/src/daemon/host-app-control-proxy.ts +153 -27
  298. package/src/daemon/host-proxy-preactivation.ts +85 -18
  299. package/src/daemon/lifecycle.ts +89 -91
  300. package/src/daemon/meet-host-supervisor.ts +5 -4
  301. package/src/daemon/memory-v2-startup.ts +85 -0
  302. package/src/daemon/message-protocol.ts +1 -0
  303. package/src/daemon/message-types/conversations.ts +25 -0
  304. package/src/daemon/message-types/messages.ts +61 -0
  305. package/src/daemon/message-types/notifications.ts +21 -0
  306. package/src/daemon/message-types/subagents.ts +1 -0
  307. package/src/daemon/message-types/sync.ts +1 -0
  308. package/src/daemon/pkb-reminder-builder.test.ts +11 -54
  309. package/src/daemon/pkb-reminder-builder.ts +5 -20
  310. package/src/daemon/plugin-source-watcher.ts +146 -0
  311. package/src/daemon/process-message.ts +24 -3
  312. package/src/daemon/server.ts +11 -2
  313. package/src/daemon/skill-memory-refresh.ts +33 -0
  314. package/src/daemon/wake-target-adapter.ts +2 -0
  315. package/src/documents/document-store.ts +221 -3
  316. package/src/embedded/plugin-api.ts +40 -0
  317. package/src/export/__tests__/transcript-formatter.test.ts +121 -0
  318. package/src/export/transcript-formatter.ts +54 -20
  319. package/src/filing/filing-service.ts +39 -0
  320. package/src/heartbeat/__tests__/heartbeat-service.test.ts +135 -6
  321. package/src/heartbeat/heartbeat-run-store.ts +2 -1
  322. package/src/heartbeat/heartbeat-service.ts +73 -189
  323. package/src/home/__tests__/feed-types.test.ts +80 -0
  324. package/src/home/feed-types.ts +36 -2
  325. package/src/home/post-connect-feed.ts +1 -0
  326. package/src/index.ts +18 -1
  327. package/src/ipc/cli-client.ts +147 -45
  328. package/src/live-voice/__tests__/live-voice-stt.test.ts +57 -0
  329. package/src/mcp/client.ts +20 -4
  330. package/src/media/image-credentials.ts +3 -3
  331. package/src/memory/__tests__/bookmark-crud.test.ts +33 -27
  332. package/src/memory/__tests__/conversation-queries.test.ts +483 -0
  333. package/src/memory/__tests__/jobs-worker-v2-graph-trigger-embed.test.ts +113 -0
  334. package/src/memory/__tests__/memory-retrospective-enqueue.test.ts +2 -50
  335. package/src/memory/__tests__/memory-retrospective-job.test.ts +87 -4
  336. package/src/memory/__tests__/memory-retrospective-startup-cleanup.test.ts +119 -14
  337. package/src/memory/__tests__/message-content.test.ts +35 -0
  338. package/src/memory/bookmark-crud.ts +42 -10
  339. package/src/memory/context-search/sources/conversations.ts +62 -2
  340. package/src/memory/context-search/sources/workspace.ts +4 -0
  341. package/src/memory/conversation-crud.ts +63 -19
  342. package/src/memory/conversation-queries.ts +197 -11
  343. package/src/memory/conversation-title-service.ts +26 -4
  344. package/src/memory/db-init.ts +12 -0
  345. package/src/memory/delivery-crud.ts +152 -5
  346. package/src/memory/embedding-backend.ts +4 -4
  347. package/src/memory/external-conversation-store.ts +66 -5
  348. package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +150 -12
  349. package/src/memory/graph/conversation-graph-memory.ts +49 -21
  350. package/src/memory/graph/tools.ts +9 -40
  351. package/src/memory/indexer.ts +34 -29
  352. package/src/memory/invite-store.ts +53 -0
  353. package/src/memory/jobs/__tests__/embed-concept-page.test.ts +73 -0
  354. package/src/memory/jobs/embed-concept-page.ts +20 -11
  355. package/src/memory/jobs-worker.ts +6 -1
  356. package/src/memory/llm-request-log-source-clickhouse.ts +24 -12
  357. package/src/memory/llm-request-log-source.ts +19 -52
  358. package/src/memory/llm-request-log-store.ts +92 -1
  359. package/src/memory/llm-usage-store.ts +125 -5
  360. package/src/memory/memory-retrospective-enqueue.ts +1 -20
  361. package/src/memory/memory-retrospective-job.ts +33 -6
  362. package/src/memory/memory-retrospective-startup-cleanup.ts +72 -5
  363. package/src/memory/message-content.ts +1 -1
  364. package/src/memory/migrations/109-external-conversation-bindings.ts +15 -4
  365. package/src/memory/migrations/229-delete-private-conversations.test.ts +38 -1
  366. package/src/memory/migrations/229-delete-private-conversations.ts +7 -0
  367. package/src/memory/migrations/247-external-conversation-binding-thread-id.ts +78 -0
  368. package/src/memory/migrations/248-create-onboarding-events.ts +21 -0
  369. package/src/memory/migrations/249-normalize-slack-external-content.ts +240 -0
  370. package/src/memory/migrations/250-provider-connection-base-url-and-models.ts +28 -0
  371. package/src/memory/migrations/251-a2a-tasks.ts +49 -0
  372. package/src/memory/migrations/252-llm-request-log-agent-loop-exit-reason.ts +32 -0
  373. package/src/memory/migrations/index.ts +9 -0
  374. package/src/memory/migrations/registry.ts +16 -0
  375. package/src/memory/onboarding-events-store.ts +106 -0
  376. package/src/memory/schema/a2a.ts +15 -0
  377. package/src/memory/schema/bookmarks.ts +0 -2
  378. package/src/memory/schema/calls.ts +1 -0
  379. package/src/memory/schema/index.ts +1 -0
  380. package/src/memory/schema/inference.ts +3 -3
  381. package/src/memory/schema/infrastructure.ts +13 -0
  382. package/src/memory/turn-events-store.ts +127 -2
  383. package/src/memory/v2/__tests__/activation-store.test.ts +25 -23
  384. package/src/memory/v2/__tests__/activation.test.ts +0 -8
  385. package/src/memory/v2/__tests__/cli-command-store.test.ts +404 -0
  386. package/src/memory/v2/__tests__/frontmatter-sweep.test.ts +25 -4
  387. package/src/memory/v2/__tests__/injection.test.ts +288 -11
  388. package/src/memory/v2/__tests__/migration.test.ts +87 -0
  389. package/src/memory/v2/__tests__/page-index.test.ts +83 -0
  390. package/src/memory/v2/__tests__/prompts-router.test.ts +58 -6
  391. package/src/memory/v2/__tests__/qdrant.test.ts +66 -3
  392. package/src/memory/v2/__tests__/router.test.ts +15 -0
  393. package/src/memory/v2/__tests__/skill-store.test.ts +387 -8
  394. package/src/memory/v2/__tests__/static-context.test.ts +12 -1
  395. package/src/memory/v2/activation-store.ts +14 -16
  396. package/src/memory/v2/cli-command-content.ts +19 -0
  397. package/src/memory/v2/cli-command-store.ts +304 -0
  398. package/src/memory/v2/frontmatter-sweep.ts +7 -1
  399. package/src/memory/v2/injection.ts +81 -26
  400. package/src/memory/v2/migration.ts +49 -19
  401. package/src/memory/v2/page-index.ts +63 -8
  402. package/src/memory/v2/prompts/router.ts +11 -8
  403. package/src/memory/v2/prompts/sweep.ts +2 -2
  404. package/src/memory/v2/qdrant.ts +135 -7
  405. package/src/memory/v2/router.ts +9 -8
  406. package/src/memory/v2/skill-store.ts +120 -35
  407. package/src/memory/v2/static-context.ts +4 -4
  408. package/src/memory/v2/types.ts +23 -0
  409. package/src/messaging/providers/a2a/__tests__/deliver.test.ts +274 -0
  410. package/src/messaging/providers/a2a/deliver.ts +156 -0
  411. package/src/messaging/providers/gmail/client.ts +9 -2
  412. package/src/messaging/providers/index.ts +11 -2
  413. package/src/messaging/providers/slack/__tests__/adapter-token-routing.test.ts +45 -5
  414. package/src/messaging/providers/slack/__tests__/download.test.ts +231 -0
  415. package/src/messaging/providers/slack/adapter.ts +43 -5
  416. package/src/messaging/providers/slack/client.ts +27 -0
  417. package/src/messaging/providers/slack/deep-link.ts +65 -0
  418. package/src/messaging/providers/slack/download.ts +104 -0
  419. package/src/messaging/providers/slack/message-metadata.test.ts +32 -0
  420. package/src/messaging/providers/slack/message-metadata.ts +27 -0
  421. package/src/messaging/providers/slack/render-transcript.test.ts +134 -0
  422. package/src/messaging/providers/slack/render-transcript.ts +69 -5
  423. package/src/messaging/providers/slack/types.ts +20 -1
  424. package/src/notifications/__tests__/broadcaster.test.ts +203 -0
  425. package/src/notifications/__tests__/decision-engine.test.ts +283 -0
  426. package/src/notifications/__tests__/deterministic-checks.test.ts +286 -0
  427. package/src/notifications/__tests__/emit-signal-home-feed.test.ts +1 -0
  428. package/src/notifications/__tests__/home-feed-side-effect.test.ts +430 -7
  429. package/src/notifications/adapters/macos.ts +12 -2
  430. package/src/notifications/broadcaster.ts +29 -4
  431. package/src/notifications/conversation-pairing.ts +2 -1
  432. package/src/notifications/copy-composer.ts +17 -64
  433. package/src/notifications/decision-engine.ts +113 -45
  434. package/src/notifications/deterministic-checks.ts +96 -0
  435. package/src/notifications/emit-signal.ts +21 -1
  436. package/src/notifications/home-feed-side-effect.ts +138 -5
  437. package/src/notifications/signal.ts +3 -5
  438. package/src/notifications/types.ts +8 -0
  439. package/src/oauth/connection-resolver.ts +8 -4
  440. package/src/oauth/platform-connection.test.ts +43 -3
  441. package/src/oauth/platform-connection.ts +19 -6
  442. package/src/oauth/seed-providers.ts +10 -1
  443. package/src/permissions/checker.ts +2 -0
  444. package/src/permissions/ipc-risk-types.ts +1 -0
  445. package/src/permissions/question-prompter.test.ts +416 -0
  446. package/src/permissions/question-prompter.ts +294 -0
  447. package/src/platform/client.test.ts +1 -1
  448. package/src/platform/client.ts +1 -1
  449. package/src/plugin-api/constants.ts +26 -0
  450. package/src/plugin-api/index.ts +34 -1
  451. package/src/plugin-api/types.ts +104 -22
  452. package/src/plugins/defaults/circuit-breaker.ts +0 -5
  453. package/src/plugins/defaults/compaction.ts +0 -4
  454. package/src/plugins/defaults/empty-response.ts +0 -2
  455. package/src/plugins/defaults/history-repair.ts +0 -2
  456. package/src/plugins/defaults/injectors.ts +74 -22
  457. package/src/plugins/defaults/llm-call.ts +0 -2
  458. package/src/plugins/defaults/memory-retrieval.ts +0 -1
  459. package/src/plugins/defaults/overflow-reduce.ts +0 -1
  460. package/src/plugins/defaults/persistence.ts +0 -2
  461. package/src/plugins/defaults/title-generate.ts +0 -5
  462. package/src/plugins/defaults/token-estimate.ts +0 -2
  463. package/src/plugins/defaults/tool-error.ts +0 -7
  464. package/src/plugins/defaults/tool-execute.ts +0 -2
  465. package/src/plugins/defaults/tool-result-truncate.ts +0 -4
  466. package/src/plugins/ensure-plugin-api-shim.ts +96 -0
  467. package/src/plugins/external-api.ts +104 -0
  468. package/src/plugins/external-plugin-loader.ts +187 -42
  469. package/src/plugins/feature-gate.ts +22 -0
  470. package/src/plugins/pipeline.ts +37 -0
  471. package/src/plugins/registry.ts +48 -80
  472. package/src/plugins/types.ts +40 -26
  473. package/src/plugins/user-loader.ts +21 -2
  474. package/src/proactive-artifact/aux-message-injector.ts +11 -0
  475. package/src/proactive-artifact/job.test.ts +37 -5
  476. package/src/prompts/__tests__/system-prompt.test.ts +10 -43
  477. package/src/prompts/__tests__/task-progress-hint-section.test.ts +95 -0
  478. package/src/prompts/normalize-onboarding.ts +27 -0
  479. package/src/prompts/sections.ts +302 -0
  480. package/src/prompts/system-prompt.ts +63 -174
  481. package/src/prompts/templates/BOOTSTRAP.md +17 -1
  482. package/src/prompts/templates/system-sections.ts +164 -0
  483. package/src/providers/__tests__/inference.test.ts +24 -7
  484. package/src/providers/anthropic/client.ts +28 -28
  485. package/src/providers/call-site-routing.ts +24 -6
  486. package/src/providers/connection-resolution.ts +68 -11
  487. package/src/providers/inference/__tests__/adapter-factory-openai-compatible.test.ts +74 -0
  488. package/src/providers/inference/__tests__/connections-openai-compatible.test.ts +175 -0
  489. package/src/providers/inference/__tests__/connections-status-label.test.ts +15 -0
  490. package/src/providers/inference/adapter-factory.ts +32 -6
  491. package/src/providers/inference/auth.ts +12 -0
  492. package/src/providers/inference/backfill.ts +14 -1
  493. package/src/providers/inference/connections.ts +159 -34
  494. package/src/providers/inference/resolve-auth.ts +14 -4
  495. package/src/providers/model-catalog.ts +249 -12
  496. package/src/providers/model-intents.ts +3 -3
  497. package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +235 -0
  498. package/src/providers/openai/chat-completions-provider.ts +169 -8
  499. package/src/providers/openrouter/client.ts +49 -4
  500. package/src/providers/{managed-proxy → platform-proxy}/constants.ts +4 -2
  501. package/src/providers/{managed-proxy → platform-proxy}/context.ts +3 -3
  502. package/src/providers/provider-availability.ts +17 -2
  503. package/src/providers/provider-catalog-visibility.ts +38 -0
  504. package/src/providers/provider-send-message.ts +27 -12
  505. package/src/providers/registry.ts +52 -15
  506. package/src/providers/retry.ts +47 -1
  507. package/src/runtime/__tests__/agent-wake.test.ts +152 -0
  508. package/src/runtime/agent-wake.ts +103 -15
  509. package/src/runtime/auth/route-policy.ts +21 -1
  510. package/src/runtime/btw-sidechain.ts +2 -0
  511. package/src/runtime/http-server.ts +7 -16
  512. package/src/runtime/http-types.ts +19 -47
  513. package/src/runtime/migrations/origin-mode.ts +1 -1
  514. package/src/runtime/pending-interactions.ts +1 -0
  515. package/src/runtime/routes/__tests__/bookmark-routes.test.ts +17 -0
  516. package/src/runtime/routes/__tests__/consolidation-routes.test.ts +258 -0
  517. package/src/runtime/routes/__tests__/conversation-management-routes.test.ts +5 -1
  518. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +172 -23
  519. package/src/runtime/routes/__tests__/inference-provider-connection-routes.test.ts +275 -44
  520. package/src/runtime/routes/__tests__/llm-call-sites-routes.test.ts +12 -0
  521. package/src/runtime/routes/__tests__/question-routes.test.ts +395 -0
  522. package/src/runtime/routes/__tests__/tts-routes.test.ts +64 -1
  523. package/src/runtime/routes/acp-routes-list.test.ts +143 -0
  524. package/src/runtime/routes/acp-routes.ts +5 -3
  525. package/src/runtime/routes/auth-routes.ts +1 -1
  526. package/src/runtime/routes/bookmark-routes.ts +5 -3
  527. package/src/runtime/routes/btw-routes.ts +5 -1
  528. package/src/runtime/routes/channel-availability-routes.ts +126 -0
  529. package/src/runtime/routes/consolidation-routes.ts +100 -0
  530. package/src/runtime/routes/conversation-cli-routes.ts +44 -3
  531. package/src/runtime/routes/conversation-list-routes.ts +3 -20
  532. package/src/runtime/routes/conversation-management-routes.ts +17 -42
  533. package/src/runtime/routes/conversation-query-routes.ts +99 -35
  534. package/src/runtime/routes/conversation-routes.ts +97 -11
  535. package/src/runtime/routes/documents-routes.ts +25 -86
  536. package/src/runtime/routes/group-routes.ts +5 -0
  537. package/src/runtime/routes/inbound-conversation.ts +28 -8
  538. package/src/runtime/routes/inbound-message-handler.ts +236 -41
  539. package/src/runtime/routes/inbound-stages/background-dispatch.test.ts +111 -0
  540. package/src/runtime/routes/inbound-stages/background-dispatch.ts +32 -1
  541. package/src/runtime/routes/inbound-stages/edit-intercept.ts +17 -4
  542. package/src/runtime/routes/index.ts +8 -0
  543. package/src/runtime/routes/inference-profile-session-handler.ts +17 -44
  544. package/src/runtime/routes/inference-profile-session-reaper.ts +7 -21
  545. package/src/runtime/routes/inference-provider-connection-routes.ts +199 -22
  546. package/src/runtime/routes/integrations/a2a.ts +235 -0
  547. package/src/runtime/routes/integrations/slack/share.ts +4 -52
  548. package/src/runtime/routes/integrations/slack/token.ts +43 -0
  549. package/src/runtime/routes/integrations/twilio.ts +6 -13
  550. package/src/runtime/routes/llm-call-sites-routes.ts +11 -1
  551. package/src/runtime/routes/notification-routes.ts +1 -1
  552. package/src/runtime/routes/oauth-commands-routes.ts +105 -15
  553. package/src/runtime/routes/oauth-lifecycle-routes.ts +43 -0
  554. package/src/runtime/routes/question-routes.ts +259 -0
  555. package/src/runtime/routes/rename-conversation-routes.ts +2 -33
  556. package/src/runtime/routes/schedule-routes.ts +4 -7
  557. package/src/runtime/routes/subagents-routes.ts +98 -18
  558. package/src/runtime/routes/telemetry-routes.ts +27 -0
  559. package/src/runtime/routes/tts-routes.ts +27 -2
  560. package/src/runtime/routes/workspace-routes.test.ts +43 -0
  561. package/src/runtime/routes/workspace-routes.ts +28 -0
  562. package/src/runtime/services/conversation-serializer.ts +39 -7
  563. package/src/runtime/sync/resource-sync-events.ts +93 -1
  564. package/src/schedule/schedule-store.ts +27 -2
  565. package/src/schedule/scheduler.ts +9 -1
  566. package/src/security/__tests__/untrusted-content.test.ts +86 -0
  567. package/src/security/untrusted-content.ts +93 -8
  568. package/src/skills/catalog-files.ts +1 -1
  569. package/src/skills/catalog-install.ts +233 -116
  570. package/src/skills/clawhub.ts +70 -13
  571. package/src/skills/managed-store.ts +4 -119
  572. package/src/skills/skillssh-registry.ts +27 -48
  573. package/src/subagent/manager.ts +17 -7
  574. package/src/telemetry/types.ts +113 -1
  575. package/src/telemetry/usage-telemetry-reporter.test.ts +312 -5
  576. package/src/telemetry/usage-telemetry-reporter.ts +113 -7
  577. package/src/tools/apps/executors.ts +58 -7
  578. package/src/tools/ask-question/ask-question-tool.test.ts +509 -0
  579. package/src/tools/ask-question/ask-question-tool.ts +304 -0
  580. package/src/tools/browser/browser-execution.ts +15 -11
  581. package/src/tools/computer-use/definitions.ts +3 -3
  582. package/src/tools/credentials/vault.ts +1 -1
  583. package/src/tools/document/document-tool.ts +124 -1
  584. package/src/tools/filesystem/edit.ts +1 -1
  585. package/src/tools/filesystem/list.ts +1 -1
  586. package/src/tools/filesystem/read.ts +1 -1
  587. package/src/tools/filesystem/write.ts +5 -2
  588. package/src/tools/host-filesystem/transfer.ts +1 -1
  589. package/src/tools/host-terminal/host-shell.ts +1 -1
  590. package/src/tools/memory/register.ts +1 -9
  591. package/src/tools/permission-checker.ts +1 -1
  592. package/src/tools/registry.ts +17 -7
  593. package/src/tools/schedule/create.ts +2 -2
  594. package/src/tools/schema-transforms.ts +7 -2
  595. package/src/tools/side-effects.ts +1 -0
  596. package/src/tools/skills/delete-managed.ts +4 -4
  597. package/src/tools/skills/execute.ts +1 -1
  598. package/src/tools/skills/scaffold-managed.ts +3 -2
  599. package/src/tools/subagent/notify-parent.ts +1 -1
  600. package/src/tools/system/request-permission.ts +2 -2
  601. package/src/tools/terminal/safe-env.ts +60 -1
  602. package/src/tools/tool-manifest.ts +2 -0
  603. package/src/tools/types.ts +107 -21
  604. package/src/tools/ui-surface/definitions.ts +6 -5
  605. package/src/tts/__tests__/provider-adapters.test.ts +76 -2
  606. package/src/tts/providers/elevenlabs-provider.ts +75 -1
  607. package/src/types/onboarding-context.ts +2 -0
  608. package/src/util/errors.ts +17 -0
  609. package/src/util/platform.ts +10 -0
  610. package/src/watcher/__tests__/engine.test.ts +22 -0
  611. package/src/watcher/engine.ts +6 -2
  612. package/src/workspace/migrations/057-repair-stale-gemini-model-ids.ts +80 -15
  613. package/src/workspace/migrations/072-seed-reply-suggestion-callsite.ts +35 -22
  614. package/src/workspace/migrations/073-repair-recall-callsite-empty-profile.ts +3 -1
  615. package/src/workspace/migrations/083-system-prompt-prefix-to-file.ts +191 -0
  616. package/src/workspace/migrations/084-remove-legacy-skills-index.ts +276 -0
  617. package/src/workspace/migrations/085-memory-v2-bm25-b-reembed-disabled-v2-pages.ts +137 -0
  618. package/src/workspace/migrations/086-revert-stale-gemini-mis-rewrites.ts +198 -0
  619. package/src/workspace/migrations/087-memory-router-balanced-profile.ts +91 -0
  620. package/src/workspace/migrations/registry.ts +10 -0
  621. package/src/workspace/migrations/runner.ts +39 -9
  622. package/src/workspace/migrations/types.ts +4 -0
  623. package/examples/plugins/echo/bun.lock +0 -25
  624. package/src/__tests__/context-window-manager.test.ts +0 -2481
  625. package/src/__tests__/guardian-action-conversation-turn.test.ts +0 -441
  626. package/src/context/__tests__/compact-prompt.test.ts +0 -63
  627. package/src/context/prompts/compact.md +0 -26
  628. package/src/memory/graph/__tests__/remember-description.test.ts +0 -55
  629. package/src/prompts/__tests__/build-cli-reference-section.test.ts +0 -37
  630. package/src/runtime/guardian-action-conversation-turn.ts +0 -99
@@ -1,247 +1,46 @@
1
- import { readFileSync } from "node:fs";
2
- import { join } from "node:path";
3
-
1
+ /**
2
+ * Context window manager — the surface the rest of the daemon talks to
3
+ * when it needs to know whether and how to compact a conversation.
4
+ *
5
+ * The actual compaction work is delegated to {@link runAssistantDrivenCompaction}
6
+ * in `./compactor.js`, which hands the model the full conversation plus a
7
+ * user-role instruction message and lets the assistant write its own
8
+ * summary and choose its own cut point.
9
+ *
10
+ * This module retains a small set of legacy exports — `CONTEXT_SUMMARY_MARKER`,
11
+ * `createContextSummaryMessage`, `getSummaryFromContextMessage` — because
12
+ * conversation reload, fork inheritance, and Slack chronological-context
13
+ * assembly all detect a previously-produced summary via the marker. The
14
+ * marker is wrapped around the assistant-role memory message we emit on
15
+ * successful compaction so those code paths keep working unchanged.
16
+ */
17
+ import { getConfig } from "../config/loader.js";
18
+ import type { CompactionConfig } from "../config/schemas/compaction.js";
4
19
  import type { LLMCallSite } from "../config/schemas/llm.js";
5
20
  import type { ContextWindowConfig } from "../config/types.js";
6
21
  import type {
7
22
  ContentBlock,
8
- ImageContent,
9
23
  Message,
10
24
  Provider,
25
+ ToolDefinition,
11
26
  } from "../providers/types.js";
12
- import { resolveBundledDir } from "../util/bundled-asset.js";
13
27
  import { getLogger } from "../util/logger.js";
14
- import { safeStringSlice } from "../util/unicode.js";
15
28
  import {
16
- estimateContentBlockTokens,
17
- estimatePromptTokens,
18
- estimateTextTokens,
19
- } from "./token-estimator.js";
20
- import { truncateToolResultsAcrossHistory } from "./tool-result-truncation.js";
29
+ type CompactionRunArgs,
30
+ runAssistantDrivenCompaction,
31
+ } from "./compactor.js";
32
+ import { estimatePromptTokens } from "./token-estimator.js";
21
33
 
22
34
  const log = getLogger("context-window");
23
35
 
24
36
  export const CONTEXT_SUMMARY_MARKER = "<context_summary>";
25
- const CONVERSATION_SUMMARY_CALL_SITE: LLMCallSite = "conversationSummarization";
26
- const MAX_BLOCK_PREVIEW_CHARS = 3000;
27
- const MAX_FALLBACK_SUMMARY_CHARS = 12000;
28
- const COMPACTION_COOLDOWN_MS = 2 * 60 * 1000;
29
- const MIN_GAIN_TOKENS_DURING_COOLDOWN = 1200;
30
- const SEVERE_PRESSURE_RATIO = 0.95;
31
- const COMPACTION_TOOL_RESULT_MAX_CHARS = 6_000;
32
- const MIN_COMPACTABLE_PERSISTED_MESSAGES = 2;
37
+ const CONTEXT_SUMMARY_CLOSE = "</context_summary>";
33
38
  const INTERNAL_CONTEXT_SUMMARY_MESSAGES = new WeakSet<Message>();
34
39
 
35
- /**
36
- * Hard cap on the verbatim tail-anchor block we splice into the
37
- * post-compaction summary message (see `extractTailAssistantText`). 1500
38
- * chars (~375 tokens) covers a few paragraphs of recent assistant
39
- * narration without bloating the summary. When the tail exceeds this
40
- * size we keep the END (most recent text), since "next step" / "now I'll
41
- * …" statements typically live at the end of the assistant's last text
42
- * block and that's the part the post-compaction model needs most.
43
- */
44
- const TAIL_ANCHOR_MAX_CHARS = 1500;
45
- const TAIL_ANCHOR_OPEN_TAG = "<verbatim_tail>";
46
- const TAIL_ANCHOR_CLOSE_TAG = "</verbatim_tail>";
47
-
48
- /**
49
- * When the existing summary is this fraction or more of the per-summary
50
- * token budget, inject a "compress older content aggressively" instruction
51
- * so incremental-update passes don't let the summary grow unboundedly.
52
- */
53
- const SUMMARY_COMPRESSION_PRESSURE_RATIO = 0.6;
54
-
55
- /**
56
- * Text-block prefixes that persist in live history (for prefix-caching
57
- * stability and model grounding) but pollute the summarizer's view of the
58
- * actual conversation. These blocks are system-metadata attached to user
59
- * turns — memory injections, turn context, workspace hints, etc. They are
60
- * stripped ONLY from the messages fed to the summarization LLM call. Live
61
- * history is never mutated, so prefix caching is preserved.
62
- *
63
- * This list intentionally overlaps with `RUNTIME_INJECTION_PREFIXES` in
64
- * `conversation-runtime-assembly.ts`. That list governs in-flight turn
65
- * assembly via pure prefix matching; this one governs compaction input.
66
- * Keep the two lists in sync when a new injection type is added.
67
- *
68
- * Compaction strip coverage is two-tier: this prefix list catches
69
- * internal-vocabulary tags and any tag carrying the `__injected`
70
- * attribute, while `COMPACTION_ONLY_WRAPPED_STRIP_TAGS` below matches
71
- * ambiguous bare-tag blocks that are shaped like a runtime-emitted
72
- * open/close wrap. A new ambiguous tag added upstream needs to be
73
- * evaluated against both tiers — internal-vocabulary names go here,
74
- * and names whose bare form collides with ordinary English
75
- * (`<memory>`, `<workspace>`, `<knowledge_base>`, `<pkb>`,
76
- * `<system_reminder>`) go in the wrapped-strip list so user prose
77
- * mentioning the tag is preserved.
78
- */
79
- const COMPACTION_ONLY_STRIP_PREFIXES = [
80
- "<memory __injected>",
81
- "<memory_image __injected>",
82
- "</memory_image>",
83
- "<memory_context __injected>",
84
- "<turn_context>",
85
- "<channel_turn_context>",
86
- "<guardian_context>",
87
- "<inbound_actor_context>",
88
- "<interface_turn_context>",
89
- "<workspace_top_level>",
90
- "<now_scratchpad>",
91
- "<NOW.md Always keep this up to date",
92
- "<active_thread>",
93
- "<active_subagents>",
94
- "<active_workspace>",
95
- "<active_dynamic_page>",
96
- "<channel_capabilities>",
97
- "<channel_command_context>",
98
- "<voice_call_control>",
99
- "<transport_hints>",
100
- "<system_notice>",
101
- "<non_interactive_context>",
102
- "<temporal_context>",
103
- ];
104
-
105
- /**
106
- * Tags whose bare form (`<tag>`) is common English vocabulary or markup a
107
- * user might legitimately type in prose. For these we only strip a text
108
- * block if it is shaped exactly like a runtime injection: starts with
109
- * `<tag>\n` and ends with `</tag>`. This bare-tag wrapped shape
110
- * (e.g. `<memory>\n...\n</memory>`) appears in persisted history
111
- * alongside the `__injected`-attributed variants, which the prefix list
112
- * above already catches via `<memory __injected>`. A user who mentions
113
- * `<memory>` in a sentence or inlines `<workspace>...</workspace>` within
114
- * other prose will not match this shape.
115
- */
116
- const COMPACTION_ONLY_WRAPPED_STRIP_TAGS = [
117
- "memory",
118
- "memory_context",
119
- "workspace",
120
- "knowledge_base",
121
- "pkb",
122
- "system_reminder",
123
- ];
124
-
125
- function isCompactionInjectedBlock(text: string): boolean {
126
- if (COMPACTION_ONLY_STRIP_PREFIXES.some((p) => text.startsWith(p))) {
127
- return true;
128
- }
129
- return COMPACTION_ONLY_WRAPPED_STRIP_TAGS.some(
130
- (tag) => text.startsWith(`<${tag}>\n`) && text.endsWith(`</${tag}>`),
131
- );
132
- }
133
-
134
- /**
135
- * Remove text blocks that look like runtime injections from user messages.
136
- * Non-text blocks (images, tool_use, tool_result, etc.) are untouched.
137
- * Empty messages (every block filtered out) are dropped from the output.
138
- *
139
- * Used only on the `compactableMessages` slice right before it is
140
- * serialized for the summarization LLM — the caller's original message
141
- * array is never mutated.
142
- */
143
- export function stripCompactionOnlyInjections(messages: Message[]): Message[] {
144
- return messages
145
- .map((message) => {
146
- if (message.role !== "user") return message;
147
- const nextContent = message.content.filter((block) => {
148
- if (block.type !== "text") return true;
149
- return !isCompactionInjectedBlock(block.text);
150
- });
151
- if (nextContent.length === message.content.length) return message;
152
- if (nextContent.length === 0) return null;
153
- return { ...message, content: nextContent };
154
- })
155
- .filter(
156
- (message): message is NonNullable<typeof message> => message != null,
157
- );
158
- }
159
-
160
- /**
161
- * Load the compaction summary system prompt from the bundled markdown asset.
162
- *
163
- * `resolveBundledDir` handles the compiled-binary case where the caller path
164
- * points to `/$bunfs/` and the asset lives next to the executable (macOS app
165
- * bundle `Contents/Resources/` or sibling dir). In source mode it falls back
166
- * to the sibling `prompts/` directory.
167
- */
168
- export function loadCompactPrompt(): string {
169
- const callerDir = import.meta.dirname ?? __dirname;
170
- const promptsDir = resolveBundledDir(callerDir, "prompts", "compact-prompts");
171
- const promptPath = join(promptsDir, "compact.md");
172
- const contents = readFileSync(promptPath, "utf-8");
173
- if (contents.length === 0) {
174
- throw new Error(
175
- `compact.md at ${promptPath} is empty — compaction summary prompt missing`,
176
- );
177
- }
178
- return contents;
179
- }
180
-
181
- /**
182
- * Hardcoded fallback prompt used when the bundled `compact.md` asset is
183
- * missing or unreadable, so the daemon can still compact conversations
184
- * rather than failing module import at startup.
185
- */
186
- const SUMMARY_PROMPT_FALLBACK = [
187
- "You are summarizing a long conversation so that the assistant can keep working with it after older messages are dropped. Your summary will REPLACE those messages — the assistant's only access to what was said earlier will be what you write here.",
188
- "",
189
- "Be thorough. Capture what happened, why it mattered, what's unresolved, and what was felt. Do not compress away emotional tone, relationship context, or nuance. Keep specific details (names, numbers, file paths, commands, URLs, exact phrasings) when they might matter later.",
190
- "",
191
- "Target length: aim for 1500–4000 tokens. Use the upper end when the conversation is rich in decisions, relationships, emotional content, or threads that are still open. Use the lower end for short or simple task execution.",
192
- "",
193
- "Open with a 1–2 paragraph narrative describing what the conversation is about and where it currently stands. Then use `## ` section headers. Use these when they apply; skip sections that have nothing to say; add your own headers when something doesn't fit:",
194
- "- `## What We're Working On`",
195
- "- `## Decisions & Commitments`",
196
- "- `## Facts Worth Remembering`",
197
- "- `## Open Threads`",
198
- "- `## Emotional Arc / Relationship Notes` (include when relevant)",
199
- "- `## Artifacts & References`",
200
- "",
201
- "If an existing summary is provided, update it: merge new information in, prefer the most recent and explicit detail on conflicts, and preserve anything still unresolved or still true. Do not restart from scratch.",
202
- "",
203
- "Never include in the summary: content inside `<memory __injected>`, `<memory>`, `<turn_context>`, `<workspace>`, `<knowledge_base>`, `<system_reminder>`, `<now_scratchpad>`, `<NOW.md …>`, `<active_thread>`, `<channel_capabilities>`, `<transport_hints>`, `<system_notice>`, or any other angle-bracket-tagged system blocks. Tool-call boilerplate (retries, failed attempts the assistant recovered from, routine status updates) — summarize the outcome instead. Repetitive chit-chat that adds nothing.",
204
- "",
205
- 'Thread anchors (Slack only): if the input includes a "Retained Thread References" section, each listed reply cites its parent via `→ Mxxxxxx`. If that parent appears in the Transcript, preserve its text verbatim. Omit when absent.',
206
- "",
207
- "Return only the summary itself in markdown — no preamble, no meta-commentary.",
208
- ].join("\n");
209
-
210
- /**
211
- * Load the compact prompt with graceful fallback. If `loader` throws (missing
212
- * or unreadable bundled asset, partial deployment, filesystem corruption),
213
- * logs a warning and returns the hardcoded fallback string so module import
214
- * never fails. The loader is injectable for testability.
215
- */
216
- export function loadCompactPromptOrFallback(
217
- loader: () => string = loadCompactPrompt,
218
- ): string {
219
- try {
220
- return loader();
221
- } catch (err) {
222
- log.warn(
223
- { err },
224
- "Failed to load compact.md from bundle; using inline fallback prompt. The bundled asset may be missing or unreadable.",
225
- );
226
- return SUMMARY_PROMPT_FALLBACK;
227
- }
228
- }
229
-
230
- const SUMMARY_SYSTEM_PROMPT = loadCompactPromptOrFallback();
231
-
232
- /**
233
- * Pattern matching a Slack-style reply tag-line's parent-alias reference.
234
- * The chronological renderer emits reply lines as
235
- * `[MM/DD/YY HH:MM @sender → Mxxxxxx]: body`, or, for edited replies,
236
- * `[MM/DD/YY HH:MM @sender → Mxxxxxx, edited MM/DD/YY HH:MM]: body`. The
237
- * character after the 6-hex parent alias is therefore `]` for a plain reply
238
- * or `,` for an edited one — the regex accepts either. `Mxxxxxx` is the
239
- * first 6 hex chars of sha256(threadTs). A retained-tail text block that
240
- * contains this pattern is carrying a live reference to a parent that may
241
- * still live in the compactable region — the summarizer needs to know about
242
- * it to act on the Thread-anchors clause of SUMMARY_SYSTEM_PROMPT.
243
- */
244
- const THREAD_REPLY_REFERENCE_PATTERN = /→ M[0-9a-f]{6}[,\]]/;
40
+ // ---------------------------------------------------------------------------
41
+ // Public types preserved for downstream consumers (agent loop, conversation,
42
+ // plugin pipeline, applyCompactionResult, routes/playground/force-compact).
43
+ // ---------------------------------------------------------------------------
245
44
 
246
45
  export interface ContextWindowResult {
247
46
  messages: Message[];
@@ -263,12 +62,6 @@ export interface ContextWindowResult {
263
62
  summaryRawResponses?: unknown[];
264
63
  summaryText: string;
265
64
  reason?: string;
266
- /**
267
- * True when the summary LLM call threw and the local fallback produced the
268
- * summary. Callers use this to distinguish provider-side summary failures
269
- * from successful compactions so they can apply circuit-breaker logic
270
- * without losing the fallback-compacted messages.
271
- */
272
65
  summaryFailed?: boolean;
273
66
  }
274
67
 
@@ -279,41 +72,27 @@ export interface ShouldCompactResult {
279
72
 
280
73
  export interface ContextWindowCompactOptions {
281
74
  lastCompactedAt?: number;
282
- /** Bypass the threshold check and force compaction. Used for context-too-large error recovery. */
75
+ /** Skip the auto-threshold check (used for /compact and recovery). */
283
76
  force?: boolean;
284
77
  /**
285
- * Override the minimum number of recent user turns to preserve.
286
- * Set to `0` for emergency recovery that can compact the entire history
287
- * (except the summary message itself). When omitted, the default floor
288
- * is `1` (or `8` when `conversationOriginChannel === "slack"`).
289
- */
290
- minKeepRecentUserTurns?: number;
291
- /**
292
- * Origin channel hint used when `minKeepRecentUserTurns` is omitted.
293
- * Slack-originated conversations bump the default keep floor so multi-turn
294
- * thread context (replies, quoted messages) is not summarized away too
295
- * aggressively. Explicit `minKeepRecentUserTurns` overrides this hint.
296
- */
297
- conversationOriginChannel?: string;
298
- /**
299
- * Per-conversation inference-profile override forwarded to the summary LLM
300
- * call and usage attribution.
78
+ * Per-conversation inference-profile override forwarded to the compaction
79
+ * LLM call.
301
80
  */
302
81
  overrideProfile?: string | null;
303
82
  /**
304
- * Override the target input token budget used for keep-boundary
305
- * projected-fit checks. Clamped to no looser than `config.targetInputTokens`
306
- * — i.e. the override may only demand a *stricter* fit. Passing a looser
307
- * value has no effect. Intended for forced recovery paths that need a
308
- * tighter target than the default.
83
+ * Pre-computed token estimate from a prior {@link shouldCompact} call.
84
+ * Avoids a redundant tokenization pass when the caller already has one.
309
85
  */
310
- targetInputTokensOverride?: number;
86
+ precomputedEstimate?: number;
311
87
  /**
312
- * Pre-computed token estimate from a prior `shouldCompact()` call.
313
- * When provided, `maybeCompact()` skips its own `estimatePromptTokens()`
314
- * call, avoiding a redundant O(history) tokenization pass.
88
+ * Legacy fields retained for backwards compatibility with existing
89
+ * callers. The new assistant-driven compactor does not consume them —
90
+ * the model decides where to cut and what to keep — but accepting them
91
+ * here lets callers keep their existing call sites unchanged.
315
92
  */
316
- precomputedEstimate?: number;
93
+ minKeepRecentUserTurns?: number;
94
+ conversationOriginChannel?: string;
95
+ targetInputTokensOverride?: number;
317
96
  }
318
97
 
319
98
  export interface ContextWindowManagerOptions {
@@ -322,36 +101,83 @@ export interface ContextWindowManagerOptions {
322
101
  config: ContextWindowConfig;
323
102
  /** Pre-computed tool token budget to include in all estimations. */
324
103
  toolTokenBudget?: number;
104
+ /** Conversation ID — required for image-manifest and timestamp lookups. */
105
+ conversationId?: string;
106
+ /**
107
+ * Optional tools resolver. The compactor passes tools to the provider on
108
+ * the compaction call so the cached prefix (system prompt + tools +
109
+ * conversation messages) matches the agent's main-turn cache key.
110
+ */
111
+ resolveTools?: () => ToolDefinition[] | undefined;
112
+ }
113
+
114
+ // ---------------------------------------------------------------------------
115
+ // Summary-message helpers (used by lifecycle rehydrate + fork inheritance)
116
+ // ---------------------------------------------------------------------------
117
+
118
+ /**
119
+ * Build the synthetic memory message that heads a compacted conversation.
120
+ * Produces an `assistant`-role message wrapped in `<context_summary>` tags
121
+ * so reload and inheritance paths can recognize it via
122
+ * {@link getSummaryFromContextMessage}.
123
+ */
124
+ export function createContextSummaryMessage(summary: string): Message {
125
+ const message: Message = {
126
+ role: "assistant",
127
+ content: [
128
+ {
129
+ type: "text",
130
+ text: `${CONTEXT_SUMMARY_MARKER}\n${summary}\n${CONTEXT_SUMMARY_CLOSE}`,
131
+ },
132
+ ],
133
+ };
134
+ INTERNAL_CONTEXT_SUMMARY_MESSAGES.add(message);
135
+ return message;
136
+ }
137
+
138
+ export function getSummaryFromContextMessage(
139
+ message: Message | undefined,
140
+ ): string | null {
141
+ if (!message) return null;
142
+ const text = extractText(message.content).trim();
143
+ if (!text.startsWith(CONTEXT_SUMMARY_MARKER)) return null;
144
+ if (!INTERNAL_CONTEXT_SUMMARY_MESSAGES.has(message)) return null;
145
+ let inner = text.slice(CONTEXT_SUMMARY_MARKER.length);
146
+ const closeIdx = inner.lastIndexOf(CONTEXT_SUMMARY_CLOSE);
147
+ if (closeIdx !== -1) inner = inner.slice(0, closeIdx);
148
+ return inner.trim();
325
149
  }
326
150
 
151
+ function extractText(content: ContentBlock[]): string {
152
+ return content
153
+ .filter(
154
+ (b): b is Extract<ContentBlock, { type: "text" }> => b.type === "text",
155
+ )
156
+ .map((b) => b.text)
157
+ .join("\n");
158
+ }
159
+
160
+ // ---------------------------------------------------------------------------
161
+ // ContextWindowManager
162
+ // ---------------------------------------------------------------------------
163
+
327
164
  export class ContextWindowManager {
328
165
  private readonly provider: Provider;
329
166
  private readonly _systemPrompt: string | (() => string);
330
167
  private config: ContextWindowConfig;
331
168
  private readonly toolTokenBudget: number;
169
+ private readonly conversationId: string | undefined;
170
+ private readonly resolveTools:
171
+ | (() => ToolDefinition[] | undefined)
172
+ | undefined;
332
173
  /**
333
174
  * Number of leading messages that are non-persisted (injected inherited
334
- * context from a parent conversation). `countPersistedMessages` subtracts
335
- * this so `compactedPersistedMessages` only reflects DB-backed messages.
336
- * Set by `Conversation.injectInheritedContext` and consumed (decremented)
337
- * after a successful compaction pass.
175
+ * context from a parent conversation). The compactor subtracts this from
176
+ * `compactedMessages` so `compactedPersistedMessages` only reflects DB
177
+ * rows. Decremented after a successful compaction.
338
178
  */
339
179
  nonPersistedPrefixCount = 0;
340
- /**
341
- * True when the message at index 0 is a context summary that was inherited
342
- * from a parent fork (i.e. injected as part of the non-persisted prefix),
343
- * rather than produced by this conversation's own compaction. The parent
344
- * summary sits at index 0 but is excluded from `compactableMessages` by
345
- * `summaryOffset`, so its slot in `nonPersistedPrefixCount` must be
346
- * accounted for separately. Cleared after the first compaction replaces
347
- * the parent summary with a child-owned one.
348
- */
349
180
  summaryIsInjected = false;
350
- /**
351
- * Cached resolved system prompt. Lazily populated on first access via the
352
- * `systemPrompt` getter and cleared after each compaction pass so the next
353
- * pass picks up any prompt changes.
354
- */
355
181
  private _resolvedSystemPrompt: string | undefined;
356
182
 
357
183
  constructor(options: ContextWindowManagerOptions) {
@@ -359,27 +185,21 @@ export class ContextWindowManager {
359
185
  this._systemPrompt = options.systemPrompt;
360
186
  this.config = options.config;
361
187
  this.toolTokenBudget = options.toolTokenBudget ?? 0;
188
+ this.conversationId = options.conversationId;
189
+ this.resolveTools = options.resolveTools;
362
190
  }
363
191
 
364
192
  updateConfig(config: ContextWindowConfig): void {
365
193
  this.config = config;
366
194
  }
367
195
 
368
- /**
369
- * Provider key for the local token estimator. Wrapper providers (e.g.
370
- * OpenRouter routing to `anthropic/*`) override `tokenEstimationProvider`
371
- * so image/PDF sizing uses the same rules as the upstream API instead of
372
- * the generic `base64/4` fallback.
373
- */
374
196
  private get estimationProviderName(): string {
375
197
  return this.provider.tokenEstimationProvider ?? this.provider.name;
376
198
  }
377
199
 
378
- /** Lazily resolve and cache the system prompt for the duration of a compaction pass. */
379
200
  private get systemPrompt(): string {
380
- if (this._resolvedSystemPrompt !== undefined) {
201
+ if (this._resolvedSystemPrompt !== undefined)
381
202
  return this._resolvedSystemPrompt;
382
- }
383
203
  const resolved =
384
204
  typeof this._systemPrompt === "function"
385
205
  ? this._systemPrompt()
@@ -392,21 +212,26 @@ export class ContextWindowManager {
392
212
  this._resolvedSystemPrompt = undefined;
393
213
  }
394
214
 
215
+ private resolveCompactionConfig(): CompactionConfig {
216
+ return getConfig().compaction;
217
+ }
218
+
395
219
  /**
396
- * Cheap pre-check: returns whether the estimated token count exceeds
397
- * the compaction threshold, along with the estimated token count so
398
- * callers can pass it into `maybeCompact()` via `precomputedEstimate`
399
- * to avoid a redundant tokenization pass.
220
+ * Cheap pre-check: estimate the current token count and compare against
221
+ * `compaction.autoThreshold`. Callers pass the estimate back through
222
+ * `precomputedEstimate` on the {@link maybeCompact} call to avoid
223
+ * re-tokenizing the same history twice.
400
224
  */
401
225
  shouldCompact(messages: Message[]): ShouldCompactResult {
402
- if (!this.config.enabled) return { needed: false, estimatedTokens: 0 };
226
+ const compaction = this.resolveCompactionConfig();
227
+ if (!compaction.enabled) return { needed: false, estimatedTokens: 0 };
403
228
  try {
404
229
  const estimated = estimatePromptTokens(messages, this.systemPrompt, {
405
230
  providerName: this.estimationProviderName,
406
231
  toolTokenBudget: this.toolTokenBudget,
407
232
  });
408
233
  const threshold = Math.floor(
409
- this.config.maxInputTokens * this.config.compactThreshold,
234
+ this.config.maxInputTokens * compaction.autoThreshold,
410
235
  );
411
236
  return { needed: estimated >= threshold, estimatedTokens: estimated };
412
237
  } finally {
@@ -431,6 +256,7 @@ export class ContextWindowManager {
431
256
  signal?: AbortSignal,
432
257
  options?: ContextWindowCompactOptions,
433
258
  ): Promise<ContextWindowResult> {
259
+ const compaction = this.resolveCompactionConfig();
434
260
  const previousEstimatedInputTokens =
435
261
  options?.precomputedEstimate ??
436
262
  estimatePromptTokens(messages, this.systemPrompt, {
@@ -438,1264 +264,115 @@ export class ContextWindowManager {
438
264
  toolTokenBudget: this.toolTokenBudget,
439
265
  });
440
266
  const thresholdTokens = Math.floor(
441
- this.config.maxInputTokens * this.config.compactThreshold,
267
+ this.config.maxInputTokens * compaction.autoThreshold,
442
268
  );
443
- const existingSummary = getSummaryFromContextMessage(messages[0]);
444
269
 
445
- if (!this.config.enabled) {
446
- return {
447
- messages,
448
- compacted: false,
449
- previousEstimatedInputTokens,
450
- estimatedInputTokens: previousEstimatedInputTokens,
270
+ if (!compaction.enabled) {
271
+ return noopResult(messages, previousEstimatedInputTokens, {
451
272
  maxInputTokens: this.config.maxInputTokens,
452
273
  thresholdTokens,
453
- compactedMessages: 0,
454
- compactedPersistedMessages: 0,
455
- summaryCalls: 0,
456
- summaryInputTokens: 0,
457
- summaryOutputTokens: 0,
458
- summaryModel: "",
459
- summaryText: existingSummary ?? "",
460
- reason: "context window compaction disabled",
461
- };
274
+ reason: "compaction disabled",
275
+ });
462
276
  }
463
277
 
464
- if (!options?.force && previousEstimatedInputTokens < thresholdTokens) {
465
- return {
466
- messages,
467
- compacted: false,
468
- previousEstimatedInputTokens,
469
- estimatedInputTokens: previousEstimatedInputTokens,
278
+ if (this.conversationId == null) {
279
+ // The compactor needs the conversation id to look up image
280
+ // attachments and DB timestamps. If we don't have one (legacy test
281
+ // path, ad-hoc instantiation), skip — never fabricate one.
282
+ log.warn(
283
+ "ContextWindowManager has no conversationId — skipping compaction",
284
+ );
285
+ return noopResult(messages, previousEstimatedInputTokens, {
470
286
  maxInputTokens: this.config.maxInputTokens,
471
287
  thresholdTokens,
472
- compactedMessages: 0,
473
- compactedPersistedMessages: 0,
474
- summaryCalls: 0,
475
- summaryInputTokens: 0,
476
- summaryOutputTokens: 0,
477
- summaryModel: "",
478
- summaryText: existingSummary ?? "",
479
- reason: "below compaction threshold",
480
- };
288
+ reason: "no conversation id",
289
+ });
481
290
  }
482
291
 
483
- const summaryOffset = existingSummary != null ? 1 : 0;
484
- const userTurnStarts = collectUserTurnStartIndexes(messages);
485
- if (userTurnStarts.length === 0) {
486
- return {
487
- messages,
488
- compacted: false,
489
- previousEstimatedInputTokens,
490
- estimatedInputTokens: previousEstimatedInputTokens,
292
+ if (!options?.force && previousEstimatedInputTokens < thresholdTokens) {
293
+ return noopResult(messages, previousEstimatedInputTokens, {
491
294
  maxInputTokens: this.config.maxInputTokens,
492
295
  thresholdTokens,
493
- compactedMessages: 0,
494
- compactedPersistedMessages: 0,
495
- summaryCalls: 0,
496
- summaryInputTokens: 0,
497
- summaryOutputTokens: 0,
498
- summaryModel: "",
499
- summaryText: existingSummary ?? "",
500
- reason: "no user turns available for compaction",
501
- };
296
+ reason: "below auto threshold",
297
+ });
502
298
  }
503
299
 
504
- const keepPlanInitial = this.pickKeepBoundary(messages, userTurnStarts, {
505
- minKeepRecentUserTurns: options?.minKeepRecentUserTurns,
506
- targetInputTokensOverride: options?.targetInputTokensOverride,
507
- conversationOriginChannel: options?.conversationOriginChannel,
508
- force: options?.force,
300
+ const args: CompactionRunArgs = {
301
+ conversationId: this.conversationId,
302
+ messages,
303
+ provider: this.provider,
304
+ systemPrompt: this.systemPrompt,
305
+ tools: this.resolveTools?.(),
306
+ compaction,
307
+ maxInputTokens: this.config.maxInputTokens,
509
308
  previousEstimatedInputTokens,
510
- });
511
- // Under force (user-explicit `/compact`), never route through the
512
- // "already fits" / "truncated tool results without summarization"
513
- // early-return — those are no-op responses to a direct user command.
514
- // The boundary can collapse to the summary in two cases the
515
- // projection-optimism clamp in pickKeepBoundary does not cover:
516
- // 1. `adjustForToolPairs` walked the boundary back through a
517
- // tool_use/tool_result chain at the start of the conversation.
518
- // 2. The binary search settled below `userTurnStarts.length` (so
519
- // the clamp at the top of pickKeepBoundary did not fire) but
520
- // `adjustForToolPairs` still walked the resulting boundary
521
- // backwards past `summaryOffset`.
522
- // Rescue: restore the binary search's intended keep depth (capped at
523
- // `length - 1` so we always summarize at least one turn) and bypass
524
- // `adjustForToolPairs`. The kept region's first message may then
525
- // contain a `tool_result` whose matching `tool_use` lives in the
526
- // compacted region; we strip such orphans below before assembling
527
- // the final messages array so the next agent turn does not fail
528
- // when sending to the LLM.
529
- const forceRescueApplied =
530
- options?.force === true &&
531
- keepPlanInitial.keepFromIndex <= summaryOffset &&
532
- userTurnStarts.length >= 2;
533
- const safeKeepTurns = Math.max(
534
- 1,
535
- Math.min(keepPlanInitial.keepTurns, userTurnStarts.length - 1),
536
- );
537
- const keepPlan = forceRescueApplied
538
- ? {
539
- keepFromIndex: userTurnStarts[userTurnStarts.length - safeKeepTurns],
540
- keepTurns: safeKeepTurns,
541
- }
542
- : keepPlanInitial;
543
- if (keepPlan.keepFromIndex <= summaryOffset) {
544
- // All turns fit after truncation projection, but the real in-memory
545
- // messages may still contain un-truncated tool results. Apply truncation
546
- // so the caller gets the token savings even without summarization.
547
- const { messages: truncatedMessages, truncatedCount } =
548
- truncateToolResultsAcrossHistory(
549
- messages,
550
- COMPACTION_TOOL_RESULT_MAX_CHARS,
551
- );
552
- const didTruncate = truncatedCount > 0;
553
- const estimatedAfterTruncation = didTruncate
554
- ? estimatePromptTokens(truncatedMessages, this.systemPrompt, {
555
- providerName: this.estimationProviderName,
556
- toolTokenBudget: this.toolTokenBudget,
557
- })
558
- : previousEstimatedInputTokens;
559
- // Under force with only one user turn, the rescue above could not
560
- // fire — there is nothing earlier to summarize. Surface that
561
- // explicitly instead of "conversation already fits..." so the user
562
- // knows why `/compact` did not produce a summary.
563
- const noSummarizationReason =
564
- options?.force && userTurnStarts.length < 2
565
- ? "only one user turn — nothing earlier to compact"
566
- : "conversation already fits within the compaction target";
567
- return {
568
- messages: truncatedMessages,
569
- compacted: didTruncate,
570
- previousEstimatedInputTokens,
571
- estimatedInputTokens: estimatedAfterTruncation,
572
- maxInputTokens: this.config.maxInputTokens,
573
- thresholdTokens,
574
- compactedMessages: 0,
575
- compactedPersistedMessages: 0,
576
- summaryCalls: 0,
577
- summaryInputTokens: 0,
578
- summaryOutputTokens: 0,
579
- summaryModel: "",
580
- summaryText: existingSummary ?? "",
581
- reason: didTruncate
582
- ? "truncated tool results without summarization"
583
- : noSummarizationReason,
584
- };
585
- }
586
-
587
- const compactableMessages = messages.slice(
588
- summaryOffset,
589
- keepPlan.keepFromIndex,
590
- );
591
- if (compactableMessages.length === 0) {
592
- return {
593
- messages,
594
- compacted: false,
595
- previousEstimatedInputTokens,
596
- estimatedInputTokens: previousEstimatedInputTokens,
597
- maxInputTokens: this.config.maxInputTokens,
598
- thresholdTokens,
599
- compactedMessages: 0,
600
- compactedPersistedMessages: 0,
601
- summaryCalls: 0,
602
- summaryInputTokens: 0,
603
- summaryOutputTokens: 0,
604
- summaryModel: "",
605
- summaryText: existingSummary ?? "",
606
- reason: "no eligible messages to compact",
607
- };
608
- }
309
+ force: options?.force,
310
+ signal,
311
+ overrideProfile: options?.overrideProfile ?? null,
312
+ nonPersistedPrefixCount: this.nonPersistedPrefixCount,
313
+ };
609
314
 
610
- // When the summary at index 0 was injected from a parent fork, it
611
- // contributes 1 to `nonPersistedPrefixCount` but is excluded from
612
- // `compactableMessages` by `summaryOffset`; subtract it here so the
613
- // remaining injected count lines up with compactableMessages. A summary
614
- // produced by this conversation's own prior compaction is not part of
615
- // `nonPersistedPrefixCount` (already decremented), so no subtraction.
616
- const injectedSummaryOffset = this.summaryIsInjected ? summaryOffset : 0;
617
- const injectedInCompactable = Math.min(
618
- Math.max(0, this.nonPersistedPrefixCount - injectedSummaryOffset),
619
- compactableMessages.length,
620
- );
621
- const compactedPersistedMessages =
622
- countPersistedMessages(compactableMessages) - injectedInCompactable;
623
- const rawProjectedMessages = [
624
- createContextSummaryMessage(existingSummary ?? "Projected summary"),
625
- ...messages.slice(keepPlan.keepFromIndex),
626
- ];
627
- const { messages: projectedMessages } = truncateToolResultsAcrossHistory(
628
- rawProjectedMessages,
629
- COMPACTION_TOOL_RESULT_MAX_CHARS,
630
- );
631
- const projectedInputTokens = estimatePromptTokens(
632
- projectedMessages,
633
- this.systemPrompt,
634
- {
635
- providerName: this.estimationProviderName,
636
- toolTokenBudget: this.toolTokenBudget,
637
- },
638
- );
639
- const projectedGainTokens = Math.max(
640
- 0,
641
- previousEstimatedInputTokens - projectedInputTokens,
642
- );
643
- const severePressure =
644
- previousEstimatedInputTokens >=
645
- Math.floor(this.config.maxInputTokens * SEVERE_PRESSURE_RATIO);
646
- const lastCompactedAt = options?.lastCompactedAt;
315
+ const result = await runAssistantDrivenCompaction(args);
647
316
 
648
- // Adaptive cooldown: conversations growing quickly (high projected gain) compact
649
- // sooner. Scale the cooldown inversely with the growth-rate multiplier, capped at
650
- // 1/4 of the base cooldown so we never check more than 4× as frequently.
651
- const growthRateMultiplier = Math.max(
652
- 1,
653
- projectedGainTokens / MIN_GAIN_TOKENS_DURING_COOLDOWN,
654
- );
655
- const adaptiveCooldownMs = Math.max(
656
- COMPACTION_COOLDOWN_MS / 4,
657
- COMPACTION_COOLDOWN_MS / growthRateMultiplier,
658
- );
659
- const withinCooldown =
660
- typeof lastCompactedAt === "number" &&
661
- Date.now() - lastCompactedAt < adaptiveCooldownMs;
317
+ if (!result.compacted) return result;
662
318
 
663
- // The adaptive cooldown is already tuned to be shorter for fast-growing
664
- // conversations (high projectedGainTokens → smaller adaptiveCooldownMs).
665
- // Removing the redundant MIN_GAIN_TOKENS_DURING_COOLDOWN guard here lets
666
- // that shorter cooldown actually gate compaction: high-growth conversations
667
- // break out of the cooldown sooner and compact more frequently.
668
- // force=true bypasses the cooldown so context-too-large recovery can always
669
- // attempt a compaction even within the cooldown window.
670
- if (withinCooldown && !severePressure && !options?.force) {
671
- log.debug(
319
+ // Recompute the post-compaction token estimate now that the message
320
+ // array has been rebuilt. The compactor returns a conservative
321
+ // placeholder; the agent loop wants the real number for its next
322
+ // budget decision.
323
+ let estimatedInputTokens = result.estimatedInputTokens;
324
+ try {
325
+ estimatedInputTokens = estimatePromptTokens(
326
+ result.messages,
327
+ this.systemPrompt,
672
328
  {
673
- projectedGainTokens,
674
- adaptiveCooldownMs,
675
- growthRateMultiplier,
676
- msSinceCompaction:
677
- typeof lastCompactedAt === "number"
678
- ? Date.now() - lastCompactedAt
679
- : null,
329
+ providerName: this.estimationProviderName,
330
+ toolTokenBudget: this.toolTokenBudget,
680
331
  },
681
- "Compaction cooldown active",
682
332
  );
683
- return {
684
- messages,
685
- compacted: false,
686
- previousEstimatedInputTokens,
687
- estimatedInputTokens: previousEstimatedInputTokens,
688
- maxInputTokens: this.config.maxInputTokens,
689
- thresholdTokens,
690
- compactedMessages: 0,
691
- compactedPersistedMessages: 0,
692
- summaryCalls: 0,
693
- summaryInputTokens: 0,
694
- summaryOutputTokens: 0,
695
- summaryModel: "",
696
- summaryText: existingSummary ?? "",
697
- reason: "compaction cooldown active",
698
- };
699
- }
700
-
701
- // `severePressure` already bypasses this guard to keep context from
702
- // overflowing. Forced compaction also bypasses: when the user
703
- // explicitly types `/compact` we must summarize whatever is
704
- // available rather than return "insufficient compactable persisted
705
- // messages" — that is a no-op response to a direct user command.
706
- if (
707
- compactedPersistedMessages < MIN_COMPACTABLE_PERSISTED_MESSAGES &&
708
- !severePressure &&
709
- !options?.force
710
- ) {
711
- return {
712
- messages,
713
- compacted: false,
714
- previousEstimatedInputTokens,
715
- estimatedInputTokens: previousEstimatedInputTokens,
716
- maxInputTokens: this.config.maxInputTokens,
717
- thresholdTokens,
718
- compactedMessages: 0,
719
- compactedPersistedMessages: 0,
720
- summaryCalls: 0,
721
- summaryInputTokens: 0,
722
- summaryOutputTokens: 0,
723
- summaryModel: "",
724
- summaryText: existingSummary ?? "",
725
- reason: "insufficient compactable persisted messages",
726
- };
727
- }
728
-
729
- const retainedThreadRefs = collectRetainedThreadReferences(
730
- messages.slice(keepPlan.keepFromIndex),
731
- );
732
- // Strip runtime injections (memory, turn context, workspace hints, etc.)
733
- // from the messages fed to the summarizer. These blocks are system
734
- // metadata; leaving them in causes the summary to echo rotating memory
735
- // content instead of the actual conversation. The caller's live message
736
- // array is untouched so prefix caching stays intact.
737
- const transcriptSource = stripCompactionOnlyInjections(compactableMessages);
738
- const transcriptBlocks = this.capTranscriptBlocksToTokenBudget(
739
- serializeMessagesToContentBlocks(transcriptSource),
740
- existingSummary ?? "No previous summary.",
741
- retainedThreadRefs,
742
- );
743
- const summaryUpdate = await this.updateSummary(
744
- existingSummary ?? "No previous summary.",
745
- transcriptBlocks,
746
- retainedThreadRefs,
747
- signal,
748
- options?.overrideProfile ?? null,
749
- );
750
- const summaryInputTokens = summaryUpdate.inputTokens;
751
- const summaryOutputTokens = summaryUpdate.outputTokens;
752
- const summaryModel = summaryUpdate.model;
753
- const summaryCacheCreationInputTokens =
754
- summaryUpdate.cacheCreationInputTokens;
755
- const summaryCacheReadInputTokens = summaryUpdate.cacheReadInputTokens;
756
- const summaryFailed = summaryUpdate.failed;
757
- const summaryRawResponses: unknown[] = [];
758
- if (Array.isArray(summaryUpdate.rawResponse)) {
759
- summaryRawResponses.push(...summaryUpdate.rawResponse);
760
- } else if (summaryUpdate.rawResponse !== undefined) {
761
- summaryRawResponses.push(summaryUpdate.rawResponse);
333
+ } catch (err) {
334
+ log.warn({ err }, "Post-compaction token estimate failed");
762
335
  }
763
- const summaryCalls = 1;
764
-
765
- // Force-keep the most recent assistant text from the compactable region
766
- // by splicing it verbatim into the summary message. This is independent
767
- // of what the LLM summarizer chose to surface — when compaction
768
- // interrupts a long assistant work span, this anchor preserves the
769
- // model's last self-narration ("Next step: …", "About to …") so the
770
- // post-compaction model has unambiguous continuity instead of falling
771
- // back to a "where am I?" recovery shape.
772
- const tailAnchorText = extractTailAssistantText(compactableMessages);
773
- const summary =
774
- tailAnchorText != null
775
- ? appendTailAnchorToSummary(summaryUpdate.summary, tailAnchorText)
776
- : summaryUpdate.summary;
777
-
778
- // Media (images, files) in kept turns is preserved naturally — those
779
- // turns are carried forward as-is and their token cost is already
780
- // accounted for by pickKeepBoundary's estimatePromptTokens call.
781
- // Images in compacted turns are passed to the summarizer so it can
782
- // describe their visual content in the summary text.
783
- const summaryMessage = createContextSummaryMessage(summary);
784
336
 
785
- const { messages: truncatedKeptMessages } =
786
- truncateToolResultsAcrossHistory(
787
- messages.slice(keepPlan.keepFromIndex),
788
- COMPACTION_TOOL_RESULT_MAX_CHARS,
789
- );
790
- // The force-rescue boundary bypasses `adjustForToolPairs`, so the
791
- // kept region may contain `tool_result` blocks whose matching
792
- // `tool_use` is in the (now-compacted) prefix. Strip those orphans
793
- // so the next agent turn does not fail with an LLM API error.
794
- const keptMessages = forceRescueApplied
795
- ? stripOrphanToolResults(truncatedKeptMessages)
796
- : truncatedKeptMessages;
797
- const compactedMessages = [summaryMessage, ...keptMessages];
798
- const estimatedInputTokens = estimatePromptTokens(
799
- compactedMessages,
800
- this.systemPrompt,
801
- {
802
- providerName: this.estimationProviderName,
803
- toolTokenBudget: this.toolTokenBudget,
804
- },
337
+ // Consume any non-persisted prefix messages that were compacted away
338
+ // and clear the injected-summary flag.
339
+ const compactedAway = Math.min(
340
+ this.nonPersistedPrefixCount,
341
+ result.compactedMessages,
805
342
  );
806
- // Consume the injected prefix messages that were compacted away. When the
807
- // parent-injected summary was replaced by a freshly produced child summary,
808
- // also consume its slot (it was excluded from injectedInCompactable via
809
- // injectedSummaryOffset) and clear the flag so subsequent compactions treat
810
- // the summary at index 0 as child-owned.
811
343
  this.nonPersistedPrefixCount = Math.max(
812
344
  0,
813
- this.nonPersistedPrefixCount -
814
- injectedInCompactable -
815
- injectedSummaryOffset,
345
+ this.nonPersistedPrefixCount - compactedAway,
816
346
  );
817
347
  this.summaryIsInjected = false;
818
348
 
819
- log.info(
820
- {
821
- previousEstimatedInputTokens,
822
- estimatedInputTokens,
823
- compactedMessages: compactableMessages.length,
824
- compactedPersistedMessages,
825
- keepTurns: keepPlan.keepTurns,
826
- summaryCalls,
827
- },
828
- "Compacted conversation context window",
829
- );
830
-
831
- return {
832
- messages: compactedMessages,
833
- compacted: true,
834
- previousEstimatedInputTokens,
835
- estimatedInputTokens,
836
- maxInputTokens: this.config.maxInputTokens,
837
- thresholdTokens,
838
- compactedMessages: compactableMessages.length,
839
- compactedPersistedMessages,
840
- summaryCalls,
841
- summaryInputTokens,
842
- summaryOutputTokens,
843
- summaryModel,
844
- summaryCallSite: CONVERSATION_SUMMARY_CALL_SITE,
845
- summaryOverrideProfile: options?.overrideProfile ?? null,
846
- summaryCacheCreationInputTokens,
847
- summaryCacheReadInputTokens,
848
- summaryRawResponses,
849
- summaryText: summary,
850
- summaryFailed,
851
- };
852
- }
853
-
854
- private get targetInputTokens(): number {
855
- return Math.floor(
856
- this.config.maxInputTokens *
857
- (this.config.targetBudgetRatio - this.config.summaryBudgetRatio),
858
- );
349
+ return { ...result, estimatedInputTokens };
859
350
  }
860
-
861
- private pickKeepBoundary(
862
- messages: Message[],
863
- userTurnStarts: number[],
864
- opts?: {
865
- minKeepRecentUserTurns?: number;
866
- targetInputTokensOverride?: number;
867
- conversationOriginChannel?: string;
868
- force?: boolean;
869
- previousEstimatedInputTokens?: number;
870
- },
871
- ): { keepFromIndex: number; keepTurns: number } {
872
- // Slack-originated conversations rely on multi-turn thread context
873
- // (reply chains, quoted messages, contextual references). Bump the
874
- // default keep floor for them so compaction does not summarize away
875
- // recent turns that the next reply may directly cite. Explicit
876
- // `minKeepRecentUserTurns` (including emergency `0`) wins.
877
- const defaultTurns = opts?.conversationOriginChannel === "slack" ? 8 : 1;
878
- const minFloor = Math.min(
879
- Math.max(0, Math.floor(opts?.minKeepRecentUserTurns ?? defaultTurns)),
880
- userTurnStarts.length,
881
- );
882
- const targetTokens = Math.min(
883
- opts?.targetInputTokensOverride ?? this.targetInputTokens,
884
- this.targetInputTokens,
885
- );
886
-
887
- // Binary search for the maximum keepTurns whose projected tokens fit
888
- // within the budget. Token count is monotonically non-decreasing with
889
- // keepTurns (more turns = more tokens), so binary search is valid.
890
- const projectedTokensForKeep = (turns: number): number => {
891
- const fromIndex =
892
- turns === 0
893
- ? messages.length
894
- : (userTurnStarts[userTurnStarts.length - turns] ?? messages.length);
895
- const rawProjected = [
896
- createContextSummaryMessage("Projected summary"),
897
- ...messages.slice(fromIndex),
898
- ];
899
- const { messages: projectedMessages } = truncateToolResultsAcrossHistory(
900
- rawProjected,
901
- COMPACTION_TOOL_RESULT_MAX_CHARS,
902
- );
903
- return estimatePromptTokens(projectedMessages, this.systemPrompt, {
904
- providerName: this.estimationProviderName,
905
- toolTokenBudget: this.toolTokenBudget,
906
- });
907
- };
908
-
909
- let lo = minFloor;
910
- let hi = userTurnStarts.length;
911
-
912
- // Fast path: if keeping all turns already fits, skip the search.
913
- if (hi > lo && projectedTokensForKeep(hi) > targetTokens) {
914
- // Binary search: find the largest keepTurns where projected tokens fit.
915
- while (lo < hi) {
916
- const mid = lo + Math.ceil((hi - lo) / 2);
917
- if (projectedTokensForKeep(mid) <= targetTokens) {
918
- lo = mid;
919
- } else {
920
- hi = mid - 1;
921
- }
922
- }
923
- } else {
924
- lo = hi;
925
- }
926
-
927
- // Under forced compaction with only the implicit default floor in play,
928
- // that floor stops being an absolute override when the kept region still
929
- // exceeds the target. Walk keepTurns below the floor — down to 0 if
930
- // needed — so /compact can always drive the conversation toward target,
931
- // even when the floor turn itself is oversized (e.g. a huge paste in the
932
- // last user message). Exceptions that still treat the floor as hard:
933
- // - Explicit `minKeepRecentUserTurns` (the caller opted in to that
934
- // floor; emergency recovery already passes 0 when it wants to go all
935
- // the way down).
936
- // - Slack origin (the bumped 8-turn floor protects thread reply chains
937
- // and quoted-message context that the next reply may directly cite).
938
- // Automatic mid-loop compaction (force !== true) always honors the floor
939
- // so the in-flight agent turn isn't summarized away.
940
- const floorIsImplicitDefault =
941
- opts?.minKeepRecentUserTurns === undefined &&
942
- opts?.conversationOriginChannel !== "slack";
943
- if (
944
- opts?.force &&
945
- floorIsImplicitDefault &&
946
- projectedTokensForKeep(lo) > targetTokens
947
- ) {
948
- while (lo > 0 && projectedTokensForKeep(lo) > targetTokens) {
949
- lo--;
950
- }
951
- }
952
-
953
- // The projection's summary-swap and tool_result truncation can make
954
- // projectedTokensForKeep(hi) optimistically fit even when the live
955
- // conversation is well over target — sending /compact through the
956
- // "already fits" skip path as a no-op. Clamp lo so summarization runs.
957
- if (
958
- opts?.force &&
959
- floorIsImplicitDefault &&
960
- lo === userTurnStarts.length &&
961
- lo > 0 &&
962
- (opts?.previousEstimatedInputTokens ?? 0) > targetTokens
963
- ) {
964
- lo -= 1;
965
- }
966
-
967
- const keepTurns = lo;
968
- const rawKeepFromIndex =
969
- keepTurns === 0
970
- ? messages.length
971
- : (userTurnStarts[userTurnStarts.length - keepTurns] ??
972
- messages.length);
973
- const keepFromIndex = adjustForToolPairs(messages, rawKeepFromIndex);
974
- return { keepFromIndex, keepTurns };
975
- }
976
-
977
- private get summaryMaxTokens(): number {
978
- return Math.max(
979
- 1,
980
- Math.floor(this.config.maxInputTokens * this.config.summaryBudgetRatio),
981
- );
982
- }
983
-
984
- /**
985
- * Trim the serialized transcript content blocks so that the summary prompt
986
- * (system prompt + existing summary + transcript + scaffolding) fits within
987
- * the provider's input token limit, minus the output budget reserved for the
988
- * summary itself.
989
- *
990
- * When the transcript exceeds the budget, blocks are dropped from the
991
- * beginning (oldest messages first) to preserve recent context. Image blocks
992
- * are dropped before text blocks within each pass since they are expensive
993
- * and their surrounding text context already captures the conversation flow.
994
- */
995
- private capTranscriptBlocksToTokenBudget(
996
- blocks: ContentBlock[],
997
- currentSummary: string,
998
- retainedThreadRefs: string[],
999
- ): ContentBlock[] {
1000
- const retainedRefsText = retainedThreadRefs.join("\n");
1001
- const overheadTokens =
1002
- estimateTextTokens(SUMMARY_SYSTEM_PROMPT) +
1003
- estimateTextTokens(currentSummary) +
1004
- estimateTextTokens(retainedRefsText) +
1005
- // Scaffolding text in buildSummaryContentBlocks ("Update the summary...",
1006
- // section headers, etc.) — generous fixed estimate.
1007
- 200 +
1008
- this.summaryMaxTokens;
1009
-
1010
- const maxTranscriptTokens = Math.max(
1011
- 0,
1012
- this.config.maxInputTokens - overheadTokens,
1013
- );
1014
-
1015
- const estimateBlockTokens = (b: ContentBlock): number =>
1016
- estimateContentBlockTokens(b, {
1017
- providerName: this.estimationProviderName,
1018
- });
1019
-
1020
- let totalTokens = 0;
1021
- for (const block of blocks) {
1022
- totalTokens += estimateBlockTokens(block);
1023
- }
1024
- const originalTotalTokens = totalTokens;
1025
- if (totalTokens <= maxTranscriptTokens) return blocks;
1026
-
1027
- // First pass: drop images from the beginning until we fit or run out of
1028
- // images to drop. Images are high-cost and their text context (message
1029
- // headers, surrounding tool_use/tool_result serializations) is preserved.
1030
- const result = [...blocks];
1031
- for (
1032
- let i = 0;
1033
- i < result.length && totalTokens > maxTranscriptTokens;
1034
- i++
1035
- ) {
1036
- if (result[i].type === "image") {
1037
- totalTokens -= estimateBlockTokens(result[i]);
1038
- const stub: ContentBlock = {
1039
- type: "text",
1040
- text: `[image omitted from summary context]`,
1041
- };
1042
- totalTokens += estimateBlockTokens(stub);
1043
- result[i] = stub;
1044
- }
1045
- }
1046
- if (totalTokens <= maxTranscriptTokens) return result;
1047
-
1048
- // Second pass: drop text blocks from the beginning (oldest) until we fit.
1049
- // If a single text block exceeds the remaining budget, truncate it rather
1050
- // than dropping it entirely so the summarizer always has content to work with.
1051
- let dropUntil = 0;
1052
- let droppedTokens = 0;
1053
- for (
1054
- let i = 0;
1055
- i < result.length && totalTokens > maxTranscriptTokens;
1056
- i++
1057
- ) {
1058
- const blockTokens = estimateBlockTokens(result[i]);
1059
- const excess = totalTokens - maxTranscriptTokens;
1060
- if (blockTokens > excess && result[i].type === "text") {
1061
- // Truncate this block to shed exactly the excess tokens.
1062
- // Subtract the cost of the "[...truncated] " prefix so the final
1063
- // block (prefix + kept text) stays within budget.
1064
- const truncationPrefix = "[...truncated] ";
1065
- const prefixTokens = estimateTextTokens(truncationPrefix);
1066
- const keepTokens = Math.max(1, blockTokens - excess - prefixTokens);
1067
- const text = (result[i] as { type: "text"; text: string }).text;
1068
- // Approximate: 1 token ≈ 4 characters for truncation purposes.
1069
- const keepChars = Math.max(1, Math.floor(keepTokens * 4));
1070
- const truncatedText = text.slice(-keepChars);
1071
- const truncatedBlock: ContentBlock = {
1072
- type: "text",
1073
- text: `${truncationPrefix}${truncatedText}`,
1074
- };
1075
- const newBlockTokens = estimateBlockTokens(truncatedBlock);
1076
- droppedTokens += blockTokens - newBlockTokens;
1077
- totalTokens -= blockTokens - newBlockTokens;
1078
- result[i] = truncatedBlock;
1079
- dropUntil = i;
1080
- break;
1081
- }
1082
- droppedTokens += blockTokens;
1083
- totalTokens -= blockTokens;
1084
- dropUntil = i + 1;
1085
- }
1086
-
1087
- log.info(
1088
- {
1089
- originalTokens: originalTotalTokens,
1090
- cappedTokens: maxTranscriptTokens,
1091
- droppedTokens,
1092
- },
1093
- "Capped summary transcript blocks to fit provider input limit",
1094
- );
1095
-
1096
- return [
1097
- { type: "text", text: "[earlier messages truncated]" } as ContentBlock,
1098
- ...result.slice(dropUntil),
1099
- ];
1100
- }
1101
-
1102
- private async updateSummary(
1103
- currentSummary: string,
1104
- transcriptBlocks: ContentBlock[],
1105
- retainedThreadRefs: string[],
1106
- signal?: AbortSignal,
1107
- overrideProfile?: string | null,
1108
- ): Promise<{
1109
- summary: string;
1110
- inputTokens: number;
1111
- outputTokens: number;
1112
- model: string;
1113
- cacheCreationInputTokens: number;
1114
- cacheReadInputTokens: number;
1115
- rawResponse?: unknown;
1116
- /**
1117
- * True when the provider.sendMessage call threw and the local fallback
1118
- * was used. Callers (the agent loop) use this to drive circuit-breaker
1119
- * state without having to reimplement the fallback themselves.
1120
- */
1121
- failed: boolean;
1122
- }> {
1123
- // When the existing summary is already consuming most of its budget,
1124
- // nudge the model to compress older durable content aggressively so
1125
- // incremental-update passes don't let the summary grow unboundedly.
1126
- const existingSummaryTokens = estimateTextTokens(currentSummary);
1127
- const compressionPressure =
1128
- existingSummaryTokens >=
1129
- this.summaryMaxTokens * SUMMARY_COMPRESSION_PRESSURE_RATIO;
1130
- const contentBlocks = buildSummaryContentBlocks(
1131
- currentSummary,
1132
- transcriptBlocks,
1133
- retainedThreadRefs,
1134
- { compressionPressure },
1135
- );
1136
- const summaryMessage: Message = { role: "user", content: contentBlocks };
1137
- let failed = false;
1138
- try {
1139
- const providerConfig: Record<string, unknown> = {
1140
- callSite: CONVERSATION_SUMMARY_CALL_SITE,
1141
- usageTracking: "manual",
1142
- max_tokens: this.summaryMaxTokens,
1143
- };
1144
- if (overrideProfile) {
1145
- providerConfig.overrideProfile = overrideProfile;
1146
- }
1147
- const response = await this.provider.sendMessage(
1148
- [summaryMessage],
1149
- undefined,
1150
- SUMMARY_SYSTEM_PROMPT,
1151
- {
1152
- config: providerConfig,
1153
- signal,
1154
- },
1155
- );
1156
-
1157
- const nextSummary = extractText(response.content).trim();
1158
- if (nextSummary.length > 0) {
1159
- return {
1160
- summary: this.clampSummary(nextSummary),
1161
- inputTokens: response.usage.inputTokens,
1162
- outputTokens: response.usage.outputTokens,
1163
- model: response.model,
1164
- cacheCreationInputTokens:
1165
- response.usage.cacheCreationInputTokens ?? 0,
1166
- cacheReadInputTokens: response.usage.cacheReadInputTokens ?? 0,
1167
- rawResponse: response.rawResponse,
1168
- failed: false,
1169
- };
1170
- }
1171
- } catch (err) {
1172
- failed = true;
1173
- log.warn({ err }, "Summary generation failed, using local fallback");
1174
- }
1175
-
1176
- // Fallback: extract text-only transcript for local summary generation.
1177
- const textTranscript = transcriptBlocks
1178
- .filter(
1179
- (b): b is Extract<ContentBlock, { type: "text" }> => b.type === "text",
1180
- )
1181
- .map((b) => b.text)
1182
- .join("\n\n");
1183
-
1184
- return {
1185
- summary: fallbackSummary(currentSummary, textTranscript),
1186
- inputTokens: 0,
1187
- outputTokens: 0,
1188
- model: "",
1189
- cacheCreationInputTokens: 0,
1190
- cacheReadInputTokens: 0,
1191
- failed,
1192
- };
1193
- }
1194
-
1195
- private clampSummary(summary: string): string {
1196
- // Budget in tokens → approximate char limit (4 chars ≈ 1 token).
1197
- const maxChars = this.summaryMaxTokens * 4;
1198
- if (summary.length <= maxChars) return summary;
1199
- return clampSummaryAtSectionBoundary(summary, maxChars);
1200
- }
1201
- }
1202
-
1203
- /**
1204
- * Truncate a markdown summary that exceeds `maxChars`, preferring a
1205
- * section boundary (`\n## `) so we never cut a heading mid-text. Falls
1206
- * back to a hard character slice when no boundary exists in the safe
1207
- * region (first half of the budget).
1208
- */
1209
- export function clampSummaryAtSectionBoundary(
1210
- summary: string,
1211
- maxChars: number,
1212
- ): string {
1213
- if (summary.length <= maxChars) return summary;
1214
- const ELLIPSIS = "...";
1215
- // Hard limit we must stay under, leaving room for the ellipsis suffix.
1216
- const cutoff = maxChars - ELLIPSIS.length;
1217
- if (cutoff <= 0) return ELLIPSIS;
1218
- const head = safeStringSlice(summary, 0, cutoff);
1219
- // Find the last `## ` heading at a line start. Require it to be past the
1220
- // midpoint of the allowed region so we don't drop most of the summary
1221
- // just to hit a boundary — better to cut mid-section late than to keep
1222
- // almost nothing.
1223
- const halfway = Math.floor(cutoff / 2);
1224
- const boundary = head.lastIndexOf("\n## ");
1225
- if (boundary >= halfway) {
1226
- return `${head.slice(0, boundary).trimEnd()}\n${ELLIPSIS}`;
1227
- }
1228
- return `${head}${ELLIPSIS}`;
1229
- }
1230
-
1231
- function collectUserTurnStartIndexes(messages: Message[]): number[] {
1232
- const starts: number[] = [];
1233
- for (let i = 0; i < messages.length; i++) {
1234
- const message = messages[i];
1235
- if (message.role !== "user") continue;
1236
- if (getSummaryFromContextMessage(message) != null) continue;
1237
- if (isToolResultOnly(message)) continue;
1238
- starts.push(i);
1239
- }
1240
- return starts;
1241
- }
1242
-
1243
- /**
1244
- * Count messages that have DB counterparts. Context-summary messages are
1245
- * in-memory-only and excluded; ALL other messages (including tool-result-only
1246
- * user messages) have a corresponding row in the DB and must be counted so
1247
- * that `contextCompactedMessageCount` indexes the DB array correctly.
1248
- */
1249
- function countPersistedMessages(messages: Message[]): number {
1250
- return messages.filter((message) => {
1251
- return getSummaryFromContextMessage(message) == null;
1252
- }).length;
1253
- }
1254
-
1255
- function isSystemNoticeBlock(block: ContentBlock): boolean {
1256
- if (block.type !== "text") return false;
1257
- const text = (block as { text?: string }).text ?? "";
1258
- return (
1259
- text.startsWith("<system_notice>") && text.endsWith("</system_notice>")
1260
- );
1261
351
  }
1262
352
 
1263
- /** A user message that contains ONLY tool_result blocks (no text or other content).
1264
- * System notice text blocks (retry nudges, progress checks) do not count as user content. */
1265
- function isToolResultOnly(message: Message): boolean {
1266
- return (
1267
- message.content.length > 0 &&
1268
- message.content.every(
1269
- (block) =>
1270
- block.type === "tool_result" ||
1271
- block.type === "web_search_tool_result" ||
1272
- isSystemNoticeBlock(block),
1273
- )
1274
- );
1275
- }
353
+ // ---------------------------------------------------------------------------
354
+ // Helpers
355
+ // ---------------------------------------------------------------------------
1276
356
 
1277
- /**
1278
- * Walk the keep boundary backward to ensure tool_use/tool_result pairs are
1279
- * never split across the compaction boundary. If the first kept message is
1280
- * a user message containing tool_result blocks whose matching tool_use blocks
1281
- * live in the preceding (compacted-away) assistant message, include that
1282
- * assistant message in the kept set.
1283
- */
1284
- function adjustForToolPairs(
357
+ function noopResult(
1285
358
  messages: Message[],
1286
- keepFromIndex: number,
1287
- ): number {
1288
- let idx = keepFromIndex;
1289
- while (idx > 0) {
1290
- const msg = messages[idx];
1291
- if (!msg || msg.role !== "user") break;
1292
-
1293
- // Collect tool_use_ids referenced by tool_results in this user message
1294
- const referencedIds = new Set<string>();
1295
- for (const block of msg.content) {
1296
- if (
1297
- (block.type === "tool_result" ||
1298
- block.type === "web_search_tool_result") &&
1299
- "tool_use_id" in block
1300
- ) {
1301
- referencedIds.add((block as { tool_use_id: string }).tool_use_id);
1302
- }
1303
- }
1304
- if (referencedIds.size === 0) break;
1305
-
1306
- // Check if the preceding assistant message contains matching tool_uses
1307
- const prev = messages[idx - 1];
1308
- if (!prev || prev.role !== "assistant") break;
1309
-
1310
- const hasOrphanedPair = prev.content.some(
1311
- (block) =>
1312
- (block.type === "tool_use" || block.type === "server_tool_use") &&
1313
- "id" in block &&
1314
- referencedIds.has((block as { id: string }).id),
1315
- );
1316
- if (!hasOrphanedPair) break;
1317
-
1318
- // Include the assistant message
1319
- idx--;
1320
-
1321
- // The assistant message may itself be preceded by a tool_result user
1322
- // message that pairs with an even earlier assistant — continue the check
1323
- if (idx > 0 && messages[idx - 1]?.role === "user") {
1324
- idx--;
1325
- } else {
1326
- break;
1327
- }
1328
- }
1329
- return idx;
1330
- }
1331
-
1332
- /**
1333
- * Strip `tool_result` blocks whose matching `tool_use` is not present in
1334
- * the message array. Used by the force-rescue path in `_maybeCompact`
1335
- * which bypasses `adjustForToolPairs` to honor user-explicit `/compact`
1336
- * commands — the kept region's first user message can otherwise contain
1337
- * an orphan `tool_result`, which the LLM API rejects.
1338
- *
1339
- * A user message that contains only orphan `tool_result` blocks is
1340
- * dropped entirely; partial messages keep the surviving content blocks.
1341
- */
1342
- function stripOrphanToolResults(messages: Message[]): Message[] {
1343
- const knownToolUseIds = new Set<string>();
1344
- for (const msg of messages) {
1345
- if (msg.role !== "assistant") continue;
1346
- for (const block of msg.content) {
1347
- if (
1348
- (block.type === "tool_use" || block.type === "server_tool_use") &&
1349
- "id" in block
1350
- ) {
1351
- knownToolUseIds.add((block as { id: string }).id);
1352
- }
1353
- }
1354
- }
1355
-
1356
- return messages.flatMap((msg) => {
1357
- if (msg.role !== "user") return [msg];
1358
- let stripped = false;
1359
- const filtered = msg.content.filter((block) => {
1360
- if (
1361
- (block.type === "tool_result" ||
1362
- block.type === "web_search_tool_result") &&
1363
- "tool_use_id" in block
1364
- ) {
1365
- const id = (block as { tool_use_id: string }).tool_use_id;
1366
- if (!knownToolUseIds.has(id)) {
1367
- stripped = true;
1368
- return false;
1369
- }
1370
- }
1371
- return true;
1372
- });
1373
- if (!stripped) return [msg];
1374
- if (filtered.length === 0) return [];
1375
- return [{ ...msg, content: filtered }];
1376
- });
1377
- }
1378
-
1379
- export function getSummaryFromContextMessage(
1380
- message: Message | undefined,
1381
- ): string | null {
1382
- if (!message) return null;
1383
- const text = extractText(message.content).trim();
1384
- if (!text.startsWith(CONTEXT_SUMMARY_MARKER)) return null;
1385
- if (INTERNAL_CONTEXT_SUMMARY_MESSAGES.has(message)) {
1386
- return stripContextSummaryTags(text);
1387
- }
1388
- return null;
1389
- }
1390
-
1391
- function stripContextSummaryTags(text: string): string {
1392
- let inner = text.slice(CONTEXT_SUMMARY_MARKER.length);
1393
- const closeIdx = inner.lastIndexOf("</context_summary>");
1394
- if (closeIdx !== -1) {
1395
- inner = inner.slice(0, closeIdx);
1396
- }
1397
- return inner.trim();
1398
- }
1399
-
1400
- export function createContextSummaryMessage(summary: string): Message {
1401
- const message: Message = {
1402
- role: "user",
1403
- content: [
1404
- {
1405
- type: "text",
1406
- text: `${CONTEXT_SUMMARY_MARKER}\n${summary}\n</context_summary>`,
1407
- },
1408
- ],
359
+ estimated: number,
360
+ opts: { maxInputTokens: number; thresholdTokens: number; reason: string },
361
+ ): ContextWindowResult {
362
+ return {
363
+ messages,
364
+ compacted: false,
365
+ previousEstimatedInputTokens: estimated,
366
+ estimatedInputTokens: estimated,
367
+ maxInputTokens: opts.maxInputTokens,
368
+ thresholdTokens: opts.thresholdTokens,
369
+ compactedMessages: 0,
370
+ compactedPersistedMessages: 0,
371
+ summaryCalls: 0,
372
+ summaryInputTokens: 0,
373
+ summaryOutputTokens: 0,
374
+ summaryModel: "",
375
+ summaryText: getSummaryFromContextMessage(messages[0]) ?? "",
376
+ reason: opts.reason,
1409
377
  };
1410
- INTERNAL_CONTEXT_SUMMARY_MESSAGES.add(message);
1411
- return message;
1412
- }
1413
-
1414
- /**
1415
- * Walk `messages` backward and return the concatenated text content of the
1416
- * most recent assistant message that contains at least one non-empty text
1417
- * block. tool_use / tool_result / image / unknown blocks are skipped. The
1418
- * result is trimmed and (if longer than `maxChars`) clamped from the START
1419
- * so the END — where "next step" / "now I'll …" narration tends to land —
1420
- * is preserved.
1421
- *
1422
- * Returns `null` when no eligible assistant text is found (e.g. compactable
1423
- * region was all user/tool messages, or all assistant messages were
1424
- * tool_use-only). The caller treats `null` as "no anchor to splice".
1425
- *
1426
- * Used by `_maybeCompact` to force-keep the last assistant text from the
1427
- * compactable region into the post-compaction summary message, so the
1428
- * model's most recent self-narration survives summarization regardless of
1429
- * whether the LLM summarizer chose to surface it.
1430
- */
1431
- export function extractTailAssistantText(
1432
- messages: Message[],
1433
- maxChars: number = TAIL_ANCHOR_MAX_CHARS,
1434
- ): string | null {
1435
- for (let i = messages.length - 1; i >= 0; i--) {
1436
- const message = messages[i];
1437
- if (message?.role !== "assistant") continue;
1438
- const text = extractText(message.content).trim();
1439
- if (text.length === 0) continue;
1440
- if (text.length <= maxChars) return text;
1441
- // Keep the END — most recent narration wins.
1442
- const truncated = safeStringSlice(
1443
- text,
1444
- text.length - maxChars,
1445
- text.length,
1446
- );
1447
- return `[...truncated] ${truncated}`;
1448
- }
1449
- return null;
1450
- }
1451
-
1452
- /**
1453
- * Splice a verbatim tail-anchor block onto the end of the LLM-produced
1454
- * summary text. The tag-wrapped block is structurally distinct from any
1455
- * `## ` section the LLM might generate, so it survives section-boundary
1456
- * clamping in `clampSummaryAtSectionBoundary` (which only runs on the LLM
1457
- * summary itself, before this splice).
1458
- *
1459
- * Idempotent: if the summary already ends with a `<verbatim_tail>…` block
1460
- * (e.g. from a prior compaction whose summary was carried forward as
1461
- * `existingSummary`), it is replaced rather than stacked, so successive
1462
- * compactions don't accumulate stale tails.
1463
- */
1464
- export function appendTailAnchorToSummary(
1465
- summary: string,
1466
- tailText: string,
1467
- ): string {
1468
- const trimmed = summary.trimEnd();
1469
- const existingOpen = trimmed.lastIndexOf(TAIL_ANCHOR_OPEN_TAG);
1470
- const base =
1471
- existingOpen >= 0 ? trimmed.slice(0, existingOpen).trimEnd() : trimmed;
1472
- return `${base}\n\n${TAIL_ANCHOR_OPEN_TAG}\n${tailText.trim()}\n${TAIL_ANCHOR_CLOSE_TAG}`;
1473
- }
1474
-
1475
- /**
1476
- * Build content blocks for the summary prompt. Returns a mix of text blocks
1477
- * (for the scaffolding, existing summary, and serialized non-image content)
1478
- * and image blocks (preserved from the original messages so the summarizer
1479
- * can describe what was in them).
1480
- */
1481
- function buildSummaryContentBlocks(
1482
- currentSummary: string,
1483
- transcriptBlocks: ContentBlock[],
1484
- retainedThreadRefs: string[],
1485
- options: { compressionPressure: boolean } = { compressionPressure: false },
1486
- ): ContentBlock[] {
1487
- const lines = [
1488
- "Update the summary with new transcript data.",
1489
- "If new information conflicts with older notes, keep the most recent and explicit detail.",
1490
- "Keep all unresolved asks and next steps.",
1491
- "For any images included below, describe their visual content in the summary so the information is preserved after compaction.",
1492
- ];
1493
- if (options.compressionPressure) {
1494
- lines.push(
1495
- "The existing summary is approaching its token budget. Compress older durable content aggressively (drop detail that is no longer load-bearing, merge bullets, tighten prose) while preserving the most recent turns' nuance.",
1496
- );
1497
- }
1498
- lines.push(
1499
- "",
1500
- "### Existing Summary",
1501
- currentSummary.trim().length > 0 ? currentSummary.trim() : "None.",
1502
- "",
1503
- );
1504
- if (retainedThreadRefs.length > 0) {
1505
- lines.push(
1506
- "### Retained Thread References",
1507
- "These reply tag lines remain in the live context after compaction. Each `→ Mxxxxxx` cites a parent message by alias; if that parent appears in the Transcript below, preserve its text verbatim.",
1508
- ...retainedThreadRefs.map((ref) => `- ${ref}`),
1509
- "",
1510
- );
1511
- }
1512
- lines.push("### Transcript");
1513
- return [
1514
- {
1515
- type: "text",
1516
- text: lines.join("\n"),
1517
- } as ContentBlock,
1518
- ...transcriptBlocks,
1519
- ];
1520
- }
1521
-
1522
- /**
1523
- * Scan retained-tail messages for Slack-style reply tag lines that cite a
1524
- * thread parent via the `→ Mxxxxxx` alias convention. Returns the full tag
1525
- * line for each match (de-duplicated, order-preserved) so the summarizer
1526
- * has a concrete list of parents whose text must be preserved verbatim.
1527
- *
1528
- * Non-slack conversations and retained tails without any reply markers
1529
- * produce an empty list — in that case the summarizer is told explicitly
1530
- * that no verbatim preservation is required.
1531
- */
1532
- function collectRetainedThreadReferences(
1533
- retainedMessages: Message[],
1534
- ): string[] {
1535
- const seen = new Set<string>();
1536
- const out: string[] = [];
1537
- for (const msg of retainedMessages) {
1538
- for (const block of msg.content) {
1539
- if (block.type !== "text") continue;
1540
- const text = (block as { text: string }).text;
1541
- for (const line of text.split("\n")) {
1542
- if (!THREAD_REPLY_REFERENCE_PATTERN.test(line)) continue;
1543
- const trimmed = line.trim();
1544
- if (trimmed.length === 0) continue;
1545
- if (seen.has(trimmed)) continue;
1546
- seen.add(trimmed);
1547
- out.push(trimmed);
1548
- }
1549
- }
1550
- }
1551
- return out;
1552
- }
1553
-
1554
- /**
1555
- * Serialize messages into a sequence of content blocks. Text-based content
1556
- * (tool calls, tool results, thinking, etc.) is serialized into text blocks.
1557
- * Image blocks — both top-level and nested inside tool_result contentBlocks —
1558
- * are preserved as-is so the summarizer LLM can see them.
1559
- */
1560
- function serializeMessagesToContentBlocks(messages: Message[]): ContentBlock[] {
1561
- const blocks: ContentBlock[] = [];
1562
- for (let i = 0; i < messages.length; i++) {
1563
- const msg = messages[i];
1564
- const textLines: string[] = [`Message #${i + 1} (${msg.role})`];
1565
-
1566
- for (const block of msg.content) {
1567
- if (block.type === "image") {
1568
- // Flush accumulated text lines before the image.
1569
- if (textLines.length > 0) {
1570
- blocks.push({ type: "text", text: textLines.join("\n") });
1571
- textLines.length = 0;
1572
- }
1573
- blocks.push(block);
1574
- } else if (block.type === "tool_result") {
1575
- // guard:allow-tool-result-only — web_search_tool_result handled by serializeBlock via else branch
1576
- // Extract images from tool_result contentBlocks before serializing.
1577
- const collectedImages: ImageContent[] = [];
1578
- textLines.push(serializeToolResultBlock(block, collectedImages));
1579
- if (collectedImages.length > 0) {
1580
- // Flush text, emit collected images, then continue.
1581
- if (textLines.length > 0) {
1582
- blocks.push({ type: "text", text: textLines.join("\n") });
1583
- textLines.length = 0;
1584
- }
1585
- blocks.push(...collectedImages);
1586
- }
1587
- } else {
1588
- textLines.push(serializeBlock(block));
1589
- }
1590
- }
1591
-
1592
- // Flush remaining text lines for this message.
1593
- if (textLines.length > 0) {
1594
- blocks.push({ type: "text", text: textLines.join("\n") });
1595
- }
1596
- }
1597
- return blocks;
1598
- }
1599
-
1600
- /**
1601
- * Serialize images nested inside tool_result contentBlocks, returning them
1602
- * as separate content blocks to preserve for the summarizer.
1603
- */
1604
- function serializeToolResultBlock(
1605
- block: Extract<ContentBlock, { type: "tool_result" }>,
1606
- collectedImages: ImageContent[],
1607
- ): string {
1608
- if (block.contentBlocks) {
1609
- for (const cb of block.contentBlocks) {
1610
- if (cb.type === "image") {
1611
- collectedImages.push(cb);
1612
- }
1613
- }
1614
- }
1615
- return `tool_result ${block.tool_use_id}${
1616
- block.is_error ? " (error)" : ""
1617
- }: ${clampText(block.content)}`;
1618
- }
1619
-
1620
- function serializeBlock(block: ContentBlock): string {
1621
- switch (block.type) {
1622
- case "text":
1623
- return `text: ${clampText(block.text)}`;
1624
- case "tool_use":
1625
- return `tool_use ${block.name}: ${clampText(stableJson(block.input))}`;
1626
- case "tool_result":
1627
- return `tool_result ${block.tool_use_id}${
1628
- block.is_error ? " (error)" : ""
1629
- }: ${clampText(block.content)}`;
1630
- case "image":
1631
- // Top-level images are handled by serializeMessagesToContentBlocks.
1632
- // This path is only hit for images in unexpected positions.
1633
- return `image: ${block.source.media_type}, ${
1634
- Math.ceil(block.source.data.length / 4) * 3
1635
- } bytes(base64)`;
1636
- case "file": {
1637
- const sizeBytes = Math.ceil(block.source.data.length / 4) * 3;
1638
- const parts = [
1639
- `file: ${block.source.filename}`,
1640
- block.source.media_type,
1641
- `${sizeBytes} bytes(base64)`,
1642
- ];
1643
- if (block.extracted_text) {
1644
- parts.push(`text=${clampText(block.extracted_text)}`);
1645
- }
1646
- return parts.join(", ");
1647
- }
1648
- case "thinking":
1649
- return `thinking: ${clampText(block.thinking)}`;
1650
- case "redacted_thinking":
1651
- return "redacted_thinking";
1652
- case "server_tool_use":
1653
- return `server_tool_use ${block.name}: ${clampText(stableJson(block.input))}`;
1654
- case "web_search_tool_result":
1655
- return `web_search_tool_result ${block.tool_use_id}`;
1656
- default:
1657
- return "unknown_block";
1658
- }
1659
- }
1660
-
1661
- function clampText(text: string): string {
1662
- if (text.length <= MAX_BLOCK_PREVIEW_CHARS) return text;
1663
- return `${safeStringSlice(text, 0, MAX_BLOCK_PREVIEW_CHARS)}... [truncated ${
1664
- text.length - MAX_BLOCK_PREVIEW_CHARS
1665
- } chars]`;
1666
- }
1667
-
1668
- function fallbackSummary(currentSummary: string, chunk: string): string {
1669
- const lines = chunk
1670
- .split("\n")
1671
- .map((line) => line.trim())
1672
- .filter((line) => line.length > 0);
1673
- const recentLines = lines.slice(-120).join("\n");
1674
- const merged = [
1675
- currentSummary.trim(),
1676
- "## Recent Progress",
1677
- recentLines.length > 0 ? recentLines : "No new details.",
1678
- ]
1679
- .filter((part) => part.length > 0)
1680
- .join("\n\n");
1681
- if (merged.length <= MAX_FALLBACK_SUMMARY_CHARS) return merged;
1682
- return merged.slice(merged.length - MAX_FALLBACK_SUMMARY_CHARS);
1683
- }
1684
-
1685
- function extractText(content: ContentBlock[]): string {
1686
- return content
1687
- .filter(
1688
- (block): block is Extract<ContentBlock, { type: "text" }> =>
1689
- block.type === "text",
1690
- )
1691
- .map((block) => block.text)
1692
- .join("\n");
1693
- }
1694
-
1695
- function stableJson(value: unknown): string {
1696
- try {
1697
- return JSON.stringify(value);
1698
- } catch {
1699
- return "[unserializable]";
1700
- }
1701
378
  }