@vellumai/assistant 0.8.1 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (506)
  1. package/ARCHITECTURE.md +2 -7
  2. package/Dockerfile +75 -1
  3. package/bun.lock +11 -1
  4. package/docker-entrypoint.sh +5 -0
  5. package/docker-init-apt-root.sh +94 -0
  6. package/docker-kata-apt-env.sh +39 -0
  7. package/docs/plugins.md +88 -47
  8. package/docs/skills.md +9 -7
  9. package/examples/plugins/echo/README.md +27 -27
  10. package/examples/plugins/echo/package.json +3 -0
  11. package/examples/plugins/echo/register.ts +31 -31
  12. package/node_modules/@vellumai/slack-text/src/index.test.ts +114 -14
  13. package/node_modules/@vellumai/slack-text/src/index.ts +82 -18
  14. package/openapi.yaml +325 -3
  15. package/package.json +3 -1
  16. package/scripts/generate-openapi.ts +83 -10
  17. package/scripts/sync-llm-catalog.ts +2 -2
  18. package/scripts/sync-web-search-catalog.ts +47 -25
  19. package/src/__tests__/agent-image-optimize.test.ts +11 -3
  20. package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +131 -0
  21. package/src/__tests__/anthropic-provider.test.ts +45 -0
  22. package/src/__tests__/app-builder-tool-scripts.test.ts +9 -3
  23. package/src/__tests__/app-executors.test.ts +220 -4
  24. package/src/__tests__/auto-analysis-end-to-end.test.ts +35 -0
  25. package/src/__tests__/bundled-asset.test.ts +6 -6
  26. package/src/__tests__/channel-availability-routes.test.ts +206 -0
  27. package/src/__tests__/channel-delivery-store.test.ts +289 -1
  28. package/src/__tests__/circuit-breaker-pipeline.test.ts +0 -1
  29. package/src/__tests__/clawhub.test.ts +75 -16
  30. package/src/__tests__/compactor-tail-resolution.test.ts +41 -0
  31. package/src/__tests__/config-schema.test.ts +21 -0
  32. package/src/__tests__/config-set-route.test.ts +80 -0
  33. package/src/__tests__/config-sounds-sync.test.ts +97 -0
  34. package/src/__tests__/config-watcher-skill-reseed.test.ts +453 -0
  35. package/src/__tests__/context-search-conversations-source.test.ts +117 -2
  36. package/src/__tests__/context-search-memory-v2-source.test.ts +0 -1
  37. package/src/__tests__/context-search-workspace-source.test.ts +7 -0
  38. package/src/__tests__/context-token-estimator.test.ts +1 -0
  39. package/src/__tests__/conversation-abort-tool-results.test.ts +4 -1
  40. package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +1 -0
  41. package/src/__tests__/conversation-agent-loop-overflow.test.ts +92 -92
  42. package/src/__tests__/conversation-agent-loop.test.ts +2 -0
  43. package/src/__tests__/conversation-error.test.ts +42 -3
  44. package/src/__tests__/conversation-fork-crud.test.ts +82 -0
  45. package/src/__tests__/conversation-inference-profile-route.test.ts +40 -4
  46. package/src/__tests__/conversation-lifecycle.test.ts +173 -0
  47. package/src/__tests__/conversation-message-sync-tags.test.ts +97 -0
  48. package/src/__tests__/conversation-pairing.test.ts +54 -0
  49. package/src/__tests__/conversation-process-callsite.test.ts +4 -1
  50. package/src/__tests__/conversation-provider-retry-repair.test.ts +5 -1
  51. package/src/__tests__/conversation-queue.test.ts +4 -1
  52. package/src/__tests__/conversation-runtime-assembly.test.ts +76 -9
  53. package/src/__tests__/conversation-slash-queue.test.ts +59 -1
  54. package/src/__tests__/conversation-slash-unknown.test.ts +4 -1
  55. package/src/__tests__/conversation-surfaces-table-action.test.ts +360 -0
  56. package/src/__tests__/conversation-sync-tags.test.ts +235 -0
  57. package/src/__tests__/conversation-workspace-injection.test.ts +5 -1
  58. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +5 -1
  59. package/src/__tests__/credential-security-invariants.test.ts +3 -2
  60. package/src/__tests__/db-slack-external-content-normalization.test.ts +301 -0
  61. package/src/__tests__/delete-managed-skill-tool.test.ts +55 -13
  62. package/src/__tests__/disk-pressure-tools.test.ts +1 -0
  63. package/src/__tests__/dm-backfill.test.ts +121 -10
  64. package/src/__tests__/document-tool-security.test.ts +258 -0
  65. package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +0 -1
  66. package/src/__tests__/edit-propagation.test.ts +33 -0
  67. package/src/__tests__/empty-response-pipeline.test.ts +0 -4
  68. package/src/__tests__/external-plugin-loader.test.ts +60 -36
  69. package/src/__tests__/filing-service.test.ts +140 -0
  70. package/src/__tests__/get-skill-detail-audit.test.ts +0 -4
  71. package/src/__tests__/handlers-skills-memory-v2-reseed.test.ts +43 -62
  72. package/src/__tests__/helpers/tar-fixtures.ts +39 -0
  73. package/src/__tests__/helpers/wait-for.ts +21 -0
  74. package/src/__tests__/history-repair-pipeline.test.ts +0 -3
  75. package/src/__tests__/history-repair.test.ts +73 -0
  76. package/src/__tests__/host-app-control-proxy.test.ts +266 -10
  77. package/src/__tests__/image-credentials.test.ts +1 -1
  78. package/src/__tests__/inbound-slack-persistence.test.ts +2 -0
  79. package/src/__tests__/inference-no-mode-boot-e2e.test.ts +1 -1
  80. package/src/__tests__/inference-profile-reaper.test.ts +4 -2
  81. package/src/__tests__/inference-profile-session-handler.test.ts +18 -6
  82. package/src/__tests__/inference-profile-session-ipc.test.ts +17 -5
  83. package/src/__tests__/injector-chain.test.ts +10 -8
  84. package/src/__tests__/install-skill-routing.test.ts +155 -37
  85. package/src/__tests__/lifecycle-memory-v2-seed.test.ts +92 -3
  86. package/src/__tests__/list-messages-page-latest.test.ts +55 -0
  87. package/src/__tests__/llm-call-pipeline.test.ts +0 -3
  88. package/src/__tests__/llm-catalog-parity.test.ts +55 -13
  89. package/src/__tests__/llm-request-log-source-clickhouse.test.ts +34 -0
  90. package/src/__tests__/llm-request-log-source-factory.test.ts +29 -53
  91. package/src/__tests__/llm-usage-store.test.ts +114 -0
  92. package/src/__tests__/managed-profile-guard.test.ts +31 -29
  93. package/src/__tests__/managed-skill-lifecycle.test.ts +109 -18
  94. package/src/__tests__/managed-store.test.ts +84 -192
  95. package/src/__tests__/media-generate-image.test.ts +1 -1
  96. package/src/__tests__/memory-retrieval-pipeline.test.ts +0 -2
  97. package/src/__tests__/messages-after-tiebreaker.test.ts +122 -0
  98. package/src/__tests__/oauth-commands-routes.test.ts +168 -16
  99. package/src/__tests__/oauth-provider-profiles.test.ts +9 -0
  100. package/src/__tests__/openai-provider.test.ts +24 -0
  101. package/src/__tests__/openai-responses-cutover-guard.test.ts +17 -9
  102. package/src/__tests__/overflow-reduce-pipeline.test.ts +0 -2
  103. package/src/__tests__/persistence-pipeline.test.ts +0 -2
  104. package/src/__tests__/{managed-proxy-context.test.ts → platform-proxy-context.test.ts} +1 -1
  105. package/src/__tests__/platform.test.ts +2 -0
  106. package/src/__tests__/plugin-api-shim.test.ts +125 -0
  107. package/src/__tests__/plugin-bootstrap.test.ts +10 -36
  108. package/src/__tests__/plugin-external-api.test.ts +68 -0
  109. package/src/__tests__/plugin-registry.test.ts +0 -77
  110. package/src/__tests__/plugin-route-contribution.test.ts +0 -1
  111. package/src/__tests__/plugin-skill-contribution.test.ts +0 -2
  112. package/src/__tests__/plugin-tool-contribution.test.ts +16 -15
  113. package/src/__tests__/plugin-types.test.ts +3 -13
  114. package/src/__tests__/process-message-background-slack.test.ts +8 -1
  115. package/src/__tests__/process-message-display-content.test.ts +421 -0
  116. package/src/__tests__/provider-catalog-visibility.test.ts +142 -0
  117. package/src/__tests__/provider-error-scenarios.test.ts +111 -0
  118. package/src/__tests__/{provider-managed-proxy-integration.test.ts → provider-platform-proxy-integration.test.ts} +8 -8
  119. package/src/__tests__/scaffold-managed-skill-tool.test.ts +65 -13
  120. package/src/__tests__/schedule-routes.test.ts +50 -3
  121. package/src/__tests__/schedule-store.test.ts +94 -0
  122. package/src/__tests__/scheduler-reuse-conversation.test.ts +54 -7
  123. package/src/__tests__/schema-transforms.test.ts +20 -0
  124. package/src/__tests__/search-skills-unified.test.ts +0 -5
  125. package/src/__tests__/server-history-render.test.ts +43 -0
  126. package/src/__tests__/skill-load-feature-flag.test.ts +0 -12
  127. package/src/__tests__/skill-load-tool.test.ts +27 -89
  128. package/src/__tests__/skill-memory.test.ts +23 -3
  129. package/src/__tests__/skills-file-content-endpoint.test.ts +9 -38
  130. package/src/__tests__/skills-files-catalog-fallback.test.ts +0 -3
  131. package/src/__tests__/skills-install-extract.test.ts +49 -38
  132. package/src/__tests__/skills-install-staging.test.ts +159 -0
  133. package/src/__tests__/skills-uninstall.test.ts +9 -41
  134. package/src/__tests__/skills.test.ts +51 -58
  135. package/src/__tests__/slack-channel-config.test.ts +9 -0
  136. package/src/__tests__/subagent-tool-filtering.test.ts +50 -0
  137. package/src/__tests__/system-prompt.test.ts +737 -63
  138. package/src/__tests__/terminal-tools.test.ts +28 -1
  139. package/src/__tests__/thread-backfill.test.ts +557 -27
  140. package/src/__tests__/title-generate-pipeline.test.ts +0 -13
  141. package/src/__tests__/token-estimate-pipeline.test.ts +0 -3
  142. package/src/__tests__/tool-error-pipeline.test.ts +0 -3
  143. package/src/__tests__/tool-execute-pipeline.test.ts +0 -5
  144. package/src/__tests__/tool-executor-lifecycle-events.test.ts +1 -1
  145. package/src/__tests__/tool-executor.test.ts +16 -4
  146. package/src/__tests__/tool-result-truncate-pipeline.test.ts +0 -12
  147. package/src/__tests__/turn-events-store.test.ts +256 -0
  148. package/src/__tests__/twilio-routes.test.ts +4 -0
  149. package/src/__tests__/user-plugin-loader.test.ts +0 -7
  150. package/src/__tests__/voice-session-bridge.test.ts +198 -0
  151. package/src/__tests__/web-search-catalog-parity.test.ts +32 -10
  152. package/src/__tests__/workspace-migration-057-repair-stale-gemini-model-ids.test.ts +115 -3
  153. package/src/__tests__/workspace-migration-072-seed-reply-suggestion-callsite.test.ts +50 -0
  154. package/src/__tests__/workspace-migration-073-repair-recall-callsite-empty-profile.test.ts +153 -0
  155. package/src/__tests__/workspace-migration-085-memory-v2-bm25-b-reembed-disabled-v2-pages.test.ts +220 -0
  156. package/src/__tests__/workspace-migration-086-revert-stale-gemini-mis-rewrites.test.ts +269 -0
  157. package/src/__tests__/workspace-migration-remove-legacy-skills-index.test.ts +309 -0
  158. package/src/__tests__/workspace-migrations-runner.test.ts +111 -3
  159. package/src/acp/resolve-agent.ts +1 -1
  160. package/src/agent/image-optimize.ts +13 -5
  161. package/src/calls/voice-session-bridge.ts +61 -42
  162. package/src/channels/types.ts +108 -0
  163. package/src/cli/__tests__/unknown-command.test.ts +24 -0
  164. package/src/cli/commands/__tests__/changelog.test.ts +304 -319
  165. package/src/cli/commands/__tests__/schedules.test.ts +491 -0
  166. package/src/cli/commands/changelog.ts +106 -42
  167. package/src/cli/commands/conversations.ts +102 -17
  168. package/src/cli/commands/default-action.ts +10 -53
  169. package/src/cli/commands/notifications.ts +329 -317
  170. package/src/cli/commands/plugins.ts +185 -0
  171. package/src/cli/commands/schedules.ts +391 -0
  172. package/src/cli/commands/telemetry.ts +40 -0
  173. package/src/cli/lib/__tests__/cli-colors.test.ts +48 -0
  174. package/src/cli/lib/__tests__/confirm-prompt.test.ts +159 -0
  175. package/src/cli/lib/__tests__/install-from-github.test.ts +355 -0
  176. package/src/cli/lib/__tests__/list-installed-plugins.test.ts +154 -0
  177. package/src/cli/lib/__tests__/uninstall-plugin.test.ts +124 -0
  178. package/src/cli/lib/__tests__/unknown-command.test.ts +106 -0
  179. package/src/cli/lib/cli-colors.ts +12 -0
  180. package/src/cli/lib/confirm-prompt.ts +79 -0
  181. package/src/cli/lib/install-from-github.ts +304 -0
  182. package/src/cli/lib/list-installed-plugins.ts +137 -0
  183. package/src/cli/lib/uninstall-plugin.ts +82 -0
  184. package/src/cli/lib/unknown-command.ts +111 -0
  185. package/src/cli/program.ts +38 -2
  186. package/src/config/bundled-skills/app-builder/SKILL.md +23 -21
  187. package/src/config/bundled-skills/app-builder/TOOLS.json +7 -0
  188. package/src/config/bundled-skills/computer-use/TOOLS.json +15 -52
  189. package/src/config/bundled-skills/document/SKILL.md +23 -3
  190. package/src/config/bundled-skills/document/TOOLS.json +53 -0
  191. package/src/config/bundled-skills/document/tools/document-delete.ts +12 -0
  192. package/src/config/bundled-skills/document/tools/document-list.ts +12 -0
  193. package/src/config/bundled-skills/document/tools/document-read.ts +12 -0
  194. package/src/config/bundled-skills/skill-management/SKILL.md +2 -2
  195. package/src/config/bundled-skills/skill-management/TOOLS.json +7 -7
  196. package/src/config/bundled-tool-registry.ts +6 -0
  197. package/src/config/feature-flag-registry.json +41 -1
  198. package/src/config/loader.ts +64 -38
  199. package/src/config/schema.ts +7 -10
  200. package/src/config/schemas/__tests__/llm-request-logs.test.ts +36 -0
  201. package/src/config/schemas/channels.ts +8 -0
  202. package/src/config/schemas/compaction.ts +28 -0
  203. package/src/config/schemas/heartbeat.ts +9 -0
  204. package/src/config/schemas/llm-request-logs.ts +31 -7
  205. package/src/config/schemas/llm.ts +3 -0
  206. package/src/config/schemas/memory-retrieval.ts +18 -0
  207. package/src/config/schemas/tools.ts +14 -0
  208. package/src/config/skills.ts +3 -96
  209. package/src/context/compactor.ts +1047 -0
  210. package/src/context/token-estimator.ts +2 -2
  211. package/src/context/window-manager.ts +197 -1520
  212. package/src/credential-execution/managed-catalog.ts +37 -0
  213. package/src/credential-health/credential-health-service.ts +280 -19
  214. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +34 -0
  215. package/src/daemon/__tests__/conversation-tool-setup-exclude.test.ts +138 -0
  216. package/src/daemon/__tests__/conversation-tool-setup.test.ts +74 -0
  217. package/src/daemon/approval-generators.ts +8 -6
  218. package/src/daemon/config-watcher.ts +94 -31
  219. package/src/daemon/conversation-agent-loop.ts +169 -9
  220. package/src/daemon/conversation-error.ts +171 -37
  221. package/src/daemon/conversation-lifecycle.ts +53 -40
  222. package/src/daemon/conversation-messaging.ts +25 -6
  223. package/src/daemon/conversation-process.ts +49 -12
  224. package/src/daemon/conversation-runtime-assembly.ts +16 -1
  225. package/src/daemon/conversation-slash.ts +12 -5
  226. package/src/daemon/conversation-store.ts +11 -4
  227. package/src/daemon/conversation-tool-setup.ts +39 -7
  228. package/src/daemon/conversation.ts +33 -1
  229. package/src/daemon/external-plugins-bootstrap.ts +217 -181
  230. package/src/daemon/first-greeting.ts +22 -2
  231. package/src/daemon/handlers/config-model.ts +6 -5
  232. package/src/daemon/handlers/config-slack-channel.ts +15 -3
  233. package/src/daemon/handlers/shared.ts +14 -5
  234. package/src/daemon/handlers/skills.ts +111 -108
  235. package/src/daemon/history-repair.ts +28 -1
  236. package/src/daemon/host-app-control-proxy.ts +98 -23
  237. package/src/daemon/lifecycle.ts +45 -35
  238. package/src/daemon/meet-host-supervisor.ts +5 -4
  239. package/src/daemon/memory-v2-startup.ts +49 -0
  240. package/src/daemon/message-protocol.ts +1 -0
  241. package/src/daemon/message-types/conversations.ts +25 -0
  242. package/src/daemon/message-types/messages.ts +61 -0
  243. package/src/daemon/message-types/subagents.ts +1 -0
  244. package/src/daemon/message-types/sync.ts +1 -0
  245. package/src/daemon/pkb-reminder-builder.test.ts +1 -1
  246. package/src/daemon/pkb-reminder-builder.ts +1 -1
  247. package/src/daemon/plugin-source-watcher.ts +146 -0
  248. package/src/daemon/process-message.ts +21 -3
  249. package/src/daemon/server.ts +11 -2
  250. package/src/daemon/skill-memory-refresh.ts +29 -0
  251. package/src/documents/document-store.ts +221 -3
  252. package/src/embedded/plugin-api.ts +40 -0
  253. package/src/filing/filing-service.ts +39 -0
  254. package/src/heartbeat/__tests__/heartbeat-service.test.ts +91 -6
  255. package/src/heartbeat/heartbeat-run-store.ts +2 -1
  256. package/src/heartbeat/heartbeat-service.ts +41 -0
  257. package/src/home/__tests__/feed-types.test.ts +40 -0
  258. package/src/home/feed-types.ts +22 -0
  259. package/src/home/post-connect-feed.ts +1 -0
  260. package/src/index.ts +18 -1
  261. package/src/live-voice/__tests__/live-voice-stt.test.ts +57 -0
  262. package/src/mcp/client.ts +20 -4
  263. package/src/media/image-credentials.ts +3 -3
  264. package/src/memory/__tests__/bookmark-crud.test.ts +33 -27
  265. package/src/memory/__tests__/conversation-queries.test.ts +263 -0
  266. package/src/memory/__tests__/jobs-worker-v2-graph-trigger-embed.test.ts +113 -0
  267. package/src/memory/__tests__/memory-retrospective-startup-cleanup.test.ts +119 -14
  268. package/src/memory/__tests__/message-content.test.ts +35 -0
  269. package/src/memory/bookmark-crud.ts +42 -10
  270. package/src/memory/context-search/sources/conversations.ts +62 -2
  271. package/src/memory/context-search/sources/workspace.ts +4 -0
  272. package/src/memory/conversation-crud.ts +63 -19
  273. package/src/memory/conversation-queries.ts +110 -10
  274. package/src/memory/db-init.ts +6 -0
  275. package/src/memory/delivery-crud.ts +152 -5
  276. package/src/memory/embedding-backend.ts +4 -4
  277. package/src/memory/external-conversation-store.ts +66 -5
  278. package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +66 -9
  279. package/src/memory/graph/conversation-graph-memory.ts +31 -15
  280. package/src/memory/graph/tools.ts +3 -3
  281. package/src/memory/indexer.ts +34 -29
  282. package/src/memory/jobs/__tests__/embed-concept-page.test.ts +73 -0
  283. package/src/memory/jobs/embed-concept-page.ts +20 -11
  284. package/src/memory/jobs-worker.ts +6 -1
  285. package/src/memory/llm-request-log-source-clickhouse.ts +17 -10
  286. package/src/memory/llm-request-log-source.ts +19 -52
  287. package/src/memory/llm-usage-store.ts +125 -5
  288. package/src/memory/memory-retrospective-startup-cleanup.ts +72 -5
  289. package/src/memory/message-content.ts +1 -1
  290. package/src/memory/migrations/109-external-conversation-bindings.ts +15 -4
  291. package/src/memory/migrations/229-delete-private-conversations.test.ts +38 -1
  292. package/src/memory/migrations/229-delete-private-conversations.ts +7 -0
  293. package/src/memory/migrations/247-external-conversation-binding-thread-id.ts +78 -0
  294. package/src/memory/migrations/248-create-onboarding-events.ts +21 -0
  295. package/src/memory/migrations/249-normalize-slack-external-content.ts +240 -0
  296. package/src/memory/migrations/index.ts +6 -0
  297. package/src/memory/migrations/registry.ts +8 -0
  298. package/src/memory/onboarding-events-store.ts +106 -0
  299. package/src/memory/schema/bookmarks.ts +0 -2
  300. package/src/memory/schema/calls.ts +1 -0
  301. package/src/memory/schema/inference.ts +1 -3
  302. package/src/memory/schema/infrastructure.ts +12 -0
  303. package/src/memory/turn-events-store.ts +127 -2
  304. package/src/memory/v2/__tests__/activation.test.ts +0 -8
  305. package/src/memory/v2/__tests__/injection.test.ts +98 -8
  306. package/src/memory/v2/__tests__/migration.test.ts +87 -0
  307. package/src/memory/v2/__tests__/page-index.test.ts +83 -0
  308. package/src/memory/v2/__tests__/prompts-router.test.ts +58 -6
  309. package/src/memory/v2/__tests__/qdrant.test.ts +66 -3
  310. package/src/memory/v2/__tests__/router.test.ts +15 -0
  311. package/src/memory/v2/__tests__/skill-store.test.ts +387 -8
  312. package/src/memory/v2/injection.ts +32 -6
  313. package/src/memory/v2/migration.ts +49 -19
  314. package/src/memory/v2/page-index.ts +35 -5
  315. package/src/memory/v2/prompts/router.ts +11 -8
  316. package/src/memory/v2/prompts/sweep.ts +2 -2
  317. package/src/memory/v2/qdrant.ts +135 -7
  318. package/src/memory/v2/router.ts +9 -8
  319. package/src/memory/v2/skill-store.ts +120 -35
  320. package/src/messaging/providers/slack/__tests__/adapter-token-routing.test.ts +45 -5
  321. package/src/messaging/providers/slack/__tests__/download.test.ts +231 -0
  322. package/src/messaging/providers/slack/adapter.ts +43 -5
  323. package/src/messaging/providers/slack/client.ts +27 -0
  324. package/src/messaging/providers/slack/deep-link.ts +65 -0
  325. package/src/messaging/providers/slack/download.ts +104 -0
  326. package/src/messaging/providers/slack/message-metadata.test.ts +32 -0
  327. package/src/messaging/providers/slack/message-metadata.ts +27 -0
  328. package/src/messaging/providers/slack/render-transcript.test.ts +134 -0
  329. package/src/messaging/providers/slack/render-transcript.ts +69 -5
  330. package/src/messaging/providers/slack/types.ts +20 -1
  331. package/src/notifications/conversation-pairing.ts +2 -1
  332. package/src/notifications/decision-engine.ts +2 -1
  333. package/src/notifications/emit-signal.ts +20 -1
  334. package/src/notifications/home-feed-side-effect.ts +54 -0
  335. package/src/notifications/signal.ts +3 -1
  336. package/src/oauth/connection-resolver.ts +8 -4
  337. package/src/oauth/platform-connection.ts +6 -2
  338. package/src/oauth/seed-providers.ts +10 -1
  339. package/src/permissions/checker.ts +2 -0
  340. package/src/permissions/ipc-risk-types.ts +1 -0
  341. package/src/permissions/question-prompter.test.ts +416 -0
  342. package/src/permissions/question-prompter.ts +294 -0
  343. package/src/platform/client.test.ts +1 -1
  344. package/src/platform/client.ts +1 -1
  345. package/src/plugin-api/constants.ts +26 -0
  346. package/src/plugin-api/index.ts +34 -1
  347. package/src/plugin-api/types.ts +104 -22
  348. package/src/plugins/defaults/circuit-breaker.ts +0 -5
  349. package/src/plugins/defaults/compaction.ts +0 -4
  350. package/src/plugins/defaults/empty-response.ts +0 -2
  351. package/src/plugins/defaults/history-repair.ts +0 -2
  352. package/src/plugins/defaults/injectors.ts +36 -3
  353. package/src/plugins/defaults/llm-call.ts +0 -2
  354. package/src/plugins/defaults/memory-retrieval.ts +0 -1
  355. package/src/plugins/defaults/overflow-reduce.ts +0 -1
  356. package/src/plugins/defaults/persistence.ts +0 -2
  357. package/src/plugins/defaults/title-generate.ts +0 -5
  358. package/src/plugins/defaults/token-estimate.ts +0 -2
  359. package/src/plugins/defaults/tool-error.ts +0 -7
  360. package/src/plugins/defaults/tool-execute.ts +0 -2
  361. package/src/plugins/defaults/tool-result-truncate.ts +0 -4
  362. package/src/plugins/ensure-plugin-api-shim.ts +96 -0
  363. package/src/plugins/external-api.ts +104 -0
  364. package/src/plugins/external-plugin-loader.ts +105 -32
  365. package/src/plugins/feature-gate.ts +22 -0
  366. package/src/plugins/pipeline.ts +37 -0
  367. package/src/plugins/registry.ts +48 -80
  368. package/src/plugins/types.ts +31 -26
  369. package/src/plugins/user-loader.ts +21 -2
  370. package/src/proactive-artifact/aux-message-injector.ts +11 -0
  371. package/src/proactive-artifact/job.test.ts +37 -5
  372. package/src/prompts/__tests__/system-prompt.test.ts +12 -0
  373. package/src/prompts/__tests__/task-progress-hint-section.test.ts +99 -0
  374. package/src/prompts/normalize-onboarding.ts +27 -0
  375. package/src/prompts/sections.ts +302 -0
  376. package/src/prompts/system-prompt.ts +63 -166
  377. package/src/prompts/templates/BOOTSTRAP.md +17 -1
  378. package/src/prompts/templates/system-sections.ts +173 -0
  379. package/src/providers/__tests__/inference.test.ts +22 -7
  380. package/src/providers/anthropic/client.ts +28 -28
  381. package/src/providers/connection-resolution.ts +7 -0
  382. package/src/providers/inference/adapter-factory.ts +41 -4
  383. package/src/providers/inference/connections.ts +74 -29
  384. package/src/providers/inference/resolve-auth.ts +12 -4
  385. package/src/providers/model-catalog.ts +294 -12
  386. package/src/providers/openai/chat-completions-provider.ts +10 -2
  387. package/src/providers/openrouter/client.ts +7 -0
  388. package/src/providers/{managed-proxy → platform-proxy}/constants.ts +4 -1
  389. package/src/providers/{managed-proxy → platform-proxy}/context.ts +3 -3
  390. package/src/providers/provider-availability.ts +17 -2
  391. package/src/providers/provider-catalog-visibility.ts +36 -0
  392. package/src/providers/registry.ts +22 -14
  393. package/src/providers/retry.ts +47 -1
  394. package/src/runtime/__tests__/agent-wake.test.ts +152 -0
  395. package/src/runtime/agent-wake.ts +42 -14
  396. package/src/runtime/auth/route-policy.ts +8 -1
  397. package/src/runtime/btw-sidechain.ts +2 -0
  398. package/src/runtime/http-types.ts +19 -0
  399. package/src/runtime/migrations/origin-mode.ts +1 -1
  400. package/src/runtime/pending-interactions.ts +1 -0
  401. package/src/runtime/routes/__tests__/bookmark-routes.test.ts +17 -0
  402. package/src/runtime/routes/__tests__/conversation-management-routes.test.ts +5 -1
  403. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +107 -20
  404. package/src/runtime/routes/__tests__/question-routes.test.ts +395 -0
  405. package/src/runtime/routes/__tests__/tts-routes.test.ts +64 -1
  406. package/src/runtime/routes/acp-routes-list.test.ts +143 -0
  407. package/src/runtime/routes/acp-routes.ts +5 -3
  408. package/src/runtime/routes/auth-routes.ts +1 -1
  409. package/src/runtime/routes/bookmark-routes.ts +5 -3
  410. package/src/runtime/routes/btw-routes.ts +5 -1
  411. package/src/runtime/routes/channel-availability-routes.ts +121 -0
  412. package/src/runtime/routes/conversation-cli-routes.ts +44 -3
  413. package/src/runtime/routes/conversation-list-routes.ts +3 -20
  414. package/src/runtime/routes/conversation-management-routes.ts +17 -42
  415. package/src/runtime/routes/conversation-query-routes.ts +40 -35
  416. package/src/runtime/routes/conversation-routes.ts +90 -11
  417. package/src/runtime/routes/documents-routes.ts +25 -86
  418. package/src/runtime/routes/group-routes.ts +5 -0
  419. package/src/runtime/routes/inbound-conversation.ts +28 -8
  420. package/src/runtime/routes/inbound-message-handler.ts +236 -41
  421. package/src/runtime/routes/inbound-stages/background-dispatch.test.ts +111 -0
  422. package/src/runtime/routes/inbound-stages/background-dispatch.ts +32 -1
  423. package/src/runtime/routes/inbound-stages/edit-intercept.ts +17 -4
  424. package/src/runtime/routes/index.ts +6 -0
  425. package/src/runtime/routes/inference-profile-session-handler.ts +17 -44
  426. package/src/runtime/routes/inference-profile-session-reaper.ts +7 -21
  427. package/src/runtime/routes/inference-provider-connection-routes.ts +65 -21
  428. package/src/runtime/routes/integrations/slack/share.ts +4 -52
  429. package/src/runtime/routes/integrations/slack/token.ts +43 -0
  430. package/src/runtime/routes/integrations/twilio.ts +6 -13
  431. package/src/runtime/routes/notification-routes.ts +1 -1
  432. package/src/runtime/routes/oauth-commands-routes.ts +105 -15
  433. package/src/runtime/routes/oauth-lifecycle-routes.ts +43 -0
  434. package/src/runtime/routes/question-routes.ts +259 -0
  435. package/src/runtime/routes/rename-conversation-routes.ts +2 -33
  436. package/src/runtime/routes/schedule-routes.ts +4 -7
  437. package/src/runtime/routes/subagents-routes.ts +57 -18
  438. package/src/runtime/routes/telemetry-routes.ts +27 -0
  439. package/src/runtime/routes/tts-routes.ts +27 -2
  440. package/src/runtime/routes/workspace-routes.test.ts +43 -0
  441. package/src/runtime/routes/workspace-routes.ts +28 -0
  442. package/src/runtime/services/conversation-serializer.ts +39 -7
  443. package/src/runtime/sync/resource-sync-events.ts +93 -1
  444. package/src/schedule/schedule-store.ts +27 -2
  445. package/src/schedule/scheduler.ts +9 -1
  446. package/src/security/__tests__/untrusted-content.test.ts +86 -0
  447. package/src/security/untrusted-content.ts +93 -8
  448. package/src/skills/catalog-files.ts +1 -1
  449. package/src/skills/catalog-install.ts +233 -116
  450. package/src/skills/clawhub.ts +70 -13
  451. package/src/skills/managed-store.ts +4 -119
  452. package/src/skills/skillssh-registry.ts +27 -48
  453. package/src/subagent/manager.ts +15 -7
  454. package/src/telemetry/types.ts +113 -1
  455. package/src/telemetry/usage-telemetry-reporter.test.ts +312 -5
  456. package/src/telemetry/usage-telemetry-reporter.ts +113 -7
  457. package/src/tools/apps/executors.ts +58 -7
  458. package/src/tools/ask-question/ask-question-tool.test.ts +509 -0
  459. package/src/tools/ask-question/ask-question-tool.ts +304 -0
  460. package/src/tools/browser/browser-execution.ts +15 -11
  461. package/src/tools/computer-use/definitions.ts +3 -3
  462. package/src/tools/credentials/vault.ts +1 -1
  463. package/src/tools/document/document-tool.ts +124 -1
  464. package/src/tools/filesystem/edit.ts +1 -1
  465. package/src/tools/filesystem/list.ts +1 -1
  466. package/src/tools/filesystem/read.ts +1 -1
  467. package/src/tools/filesystem/write.ts +5 -2
  468. package/src/tools/host-filesystem/transfer.ts +1 -1
  469. package/src/tools/host-terminal/host-shell.ts +1 -1
  470. package/src/tools/permission-checker.ts +1 -1
  471. package/src/tools/registry.ts +17 -7
  472. package/src/tools/schedule/create.ts +2 -2
  473. package/src/tools/schema-transforms.ts +7 -2
  474. package/src/tools/side-effects.ts +1 -0
  475. package/src/tools/skills/delete-managed.ts +4 -4
  476. package/src/tools/skills/execute.ts +1 -1
  477. package/src/tools/skills/scaffold-managed.ts +3 -2
  478. package/src/tools/subagent/notify-parent.ts +1 -1
  479. package/src/tools/system/request-permission.ts +2 -2
  480. package/src/tools/terminal/safe-env.ts +60 -1
  481. package/src/tools/tool-manifest.ts +2 -0
  482. package/src/tools/types.ts +72 -21
  483. package/src/tools/ui-surface/definitions.ts +6 -5
  484. package/src/tts/__tests__/provider-adapters.test.ts +76 -2
  485. package/src/tts/providers/elevenlabs-provider.ts +75 -1
  486. package/src/types/onboarding-context.ts +2 -0
  487. package/src/util/errors.ts +17 -0
  488. package/src/util/platform.ts +10 -0
  489. package/src/watcher/__tests__/engine.test.ts +22 -0
  490. package/src/watcher/engine.ts +6 -2
  491. package/src/workspace/migrations/057-repair-stale-gemini-model-ids.ts +80 -15
  492. package/src/workspace/migrations/072-seed-reply-suggestion-callsite.ts +35 -22
  493. package/src/workspace/migrations/073-repair-recall-callsite-empty-profile.ts +3 -1
  494. package/src/workspace/migrations/083-system-prompt-prefix-to-file.ts +191 -0
  495. package/src/workspace/migrations/084-remove-legacy-skills-index.ts +276 -0
  496. package/src/workspace/migrations/085-memory-v2-bm25-b-reembed-disabled-v2-pages.ts +137 -0
  497. package/src/workspace/migrations/086-revert-stale-gemini-mis-rewrites.ts +198 -0
  498. package/src/workspace/migrations/registry.ts +8 -0
  499. package/src/workspace/migrations/runner.ts +39 -9
  500. package/src/workspace/migrations/types.ts +4 -0
  501. package/examples/plugins/echo/bun.lock +0 -25
  502. package/src/__tests__/context-window-manager.test.ts +0 -2481
  503. package/src/context/__tests__/compact-prompt.test.ts +0 -63
  504. package/src/context/prompts/compact.md +0 -26
  505. package/src/prompts/__tests__/build-cli-reference-section.test.ts +0 -37
  506. package/src/__tests__/{secret-routes-managed-proxy.test.ts → secret-routes-platform-proxy.test.ts} +0 -0
@@ -1,2481 +0,0 @@
1
- import { describe, expect, test } from "bun:test";
2
-
3
- import type { ContextWindowConfig } from "../config/types.js";
4
- import { estimateTextTokens } from "../context/token-estimator.js";
5
- import {
6
- appendTailAnchorToSummary,
7
- clampSummaryAtSectionBoundary,
8
- CONTEXT_SUMMARY_MARKER,
9
- ContextWindowManager,
10
- createContextSummaryMessage,
11
- extractTailAssistantText,
12
- getSummaryFromContextMessage,
13
- stripCompactionOnlyInjections,
14
- } from "../context/window-manager.js";
15
- import type {
16
- ContentBlock,
17
- Message,
18
- Provider,
19
- ProviderResponse,
20
- SendMessageOptions,
21
- } from "../providers/types.js";
22
-
23
- function makeConfig(
24
- overrides: Partial<ContextWindowConfig> = {},
25
- ): ContextWindowConfig {
26
- return {
27
- enabled: true,
28
- maxInputTokens: 450,
29
- targetBudgetRatio: 0.67,
30
- compactThreshold: 0.6,
31
- summaryBudgetRatio: 0.05,
32
- overflowRecovery: {
33
- enabled: true,
34
- safetyMarginRatio: 0.05,
35
- maxAttempts: 3,
36
- interactiveLatestTurnCompression: "summarize",
37
- nonInteractiveLatestTurnCompression: "truncate",
38
- },
39
- ...overrides,
40
- };
41
- }
42
-
43
- function createProvider(
44
- fn: (messages: Message[]) => ProviderResponse | Promise<ProviderResponse>,
45
- name: string = "mock",
46
- ): Provider {
47
- return {
48
- name,
49
- async sendMessage(messages: Message[]): Promise<ProviderResponse> {
50
- return fn(messages);
51
- },
52
- };
53
- }
54
-
55
- function message(role: "user" | "assistant", text: string): Message {
56
- return { role, content: [{ type: "text", text }] };
57
- }
58
-
59
- describe("ContextWindowManager", () => {
60
- test("skips compaction when estimated tokens are below threshold", async () => {
61
- const provider = createProvider(() => {
62
- throw new Error("should not be called");
63
- });
64
- const manager = new ContextWindowManager({
65
- provider,
66
- systemPrompt: "system prompt",
67
- config: makeConfig(),
68
- });
69
- const history = [message("user", "hello"), message("assistant", "hi")];
70
-
71
- const result = await manager.maybeCompact(history);
72
- expect(result.compacted).toBe(false);
73
- expect(result.messages).toEqual(history);
74
- expect(result.reason).toBe("below compaction threshold");
75
- });
76
-
77
- test("explains forced compaction skip when only one user turn exists", async () => {
78
- const provider = createProvider(() => {
79
- throw new Error("summarizer should not be called");
80
- });
81
- const manager = new ContextWindowManager({
82
- provider,
83
- systemPrompt: "system prompt",
84
- config: makeConfig({
85
- maxInputTokens: 10_000,
86
- targetBudgetRatio: 0.5,
87
- }),
88
- });
89
- // Only one user turn — there is nothing earlier to summarize, so
90
- // forced compaction must still skip but report a clear reason
91
- // instead of "conversation already fits within the compaction
92
- // target". `force=true` is honored everywhere else.
93
- const history = [message("user", "hello"), message("assistant", "hi")];
94
-
95
- const result = await manager.maybeCompact(history, undefined, {
96
- force: true,
97
- });
98
-
99
- expect(result.compacted).toBe(false);
100
- expect(result.messages).toEqual(history);
101
- expect(result.reason).toBe(
102
- "only one user turn — nothing earlier to compact",
103
- );
104
- });
105
-
106
- test("forced compaction summarizes when adjustForToolPairs would walk boundary to summary", async () => {
107
- let summaryCalls = 0;
108
- const provider = createProvider(() => {
109
- summaryCalls += 1;
110
- return {
111
- content: [{ type: "text", text: "## Summary\n- rescue path ran" }],
112
- model: "mock-model",
113
- usage: { inputTokens: 100, outputTokens: 25 },
114
- stopReason: "end_turn",
115
- };
116
- });
117
- const manager = new ContextWindowManager({
118
- provider,
119
- systemPrompt: "system prompt",
120
- config: makeConfig({
121
- maxInputTokens: 10_000,
122
- targetBudgetRatio: 0.5,
123
- }),
124
- });
125
- // Conversation starts with consecutive `assistant(tool_use)` →
126
- // `user(tool_result + text)` pairs. `collectUserTurnStartIndexes`
127
- // includes the mixed user messages (not tool_result-only), so the
128
- // earliest `userTurnStarts` entry is the message containing
129
- // `tool_result(tool-1)`. Once the projection-optimism clamp
130
- // decrements the keep boundary to that user turn,
131
- // `adjustForToolPairs` walks the boundary back through the
132
- // tool_use/tool_result chain to index 0 — under the old code that
133
- // routed `/compact` through the "already fits" skip path. With the
134
- // rescue, summarization runs and orphan `tool_result` blocks are
135
- // stripped from the kept region.
136
- const history: Message[] = [
137
- {
138
- role: "assistant",
139
- content: [{ type: "tool_use", id: "tool-1", name: "x", input: {} }],
140
- },
141
- {
142
- role: "user",
143
- content: [
144
- { type: "tool_result", tool_use_id: "tool-1", content: "result1" },
145
- { type: "text", text: "u1" },
146
- ],
147
- },
148
- {
149
- role: "assistant",
150
- content: [{ type: "tool_use", id: "tool-2", name: "x", input: {} }],
151
- },
152
- {
153
- role: "user",
154
- content: [
155
- { type: "tool_result", tool_use_id: "tool-2", content: "result2" },
156
- { type: "text", text: "u2" },
157
- ],
158
- },
159
- message("assistant", "a3"),
160
- message("user", "u3"),
161
- message("assistant", "a4"),
162
- ];
163
-
164
- const result = await manager.maybeCompact(history, undefined, {
165
- force: true,
166
- precomputedEstimate: 50_000,
167
- });
168
-
169
- expect(result.compacted).toBe(true);
170
- expect(summaryCalls).toBe(1);
171
- expect(result.reason).not.toBe(
172
- "conversation already fits within the compaction target",
173
- );
174
- expect(result.reason).not.toBe(
175
- "truncated tool results without summarization",
176
- );
177
- expect(result.compactedMessages).toBeGreaterThan(0);
178
-
179
- // The kept region must not contain orphan tool_result blocks whose
180
- // tool_use lives in the compacted region — the LLM API would reject
181
- // such messages on the next agent turn.
182
- const keptToolUseIds = new Set<string>();
183
- for (const msg of result.messages) {
184
- if (msg.role !== "assistant") continue;
185
- for (const block of msg.content) {
186
- if (
187
- (block.type === "tool_use" || block.type === "server_tool_use") &&
188
- "id" in block
189
- ) {
190
- keptToolUseIds.add((block as { id: string }).id);
191
- }
192
- }
193
- }
194
- for (const msg of result.messages) {
195
- if (msg.role !== "user") continue;
196
- for (const block of msg.content) {
197
- if (
198
- (block.type === "tool_result" ||
199
- block.type === "web_search_tool_result") &&
200
- "tool_use_id" in block
201
- ) {
202
- expect(keptToolUseIds.has(block.tool_use_id as string)).toBe(true);
203
- }
204
- }
205
- }
206
- });
207
-
208
- test("forced compaction summarizes when compactable count is below MIN guard", async () => {
209
- let summaryCalls = 0;
210
- const provider = createProvider(() => {
211
- summaryCalls += 1;
212
- return {
213
- content: [{ type: "text", text: "## Summary\n- min-bypass ran" }],
214
- model: "mock-model",
215
- usage: { inputTokens: 100, outputTokens: 25 },
216
- stopReason: "end_turn",
217
- };
218
- });
219
- const manager = new ContextWindowManager({
220
- provider,
221
- systemPrompt: "system prompt",
222
- config: makeConfig({
223
- maxInputTokens: 10_000,
224
- targetBudgetRatio: 0.5,
225
- }),
226
- });
227
- // Two user turns separated by a single assistant message — the
228
- // smallest realistic conversation where a forced compaction has
229
- // anything to summarize. After the projection clamp + rescue, the
230
- // compactable region is at most one user turn (the first one),
231
- // which can fall below `MIN_COMPACTABLE_PERSISTED_MESSAGES`. The
232
- // bypass must let summarization run instead of returning
233
- // "insufficient compactable persisted messages".
234
- const history: Message[] = [message("user", "u1"), message("user", "u2")];
235
-
236
- const result = await manager.maybeCompact(history, undefined, {
237
- force: true,
238
- precomputedEstimate: 50_000,
239
- });
240
-
241
- expect(result.compacted).toBe(true);
242
- expect(summaryCalls).toBe(1);
243
- expect(result.reason).not.toBe(
244
- "insufficient compactable persisted messages",
245
- );
246
- expect(result.compactedMessages).toBeGreaterThan(0);
247
- });
248
-
249
- test("forced compaction summarizes when projection fits but real usage exceeds target", async () => {
250
- let summaryCalls = 0;
251
- const provider = createProvider(() => {
252
- summaryCalls += 1;
253
- return {
254
- content: [
255
- { type: "text", text: "## Summary\n- forced compaction ran" },
256
- ],
257
- model: "mock-model",
258
- usage: { inputTokens: 100, outputTokens: 25 },
259
- stopReason: "end_turn",
260
- };
261
- });
262
- const manager = new ContextWindowManager({
263
- provider,
264
- systemPrompt: "system prompt",
265
- config: makeConfig({
266
- maxInputTokens: 10_000,
267
- targetBudgetRatio: 0.5,
268
- }),
269
- });
270
- // Tiny live messages so the projection trivially fits target — without
271
- // the fix this would route through the "already fits" skip path.
272
- const history: Message[] = [
273
- message("user", "u1"),
274
- message("assistant", "a1"),
275
- message("user", "u2"),
276
- message("assistant", "a2"),
277
- ];
278
-
279
- const result = await manager.maybeCompact(history, undefined, {
280
- force: true,
281
- // Simulate a live conversation that's well over target. In production
282
- // this happens when synthetic tool_result truncation in the projection
283
- // is far more aggressive than what the real messages allow.
284
- precomputedEstimate: 50_000,
285
- });
286
-
287
- expect(result.compacted).toBe(true);
288
- expect(summaryCalls).toBe(1);
289
- expect(result.reason).not.toBe(
290
- "conversation already fits within the compaction target",
291
- );
292
- expect(result.compactedMessages).toBeGreaterThan(0);
293
- });
294
-
295
- test("compacts old turns and keeps recent user turns", async () => {
296
- let summaryCalls = 0;
297
- const provider = createProvider(() => {
298
- summaryCalls += 1;
299
- return {
300
- content: [
301
- { type: "text", text: `## Goals\n- summary call ${summaryCalls}` },
302
- ],
303
- model: "mock-model",
304
- usage: { inputTokens: 100, outputTokens: 25 },
305
- stopReason: "end_turn",
306
- };
307
- });
308
- const manager = new ContextWindowManager({
309
- provider,
310
- systemPrompt: "system prompt",
311
- config: makeConfig({ maxInputTokens: 600 }),
312
- });
313
- const long = "x".repeat(240);
314
- const history: Message[] = [
315
- message("user", `u1 ${long}`),
316
- message("assistant", `a1 ${long}`),
317
- message("user", `u2 ${long}`),
318
- message("assistant", `a2 ${long}`),
319
- message("user", `u3 ${long}`),
320
- message("assistant", `a3 ${long}`),
321
- ];
322
-
323
- const result = await manager.maybeCompact(history);
324
-
325
- expect(result.compacted).toBe(true);
326
- expect(result.compactedMessages).toBeGreaterThan(0);
327
- expect(result.summaryCalls).toBe(summaryCalls);
328
- expect(result.summaryInputTokens).toBeGreaterThan(0);
329
- expect(result.summaryOutputTokens).toBeGreaterThan(0);
330
- expect(result.messages[0].role).toBe("user");
331
- expect(
332
- getSummaryFromContextMessage(result.messages[0])?.length,
333
- ).toBeGreaterThan(0);
334
-
335
- const userTexts = result.messages
336
- .filter((m) => m.role === "user")
337
- .map((m) => (m.content[0].type === "text" ? m.content[0].text : ""));
338
- expect(userTexts.some((text) => text.startsWith("u1 "))).toBe(false);
339
- expect(userTexts.some((text) => text.startsWith("u2 "))).toBe(true);
340
- expect(userTexts.some((text) => text.startsWith("u3 "))).toBe(true);
341
- });
342
-
343
- test("returns cache-aware summary usage from single-pass compaction", async () => {
344
- const provider = createProvider(() => {
345
- return {
346
- content: [
347
- { type: "text", text: `## Goals\n- summary of full transcript` },
348
- ],
349
- model: "claude-opus-4-6",
350
- usage: {
351
- inputTokens: 5_000,
352
- outputTokens: 80,
353
- cacheCreationInputTokens: 50,
354
- cacheReadInputTokens: 200,
355
- },
356
- rawResponse: {
357
- usage: {
358
- cache_creation: {
359
- ephemeral_5m_input_tokens: 50,
360
- ephemeral_1h_input_tokens: 0,
361
- },
362
- cache_read_input_tokens: 200,
363
- },
364
- },
365
- stopReason: "end_turn",
366
- };
367
- });
368
- const manager = new ContextWindowManager({
369
- provider,
370
- systemPrompt: "system prompt",
371
- config: makeConfig({
372
- maxInputTokens: 7_000,
373
- targetBudgetRatio: 0.41,
374
- }),
375
- });
376
- const long = "q".repeat(6_000);
377
- const history: Message[] = [
378
- message("user", `u1 ${long}`),
379
- message("assistant", `a1 ${long}`),
380
- message("user", `u2 ${long}`),
381
- message("assistant", `a2 ${long}`),
382
- message("user", `u3 ${long}`),
383
- ];
384
-
385
- const result = await manager.maybeCompact(history);
386
-
387
- expect(result.compacted).toBe(true);
388
- expect(result.summaryCalls).toBe(1);
389
- expect(result.summaryCacheCreationInputTokens).toBe(50);
390
- expect(result.summaryCacheReadInputTokens).toBe(200);
391
- expect(result.summaryRawResponses).toHaveLength(1);
392
- expect(result.summaryRawResponses?.[0]).toMatchObject({
393
- usage: {
394
- cache_creation: { ephemeral_5m_input_tokens: 50 },
395
- cache_read_input_tokens: 200,
396
- },
397
- });
398
- });
399
-
400
- test("updates an existing summary message instead of nesting summaries", async () => {
401
- const provider = createProvider(() => ({
402
- content: [{ type: "text", text: "## Goals\n- updated summary" }],
403
- model: "mock-model",
404
- usage: { inputTokens: 50, outputTokens: 10 },
405
- stopReason: "end_turn",
406
- }));
407
- const manager = new ContextWindowManager({
408
- provider,
409
- systemPrompt: "system prompt",
410
- config: makeConfig({
411
- maxInputTokens: 300,
412
- targetBudgetRatio: 0.58,
413
- }),
414
- });
415
- const long = "y".repeat(220);
416
- const history: Message[] = [
417
- createContextSummaryMessage("## Goals\n- old summary"),
418
- message("user", `older ${long}`),
419
- message("assistant", `reply ${long}`),
420
- message("user", `latest ${long}`),
421
- ];
422
-
423
- const result = await manager.maybeCompact(history);
424
- expect(result.compacted).toBe(true);
425
- expect(result.messages.length).toBeLessThan(history.length + 1);
426
- expect(getSummaryFromContextMessage(result.messages[0])).toContain(
427
- "updated summary",
428
- );
429
- expect(
430
- result.messages.filter(
431
- (m) =>
432
- m.role === "user" &&
433
- m.content.some(
434
- (block) =>
435
- block.type === "text" &&
436
- block.text.startsWith(CONTEXT_SUMMARY_MARKER),
437
- ),
438
- ),
439
- ).toHaveLength(1);
440
- });
441
-
442
- test("falls back to local summary when provider summarization fails", async () => {
443
- const provider = createProvider(async () => {
444
- throw new Error("provider unavailable");
445
- });
446
- const manager = new ContextWindowManager({
447
- provider,
448
- systemPrompt: "system prompt",
449
- config: makeConfig({
450
- maxInputTokens: 260,
451
- targetBudgetRatio: 0.59,
452
- }),
453
- });
454
- const long = "z".repeat(220);
455
- const history = [
456
- message("user", `task ${long}`),
457
- message("assistant", `result ${long}`),
458
- message("user", `followup ${long}`),
459
- ];
460
-
461
- const result = await manager.maybeCompact(history);
462
- expect(result.compacted).toBe(true);
463
- expect(result.summaryCalls).toBeGreaterThan(0);
464
- expect(result.summaryInputTokens).toBe(0);
465
- expect(result.summaryOutputTokens).toBe(0);
466
- expect(result.summaryModel).toBe("");
467
- expect(result.summaryText).toContain("## Recent Progress");
468
- });
469
-
470
- test("marks summaryFailed when the provider throws and fallback runs", async () => {
471
- // The agent-loop circuit breaker distinguishes "LLM call failed but
472
- // fallback rescued us" from "compaction succeeded end-to-end". The
473
- // fallback path must set summaryFailed:true so callers can count
474
- // consecutive failures without losing the compacted messages.
475
- const provider = createProvider(async () => {
476
- throw new Error("provider unavailable");
477
- });
478
- const manager = new ContextWindowManager({
479
- provider,
480
- systemPrompt: "system prompt",
481
- config: makeConfig({
482
- maxInputTokens: 260,
483
- targetBudgetRatio: 0.59,
484
- }),
485
- });
486
- const long = "z".repeat(220);
487
- const history = [
488
- message("user", `task ${long}`),
489
- message("assistant", `result ${long}`),
490
- message("user", `followup ${long}`),
491
- ];
492
-
493
- const result = await manager.maybeCompact(history);
494
- expect(result.compacted).toBe(true);
495
- expect(result.summaryFailed).toBe(true);
496
- });
497
-
498
- test("does not mark summaryFailed on a successful provider call", async () => {
499
- const provider = createProvider(() => ({
500
- content: [
501
- { type: "text", text: "## Goals\n- summary produced by provider" },
502
- ],
503
- model: "mock-model",
504
- usage: { inputTokens: 60, outputTokens: 12 },
505
- stopReason: "end_turn",
506
- }));
507
- const manager = new ContextWindowManager({
508
- provider,
509
- systemPrompt: "system prompt",
510
- config: makeConfig({
511
- maxInputTokens: 260,
512
- targetBudgetRatio: 0.59,
513
- }),
514
- });
515
- const long = "z".repeat(220);
516
- const history = [
517
- message("user", `task ${long}`),
518
- message("assistant", `result ${long}`),
519
- message("user", `followup ${long}`),
520
- ];
521
-
522
- const result = await manager.maybeCompact(history);
523
- expect(result.compacted).toBe(true);
524
- expect(result.summaryFailed).toBe(false);
525
- });
526
-
527
- test("serializes file blocks for summary chunks", async () => {
528
- const prompts: string[] = [];
529
- const provider = createProvider((messages) => {
530
- for (const block of messages[0]?.content ?? []) {
531
- if (block.type === "text") {
532
- prompts.push(block.text);
533
- }
534
- }
535
- return {
536
- content: [{ type: "text", text: "## Goals\n- file summarized" }],
537
- model: "mock-model",
538
- usage: { inputTokens: 60, outputTokens: 12 },
539
- stopReason: "end_turn",
540
- };
541
- });
542
- const manager = new ContextWindowManager({
543
- provider,
544
- systemPrompt: "system prompt",
545
- config: makeConfig({
546
- maxInputTokens: 2000,
547
- targetBudgetRatio: 0.4,
548
- compactThreshold: 0.35,
549
- }),
550
- });
551
- const long = "f".repeat(1500);
552
- const history: Message[] = [
553
- {
554
- role: "user",
555
- content: [
556
- {
557
- type: "file",
558
- source: {
559
- type: "base64",
560
- media_type: "application/pdf",
561
- filename: "spec.pdf",
562
- data: "a".repeat(4096),
563
- },
564
- extracted_text: "Critical requirement from attached spec.",
565
- },
566
- ],
567
- },
568
- message("assistant", `ack ${long}`),
569
- message("user", `followup ${long}`),
570
- ];
571
-
572
- const result = await manager.maybeCompact(history);
573
- expect(result.compacted).toBe(true);
574
-
575
- const combinedPrompts = prompts.join("\n");
576
- expect(combinedPrompts).toContain("file: spec.pdf");
577
- expect(combinedPrompts).toContain("application/pdf");
578
- expect(combinedPrompts).toContain(
579
- "Critical requirement from attached spec.",
580
- );
581
- expect(combinedPrompts).not.toContain("unknown_block");
582
- });
583
-
584
- test("passes image blocks to summarizer instead of text metadata", async () => {
585
- const receivedBlocks: { type: string; mediaType?: string }[] = [];
586
- const provider = createProvider((messages) => {
587
- for (const block of messages[0]?.content ?? []) {
588
- if (block.type === "image") {
589
- receivedBlocks.push({
590
- type: "image",
591
- mediaType: (block as { source: { media_type: string } }).source
592
- .media_type,
593
- });
594
- } else if (block.type === "text") {
595
- receivedBlocks.push({ type: "text" });
596
- }
597
- }
598
- return {
599
- content: [
600
- {
601
- type: "text",
602
- text: "## Goals\n- described image: a photo of a cat",
603
- },
604
- ],
605
- model: "mock-model",
606
- usage: { inputTokens: 100, outputTokens: 20 },
607
- stopReason: "end_turn",
608
- };
609
- });
610
- // Use a large enough maxInputTokens so the image fits in the summarizer
611
- // budget after accounting for overhead (system prompt, scaffolding, output).
612
- const manager = new ContextWindowManager({
613
- provider,
614
- systemPrompt: "sys",
615
- config: makeConfig({
616
- maxInputTokens: 5000,
617
- compactThreshold: 0.3,
618
- targetBudgetRatio: 0.2,
619
- }),
620
- });
621
- const long = "x".repeat(4000);
622
- const history: Message[] = [
623
- {
624
- role: "user",
625
- content: [
626
- { type: "text", text: "look at this" },
627
- {
628
- type: "image",
629
- source: {
630
- type: "base64",
631
- media_type: "image/png",
632
- data: "iVBORw0KGgo=",
633
- },
634
- },
635
- ],
636
- },
637
- message("assistant", `a1 ${long}`),
638
- message("user", `u2 ${long}`),
639
- ];
640
-
641
- const result = await manager.maybeCompact(history);
642
- expect(result.compacted).toBe(true);
643
-
644
- // The summarizer should have received actual image blocks, not text stubs.
645
- const imageBlocks = receivedBlocks.filter((b) => b.type === "image");
646
- expect(imageBlocks.length).toBe(1);
647
- expect(imageBlocks[0].mediaType).toBe("image/png");
648
- });
649
-
650
- test("passes tool_result images to summarizer", async () => {
651
- const receivedImageCount = { count: 0 };
652
- const provider = createProvider((messages) => {
653
- for (const block of messages[0]?.content ?? []) {
654
- if (block.type === "image") {
655
- receivedImageCount.count++;
656
- }
657
- }
658
- return {
659
- content: [
660
- { type: "text", text: "## Goals\n- summarized tool output images" },
661
- ],
662
- model: "mock-model",
663
- usage: { inputTokens: 100, outputTokens: 20 },
664
- stopReason: "end_turn",
665
- };
666
- });
667
- const manager = new ContextWindowManager({
668
- provider,
669
- systemPrompt: "sys",
670
- config: makeConfig({
671
- maxInputTokens: 5000,
672
- compactThreshold: 0.3,
673
- targetBudgetRatio: 0.2,
674
- }),
675
- });
676
- const long = "x".repeat(2000);
677
- const history: Message[] = [
678
- message("assistant", "let me read that file"),
679
- {
680
- role: "user",
681
- content: [
682
- {
683
- type: "tool_result",
684
- tool_use_id: "tool_1",
685
- content: "file contents",
686
- contentBlocks: [
687
- {
688
- type: "image",
689
- source: {
690
- type: "base64",
691
- media_type: "image/jpeg",
692
- data: "iVBORw0KGgo=",
693
- },
694
- },
695
- ],
696
- is_error: false,
697
- } as import("../providers/types.js").ToolResultContent,
698
- ],
699
- },
700
- message("user", `followup ${long}`),
701
- message("assistant", `response ${long}`),
702
- message("user", `final ${long}`),
703
- ];
704
-
705
- const result = await manager.maybeCompact(history);
706
- expect(result.compacted).toBe(true);
707
- expect(receivedImageCount.count).toBe(1);
708
- });
709
-
710
- test("counts compacted persisted messages including tool-result user turns", async () => {
711
- const provider = createProvider(() => ({
712
- content: [{ type: "text", text: "## Goals\n- compacted summary" }],
713
- model: "mock-model",
714
- usage: { inputTokens: 75, outputTokens: 20 },
715
- stopReason: "end_turn",
716
- }));
717
- const manager = new ContextWindowManager({
718
- provider,
719
- systemPrompt: "system prompt",
720
- config: makeConfig({
721
- maxInputTokens: 320,
722
- targetBudgetRatio: 0.58,
723
- }),
724
- });
725
- const long = "k".repeat(220);
726
- const history: Message[] = [
727
- message("user", `u1 ${long}`),
728
- {
729
- role: "assistant",
730
- content: [
731
- {
732
- type: "tool_use",
733
- id: "t1",
734
- name: "read_file",
735
- input: { path: "/tmp/a" },
736
- },
737
- ],
738
- },
739
- {
740
- role: "user",
741
- content: [
742
- { type: "tool_result", tool_use_id: "t1", content: "contents" },
743
- ],
744
- },
745
- message("assistant", `a1 ${long}`),
746
- message("user", `u2 ${long}`),
747
- ];
748
-
749
- const result = await manager.maybeCompact(history);
750
- expect(result.compacted).toBe(true);
751
- expect(result.compactedMessages).toBe(4);
752
- // Tool-result-only user messages have DB counterparts and must be
753
- // counted so contextCompactedMessageCount indexes the DB correctly.
754
- expect(result.compactedPersistedMessages).toBe(4);
755
- });
756
-
757
- test("adjusts keep boundary to preserve tool_use/tool_result pairs", async () => {
758
- const provider = createProvider(() => ({
759
- content: [{ type: "text", text: "## Goals\n- compacted summary" }],
760
- model: "mock-model",
761
- usage: { inputTokens: 75, outputTokens: 20 },
762
- stopReason: "end_turn",
763
- }));
764
- // Configure budget so compaction keeps only the last user turn,
765
- // which would normally split the tool pair because the last user
766
- // turn start is a mixed message (tool_result + text) whose matching
767
- // tool_use lives in the preceding assistant message.
768
- const manager = new ContextWindowManager({
769
- provider,
770
- systemPrompt: "system prompt",
771
- config: makeConfig({
772
- maxInputTokens: 320,
773
- targetBudgetRatio: 0.58,
774
- }),
775
- });
776
- const long = "k".repeat(220);
777
- const history: Message[] = [
778
- message("user", `u1 ${long}`), // index 0: old user turn (long)
779
- message("assistant", `a1 ${long}`), // index 1: assistant reply (long)
780
- message("user", `u2 ${long}`), // index 2: second user turn (long)
781
- {
782
- // index 3: assistant with tool_use
783
- role: "assistant",
784
- content: [
785
- {
786
- type: "tool_use",
787
- id: "t1",
788
- name: "read_file",
789
- input: { path: "/tmp/a" },
790
- },
791
- ],
792
- },
793
- {
794
- // index 4: user with tool_result AND text (mixed = user turn start)
795
- // Without adjustForToolPairs, the raw boundary would land here,
796
- // orphaning the tool_result from its tool_use at index 3.
797
- role: "user",
798
- content: [
799
- { type: "tool_result", tool_use_id: "t1", content: "file contents" },
800
- { type: "text", text: "thanks, now continue" },
801
- ],
802
- },
803
- ];
804
-
805
- const result = await manager.maybeCompact(history);
806
- expect(result.compacted).toBe(true);
807
- // The kept messages must include the tool_use assistant message (index 3)
808
- // and tool_result user message (index 4) as a pair, not split them.
809
- // Verify no orphaned tool_result blocks exist in the kept messages.
810
- const keptMessages = result.messages;
811
- for (let i = 0; i < keptMessages.length; i++) {
812
- const msg = keptMessages[i];
813
- if (msg.role !== "user") continue;
814
- for (const block of msg.content) {
815
- if (block.type === "tool_result") {
816
- // Every tool_result must have a matching tool_use in a preceding assistant message
817
- const toolUseId = (block as { tool_use_id: string }).tool_use_id;
818
- const hasMatchingToolUse = keptMessages
819
- .slice(0, i)
820
- .some(
821
- (prev) =>
822
- prev.role === "assistant" &&
823
- prev.content.some(
824
- (b) =>
825
- b.type === "tool_use" &&
826
- (b as { id: string }).id === toolUseId,
827
- ),
828
- );
829
- expect(hasMatchingToolUse).toBe(true);
830
- }
831
- }
832
- }
833
- });
834
-
835
- test("counts mixed tool_result+text user messages as persisted", async () => {
836
- const provider = createProvider(() => ({
837
- content: [{ type: "text", text: "## Goals\n- mixed summary" }],
838
- model: "mock-model",
839
- usage: { inputTokens: 75, outputTokens: 20 },
840
- stopReason: "end_turn",
841
- }));
842
- const manager = new ContextWindowManager({
843
- provider,
844
- systemPrompt: "system prompt",
845
- config: makeConfig({
846
- maxInputTokens: 320,
847
- targetBudgetRatio: 0.58,
848
- }),
849
- });
850
- const long = "k".repeat(220);
851
- // Simulates a merged user message (repairHistory merges consecutive same-role
852
- // messages), resulting in a user turn with both tool_result and text blocks.
853
- const history: Message[] = [
854
- message("user", `u1 ${long}`),
855
- {
856
- role: "assistant",
857
- content: [
858
- {
859
- type: "tool_use",
860
- id: "t1",
861
- name: "read_file",
862
- input: { path: "/tmp/a" },
863
- },
864
- ],
865
- },
866
- {
867
- role: "user",
868
- content: [
869
- { type: "tool_result", tool_use_id: "t1", content: "contents" },
870
- { type: "text", text: `follow-up question ${long}` },
871
- ],
872
- },
873
- message("assistant", `a1 ${long}`),
874
- message("user", `u2 ${long}`),
875
- ];
876
-
877
- const result = await manager.maybeCompact(history);
878
- expect(result.compacted).toBe(true);
879
- // The mixed user message should be counted as persisted (4 = u1 + mixed + a_tooluse + a1)
880
- expect(result.compactedPersistedMessages).toBe(4);
881
- });
882
-
883
- test("returns cache-aware usage metadata for compaction summaries", async () => {
884
- const rawResponse = {
885
- usage: {
886
- cache_creation: { ephemeral_5m_input_tokens: 120 },
887
- cache_read_input_tokens: 340,
888
- },
889
- };
890
- const provider = createProvider(() => ({
891
- content: [{ type: "text", text: "## Goals\n- cache-aware summary" }],
892
- model: "claude-opus-4-6",
893
- usage: {
894
- inputTokens: 500,
895
- outputTokens: 22,
896
- cacheCreationInputTokens: 120,
897
- cacheReadInputTokens: 340,
898
- },
899
- rawResponse,
900
- stopReason: "end_turn",
901
- }));
902
- const manager = new ContextWindowManager({
903
- provider,
904
- systemPrompt: "system prompt",
905
- config: makeConfig({
906
- maxInputTokens: 2600,
907
- targetBudgetRatio: 0.63,
908
- }),
909
- });
910
- const long = "c".repeat(5000);
911
- const history: Message[] = [
912
- message("user", `u1 ${long}`),
913
- message("assistant", `a1 ${long}`),
914
- message("user", `u2 ${long}`),
915
- ];
916
-
917
- const result = await manager.maybeCompact(history);
918
-
919
- expect(result.compacted).toBe(true);
920
- expect(result.summaryCalls).toBe(1);
921
- expect(result.summaryInputTokens).toBe(500);
922
- expect(result.summaryCacheCreationInputTokens).toBe(120);
923
- expect(result.summaryCacheReadInputTokens).toBe(340);
924
- expect(result.summaryRawResponses).toEqual([rawResponse]);
925
- });
926
-
927
- test("does not parse user-authored summary marker text as internal summary", () => {
928
- const userMessage: Message = {
929
- role: "user",
930
- content: [
931
- {
932
- type: "text",
933
- text: `${CONTEXT_SUMMARY_MARKER}\nI typed this prefix myself`,
934
- },
935
- ],
936
- };
937
- expect(getSummaryFromContextMessage(userMessage)).toBeNull();
938
- });
939
-
940
- test("skips compaction during cooldown", async () => {
941
- const provider = createProvider(() => {
942
- throw new Error(
943
- "summarizer should not be called while cooldown skip is active",
944
- );
945
- });
946
- const manager = new ContextWindowManager({
947
- provider,
948
- systemPrompt: "system prompt",
949
- config: makeConfig({
950
- maxInputTokens: 260,
951
- targetBudgetRatio: 0.74,
952
- }),
953
- });
954
- const long = "c".repeat(220);
955
- const history: Message[] = [
956
- message("user", `u1 ${long}`),
957
- message("assistant", `a1 ${long}`),
958
- message("user", `u2 ${long}`),
959
- ];
960
-
961
- const result = await manager.maybeCompact(history, undefined, {
962
- lastCompactedAt: Date.now() - 30_000,
963
- });
964
- expect(result.compacted).toBe(false);
965
- expect(result.reason).toBe("compaction cooldown active");
966
- });
967
-
968
- test("ignores cooldown and compacts under severe token pressure", async () => {
969
- const provider = createProvider(() => ({
970
- content: [{ type: "text", text: "## Goals\n- compacted under pressure" }],
971
- model: "mock-model",
972
- usage: { inputTokens: 60, outputTokens: 12 },
973
- stopReason: "end_turn",
974
- }));
975
- const manager = new ContextWindowManager({
976
- provider,
977
- systemPrompt: "system prompt",
978
- config: makeConfig({
979
- maxInputTokens: 320,
980
- targetBudgetRatio: 0.61,
981
- }),
982
- });
983
- const long = "p".repeat(340);
984
- const history: Message[] = [
985
- message("user", `u1 ${long}`),
986
- message("assistant", `a1 ${long}`),
987
- message("user", `u2 ${long}`),
988
- message("assistant", `a2 ${long}`),
989
- message("user", `u3 ${long}`),
990
- ];
991
-
992
- const result = await manager.maybeCompact(history, undefined, {
993
- lastCompactedAt: Date.now() - 30_000,
994
- });
995
- expect(result.compacted).toBe(true);
996
- expect(result.reason).toBeUndefined();
997
- });
998
-
999
- test("force=true bypasses cooldown for context-too-large recovery", async () => {
1000
- const provider = createProvider(() => ({
1001
- content: [{ type: "text", text: "## Goals\n- forced compaction" }],
1002
- model: "mock-model",
1003
- usage: { inputTokens: 60, outputTokens: 12 },
1004
- stopReason: "end_turn",
1005
- }));
1006
- const manager = new ContextWindowManager({
1007
- provider,
1008
- systemPrompt: "system prompt",
1009
- config: makeConfig({
1010
- maxInputTokens: 260,
1011
- targetBudgetRatio: 0.74,
1012
- }),
1013
- });
1014
- const long = "c".repeat(220);
1015
- const history: Message[] = [
1016
- message("user", `u1 ${long}`),
1017
- message("assistant", `a1 ${long}`),
1018
- message("user", `u2 ${long}`),
1019
- ];
1020
-
1021
- // Same setup as the cooldown test, but with force=true — should compact.
1022
- const result = await manager.maybeCompact(history, undefined, {
1023
- lastCompactedAt: Date.now() - 30_000,
1024
- force: true,
1025
- });
1026
- expect(result.compacted).toBe(true);
1027
- expect(result.reason).toBeUndefined();
1028
- });
1029
-
1030
- test("image-heavy payload is no longer underestimated as below-threshold", async () => {
1031
- const provider = createProvider(() => ({
1032
- content: [
1033
- { type: "text", text: "## Goals\n- compacted image-heavy history" },
1034
- ],
1035
- model: "mock-model",
1036
- usage: { inputTokens: 75, outputTokens: 20 },
1037
- stopReason: "end_turn",
1038
- }));
1039
- const manager = new ContextWindowManager({
1040
- provider,
1041
- systemPrompt: "system prompt",
1042
- config: makeConfig({
1043
- maxInputTokens: 7000,
1044
- targetBudgetRatio: 0.76,
1045
- compactThreshold: 0.8,
1046
- }),
1047
- });
1048
-
1049
- const images = Array.from({ length: 5 }, (_, i) => ({
1050
- type: "image" as const,
1051
- source: {
1052
- type: "base64" as const,
1053
- media_type: "image/png",
1054
- data: `${String(i)}${"A".repeat(40_000)}`,
1055
- },
1056
- }));
1057
-
1058
- const history: Message[] = [
1059
- {
1060
- role: "user",
1061
- content: [
1062
- { type: "text", text: "Please analyze these screenshots." },
1063
- ...images,
1064
- ],
1065
- },
1066
- message("assistant", "Sure, uploading now."),
1067
- ];
1068
-
1069
- const result = await manager.maybeCompact(history);
1070
- expect(result.reason).not.toBe("below compaction threshold");
1071
-
1072
- // Sanity check for this repro: counting raw base64 as text would exceed threshold.
1073
- const rawBase64Chars = images.reduce(
1074
- (sum, img) => sum + img.source.data.length,
1075
- 0,
1076
- );
1077
- const rawBase64TokenEquivalent = estimateTextTokens(
1078
- "A".repeat(rawBase64Chars),
1079
- );
1080
- expect(rawBase64TokenEquivalent).toBeGreaterThan(result.thresholdTokens);
1081
- });
1082
-
1083
- test("minKeepRecentUserTurns: 0 compacts all messages into summary only", async () => {
1084
- const provider = createProvider(() => ({
1085
- content: [{ type: "text", text: "## Goals\n- emergency summary" }],
1086
- model: "mock-model",
1087
- usage: { inputTokens: 60, outputTokens: 12 },
1088
- stopReason: "end_turn",
1089
- }));
1090
- const manager = new ContextWindowManager({
1091
- provider,
1092
- systemPrompt: "system prompt",
1093
- config: makeConfig({
1094
- maxInputTokens: 260,
1095
- targetBudgetRatio: 0.28,
1096
- }),
1097
- });
1098
- const long = "e".repeat(220);
1099
- const history: Message[] = [
1100
- message("user", `u1 ${long}`),
1101
- message("assistant", `a1 ${long}`),
1102
- message("user", `u2 ${long}`),
1103
- ];
1104
-
1105
- const result = await manager.maybeCompact(history, undefined, {
1106
- force: true,
1107
- minKeepRecentUserTurns: 0,
1108
- });
1109
- expect(result.compacted).toBe(true);
1110
- // With minKeepRecentUserTurns=0 and a tight target budget,
1111
- // pickKeepBoundary drops keepTurns all the way to 0.
1112
- // All three messages are compacted into a single summary message.
1113
- expect(result.compactedMessages).toBe(3);
1114
- expect(result.messages).toHaveLength(1);
1115
- expect(getSummaryFromContextMessage(result.messages[0])).toContain(
1116
- "emergency summary",
1117
- );
1118
- });
1119
-
1120
- test("force compaction with loose target override still summarizes persisted messages", async () => {
1121
- // `pickKeepBoundary` clamps `targetInputTokensOverride` to
1122
- // `config.targetInputTokens`, so a loose override cannot
1123
- // short-circuit summarization into the truncate-only early-exit.
1124
-
1125
- let summaryCalls = 0;
1126
- const provider = createProvider(() => {
1127
- summaryCalls += 1;
1128
- return {
1129
- content: [{ type: "text", text: "## Goals\n- real summary" }],
1130
- model: "mock-model",
1131
- usage: { inputTokens: 80, outputTokens: 20 },
1132
- stopReason: "end_turn",
1133
- };
1134
- });
1135
-
1136
- // Scaled from prod (max 200k → 1000) preserving key ratios: the
1137
- // loose override (~0.85×max) is ~17× the post-compaction target
1138
- // (~0.05×max), so history between the two exercises the clamp.
1139
- const manager = new ContextWindowManager({
1140
- provider,
1141
- systemPrompt: "system prompt",
1142
- config: makeConfig({
1143
- maxInputTokens: 1000,
1144
- targetBudgetRatio: 0.1,
1145
- summaryBudgetRatio: 0.05,
1146
- compactThreshold: 0.3,
1147
- }),
1148
- });
1149
-
1150
- // History in the "no-op zone": above threshold (300), below override (850).
1151
- const long = "x".repeat(180);
1152
- const history: Message[] = [
1153
- message("user", `u1 ${long}`),
1154
- message("assistant", `a1 ${long}`),
1155
- message("user", `u2 ${long}`),
1156
- message("assistant", `a2 ${long}`),
1157
- message("user", `u3 ${long}`),
1158
- message("assistant", `a3 ${long}`),
1159
- message("user", `u4 ${long}`),
1160
- message("assistant", `a4 ${long}`),
1161
- message("user", `u5 ${long}`),
1162
- ];
1163
-
1164
- const preflightBudgetAnalog = Math.floor(1000 * 0.85);
1165
- const result = await manager.maybeCompact(history, undefined, {
1166
- force: true,
1167
- targetInputTokensOverride: preflightBudgetAnalog,
1168
- });
1169
-
1170
- // Guard: we're actually above the compact threshold.
1171
- expect(result.previousEstimatedInputTokens).toBeGreaterThan(
1172
- result.thresholdTokens,
1173
- );
1174
-
1175
- // A real summarization happened (not the truncate-only no-op).
1176
- expect(result.compactedPersistedMessages).toBeGreaterThan(0);
1177
- expect(summaryCalls).toBeGreaterThan(0);
1178
- });
1179
-
1180
- test("force=true compacts below minFloor when a kept turn exceeds target", async () => {
1181
- // A giant paste in the last user turn means minFloor=1 alone exceeds target.
1182
- // Under force, pickKeepBoundary should walk keepTurns below minFloor (down to
1183
- // 0) so the huge block falls into the compacted region and gets summarized
1184
- // instead of being kept at full size.
1185
- const provider = createProvider(() => ({
1186
- content: [{ type: "text", text: "## Goals\n- compressed large paste" }],
1187
- model: "mock-model",
1188
- usage: { inputTokens: 120, outputTokens: 20 },
1189
- stopReason: "end_turn",
1190
- }));
1191
- const manager = new ContextWindowManager({
1192
- provider,
1193
- systemPrompt: "system prompt",
1194
- config: makeConfig({ maxInputTokens: 600, targetBudgetRatio: 0.2 }),
1195
- });
1196
- const hugePaste = "p".repeat(4000); // ~1000 tokens, well above targetInputTokens
1197
- const history: Message[] = [
1198
- message("user", "u1 small"),
1199
- message("assistant", "a1 small"),
1200
- message("user", `u2 ${hugePaste}`),
1201
- ];
1202
-
1203
- const result = await manager.maybeCompact(history, undefined, {
1204
- force: true,
1205
- });
1206
-
1207
- expect(result.compacted).toBe(true);
1208
- // With force=true the kept region is empty; all turns including the oversized
1209
- // paste were summarized, so the compacted result is just the summary.
1210
- expect(result.messages).toHaveLength(1);
1211
- expect(result.compactedMessages).toBe(history.length);
1212
- expect(getSummaryFromContextMessage(result.messages[0])).toContain(
1213
- "compressed large paste",
1214
- );
1215
- expect(result.estimatedInputTokens).toBeLessThan(
1216
- result.previousEstimatedInputTokens,
1217
- );
1218
- });
1219
-
1220
- test("force=false honors minFloor even when the kept turn exceeds target", async () => {
1221
- // Same oversized paste, but without force the algorithm must preserve the
1222
- // minFloor=1 recent turn (auto mid-loop compaction needs the in-flight turn
1223
- // intact). Anything compactable before the floor still gets summarized.
1224
- const provider = createProvider(() => ({
1225
- content: [{ type: "text", text: "## Goals\n- summary" }],
1226
- model: "mock-model",
1227
- usage: { inputTokens: 60, outputTokens: 10 },
1228
- stopReason: "end_turn",
1229
- }));
1230
- const manager = new ContextWindowManager({
1231
- provider,
1232
- systemPrompt: "system prompt",
1233
- config: makeConfig({ maxInputTokens: 600, targetBudgetRatio: 0.2 }),
1234
- });
1235
- const hugePaste = "p".repeat(4000);
1236
- const history: Message[] = [
1237
- message("user", "u1 small"),
1238
- message("assistant", "a1 small"),
1239
- message("user", "u2 small"),
1240
- message("assistant", "a2 small"),
1241
- message("user", `u3 ${hugePaste}`),
1242
- ];
1243
-
1244
- const result = await manager.maybeCompact(history);
1245
-
1246
- expect(result.compacted).toBe(true);
1247
- // The oversized last user turn is retained verbatim; the kept array starts
1248
- // with the summary followed by the messages from that turn onward.
1249
- const lastUser = result.messages
1250
- .filter((m) => m.role === "user")
1251
- .map((m) => (m.content[0].type === "text" ? m.content[0].text : ""))
1252
- .find((t) => t.startsWith("u3 "));
1253
- expect(lastUser).toBeDefined();
1254
- expect(lastUser!.length).toBeGreaterThan(hugePaste.length);
1255
- });
1256
-
1257
- test("shouldCompact returns needed=false with estimatedTokens when below threshold", () => {
1258
- const provider = createProvider(() => {
1259
- throw new Error("should not be called");
1260
- });
1261
- const manager = new ContextWindowManager({
1262
- provider,
1263
- systemPrompt: "system prompt",
1264
- config: makeConfig(),
1265
- });
1266
- const history = [message("user", "hello"), message("assistant", "hi")];
1267
- const result = manager.shouldCompact(history);
1268
- expect(result.needed).toBe(false);
1269
- expect(result.estimatedTokens).toBeGreaterThan(0);
1270
- });
1271
-
1272
- test("shouldCompact returns needed=true with estimatedTokens when above threshold", () => {
1273
- const provider = createProvider(() => {
1274
- throw new Error("should not be called");
1275
- });
1276
- const manager = new ContextWindowManager({
1277
- provider,
1278
- systemPrompt: "system prompt",
1279
- config: makeConfig(),
1280
- });
1281
- const long = "x".repeat(240);
1282
- const history: Message[] = [
1283
- message("user", `u1 ${long}`),
1284
- message("assistant", `a1 ${long}`),
1285
- message("user", `u2 ${long}`),
1286
- message("assistant", `a2 ${long}`),
1287
- message("user", `u3 ${long}`),
1288
- message("assistant", `a3 ${long}`),
1289
- ];
1290
- const result = manager.shouldCompact(history);
1291
- expect(result.needed).toBe(true);
1292
- expect(result.estimatedTokens).toBeGreaterThan(0);
1293
- });
1294
-
1295
- test("shouldCompact returns needed=false with zero estimatedTokens when disabled", () => {
1296
- const provider = createProvider(() => {
1297
- throw new Error("should not be called");
1298
- });
1299
- const long = "x".repeat(240);
1300
- const manager = new ContextWindowManager({
1301
- provider,
1302
- systemPrompt: "system prompt",
1303
- config: makeConfig({ enabled: false }),
1304
- });
1305
- const history: Message[] = [
1306
- message("user", `u1 ${long}`),
1307
- message("assistant", `a1 ${long}`),
1308
- message("user", `u2 ${long}`),
1309
- message("assistant", `a2 ${long}`),
1310
- ];
1311
- const result = manager.shouldCompact(history);
1312
- expect(result.needed).toBe(false);
1313
- expect(result.estimatedTokens).toBe(0);
1314
- });
1315
-
1316
- test("truncates tool results in kept turns to preserve more conversation", async () => {
1317
- const provider = createProvider(() => ({
1318
- content: [{ type: "text", text: "## Goals\n- truncation summary" }],
1319
- model: "mock-model",
1320
- usage: { inputTokens: 60, outputTokens: 12 },
1321
- stopReason: "end_turn",
1322
- }));
1323
- // Budget is tight enough that full 8K tool results would force dropping turns,
1324
- // but truncated results (≤6K chars) should allow more turns to be kept.
1325
- const config = makeConfig({
1326
- maxInputTokens: 4000,
1327
- targetBudgetRatio: 0.7,
1328
- });
1329
- const manager = new ContextWindowManager({
1330
- provider,
1331
- systemPrompt: "system prompt",
1332
- config,
1333
- });
1334
-
1335
- const largeToolResult = "x".repeat(8000);
1336
- const history: Message[] = [
1337
- message("user", "u1"),
1338
- {
1339
- role: "assistant",
1340
- content: [
1341
- {
1342
- type: "tool_use",
1343
- id: "t1",
1344
- name: "read_file",
1345
- input: { path: "/tmp/a" },
1346
- },
1347
- ],
1348
- },
1349
- {
1350
- role: "user",
1351
- content: [
1352
- {
1353
- type: "tool_result",
1354
- tool_use_id: "t1",
1355
- content: largeToolResult,
1356
- },
1357
- ],
1358
- },
1359
- message("assistant", "a1"),
1360
- message("user", "u2"),
1361
- {
1362
- role: "assistant",
1363
- content: [
1364
- {
1365
- type: "tool_use",
1366
- id: "t2",
1367
- name: "read_file",
1368
- input: { path: "/tmp/b" },
1369
- },
1370
- ],
1371
- },
1372
- {
1373
- role: "user",
1374
- content: [
1375
- {
1376
- type: "tool_result",
1377
- tool_use_id: "t2",
1378
- content: largeToolResult,
1379
- },
1380
- ],
1381
- },
1382
- message("assistant", "a2"),
1383
- message("user", "u3"),
1384
- {
1385
- role: "assistant",
1386
- content: [
1387
- {
1388
- type: "tool_use",
1389
- id: "t3",
1390
- name: "read_file",
1391
- input: { path: "/tmp/c" },
1392
- },
1393
- ],
1394
- },
1395
- {
1396
- role: "user",
1397
- content: [
1398
- {
1399
- type: "tool_result",
1400
- tool_use_id: "t3",
1401
- content: largeToolResult,
1402
- },
1403
- ],
1404
- },
1405
- message("assistant", "a3"),
1406
- message("user", "u4"),
1407
- message("assistant", "a4"),
1408
- ];
1409
-
1410
- const result = await manager.maybeCompact(history, undefined, {
1411
- force: true,
1412
- });
1413
- expect(result.compacted).toBe(true);
1414
-
1415
- // Verify tool results in output are truncated (should be < 8K chars each).
1416
- for (const msg of result.messages) {
1417
- for (const block of msg.content) {
1418
- if (block.type === "tool_result") {
1419
- expect(block.content.length).toBeLessThan(8000);
1420
- }
1421
- }
1422
- }
1423
- });
1424
-
1425
- test("targetInputTokensOverride reduces retained turns beyond normal compaction", async () => {
1426
- const provider = createProvider(() => ({
1427
- content: [{ type: "text", text: "## Goals\n- tight fit summary" }],
1428
- model: "mock-model",
1429
- usage: { inputTokens: 60, outputTokens: 12 },
1430
- stopReason: "end_turn",
1431
- }));
1432
-
1433
- // Use generous default target so normal compaction would keep all 3 user turns.
1434
- const config = makeConfig({
1435
- maxInputTokens: 1200,
1436
- targetBudgetRatio: 0.88,
1437
- });
1438
- const long = "t".repeat(220);
1439
- const history: Message[] = [
1440
- message("user", `u1 ${long}`),
1441
- message("assistant", `a1 ${long}`),
1442
- message("user", `u2 ${long}`),
1443
- message("assistant", `a2 ${long}`),
1444
- message("user", `u3 ${long}`),
1445
- message("assistant", `a3 ${long}`),
1446
- ];
1447
-
1448
- // Without override: normal compaction keeps more turns.
1449
- const normalManager = new ContextWindowManager({
1450
- provider,
1451
- systemPrompt: "system prompt",
1452
- config,
1453
- });
1454
- const normalResult = await normalManager.maybeCompact(history, undefined, {
1455
- force: true,
1456
- });
1457
-
1458
- // With a very tight override target: should keep fewer turns.
1459
- const tightManager = new ContextWindowManager({
1460
- provider,
1461
- systemPrompt: "system prompt",
1462
- config,
1463
- });
1464
- const tightResult = await tightManager.maybeCompact(history, undefined, {
1465
- force: true,
1466
- targetInputTokensOverride: 80,
1467
- });
1468
-
1469
- expect(tightResult.compacted).toBe(true);
1470
- // The tight override should compact more messages than normal.
1471
- expect(tightResult.compactedMessages).toBeGreaterThan(
1472
- normalResult.compactedMessages,
1473
- );
1474
- });
1475
-
1476
- test("subtracts summaryOffset only when summary at index 0 was injected from parent", async () => {
1477
- const provider = createProvider(() => ({
1478
- content: [{ type: "text", text: "## Goals\n- new child summary" }],
1479
- model: "mock-model",
1480
- usage: { inputTokens: 75, outputTokens: 20 },
1481
- stopReason: "end_turn",
1482
- }));
1483
- const manager = new ContextWindowManager({
1484
- provider,
1485
- systemPrompt: "system prompt",
1486
- config: makeConfig({
1487
- maxInputTokens: 320,
1488
- targetBudgetRatio: 0.58,
1489
- }),
1490
- });
1491
- const long = "k".repeat(220);
1492
- // Parent-injected summary at index 0, plus 2 injected non-persisted
1493
- // messages, plus 3 child-persisted messages. nonPersistedPrefixCount
1494
- // includes the summary (set by injectInheritedContext).
1495
- const history: Message[] = [
1496
- createContextSummaryMessage("parent summary"),
1497
- message("user", `injected-u ${long}`),
1498
- message("assistant", `injected-a ${long}`),
1499
- message("user", `persisted-u1 ${long}`),
1500
- message("assistant", `persisted-a1 ${long}`),
1501
- message("user", `persisted-u2 ${long}`),
1502
- ];
1503
- manager.nonPersistedPrefixCount = 3;
1504
- manager.summaryIsInjected = true;
1505
-
1506
- const result = await manager.maybeCompact(history, undefined, {
1507
- force: true,
1508
- });
1509
- expect(result.compacted).toBe(true);
1510
- // 4 messages compacted (2 injected + 2 child-persisted), but only the
1511
- // 2 child-persisted ones count as DB-persisted.
1512
- expect(result.compactedMessages).toBe(4);
1513
- expect(result.compactedPersistedMessages).toBe(2);
1514
- // Flag clears and prefix drains (both injected messages + summary slot).
1515
- expect(manager.summaryIsInjected).toBe(false);
1516
- expect(manager.nonPersistedPrefixCount).toBe(0);
1517
- });
1518
-
1519
- test("summary system prompt instructs verbatim thread-anchor preservation", async () => {
1520
- const capturedSystemPrompts: (string | undefined)[] = [];
1521
- const provider: Provider = {
1522
- name: "mock",
1523
- async sendMessage(
1524
- _messages: Message[],
1525
- _tools,
1526
- systemPrompt,
1527
- ): Promise<ProviderResponse> {
1528
- capturedSystemPrompts.push(systemPrompt);
1529
- return {
1530
- content: [
1531
- {
1532
- type: "text",
1533
- text: "## Goals\n- preserved thread parent verbatim",
1534
- },
1535
- ],
1536
- model: "mock-model",
1537
- usage: { inputTokens: 60, outputTokens: 12 },
1538
- stopReason: "end_turn",
1539
- };
1540
- },
1541
- };
1542
- const manager = new ContextWindowManager({
1543
- provider,
1544
- systemPrompt: "system prompt",
1545
- config: makeConfig({ maxInputTokens: 600 }),
1546
- });
1547
- const long = "x".repeat(240);
1548
- // Simulate a Slack-style transcript where an old user "thread parent"
1549
- // message is about to be compacted while a later reply survives in the
1550
- // retained tail. The clause being asserted instructs the summarizer to
1551
- // preserve that parent verbatim — we cannot verify the model's behavior
1552
- // here (the provider is a stub), so we instead assert the clause itself
1553
- // reaches the summarizer.
1554
- const history: Message[] = [
1555
- message("user", `parent: kickoff plan ${long}`),
1556
- message("assistant", `a1 ${long}`),
1557
- message("user", `u2 ${long}`),
1558
- message("assistant", `a2 ${long}`),
1559
- message("user", `reply-in-thread ${long}`),
1560
- message("assistant", `a3 ${long}`),
1561
- ];
1562
-
1563
- const result = await manager.maybeCompact(history);
1564
- expect(result.compacted).toBe(true);
1565
- expect(capturedSystemPrompts.length).toBeGreaterThan(0);
1566
- const seenPrompt = capturedSystemPrompts[0];
1567
- expect(seenPrompt).toBeDefined();
1568
- expect(seenPrompt).toContain("Thread anchors");
1569
- expect(seenPrompt).toContain("verbatim");
1570
- });
1571
-
1572
- test("summary prompt lists retained-tail thread-reply references", async () => {
1573
- const capturedMessages: Message[][] = [];
1574
- const provider: Provider = {
1575
- name: "mock",
1576
- async sendMessage(messages: Message[]): Promise<ProviderResponse> {
1577
- capturedMessages.push(messages);
1578
- return {
1579
- content: [{ type: "text", text: "## Goals\n- ok" }],
1580
- model: "mock-model",
1581
- usage: { inputTokens: 60, outputTokens: 12 },
1582
- stopReason: "end_turn",
1583
- };
1584
- },
1585
- };
1586
- const manager = new ContextWindowManager({
1587
- provider,
1588
- systemPrompt: "system prompt",
1589
- config: makeConfig({ maxInputTokens: 600 }),
1590
- });
1591
- const long = "x".repeat(240);
1592
- // Compactable region ends before the retained tail, which contains a
1593
- // Slack-style reply line that cites its parent via `→ M1a2b3c`. The
1594
- // summary prompt must surface that reference so the Thread-anchors
1595
- // instruction has something to act on.
1596
- const history: Message[] = [
1597
- message("user", `[11/14/23 14:25 @alice]: parent kickoff ${long}`),
1598
- message("assistant", `a1 ${long}`),
1599
- message("user", `u2 ${long}`),
1600
- message("assistant", `a2 ${long}`),
1601
- message("user", `[11/14/23 14:28 @bob → M1a2b3c]: reply ${long}`),
1602
- message("assistant", `a3 ${long}`),
1603
- ];
1604
-
1605
- const result = await manager.maybeCompact(history);
1606
- expect(result.compacted).toBe(true);
1607
- expect(capturedMessages.length).toBeGreaterThan(0);
1608
- const userPromptText = capturedMessages[0]
1609
- .flatMap((m) => m.content)
1610
- .filter(
1611
- (b): b is Extract<ContentBlock, { type: "text" }> => b.type === "text",
1612
- )
1613
- .map((b) => b.text)
1614
- .join("\n");
1615
- expect(userPromptText).toContain("### Retained Thread References");
1616
- expect(userPromptText).toContain("→ M1a2b3c");
1617
- });
1618
-
1619
- test("summary prompt lists retained-tail thread-reply references for edited replies", async () => {
1620
- const capturedMessages: Message[][] = [];
1621
- const provider: Provider = {
1622
- name: "mock",
1623
- async sendMessage(messages: Message[]): Promise<ProviderResponse> {
1624
- capturedMessages.push(messages);
1625
- return {
1626
- content: [{ type: "text", text: "## Goals\n- ok" }],
1627
- model: "mock-model",
1628
- usage: { inputTokens: 60, outputTokens: 12 },
1629
- stopReason: "end_turn",
1630
- };
1631
- },
1632
- };
1633
- const manager = new ContextWindowManager({
1634
- provider,
1635
- systemPrompt: "system prompt",
1636
- config: makeConfig({ maxInputTokens: 600 }),
1637
- });
1638
- const long = "x".repeat(240);
1639
- // An edited reply renders with `, edited …` between the parent alias and
1640
- // the closing bracket: `→ Mxxxxxx, edited MM/DD/YY HH:MM]`. The regex
1641
- // must still flag these lines so retention works for edited replies.
1642
- const history: Message[] = [
1643
- message("user", `[11/14/23 14:25 @alice]: parent kickoff ${long}`),
1644
- message("assistant", `a1 ${long}`),
1645
- message("user", `u2 ${long}`),
1646
- message("assistant", `a2 ${long}`),
1647
- message(
1648
- "user",
1649
- `[11/14/23 14:28 @bob → M1a2b3c, edited 11/14/23 14:32]: reply ${long}`,
1650
- ),
1651
- message("assistant", `a3 ${long}`),
1652
- ];
1653
-
1654
- const result = await manager.maybeCompact(history);
1655
- expect(result.compacted).toBe(true);
1656
- expect(capturedMessages.length).toBeGreaterThan(0);
1657
- const userPromptText = capturedMessages[0]
1658
- .flatMap((m) => m.content)
1659
- .filter(
1660
- (b): b is Extract<ContentBlock, { type: "text" }> => b.type === "text",
1661
- )
1662
- .map((b) => b.text)
1663
- .join("\n");
1664
- expect(userPromptText).toContain("### Retained Thread References");
1665
- expect(userPromptText).toContain("→ M1a2b3c, edited 11/14/23 14:32");
1666
- });
1667
-
1668
- test("summary prompt omits retained references when retained tail has no thread markers", async () => {
1669
- const capturedMessages: Message[][] = [];
1670
- const provider: Provider = {
1671
- name: "mock",
1672
- async sendMessage(messages: Message[]): Promise<ProviderResponse> {
1673
- capturedMessages.push(messages);
1674
- return {
1675
- content: [{ type: "text", text: "## Goals\n- ok" }],
1676
- model: "mock-model",
1677
- usage: { inputTokens: 60, outputTokens: 12 },
1678
- stopReason: "end_turn",
1679
- };
1680
- },
1681
- };
1682
- const manager = new ContextWindowManager({
1683
- provider,
1684
- systemPrompt: "system prompt",
1685
- config: makeConfig({ maxInputTokens: 600 }),
1686
- });
1687
- const long = "x".repeat(240);
1688
- const history: Message[] = [
1689
- message("user", `u1 ${long}`),
1690
- message("assistant", `a1 ${long}`),
1691
- message("user", `u2 ${long}`),
1692
- message("assistant", `a2 ${long}`),
1693
- message("user", `u3 ${long}`),
1694
- message("assistant", `a3 ${long}`),
1695
- ];
1696
-
1697
- const result = await manager.maybeCompact(history);
1698
- expect(result.compacted).toBe(true);
1699
- const userPromptText = capturedMessages[0]
1700
- .flatMap((m) => m.content)
1701
- .filter(
1702
- (b): b is Extract<ContentBlock, { type: "text" }> => b.type === "text",
1703
- )
1704
- .map((b) => b.text)
1705
- .join("\n");
1706
- expect(userPromptText).not.toContain("### Retained Thread References");
1707
- expect(userPromptText).not.toMatch(/→ M[0-9a-f]{6}]/);
1708
- });
1709
-
1710
- test("does not subtract summaryOffset when summary at index 0 is child-owned from prior compaction", async () => {
1711
- const provider = createProvider(() => ({
1712
- content: [{ type: "text", text: "## Goals\n- next child summary" }],
1713
- model: "mock-model",
1714
- usage: { inputTokens: 75, outputTokens: 20 },
1715
- stopReason: "end_turn",
1716
- }));
1717
- const manager = new ContextWindowManager({
1718
- provider,
1719
- systemPrompt: "system prompt",
1720
- config: makeConfig({
1721
- maxInputTokens: 320,
1722
- targetBudgetRatio: 0.58,
1723
- }),
1724
- });
1725
- const long = "k".repeat(220);
1726
- // Post-first-compaction state: child-owned summary at index 0, 2
1727
- // still-injected messages that survived the first compaction's keep
1728
- // region, 3 child-persisted messages. nonPersistedPrefixCount reflects
1729
- // only the 2 remaining injected messages — the summary slot was already
1730
- // consumed when the flag-gated decrement ran on the prior compaction.
1731
- const history: Message[] = [
1732
- createContextSummaryMessage("prior child summary"),
1733
- message("user", `injected-u ${long}`),
1734
- message("assistant", `injected-a ${long}`),
1735
- message("user", `persisted-u1 ${long}`),
1736
- message("assistant", `persisted-a1 ${long}`),
1737
- message("user", `persisted-u2 ${long}`),
1738
- ];
1739
- manager.nonPersistedPrefixCount = 2;
1740
- manager.summaryIsInjected = false;
1741
-
1742
- const result = await manager.maybeCompact(history, undefined, {
1743
- force: true,
1744
- });
1745
- expect(result.compacted).toBe(true);
1746
- expect(result.compactedMessages).toBe(4);
1747
- // Regression guard: without the flag gate, the subtraction from the
1748
- // #24353 fix would double-apply here (nonPersistedPrefixCount - 1),
1749
- // undercounting injectedInCompactable and inflating
1750
- // compactedPersistedMessages by 1 (to 3).
1751
- expect(result.compactedPersistedMessages).toBe(2);
1752
- expect(manager.nonPersistedPrefixCount).toBe(0);
1753
- });
1754
-
1755
- test("Slack origin bumps default minKeepRecentUserTurns to 8", async () => {
1756
- const provider = createProvider(() => ({
1757
- content: [{ type: "text", text: "## Goals\n- slack thread context" }],
1758
- model: "mock-model",
1759
- usage: { inputTokens: 60, outputTokens: 12 },
1760
- stopReason: "end_turn",
1761
- }));
1762
-
1763
- // Use targetInputTokensOverride so the binary search is forced even
1764
- // for a small history. Both managers see the same tight budget; the
1765
- // only knob that varies is conversationOriginChannel.
1766
- const config = makeConfig({ maxInputTokens: 12_000 });
1767
- const long = "s".repeat(220);
1768
- // 9 user turns: enough headroom for Slack's bumped floor of 8 to be
1769
- // distinguishable from the default floor of 1.
1770
- const history: Message[] = [];
1771
- for (let i = 1; i <= 9; i++) {
1772
- history.push(message("user", `u${i} ${long}`));
1773
- history.push(message("assistant", `a${i} ${long}`));
1774
- }
1775
-
1776
- const slackManager = new ContextWindowManager({
1777
- provider,
1778
- systemPrompt: "system prompt",
1779
- config,
1780
- });
1781
- const slackResult = await slackManager.maybeCompact(history, undefined, {
1782
- force: true,
1783
- targetInputTokensOverride: 200,
1784
- conversationOriginChannel: "slack",
1785
- });
1786
-
1787
- const defaultManager = new ContextWindowManager({
1788
- provider,
1789
- systemPrompt: "system prompt",
1790
- config,
1791
- });
1792
- const defaultResult = await defaultManager.maybeCompact(
1793
- history,
1794
- undefined,
1795
- { force: true, targetInputTokensOverride: 200 },
1796
- );
1797
-
1798
- expect(slackResult.compacted).toBe(true);
1799
- expect(defaultResult.compacted).toBe(true);
1800
- // Default floor (1 user turn) compacts more of the history than the
1801
- // Slack floor (8 user turns), which preserves more recent context.
1802
- expect(defaultResult.compactedMessages).toBeGreaterThan(
1803
- slackResult.compactedMessages,
1804
- );
1805
- // Slack keeps 8 of 9 user turns: 16 kept messages, 2 compacted.
1806
- expect(slackResult.compactedMessages).toBe(2);
1807
- });
1808
-
1809
- test("non-Slack origin keeps default minKeepRecentUserTurns of 1", async () => {
1810
- const provider = createProvider(() => ({
1811
- content: [{ type: "text", text: "## Goals\n- standard summary" }],
1812
- model: "mock-model",
1813
- usage: { inputTokens: 60, outputTokens: 12 },
1814
- stopReason: "end_turn",
1815
- }));
1816
-
1817
- const config = makeConfig({ maxInputTokens: 12_000 });
1818
- const long = "n".repeat(220);
1819
- const history: Message[] = [];
1820
- for (let i = 1; i <= 9; i++) {
1821
- history.push(message("user", `u${i} ${long}`));
1822
- history.push(message("assistant", `a${i} ${long}`));
1823
- }
1824
-
1825
- // Telegram origin must behave identically to no-channel-hint default.
1826
- const telegramManager = new ContextWindowManager({
1827
- provider,
1828
- systemPrompt: "system prompt",
1829
- config,
1830
- });
1831
- const telegramResult = await telegramManager.maybeCompact(
1832
- history,
1833
- undefined,
1834
- {
1835
- force: true,
1836
- targetInputTokensOverride: 200,
1837
- conversationOriginChannel: "telegram",
1838
- },
1839
- );
1840
-
1841
- const defaultManager = new ContextWindowManager({
1842
- provider,
1843
- systemPrompt: "system prompt",
1844
- config,
1845
- });
1846
- const defaultResult = await defaultManager.maybeCompact(
1847
- history,
1848
- undefined,
1849
- { force: true, targetInputTokensOverride: 200 },
1850
- );
1851
-
1852
- expect(telegramResult.compacted).toBe(true);
1853
- expect(defaultResult.compacted).toBe(true);
1854
- expect(telegramResult.compactedMessages).toBe(
1855
- defaultResult.compactedMessages,
1856
- );
1857
- });
1858
-
1859
- test("explicit minKeepRecentUserTurns wins over Slack default", async () => {
1860
- const provider = createProvider(() => ({
1861
- content: [{ type: "text", text: "## Goals\n- emergency override" }],
1862
- model: "mock-model",
1863
- usage: { inputTokens: 60, outputTokens: 12 },
1864
- stopReason: "end_turn",
1865
- }));
1866
-
1867
- const manager = new ContextWindowManager({
1868
- provider,
1869
- systemPrompt: "system prompt",
1870
- config: makeConfig({
1871
- maxInputTokens: 260,
1872
- targetBudgetRatio: 0.28,
1873
- }),
1874
- });
1875
- const long = "e".repeat(220);
1876
- const history: Message[] = [
1877
- message("user", `u1 ${long}`),
1878
- message("assistant", `a1 ${long}`),
1879
- message("user", `u2 ${long}`),
1880
- ];
1881
-
1882
- // Emergency override (`minKeepRecentUserTurns: 0`) must take precedence
1883
- // over the Slack-bumped default of 8 — this guards the agent loop's
1884
- // context-too-large recovery path which always passes 0.
1885
- const result = await manager.maybeCompact(history, undefined, {
1886
- force: true,
1887
- minKeepRecentUserTurns: 0,
1888
- conversationOriginChannel: "slack",
1889
- });
1890
- expect(result.compacted).toBe(true);
1891
- expect(result.compactedMessages).toBe(3);
1892
- expect(result.messages).toHaveLength(1);
1893
- });
1894
-
1895
- test("summary provider call includes callSite: conversationSummarization", async () => {
1896
- // Regression guard for JARVIS-587: without the callSite, the summary
1897
- // call fell through to `llm.default` (opus + effort=max + thinking
1898
- // enabled) and exceeded the 30s plugin pipeline budget on ~150k-token
1899
- // transcripts. The fix is to route the summary call through the
1900
- // dedicated `conversationSummarization` call-site config.
1901
- const capturedOptions: (SendMessageOptions | undefined)[] = [];
1902
- const provider: Provider = {
1903
- name: "mock",
1904
- async sendMessage(
1905
- _messages: Message[],
1906
- _tools: unknown,
1907
- _systemPrompt: unknown,
1908
- options?: SendMessageOptions,
1909
- ): Promise<ProviderResponse> {
1910
- capturedOptions.push(options);
1911
- return {
1912
- content: [{ type: "text", text: "## Goals\n- summary" }],
1913
- model: "mock-model",
1914
- usage: { inputTokens: 50, outputTokens: 10 },
1915
- stopReason: "end_turn",
1916
- };
1917
- },
1918
- };
1919
- const manager = new ContextWindowManager({
1920
- provider,
1921
- systemPrompt: "system prompt",
1922
- config: makeConfig({ maxInputTokens: 600 }),
1923
- });
1924
- const long = "x".repeat(240);
1925
- const history: Message[] = [
1926
- message("user", `u1 ${long}`),
1927
- message("assistant", `a1 ${long}`),
1928
- message("user", `u2 ${long}`),
1929
- message("assistant", `a2 ${long}`),
1930
- message("user", `u3 ${long}`),
1931
- message("assistant", `a3 ${long}`),
1932
- ];
1933
-
1934
- const result = await manager.maybeCompact(history);
1935
- expect(result.compacted).toBe(true);
1936
- expect(capturedOptions.length).toBeGreaterThan(0);
1937
- for (const options of capturedOptions) {
1938
- expect(options?.config?.callSite).toBe("conversationSummarization");
1939
- }
1940
- });
1941
- });
1942
-
1943
- describe("stripCompactionOnlyInjections", () => {
1944
- test("removes memory, turn_context, and workspace text blocks from user messages", () => {
1945
- const messages: Message[] = [
1946
- {
1947
- role: "user",
1948
- content: [
1949
- {
1950
- type: "text",
1951
- text: "<memory __injected>\nrecall notes\n</memory>",
1952
- },
1953
- {
1954
- type: "text",
1955
- text: "<turn_context>\nActor: Alice\n</turn_context>",
1956
- },
1957
- { type: "text", text: "real user content" },
1958
- ],
1959
- },
1960
- {
1961
- role: "assistant",
1962
- content: [{ type: "text", text: "assistant reply" }],
1963
- },
1964
- ];
1965
- const stripped = stripCompactionOnlyInjections(messages);
1966
- expect(stripped).toHaveLength(2);
1967
- const firstText = (stripped[0].content[0] as { text: string }).text;
1968
- expect(firstText).toBe("real user content");
1969
- expect(stripped[0].content).toHaveLength(1);
1970
- });
1971
-
1972
- test("drops user messages that become empty after stripping", () => {
1973
- const messages: Message[] = [
1974
- {
1975
- role: "user",
1976
- content: [
1977
- { type: "text", text: "<memory __injected>\nonly memory\n</memory>" },
1978
- ],
1979
- },
1980
- { role: "user", content: [{ type: "text", text: "real content" }] },
1981
- ];
1982
- const stripped = stripCompactionOnlyInjections(messages);
1983
- expect(stripped).toHaveLength(1);
1984
- expect((stripped[0].content[0] as { text: string }).text).toBe(
1985
- "real content",
1986
- );
1987
- });
1988
-
1989
- test("leaves assistant messages and non-text blocks untouched", () => {
1990
- const messages: Message[] = [
1991
- {
1992
- role: "assistant",
1993
- content: [
1994
- {
1995
- type: "text",
1996
- text: "<turn_context>\nnot really injected\n</turn_context>",
1997
- },
1998
- ],
1999
- },
2000
- {
2001
- role: "user",
2002
- content: [
2003
- {
2004
- type: "tool_result",
2005
- tool_use_id: "t1",
2006
- content: "<memory>fake</memory>",
2007
- },
2008
- { type: "text", text: "user reply" },
2009
- ],
2010
- },
2011
- ];
2012
- const stripped = stripCompactionOnlyInjections(messages);
2013
- expect(stripped).toHaveLength(2);
2014
- expect((stripped[0].content[0] as { text: string }).text).toContain(
2015
- "turn_context",
2016
- );
2017
- expect(stripped[1].content).toHaveLength(2);
2018
- });
2019
-
2020
- test("preserves user prose that merely mentions ambiguous tag names", () => {
2021
- // Common-word bare tags embedded in legitimate user prose (discussions of
2022
- // XML, system terminology, etc.) must survive stripping because they are
2023
- // not shaped like a runtime injection — no leading newline after the
2024
- // open tag, or other prose surrounds the tag.
2025
- const messages: Message[] = [
2026
- {
2027
- role: "user",
2028
- content: [
2029
- {
2030
- type: "text",
2031
- text: "<memory> is a tag I'd like to add to my parser",
2032
- },
2033
- ],
2034
- },
2035
- {
2036
- role: "user",
2037
- content: [
2038
- {
2039
- type: "text",
2040
- text: "checking <workspace> usage across the repo, any thoughts?",
2041
- },
2042
- ],
2043
- },
2044
- {
2045
- role: "user",
2046
- content: [
2047
- {
2048
- type: "text",
2049
- text: "what is <knowledge_base> in this context?",
2050
- },
2051
- ],
2052
- },
2053
- {
2054
- role: "user",
2055
- content: [
2056
- { type: "text", text: "<pkb> sounds like a short name — wrong?" },
2057
- ],
2058
- },
2059
- {
2060
- role: "user",
2061
- content: [
2062
- {
2063
- type: "text",
2064
- text: "when the model hits a <system_reminder>, what happens next?",
2065
- },
2066
- ],
2067
- },
2068
- ];
2069
- const stripped = stripCompactionOnlyInjections(messages);
2070
- expect(stripped).toHaveLength(messages.length);
2071
- for (let i = 0; i < messages.length; i++) {
2072
- expect(stripped[i].content).toHaveLength(1);
2073
- expect((stripped[i].content[0] as { text: string }).text).toBe(
2074
- (messages[i].content[0] as { text: string }).text,
2075
- );
2076
- }
2077
- });
2078
-
2079
- test("still strips runtime-shaped wrapped blocks for ambiguous tag names", () => {
2080
- // Bare-tag blocks with a newline after the open tag and a matching close
2081
- // tag (e.g. `<memory>\n...\n</memory>`) match the wrapped-strip path.
2082
- // This covers both the current runtime emission shape and blocks
2083
- // persisted before the `__injected` attribute existed — the prefix list
2084
- // handles `__injected`-attributed tags, and the wrapped matcher handles
2085
- // the bare-tag wrap shape.
2086
- const messages: Message[] = [
2087
- {
2088
- role: "user",
2089
- content: [
2090
- { type: "text", text: "<memory>\nlegacy recall blob\n</memory>" },
2091
- { type: "text", text: "actual user content" },
2092
- ],
2093
- },
2094
- {
2095
- role: "user",
2096
- content: [
2097
- {
2098
- type: "text",
2099
- text: "<workspace>\nRoot: /home\nFiles: a, b\n</workspace>",
2100
- },
2101
- { type: "text", text: "more prose" },
2102
- ],
2103
- },
2104
- {
2105
- role: "user",
2106
- content: [
2107
- {
2108
- type: "text",
2109
- text: "<system_reminder>\nread your PKB\n</system_reminder>",
2110
- },
2111
- { type: "text", text: "ok" },
2112
- ],
2113
- },
2114
- ];
2115
- const stripped = stripCompactionOnlyInjections(messages);
2116
- expect(stripped).toHaveLength(3);
2117
- for (const msg of stripped) {
2118
- expect(msg.content).toHaveLength(1);
2119
- }
2120
- expect((stripped[0].content[0] as { text: string }).text).toBe(
2121
- "actual user content",
2122
- );
2123
- expect((stripped[1].content[0] as { text: string }).text).toBe(
2124
- "more prose",
2125
- );
2126
- expect((stripped[2].content[0] as { text: string }).text).toBe("ok");
2127
- });
2128
-
2129
- test("does not strip a user's inline snippet that is not shaped like an injection", () => {
2130
- // A user quoting a `<memory>...</memory>` snippet alongside prose in the
2131
- // SAME text block should survive — the block does not start with
2132
- // `<memory>\n` (there's surrounding prose) so the wrapped-tag match
2133
- // does not trigger.
2134
- const messages: Message[] = [
2135
- {
2136
- role: "user",
2137
- content: [
2138
- {
2139
- type: "text",
2140
- text: "Here's the XML I'm working with: <memory>x</memory> — what do you think?",
2141
- },
2142
- ],
2143
- },
2144
- ];
2145
- const stripped = stripCompactionOnlyInjections(messages);
2146
- expect(stripped).toHaveLength(1);
2147
- expect((stripped[0].content[0] as { text: string }).text).toContain(
2148
- "<memory>x</memory>",
2149
- );
2150
- });
2151
- });
2152
-
2153
- describe("summarizer input excludes runtime injections", () => {
2154
- test("maybeCompact does not pass memory/turn_context text to the summarizer", async () => {
2155
- const seenPrompts: string[] = [];
2156
- const provider = createProvider((messages) => {
2157
- for (const msg of messages) {
2158
- for (const block of msg.content) {
2159
- if (block.type === "text") seenPrompts.push(block.text);
2160
- }
2161
- }
2162
- return {
2163
- content: [
2164
- {
2165
- type: "text",
2166
- text: "## Facts Worth Remembering\n- summary produced",
2167
- },
2168
- ],
2169
- model: "mock",
2170
- usage: { inputTokens: 100, outputTokens: 25 },
2171
- stopReason: "end_turn",
2172
- };
2173
- });
2174
- const manager = new ContextWindowManager({
2175
- provider,
2176
- systemPrompt: "system prompt",
2177
- config: makeConfig({
2178
- maxInputTokens: 2000,
2179
- targetBudgetRatio: 0.4,
2180
- compactThreshold: 0.35,
2181
- }),
2182
- });
2183
- const long = "x".repeat(1500);
2184
- const memoryBlob =
2185
- "<memory __injected>\nBOB_ATTENDED_STANDUP_YESTERDAY\n</memory>";
2186
- const turnCtx =
2187
- "<turn_context>\nACTOR_METADATA_THAT_SHOULD_NOT_LEAK\n</turn_context>";
2188
- const history: Message[] = [
2189
- {
2190
- role: "user",
2191
- content: [
2192
- { type: "text", text: memoryBlob },
2193
- { type: "text", text: turnCtx },
2194
- { type: "text", text: `u1 ${long}` },
2195
- ],
2196
- },
2197
- message("assistant", `a1 ${long}`),
2198
- message("user", `u2 ${long}`),
2199
- message("assistant", `a2 ${long}`),
2200
- message("user", `u3 ${long}`),
2201
- ];
2202
-
2203
- const result = await manager.maybeCompact(history);
2204
- expect(result.compacted).toBe(true);
2205
- const joined = seenPrompts.join("\n");
2206
- expect(joined).not.toContain("BOB_ATTENDED_STANDUP_YESTERDAY");
2207
- expect(joined).not.toContain("ACTOR_METADATA_THAT_SHOULD_NOT_LEAK");
2208
- expect(joined).not.toContain("<memory __injected>");
2209
- expect(joined).not.toContain("<turn_context>");
2210
- // Real conversation content should survive — at least one of the
2211
- // middle turns (whose header/body is short enough to fit within the
2212
- // capped transcript budget) should appear in the summarizer input.
2213
- expect(joined).toMatch(/u2 |a1 /);
2214
- });
2215
- });
2216
-
2217
- describe("clampSummaryAtSectionBoundary", () => {
2218
- test("returns the input unchanged when under the limit", () => {
2219
- const summary = "## Decisions\nWe decided to ship.";
2220
- expect(clampSummaryAtSectionBoundary(summary, 1000)).toBe(summary);
2221
- });
2222
-
2223
- test("truncates at a `## ` boundary when one exists in the allowed region", () => {
2224
- const keeper = "## Facts\n" + "a".repeat(200);
2225
- const dropped = "## Open Threads\n" + "b".repeat(500);
2226
- const summary = `${keeper}\n${dropped}`;
2227
- const maxChars = keeper.length + 20;
2228
- const clamped = clampSummaryAtSectionBoundary(summary, maxChars);
2229
- expect(clamped.endsWith("...")).toBe(true);
2230
- expect(clamped).not.toContain("## Open Threads");
2231
- expect(clamped).toContain("## Facts");
2232
- // No mid-header cut: nothing that looks like a partial heading.
2233
- expect(/##\s*$/.test(clamped)).toBe(false);
2234
- });
2235
-
2236
- test("falls back to a hard cut when no section boundary is past the midpoint", () => {
2237
- const body = "no section headers in this output " + "z".repeat(1000);
2238
- const clamped = clampSummaryAtSectionBoundary(body, 100);
2239
- expect(clamped.endsWith("...")).toBe(true);
2240
- expect(clamped.length).toBeLessThanOrEqual(100);
2241
- });
2242
- });
2243
-
2244
- describe("extractTailAssistantText", () => {
2245
- test("returns the most recent assistant text block", () => {
2246
- const messages: Message[] = [
2247
- message("user", "u1"),
2248
- message("assistant", "a1 first"),
2249
- message("user", "u2"),
2250
- message("assistant", "a2 last"),
2251
- ];
2252
- expect(extractTailAssistantText(messages)).toBe("a2 last");
2253
- });
2254
-
2255
- test("returns null when no assistant text is present", () => {
2256
- const messages: Message[] = [message("user", "u1"), message("user", "u2")];
2257
- expect(extractTailAssistantText(messages)).toBeNull();
2258
- });
2259
-
2260
- test("skips assistant messages with only tool_use blocks and finds the prior text", () => {
2261
- const messages: Message[] = [
2262
- message("assistant", "a1 narration before tool use"),
2263
- message("user", "u1"),
2264
- {
2265
- role: "assistant",
2266
- content: [
2267
- {
2268
- type: "tool_use",
2269
- id: "tool-1",
2270
- name: "bash",
2271
- input: { command: "ls" },
2272
- } as ContentBlock,
2273
- ],
2274
- },
2275
- ];
2276
- expect(extractTailAssistantText(messages)).toBe(
2277
- "a1 narration before tool use",
2278
- );
2279
- });
2280
-
2281
- test("clamps long text from the start so the END is preserved", () => {
2282
- const longText = "early prefix " + "x".repeat(2000) + " FINAL NEXT STEP";
2283
- const messages: Message[] = [message("assistant", longText)];
2284
- const result = extractTailAssistantText(messages, 200);
2285
- expect(result).not.toBeNull();
2286
- expect(result!.startsWith("[...truncated]")).toBe(true);
2287
- expect(result!.endsWith("FINAL NEXT STEP")).toBe(true);
2288
- // Stripped block size ≈ maxChars; "[...truncated] " adds a fixed prefix.
2289
- expect(result!.length).toBeLessThanOrEqual(200 + "[...truncated] ".length);
2290
- });
2291
-
2292
- test("ignores empty/whitespace-only assistant text", () => {
2293
- const messages: Message[] = [
2294
- message("assistant", "real content"),
2295
- message("assistant", " \n "),
2296
- ];
2297
- expect(extractTailAssistantText(messages)).toBe("real content");
2298
- });
2299
-
2300
- test("returns null for an empty messages array", () => {
2301
- expect(extractTailAssistantText([])).toBeNull();
2302
- });
2303
- });
2304
-
2305
- describe("appendTailAnchorToSummary", () => {
2306
- test("appends a tag-wrapped block after the summary", () => {
2307
- const out = appendTailAnchorToSummary(
2308
- "## Goals\n- item",
2309
- "Next step: file the SSE followup.",
2310
- );
2311
- expect(out).toContain("## Goals\n- item");
2312
- expect(out).toContain(
2313
- "<verbatim_tail>\nNext step: file the SSE followup.\n</verbatim_tail>",
2314
- );
2315
- expect(out.endsWith("</verbatim_tail>")).toBe(true);
2316
- });
2317
-
2318
- test("is idempotent: re-applying with new text replaces the prior tail", () => {
2319
- const first = appendTailAnchorToSummary("body", "tail-1");
2320
- const second = appendTailAnchorToSummary(first, "tail-2");
2321
- expect(second).toContain("body");
2322
- expect(second).toContain("tail-2");
2323
- expect(second).not.toContain("tail-1");
2324
- // Exactly one open-tag occurrence — no stacking.
2325
- expect(second.match(/<verbatim_tail>/g)?.length).toBe(1);
2326
- });
2327
- });
2328
-
2329
- describe("compaction tail-anchor", () => {
2330
- test("splices the last assistant text block verbatim into the summary message", async () => {
2331
- const provider = createProvider(() => ({
2332
- content: [{ type: "text", text: "## Goals\n- LLM summary" }],
2333
- model: "mock-model",
2334
- usage: { inputTokens: 100, outputTokens: 25 },
2335
- stopReason: "end_turn",
2336
- }));
2337
- const manager = new ContextWindowManager({
2338
- provider,
2339
- systemPrompt: "system prompt",
2340
- config: makeConfig({ maxInputTokens: 600 }),
2341
- });
2342
- const long = "x".repeat(240);
2343
- const distinctiveTail =
2344
- "Pushed 8fe70d63a0 — next step: file the SSE followup as promised.";
2345
- // Place `distinctiveTail` as the assistant response for u1 so it lands
2346
- // at the end of the compactable region. With the same 600-token budget
2347
- // and 6-message shape as the existing 600-token compaction test above,
2348
- // the binary search settles on keepTurns=2 (kept = [u2, a2, u3, a3];
2349
- // compactable = [u1, distinctiveTail]) — exercising the real-world
2350
- // drift scenario where the model's last narration in a long work span
2351
- // gets summarized away.
2352
- const history: Message[] = [
2353
- message("user", `u1 ${long}`),
2354
- message("assistant", distinctiveTail),
2355
- message("user", `u2 ${long}`),
2356
- message("assistant", `a2 ${long}`),
2357
- message("user", `u3 ${long}`),
2358
- message("assistant", `a3 ${long}`),
2359
- ];
2360
-
2361
- const result = await manager.maybeCompact(history);
2362
-
2363
- expect(result.compacted).toBe(true);
2364
- const summaryInner = getSummaryFromContextMessage(result.messages[0]);
2365
- expect(summaryInner).not.toBeNull();
2366
- // LLM summary still present.
2367
- expect(summaryInner).toContain("LLM summary");
2368
- // Verbatim tail spliced in: distinctive text from the LAST assistant
2369
- // message in the compactable region (here, `distinctiveTail`).
2370
- expect(summaryInner).toContain("<verbatim_tail>");
2371
- expect(summaryInner).toContain(distinctiveTail);
2372
- expect(summaryInner).toContain("</verbatim_tail>");
2373
- // summaryText reflects what's persisted in messages[0] for consistency
2374
- // with downstream consumers (DB, context_compacted event).
2375
- expect(result.summaryText).toContain(distinctiveTail);
2376
- });
2377
-
2378
- test("omits the tail-anchor block when no assistant text exists in compactable region", async () => {
2379
- // Construct a scenario where the compactable region has assistant
2380
- // messages with ONLY tool_use blocks (no text) plus user turns. The
2381
- // anchor should be omitted gracefully.
2382
- const provider = createProvider(() => ({
2383
- content: [{ type: "text", text: "## Goals\n- summary" }],
2384
- model: "mock-model",
2385
- usage: { inputTokens: 100, outputTokens: 25 },
2386
- stopReason: "end_turn",
2387
- }));
2388
- const manager = new ContextWindowManager({
2389
- provider,
2390
- systemPrompt: "system prompt",
2391
- config: makeConfig({ maxInputTokens: 600 }),
2392
- });
2393
- const long = "x".repeat(240);
2394
- const history: Message[] = [
2395
- message("user", `u1 ${long}`),
2396
- {
2397
- role: "assistant",
2398
- content: [
2399
- {
2400
- type: "tool_use",
2401
- id: "tool-1",
2402
- name: "bash",
2403
- input: { command: "ls" },
2404
- } as ContentBlock,
2405
- ],
2406
- },
2407
- {
2408
- role: "user",
2409
- content: [
2410
- {
2411
- type: "tool_result",
2412
- tool_use_id: "tool-1",
2413
- content: "ls output",
2414
- } as ContentBlock,
2415
- ],
2416
- },
2417
- message("user", `u2 ${long}`),
2418
- message("assistant", `a2 ${long}`),
2419
- message("user", `u3 ${long}`),
2420
- message("assistant", `a3 ${long}`),
2421
- ];
2422
-
2423
- const result = await manager.maybeCompact(history);
2424
-
2425
- expect(result.compacted).toBe(true);
2426
- const summaryInner = getSummaryFromContextMessage(result.messages[0]);
2427
- expect(summaryInner).not.toBeNull();
2428
- // No tail anchor when the only compactable assistant message has no text.
2429
- // (a2 / a3 are kept verbatim post-compaction since they're recent enough,
2430
- // so the compactable-region's only assistant message is the tool_use one.)
2431
- if (summaryInner!.includes("<verbatim_tail>")) {
2432
- // If a2 ended up in the compactable region after binary search, the
2433
- // anchor would surface a2's text — which is fine; the assertion that
2434
- // matters is that the spliced content (when present) is verbatim
2435
- // content from the compactable region, not noise. Validate the
2436
- // ordering: anchor must follow LLM summary text.
2437
- expect(summaryInner!.indexOf("summary")).toBeLessThan(
2438
- summaryInner!.indexOf("<verbatim_tail>"),
2439
- );
2440
- }
2441
- });
2442
-
2443
- test("clamps tail-anchor when the last assistant text is longer than the cap", async () => {
2444
- const provider = createProvider(() => ({
2445
- content: [{ type: "text", text: "## Goals\n- summary" }],
2446
- model: "mock-model",
2447
- usage: { inputTokens: 100, outputTokens: 25 },
2448
- stopReason: "end_turn",
2449
- }));
2450
- const manager = new ContextWindowManager({
2451
- provider,
2452
- systemPrompt: "system prompt",
2453
- config: makeConfig({ maxInputTokens: 600 }),
2454
- });
2455
- const long = "x".repeat(240);
2456
- const tailEnd = "FINAL DISTINCTIVE END MARKER";
2457
- // Long enough to trip TAIL_ANCHOR_MAX_CHARS (=1500) clamping.
2458
- const longTail = "early body " + "y".repeat(2000) + " " + tailEnd;
2459
- const history: Message[] = [
2460
- message("user", `u1 ${long}`),
2461
- message("assistant", longTail),
2462
- message("user", `u2 ${long}`),
2463
- message("assistant", `a2 ${long}`),
2464
- message("user", `u3 ${long}`),
2465
- message("assistant", `a3 ${long}`),
2466
- ];
2467
-
2468
- const result = await manager.maybeCompact(history);
2469
-
2470
- expect(result.compacted).toBe(true);
2471
- const summaryInner = getSummaryFromContextMessage(result.messages[0]);
2472
- expect(summaryInner).not.toBeNull();
2473
- if (summaryInner!.includes("<verbatim_tail>")) {
2474
- // When clamped, the END is preserved (most recent narration).
2475
- expect(summaryInner).toContain(tailEnd);
2476
- // And the early prefix is dropped.
2477
- expect(summaryInner).toContain("[...truncated]");
2478
- expect(summaryInner).not.toContain("early body");
2479
- }
2480
- });
2481
- });