@vellumai/assistant 0.8.4 → 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (438)
  1. package/ARCHITECTURE.md +2 -2
  2. package/docs/browser-use-architecture-phase2.md +1 -1
  3. package/knip.json +2 -1
  4. package/openapi.yaml +809 -11
  5. package/package.json +1 -1
  6. package/src/__tests__/anthropic-provider.test.ts +34 -37
  7. package/src/__tests__/assistant-event-hub-self-exclusion.test.ts +293 -0
  8. package/src/__tests__/assistant-feature-flags-integration.test.ts +3 -3
  9. package/src/__tests__/audit-log-rotation.test.ts +70 -16
  10. package/src/__tests__/background-workers-disk-pressure.test.ts +3 -3
  11. package/src/__tests__/btw-routes.test.ts +2 -3
  12. package/src/__tests__/call-controller.test.ts +0 -1
  13. package/src/__tests__/cancel-resolves-conversation-key.test.ts +1 -1
  14. package/src/__tests__/channel-guardian.test.ts +3 -3
  15. package/src/__tests__/checker.test.ts +6 -15
  16. package/src/__tests__/compaction-events.test.ts +1 -0
  17. package/src/__tests__/compactor-call-site-logging.test.ts +214 -0
  18. package/src/__tests__/computer-use-skill-manifest-regression.test.ts +5 -11
  19. package/src/__tests__/computer-use-tools.test.ts +2 -4
  20. package/src/__tests__/confirmation-request-guardian-bridge.test.ts +0 -1
  21. package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +1 -1
  22. package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +1 -1
  23. package/src/__tests__/conversation-agent-loop-overflow.test.ts +197 -2
  24. package/src/__tests__/conversation-agent-loop.test.ts +163 -122
  25. package/src/__tests__/conversation-app-control-instantiation.test.ts +2 -5
  26. package/src/__tests__/conversation-clear-safety.test.ts +25 -25
  27. package/src/__tests__/conversation-delete-schedule-cleanup.test.ts +1 -1
  28. package/src/__tests__/conversation-disk-view-integration.test.ts +2 -2
  29. package/src/__tests__/conversation-error.test.ts +31 -0
  30. package/src/__tests__/conversation-fork-crud.test.ts +178 -15
  31. package/src/__tests__/conversation-lifecycle.test.ts +52 -11
  32. package/src/__tests__/{conversation-load-cleaned-at.test.ts → conversation-load-history-stripped.test.ts} +13 -13
  33. package/src/__tests__/conversation-provider-retry-repair.test.ts +1 -0
  34. package/src/__tests__/conversation-routes-disk-view.test.ts +109 -0
  35. package/src/__tests__/conversation-routes-slash-commands.test.ts +35 -0
  36. package/src/__tests__/conversation-skill-tools.test.ts +2 -5
  37. package/src/__tests__/conversation-store.test.ts +1 -1
  38. package/src/__tests__/conversation-sync-tags.test.ts +99 -32
  39. package/src/__tests__/conversation-workspace-cache-state.test.ts +1 -0
  40. package/src/__tests__/conversation-workspace-injection.test.ts +1 -1
  41. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +1 -1
  42. package/src/__tests__/credential-execution-feature-gates.test.ts +9 -7
  43. package/src/__tests__/credential-execution-tools.test.ts +6 -6
  44. package/src/__tests__/credential-security-invariants.test.ts +1 -0
  45. package/src/__tests__/credential-vault-unit.test.ts +2 -2
  46. package/src/__tests__/dynamic-page-surface.test.ts +2 -2
  47. package/src/__tests__/email-html-renderer.test.ts +12 -0
  48. package/src/__tests__/gateway-flag-listener.test.ts +237 -0
  49. package/src/__tests__/gemini-provider.test.ts +78 -0
  50. package/src/__tests__/guardian-dispatch.test.ts +0 -1
  51. package/src/__tests__/guardian-outbound-http.test.ts +7 -5
  52. package/src/__tests__/handlers-user-message-approval-consumption.test.ts +1 -1
  53. package/src/__tests__/heartbeat-disk-pressure.test.ts +4 -0
  54. package/src/__tests__/heartbeat-service.test.ts +4 -0
  55. package/src/__tests__/host-shell-tool.test.ts +1 -1
  56. package/src/__tests__/init-feature-flag-overrides.test.ts +5 -6
  57. package/src/__tests__/list-messages-tool-merge.test.ts +70 -11
  58. package/src/__tests__/llm-request-log-call-site.test.ts +136 -0
  59. package/src/__tests__/llm-request-log-source-clickhouse.test.ts +26 -0
  60. package/src/__tests__/llm-resolver.test.ts +77 -9
  61. package/src/__tests__/llm-usage-store.test.ts +66 -0
  62. package/src/__tests__/logger.test.ts +89 -0
  63. package/src/__tests__/mcp-abort-signal.test.ts +2 -2
  64. package/src/__tests__/media-generate-image.test.ts +31 -0
  65. package/src/__tests__/memory-v2-static-injector.test.ts +7 -7
  66. package/src/__tests__/model-intents.test.ts +2 -4
  67. package/src/__tests__/notification-guardian-path.test.ts +0 -1
  68. package/src/__tests__/onboarding-template-contract.test.ts +1 -1
  69. package/src/__tests__/openai-provider.test.ts +46 -0
  70. package/src/__tests__/openai-responses-provider.test.ts +114 -12
  71. package/src/__tests__/pending-interactions-resolved-event.test.ts +0 -1
  72. package/src/__tests__/platform-bash-auto-approve.test.ts +2 -2
  73. package/src/__tests__/platform.test.ts +2 -2
  74. package/src/__tests__/plugin-api-tool-definition.test.ts +92 -0
  75. package/src/__tests__/plugin-bootstrap.test.ts +2 -2
  76. package/src/__tests__/plugin-tool-contribution.test.ts +13 -6
  77. package/src/__tests__/plugin-types.test.ts +3 -2
  78. package/src/__tests__/prechat-onboarding-contract.test.ts +131 -98
  79. package/src/__tests__/pricing.test.ts +12 -0
  80. package/src/__tests__/prune-jobs-changes-parser.test.ts +61 -0
  81. package/src/__tests__/registry.test.ts +2 -8
  82. package/src/__tests__/require-fresh-approval.test.ts +2 -2
  83. package/src/__tests__/runtime-events-sse-bilingual.test.ts +154 -0
  84. package/src/__tests__/shell-tool-proxy-mode.test.ts +1 -1
  85. package/src/__tests__/skill-feature-flags.test.ts +2 -2
  86. package/src/__tests__/skill-projection-feature-flag.test.ts +4 -7
  87. package/src/__tests__/skill-projection.benchmark.test.ts +2 -6
  88. package/src/__tests__/skill-tool-factory.test.ts +1 -1
  89. package/src/__tests__/subagent-notify-parent.test.ts +1 -1
  90. package/src/__tests__/suggestion-routes.test.ts +1 -0
  91. package/src/__tests__/sync-message-contract.test.ts +59 -0
  92. package/src/__tests__/system-prompt.test.ts +145 -131
  93. package/src/__tests__/terminal-tools.test.ts +1 -1
  94. package/src/__tests__/tool-approval-handler.test.ts +1 -5
  95. package/src/__tests__/tool-execute-pipeline.test.ts +2 -2
  96. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +2 -5
  97. package/src/__tests__/tool-executor-lifecycle-events.test.ts +15 -5
  98. package/src/__tests__/tool-executor.test.ts +9 -62
  99. package/src/__tests__/tool-grant-request-escalation.test.ts +1 -6
  100. package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
  101. package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +1 -6
  102. package/src/__tests__/trusted-contact-multichannel.test.ts +0 -1
  103. package/src/__tests__/ui-file-upload-surface.test.ts +2 -2
  104. package/src/__tests__/usage-routes.test.ts +3 -0
  105. package/src/__tests__/verification-control-plane-policy.test.ts +2 -2
  106. package/src/__tests__/workspace-git-service.test.ts +6 -5
  107. package/src/__tests__/workspace-migration-089-move-memory-tree-out-of-v3.test.ts +86 -0
  108. package/src/acp/__tests__/prepare-agent-env.test.ts +146 -0
  109. package/src/acp/prepare-agent-env.ts +78 -0
  110. package/src/acp/session-manager.ts +1 -1
  111. package/src/agent/loop.ts +8 -0
  112. package/src/api/README.md +5 -0
  113. package/src/api/index.ts +4 -0
  114. package/src/api/package.json +10 -0
  115. package/src/background-wake/background-wake-routes.test.ts +233 -0
  116. package/src/background-wake/runtime-registry.ts +24 -0
  117. package/src/cli/commands/__tests__/browser.test.ts +23 -5
  118. package/src/cli/commands/__tests__/domain-register.test.ts +110 -0
  119. package/src/cli/commands/__tests__/domain-status.test.ts +33 -33
  120. package/src/cli/commands/__tests__/inference-send.test.ts +108 -5
  121. package/src/cli/commands/__tests__/memory-v2-compare-render.test.ts +98 -0
  122. package/src/cli/commands/__tests__/memory-v2.test.ts +1 -0
  123. package/src/cli/commands/__tests__/memory-v3-render.test.ts +340 -0
  124. package/src/cli/commands/browser.ts +247 -0
  125. package/src/cli/commands/domain.ts +91 -41
  126. package/src/cli/commands/inference.ts +93 -40
  127. package/src/cli/commands/memory-v2-compare-render.ts +115 -0
  128. package/src/cli/commands/memory-v2.ts +176 -1
  129. package/src/cli/commands/memory-v3-render.ts +344 -0
  130. package/src/cli/commands/memory-v3.ts +316 -0
  131. package/src/cli/program.ts +2 -0
  132. package/src/config/assistant-feature-flags.ts +21 -9
  133. package/src/config/bundled-skills/document-editor/SKILL.md +11 -2
  134. package/src/config/bundled-skills/document-editor/TOOLS.json +18 -0
  135. package/src/config/bundled-skills/document-editor/tools/document-open.ts +12 -0
  136. package/src/config/bundled-skills/image-studio/SKILL.md +4 -0
  137. package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +2 -2
  138. package/src/config/bundled-skills/media-processing/tools/ingest-media.ts +13 -8
  139. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +10 -3
  140. package/src/config/bundled-skills/phone-calls/references/TRANSCRIPTS.md +16 -14
  141. package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +7 -2
  142. package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +7 -2
  143. package/src/config/bundled-tool-registry.ts +2 -0
  144. package/src/config/call-site-defaults.ts +7 -6
  145. package/src/config/feature-flag-registry.json +16 -0
  146. package/src/config/schemas/__tests__/memory-v2.test.ts +213 -1
  147. package/src/config/schemas/call-site-catalog.ts +21 -7
  148. package/src/config/schemas/llm.ts +12 -1
  149. package/src/config/schemas/memory-v2.ts +246 -0
  150. package/src/config/schemas/memory.ts +2 -1
  151. package/src/context/compactor.ts +52 -0
  152. package/src/conversations/__tests__/message-consolidation.test.ts +350 -0
  153. package/src/conversations/message-consolidation.ts +404 -0
  154. package/src/daemon/__tests__/conversation-tool-setup-exclude.test.ts +1 -1
  155. package/src/daemon/__tests__/meet-manifest-loader.test.ts +1 -1
  156. package/src/daemon/conversation-agent-loop-handlers.ts +2 -13
  157. package/src/daemon/conversation-agent-loop.ts +126 -76
  158. package/src/daemon/conversation-error.ts +31 -1
  159. package/src/daemon/conversation-lifecycle.ts +27 -22
  160. package/src/daemon/conversation-runtime-assembly.ts +10 -9
  161. package/src/daemon/conversation-tool-setup.ts +63 -3
  162. package/src/daemon/conversation-usage.ts +2 -0
  163. package/src/daemon/conversation.ts +14 -29
  164. package/src/daemon/disk-pressure-guard.ts +14 -2
  165. package/src/daemon/handlers/config-model.test.ts +1 -0
  166. package/src/daemon/handlers/conversations.ts +11 -3
  167. package/src/daemon/host-browser-proxy.ts +5 -5
  168. package/src/daemon/host-cu-proxy.ts +4 -4
  169. package/src/daemon/host-file-proxy.ts +4 -4
  170. package/src/daemon/host-proxy-base.ts +4 -4
  171. package/src/daemon/host-transfer-proxy.ts +10 -10
  172. package/src/daemon/lifecycle.ts +23 -20
  173. package/src/daemon/meet-manifest-loader.ts +1 -7
  174. package/src/daemon/message-types/conversations.ts +6 -9
  175. package/src/daemon/message-types/home.ts +1 -13
  176. package/src/daemon/message-types/messages.ts +6 -14
  177. package/src/daemon/message-types/sync.ts +14 -0
  178. package/src/daemon/shutdown-handlers.ts +24 -5
  179. package/src/daemon/switch-inference-profile-tool.ts +52 -0
  180. package/src/daemon/tool-setup-types.ts +13 -0
  181. package/src/events/relationship-state-updated.ts +25 -0
  182. package/src/heartbeat/__tests__/heartbeat-service.test.ts +1 -1
  183. package/src/home/home-greeting.ts +0 -9
  184. package/src/home/suggested-prompts.ts +0 -9
  185. package/src/ipc/gateway-flag-listener.ts +123 -0
  186. package/src/ipc/skill-routes/registries.ts +8 -12
  187. package/src/memory/__tests__/db-async-query.test.ts +165 -0
  188. package/src/memory/__tests__/db-maintenance.test.ts +115 -0
  189. package/src/memory/__tests__/jobs-store-enqueue-gate.test.ts +241 -0
  190. package/src/memory/__tests__/jobs-store-job-classes.test.ts +28 -1
  191. package/src/memory/__tests__/memory-retrospective-job.test.ts +7 -0
  192. package/src/memory/auto-analysis-enqueue.ts +5 -1
  193. package/src/memory/conversation-crud.ts +71 -70
  194. package/src/memory/conversation-starters-cadence.ts +3 -1
  195. package/src/memory/conversation-title-service.ts +19 -3
  196. package/src/memory/db-async-query.ts +214 -0
  197. package/src/memory/db-init.ts +10 -0
  198. package/src/memory/db-maintenance.ts +30 -21
  199. package/src/memory/graph/bootstrap.ts +8 -1
  200. package/src/memory/graph/capability-seed.ts +7 -3
  201. package/src/memory/graph/conversation-graph-memory.ts +100 -17
  202. package/src/memory/graph/extraction.ts +1 -5
  203. package/src/memory/graph/graph-search.ts +7 -1
  204. package/src/memory/indexer.ts +28 -18
  205. package/src/memory/job-handlers/cleanup.ts +76 -18
  206. package/src/memory/job-handlers/conversation-starters.ts +1 -4
  207. package/src/memory/jobs/embed-pkb-file.ts +6 -1
  208. package/src/memory/jobs-store.ts +14 -0
  209. package/src/memory/jobs-worker.ts +55 -22
  210. package/src/memory/llm-request-log-source-clickhouse.ts +42 -2
  211. package/src/memory/llm-request-log-source-local.ts +7 -0
  212. package/src/memory/llm-request-log-source.ts +9 -2
  213. package/src/memory/llm-request-log-store.ts +43 -1
  214. package/src/memory/llm-usage-store.ts +24 -0
  215. package/src/memory/memory-retrospective-enqueue.ts +8 -1
  216. package/src/memory/memory-retrospective-job.ts +5 -0
  217. package/src/memory/memory-v2-activation-log-store.ts +15 -6
  218. package/src/memory/migrations/260-rename-cleaned-at.ts +44 -0
  219. package/src/memory/migrations/261-llm-usage-add-raw-usage.ts +36 -0
  220. package/src/memory/migrations/262-memory-v3-coactivation.ts +57 -0
  221. package/src/memory/migrations/263-memory-v3-auto-edges.ts +50 -0
  222. package/src/memory/migrations/264-llm-request-log-call-site.ts +29 -0
  223. package/src/memory/migrations/index.ts +17 -0
  224. package/src/memory/migrations/registry.ts +33 -0
  225. package/src/memory/schema/conversations.ts +1 -1
  226. package/src/memory/schema/infrastructure.ts +21 -0
  227. package/src/memory/tool-usage-store.ts +36 -8
  228. package/src/memory/v2/__tests__/consolidation-job.test.ts +1 -0
  229. package/src/memory/v2/__tests__/harness-compare.test.ts +186 -0
  230. package/src/memory/v2/__tests__/harness-metrics.test.ts +74 -0
  231. package/src/memory/v2/__tests__/harness-oracle.test.ts +257 -0
  232. package/src/memory/v2/__tests__/harness-replay-input.test.ts +225 -0
  233. package/src/memory/v2/__tests__/harness-runner.test.ts +109 -0
  234. package/src/memory/v2/__tests__/injection.test.ts +127 -98
  235. package/src/memory/v2/__tests__/qdrant.test.ts +36 -0
  236. package/src/memory/v2/__tests__/router.test.ts +171 -3
  237. package/src/memory/v2/harness/compare.ts +57 -0
  238. package/src/memory/v2/harness/metrics.ts +124 -0
  239. package/src/memory/v2/harness/oracle.ts +145 -0
  240. package/src/memory/v2/harness/replay-input.ts +224 -0
  241. package/src/memory/v2/harness/retriever.ts +74 -0
  242. package/src/memory/v2/harness/router-retriever.ts +43 -0
  243. package/src/memory/v2/harness/runner.ts +106 -0
  244. package/src/memory/v2/harness/trace.ts +58 -0
  245. package/src/memory/v2/injection.ts +21 -15
  246. package/src/memory/v2/prompts/router.ts +26 -1
  247. package/src/memory/v2/qdrant.ts +14 -2
  248. package/src/memory/v2/router.ts +171 -18
  249. package/src/memory/v3/__tests__/coactivation-store.test.ts +422 -0
  250. package/src/memory/v3/__tests__/consolidation-job.test.ts +468 -0
  251. package/src/memory/v3/__tests__/edge-learning-job.test.ts +324 -0
  252. package/src/memory/v3/__tests__/edges.test.ts +563 -0
  253. package/src/memory/v3/__tests__/filter.test.ts +512 -0
  254. package/src/memory/v3/__tests__/gate.test.ts +574 -0
  255. package/src/memory/v3/__tests__/index-composition.test.ts +233 -0
  256. package/src/memory/v3/__tests__/loop.test.ts +530 -0
  257. package/src/memory/v3/__tests__/retriever.test.ts +226 -0
  258. package/src/memory/v3/__tests__/scouts.test.ts +440 -0
  259. package/src/memory/v3/__tests__/shadow-middleware.test.ts +312 -0
  260. package/src/memory/v3/__tests__/system-prompts.test.ts +154 -0
  261. package/src/memory/v3/__tests__/traversal.test.ts +469 -0
  262. package/src/memory/v3/__tests__/tree-index.test.ts +280 -0
  263. package/src/memory/v3/__tests__/tree-store.test.ts +529 -0
  264. package/src/memory/v3/__tests__/tree-walk.test.ts +707 -0
  265. package/src/memory/v3/__tests__/validate.test.ts +245 -0
  266. package/src/memory/v3/auto-edges.ts +223 -0
  267. package/src/memory/v3/coactivation-store.ts +124 -0
  268. package/src/memory/v3/consolidation-job.ts +323 -0
  269. package/src/memory/v3/edge-learning-job.ts +160 -0
  270. package/src/memory/v3/edges.ts +249 -0
  271. package/src/memory/v3/filter.ts +281 -0
  272. package/src/memory/v3/gate.ts +334 -0
  273. package/src/memory/v3/index-composition.ts +113 -0
  274. package/src/memory/v3/llm-capture.ts +46 -0
  275. package/src/memory/v3/loop.ts +382 -0
  276. package/src/memory/v3/maintenance.ts +144 -0
  277. package/src/memory/v3/prompt-context.ts +33 -0
  278. package/src/memory/v3/prompts/consolidation.ts +458 -0
  279. package/src/memory/v3/prompts/system-prompts.ts +196 -0
  280. package/src/memory/v3/retriever.ts +33 -0
  281. package/src/memory/v3/scouts.ts +420 -0
  282. package/src/memory/v3/shadow-middleware.ts +305 -0
  283. package/src/memory/v3/traversal.ts +206 -0
  284. package/src/memory/v3/tree-index.ts +237 -0
  285. package/src/memory/v3/tree-store.ts +394 -0
  286. package/src/memory/v3/tree-walk.ts +351 -0
  287. package/src/memory/v3/types.ts +65 -0
  288. package/src/memory/v3/validate.ts +300 -0
  289. package/src/notifications/adapters/macos.ts +18 -1
  290. package/src/notifications/adapters/platform.ts +1 -1
  291. package/src/notifications/decision-engine.ts +1 -4
  292. package/src/notifications/emit-signal.ts +29 -49
  293. package/src/permissions/prompter.ts +3 -3
  294. package/src/permissions/question-prompter.ts +5 -2
  295. package/src/permissions/secret-prompter.ts +2 -2
  296. package/src/plugin-api/index.ts +4 -0
  297. package/src/plugin-api/types.ts +7 -33
  298. package/src/plugins/defaults/index.ts +6 -0
  299. package/src/plugins/defaults/injectors.ts +18 -11
  300. package/src/plugins/external-plugin-loader.ts +5 -68
  301. package/src/plugins/types.ts +11 -16
  302. package/src/proactive-artifact/aux-message-injector.ts +17 -4
  303. package/src/prompts/__tests__/task-progress-hint-section.test.ts +3 -9
  304. package/src/prompts/persona-resolver.ts +36 -21
  305. package/src/prompts/sections.ts +39 -7
  306. package/src/prompts/system-prompt.ts +50 -185
  307. package/src/prompts/templates/BOOTSTRAP.md +2 -2
  308. package/src/prompts/templates/system-sections.ts +230 -8
  309. package/src/providers/__tests__/connection-model-compat.test.ts +234 -0
  310. package/src/providers/__tests__/retry-callsite.test.ts +85 -5
  311. package/src/providers/anthropic/client.ts +32 -66
  312. package/src/providers/call-site-routing.ts +14 -2
  313. package/src/providers/connection-model-compat.ts +38 -0
  314. package/src/providers/connection-resolution.ts +16 -2
  315. package/src/providers/gemini/client.ts +49 -6
  316. package/src/providers/inference/adapter-factory.ts +3 -0
  317. package/src/providers/minimax/client.ts +106 -0
  318. package/src/providers/model-catalog.ts +43 -0
  319. package/src/providers/model-intents.ts +1 -1
  320. package/src/providers/openai/chat-completions-provider.ts +6 -3
  321. package/src/providers/openai/codex-models.ts +18 -0
  322. package/src/providers/openai/responses-provider.ts +78 -21
  323. package/src/providers/provider-send-message.ts +7 -1
  324. package/src/providers/retry.ts +34 -3
  325. package/src/providers/thinking-config.ts +26 -1
  326. package/src/providers/usage-tracking.ts +2 -0
  327. package/src/runtime/AGENTS.md +2 -2
  328. package/src/runtime/agent-wake.ts +1 -0
  329. package/src/runtime/assistant-event-hub.ts +76 -6
  330. package/src/runtime/auth/route-policy.ts +36 -0
  331. package/src/runtime/btw-sidechain.ts +0 -6
  332. package/src/runtime/http-types.ts +0 -2
  333. package/src/runtime/migrations/vbundle-builder.ts +10 -3
  334. package/src/runtime/pending-interactions.ts +0 -1
  335. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +106 -0
  336. package/src/runtime/routes/__tests__/memory-v2-simulate-route.test.ts +25 -6
  337. package/src/runtime/routes/__tests__/plugins-routes.test.ts +512 -0
  338. package/src/runtime/routes/acp-routes.test.ts +255 -6
  339. package/src/runtime/routes/acp-routes.ts +8 -1
  340. package/src/runtime/routes/avatar-routes.ts +10 -10
  341. package/src/runtime/routes/background-wake-routes.ts +188 -0
  342. package/src/runtime/routes/browser-tabs-routes.ts +200 -0
  343. package/src/runtime/routes/btw-routes.ts +0 -6
  344. package/src/runtime/routes/conversation-cli-routes.ts +1 -1
  345. package/src/runtime/routes/conversation-list-routes.ts +12 -4
  346. package/src/runtime/routes/conversation-management-routes.ts +77 -20
  347. package/src/runtime/routes/conversation-query-routes.ts +142 -36
  348. package/src/runtime/routes/conversation-routes.ts +252 -410
  349. package/src/runtime/routes/conversation-starter-routes.ts +6 -3
  350. package/src/runtime/routes/disk-pressure-routes.ts +1 -1
  351. package/src/runtime/routes/domain-routes.ts +60 -10
  352. package/src/runtime/routes/email-routes.ts +5 -2
  353. package/src/runtime/routes/events-routes.ts +54 -10
  354. package/src/runtime/routes/group-routes.ts +24 -8
  355. package/src/runtime/routes/host-browser-routes.ts +10 -2
  356. package/src/runtime/routes/host-cu-routes.ts +2 -2
  357. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +96 -3
  358. package/src/runtime/routes/index.ts +8 -0
  359. package/src/runtime/routes/inference-profile-session-handler.ts +22 -12
  360. package/src/runtime/routes/inference-profile-session-routes.ts +7 -1
  361. package/src/runtime/routes/llm-call-sites-routes.ts +32 -5
  362. package/src/runtime/routes/memory-item-routes.ts +8 -3
  363. package/src/runtime/routes/memory-v2-routes.ts +215 -5
  364. package/src/runtime/routes/memory-v3-routes.ts +316 -0
  365. package/src/runtime/routes/migration-routes.ts +21 -24
  366. package/src/runtime/routes/plugins-routes.ts +337 -0
  367. package/src/runtime/routes/rename-conversation-routes.ts +6 -2
  368. package/src/runtime/routes/secret-routes.ts +25 -5
  369. package/src/runtime/routes/settings-routes.ts +12 -11
  370. package/src/runtime/routes/slack-channel-routes.ts +5 -4
  371. package/src/runtime/routes/workspace-routes.ts +25 -10
  372. package/src/runtime/sync/resource-sync-events.ts +106 -38
  373. package/src/runtime/sync/sync-publisher.test.ts +49 -0
  374. package/src/runtime/sync/sync-publisher.ts +2 -1
  375. package/src/runtime/verification-outbound-actions.ts +73 -1
  376. package/src/telemetry/types.ts +12 -0
  377. package/src/telemetry/usage-telemetry-reporter.test.ts +48 -0
  378. package/src/telemetry/usage-telemetry-reporter.ts +1 -0
  379. package/src/tools/acp/spawn.test.ts +119 -0
  380. package/src/tools/acp/spawn.ts +15 -2
  381. package/src/tools/apps/definitions.ts +2 -8
  382. package/src/tools/ask-question/ask-question-tool.test.ts +3 -3
  383. package/src/tools/ask-question/ask-question-tool.ts +38 -45
  384. package/src/tools/browser/__tests__/pinned-tabs.test.ts +70 -0
  385. package/src/tools/browser/browser-execution.ts +16 -3
  386. package/src/tools/browser/cdp-client/__tests__/browser-tabs-factory.test.ts +402 -0
  387. package/src/tools/browser/cdp-client/__tests__/types.test.ts +3 -0
  388. package/src/tools/browser/cdp-client/cdp-inspect-client.ts +12 -0
  389. package/src/tools/browser/cdp-client/extension-cdp-client.ts +27 -1
  390. package/src/tools/browser/cdp-client/factory.ts +100 -17
  391. package/src/tools/browser/cdp-client/local-cdp-client.ts +12 -0
  392. package/src/tools/browser/cdp-client/types.ts +65 -0
  393. package/src/tools/browser/pinned-tabs.ts +96 -40
  394. package/src/tools/computer-use/definitions.ts +22 -78
  395. package/src/tools/credential-execution/make-authenticated-request.ts +3 -9
  396. package/src/tools/credential-execution/manage-secure-command-tool.ts +3 -9
  397. package/src/tools/credential-execution/run-authenticated-command.ts +3 -9
  398. package/src/tools/credentials/vault.ts +3 -9
  399. package/src/tools/document/document-tool.ts +59 -0
  400. package/src/tools/execution-target.ts +21 -23
  401. package/src/tools/executor.ts +6 -1
  402. package/src/tools/filesystem/edit.ts +3 -9
  403. package/src/tools/filesystem/list.ts +3 -9
  404. package/src/tools/filesystem/read.ts +3 -9
  405. package/src/tools/filesystem/write.ts +3 -9
  406. package/src/tools/host-filesystem/edit.ts +3 -9
  407. package/src/tools/host-filesystem/read.ts +3 -9
  408. package/src/tools/host-filesystem/transfer.ts +3 -9
  409. package/src/tools/host-filesystem/write.ts +3 -9
  410. package/src/tools/host-terminal/host-shell.ts +3 -9
  411. package/src/tools/mcp/mcp-tool-factory.ts +1 -8
  412. package/src/tools/memory/register.test.ts +1 -1
  413. package/src/tools/memory/register.ts +4 -9
  414. package/src/tools/network/web-fetch.ts +3 -9
  415. package/src/tools/network/web-search.ts +25 -32
  416. package/src/tools/registry.ts +7 -23
  417. package/src/tools/schema-transforms.ts +1 -1
  418. package/src/tools/skills/execute.ts +3 -9
  419. package/src/tools/skills/load.ts +3 -9
  420. package/src/tools/skills/skill-tool-factory.ts +1 -8
  421. package/src/tools/subagent/notify-parent.ts +3 -9
  422. package/src/tools/system/request-permission.ts +3 -9
  423. package/src/tools/terminal/shell.ts +3 -9
  424. package/src/tools/tool-defaults.ts +94 -0
  425. package/src/tools/types.ts +27 -98
  426. package/src/tools/ui-surface/definitions.ts +6 -22
  427. package/src/usage/pricing.ts +23 -0
  428. package/src/usage/types.ts +12 -0
  429. package/src/util/logger.ts +16 -7
  430. package/src/util/platform.ts +7 -2
  431. package/src/util/sqlite3-runtime.ts +65 -0
  432. package/src/workspace/migrations/086-revert-stale-gemini-mis-rewrites.ts +1 -0
  433. package/src/workspace/migrations/089-move-memory-tree-out-of-v3.ts +86 -0
  434. package/src/workspace/migrations/registry.ts +2 -0
  435. package/src/__tests__/compaction-strip-metadata-clear.test.ts +0 -206
  436. package/src/__tests__/message-complete-display-id.test.ts +0 -175
  437. package/src/daemon/query-complexity-router.ts +0 -75
  438. package/src/prompts/cache-boundary.ts +0 -8
@@ -0,0 +1,234 @@
1
+ /**
2
+ * Tests for the Codex-subscription model-compatibility gate on auto-resolved
3
+ * provider connections.
4
+ *
5
+ * When a profile uses "Any active OpenAI connection" (no `provider_connection`
6
+ * pinned), the daemon auto-picks an active OpenAI connection. An
7
+ * `oauth_subscription` (ChatGPT Codex) connection hard-routes to the Codex
8
+ * endpoint, which rejects non-Codex models with HTTP 400. The gate skips such
9
+ * a connection during auto-resolution unless the model is Codex-compatible.
10
+ *
11
+ * Two layers are covered:
12
+ * 1. `isConnectionCompatibleWithModel` — the pure predicate.
13
+ * 2. `getConfiguredProvider` — the auto-resolution path that uses the
14
+ * predicate as an additional `.find()` filter, plus the pinned-connection
15
+ * path which bypasses the gate entirely.
16
+ */
17
+
18
+ import { beforeEach, describe, expect, mock, test } from "bun:test";
19
+
20
+ import { isConnectionCompatibleWithModel } from "../connection-model-compat.js";
21
+ import type { Auth } from "../inference/auth.js";
22
+
23
+ // ---------------------------------------------------------------------------
24
+ // Pure predicate tests — no mocking required.
25
+ // ---------------------------------------------------------------------------
26
+
27
+ const apiKeyAuth: Auth = { type: "api_key", credential: "credential/x" };
28
+ const platformAuth: Auth = { type: "platform" };
29
+ const oauthAuth: Auth = {
30
+ type: "oauth_subscription",
31
+ credential: "credential/x",
32
+ };
33
+
34
+ describe("isConnectionCompatibleWithModel", () => {
35
+ test("api_key connection is compatible with any model", () => {
36
+ const conn = { auth: apiKeyAuth };
37
+ expect(isConnectionCompatibleWithModel(conn, "gpt-5")).toBe(true);
38
+ expect(isConnectionCompatibleWithModel(conn, "gpt-5.4")).toBe(true);
39
+ });
40
+
41
+ test("platform connection is compatible with any model", () => {
42
+ const conn = { auth: platformAuth };
43
+ expect(isConnectionCompatibleWithModel(conn, "gpt-5.4-nano")).toBe(true);
44
+ });
45
+
46
+ test("oauth_subscription connection is incompatible with a non-Codex model", () => {
47
+ const conn = { auth: oauthAuth };
48
+ expect(isConnectionCompatibleWithModel(conn, "gpt-5")).toBe(false);
49
+ expect(isConnectionCompatibleWithModel(conn, "gpt-5.5")).toBe(false);
50
+ expect(isConnectionCompatibleWithModel(conn, "gpt-5.4-nano")).toBe(false);
51
+ });
52
+
53
+ test("oauth_subscription connection is compatible with a Codex model", () => {
54
+ const conn = { auth: oauthAuth };
55
+ expect(isConnectionCompatibleWithModel(conn, "gpt-5.4")).toBe(true);
56
+ expect(isConnectionCompatibleWithModel(conn, "gpt-5.3-codex")).toBe(true);
57
+ });
58
+
59
+ test("undefined model applies no gating (compatible)", () => {
60
+ const conn = { auth: oauthAuth };
61
+ expect(isConnectionCompatibleWithModel(conn, undefined)).toBe(true);
62
+ });
63
+ });
64
+
65
+ // ---------------------------------------------------------------------------
66
+ // Integration tests through `getConfiguredProvider` — module mocks below must
67
+ // be declared before the import-under-test.
68
+ // ---------------------------------------------------------------------------
69
+
70
+ mock.module("../../util/logger.js", () => ({
71
+ getLogger: () =>
72
+ new Proxy({} as Record<string, unknown>, { get: () => () => {} }),
73
+ }));
74
+
75
+ let mockLlmConfig: Record<string, unknown> = {};
76
+
77
+ mock.module("../../config/loader.js", () => ({
78
+ getConfig: () => ({
79
+ llm: mockLlmConfig,
80
+ services: { inference: { mode: "your-own" } },
81
+ }),
82
+ }));
83
+
84
+ const mockDbSentinel = { __mock: "db" };
85
+ mock.module("../../memory/db-connection.js", () => ({
86
+ getDb: () => mockDbSentinel,
87
+ }));
88
+
89
+ type Connection = {
90
+ name: string;
91
+ provider: string;
92
+ status: string;
93
+ auth: { type: string; credential?: string };
94
+ };
95
+
96
+ // Ordered list the mocked `listConnections` returns. `.find()` walks it in
97
+ // order, so insertion order is meaningful for these tests.
98
+ let fakeConnectionList: Connection[] = [];
99
+ const fakeConnectionsByName = new Map<string, Connection>();
100
+
101
+ mock.module("../inference/connections.js", () => ({
102
+ getConnection: (_db: unknown, name: string) =>
103
+ fakeConnectionsByName.get(name) ?? null,
104
+ listConnections: (_db: unknown, filter?: { provider?: string }) =>
105
+ filter?.provider
106
+ ? fakeConnectionList.filter((c) => c.provider === filter.provider)
107
+ : fakeConnectionList,
108
+ }));
109
+
110
+ // Records each connection handed to the resolver so tests can assert
111
+ // which connection auto-resolution selected.
112
+ const resolveProviderCalls: Connection[] = [];
113
+
114
+ mock.module("../registry.js", () => ({
115
+ getProvider: (name: string) => {
116
+ throw new Error(`legacy getProvider should not be called: ${name}`);
117
+ },
118
+ initializeProviders: async () => {},
119
+ listProviders: () => [{ name: "stub" }],
120
+ resolveProviderFromConnection: async (connection: Connection) => {
121
+ resolveProviderCalls.push(connection);
122
+ return { name: connection.provider, tag: connection.name };
123
+ },
124
+ }));
125
+
126
+ import { getConfiguredProvider } from "../provider-send-message.js";
127
+
128
+ function registerConnections(connections: Connection[]): void {
129
+ fakeConnectionList = connections;
130
+ for (const c of connections) fakeConnectionsByName.set(c.name, c);
131
+ }
132
+
133
+ function reset(): void {
134
+ resolveProviderCalls.length = 0;
135
+ fakeConnectionList = [];
136
+ fakeConnectionsByName.clear();
137
+ mockLlmConfig = {};
138
+ }
139
+
140
+ const OPENAI_KEY: Connection = {
141
+ name: "openai-key",
142
+ provider: "openai",
143
+ status: "active",
144
+ auth: { type: "api_key", credential: "credential/openai" },
145
+ };
146
+ const OPENAI_CODEX: Connection = {
147
+ name: "openai-codex",
148
+ provider: "openai",
149
+ status: "active",
150
+ auth: {
151
+ type: "oauth_subscription",
152
+ credential: "credential/openai-codex/access_token",
153
+ },
154
+ };
155
+
156
+ describe("auto-resolution skips oauth_subscription connections for non-Codex models", () => {
157
+ beforeEach(reset);
158
+
159
+ test("non-Codex model picks the api_key connection over a (first-listed) oauth_subscription one", async () => {
160
+ // oauth_subscription listed FIRST — without the gate, insertion order
161
+ // would have selected it and misrouted gpt-5 to the Codex endpoint.
162
+ registerConnections([OPENAI_CODEX, OPENAI_KEY]);
163
+ setOpenAiProfile("gpt-5");
164
+
165
+ const result = await getConfiguredProvider("mainAgent", {
166
+ overrideProfile: "openai-any",
167
+ });
168
+
169
+ expect(result).not.toBeNull();
170
+ expect(resolveProviderCalls.length).toBe(1);
171
+ expect(resolveProviderCalls[0].name).toBe("openai-key");
172
+ });
173
+
174
+ test("Codex model can select the oauth_subscription connection", async () => {
175
+ registerConnections([OPENAI_CODEX, OPENAI_KEY]);
176
+ setOpenAiProfile("gpt-5.4");
177
+
178
+ const result = await getConfiguredProvider("mainAgent", {
179
+ overrideProfile: "openai-any",
180
+ });
181
+
182
+ expect(result).not.toBeNull();
183
+ expect(resolveProviderCalls.length).toBe(1);
184
+ expect(resolveProviderCalls[0].name).toBe("openai-codex");
185
+ });
186
+
187
+ test("non-Codex model with only an oauth_subscription connection resolves to null (no misroute)", async () => {
188
+ // Pure-predicate gate: the lone oauth_subscription connection is filtered
189
+ // out, so auto-resolution finds nothing and the call site falls back
190
+ // gracefully rather than dispatching gpt-5 to the Codex endpoint.
191
+ registerConnections([OPENAI_CODEX]);
192
+ setOpenAiProfile("gpt-5");
193
+
194
+ const result = await getConfiguredProvider("mainAgent", {
195
+ overrideProfile: "openai-any",
196
+ });
197
+
198
+ expect(result).toBeNull();
199
+ expect(resolveProviderCalls.length).toBe(0);
200
+ });
201
+
202
+ test("explicitly pinned oauth_subscription connection is used regardless of model", async () => {
203
+ registerConnections([OPENAI_CODEX, OPENAI_KEY]);
204
+ mockLlmConfig = {
205
+ default: { provider: "anthropic", model: "claude-opus-4-7" },
206
+ profiles: {
207
+ "openai-pinned": {
208
+ provider: "openai",
209
+ model: "gpt-5",
210
+ provider_connection: "openai-codex",
211
+ },
212
+ },
213
+ };
214
+
215
+ const result = await getConfiguredProvider("mainAgent", {
216
+ overrideProfile: "openai-pinned",
217
+ });
218
+
219
+ // The pinned connection bypasses the auto-resolution gate entirely.
220
+ expect(result).not.toBeNull();
221
+ expect(resolveProviderCalls.length).toBe(1);
222
+ expect(resolveProviderCalls[0].name).toBe("openai-codex");
223
+ });
224
+ });
225
+
226
+ function setOpenAiProfile(model: string): void {
227
+ mockLlmConfig = {
228
+ default: { provider: "anthropic", model: "claude-opus-4-7" },
229
+ profiles: {
230
+ // "Any active OpenAI connection" — provider set, no provider_connection.
231
+ "openai-any": { provider: "openai", model },
232
+ },
233
+ };
234
+ }
@@ -425,7 +425,7 @@ describe("RetryProvider — callSite resolution", () => {
425
425
  expect(config.temperature).toBe(0.5);
426
426
  });
427
427
 
428
- test("strips effort/speed/thinking for providers that don't support them", async () => {
428
+ test("strips effort/speed for providers that don't support them (e.g. fireworks)", async () => {
429
429
  setLlmConfig({
430
430
  default: {
431
431
  provider: "anthropic",
@@ -434,14 +434,14 @@ describe("RetryProvider — callSite resolution", () => {
434
434
  speed: "fast",
435
435
  },
436
436
  callSites: {
437
- memoryRetrieval: { thinking: { enabled: true } },
437
+ memoryRetrieval: { thinking: { enabled: false } },
438
438
  },
439
439
  });
440
440
 
441
441
  let seen: SendMessageOptions | undefined;
442
- // gemini does not support effort/speed/thinking — they must be stripped.
442
+ // fireworks does not support speed or thinking — they must be stripped.
443
443
  const wrapped = new RetryProvider(
444
- makeProvider("gemini", (options) => {
444
+ makeProvider("fireworks", (options) => {
445
445
  seen = options;
446
446
  }),
447
447
  );
@@ -451,13 +451,93 @@ describe("RetryProvider — callSite resolution", () => {
451
451
  });
452
452
 
453
453
  const config = seen?.config as Record<string, unknown>;
454
- expect(config.effort).toBeUndefined();
455
454
  expect(config.speed).toBeUndefined();
456
455
  expect(config.thinking).toBeUndefined();
457
456
  // Model still comes through.
458
457
  expect(config.model).toBe("claude-opus-4-7");
459
458
  });
460
459
 
460
+ test("preserves thinking + level for Gemini provider", async () => {
461
+ setLlmConfig({
462
+ default: {
463
+ provider: "gemini",
464
+ model: "gemini-3.5-flash",
465
+ thinking: { enabled: true, streamThinking: true, level: "high" },
466
+ },
467
+ callSites: { mainAgent: {} },
468
+ });
469
+
470
+ let seen: SendMessageOptions | undefined;
471
+ const wrapped = new RetryProvider(
472
+ makeProvider("gemini", (options) => {
473
+ seen = options;
474
+ }),
475
+ );
476
+
477
+ await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, {
478
+ config: { callSite: "mainAgent" },
479
+ });
480
+
481
+ const config = seen?.config as Record<string, unknown>;
482
+ expect(config.thinking).toEqual({
483
+ type: "adaptive",
484
+ level: "high",
485
+ streamThinking: true,
486
+ });
487
+ });
488
+
489
+ test("Gemini disabled thinking carries the wire `disabled` discriminator", async () => {
490
+ setLlmConfig({
491
+ default: {
492
+ provider: "gemini",
493
+ model: "gemini-3.5-flash",
494
+ thinking: { enabled: false, streamThinking: false },
495
+ },
496
+ callSites: { mainAgent: {} },
497
+ });
498
+
499
+ let seen: SendMessageOptions | undefined;
500
+ const wrapped = new RetryProvider(
501
+ makeProvider("gemini", (options) => {
502
+ seen = options;
503
+ }),
504
+ );
505
+
506
+ await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, {
507
+ config: { callSite: "mainAgent" },
508
+ });
509
+
510
+ const config = seen?.config as Record<string, unknown>;
511
+ expect(config.thinking).toEqual({ type: "disabled" });
512
+ });
513
+
514
+ test("scrubs Gemini-only thinking extras (level, streamThinking) for Anthropic", async () => {
515
+ setLlmConfig({
516
+ default: {
517
+ provider: "anthropic",
518
+ model: "claude-opus-4-7",
519
+ thinking: { enabled: true, streamThinking: true, level: "high" },
520
+ },
521
+ callSites: { mainAgent: {} },
522
+ });
523
+
524
+ let seen: SendMessageOptions | undefined;
525
+ const wrapped = new RetryProvider(
526
+ makeProvider("anthropic", (options) => {
527
+ seen = options;
528
+ }),
529
+ );
530
+
531
+ await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, {
532
+ config: { callSite: "mainAgent" },
533
+ });
534
+
535
+ const config = seen?.config as Record<string, unknown>;
536
+ // Anthropic's SDK rejects unknown keys inside the `thinking` object with
537
+ // "Extra inputs are not permitted" — must be exactly `{ type }`.
538
+ expect(config.thinking).toEqual({ type: "adaptive" });
539
+ });
540
+
461
541
  test("explicit per-call config.model wins over resolved callSite model", async () => {
462
542
  setLlmConfig({
463
543
  default: { provider: "anthropic", model: "resolved-model" },
@@ -1,6 +1,5 @@
1
1
  import Anthropic from "@anthropic-ai/sdk";
2
2
 
3
- import { SYSTEM_PROMPT_CACHE_BOUNDARY } from "../../prompts/system-prompt.js";
4
3
  import { isAbortReason } from "../../util/abort-reasons.js";
5
4
  import { ProviderError } from "../../util/errors.js";
6
5
  import { getLogger } from "../../util/logger.js";
@@ -819,6 +818,15 @@ export class AnthropicProvider implements Provider {
819
818
  ((config as Record<string, unknown> | undefined)?.cacheTtl as
820
819
  | "5m"
821
820
  | "1h") ?? "1h";
821
+ // Opt-out for callers (e.g. the memory router) that send a single
822
+ // user message per call with content that changes every time. The
823
+ // turn-start cache breakpoint below is only useful when the same
824
+ // prefix is re-sent on a subsequent call (typical for the main agent
825
+ // loop's tool-use iterations); one-shot callers pay cache_creation
826
+ // cost without a future hit.
827
+ const disableTurnStartCache =
828
+ (config as Record<string, unknown> | undefined)?.disableTurnStartCache ===
829
+ true;
822
830
  let sentMessages: Anthropic.MessageParam[] | undefined;
823
831
  const startedAt = Date.now();
824
832
  // Hoisted so the catch block can distinguish our inner stream timeout
@@ -980,14 +988,11 @@ export class AnthropicProvider implements Provider {
980
988
  // followed by user tool_result). Replaying stale thinking blocks from
981
989
  // earlier turns causes 400 errors when the signature is no longer
982
990
  // valid (e.g. after a provider/model/profile switch).
983
- const activeToolUseStart =
984
- findActiveToolUseContinuationStart(formatted);
991
+ const activeToolUseStart = findActiveToolUseContinuationStart(formatted);
985
992
  for (let i = 0; i < activeToolUseStart; i++) {
986
993
  const msg = formatted[i];
987
994
  if (msg.role !== "assistant" || !Array.isArray(msg.content)) continue;
988
- const stripped = (
989
- msg.content as Anthropic.ContentBlockParam[]
990
- ).filter(
995
+ const stripped = (msg.content as Anthropic.ContentBlockParam[]).filter(
991
996
  (b) =>
992
997
  typeof b === "string" ||
993
998
  (b.type !== "thinking" && b.type !== "redacted_thinking"),
@@ -1009,6 +1014,7 @@ export class AnthropicProvider implements Provider {
1009
1014
  speed,
1010
1015
  output_config,
1011
1016
  cacheTtl: _cacheTtl,
1017
+ disableTurnStartCache: _disableTurnStartCache,
1012
1018
  max_tokens: callerMaxTokens,
1013
1019
  usageAttributionHeaders,
1014
1020
  ...restConfig
@@ -1065,38 +1071,17 @@ export class AnthropicProvider implements Provider {
1065
1071
  };
1066
1072
 
1067
1073
  if (systemPrompt) {
1068
- const boundaryIdx = systemPrompt.indexOf(SYSTEM_PROMPT_CACHE_BOUNDARY);
1069
- if (boundaryIdx >= 0) {
1070
- // Split into two cache blocks: static instructions (stable across
1071
- // turns) and dynamic workspace content (changes when files are
1072
- // edited). The static prefix stays cached even when workspace
1073
- // files change, saving ~8-10K tokens of cache creation per turn.
1074
- // Both blocks use 1-hour cache TTL to avoid repeated cache misses
1075
- // for conversations with turn gaps exceeding the default 5-minute
1076
- // window.
1077
- const staticBlock = systemPrompt.slice(0, boundaryIdx);
1078
- const dynamicBlock = systemPrompt.slice(
1079
- boundaryIdx + SYSTEM_PROMPT_CACHE_BOUNDARY.length,
1080
- );
1081
- const systemBlocks = [staticBlock, dynamicBlock]
1082
- .filter((text) => text.length > 0)
1083
- .map((text) => ({
1084
- type: "text" as const,
1085
- text,
1086
- cache_control: cacheControl,
1087
- }));
1088
- if (systemBlocks.length > 0) {
1089
- params.system = systemBlocks;
1090
- }
1091
- } else {
1092
- params.system = [
1093
- {
1094
- type: "text" as const,
1095
- text: systemPrompt,
1096
- cache_control: cacheControl,
1097
- },
1098
- ];
1099
- }
1074
+ // The whole system prompt is rendered as a single cached
1075
+ // block. A 1-hour cache TTL is used (when supported by the
1076
+ // model) so the breakpoint survives turn gaps that exceed the
1077
+ // default 5-minute window.
1078
+ params.system = [
1079
+ {
1080
+ type: "text" as const,
1081
+ text: systemPrompt,
1082
+ cache_control: cacheControl,
1083
+ },
1084
+ ];
1100
1085
  }
1101
1086
 
1102
1087
  if (tools && tools.length > 0) {
@@ -1160,7 +1145,9 @@ export class AnthropicProvider implements Provider {
1160
1145
  }
1161
1146
  };
1162
1147
  const turnStartIdx = findUserTextMsgIdx(msgs.length - 1);
1163
- if (turnStartIdx >= 0) applyCacheControlToLastBlock(turnStartIdx);
1148
+ if (turnStartIdx >= 0 && !disableTurnStartCache) {
1149
+ applyCacheControlToLastBlock(turnStartIdx);
1150
+ }
1164
1151
 
1165
1152
  // Previous-turn anchor: when this request is the first of a new turn
1166
1153
  // (turn-start is the very last message — no tool-use loop yet), also
@@ -1172,9 +1159,8 @@ export class AnthropicProvider implements Provider {
1172
1159
  // cache_creation tokens per new turn). Skipped during tool-use loops
1173
1160
  // where the current turn-start already covers the same prefix and a
1174
1161
  // second anchor would blow the 4-breakpoint budget.
1175
- let prevTurnAnchorIdx = -1;
1176
1162
  if (turnStartIdx === msgs.length - 1 && turnStartIdx > 0) {
1177
- prevTurnAnchorIdx = findUserTextMsgIdx(turnStartIdx - 1);
1163
+ const prevTurnAnchorIdx = findUserTextMsgIdx(turnStartIdx - 1);
1178
1164
  if (prevTurnAnchorIdx >= 0)
1179
1165
  applyCacheControlToLastBlock(prevTurnAnchorIdx);
1180
1166
  }
@@ -1185,7 +1171,6 @@ export class AnthropicProvider implements Provider {
1185
1171
  // cheaply without conflicting with the 1h breakpoints above.
1186
1172
  // Skip thinking/redacted_thinking blocks — Anthropic doesn't allow
1187
1173
  // cache_control on those types.
1188
- let tailBreakpointApplied = false;
1189
1174
  if (turnStartIdx >= 0 && turnStartIdx < sentMessages.length - 1) {
1190
1175
  const lastMsg = sentMessages[sentMessages.length - 1];
1191
1176
  if (Array.isArray(lastMsg.content) && lastMsg.content.length > 0) {
@@ -1207,34 +1192,15 @@ export class AnthropicProvider implements Provider {
1207
1192
  if (tailBlock && typeof tailBlock !== "string") {
1208
1193
  (tailBlock as unknown as Record<string, unknown>).cache_control =
1209
1194
  tailCacheControl;
1210
- tailBreakpointApplied = true;
1211
1195
  }
1212
1196
  }
1213
1197
  }
1214
1198
 
1215
- // Enforce Anthropic API maximum of 4 cache_control blocks.
1216
- // With the system prompt boundary split into 2 cached blocks AND
1217
- // tools + turn-start + (tail OR prev-turn-anchor), we'd have 5.
1218
- // Drop the static system block's breakpoint — it's small (<1K
1219
- // tokens) so the re-read cost is negligible, while the dynamic
1220
- // block (workspace context) rarely changes mid-session and
1221
- // benefits more from caching. Tail and prev-turn-anchor are
1222
- // mutually exclusive (prev-turn-anchor only fires when turn-start
1223
- // is the last message, which is the exact condition that suppresses
1224
- // the tail), so we never exceed 5.
1225
- const hasToolCacheBreakpoint =
1226
- params.tools?.some(
1227
- (t) => "cache_control" in t && t.cache_control != null,
1228
- ) ?? false;
1229
- if (
1230
- (tailBreakpointApplied || prevTurnAnchorIdx >= 0) &&
1231
- Array.isArray(params.system) &&
1232
- params.system.length === 2 &&
1233
- hasToolCacheBreakpoint
1234
- ) {
1235
- delete (params.system[0] as unknown as Record<string, unknown>)
1236
- .cache_control;
1237
- }
1199
+ // Cache-breakpoint accounting: system(1) + tools(1) + turn-start(1) +
1200
+ // (tail OR prev-turn-anchor)(1) = 4 — exactly Anthropic's per-request
1201
+ // cap. Tail and prev-turn-anchor are mutually exclusive (the latter
1202
+ // only fires when turn-start is the last message, which suppresses
1203
+ // the tail), so the total can't drift past 4.
1238
1204
 
1239
1205
  // Strip orphaned UTF-16 surrogates so the Anthropic JSON parser never
1240
1206
  // sees invalid strings produced by upstream surrogate-splitting `.slice()` calls.
@@ -24,6 +24,7 @@ import { AsyncLocalStorage } from "node:async_hooks";
24
24
  import { resolveCallSiteConfig } from "../config/llm-resolver.js";
25
25
  import { getConfig } from "../config/loader.js";
26
26
  import { getDb } from "../memory/db-connection.js";
27
+ import { isConnectionCompatibleWithModel } from "./connection-model-compat.js";
27
28
  import {
28
29
  ConnectionResolutionError,
29
30
  tryResolveProviderForConnectionName,
@@ -73,10 +74,15 @@ export class CallSiteRoutingProvider implements Provider {
73
74
  * `expectedProvider` is the provider name the resolved profile
74
75
  * declared. The hook verifies the connection's provider matches
75
76
  * and throws on mismatch.
77
+ *
78
+ * `model` is the resolved call-site model, threaded through so the
79
+ * connection lookup can gate `oauth_subscription` (Codex) connections
80
+ * by model compatibility.
76
81
  */
77
82
  private readonly resolveByConnection: (
78
83
  connectionName: string,
79
84
  expectedProvider: string,
85
+ model: string | undefined,
80
86
  ) => Promise<Provider | null>,
81
87
  ) {
82
88
  this.tokenEstimationProvider = defaultProvider.tokenEstimationProvider;
@@ -155,7 +161,11 @@ export class CallSiteRoutingProvider implements Provider {
155
161
  const candidates = listConnections(getDb(), {
156
162
  provider: resolved.provider,
157
163
  });
158
- const active = candidates.find((c) => c.status === "active");
164
+ const active = candidates.find(
165
+ (c) =>
166
+ c.status === "active" &&
167
+ isConnectionCompatibleWithModel(c, resolved.model),
168
+ );
159
169
  if (active) {
160
170
  connectionName = active.name;
161
171
  }
@@ -168,6 +178,7 @@ export class CallSiteRoutingProvider implements Provider {
168
178
  const connectionProvider = await this.resolveByConnection(
169
179
  connectionName,
170
180
  resolved.provider,
181
+ resolved.model,
171
182
  );
172
183
  if (connectionProvider) return connectionProvider;
173
184
  return this.defaultProvider;
@@ -200,11 +211,12 @@ export function wrapWithCallSiteRouting(
200
211
  ): Provider {
201
212
  return new CallSiteRoutingProvider(
202
213
  base,
203
- (connectionName, expectedProvider) =>
214
+ (connectionName, expectedProvider, model) =>
204
215
  tryResolveProviderForConnectionName(
205
216
  connectionName,
206
217
  config,
207
218
  expectedProvider,
219
+ model,
208
220
  ),
209
221
  );
210
222
  }
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Model-compatibility gate for auto-resolved provider connections.
3
+ *
4
+ * When a profile uses "Any active <provider> connection" (no
5
+ * `provider_connection` pinned), the daemon auto-picks an active connection
6
+ * for the provider. `oauth_subscription` connections (ChatGPT Codex) hard-
7
+ * route every request to the Codex endpoint, which rejects non-Codex models
8
+ * with HTTP 400. This helper lets the auto-resolution sites skip such a
9
+ * connection when the requested model is not Codex-compatible.
10
+ */
11
+
12
+ import type { ProviderConnection } from "./inference/auth.js";
13
+ import { isCodexSubscriptionModel } from "./openai/codex-models.js";
14
+
15
+ /**
16
+ * Whether `connection` can serve a request for `model` during
17
+ * auto-resolution.
18
+ *
19
+ * `oauth_subscription` connections route through the ChatGPT Codex endpoint,
20
+ * so they are only compatible with Codex models. Every other auth type
21
+ * imposes no model restriction and is always compatible.
22
+ *
23
+ * `model` may be undefined when the call site has no resolved model; in that
24
+ * case no model gating is applied (returns true) so resolution behaviour is
25
+ * unchanged.
26
+ *
27
+ * This gate applies to auto-resolution only — an explicitly pinned
28
+ * `provider_connection` bypasses connection selection entirely and is used
29
+ * regardless of model.
30
+ */
31
+ export function isConnectionCompatibleWithModel(
32
+ connection: Pick<ProviderConnection, "auth">,
33
+ model: string | undefined,
34
+ ): boolean {
35
+ if (connection.auth.type !== "oauth_subscription") return true;
36
+ if (!model) return true;
37
+ return isCodexSubscriptionModel(model);
38
+ }
@@ -30,6 +30,7 @@
30
30
  import { resolveCallSiteConfig } from "../config/llm-resolver.js";
31
31
  import { getDb } from "../memory/db-connection.js";
32
32
  import { getLogger } from "../util/logger.js";
33
+ import { isConnectionCompatibleWithModel } from "./connection-model-compat.js";
33
34
  import { getConnection, listConnections } from "./inference/connections.js";
34
35
  import type { ProvidersConfig } from "./registry.js";
35
36
  import { resolveProviderFromConnection } from "./registry.js";
@@ -79,11 +80,16 @@ export class ConnectionResolutionError extends Error {
79
80
  * `expectedProvider` is the provider name the resolving profile declared.
80
81
  * Pass `undefined` to skip the mismatch check (callers that don't yet
81
82
  * know the expected provider).
83
+ *
84
+ * `model` is the resolved call-site model. It gates the `provider_mismatch`
85
+ * auto-recovery below so a non-Codex model is never rerouted onto an
86
+ * `oauth_subscription` (ChatGPT Codex) connection.
82
87
  */
83
88
  export async function tryResolveProviderForConnectionName(
84
89
  connectionName: string,
85
90
  config: ProvidersConfig,
86
91
  expectedProvider?: string,
92
+ model?: string,
87
93
  ): Promise<Provider | null> {
88
94
  let connection;
89
95
  try {
@@ -113,7 +119,10 @@ export async function tryResolveProviderForConnectionName(
113
119
  try {
114
120
  const db = getDb();
115
121
  const candidates = listConnections(db, { provider: expectedProvider });
116
- const active = candidates.find((c) => c.status === "active");
122
+ const active = candidates.find(
123
+ (c) =>
124
+ c.status === "active" && isConnectionCompatibleWithModel(c, model),
125
+ );
117
126
  if (active) {
118
127
  log.info(
119
128
  {
@@ -192,7 +201,11 @@ export async function resolveDefaultProvider(
192
201
  const candidates = listConnections(getDb(), {
193
202
  provider: resolved.provider,
194
203
  });
195
- const active = candidates.find((c) => c.status === "active");
204
+ const active = candidates.find(
205
+ (c) =>
206
+ c.status === "active" &&
207
+ isConnectionCompatibleWithModel(c, resolved.model),
208
+ );
196
209
  if (active) {
197
210
  log.info(
198
211
  { provider: resolved.provider, resolvedConnection: active.name },
@@ -216,5 +229,6 @@ export async function resolveDefaultProvider(
216
229
  connectionName,
217
230
  config,
218
231
  resolved.provider,
232
+ resolved.model,
219
233
  );
220
234
  }