@vellumai/assistant 0.8.4 → 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (438)
  1. package/ARCHITECTURE.md +2 -2
  2. package/docs/browser-use-architecture-phase2.md +1 -1
  3. package/knip.json +2 -1
  4. package/openapi.yaml +809 -11
  5. package/package.json +1 -1
  6. package/src/__tests__/anthropic-provider.test.ts +34 -37
  7. package/src/__tests__/assistant-event-hub-self-exclusion.test.ts +293 -0
  8. package/src/__tests__/assistant-feature-flags-integration.test.ts +3 -3
  9. package/src/__tests__/audit-log-rotation.test.ts +70 -16
  10. package/src/__tests__/background-workers-disk-pressure.test.ts +3 -3
  11. package/src/__tests__/btw-routes.test.ts +2 -3
  12. package/src/__tests__/call-controller.test.ts +0 -1
  13. package/src/__tests__/cancel-resolves-conversation-key.test.ts +1 -1
  14. package/src/__tests__/channel-guardian.test.ts +3 -3
  15. package/src/__tests__/checker.test.ts +6 -15
  16. package/src/__tests__/compaction-events.test.ts +1 -0
  17. package/src/__tests__/compactor-call-site-logging.test.ts +214 -0
  18. package/src/__tests__/computer-use-skill-manifest-regression.test.ts +5 -11
  19. package/src/__tests__/computer-use-tools.test.ts +2 -4
  20. package/src/__tests__/confirmation-request-guardian-bridge.test.ts +0 -1
  21. package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +1 -1
  22. package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +1 -1
  23. package/src/__tests__/conversation-agent-loop-overflow.test.ts +197 -2
  24. package/src/__tests__/conversation-agent-loop.test.ts +163 -122
  25. package/src/__tests__/conversation-app-control-instantiation.test.ts +2 -5
  26. package/src/__tests__/conversation-clear-safety.test.ts +25 -25
  27. package/src/__tests__/conversation-delete-schedule-cleanup.test.ts +1 -1
  28. package/src/__tests__/conversation-disk-view-integration.test.ts +2 -2
  29. package/src/__tests__/conversation-error.test.ts +31 -0
  30. package/src/__tests__/conversation-fork-crud.test.ts +178 -15
  31. package/src/__tests__/conversation-lifecycle.test.ts +52 -11
  32. package/src/__tests__/{conversation-load-cleaned-at.test.ts → conversation-load-history-stripped.test.ts} +13 -13
  33. package/src/__tests__/conversation-provider-retry-repair.test.ts +1 -0
  34. package/src/__tests__/conversation-routes-disk-view.test.ts +109 -0
  35. package/src/__tests__/conversation-routes-slash-commands.test.ts +35 -0
  36. package/src/__tests__/conversation-skill-tools.test.ts +2 -5
  37. package/src/__tests__/conversation-store.test.ts +1 -1
  38. package/src/__tests__/conversation-sync-tags.test.ts +99 -32
  39. package/src/__tests__/conversation-workspace-cache-state.test.ts +1 -0
  40. package/src/__tests__/conversation-workspace-injection.test.ts +1 -1
  41. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +1 -1
  42. package/src/__tests__/credential-execution-feature-gates.test.ts +9 -7
  43. package/src/__tests__/credential-execution-tools.test.ts +6 -6
  44. package/src/__tests__/credential-security-invariants.test.ts +1 -0
  45. package/src/__tests__/credential-vault-unit.test.ts +2 -2
  46. package/src/__tests__/dynamic-page-surface.test.ts +2 -2
  47. package/src/__tests__/email-html-renderer.test.ts +12 -0
  48. package/src/__tests__/gateway-flag-listener.test.ts +237 -0
  49. package/src/__tests__/gemini-provider.test.ts +78 -0
  50. package/src/__tests__/guardian-dispatch.test.ts +0 -1
  51. package/src/__tests__/guardian-outbound-http.test.ts +7 -5
  52. package/src/__tests__/handlers-user-message-approval-consumption.test.ts +1 -1
  53. package/src/__tests__/heartbeat-disk-pressure.test.ts +4 -0
  54. package/src/__tests__/heartbeat-service.test.ts +4 -0
  55. package/src/__tests__/host-shell-tool.test.ts +1 -1
  56. package/src/__tests__/init-feature-flag-overrides.test.ts +5 -6
  57. package/src/__tests__/list-messages-tool-merge.test.ts +70 -11
  58. package/src/__tests__/llm-request-log-call-site.test.ts +136 -0
  59. package/src/__tests__/llm-request-log-source-clickhouse.test.ts +26 -0
  60. package/src/__tests__/llm-resolver.test.ts +77 -9
  61. package/src/__tests__/llm-usage-store.test.ts +66 -0
  62. package/src/__tests__/logger.test.ts +89 -0
  63. package/src/__tests__/mcp-abort-signal.test.ts +2 -2
  64. package/src/__tests__/media-generate-image.test.ts +31 -0
  65. package/src/__tests__/memory-v2-static-injector.test.ts +7 -7
  66. package/src/__tests__/model-intents.test.ts +2 -4
  67. package/src/__tests__/notification-guardian-path.test.ts +0 -1
  68. package/src/__tests__/onboarding-template-contract.test.ts +1 -1
  69. package/src/__tests__/openai-provider.test.ts +46 -0
  70. package/src/__tests__/openai-responses-provider.test.ts +114 -12
  71. package/src/__tests__/pending-interactions-resolved-event.test.ts +0 -1
  72. package/src/__tests__/platform-bash-auto-approve.test.ts +2 -2
  73. package/src/__tests__/platform.test.ts +2 -2
  74. package/src/__tests__/plugin-api-tool-definition.test.ts +92 -0
  75. package/src/__tests__/plugin-bootstrap.test.ts +2 -2
  76. package/src/__tests__/plugin-tool-contribution.test.ts +13 -6
  77. package/src/__tests__/plugin-types.test.ts +3 -2
  78. package/src/__tests__/prechat-onboarding-contract.test.ts +131 -98
  79. package/src/__tests__/pricing.test.ts +12 -0
  80. package/src/__tests__/prune-jobs-changes-parser.test.ts +61 -0
  81. package/src/__tests__/registry.test.ts +2 -8
  82. package/src/__tests__/require-fresh-approval.test.ts +2 -2
  83. package/src/__tests__/runtime-events-sse-bilingual.test.ts +154 -0
  84. package/src/__tests__/shell-tool-proxy-mode.test.ts +1 -1
  85. package/src/__tests__/skill-feature-flags.test.ts +2 -2
  86. package/src/__tests__/skill-projection-feature-flag.test.ts +4 -7
  87. package/src/__tests__/skill-projection.benchmark.test.ts +2 -6
  88. package/src/__tests__/skill-tool-factory.test.ts +1 -1
  89. package/src/__tests__/subagent-notify-parent.test.ts +1 -1
  90. package/src/__tests__/suggestion-routes.test.ts +1 -0
  91. package/src/__tests__/sync-message-contract.test.ts +59 -0
  92. package/src/__tests__/system-prompt.test.ts +145 -131
  93. package/src/__tests__/terminal-tools.test.ts +1 -1
  94. package/src/__tests__/tool-approval-handler.test.ts +1 -5
  95. package/src/__tests__/tool-execute-pipeline.test.ts +2 -2
  96. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +2 -5
  97. package/src/__tests__/tool-executor-lifecycle-events.test.ts +15 -5
  98. package/src/__tests__/tool-executor.test.ts +9 -62
  99. package/src/__tests__/tool-grant-request-escalation.test.ts +1 -6
  100. package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
  101. package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +1 -6
  102. package/src/__tests__/trusted-contact-multichannel.test.ts +0 -1
  103. package/src/__tests__/ui-file-upload-surface.test.ts +2 -2
  104. package/src/__tests__/usage-routes.test.ts +3 -0
  105. package/src/__tests__/verification-control-plane-policy.test.ts +2 -2
  106. package/src/__tests__/workspace-git-service.test.ts +6 -5
  107. package/src/__tests__/workspace-migration-089-move-memory-tree-out-of-v3.test.ts +86 -0
  108. package/src/acp/__tests__/prepare-agent-env.test.ts +146 -0
  109. package/src/acp/prepare-agent-env.ts +78 -0
  110. package/src/acp/session-manager.ts +1 -1
  111. package/src/agent/loop.ts +8 -0
  112. package/src/api/README.md +5 -0
  113. package/src/api/index.ts +4 -0
  114. package/src/api/package.json +10 -0
  115. package/src/background-wake/background-wake-routes.test.ts +233 -0
  116. package/src/background-wake/runtime-registry.ts +24 -0
  117. package/src/cli/commands/__tests__/browser.test.ts +23 -5
  118. package/src/cli/commands/__tests__/domain-register.test.ts +110 -0
  119. package/src/cli/commands/__tests__/domain-status.test.ts +33 -33
  120. package/src/cli/commands/__tests__/inference-send.test.ts +108 -5
  121. package/src/cli/commands/__tests__/memory-v2-compare-render.test.ts +98 -0
  122. package/src/cli/commands/__tests__/memory-v2.test.ts +1 -0
  123. package/src/cli/commands/__tests__/memory-v3-render.test.ts +340 -0
  124. package/src/cli/commands/browser.ts +247 -0
  125. package/src/cli/commands/domain.ts +91 -41
  126. package/src/cli/commands/inference.ts +93 -40
  127. package/src/cli/commands/memory-v2-compare-render.ts +115 -0
  128. package/src/cli/commands/memory-v2.ts +176 -1
  129. package/src/cli/commands/memory-v3-render.ts +344 -0
  130. package/src/cli/commands/memory-v3.ts +316 -0
  131. package/src/cli/program.ts +2 -0
  132. package/src/config/assistant-feature-flags.ts +21 -9
  133. package/src/config/bundled-skills/document-editor/SKILL.md +11 -2
  134. package/src/config/bundled-skills/document-editor/TOOLS.json +18 -0
  135. package/src/config/bundled-skills/document-editor/tools/document-open.ts +12 -0
  136. package/src/config/bundled-skills/image-studio/SKILL.md +4 -0
  137. package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +2 -2
  138. package/src/config/bundled-skills/media-processing/tools/ingest-media.ts +13 -8
  139. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +10 -3
  140. package/src/config/bundled-skills/phone-calls/references/TRANSCRIPTS.md +16 -14
  141. package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +7 -2
  142. package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +7 -2
  143. package/src/config/bundled-tool-registry.ts +2 -0
  144. package/src/config/call-site-defaults.ts +7 -6
  145. package/src/config/feature-flag-registry.json +16 -0
  146. package/src/config/schemas/__tests__/memory-v2.test.ts +213 -1
  147. package/src/config/schemas/call-site-catalog.ts +21 -7
  148. package/src/config/schemas/llm.ts +12 -1
  149. package/src/config/schemas/memory-v2.ts +246 -0
  150. package/src/config/schemas/memory.ts +2 -1
  151. package/src/context/compactor.ts +52 -0
  152. package/src/conversations/__tests__/message-consolidation.test.ts +350 -0
  153. package/src/conversations/message-consolidation.ts +404 -0
  154. package/src/daemon/__tests__/conversation-tool-setup-exclude.test.ts +1 -1
  155. package/src/daemon/__tests__/meet-manifest-loader.test.ts +1 -1
  156. package/src/daemon/conversation-agent-loop-handlers.ts +2 -13
  157. package/src/daemon/conversation-agent-loop.ts +126 -76
  158. package/src/daemon/conversation-error.ts +31 -1
  159. package/src/daemon/conversation-lifecycle.ts +27 -22
  160. package/src/daemon/conversation-runtime-assembly.ts +10 -9
  161. package/src/daemon/conversation-tool-setup.ts +63 -3
  162. package/src/daemon/conversation-usage.ts +2 -0
  163. package/src/daemon/conversation.ts +14 -29
  164. package/src/daemon/disk-pressure-guard.ts +14 -2
  165. package/src/daemon/handlers/config-model.test.ts +1 -0
  166. package/src/daemon/handlers/conversations.ts +11 -3
  167. package/src/daemon/host-browser-proxy.ts +5 -5
  168. package/src/daemon/host-cu-proxy.ts +4 -4
  169. package/src/daemon/host-file-proxy.ts +4 -4
  170. package/src/daemon/host-proxy-base.ts +4 -4
  171. package/src/daemon/host-transfer-proxy.ts +10 -10
  172. package/src/daemon/lifecycle.ts +23 -20
  173. package/src/daemon/meet-manifest-loader.ts +1 -7
  174. package/src/daemon/message-types/conversations.ts +6 -9
  175. package/src/daemon/message-types/home.ts +1 -13
  176. package/src/daemon/message-types/messages.ts +6 -14
  177. package/src/daemon/message-types/sync.ts +14 -0
  178. package/src/daemon/shutdown-handlers.ts +24 -5
  179. package/src/daemon/switch-inference-profile-tool.ts +52 -0
  180. package/src/daemon/tool-setup-types.ts +13 -0
  181. package/src/events/relationship-state-updated.ts +25 -0
  182. package/src/heartbeat/__tests__/heartbeat-service.test.ts +1 -1
  183. package/src/home/home-greeting.ts +0 -9
  184. package/src/home/suggested-prompts.ts +0 -9
  185. package/src/ipc/gateway-flag-listener.ts +123 -0
  186. package/src/ipc/skill-routes/registries.ts +8 -12
  187. package/src/memory/__tests__/db-async-query.test.ts +165 -0
  188. package/src/memory/__tests__/db-maintenance.test.ts +115 -0
  189. package/src/memory/__tests__/jobs-store-enqueue-gate.test.ts +241 -0
  190. package/src/memory/__tests__/jobs-store-job-classes.test.ts +28 -1
  191. package/src/memory/__tests__/memory-retrospective-job.test.ts +7 -0
  192. package/src/memory/auto-analysis-enqueue.ts +5 -1
  193. package/src/memory/conversation-crud.ts +71 -70
  194. package/src/memory/conversation-starters-cadence.ts +3 -1
  195. package/src/memory/conversation-title-service.ts +19 -3
  196. package/src/memory/db-async-query.ts +214 -0
  197. package/src/memory/db-init.ts +10 -0
  198. package/src/memory/db-maintenance.ts +30 -21
  199. package/src/memory/graph/bootstrap.ts +8 -1
  200. package/src/memory/graph/capability-seed.ts +7 -3
  201. package/src/memory/graph/conversation-graph-memory.ts +100 -17
  202. package/src/memory/graph/extraction.ts +1 -5
  203. package/src/memory/graph/graph-search.ts +7 -1
  204. package/src/memory/indexer.ts +28 -18
  205. package/src/memory/job-handlers/cleanup.ts +76 -18
  206. package/src/memory/job-handlers/conversation-starters.ts +1 -4
  207. package/src/memory/jobs/embed-pkb-file.ts +6 -1
  208. package/src/memory/jobs-store.ts +14 -0
  209. package/src/memory/jobs-worker.ts +55 -22
  210. package/src/memory/llm-request-log-source-clickhouse.ts +42 -2
  211. package/src/memory/llm-request-log-source-local.ts +7 -0
  212. package/src/memory/llm-request-log-source.ts +9 -2
  213. package/src/memory/llm-request-log-store.ts +43 -1
  214. package/src/memory/llm-usage-store.ts +24 -0
  215. package/src/memory/memory-retrospective-enqueue.ts +8 -1
  216. package/src/memory/memory-retrospective-job.ts +5 -0
  217. package/src/memory/memory-v2-activation-log-store.ts +15 -6
  218. package/src/memory/migrations/260-rename-cleaned-at.ts +44 -0
  219. package/src/memory/migrations/261-llm-usage-add-raw-usage.ts +36 -0
  220. package/src/memory/migrations/262-memory-v3-coactivation.ts +57 -0
  221. package/src/memory/migrations/263-memory-v3-auto-edges.ts +50 -0
  222. package/src/memory/migrations/264-llm-request-log-call-site.ts +29 -0
  223. package/src/memory/migrations/index.ts +17 -0
  224. package/src/memory/migrations/registry.ts +33 -0
  225. package/src/memory/schema/conversations.ts +1 -1
  226. package/src/memory/schema/infrastructure.ts +21 -0
  227. package/src/memory/tool-usage-store.ts +36 -8
  228. package/src/memory/v2/__tests__/consolidation-job.test.ts +1 -0
  229. package/src/memory/v2/__tests__/harness-compare.test.ts +186 -0
  230. package/src/memory/v2/__tests__/harness-metrics.test.ts +74 -0
  231. package/src/memory/v2/__tests__/harness-oracle.test.ts +257 -0
  232. package/src/memory/v2/__tests__/harness-replay-input.test.ts +225 -0
  233. package/src/memory/v2/__tests__/harness-runner.test.ts +109 -0
  234. package/src/memory/v2/__tests__/injection.test.ts +127 -98
  235. package/src/memory/v2/__tests__/qdrant.test.ts +36 -0
  236. package/src/memory/v2/__tests__/router.test.ts +171 -3
  237. package/src/memory/v2/harness/compare.ts +57 -0
  238. package/src/memory/v2/harness/metrics.ts +124 -0
  239. package/src/memory/v2/harness/oracle.ts +145 -0
  240. package/src/memory/v2/harness/replay-input.ts +224 -0
  241. package/src/memory/v2/harness/retriever.ts +74 -0
  242. package/src/memory/v2/harness/router-retriever.ts +43 -0
  243. package/src/memory/v2/harness/runner.ts +106 -0
  244. package/src/memory/v2/harness/trace.ts +58 -0
  245. package/src/memory/v2/injection.ts +21 -15
  246. package/src/memory/v2/prompts/router.ts +26 -1
  247. package/src/memory/v2/qdrant.ts +14 -2
  248. package/src/memory/v2/router.ts +171 -18
  249. package/src/memory/v3/__tests__/coactivation-store.test.ts +422 -0
  250. package/src/memory/v3/__tests__/consolidation-job.test.ts +468 -0
  251. package/src/memory/v3/__tests__/edge-learning-job.test.ts +324 -0
  252. package/src/memory/v3/__tests__/edges.test.ts +563 -0
  253. package/src/memory/v3/__tests__/filter.test.ts +512 -0
  254. package/src/memory/v3/__tests__/gate.test.ts +574 -0
  255. package/src/memory/v3/__tests__/index-composition.test.ts +233 -0
  256. package/src/memory/v3/__tests__/loop.test.ts +530 -0
  257. package/src/memory/v3/__tests__/retriever.test.ts +226 -0
  258. package/src/memory/v3/__tests__/scouts.test.ts +440 -0
  259. package/src/memory/v3/__tests__/shadow-middleware.test.ts +312 -0
  260. package/src/memory/v3/__tests__/system-prompts.test.ts +154 -0
  261. package/src/memory/v3/__tests__/traversal.test.ts +469 -0
  262. package/src/memory/v3/__tests__/tree-index.test.ts +280 -0
  263. package/src/memory/v3/__tests__/tree-store.test.ts +529 -0
  264. package/src/memory/v3/__tests__/tree-walk.test.ts +707 -0
  265. package/src/memory/v3/__tests__/validate.test.ts +245 -0
  266. package/src/memory/v3/auto-edges.ts +223 -0
  267. package/src/memory/v3/coactivation-store.ts +124 -0
  268. package/src/memory/v3/consolidation-job.ts +323 -0
  269. package/src/memory/v3/edge-learning-job.ts +160 -0
  270. package/src/memory/v3/edges.ts +249 -0
  271. package/src/memory/v3/filter.ts +281 -0
  272. package/src/memory/v3/gate.ts +334 -0
  273. package/src/memory/v3/index-composition.ts +113 -0
  274. package/src/memory/v3/llm-capture.ts +46 -0
  275. package/src/memory/v3/loop.ts +382 -0
  276. package/src/memory/v3/maintenance.ts +144 -0
  277. package/src/memory/v3/prompt-context.ts +33 -0
  278. package/src/memory/v3/prompts/consolidation.ts +458 -0
  279. package/src/memory/v3/prompts/system-prompts.ts +196 -0
  280. package/src/memory/v3/retriever.ts +33 -0
  281. package/src/memory/v3/scouts.ts +420 -0
  282. package/src/memory/v3/shadow-middleware.ts +305 -0
  283. package/src/memory/v3/traversal.ts +206 -0
  284. package/src/memory/v3/tree-index.ts +237 -0
  285. package/src/memory/v3/tree-store.ts +394 -0
  286. package/src/memory/v3/tree-walk.ts +351 -0
  287. package/src/memory/v3/types.ts +65 -0
  288. package/src/memory/v3/validate.ts +300 -0
  289. package/src/notifications/adapters/macos.ts +18 -1
  290. package/src/notifications/adapters/platform.ts +1 -1
  291. package/src/notifications/decision-engine.ts +1 -4
  292. package/src/notifications/emit-signal.ts +29 -49
  293. package/src/permissions/prompter.ts +3 -3
  294. package/src/permissions/question-prompter.ts +5 -2
  295. package/src/permissions/secret-prompter.ts +2 -2
  296. package/src/plugin-api/index.ts +4 -0
  297. package/src/plugin-api/types.ts +7 -33
  298. package/src/plugins/defaults/index.ts +6 -0
  299. package/src/plugins/defaults/injectors.ts +18 -11
  300. package/src/plugins/external-plugin-loader.ts +5 -68
  301. package/src/plugins/types.ts +11 -16
  302. package/src/proactive-artifact/aux-message-injector.ts +17 -4
  303. package/src/prompts/__tests__/task-progress-hint-section.test.ts +3 -9
  304. package/src/prompts/persona-resolver.ts +36 -21
  305. package/src/prompts/sections.ts +39 -7
  306. package/src/prompts/system-prompt.ts +50 -185
  307. package/src/prompts/templates/BOOTSTRAP.md +2 -2
  308. package/src/prompts/templates/system-sections.ts +230 -8
  309. package/src/providers/__tests__/connection-model-compat.test.ts +234 -0
  310. package/src/providers/__tests__/retry-callsite.test.ts +85 -5
  311. package/src/providers/anthropic/client.ts +32 -66
  312. package/src/providers/call-site-routing.ts +14 -2
  313. package/src/providers/connection-model-compat.ts +38 -0
  314. package/src/providers/connection-resolution.ts +16 -2
  315. package/src/providers/gemini/client.ts +49 -6
  316. package/src/providers/inference/adapter-factory.ts +3 -0
  317. package/src/providers/minimax/client.ts +106 -0
  318. package/src/providers/model-catalog.ts +43 -0
  319. package/src/providers/model-intents.ts +1 -1
  320. package/src/providers/openai/chat-completions-provider.ts +6 -3
  321. package/src/providers/openai/codex-models.ts +18 -0
  322. package/src/providers/openai/responses-provider.ts +78 -21
  323. package/src/providers/provider-send-message.ts +7 -1
  324. package/src/providers/retry.ts +34 -3
  325. package/src/providers/thinking-config.ts +26 -1
  326. package/src/providers/usage-tracking.ts +2 -0
  327. package/src/runtime/AGENTS.md +2 -2
  328. package/src/runtime/agent-wake.ts +1 -0
  329. package/src/runtime/assistant-event-hub.ts +76 -6
  330. package/src/runtime/auth/route-policy.ts +36 -0
  331. package/src/runtime/btw-sidechain.ts +0 -6
  332. package/src/runtime/http-types.ts +0 -2
  333. package/src/runtime/migrations/vbundle-builder.ts +10 -3
  334. package/src/runtime/pending-interactions.ts +0 -1
  335. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +106 -0
  336. package/src/runtime/routes/__tests__/memory-v2-simulate-route.test.ts +25 -6
  337. package/src/runtime/routes/__tests__/plugins-routes.test.ts +512 -0
  338. package/src/runtime/routes/acp-routes.test.ts +255 -6
  339. package/src/runtime/routes/acp-routes.ts +8 -1
  340. package/src/runtime/routes/avatar-routes.ts +10 -10
  341. package/src/runtime/routes/background-wake-routes.ts +188 -0
  342. package/src/runtime/routes/browser-tabs-routes.ts +200 -0
  343. package/src/runtime/routes/btw-routes.ts +0 -6
  344. package/src/runtime/routes/conversation-cli-routes.ts +1 -1
  345. package/src/runtime/routes/conversation-list-routes.ts +12 -4
  346. package/src/runtime/routes/conversation-management-routes.ts +77 -20
  347. package/src/runtime/routes/conversation-query-routes.ts +142 -36
  348. package/src/runtime/routes/conversation-routes.ts +252 -410
  349. package/src/runtime/routes/conversation-starter-routes.ts +6 -3
  350. package/src/runtime/routes/disk-pressure-routes.ts +1 -1
  351. package/src/runtime/routes/domain-routes.ts +60 -10
  352. package/src/runtime/routes/email-routes.ts +5 -2
  353. package/src/runtime/routes/events-routes.ts +54 -10
  354. package/src/runtime/routes/group-routes.ts +24 -8
  355. package/src/runtime/routes/host-browser-routes.ts +10 -2
  356. package/src/runtime/routes/host-cu-routes.ts +2 -2
  357. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +96 -3
  358. package/src/runtime/routes/index.ts +8 -0
  359. package/src/runtime/routes/inference-profile-session-handler.ts +22 -12
  360. package/src/runtime/routes/inference-profile-session-routes.ts +7 -1
  361. package/src/runtime/routes/llm-call-sites-routes.ts +32 -5
  362. package/src/runtime/routes/memory-item-routes.ts +8 -3
  363. package/src/runtime/routes/memory-v2-routes.ts +215 -5
  364. package/src/runtime/routes/memory-v3-routes.ts +316 -0
  365. package/src/runtime/routes/migration-routes.ts +21 -24
  366. package/src/runtime/routes/plugins-routes.ts +337 -0
  367. package/src/runtime/routes/rename-conversation-routes.ts +6 -2
  368. package/src/runtime/routes/secret-routes.ts +25 -5
  369. package/src/runtime/routes/settings-routes.ts +12 -11
  370. package/src/runtime/routes/slack-channel-routes.ts +5 -4
  371. package/src/runtime/routes/workspace-routes.ts +25 -10
  372. package/src/runtime/sync/resource-sync-events.ts +106 -38
  373. package/src/runtime/sync/sync-publisher.test.ts +49 -0
  374. package/src/runtime/sync/sync-publisher.ts +2 -1
  375. package/src/runtime/verification-outbound-actions.ts +73 -1
  376. package/src/telemetry/types.ts +12 -0
  377. package/src/telemetry/usage-telemetry-reporter.test.ts +48 -0
  378. package/src/telemetry/usage-telemetry-reporter.ts +1 -0
  379. package/src/tools/acp/spawn.test.ts +119 -0
  380. package/src/tools/acp/spawn.ts +15 -2
  381. package/src/tools/apps/definitions.ts +2 -8
  382. package/src/tools/ask-question/ask-question-tool.test.ts +3 -3
  383. package/src/tools/ask-question/ask-question-tool.ts +38 -45
  384. package/src/tools/browser/__tests__/pinned-tabs.test.ts +70 -0
  385. package/src/tools/browser/browser-execution.ts +16 -3
  386. package/src/tools/browser/cdp-client/__tests__/browser-tabs-factory.test.ts +402 -0
  387. package/src/tools/browser/cdp-client/__tests__/types.test.ts +3 -0
  388. package/src/tools/browser/cdp-client/cdp-inspect-client.ts +12 -0
  389. package/src/tools/browser/cdp-client/extension-cdp-client.ts +27 -1
  390. package/src/tools/browser/cdp-client/factory.ts +100 -17
  391. package/src/tools/browser/cdp-client/local-cdp-client.ts +12 -0
  392. package/src/tools/browser/cdp-client/types.ts +65 -0
  393. package/src/tools/browser/pinned-tabs.ts +96 -40
  394. package/src/tools/computer-use/definitions.ts +22 -78
  395. package/src/tools/credential-execution/make-authenticated-request.ts +3 -9
  396. package/src/tools/credential-execution/manage-secure-command-tool.ts +3 -9
  397. package/src/tools/credential-execution/run-authenticated-command.ts +3 -9
  398. package/src/tools/credentials/vault.ts +3 -9
  399. package/src/tools/document/document-tool.ts +59 -0
  400. package/src/tools/execution-target.ts +21 -23
  401. package/src/tools/executor.ts +6 -1
  402. package/src/tools/filesystem/edit.ts +3 -9
  403. package/src/tools/filesystem/list.ts +3 -9
  404. package/src/tools/filesystem/read.ts +3 -9
  405. package/src/tools/filesystem/write.ts +3 -9
  406. package/src/tools/host-filesystem/edit.ts +3 -9
  407. package/src/tools/host-filesystem/read.ts +3 -9
  408. package/src/tools/host-filesystem/transfer.ts +3 -9
  409. package/src/tools/host-filesystem/write.ts +3 -9
  410. package/src/tools/host-terminal/host-shell.ts +3 -9
  411. package/src/tools/mcp/mcp-tool-factory.ts +1 -8
  412. package/src/tools/memory/register.test.ts +1 -1
  413. package/src/tools/memory/register.ts +4 -9
  414. package/src/tools/network/web-fetch.ts +3 -9
  415. package/src/tools/network/web-search.ts +25 -32
  416. package/src/tools/registry.ts +7 -23
  417. package/src/tools/schema-transforms.ts +1 -1
  418. package/src/tools/skills/execute.ts +3 -9
  419. package/src/tools/skills/load.ts +3 -9
  420. package/src/tools/skills/skill-tool-factory.ts +1 -8
  421. package/src/tools/subagent/notify-parent.ts +3 -9
  422. package/src/tools/system/request-permission.ts +3 -9
  423. package/src/tools/terminal/shell.ts +3 -9
  424. package/src/tools/tool-defaults.ts +94 -0
  425. package/src/tools/types.ts +27 -98
  426. package/src/tools/ui-surface/definitions.ts +6 -22
  427. package/src/usage/pricing.ts +23 -0
  428. package/src/usage/types.ts +12 -0
  429. package/src/util/logger.ts +16 -7
  430. package/src/util/platform.ts +7 -2
  431. package/src/util/sqlite3-runtime.ts +65 -0
  432. package/src/workspace/migrations/086-revert-stale-gemini-mis-rewrites.ts +1 -0
  433. package/src/workspace/migrations/089-move-memory-tree-out-of-v3.ts +86 -0
  434. package/src/workspace/migrations/registry.ts +2 -0
  435. package/src/__tests__/compaction-strip-metadata-clear.test.ts +0 -206
  436. package/src/__tests__/message-complete-display-id.test.ts +0 -175
  437. package/src/daemon/query-complexity-router.ts +0 -75
  438. package/src/prompts/cache-boundary.ts +0 -8
@@ -26,7 +26,6 @@ import type {
26
26
  TurnChannelContext,
27
27
  TurnInterfaceContext,
28
28
  } from "../channels/types.js";
29
- import { isAssistantFeatureFlagEnabled } from "../config/assistant-feature-flags.js";
30
29
  import {
31
30
  contextWindowConfigFromEffective,
32
31
  type EffectiveContextWindow,
@@ -60,7 +59,6 @@ import { commitAppTurnChanges } from "../memory/app-git-service.js";
60
59
  import { getApp, listAppFiles, resolveAppDir } from "../memory/app-store.js";
61
60
  import { enqueueAutoAnalysisOnCompaction } from "../memory/auto-analysis-enqueue.js";
62
61
  import {
63
- clearStrippedInjectionMetadataForConversation,
64
62
  getConversation,
65
63
  getConversationOriginChannel,
66
64
  getConversationOriginInterface,
@@ -68,6 +66,7 @@ import {
68
66
  getLastUserTimestampBefore,
69
67
  getMessageById,
70
68
  provenanceFromTrustContext,
69
+ setConversationHistoryStrippedAt,
71
70
  setLastNotifiedInferenceProfile,
72
71
  updateConversationContextWindow,
73
72
  updateConversationSlackContextWatermark,
@@ -163,13 +162,13 @@ import {
163
162
  createEventHandlerState,
164
163
  dispatchAgentEvent,
165
164
  type EventHandlerDeps,
166
- getClientDisplayMessageId,
167
165
  } from "./conversation-agent-loop-handlers.js";
168
166
  import {
169
167
  approveHostAttachmentRead,
170
168
  resolveAssistantAttachments,
171
169
  } from "./conversation-attachments.js";
172
170
  import {
171
+ budgetYieldUnrecoveredClassification,
173
172
  buildConversationErrorMessage,
174
173
  classifyConversationError,
175
174
  isUserCancellation,
@@ -222,10 +221,6 @@ import {
222
221
  SYNC_TAGS,
223
222
  } from "./message-types/sync.js";
224
223
  import { parseActualTokensFromError } from "./parse-actual-tokens-from-error.js";
225
- import {
226
- classifyQueryComplexity,
227
- complexityTierToProfileKey,
228
- } from "./query-complexity-router.js";
229
224
  import type { TraceEmitter } from "./trace-emitter.js";
230
225
  import type { TrustContext } from "./trust-context.js";
231
226
  import { stripHistoricalWebSearchResults } from "./web-search-history.js";
@@ -539,6 +534,19 @@ export interface AgentLoopConversationContext {
539
534
  * is inherited by later tool executions and nested subagents.
540
535
  */
541
536
  currentTurnOverrideProfile?: string;
537
+ /**
538
+ * Set by the `switch_inference_profile` tool when the model self-selects a
539
+ * different profile mid-turn. Read by `readCurrentOverrideProfile` in the
540
+ * agent loop so the next LLM call uses the switched profile. Reset at
541
+ * turn start.
542
+ */
543
+ toolRoutedProfile?: string;
544
+ /**
545
+ * True when the user has explicitly selected an inference profile for this
546
+ * conversation (via the composer profile picker). When set, tool-based
547
+ * auto-routing is suppressed — the user's explicit choice takes precedence.
548
+ */
549
+ hasExplicitProfileOverride?: boolean;
542
550
  commandIntent?: { type: string; payload?: string; languageCode?: string };
543
551
  trustContext?: TrustContext;
544
552
  /** Task-run scope for the current turn. Cleared at turn end so queued/drained turns don't inherit it. */
@@ -681,6 +689,13 @@ export async function runAgentLoopImpl(
681
689
  let yieldedForHandoff = false;
682
690
  let yieldedForBudget = false;
683
691
  let pendingCheckpointYield: "budget" | "handoff" | null = null;
692
+ // Captured when the auto_compress_latest_turn rerun yields at the mid-loop
693
+ // budget checkpoint. SSE emission happens immediately at the detection site;
694
+ // assistant-row persistence is deferred until after the pendingToolResults
695
+ // flush so we don't orphan tool_use/tool_result pairs in the durable history.
696
+ let budgetYieldClassification: ReturnType<
697
+ typeof budgetYieldUnrecoveredClassification
698
+ > | null = null;
684
699
  let emitTerminalExit:
685
700
  | ((reason: AgentLoopExitReason) => Promise<void>)
686
701
  | null = null;
@@ -709,52 +724,23 @@ export async function runAgentLoopImpl(
709
724
  options?.overrideProfile ??
710
725
  getConversationOverrideProfileFromRow(turnStartConversation);
711
726
 
727
+ ctx.hasExplicitProfileOverride = !!userExplicitOverride;
728
+
712
729
  const config = getConfig();
713
730
 
714
- // Query complexity routing: when no explicit user override is set and the
715
- // feature flag is enabled, classify the query and route to the appropriate
716
- // profile for this turn. The override is ephemeral (not persisted).
717
- let turnOverrideProfile = userExplicitOverride;
718
- if (
719
- !userExplicitOverride &&
720
- turnCallSite === "mainAgent" &&
721
- isAssistantFeatureFlagEnabled("query-complexity-routing", config)
722
- ) {
723
- const tier = await classifyQueryComplexity(content);
724
- if (tier && tier !== "balanced") {
725
- const routedProfile = complexityTierToProfileKey(tier);
726
- if (config.llm.profiles?.[routedProfile]) {
727
- turnOverrideProfile = routedProfile;
728
- }
729
- }
730
- }
731
+ // Tool-based auto-routing: the switch_inference_profile tool lets the model
732
+ // self-select a different profile mid-turn. Reset the per-turn slot so a
733
+ // stale selection from a previous turn doesn't leak forward.
734
+ ctx.toolRoutedProfile = undefined;
731
735
 
732
- // Notify clients when the auto-router selected a non-default profile.
733
- if (turnOverrideProfile && turnOverrideProfile !== userExplicitOverride) {
734
- const profileEntry = config.llm.profiles?.[turnOverrideProfile];
735
- const label = profileEntry?.label ?? turnOverrideProfile;
736
- broadcastMessage({
737
- type: "turn_profile_auto_routed",
738
- conversationId: ctx.conversationId,
739
- profile: turnOverrideProfile,
740
- profileLabel: label,
741
- });
742
- }
736
+ const turnOverrideProfile = userExplicitOverride;
743
737
 
744
- // Only use the complexity-routed profile as a fallback — not the initial
745
- // explicit override. If a mid-turn session expiry clears the conversation
746
- // override, the old behavior (return undefined → revert to workspace
747
- // defaults) must be preserved for non-routed turns.
748
- const complexityRoutedProfile =
749
- turnOverrideProfile !== userExplicitOverride
750
- ? turnOverrideProfile
751
- : undefined;
752
738
  const readCurrentOverrideProfile = (): string | undefined =>
753
739
  options?.overrideProfile ??
754
740
  getConversationOverrideProfileFromRow(
755
741
  getConversation(ctx.conversationId),
756
742
  ) ??
757
- complexityRoutedProfile;
743
+ ctx.toolRoutedProfile;
758
744
 
759
745
  const effectiveContextWindow = resolveEffectiveContextWindow({
760
746
  llm: config.llm,
@@ -776,6 +762,7 @@ export async function runAgentLoopImpl(
776
762
  contextWindowManager.updateConfig?.(currentContextWindowConfig);
777
763
 
778
764
  let appliedOverrideProfile = turnOverrideProfile;
765
+ let emittedToolRoutedProfile: string | undefined;
779
766
  const refreshCurrentProfileState = (): string | undefined => {
780
767
  const currentOverrideProfile = readCurrentOverrideProfile();
781
768
  if (currentOverrideProfile !== appliedOverrideProfile) {
@@ -797,6 +784,24 @@ export async function runAgentLoopImpl(
797
784
  "Turn inference profile changed mid-loop",
798
785
  );
799
786
  }
787
+
788
+ // Emit turn_profile_auto_routed when the tool-based router selects a
789
+ // new profile. Deduplicated so the event fires at most once per profile.
790
+ if (
791
+ ctx.toolRoutedProfile &&
792
+ ctx.toolRoutedProfile !== emittedToolRoutedProfile
793
+ ) {
794
+ emittedToolRoutedProfile = ctx.toolRoutedProfile;
795
+ const profileEntry = config.llm.profiles?.[ctx.toolRoutedProfile];
796
+ const label = profileEntry?.label ?? ctx.toolRoutedProfile;
797
+ broadcastMessage({
798
+ type: "turn_profile_auto_routed",
799
+ conversationId: ctx.conversationId,
800
+ profile: ctx.toolRoutedProfile,
801
+ profileLabel: label,
802
+ });
803
+ }
804
+
800
805
  ctx.currentTurnOverrideProfile = currentOverrideProfile;
801
806
  return currentOverrideProfile;
802
807
  };
@@ -1645,7 +1650,7 @@ export async function runAgentLoopImpl(
1645
1650
  // V2 static memory block (essentials/threads/recent/buffer).
1646
1651
  // `currentMemoryV2Static` is the trust-gated content reused by every
1647
1652
  // re-injection path — it stays non-null on non-full-mode turns so
1648
- // that mid-turn reducer compaction (which strips the prior `<memory>`
1653
+ // that mid-turn reducer compaction (which strips the prior `<info>`
1649
1654
  // block) can restore the freshest content. `memoryV2Static` is the
1650
1655
  // first-turn / post-compaction cadence-gated value for initial
1651
1656
  // injection only. `readMemoryV2StaticContent` self-gates on the v2
@@ -2307,14 +2312,7 @@ export async function runAgentLoopImpl(
2307
2312
  // so we compact the "raw" persistent messages.
2308
2313
  const rawHistory = stripInjectionsForCompaction(updatedHistory);
2309
2314
  ctx.messages = rawHistory;
2310
- try {
2311
- clearStrippedInjectionMetadataForConversation(ctx.conversationId);
2312
- } catch (err) {
2313
- rlog.warn(
2314
- { err },
2315
- "Failed to clear stripped-injection metadata after compaction strip (non-fatal)",
2316
- );
2317
- }
2315
+ setConversationHistoryStrippedAt(ctx.conversationId, Date.now());
2318
2316
 
2319
2317
  ctx.emitActivityState(
2320
2318
  "thinking",
@@ -2598,14 +2596,7 @@ export async function runAgentLoopImpl(
2598
2596
 
2599
2597
  if (updatedHistory.length > preRunHistoryLength) {
2600
2598
  ctx.messages = stripInjectionsForCompaction(updatedHistory);
2601
- try {
2602
- clearStrippedInjectionMetadataForConversation(ctx.conversationId);
2603
- } catch (err) {
2604
- rlog.warn(
2605
- { err },
2606
- "Failed to clear stripped-injection metadata after compaction strip (non-fatal)",
2607
- );
2608
- }
2599
+ setConversationHistoryStrippedAt(ctx.conversationId, Date.now());
2609
2600
  convergenceStripped = true;
2610
2601
  preRepairMessages = updatedHistory;
2611
2602
  preRunHistoryLength = updatedHistory.length;
@@ -2850,14 +2841,7 @@ export async function runAgentLoopImpl(
2850
2841
  // pre-rerun messages.
2851
2842
  if (updatedHistory.length > preRunHistoryLength) {
2852
2843
  ctx.messages = stripInjectionsForCompaction(updatedHistory);
2853
- try {
2854
- clearStrippedInjectionMetadataForConversation(ctx.conversationId);
2855
- } catch (err) {
2856
- rlog.warn(
2857
- { err },
2858
- "Failed to clear stripped-injection metadata after compaction strip (non-fatal)",
2859
- );
2860
- }
2844
+ setConversationHistoryStrippedAt(ctx.conversationId, Date.now());
2861
2845
  convergenceStripped = true;
2862
2846
  preRepairMessages = updatedHistory;
2863
2847
  preRunHistoryLength = updatedHistory.length;
@@ -3006,6 +2990,32 @@ export async function runAgentLoopImpl(
3006
2990
  await emitTerminalExit?.("context_too_large");
3007
2991
  pendingCheckpointYield = null;
3008
2992
  onEvent(buildConversationErrorMessage(ctx.conversationId, classified));
2993
+ } else if (yieldedForBudget && !abortController.signal.aborted) {
2994
+ // The auto_compress_latest_turn rerun (action === "auto_compress_latest_turn"
2995
+ // above) reset `contextTooLargeDetected` to false before its final
2996
+ // `agentLoop.run`, so the context-too-large branch above won't fire
2997
+ // even when that rerun yields at the mid-loop budget checkpoint with
2998
+ // no further recovery layer to re-enter. Without surfacing this here,
2999
+ // the turn terminates silently — the inspector sees `agent_loop_exit_reason
3000
+ // = NULL` and the user sees no message at all (just a "ghost" turn).
3001
+ //
3002
+ // Unlike provider-error persistence at L3091 — which only fires when
3003
+ // the loop produced NO assistant output — budget_yield_unrecovered
3004
+ // typically yields AFTER one or more successful tool-use iterations,
3005
+ // so `hasAssistantResponse` is true and that path would skip us. We
3006
+ // capture the classification here so the live SSE event fires
3007
+ // immediately, and persist a dedicated notice row below — after the
3008
+ // pendingToolResults flush — so the transcript reads as: tool-use →
3009
+ // tool results → "I couldn't fit the next step…" notice. Persisting
3010
+ // earlier would orphan an assistant(tool_use) from its user(tool_result),
3011
+ // breaking provider adjacency on replay.
3012
+ budgetYieldClassification = budgetYieldUnrecoveredClassification();
3013
+ onEvent(
3014
+ buildConversationErrorMessage(
3015
+ ctx.conversationId,
3016
+ budgetYieldClassification,
3017
+ ),
3018
+ );
3009
3019
  }
3010
3020
  }
3011
3021
 
@@ -3081,6 +3091,52 @@ export async function runAgentLoopImpl(
3081
3091
  state.pendingToolResults.clear();
3082
3092
  }
3083
3093
 
3094
+ // Persist the budget_yield_unrecovered notice now that any pending
3095
+ // tool_results have flushed. The SSE event already fired upstream; this
3096
+ // makes the row durable in the right position: tool-use → tool-results →
3097
+ // notice. Doing it earlier (e.g. at the detection site) would land the
3098
+ // assistant row between a tool_use and its tool_result and break provider
3099
+ // adjacency on replay.
3100
+ if (budgetYieldClassification && !abortController.signal.aborted) {
3101
+ const yieldNoticeMessage = createAssistantMessage(
3102
+ budgetYieldClassification.userMessage,
3103
+ );
3104
+ const yieldNoticeMetadata = {
3105
+ ...provenanceFromTrustContext(ctx.trustContext),
3106
+ userMessageChannel: capturedTurnChannelContext.userMessageChannel,
3107
+ assistantMessageChannel:
3108
+ capturedTurnChannelContext.assistantMessageChannel,
3109
+ userMessageInterface: capturedTurnInterfaceContext.userMessageInterface,
3110
+ assistantMessageInterface:
3111
+ capturedTurnInterfaceContext.assistantMessageInterface,
3112
+ };
3113
+ try {
3114
+ await runPipeline<PersistArgs, PersistResult>(
3115
+ "persistence",
3116
+ getMiddlewaresFor("persistence"),
3117
+ defaultPersistenceTerminal,
3118
+ {
3119
+ op: "add",
3120
+ conversationId: ctx.conversationId,
3121
+ role: "assistant",
3122
+ content: JSON.stringify(yieldNoticeMessage.content),
3123
+ metadata: yieldNoticeMetadata,
3124
+ },
3125
+ buildPluginTurnContext(ctx, reqId),
3126
+ DEFAULT_TIMEOUTS.persistence,
3127
+ );
3128
+ } catch (err) {
3129
+ // Non-fatal — a DB hiccup must not escalate a budget-yield exit into
3130
+ // a turn-level throw. The live SSE event was already emitted, so the
3131
+ // user still sees the notice this turn even if the durable row missed.
3132
+ rlog.warn(
3133
+ { err },
3134
+ "Failed to persist budget_yield_unrecovered notice (non-fatal)",
3135
+ );
3136
+ }
3137
+ await emitTerminalExit?.("budget_yield_unrecovered");
3138
+ }
3139
+
3084
3140
  // Reconstruct history
3085
3141
  const newMessages = updatedHistory.slice(preRunHistoryLength).map((msg) => {
3086
3142
  if (msg.role !== "assistant") return msg;
@@ -3263,7 +3319,6 @@ export async function runAgentLoopImpl(
3263
3319
  ctx.lastAssistantAttachments = assistantAttachments;
3264
3320
  ctx.lastAttachmentWarnings = attachmentResult.directiveWarnings;
3265
3321
  syncLastAssistantMessageToDisk();
3266
- const clientDisplayMessageId = getClientDisplayMessageId(state);
3267
3322
 
3268
3323
  // Re-check: the user may have cancelled during attachment resolution
3269
3324
  if (abortController.signal.aborted) {
@@ -3309,9 +3364,6 @@ export async function runAgentLoopImpl(
3309
3364
  ...(state.lastAssistantMessageId
3310
3365
  ? { messageId: state.lastAssistantMessageId }
3311
3366
  : {}),
3312
- ...(clientDisplayMessageId
3313
- ? { displayMessageId: clientDisplayMessageId }
3314
- : {}),
3315
3367
  });
3316
3368
  publishLoopMessagesChanged();
3317
3369
  } else {
@@ -3336,9 +3388,6 @@ export async function runAgentLoopImpl(
3336
3388
  ...(state.lastAssistantMessageId
3337
3389
  ? { messageId: state.lastAssistantMessageId }
3338
3390
  : {}),
3339
- ...(clientDisplayMessageId
3340
- ? { displayMessageId: clientDisplayMessageId }
3341
- : {}),
3342
3391
  });
3343
3392
  publishLoopMessagesChanged();
3344
3393
 
@@ -3645,6 +3694,7 @@ export async function applyCompactionResult(
3645
3694
  result.summaryText,
3646
3695
  ctx.contextCompactedMessageCount,
3647
3696
  );
3697
+ setConversationHistoryStrippedAt(ctx.conversationId, compactedAt);
3648
3698
  if (options.slackContextCompactionWatermarkTs) {
3649
3699
  updateConversationSlackContextWatermark(
3650
3700
  ctx.conversationId,
@@ -437,9 +437,15 @@ function classifyCore(
437
437
  errorCategory: "image_dimensions_too_large",
438
438
  };
439
439
  }
440
+ // Extract the provider detail after "API error (NNN): " prefix
441
+ const detailMatch = message.match(/API error \(\d+\):\s*(.+)/i);
442
+ const detail = detailMatch?.[1];
443
+ const suffix = detail
444
+ ? `: ${detail.length > 200 ? detail.slice(0, 200) + "…" : detail}`
445
+ : "";
440
446
  return {
441
447
  code: "PROVIDER_API",
442
- userMessage: "The AI provider rejected the request.",
448
+ userMessage: `The AI provider rejected the request (HTTP ${error.statusCode})${suffix}`,
443
449
  retryable: true,
444
450
  errorCategory: "provider_api_error",
445
451
  };
@@ -760,6 +766,30 @@ function classifyByMessage(
760
766
  };
761
767
  }
762
768
 
769
+ /**
770
+ * Classify a `budget_yield_unrecovered` terminal exit.
771
+ *
772
+ * Emitted when the agent loop's `auto_compress_latest_turn` rerun
773
+ * (the last layer of the overflow-recovery ladder) still yields at
774
+ * the mid-loop preflight budget checkpoint. The turn cannot proceed,
775
+ * but it is not a provider rejection — every compaction the loop ran
776
+ * has already been applied to the conversation, so the user's next
777
+ * message starts from the compacted history and typically succeeds.
778
+ *
779
+ * The returned `userMessage` is persisted as a `role="assistant"` row
780
+ * by the same path that already persists `PROVIDER_BILLING` etc., so
781
+ * the notice is durable across reload (not just a transient banner).
782
+ */
783
+ export function budgetYieldUnrecoveredClassification(): ClassifiedConversationError {
784
+ return {
785
+ code: "BUDGET_YIELD_UNRECOVERED",
786
+ userMessage:
787
+ "I tried to compact this conversation but couldn't fit the next step into the model's context window. Send another message to continue — the compaction I did run has been saved, so your next turn starts from a smaller history.",
788
+ retryable: true,
789
+ errorCategory: "budget_yield_unrecovered",
790
+ };
791
+ }
792
+
763
793
  /**
764
794
  * Build a `conversation_error` server message from a classified error.
765
795
  */
@@ -16,7 +16,7 @@ import {
16
16
  getMessages,
17
17
  type MessageRow,
18
18
  } from "../memory/conversation-crud.js";
19
- import { enqueueMemoryJob } from "../memory/jobs-store.js";
19
+ import { enqueueMemoryJob, isMemoryEnabled } from "../memory/jobs-store.js";
20
20
  import { enqueueMemoryRetrospectiveIfEnabled } from "../memory/memory-retrospective-enqueue.js";
21
21
  import { shouldExposePersonalMemory } from "../memory/v2/static-context.js";
22
22
  import type { PermissionPrompter } from "../permissions/prompter.js";
@@ -189,17 +189,18 @@ export async function loadFromDb(ctx: LoadFromDbContext): Promise<void> {
189
189
  ctx.contextCompactedAt = conv?.contextCompactedAt ?? null;
190
190
  }
191
191
 
192
- // `/clean` persists a timestamp; messages older than this should skip
193
- // metadata rehydration and have any injection prefixes still embedded in
194
- // their content stripped, so the cleaned state survives reload and forks.
195
- const cleanedAt = conv?.cleanedAt ?? null;
192
+ // Every injection-strip event (`/clean` or compaction) updates
193
+ // `historyStrippedAt`. Messages older than this should skip metadata
194
+ // rehydration and have any injection prefixes still embedded in their
195
+ // content stripped, so the post-strip view survives reload and forks.
196
+ const historyStrippedAt = conv?.historyStrippedAt ?? null;
196
197
  const slicedDbMessages = dbMessages.slice(ctx.contextCompactedMessageCount);
197
- let preCleanCount = 0;
198
- if (cleanedAt != null) {
198
+ let preStrippedCount = 0;
199
+ if (historyStrippedAt != null) {
199
200
  const boundary = slicedDbMessages.findIndex(
200
- (m) => m.createdAt >= cleanedAt,
201
+ (m) => m.createdAt >= historyStrippedAt,
201
202
  );
202
- preCleanCount = boundary === -1 ? slicedDbMessages.length : boundary;
203
+ preStrippedCount = boundary === -1 ? slicedDbMessages.length : boundary;
203
204
  }
204
205
 
205
206
  // Mirror the injection-time gate (`shouldExposePersonalMemory` in
@@ -213,7 +214,7 @@ export async function loadFromDb(ctx: LoadFromDbContext): Promise<void> {
213
214
  isTrustedActor: resolveTrustClass(ctx.trustContext) === "guardian",
214
215
  });
215
216
  const parsedMessages: Message[] = slicedDbMessages.map((m, index, arr) => {
216
- const isPreClean = index < preCleanCount;
217
+ const isPreStripped = index < preStrippedCount;
217
218
  const role = m.role as "user" | "assistant";
218
219
  let content: ContentBlock[];
219
220
  try {
@@ -233,7 +234,7 @@ export async function loadFromDb(ctx: LoadFromDbContext): Promise<void> {
233
234
 
234
235
  // Re-inject persisted injection blocks from metadata so it survives
235
236
  // conversation reloads (eviction, restart, fork).
236
- if (role === "user" && m.metadata && !isPreClean) {
237
+ if (role === "user" && m.metadata && !isPreStripped) {
237
238
  try {
238
239
  const meta = JSON.parse(m.metadata);
239
240
  const isTail = index === arr.length - 1;
@@ -244,9 +245,12 @@ export async function loadFromDb(ctx: LoadFromDbContext): Promise<void> {
244
245
  // (pkb-context 30, pkb-reminder 35, memory-v2-static 38,
245
246
  // now-md 40 — the v2 static block lands inside the memory
246
247
  // prefix, so now-md splices *after* it):
247
- // [<workspace>, <turn_context>, <memory __injected>,
248
- // <memory>\n…</memory>, <NOW.md>, <system_reminder>,
248
+ // [<workspace>, <turn_context>, <memory>dynamic</memory>,
249
+ // <info>v2static</info>, <NOW.md>, <system_reminder>,
249
250
  // <knowledge_base>, ...original]
251
+ // The v2 static block is replayed verbatim from stored metadata,
252
+ // so rows may carry either `<info>…</info>` or `<memory>…</memory>`
253
+ // depending on when they were persisted.
250
254
  // Required so Anthropic's prefix cache keeps matching msg[0]
251
255
  // across daemon restart and conversation eviction. The tail
252
256
  // row only rehydrates `memoryInjectedBlock` — the next turn
@@ -273,11 +277,12 @@ export async function loadFromDb(ctx: LoadFromDbContext): Promise<void> {
273
277
  }
274
278
 
275
279
  // The v2 static memory block (essentials/threads/recent/buffer
276
- // wrapped in `<memory>…</memory>`) carries personal user memory.
277
- // Trust-gated to mirror `shouldExposePersonalMemory` at injection
278
- // time — untrusted-actor views must not read persisted personal
279
- // memory back through metadata. Skipped on the tail row because
280
- // the next turn re-injects fresh content on full-mode turns.
280
+ // wrapped in either `<info>…</info>` or `<memory>…</memory>`)
281
+ // carries personal user memory. Trust-gated to mirror
282
+ // `shouldExposePersonalMemory` at injection time — untrusted-actor
283
+ // views must not read persisted personal memory back through
284
+ // metadata. Skipped on the tail row because the next turn
285
+ // re-injects fresh content on full-mode turns.
281
286
  if (
282
287
  !isTail &&
283
288
  personalMemoryAllowed &&
@@ -336,13 +341,13 @@ export async function loadFromDb(ctx: LoadFromDbContext): Promise<void> {
336
341
  // Strip pre-clean messages only; post-clean messages keep the fresh
337
342
  // injections they were generated with.
338
343
  const messagesBeforeRepair =
339
- preCleanCount === 0
344
+ preStrippedCount === 0
340
345
  ? parsedMessages
341
346
  : [
342
347
  ...stripInjectionsForCompaction(
343
- parsedMessages.slice(0, preCleanCount),
348
+ parsedMessages.slice(0, preStrippedCount),
344
349
  ),
345
- ...parsedMessages.slice(preCleanCount),
350
+ ...parsedMessages.slice(preStrippedCount),
346
351
  ];
347
352
 
348
353
  const { messages: repairedMessages, stats } =
@@ -448,7 +453,7 @@ export function disposeConversation(ctx: DisposeContext): void {
448
453
  } catch {
449
454
  // Best-effort — fall through to legacy v1 enqueue
450
455
  }
451
- if (!v2Enabled) {
456
+ if (!v2Enabled && isMemoryEnabled()) {
452
457
  try {
453
458
  enqueueMemoryJob("graph_extract", {
454
459
  conversationId: ctx.conversationId,
@@ -1734,15 +1734,16 @@ const RUNTIME_INJECTION_PREFIXES = [
1734
1734
  "<background_turn>",
1735
1735
  "<memory_context __injected>",
1736
1736
  "<memory_context>", // backward-compat: strip legacy blocks from pre-__injected history
1737
- // The static `memory-v2-static` block (opens `<memory>\n…`) IS stripped
1738
- // so each compaction re-injects the freshest essentials/threads/recent/
1739
- // buffer view, matching the `<knowledge_base>` cadence. The dynamic
1740
- // activation block (opens `<memory __injected>…`) is intentionally NOT
1741
- // stripped `startsWith("<memory>\n")` does not match it — so per-turn
1742
- // memory activations persist in history. The activation pipeline dedupes
1743
- // via `everInjected`, and compaction handles aggregate growth, so
1744
- // accumulation does not cause unbounded context growth.
1737
+ // The static `memory-v2-static` block (`<info>\n…</info>`) and the
1738
+ // dynamic activation block (`<memory>\n…</memory>`, plus legacy
1739
+ // `<memory __injected>…`) are both stripped so each compaction
1740
+ // re-injects the freshest essentials/threads/recent/buffer view and
1741
+ // re-runs the activation pipeline, matching the `<knowledge_base>`
1742
+ // cadence. The activation pipeline dedupes via `everInjected`, and
1743
+ // compaction handles aggregate growth, so accumulation does not cause
1744
+ // unbounded context growth. Both wrappers may appear in persisted rows.
1745
1745
  "<memory>\n",
1746
+ "<info>\n",
1746
1747
  "<voice_call_control>",
1747
1748
  "<workspace_top_level>", // backward-compat: strip legacy workspace blocks
1748
1749
  // NOTE: <workspace> is intentionally NOT stripped — workspace context
@@ -2037,7 +2038,7 @@ export interface RuntimeInjectionOptions {
2037
2038
  /**
2038
2039
  * Pre-rendered v2 static memory content (essentials/threads/recent/buffer
2039
2040
  * concatenated, header-wrapped). When non-null on full-mode turns the
2040
- * `memory-v2-static` injector wraps it in `<memory>` and splices it onto
2041
+ * `memory-v2-static` injector wraps it in `<info>` and splices it onto
2041
2042
  * the user message; subsequent turns leave the prior block cached on its
2042
2043
  * original user message.
2043
2044
  */
@@ -74,6 +74,11 @@ export function resolveTrustClass(
74
74
  return trustContext?.trustClass ?? "unknown";
75
75
  }
76
76
 
77
+ import { isAssistantFeatureFlagEnabled } from "../config/assistant-feature-flags.js";
78
+ import {
79
+ buildSwitchInferenceProfileToolDef,
80
+ SWITCH_INFERENCE_PROFILE_TOOL_NAME,
81
+ } from "./switch-inference-profile-tool.js";
77
82
  import type { ToolSetupContext } from "./tool-setup-types.js";
78
83
  export type { ToolSetupContext } from "./tool-setup-types.js";
79
84
 
@@ -86,8 +91,8 @@ export type { ToolSetupContext } from "./tool-setup-types.js";
86
91
  export function buildToolDefinitions(): ToolDefinition[] {
87
92
  return [
88
93
  ...getAllToolDefinitions(),
89
- ...allUiSurfaceTools.map((t) => t.getDefinition()),
90
- ...coreAppProxyTools.map((t) => t.getDefinition()),
94
+ ...allUiSurfaceTools,
95
+ ...coreAppProxyTools,
91
96
  ];
92
97
  }
93
98
 
@@ -209,6 +214,33 @@ export function createToolExecutor(
209
214
  },
210
215
  };
211
216
 
217
+ // Intercept switch_inference_profile: daemon-internal tool that lets the
218
+ // model self-select a different inference profile mid-turn. No permission
219
+ // checks — this is a control-flow signal, not a user-visible tool.
220
+ if (executionName === SWITCH_INFERENCE_PROFILE_TOOL_NAME) {
221
+ const profile = typeof input.profile === "string" ? input.profile : "";
222
+ const config = getConfig();
223
+ const profileEntry = config.llm.profiles?.[profile];
224
+ if (!profileEntry) {
225
+ return {
226
+ content: `Profile "${profile}" not found. Available profiles: ${Object.keys(config.llm.profiles ?? {}).join(", ")}`,
227
+ isError: true,
228
+ };
229
+ }
230
+ if (profileEntry.status === "disabled") {
231
+ return {
232
+ content: `Profile "${profile}" is disabled.`,
233
+ isError: true,
234
+ };
235
+ }
236
+ ctx.toolRoutedProfile = profile;
237
+ const label = profileEntry.label ?? profile;
238
+ return {
239
+ content: `Switched to ${label} profile. Continue with your response.`,
240
+ isError: false,
241
+ };
242
+ }
243
+
212
244
  // Intercept skill_execute: extract the real tool name and input, then
213
245
  // route through the full executor pipeline so the underlying tool's
214
246
  // risk level, permission checks, hooks, and lifecycle events all fire
@@ -326,6 +358,14 @@ export interface SkillProjectionContext {
326
358
  * host tools into the LLM tool definitions.
327
359
  */
328
360
  readonly transportInterface?: InterfaceId;
361
+ /** Per-turn override profile, read by the switch_inference_profile tool injection. */
362
+ currentTurnOverrideProfile?: string;
363
+ /**
364
+ * True when the user has explicitly selected an inference profile for this
365
+ * conversation (via the composer profile picker). When set, tool-based
366
+ * auto-routing is suppressed — the user's explicit choice takes precedence.
367
+ */
368
+ hasExplicitProfileOverride?: boolean;
329
369
  }
330
370
 
331
371
  // ── Conditional tool sets ────────────────────────────────────────────
@@ -613,6 +653,26 @@ export function createResolveToolsCallback(
613
653
  }
614
654
 
615
655
  ctx.allowedToolNames = turnAllowed;
616
- return injectActivityField(allBaseDefs, ACTIVITY_SKIP_SET);
656
+ const baseDefs = injectActivityField(allBaseDefs, ACTIVITY_SKIP_SET);
657
+
658
+ const config = getConfig();
659
+ if (
660
+ isAssistantFeatureFlagEnabled("query-complexity-routing", config) &&
661
+ config.llm &&
662
+ !ctx.hasExplicitProfileOverride
663
+ ) {
664
+ const currentProfile =
665
+ ctx.currentTurnOverrideProfile ?? config.llm.activeProfile;
666
+ const toolDef = buildSwitchInferenceProfileToolDef(
667
+ config.llm.profiles ?? {},
668
+ currentProfile,
669
+ );
670
+ if (toolDef) {
671
+ turnAllowed.add(SWITCH_INFERENCE_PROFILE_TOOL_NAME);
672
+ return [...baseDefs, toolDef];
673
+ }
674
+ }
675
+
676
+ return baseDefs;
617
677
  };
618
678
  }
@@ -3,6 +3,7 @@ import { updateConversationUsage } from "../memory/conversation-crud.js";
3
3
  import { recordUsageEvent } from "../memory/llm-usage-store.js";
4
4
  import type { UsageActor } from "../usage/actors.js";
5
5
  import { resolveUsageAttribution } from "../usage/attribution.js";
6
+ import { extractRawUsage } from "../usage/pricing.js";
6
7
  import type {
7
8
  AnthropicCacheCreationTokenDetails,
8
9
  PricingResult,
@@ -235,6 +236,7 @@ export function recordUsage(
235
236
  outputTokens,
236
237
  cacheCreationInputTokens: normalizedCacheCreationInputTokens,
237
238
  cacheReadInputTokens: normalizedCacheReadInputTokens,
239
+ rawUsage: extractRawUsage(rawResponse),
238
240
  conversationId: ctx.conversationId,
239
241
  runId: null,
240
242
  requestId,