@vellumai/assistant 0.8.4 → 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (438) hide show
  1. package/ARCHITECTURE.md +2 -2
  2. package/docs/browser-use-architecture-phase2.md +1 -1
  3. package/knip.json +2 -1
  4. package/openapi.yaml +809 -11
  5. package/package.json +1 -1
  6. package/src/__tests__/anthropic-provider.test.ts +34 -37
  7. package/src/__tests__/assistant-event-hub-self-exclusion.test.ts +293 -0
  8. package/src/__tests__/assistant-feature-flags-integration.test.ts +3 -3
  9. package/src/__tests__/audit-log-rotation.test.ts +70 -16
  10. package/src/__tests__/background-workers-disk-pressure.test.ts +3 -3
  11. package/src/__tests__/btw-routes.test.ts +2 -3
  12. package/src/__tests__/call-controller.test.ts +0 -1
  13. package/src/__tests__/cancel-resolves-conversation-key.test.ts +1 -1
  14. package/src/__tests__/channel-guardian.test.ts +3 -3
  15. package/src/__tests__/checker.test.ts +6 -15
  16. package/src/__tests__/compaction-events.test.ts +1 -0
  17. package/src/__tests__/compactor-call-site-logging.test.ts +214 -0
  18. package/src/__tests__/computer-use-skill-manifest-regression.test.ts +5 -11
  19. package/src/__tests__/computer-use-tools.test.ts +2 -4
  20. package/src/__tests__/confirmation-request-guardian-bridge.test.ts +0 -1
  21. package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +1 -1
  22. package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +1 -1
  23. package/src/__tests__/conversation-agent-loop-overflow.test.ts +197 -2
  24. package/src/__tests__/conversation-agent-loop.test.ts +163 -122
  25. package/src/__tests__/conversation-app-control-instantiation.test.ts +2 -5
  26. package/src/__tests__/conversation-clear-safety.test.ts +25 -25
  27. package/src/__tests__/conversation-delete-schedule-cleanup.test.ts +1 -1
  28. package/src/__tests__/conversation-disk-view-integration.test.ts +2 -2
  29. package/src/__tests__/conversation-error.test.ts +31 -0
  30. package/src/__tests__/conversation-fork-crud.test.ts +178 -15
  31. package/src/__tests__/conversation-lifecycle.test.ts +52 -11
  32. package/src/__tests__/{conversation-load-cleaned-at.test.ts → conversation-load-history-stripped.test.ts} +13 -13
  33. package/src/__tests__/conversation-provider-retry-repair.test.ts +1 -0
  34. package/src/__tests__/conversation-routes-disk-view.test.ts +109 -0
  35. package/src/__tests__/conversation-routes-slash-commands.test.ts +35 -0
  36. package/src/__tests__/conversation-skill-tools.test.ts +2 -5
  37. package/src/__tests__/conversation-store.test.ts +1 -1
  38. package/src/__tests__/conversation-sync-tags.test.ts +99 -32
  39. package/src/__tests__/conversation-workspace-cache-state.test.ts +1 -0
  40. package/src/__tests__/conversation-workspace-injection.test.ts +1 -1
  41. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +1 -1
  42. package/src/__tests__/credential-execution-feature-gates.test.ts +9 -7
  43. package/src/__tests__/credential-execution-tools.test.ts +6 -6
  44. package/src/__tests__/credential-security-invariants.test.ts +1 -0
  45. package/src/__tests__/credential-vault-unit.test.ts +2 -2
  46. package/src/__tests__/dynamic-page-surface.test.ts +2 -2
  47. package/src/__tests__/email-html-renderer.test.ts +12 -0
  48. package/src/__tests__/gateway-flag-listener.test.ts +237 -0
  49. package/src/__tests__/gemini-provider.test.ts +78 -0
  50. package/src/__tests__/guardian-dispatch.test.ts +0 -1
  51. package/src/__tests__/guardian-outbound-http.test.ts +7 -5
  52. package/src/__tests__/handlers-user-message-approval-consumption.test.ts +1 -1
  53. package/src/__tests__/heartbeat-disk-pressure.test.ts +4 -0
  54. package/src/__tests__/heartbeat-service.test.ts +4 -0
  55. package/src/__tests__/host-shell-tool.test.ts +1 -1
  56. package/src/__tests__/init-feature-flag-overrides.test.ts +5 -6
  57. package/src/__tests__/list-messages-tool-merge.test.ts +70 -11
  58. package/src/__tests__/llm-request-log-call-site.test.ts +136 -0
  59. package/src/__tests__/llm-request-log-source-clickhouse.test.ts +26 -0
  60. package/src/__tests__/llm-resolver.test.ts +77 -9
  61. package/src/__tests__/llm-usage-store.test.ts +66 -0
  62. package/src/__tests__/logger.test.ts +89 -0
  63. package/src/__tests__/mcp-abort-signal.test.ts +2 -2
  64. package/src/__tests__/media-generate-image.test.ts +31 -0
  65. package/src/__tests__/memory-v2-static-injector.test.ts +7 -7
  66. package/src/__tests__/model-intents.test.ts +2 -4
  67. package/src/__tests__/notification-guardian-path.test.ts +0 -1
  68. package/src/__tests__/onboarding-template-contract.test.ts +1 -1
  69. package/src/__tests__/openai-provider.test.ts +46 -0
  70. package/src/__tests__/openai-responses-provider.test.ts +114 -12
  71. package/src/__tests__/pending-interactions-resolved-event.test.ts +0 -1
  72. package/src/__tests__/platform-bash-auto-approve.test.ts +2 -2
  73. package/src/__tests__/platform.test.ts +2 -2
  74. package/src/__tests__/plugin-api-tool-definition.test.ts +92 -0
  75. package/src/__tests__/plugin-bootstrap.test.ts +2 -2
  76. package/src/__tests__/plugin-tool-contribution.test.ts +13 -6
  77. package/src/__tests__/plugin-types.test.ts +3 -2
  78. package/src/__tests__/prechat-onboarding-contract.test.ts +131 -98
  79. package/src/__tests__/pricing.test.ts +12 -0
  80. package/src/__tests__/prune-jobs-changes-parser.test.ts +61 -0
  81. package/src/__tests__/registry.test.ts +2 -8
  82. package/src/__tests__/require-fresh-approval.test.ts +2 -2
  83. package/src/__tests__/runtime-events-sse-bilingual.test.ts +154 -0
  84. package/src/__tests__/shell-tool-proxy-mode.test.ts +1 -1
  85. package/src/__tests__/skill-feature-flags.test.ts +2 -2
  86. package/src/__tests__/skill-projection-feature-flag.test.ts +4 -7
  87. package/src/__tests__/skill-projection.benchmark.test.ts +2 -6
  88. package/src/__tests__/skill-tool-factory.test.ts +1 -1
  89. package/src/__tests__/subagent-notify-parent.test.ts +1 -1
  90. package/src/__tests__/suggestion-routes.test.ts +1 -0
  91. package/src/__tests__/sync-message-contract.test.ts +59 -0
  92. package/src/__tests__/system-prompt.test.ts +145 -131
  93. package/src/__tests__/terminal-tools.test.ts +1 -1
  94. package/src/__tests__/tool-approval-handler.test.ts +1 -5
  95. package/src/__tests__/tool-execute-pipeline.test.ts +2 -2
  96. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +2 -5
  97. package/src/__tests__/tool-executor-lifecycle-events.test.ts +15 -5
  98. package/src/__tests__/tool-executor.test.ts +9 -62
  99. package/src/__tests__/tool-grant-request-escalation.test.ts +1 -6
  100. package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
  101. package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +1 -6
  102. package/src/__tests__/trusted-contact-multichannel.test.ts +0 -1
  103. package/src/__tests__/ui-file-upload-surface.test.ts +2 -2
  104. package/src/__tests__/usage-routes.test.ts +3 -0
  105. package/src/__tests__/verification-control-plane-policy.test.ts +2 -2
  106. package/src/__tests__/workspace-git-service.test.ts +6 -5
  107. package/src/__tests__/workspace-migration-089-move-memory-tree-out-of-v3.test.ts +86 -0
  108. package/src/acp/__tests__/prepare-agent-env.test.ts +146 -0
  109. package/src/acp/prepare-agent-env.ts +78 -0
  110. package/src/acp/session-manager.ts +1 -1
  111. package/src/agent/loop.ts +8 -0
  112. package/src/api/README.md +5 -0
  113. package/src/api/index.ts +4 -0
  114. package/src/api/package.json +10 -0
  115. package/src/background-wake/background-wake-routes.test.ts +233 -0
  116. package/src/background-wake/runtime-registry.ts +24 -0
  117. package/src/cli/commands/__tests__/browser.test.ts +23 -5
  118. package/src/cli/commands/__tests__/domain-register.test.ts +110 -0
  119. package/src/cli/commands/__tests__/domain-status.test.ts +33 -33
  120. package/src/cli/commands/__tests__/inference-send.test.ts +108 -5
  121. package/src/cli/commands/__tests__/memory-v2-compare-render.test.ts +98 -0
  122. package/src/cli/commands/__tests__/memory-v2.test.ts +1 -0
  123. package/src/cli/commands/__tests__/memory-v3-render.test.ts +340 -0
  124. package/src/cli/commands/browser.ts +247 -0
  125. package/src/cli/commands/domain.ts +91 -41
  126. package/src/cli/commands/inference.ts +93 -40
  127. package/src/cli/commands/memory-v2-compare-render.ts +115 -0
  128. package/src/cli/commands/memory-v2.ts +176 -1
  129. package/src/cli/commands/memory-v3-render.ts +344 -0
  130. package/src/cli/commands/memory-v3.ts +316 -0
  131. package/src/cli/program.ts +2 -0
  132. package/src/config/assistant-feature-flags.ts +21 -9
  133. package/src/config/bundled-skills/document-editor/SKILL.md +11 -2
  134. package/src/config/bundled-skills/document-editor/TOOLS.json +18 -0
  135. package/src/config/bundled-skills/document-editor/tools/document-open.ts +12 -0
  136. package/src/config/bundled-skills/image-studio/SKILL.md +4 -0
  137. package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +2 -2
  138. package/src/config/bundled-skills/media-processing/tools/ingest-media.ts +13 -8
  139. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +10 -3
  140. package/src/config/bundled-skills/phone-calls/references/TRANSCRIPTS.md +16 -14
  141. package/src/config/bundled-skills/playbooks/tools/playbook-create.ts +7 -2
  142. package/src/config/bundled-skills/playbooks/tools/playbook-update.ts +7 -2
  143. package/src/config/bundled-tool-registry.ts +2 -0
  144. package/src/config/call-site-defaults.ts +7 -6
  145. package/src/config/feature-flag-registry.json +16 -0
  146. package/src/config/schemas/__tests__/memory-v2.test.ts +213 -1
  147. package/src/config/schemas/call-site-catalog.ts +21 -7
  148. package/src/config/schemas/llm.ts +12 -1
  149. package/src/config/schemas/memory-v2.ts +246 -0
  150. package/src/config/schemas/memory.ts +2 -1
  151. package/src/context/compactor.ts +52 -0
  152. package/src/conversations/__tests__/message-consolidation.test.ts +350 -0
  153. package/src/conversations/message-consolidation.ts +404 -0
  154. package/src/daemon/__tests__/conversation-tool-setup-exclude.test.ts +1 -1
  155. package/src/daemon/__tests__/meet-manifest-loader.test.ts +1 -1
  156. package/src/daemon/conversation-agent-loop-handlers.ts +2 -13
  157. package/src/daemon/conversation-agent-loop.ts +126 -76
  158. package/src/daemon/conversation-error.ts +31 -1
  159. package/src/daemon/conversation-lifecycle.ts +27 -22
  160. package/src/daemon/conversation-runtime-assembly.ts +10 -9
  161. package/src/daemon/conversation-tool-setup.ts +63 -3
  162. package/src/daemon/conversation-usage.ts +2 -0
  163. package/src/daemon/conversation.ts +14 -29
  164. package/src/daemon/disk-pressure-guard.ts +14 -2
  165. package/src/daemon/handlers/config-model.test.ts +1 -0
  166. package/src/daemon/handlers/conversations.ts +11 -3
  167. package/src/daemon/host-browser-proxy.ts +5 -5
  168. package/src/daemon/host-cu-proxy.ts +4 -4
  169. package/src/daemon/host-file-proxy.ts +4 -4
  170. package/src/daemon/host-proxy-base.ts +4 -4
  171. package/src/daemon/host-transfer-proxy.ts +10 -10
  172. package/src/daemon/lifecycle.ts +23 -20
  173. package/src/daemon/meet-manifest-loader.ts +1 -7
  174. package/src/daemon/message-types/conversations.ts +6 -9
  175. package/src/daemon/message-types/home.ts +1 -13
  176. package/src/daemon/message-types/messages.ts +6 -14
  177. package/src/daemon/message-types/sync.ts +14 -0
  178. package/src/daemon/shutdown-handlers.ts +24 -5
  179. package/src/daemon/switch-inference-profile-tool.ts +52 -0
  180. package/src/daemon/tool-setup-types.ts +13 -0
  181. package/src/events/relationship-state-updated.ts +25 -0
  182. package/src/heartbeat/__tests__/heartbeat-service.test.ts +1 -1
  183. package/src/home/home-greeting.ts +0 -9
  184. package/src/home/suggested-prompts.ts +0 -9
  185. package/src/ipc/gateway-flag-listener.ts +123 -0
  186. package/src/ipc/skill-routes/registries.ts +8 -12
  187. package/src/memory/__tests__/db-async-query.test.ts +165 -0
  188. package/src/memory/__tests__/db-maintenance.test.ts +115 -0
  189. package/src/memory/__tests__/jobs-store-enqueue-gate.test.ts +241 -0
  190. package/src/memory/__tests__/jobs-store-job-classes.test.ts +28 -1
  191. package/src/memory/__tests__/memory-retrospective-job.test.ts +7 -0
  192. package/src/memory/auto-analysis-enqueue.ts +5 -1
  193. package/src/memory/conversation-crud.ts +71 -70
  194. package/src/memory/conversation-starters-cadence.ts +3 -1
  195. package/src/memory/conversation-title-service.ts +19 -3
  196. package/src/memory/db-async-query.ts +214 -0
  197. package/src/memory/db-init.ts +10 -0
  198. package/src/memory/db-maintenance.ts +30 -21
  199. package/src/memory/graph/bootstrap.ts +8 -1
  200. package/src/memory/graph/capability-seed.ts +7 -3
  201. package/src/memory/graph/conversation-graph-memory.ts +100 -17
  202. package/src/memory/graph/extraction.ts +1 -5
  203. package/src/memory/graph/graph-search.ts +7 -1
  204. package/src/memory/indexer.ts +28 -18
  205. package/src/memory/job-handlers/cleanup.ts +76 -18
  206. package/src/memory/job-handlers/conversation-starters.ts +1 -4
  207. package/src/memory/jobs/embed-pkb-file.ts +6 -1
  208. package/src/memory/jobs-store.ts +14 -0
  209. package/src/memory/jobs-worker.ts +55 -22
  210. package/src/memory/llm-request-log-source-clickhouse.ts +42 -2
  211. package/src/memory/llm-request-log-source-local.ts +7 -0
  212. package/src/memory/llm-request-log-source.ts +9 -2
  213. package/src/memory/llm-request-log-store.ts +43 -1
  214. package/src/memory/llm-usage-store.ts +24 -0
  215. package/src/memory/memory-retrospective-enqueue.ts +8 -1
  216. package/src/memory/memory-retrospective-job.ts +5 -0
  217. package/src/memory/memory-v2-activation-log-store.ts +15 -6
  218. package/src/memory/migrations/260-rename-cleaned-at.ts +44 -0
  219. package/src/memory/migrations/261-llm-usage-add-raw-usage.ts +36 -0
  220. package/src/memory/migrations/262-memory-v3-coactivation.ts +57 -0
  221. package/src/memory/migrations/263-memory-v3-auto-edges.ts +50 -0
  222. package/src/memory/migrations/264-llm-request-log-call-site.ts +29 -0
  223. package/src/memory/migrations/index.ts +17 -0
  224. package/src/memory/migrations/registry.ts +33 -0
  225. package/src/memory/schema/conversations.ts +1 -1
  226. package/src/memory/schema/infrastructure.ts +21 -0
  227. package/src/memory/tool-usage-store.ts +36 -8
  228. package/src/memory/v2/__tests__/consolidation-job.test.ts +1 -0
  229. package/src/memory/v2/__tests__/harness-compare.test.ts +186 -0
  230. package/src/memory/v2/__tests__/harness-metrics.test.ts +74 -0
  231. package/src/memory/v2/__tests__/harness-oracle.test.ts +257 -0
  232. package/src/memory/v2/__tests__/harness-replay-input.test.ts +225 -0
  233. package/src/memory/v2/__tests__/harness-runner.test.ts +109 -0
  234. package/src/memory/v2/__tests__/injection.test.ts +127 -98
  235. package/src/memory/v2/__tests__/qdrant.test.ts +36 -0
  236. package/src/memory/v2/__tests__/router.test.ts +171 -3
  237. package/src/memory/v2/harness/compare.ts +57 -0
  238. package/src/memory/v2/harness/metrics.ts +124 -0
  239. package/src/memory/v2/harness/oracle.ts +145 -0
  240. package/src/memory/v2/harness/replay-input.ts +224 -0
  241. package/src/memory/v2/harness/retriever.ts +74 -0
  242. package/src/memory/v2/harness/router-retriever.ts +43 -0
  243. package/src/memory/v2/harness/runner.ts +106 -0
  244. package/src/memory/v2/harness/trace.ts +58 -0
  245. package/src/memory/v2/injection.ts +21 -15
  246. package/src/memory/v2/prompts/router.ts +26 -1
  247. package/src/memory/v2/qdrant.ts +14 -2
  248. package/src/memory/v2/router.ts +171 -18
  249. package/src/memory/v3/__tests__/coactivation-store.test.ts +422 -0
  250. package/src/memory/v3/__tests__/consolidation-job.test.ts +468 -0
  251. package/src/memory/v3/__tests__/edge-learning-job.test.ts +324 -0
  252. package/src/memory/v3/__tests__/edges.test.ts +563 -0
  253. package/src/memory/v3/__tests__/filter.test.ts +512 -0
  254. package/src/memory/v3/__tests__/gate.test.ts +574 -0
  255. package/src/memory/v3/__tests__/index-composition.test.ts +233 -0
  256. package/src/memory/v3/__tests__/loop.test.ts +530 -0
  257. package/src/memory/v3/__tests__/retriever.test.ts +226 -0
  258. package/src/memory/v3/__tests__/scouts.test.ts +440 -0
  259. package/src/memory/v3/__tests__/shadow-middleware.test.ts +312 -0
  260. package/src/memory/v3/__tests__/system-prompts.test.ts +154 -0
  261. package/src/memory/v3/__tests__/traversal.test.ts +469 -0
  262. package/src/memory/v3/__tests__/tree-index.test.ts +280 -0
  263. package/src/memory/v3/__tests__/tree-store.test.ts +529 -0
  264. package/src/memory/v3/__tests__/tree-walk.test.ts +707 -0
  265. package/src/memory/v3/__tests__/validate.test.ts +245 -0
  266. package/src/memory/v3/auto-edges.ts +223 -0
  267. package/src/memory/v3/coactivation-store.ts +124 -0
  268. package/src/memory/v3/consolidation-job.ts +323 -0
  269. package/src/memory/v3/edge-learning-job.ts +160 -0
  270. package/src/memory/v3/edges.ts +249 -0
  271. package/src/memory/v3/filter.ts +281 -0
  272. package/src/memory/v3/gate.ts +334 -0
  273. package/src/memory/v3/index-composition.ts +113 -0
  274. package/src/memory/v3/llm-capture.ts +46 -0
  275. package/src/memory/v3/loop.ts +382 -0
  276. package/src/memory/v3/maintenance.ts +144 -0
  277. package/src/memory/v3/prompt-context.ts +33 -0
  278. package/src/memory/v3/prompts/consolidation.ts +458 -0
  279. package/src/memory/v3/prompts/system-prompts.ts +196 -0
  280. package/src/memory/v3/retriever.ts +33 -0
  281. package/src/memory/v3/scouts.ts +420 -0
  282. package/src/memory/v3/shadow-middleware.ts +305 -0
  283. package/src/memory/v3/traversal.ts +206 -0
  284. package/src/memory/v3/tree-index.ts +237 -0
  285. package/src/memory/v3/tree-store.ts +394 -0
  286. package/src/memory/v3/tree-walk.ts +351 -0
  287. package/src/memory/v3/types.ts +65 -0
  288. package/src/memory/v3/validate.ts +300 -0
  289. package/src/notifications/adapters/macos.ts +18 -1
  290. package/src/notifications/adapters/platform.ts +1 -1
  291. package/src/notifications/decision-engine.ts +1 -4
  292. package/src/notifications/emit-signal.ts +29 -49
  293. package/src/permissions/prompter.ts +3 -3
  294. package/src/permissions/question-prompter.ts +5 -2
  295. package/src/permissions/secret-prompter.ts +2 -2
  296. package/src/plugin-api/index.ts +4 -0
  297. package/src/plugin-api/types.ts +7 -33
  298. package/src/plugins/defaults/index.ts +6 -0
  299. package/src/plugins/defaults/injectors.ts +18 -11
  300. package/src/plugins/external-plugin-loader.ts +5 -68
  301. package/src/plugins/types.ts +11 -16
  302. package/src/proactive-artifact/aux-message-injector.ts +17 -4
  303. package/src/prompts/__tests__/task-progress-hint-section.test.ts +3 -9
  304. package/src/prompts/persona-resolver.ts +36 -21
  305. package/src/prompts/sections.ts +39 -7
  306. package/src/prompts/system-prompt.ts +50 -185
  307. package/src/prompts/templates/BOOTSTRAP.md +2 -2
  308. package/src/prompts/templates/system-sections.ts +230 -8
  309. package/src/providers/__tests__/connection-model-compat.test.ts +234 -0
  310. package/src/providers/__tests__/retry-callsite.test.ts +85 -5
  311. package/src/providers/anthropic/client.ts +32 -66
  312. package/src/providers/call-site-routing.ts +14 -2
  313. package/src/providers/connection-model-compat.ts +38 -0
  314. package/src/providers/connection-resolution.ts +16 -2
  315. package/src/providers/gemini/client.ts +49 -6
  316. package/src/providers/inference/adapter-factory.ts +3 -0
  317. package/src/providers/minimax/client.ts +106 -0
  318. package/src/providers/model-catalog.ts +43 -0
  319. package/src/providers/model-intents.ts +1 -1
  320. package/src/providers/openai/chat-completions-provider.ts +6 -3
  321. package/src/providers/openai/codex-models.ts +18 -0
  322. package/src/providers/openai/responses-provider.ts +78 -21
  323. package/src/providers/provider-send-message.ts +7 -1
  324. package/src/providers/retry.ts +34 -3
  325. package/src/providers/thinking-config.ts +26 -1
  326. package/src/providers/usage-tracking.ts +2 -0
  327. package/src/runtime/AGENTS.md +2 -2
  328. package/src/runtime/agent-wake.ts +1 -0
  329. package/src/runtime/assistant-event-hub.ts +76 -6
  330. package/src/runtime/auth/route-policy.ts +36 -0
  331. package/src/runtime/btw-sidechain.ts +0 -6
  332. package/src/runtime/http-types.ts +0 -2
  333. package/src/runtime/migrations/vbundle-builder.ts +10 -3
  334. package/src/runtime/pending-interactions.ts +0 -1
  335. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +106 -0
  336. package/src/runtime/routes/__tests__/memory-v2-simulate-route.test.ts +25 -6
  337. package/src/runtime/routes/__tests__/plugins-routes.test.ts +512 -0
  338. package/src/runtime/routes/acp-routes.test.ts +255 -6
  339. package/src/runtime/routes/acp-routes.ts +8 -1
  340. package/src/runtime/routes/avatar-routes.ts +10 -10
  341. package/src/runtime/routes/background-wake-routes.ts +188 -0
  342. package/src/runtime/routes/browser-tabs-routes.ts +200 -0
  343. package/src/runtime/routes/btw-routes.ts +0 -6
  344. package/src/runtime/routes/conversation-cli-routes.ts +1 -1
  345. package/src/runtime/routes/conversation-list-routes.ts +12 -4
  346. package/src/runtime/routes/conversation-management-routes.ts +77 -20
  347. package/src/runtime/routes/conversation-query-routes.ts +142 -36
  348. package/src/runtime/routes/conversation-routes.ts +252 -410
  349. package/src/runtime/routes/conversation-starter-routes.ts +6 -3
  350. package/src/runtime/routes/disk-pressure-routes.ts +1 -1
  351. package/src/runtime/routes/domain-routes.ts +60 -10
  352. package/src/runtime/routes/email-routes.ts +5 -2
  353. package/src/runtime/routes/events-routes.ts +54 -10
  354. package/src/runtime/routes/group-routes.ts +24 -8
  355. package/src/runtime/routes/host-browser-routes.ts +10 -2
  356. package/src/runtime/routes/host-cu-routes.ts +2 -2
  357. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +96 -3
  358. package/src/runtime/routes/index.ts +8 -0
  359. package/src/runtime/routes/inference-profile-session-handler.ts +22 -12
  360. package/src/runtime/routes/inference-profile-session-routes.ts +7 -1
  361. package/src/runtime/routes/llm-call-sites-routes.ts +32 -5
  362. package/src/runtime/routes/memory-item-routes.ts +8 -3
  363. package/src/runtime/routes/memory-v2-routes.ts +215 -5
  364. package/src/runtime/routes/memory-v3-routes.ts +316 -0
  365. package/src/runtime/routes/migration-routes.ts +21 -24
  366. package/src/runtime/routes/plugins-routes.ts +337 -0
  367. package/src/runtime/routes/rename-conversation-routes.ts +6 -2
  368. package/src/runtime/routes/secret-routes.ts +25 -5
  369. package/src/runtime/routes/settings-routes.ts +12 -11
  370. package/src/runtime/routes/slack-channel-routes.ts +5 -4
  371. package/src/runtime/routes/workspace-routes.ts +25 -10
  372. package/src/runtime/sync/resource-sync-events.ts +106 -38
  373. package/src/runtime/sync/sync-publisher.test.ts +49 -0
  374. package/src/runtime/sync/sync-publisher.ts +2 -1
  375. package/src/runtime/verification-outbound-actions.ts +73 -1
  376. package/src/telemetry/types.ts +12 -0
  377. package/src/telemetry/usage-telemetry-reporter.test.ts +48 -0
  378. package/src/telemetry/usage-telemetry-reporter.ts +1 -0
  379. package/src/tools/acp/spawn.test.ts +119 -0
  380. package/src/tools/acp/spawn.ts +15 -2
  381. package/src/tools/apps/definitions.ts +2 -8
  382. package/src/tools/ask-question/ask-question-tool.test.ts +3 -3
  383. package/src/tools/ask-question/ask-question-tool.ts +38 -45
  384. package/src/tools/browser/__tests__/pinned-tabs.test.ts +70 -0
  385. package/src/tools/browser/browser-execution.ts +16 -3
  386. package/src/tools/browser/cdp-client/__tests__/browser-tabs-factory.test.ts +402 -0
  387. package/src/tools/browser/cdp-client/__tests__/types.test.ts +3 -0
  388. package/src/tools/browser/cdp-client/cdp-inspect-client.ts +12 -0
  389. package/src/tools/browser/cdp-client/extension-cdp-client.ts +27 -1
  390. package/src/tools/browser/cdp-client/factory.ts +100 -17
  391. package/src/tools/browser/cdp-client/local-cdp-client.ts +12 -0
  392. package/src/tools/browser/cdp-client/types.ts +65 -0
  393. package/src/tools/browser/pinned-tabs.ts +96 -40
  394. package/src/tools/computer-use/definitions.ts +22 -78
  395. package/src/tools/credential-execution/make-authenticated-request.ts +3 -9
  396. package/src/tools/credential-execution/manage-secure-command-tool.ts +3 -9
  397. package/src/tools/credential-execution/run-authenticated-command.ts +3 -9
  398. package/src/tools/credentials/vault.ts +3 -9
  399. package/src/tools/document/document-tool.ts +59 -0
  400. package/src/tools/execution-target.ts +21 -23
  401. package/src/tools/executor.ts +6 -1
  402. package/src/tools/filesystem/edit.ts +3 -9
  403. package/src/tools/filesystem/list.ts +3 -9
  404. package/src/tools/filesystem/read.ts +3 -9
  405. package/src/tools/filesystem/write.ts +3 -9
  406. package/src/tools/host-filesystem/edit.ts +3 -9
  407. package/src/tools/host-filesystem/read.ts +3 -9
  408. package/src/tools/host-filesystem/transfer.ts +3 -9
  409. package/src/tools/host-filesystem/write.ts +3 -9
  410. package/src/tools/host-terminal/host-shell.ts +3 -9
  411. package/src/tools/mcp/mcp-tool-factory.ts +1 -8
  412. package/src/tools/memory/register.test.ts +1 -1
  413. package/src/tools/memory/register.ts +4 -9
  414. package/src/tools/network/web-fetch.ts +3 -9
  415. package/src/tools/network/web-search.ts +25 -32
  416. package/src/tools/registry.ts +7 -23
  417. package/src/tools/schema-transforms.ts +1 -1
  418. package/src/tools/skills/execute.ts +3 -9
  419. package/src/tools/skills/load.ts +3 -9
  420. package/src/tools/skills/skill-tool-factory.ts +1 -8
  421. package/src/tools/subagent/notify-parent.ts +3 -9
  422. package/src/tools/system/request-permission.ts +3 -9
  423. package/src/tools/terminal/shell.ts +3 -9
  424. package/src/tools/tool-defaults.ts +94 -0
  425. package/src/tools/types.ts +27 -98
  426. package/src/tools/ui-surface/definitions.ts +6 -22
  427. package/src/usage/pricing.ts +23 -0
  428. package/src/usage/types.ts +12 -0
  429. package/src/util/logger.ts +16 -7
  430. package/src/util/platform.ts +7 -2
  431. package/src/util/sqlite3-runtime.ts +65 -0
  432. package/src/workspace/migrations/086-revert-stale-gemini-mis-rewrites.ts +1 -0
  433. package/src/workspace/migrations/089-move-memory-tree-out-of-v3.ts +86 -0
  434. package/src/workspace/migrations/registry.ts +2 -0
  435. package/src/__tests__/compaction-strip-metadata-clear.test.ts +0 -206
  436. package/src/__tests__/message-complete-display-id.test.ts +0 -175
  437. package/src/daemon/query-complexity-router.ts +0 -75
  438. package/src/prompts/cache-boundary.ts +0 -8
@@ -133,13 +133,10 @@ const mockSkillTool: Tool = {
133
133
  description: "A test skill tool",
134
134
  category: "skill",
135
135
  defaultRiskLevel: RiskLevel.Low,
136
+ executionTarget: "sandbox",
136
137
  origin: "skill",
137
138
  ownerSkillId: "test-skill",
138
- getDefinition: () => ({
139
- name: "skill_test_tool",
140
- description: "A test skill tool",
141
- input_schema: { type: "object" as const, properties: {} },
142
- }),
139
+ input_schema: { type: "object" as const, properties: {} },
143
140
  execute: async () => ({ content: "ok", isError: false }),
144
141
  };
145
142
  registerTool(mockSkillTool);
@@ -150,14 +147,11 @@ const mockBundledSkillTool: Tool = {
150
147
  description: "A test bundled skill tool",
151
148
  category: "skill",
152
149
  defaultRiskLevel: RiskLevel.Low,
150
+ executionTarget: "sandbox",
153
151
  origin: "skill",
154
152
  ownerSkillId: "gmail",
155
153
  ownerSkillBundled: true,
156
- getDefinition: () => ({
157
- name: "skill_bundled_test_tool",
158
- description: "A test bundled skill tool",
159
- input_schema: { type: "object" as const, properties: {} },
160
- }),
154
+ input_schema: { type: "object" as const, properties: {} },
161
155
  execute: async () => ({ content: "ok", isError: false }),
162
156
  };
163
157
  registerTool(mockBundledSkillTool);
@@ -391,13 +385,10 @@ describe("Permission Checker", () => {
391
385
  description: "A medium-risk skill tool",
392
386
  category: "skill",
393
387
  defaultRiskLevel: RiskLevel.Medium,
388
+ executionTarget: "sandbox",
394
389
  origin: "skill",
395
390
  ownerSkillId: "test-skill",
396
- getDefinition: () => ({
397
- name: "skill_medium_tool",
398
- description: "A medium-risk skill tool",
399
- input_schema: { type: "object" as const, properties: {} },
400
- }),
391
+ input_schema: { type: "object" as const, properties: {} },
401
392
  execute: async () => ({ content: "ok", isError: false }),
402
393
  };
403
394
  registerTool(mediumSkillTool);
@@ -129,6 +129,7 @@ mock.module("../security/secret-allowlist.js", () => ({
129
129
  mock.module("../memory/conversation-crud.js", () => ({
130
130
  setConversationOriginChannelIfUnset: () => {},
131
131
  updateConversationContextWindow: () => {},
132
+ setConversationHistoryStrippedAt: () => {},
132
133
  deleteMessageById: () => {},
133
134
  provenanceFromTrustContext: () => ({
134
135
  source: "user",
@@ -0,0 +1,214 @@
1
+ /**
2
+ * Tests that successful compaction LLM calls land an `llm_request_logs`
3
+ * row with `call_site = "compactionAgent"`. The compactor opts out of
4
+ * automatic usage tracking (`usageTracking: "manual"`), so its calls
5
+ * otherwise never reach `recordRequestLog` via the agent-loop
6
+ * dispatcher. This test pins the explicit instrumentation in
7
+ * `compactor.ts` so visibility doesn't silently regress.
8
+ */
9
+ import { beforeEach, describe, expect, mock, test } from "bun:test";
10
+
11
+ function makeLoggerStub(): Record<string, unknown> {
12
+ const stub: Record<string, unknown> = {};
13
+ for (const m of [
14
+ "info",
15
+ "warn",
16
+ "error",
17
+ "debug",
18
+ "trace",
19
+ "fatal",
20
+ "silent",
21
+ "child",
22
+ ]) {
23
+ stub[m] = m === "child" ? () => makeLoggerStub() : () => {};
24
+ }
25
+ return stub;
26
+ }
27
+
28
+ mock.module("../util/logger.js", () => ({
29
+ getLogger: () => makeLoggerStub(),
30
+ }));
31
+
32
+ mock.module("../memory/conversation-crud.js", () => ({
33
+ getMessages: () => [],
34
+ }));
35
+
36
+ mock.module("../memory/attachments-store.js", () => ({
37
+ getAttachmentMetadataForMessage: () => [],
38
+ getAttachmentContent: () => null,
39
+ }));
40
+
41
+ const recordRequestLogCalls: Array<{
42
+ conversationId: string;
43
+ requestPayload: string;
44
+ responsePayload: string;
45
+ messageId: string | undefined;
46
+ provider: string | undefined;
47
+ callSite: string | undefined;
48
+ }> = [];
49
+
50
+ mock.module("../memory/llm-request-log-store.js", () => ({
51
+ recordRequestLog: (
52
+ conversationId: string,
53
+ requestPayload: string,
54
+ responsePayload: string,
55
+ messageId?: string,
56
+ provider?: string,
57
+ callSite?: string,
58
+ ): string => {
59
+ recordRequestLogCalls.push({
60
+ conversationId,
61
+ requestPayload,
62
+ responsePayload,
63
+ messageId,
64
+ provider,
65
+ callSite,
66
+ });
67
+ return `mock-log-${recordRequestLogCalls.length}`;
68
+ },
69
+ }));
70
+
71
+ import { runAssistantDrivenCompaction } from "../context/compactor.js";
72
+ import type { Message, Provider } from "../providers/types.js";
73
+
74
+ const TAIL_TIMESTAMP =
75
+ "2026-05-21 (Thursday) 10:00:00 -05:00 (America/Chicago)";
76
+
77
+ const compactionResponse = `
78
+ <compaction_result>
79
+ <summary>
80
+ Earlier turns summarized here.
81
+ </summary>
82
+
83
+ <key_state>
84
+ - Nothing critical pending.
85
+ </key_state>
86
+
87
+ <tail_start timestamp="${TAIL_TIMESTAMP}" preview="tail anchor message" />
88
+ </compaction_result>
89
+ `;
90
+
91
+ const RAW_REQUEST = { model: "mock-model", messages: [] };
92
+ const RAW_RESPONSE = { id: "resp-1", content: compactionResponse };
93
+
94
+ function makeProvider(): Provider {
95
+ return {
96
+ name: "mock-provider",
97
+ sendMessage: async () => ({
98
+ content: [{ type: "text", text: compactionResponse }],
99
+ model: "mock-model",
100
+ actualProvider: "actual-mock-provider",
101
+ usage: { inputTokens: 100, outputTokens: 50 },
102
+ stopReason: "end_turn",
103
+ rawRequest: RAW_REQUEST,
104
+ rawResponse: RAW_RESPONSE,
105
+ }),
106
+ };
107
+ }
108
+
109
+ function makeProviderWithoutRaw(): Provider {
110
+ return {
111
+ name: "mock-provider",
112
+ sendMessage: async () => ({
113
+ content: [{ type: "text", text: compactionResponse }],
114
+ model: "mock-model",
115
+ usage: { inputTokens: 100, outputTokens: 50 },
116
+ stopReason: "end_turn",
117
+ // rawRequest/rawResponse intentionally absent — best-effort skip path.
118
+ }),
119
+ };
120
+ }
121
+
122
+ const userText = (text: string): Message => ({
123
+ role: "user",
124
+ content: [{ type: "text", text }],
125
+ });
126
+
127
+ const userTextWithTurnContext = (text: string, timestamp: string): Message => ({
128
+ role: "user",
129
+ content: [
130
+ {
131
+ type: "text",
132
+ text: `<turn_context>\ncurrent_time: ${timestamp}\n</turn_context>\n${text}`,
133
+ },
134
+ ],
135
+ });
136
+
137
+ const assistantText = (text: string): Message => ({
138
+ role: "assistant",
139
+ content: [{ type: "text", text }],
140
+ });
141
+
142
+ function buildMessages(): Message[] {
143
+ return [
144
+ userText("old user turn 1"),
145
+ assistantText("old assistant reply 1"),
146
+ userText("old user turn 2"),
147
+ assistantText("old assistant reply 2"),
148
+ userTextWithTurnContext("tail anchor message", TAIL_TIMESTAMP),
149
+ assistantText("tail assistant reply"),
150
+ ];
151
+ }
152
+
153
+ const args = (provider: Provider) => ({
154
+ conversationId: "conv-compaction-log-1",
155
+ messages: buildMessages(),
156
+ provider,
157
+ systemPrompt: "you are a test assistant",
158
+ compaction: { enabled: true, autoThreshold: 0.7 },
159
+ maxInputTokens: 1000,
160
+ // Above threshold so the auto-check fires.
161
+ previousEstimatedInputTokens: 900,
162
+ });
163
+
164
+ describe("compactor records llm_request_logs with call_site=compactionAgent", () => {
165
+ beforeEach(() => {
166
+ recordRequestLogCalls.length = 0;
167
+ });
168
+
169
+ test("successful compaction call stamps call_site = compactionAgent", async () => {
170
+ await runAssistantDrivenCompaction(args(makeProvider()));
171
+
172
+ expect(recordRequestLogCalls.length).toBe(1);
173
+ expect(recordRequestLogCalls[0]!.callSite).toBe("compactionAgent");
174
+ expect(recordRequestLogCalls[0]!.conversationId).toBe(
175
+ "conv-compaction-log-1",
176
+ );
177
+ // Provider name comes from actualProvider when present.
178
+ expect(recordRequestLogCalls[0]!.provider).toBe("actual-mock-provider");
179
+ // Payloads should be JSON-stringified.
180
+ expect(recordRequestLogCalls[0]!.requestPayload).toBe(
181
+ JSON.stringify(RAW_REQUEST),
182
+ );
183
+ expect(recordRequestLogCalls[0]!.responsePayload).toBe(
184
+ JSON.stringify(RAW_RESPONSE),
185
+ );
186
+ });
187
+
188
+ test("skips persistence when provider returns no rawRequest/rawResponse", async () => {
189
+ await runAssistantDrivenCompaction(args(makeProviderWithoutRaw()));
190
+
191
+ // Helper short-circuits when raw payloads are absent — non-fatal.
192
+ expect(recordRequestLogCalls.length).toBe(0);
193
+ });
194
+
195
+ test("uses provider.name when actualProvider is absent", async () => {
196
+ const provider: Provider = {
197
+ name: "fallback-provider-name",
198
+ sendMessage: async () => ({
199
+ content: [{ type: "text", text: compactionResponse }],
200
+ model: "mock-model",
201
+ usage: { inputTokens: 100, outputTokens: 50 },
202
+ stopReason: "end_turn",
203
+ rawRequest: RAW_REQUEST,
204
+ rawResponse: RAW_RESPONSE,
205
+ }),
206
+ };
207
+
208
+ await runAssistantDrivenCompaction(args(provider));
209
+
210
+ expect(recordRequestLogCalls.length).toBe(1);
211
+ expect(recordRequestLogCalls[0]!.provider).toBe("fallback-provider-name");
212
+ expect(recordRequestLogCalls[0]!.callSite).toBe("compactionAgent");
213
+ });
214
+ });
@@ -78,12 +78,11 @@ describe("computer-use skill manifest regression", () => {
78
78
  await initializeTools();
79
79
 
80
80
  for (const cuTool of allComputerUseTools) {
81
- const def = cuTool.getDefinition();
82
81
  const manifestTool = manifest.tools.find(
83
- (t: { name: string }) => t.name === def.name,
82
+ (t: { name: string }) => t.name === cuTool.name,
84
83
  );
85
84
  expect(manifestTool).toBeDefined();
86
- expect(manifestTool.description).toBe(def.description);
85
+ expect(manifestTool.description).toBe(cuTool.description);
87
86
  }
88
87
  });
89
88
 
@@ -91,12 +90,11 @@ describe("computer-use skill manifest regression", () => {
91
90
  await initializeTools();
92
91
 
93
92
  for (const cuTool of allComputerUseTools) {
94
- const def = cuTool.getDefinition();
95
93
  const manifestTool = manifest.tools.find(
96
- (t: { name: string }) => t.name === def.name,
94
+ (t: { name: string }) => t.name === cuTool.name,
97
95
  );
98
96
  expect(manifestTool).toBeDefined();
99
- expect(manifestTool.input_schema).toEqual(def.input_schema);
97
+ expect(manifestTool.input_schema).toEqual(cuTool.input_schema);
100
98
  }
101
99
  });
102
100
 
@@ -121,15 +119,11 @@ describe("computer-use skill manifest regression", () => {
121
119
  (entry: { name: string; description: string }) => ({
122
120
  name: entry.name,
123
121
  description: entry.description,
122
+ input_schema: { type: "object" as const, properties: {} },
124
123
  category: "computer-use",
125
124
  defaultRiskLevel: RiskLevel.Low,
126
125
  origin: "skill" as const,
127
126
  ownerSkillId: "computer-use",
128
- getDefinition: () => ({
129
- name: entry.name,
130
- description: entry.description,
131
- input_schema: { type: "object" as const, properties: {} },
132
- }),
133
127
  execute: async () => ({ content: "stub", isError: false }),
134
128
  }),
135
129
  );
@@ -23,10 +23,8 @@ interface JsonSchema {
23
23
  }
24
24
 
25
25
  /** Cast a tool definition's input_schema to a usable JSON Schema shape. */
26
- function schema(tool: {
27
- getDefinition(): { input_schema: object };
28
- }): JsonSchema {
29
- return tool.getDefinition().input_schema as JsonSchema;
26
+ function schema(tool: { input_schema: object }): JsonSchema {
27
+ return tool.input_schema as JsonSchema;
30
28
  }
31
29
 
32
30
  const ctx: ToolContext = {
@@ -50,7 +50,6 @@ mock.module("../notifications/emit-signal.js", () => ({
50
50
  ],
51
51
  };
52
52
  },
53
- registerBroadcastFn: () => {},
54
53
  }));
55
54
 
56
55
  // Mock channel guardian service — provide a guardian binding for 'self' + 'telegram'
@@ -86,7 +86,6 @@ mock.module("../daemon/disk-pressure-guard.js", () => ({
86
86
  }));
87
87
 
88
88
  mock.module("../memory/conversation-crud.js", () => ({
89
- clearStrippedInjectionMetadataForConversation: () => {},
90
89
  getConversation: () => ({
91
90
  id: "conv-123",
92
91
  conversationType: "background",
@@ -98,6 +97,7 @@ mock.module("../memory/conversation-crud.js", () => ({
98
97
  getConversationOriginInterface: () => null,
99
98
  getConversationOverrideProfileFromRow: () => null,
100
99
  provenanceFromTrustContext: () => ({}),
100
+ setConversationHistoryStrippedAt: () => {},
101
101
  updateConversationContextWindow: () => {},
102
102
  updateConversationSlackContextWatermark: () => {},
103
103
  }));
@@ -145,9 +145,9 @@ let mockConversationRow: {
145
145
 
146
146
  mock.module("../memory/conversation-crud.js", () => ({
147
147
  setConversationOriginChannelIfUnset: () => {},
148
+ setConversationHistoryStrippedAt: () => {},
148
149
  updateConversationUsage: () => {},
149
150
  updateMessageMetadata: () => {},
150
- clearStrippedInjectionMetadataForConversation: () => {},
151
151
  getMessages: () => [],
152
152
  getConversation: () => mockConversationRow,
153
153
  getConversationOverrideProfileFromRow: (
@@ -170,6 +170,7 @@ mock.module("../daemon/context-overflow-policy.js", () => ({
170
170
 
171
171
  mock.module("../memory/conversation-crud.js", () => ({
172
172
  setConversationOriginChannelIfUnset: () => {},
173
+ setConversationHistoryStrippedAt: () => {},
173
174
  updateConversationUsage: () => {},
174
175
  getMessages: () => [],
175
176
  getConversation: () => ({
@@ -186,7 +187,7 @@ mock.module("../memory/conversation-crud.js", () => ({
186
187
  trustContext: undefined,
187
188
  }),
188
189
  getConversationOriginInterface: () => null,
189
- addMessage: () => ({ id: "mock-msg-id" }),
190
+ addMessage: (...args: unknown[]) => addMessageMock(...args),
190
191
  deleteMessageById: () => {},
191
192
  updateConversationContextWindow: () => {},
192
193
  updateConversationTitle: () => {},
@@ -194,7 +195,6 @@ mock.module("../memory/conversation-crud.js", () => ({
194
195
  getMessageById: () => null,
195
196
  updateMessageContent: () => {},
196
197
  updateMessageMetadata: () => {},
197
- clearStrippedInjectionMetadataForConversation: () => {},
198
198
  setLastNotifiedInferenceProfile: () => {},
199
199
  getLastUserTimestampBefore: () => 0,
200
200
  getConversationOverrideProfileFromRow: () => undefined,
@@ -305,6 +305,9 @@ mock.module("../daemon/history-repair.js", () => ({
305
305
 
306
306
  const recordUsageMock = mock((..._args: unknown[]) => {});
307
307
  const setAgentLoopExitReasonOnLatestLogMock = mock(() => {});
308
+ const addMessageMock = mock(
309
+ (..._args: unknown[]) => ({ id: "mock-msg-id" }) as { id: string },
310
+ );
308
311
  mock.module("../daemon/conversation-usage.js", () => ({
309
312
  recordUsage: recordUsageMock,
310
313
  }));
@@ -633,6 +636,7 @@ beforeEach(() => {
633
636
  mockApplyRuntimeInjections = (msgs) => msgs;
634
637
  recordUsageMock.mockClear();
635
638
  setAgentLoopExitReasonOnLatestLogMock.mockClear();
639
+ addMessageMock.mockClear();
636
640
  // Reset the plugin registry and re-register every default so the
637
641
  // orchestrator's pipelines (`overflowReduce`, `persistence`, …) dispatch to
638
642
  // the default middleware, which in turn hits the mocked collaborators
@@ -2234,4 +2238,195 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
2234
2238
  );
2235
2239
  expect(conversationError).toBeUndefined();
2236
2240
  });
2241
+
2242
+ // ── Test 9 ────────────────────────────────────────────────────────
2243
+ // When the `auto_compress_latest_turn` rerun (the last layer of the
2244
+ // overflow-recovery ladder) still yields at the mid-loop checkpoint,
2245
+ // the turn cannot proceed. Before PR 1 of the Compaction Visibility
2246
+ // workstream this terminated silently — no `agent_loop_exit_reason`,
2247
+ // no client notice, no durable transcript row. Now the loop must:
2248
+ // 1. emit a `conversation_error` event with code
2249
+ // `BUDGET_YIELD_UNRECOVERED`,
2250
+ // 2. persist a `role="assistant"` notice via the persistence
2251
+ // pipeline (so reloads keep the message),
2252
+ // 3. stamp `budget_yield_unrecovered` onto the latest llm_request_logs
2253
+ // row.
2254
+ test("budget_yield_unrecovered: classified error emitted, persisted, and stamped", async () => {
2255
+ const events: ServerMessage[] = [];
2256
+
2257
+ // Every estimate after the very first preflight is above the mid-loop
2258
+ // threshold (190_000 × 0.85 = 161_500). This makes every checkpoint
2259
+ // yield, including the one inside the auto_compress rerun.
2260
+ let estimateCallCount = 0;
2261
+ mockEstimateTokens = () => {
2262
+ estimateCallCount++;
2263
+ if (estimateCallCount === 1) return 100_000;
2264
+ return 170_000;
2265
+ };
2266
+
2267
+ // Convergence reducer becomes exhausted on the second tier so the
2268
+ // loop escalates from convergence to the action-resolution block.
2269
+ let reducerCallCount = 0;
2270
+ mockReducerStepFn = (msgs: Message[]) => {
2271
+ reducerCallCount++;
2272
+ const exhausted = reducerCallCount >= 2;
2273
+ return {
2274
+ messages: msgs,
2275
+ tier: exhausted ? "tool_result_truncation" : "forced_compaction",
2276
+ state: {
2277
+ appliedTiers: exhausted
2278
+ ? ["forced_compaction", "tool_result_truncation"]
2279
+ : ["forced_compaction"],
2280
+ injectionMode: "full" as const,
2281
+ exhausted,
2282
+ },
2283
+ estimatedTokens: exhausted ? 60_000 : 80_000,
2284
+ };
2285
+ };
2286
+
2287
+ // The overflow policy directs us into auto_compress_latest_turn so the
2288
+ // emergency compaction + final agentLoop.run path executes.
2289
+ mockOverflowAction = "auto_compress_latest_turn";
2290
+
2291
+ let agentLoopCallCount = 0;
2292
+ const agentLoopRun: AgentLoopRun = async (
2293
+ messages,
2294
+ onEvent,
2295
+ _signal,
2296
+ _requestId,
2297
+ onCheckpoint,
2298
+ ) => {
2299
+ agentLoopCallCount++;
2300
+
2301
+ const withProgress: Message[] = [
2302
+ ...messages,
2303
+ {
2304
+ role: "assistant" as const,
2305
+ content: [
2306
+ { type: "text", text: `tool call ${agentLoopCallCount}` },
2307
+ {
2308
+ type: "tool_use",
2309
+ id: `tu-${agentLoopCallCount}`,
2310
+ name: "bash",
2311
+ input: { command: "ls" },
2312
+ },
2313
+ ] as ContentBlock[],
2314
+ },
2315
+ {
2316
+ role: "user" as const,
2317
+ content: [
2318
+ {
2319
+ type: "tool_result",
2320
+ tool_use_id: `tu-${agentLoopCallCount}`,
2321
+ content: "output",
2322
+ is_error: false,
2323
+ },
2324
+ ] as ContentBlock[],
2325
+ },
2326
+ ];
2327
+
2328
+ onEvent({
2329
+ type: "message_complete",
2330
+ message: {
2331
+ role: "assistant",
2332
+ content: [
2333
+ { type: "text", text: `tool call ${agentLoopCallCount}` },
2334
+ {
2335
+ type: "tool_use",
2336
+ id: `tu-${agentLoopCallCount}`,
2337
+ name: "bash",
2338
+ input: { command: "ls" },
2339
+ },
2340
+ ],
2341
+ },
2342
+ });
2343
+ onEvent({
2344
+ type: "usage",
2345
+ inputTokens: 100,
2346
+ outputTokens: 50,
2347
+ model: "test-model",
2348
+ providerDurationMs: 100,
2349
+ });
2350
+
2351
+ // Every checkpoint yields — including the final auto_compress rerun.
2352
+ if (onCheckpoint) {
2353
+ const decision = await onCheckpoint({
2354
+ turnIndex: 0,
2355
+ toolCount: 1,
2356
+ hasToolUse: true,
2357
+ history: withProgress,
2358
+ });
2359
+ if (decision === "yield") {
2360
+ return withProgress;
2361
+ }
2362
+ }
2363
+
2364
+ return withProgress;
2365
+ };
2366
+
2367
+ const ctx = makeCtx({
2368
+ agentLoopRun,
2369
+ contextWindowManager: {
2370
+ shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
2371
+ // The compaction pipeline (default terminal) routes through this
2372
+ // for the emergency `auto_compress_latest_turn` path.
2373
+ maybeCompact: async () => ({
2374
+ compacted: true,
2375
+ messages: [
2376
+ {
2377
+ role: "user" as const,
2378
+ content: [{ type: "text", text: "compacted" }],
2379
+ },
2380
+ ] as Message[],
2381
+ compactedPersistedMessages: 5,
2382
+ summaryText: "Emergency summary",
2383
+ previousEstimatedInputTokens: 170_000,
2384
+ estimatedInputTokens: 90_000,
2385
+ maxInputTokens: 200_000,
2386
+ thresholdTokens: 160_000,
2387
+ compactedMessages: 10,
2388
+ summaryCalls: 1,
2389
+ summaryInputTokens: 500,
2390
+ summaryOutputTokens: 200,
2391
+ summaryModel: "mock-model",
2392
+ }),
2393
+ } as unknown as AgentLoopConversationContext["contextWindowManager"],
2394
+ });
2395
+
2396
+ await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
2397
+
2398
+ // The classified error is emitted to the client.
2399
+ const errorEvents = events.filter((e) => e.type === "conversation_error");
2400
+ expect(errorEvents).toHaveLength(1);
2401
+ const errorEvent = errorEvents[0];
2402
+ if (errorEvent && "code" in errorEvent) {
2403
+ expect(errorEvent.code).toBe("BUDGET_YIELD_UNRECOVERED");
2404
+ expect(errorEvent.retryable).toBe(true);
2405
+ expect(errorEvent.errorCategory).toBe("budget_yield_unrecovered");
2406
+ } else {
2407
+ throw new Error("conversation_error event missing `code` field");
2408
+ }
2409
+
2410
+ // The exit reason is stamped onto the latest llm_request_logs row.
2411
+ expect(setAgentLoopExitReasonOnLatestLogMock).toHaveBeenCalledWith(
2412
+ "test-conv",
2413
+ "budget_yield_unrecovered",
2414
+ );
2415
+
2416
+ // A `role="assistant"` notice is persisted via the persistence pipeline.
2417
+ // The default persistence terminal calls
2418
+ // `addMessage(conversationId, role, content, metadata, addOptions)` —
2419
+ // we look for the call whose role positional arg is "assistant" and
2420
+ // whose content positional arg mentions compaction.
2421
+ const assistantPersistCall = addMessageMock.mock.calls.find((call) => {
2422
+ const role = call[1];
2423
+ const content = call[2];
2424
+ return (
2425
+ role === "assistant" &&
2426
+ typeof content === "string" &&
2427
+ content.includes("compact")
2428
+ );
2429
+ });
2430
+ expect(assistantPersistCall).toBeDefined();
2431
+ });
2237
2432
  });