@vellumai/assistant 0.6.3 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (667)
  1. package/ARCHITECTURE.md +273 -10
  2. package/Dockerfile +2 -3
  3. package/bun.lock +5 -13
  4. package/docs/backup-troubleshooting.md +52 -0
  5. package/docs/browser-use-architecture-phase2.md +174 -0
  6. package/docs/stt-provider-onboarding.md +120 -0
  7. package/knip.json +12 -2
  8. package/node_modules/@vellumai/ces-contracts/bun.lock +8 -6
  9. package/node_modules/@vellumai/ces-contracts/package.json +3 -3
  10. package/openapi.yaml +982 -72
  11. package/package.json +4 -6
  12. package/scripts/generate-openapi.ts +0 -1
  13. package/scripts/test.sh +73 -18
  14. package/src/__tests__/agent-image-optimize.test.ts +28 -0
  15. package/src/__tests__/agent-loop.test.ts +123 -0
  16. package/src/__tests__/anthropic-provider.test.ts +263 -10
  17. package/src/__tests__/auto-analysis-end-to-end.test.ts +550 -0
  18. package/src/__tests__/auto-analysis-prompt.test.ts +50 -0
  19. package/src/__tests__/browser-fill-credential.test.ts +11 -0
  20. package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +2 -2
  21. package/src/__tests__/browser-skill-endstate.test.ts +31 -7
  22. package/src/__tests__/btw-routes.test.ts +7 -0
  23. package/src/__tests__/call-controller.test.ts +581 -20
  24. package/src/__tests__/catalog-files.test.ts +138 -0
  25. package/src/__tests__/channel-invite-transport.test.ts +2 -2
  26. package/src/__tests__/channel-readiness-routes.test.ts +16 -20
  27. package/src/__tests__/channel-readiness-service.test.ts +12 -7
  28. package/src/__tests__/checker.test.ts +157 -10
  29. package/src/__tests__/clawhub-files.test.ts +347 -0
  30. package/src/__tests__/commit-message-enrichment-service.test.ts +36 -19
  31. package/src/__tests__/config-analysis.test.ts +100 -0
  32. package/src/__tests__/config-schema.test.ts +1013 -66
  33. package/src/__tests__/config-watcher-cleanup-throttle.test.ts +339 -0
  34. package/src/__tests__/config-watcher.test.ts +43 -8
  35. package/src/__tests__/contact-store-user-file.test.ts +512 -0
  36. package/src/__tests__/contacts-write.test.ts +197 -0
  37. package/src/__tests__/context-window-manager.test.ts +88 -0
  38. package/src/__tests__/conversation-abort-tool-results.test.ts +2 -0
  39. package/src/__tests__/conversation-agent-loop-overflow.test.ts +1 -0
  40. package/src/__tests__/conversation-agent-loop.test.ts +98 -2
  41. package/src/__tests__/conversation-confirmation-signals.test.ts +135 -0
  42. package/src/__tests__/conversation-error.test.ts +70 -0
  43. package/src/__tests__/conversation-history-web-search.test.ts +11 -4
  44. package/src/__tests__/conversation-init.benchmark.test.ts +6 -1
  45. package/src/__tests__/conversation-launcher-skill-regression.test.ts +51 -0
  46. package/src/__tests__/conversation-list-source.test.ts +145 -0
  47. package/src/__tests__/conversation-pre-run-repair.test.ts +2 -0
  48. package/src/__tests__/conversation-provider-retry-repair.test.ts +2 -0
  49. package/src/__tests__/conversation-queue.test.ts +901 -60
  50. package/src/__tests__/conversation-routes-disk-view.test.ts +270 -0
  51. package/src/__tests__/conversation-runtime-assembly.test.ts +55 -0
  52. package/src/__tests__/conversation-skill-tools.test.ts +7 -4
  53. package/src/__tests__/conversation-slash-commands.test.ts +33 -0
  54. package/src/__tests__/conversation-slash-queue.test.ts +89 -18
  55. package/src/__tests__/conversation-slash-unknown.test.ts +2 -0
  56. package/src/__tests__/conversation-tool-setup-batch-authorized.test.ts +226 -0
  57. package/src/__tests__/conversation-workspace-injection.test.ts +2 -0
  58. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +2 -0
  59. package/src/__tests__/credential-health-service.test.ts +352 -0
  60. package/src/__tests__/credential-security-invariants.test.ts +5 -3
  61. package/src/__tests__/credential-vault-unit.test.ts +379 -3
  62. package/src/__tests__/credentials-cli.test.ts +40 -16
  63. package/src/__tests__/cross-provider-web-search.test.ts +146 -35
  64. package/src/__tests__/deterministic-verification-control-plane.test.ts +10 -1
  65. package/src/__tests__/device-id.test.ts +112 -0
  66. package/src/__tests__/docker-signing-key-bootstrap.test.ts +167 -4
  67. package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +1 -3
  68. package/src/__tests__/email-html-renderer.test.ts +71 -0
  69. package/src/__tests__/email-invite-adapter.test.ts +36 -32
  70. package/src/__tests__/emit-event-signal.test.ts +71 -0
  71. package/src/__tests__/extension-id-sync-guard.test.ts +75 -8
  72. package/src/__tests__/fixtures/mock-chrome-extension.ts +11 -0
  73. package/src/__tests__/gateway-only-enforcement.test.ts +206 -1
  74. package/src/__tests__/gateway-only-guard.test.ts +0 -1
  75. package/src/__tests__/gemini-provider.test.ts +64 -0
  76. package/src/__tests__/get-skill-detail-audit.test.ts +325 -0
  77. package/src/__tests__/gmail-archive-fallback.test.ts +193 -0
  78. package/src/__tests__/gmail-archive-gate.test.ts +246 -0
  79. package/src/__tests__/gmail-preferences.test.ts +117 -0
  80. package/src/__tests__/headless-browser-interactions.test.ts +43 -0
  81. package/src/__tests__/headless-browser-mode.test.ts +614 -0
  82. package/src/__tests__/headless-browser-navigate.test.ts +142 -5
  83. package/src/__tests__/headless-browser-read-tools.test.ts +11 -0
  84. package/src/__tests__/headless-browser-snapshot.test.ts +10 -0
  85. package/src/__tests__/heartbeat-service.test.ts +70 -17
  86. package/src/__tests__/home-state-routes.test.ts +162 -0
  87. package/src/__tests__/host-bash-proxy.test.ts +0 -5
  88. package/src/__tests__/host-browser-e2e-cloud.test.ts +138 -4
  89. package/src/__tests__/host-browser-e2e-self-hosted.test.ts +4 -4
  90. package/src/__tests__/host-browser-ws-events-e2e.test.ts +103 -0
  91. package/src/__tests__/host-cu-proxy.test.ts +0 -5
  92. package/src/__tests__/identity-intro-cache.test.ts +40 -10
  93. package/src/__tests__/init-feature-flag-overrides.test.ts +38 -112
  94. package/src/__tests__/jobs-store-upsert-debounced.test.ts +141 -0
  95. package/src/__tests__/llm-context-normalization.test.ts +488 -0
  96. package/src/__tests__/llm-context-route-provider.test.ts +86 -5
  97. package/src/__tests__/llm-usage-store.test.ts +363 -0
  98. package/src/__tests__/media-stream-output.test.ts +555 -0
  99. package/src/__tests__/media-stream-parser.test.ts +374 -0
  100. package/src/__tests__/media-stream-server-integration.test.ts +1234 -0
  101. package/src/__tests__/media-stream-stt-session.test.ts +588 -0
  102. package/src/__tests__/media-turn-detector.test.ts +440 -0
  103. package/src/__tests__/message-queue.test.ts +125 -0
  104. package/src/__tests__/migration-export-http.test.ts +6 -6
  105. package/src/__tests__/migration-import-commit-http.test.ts +8 -6
  106. package/src/__tests__/migration-import-preflight-http.test.ts +6 -5
  107. package/src/__tests__/migration-validate-http.test.ts +3 -3
  108. package/src/__tests__/mock-gateway-ipc.ts +151 -0
  109. package/src/__tests__/model-intents.test.ts +2 -2
  110. package/src/__tests__/oauth-apps-routes.test.ts +1 -0
  111. package/src/__tests__/oauth-cli.test.ts +2 -0
  112. package/src/__tests__/oauth-connect-orchestrator.test.ts +2 -0
  113. package/src/__tests__/oauth-provider-serializer.test.ts +1 -0
  114. package/src/__tests__/oauth-providers-routes.test.ts +2 -0
  115. package/src/__tests__/oauth-store.test.ts +85 -0
  116. package/src/__tests__/oauth2-gateway-transport.test.ts +249 -6
  117. package/src/__tests__/onboarding-template-contract.test.ts +6 -13
  118. package/src/__tests__/openai-provider.test.ts +176 -0
  119. package/src/__tests__/openai-responses-cutover-guard.test.ts +184 -0
  120. package/src/__tests__/openai-responses-provider.test.ts +1105 -0
  121. package/src/__tests__/openrouter-token-estimation.test.ts +100 -0
  122. package/src/__tests__/outlook-unsubscribe.test.ts +31 -2
  123. package/src/__tests__/persona-resolver.test.ts +251 -0
  124. package/src/__tests__/platform-bash-auto-approve.test.ts +4 -0
  125. package/src/__tests__/platform.test.ts +92 -1
  126. package/src/__tests__/post-turn-tool-result-truncation.test.ts +47 -0
  127. package/src/__tests__/prechat-onboarding-contract.test.ts +267 -0
  128. package/src/__tests__/pricing.test.ts +174 -0
  129. package/src/__tests__/qdrant-manager.test.ts +29 -8
  130. package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +194 -0
  131. package/src/__tests__/relationship-state-contract.test.ts +175 -0
  132. package/src/__tests__/relay-server.test.ts +423 -5
  133. package/src/__tests__/search-skills-unified.test.ts +118 -0
  134. package/src/__tests__/secret-scanner-executor.test.ts +4 -0
  135. package/src/__tests__/secure-keys.test.ts +107 -0
  136. package/src/__tests__/send-endpoint-busy.test.ts +5 -1
  137. package/src/__tests__/sequence-store.test.ts +1 -1
  138. package/src/__tests__/server-history-render.test.ts +49 -0
  139. package/src/__tests__/settings-routes.test.ts +201 -0
  140. package/src/__tests__/skill-load-feature-flag.test.ts +1 -0
  141. package/src/__tests__/skills-file-content-endpoint.test.ts +276 -145
  142. package/src/__tests__/skills-files-catalog-fallback.test.ts +381 -93
  143. package/src/__tests__/skills.test.ts +5 -2
  144. package/src/__tests__/skillssh-files.test.ts +446 -0
  145. package/src/__tests__/slack-block-formatting.test.ts +110 -0
  146. package/src/__tests__/slack-channel-config.test.ts +564 -1
  147. package/src/__tests__/stt-catalog-parity.test.ts +282 -0
  148. package/src/__tests__/stt-stream-session.test.ts +535 -0
  149. package/src/__tests__/system-prompt.test.ts +112 -26
  150. package/src/__tests__/telephony-stt-routing.test.ts +329 -0
  151. package/src/__tests__/terminal-tools.test.ts +18 -7
  152. package/src/__tests__/test-preload.ts +18 -0
  153. package/src/__tests__/test-support/browser-skill-harness.ts +4 -1
  154. package/src/__tests__/tool-executor-lifecycle-events.test.ts +9 -5
  155. package/src/__tests__/tool-executor-shell-integration.test.ts +4 -0
  156. package/src/__tests__/tool-executor.test.ts +33 -24
  157. package/src/__tests__/tool-result-truncation.test.ts +36 -0
  158. package/src/__tests__/trust-store.test.ts +7 -1
  159. package/src/__tests__/trusted-contact-approval-notifier.test.ts +1 -1
  160. package/src/__tests__/tts-catalog-parity.test.ts +345 -0
  161. package/src/__tests__/twilio-routes-twiml.test.ts +512 -114
  162. package/src/__tests__/twilio-routes.test.ts +376 -0
  163. package/src/__tests__/unicode.test.ts +293 -0
  164. package/src/__tests__/update-bulletin-format.test.ts +59 -0
  165. package/src/__tests__/update-bulletin.test.ts +206 -5
  166. package/src/__tests__/usage-routes.test.ts +25 -4
  167. package/src/__tests__/user-reference.test.ts +46 -61
  168. package/src/__tests__/verification-control-plane-policy.test.ts +4 -0
  169. package/src/__tests__/voice-config-update.test.ts +403 -0
  170. package/src/__tests__/voice-quality.test.ts +434 -19
  171. package/src/__tests__/workspace-heartbeat-service.test.ts +7 -0
  172. package/src/__tests__/workspace-migration-033-stt-service-explicit-config.test.ts +547 -0
  173. package/src/__tests__/workspace-migration-034-remove-calls-voice-transcription-provider.test.ts +596 -0
  174. package/src/__tests__/workspace-migration-drop-user-md.test.ts +368 -0
  175. package/src/__tests__/workspace-migration-meets.test.ts +244 -0
  176. package/src/__tests__/workspace-migration-seed-device-id.test.ts +14 -20
  177. package/src/__tests__/workspace-policy.test.ts +2 -0
  178. package/src/agent/image-optimize.ts +24 -12
  179. package/src/agent/loop.ts +43 -3
  180. package/src/backup/__tests__/backup-key.test.ts +152 -0
  181. package/src/backup/__tests__/backup-worker.test.ts +767 -0
  182. package/src/backup/__tests__/list-snapshots.test.ts +87 -0
  183. package/src/backup/__tests__/local-writer.test.ts +218 -0
  184. package/src/backup/__tests__/offsite-writer.test.ts +641 -0
  185. package/src/backup/__tests__/paths.test.ts +300 -0
  186. package/src/backup/__tests__/restore.test.ts +498 -0
  187. package/src/backup/__tests__/snapshot-lock.test.ts +352 -0
  188. package/src/backup/__tests__/stream-crypt.test.ts +228 -0
  189. package/src/backup/backup-key.ts +137 -0
  190. package/src/backup/backup-worker.ts +459 -0
  191. package/src/backup/list-snapshots.ts +147 -0
  192. package/src/backup/local-writer.ts +133 -0
  193. package/src/backup/offsite-writer.ts +222 -0
  194. package/src/backup/paths.ts +226 -0
  195. package/src/backup/restore.ts +322 -0
  196. package/src/backup/snapshot-lock.ts +431 -0
  197. package/src/backup/stream-crypt.ts +263 -0
  198. package/src/bundler/package-resolver.ts +4 -0
  199. package/src/calls/audio-store.ts +11 -5
  200. package/src/calls/call-controller.ts +226 -71
  201. package/src/calls/call-domain.ts +9 -0
  202. package/src/calls/call-speech-output.ts +190 -0
  203. package/src/calls/call-transport.ts +77 -0
  204. package/src/calls/media-stream-audio-transcode.ts +173 -0
  205. package/src/calls/media-stream-output.ts +660 -0
  206. package/src/calls/media-stream-parser.ts +300 -0
  207. package/src/calls/media-stream-protocol.ts +166 -0
  208. package/src/calls/media-stream-server.ts +592 -0
  209. package/src/calls/media-stream-stt-session.ts +460 -0
  210. package/src/calls/media-turn-detector.ts +230 -0
  211. package/src/calls/relay-server.ts +90 -75
  212. package/src/calls/resolve-call-tts-provider.ts +136 -0
  213. package/src/calls/telephony-stt-routing.ts +145 -0
  214. package/src/calls/tts-call-strategy.ts +161 -0
  215. package/src/calls/tts-text-sanitizer.ts +32 -16
  216. package/src/calls/twilio-routes.ts +281 -17
  217. package/src/calls/voice-quality.ts +78 -35
  218. package/src/calls/voice-session-bridge.ts +8 -1
  219. package/src/channels/types.ts +16 -0
  220. package/src/cli/__tests__/run-assistant-command.ts +11 -1
  221. package/src/cli/commands/__tests__/backup.test.ts +1165 -0
  222. package/src/cli/commands/__tests__/domain-register.test.ts +234 -0
  223. package/src/cli/commands/__tests__/domain-status.test.ts +132 -0
  224. package/src/cli/commands/__tests__/email-attachment.test.ts +422 -0
  225. package/src/cli/commands/__tests__/email-download.test.ts +16 -1
  226. package/src/cli/commands/__tests__/email-list.test.ts +22 -4
  227. package/src/cli/commands/__tests__/email-register.test.ts +4 -4
  228. package/src/cli/commands/__tests__/email-send.test.ts +37 -4
  229. package/src/cli/commands/__tests__/email-status.test.ts +5 -1
  230. package/src/cli/commands/__tests__/email-unregister.test.ts +34 -5
  231. package/src/cli/commands/backup.ts +993 -0
  232. package/src/cli/commands/conversations.ts +77 -0
  233. package/src/cli/commands/credentials.ts +0 -1
  234. package/src/cli/commands/domain.ts +210 -0
  235. package/src/cli/commands/email.ts +255 -3
  236. package/src/cli/commands/oauth/__tests__/connect.test.ts +12 -0
  237. package/src/cli/commands/oauth/__tests__/providers-delete.test.ts +1 -0
  238. package/src/cli/commands/oauth/__tests__/providers-register.test.ts +1 -0
  239. package/src/cli/commands/oauth/__tests__/providers-update.test.ts +1 -0
  240. package/src/cli/commands/oauth/mode.ts +12 -3
  241. package/src/cli/commands/oauth/providers.ts +15 -0
  242. package/src/cli/commands/oauth/shared.ts +2 -1
  243. package/src/cli/commands/platform/__tests__/callback-routes-list.test.ts +4 -9
  244. package/src/cli/commands/platform/__tests__/connect.test.ts +6 -0
  245. package/src/cli/commands/platform/__tests__/disconnect.test.ts +7 -1
  246. package/src/cli/commands/platform/__tests__/status.test.ts +6 -0
  247. package/src/cli/program.ts +30 -4
  248. package/src/config/__tests__/backup-schema.test.ts +134 -0
  249. package/src/config/assistant-feature-flags.ts +61 -62
  250. package/src/config/bundled-skills/app-builder/references/CUSTOM_ROUTES.md +37 -1
  251. package/src/config/bundled-skills/browser/SKILL.md +30 -5
  252. package/src/config/bundled-skills/browser/TOOLS.json +123 -0
  253. package/src/config/bundled-skills/browser/tools/browser-attach.ts +12 -0
  254. package/src/config/bundled-skills/browser/tools/browser-detach.ts +12 -0
  255. package/src/config/bundled-skills/browser/tools/browser-status.ts +12 -0
  256. package/src/config/bundled-skills/browser/tools/browser-wait-for-download.ts +17 -0
  257. package/src/config/bundled-skills/contacts/SKILL.md +2 -2
  258. package/src/config/bundled-skills/gmail/SKILL.md +53 -7
  259. package/src/config/bundled-skills/gmail/TOOLS.json +33 -3
  260. package/src/config/bundled-skills/gmail/tools/gmail-archive.ts +116 -9
  261. package/src/config/bundled-skills/gmail/tools/gmail-outreach-scan.ts +138 -11
  262. package/src/config/bundled-skills/gmail/tools/gmail-preferences-tool.ts +59 -0
  263. package/src/config/bundled-skills/gmail/tools/gmail-preferences.ts +82 -0
  264. package/src/config/bundled-skills/gmail/tools/gmail-sender-digest.ts +113 -17
  265. package/src/config/bundled-skills/gmail/tools/gmail-unsubscribe.ts +2 -2
  266. package/src/config/bundled-skills/media-processing/SKILL.md +3 -9
  267. package/src/config/bundled-skills/media-processing/TOOLS.json +1 -6
  268. package/src/config/bundled-skills/media-processing/__tests__/audio-transcribe.test.ts +125 -0
  269. package/src/config/bundled-skills/media-processing/__tests__/extract-keyframes.test.ts +181 -0
  270. package/src/config/bundled-skills/media-processing/__tests__/preprocess-audio.test.ts +141 -0
  271. package/src/config/bundled-skills/media-processing/services/audio-transcribe.ts +32 -87
  272. package/src/config/bundled-skills/media-processing/services/preprocess.ts +8 -4
  273. package/src/config/bundled-skills/media-processing/tools/extract-keyframes.ts +0 -10
  274. package/src/config/bundled-skills/messaging/SKILL.md +3 -3
  275. package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +2 -2
  276. package/src/config/bundled-skills/outlook/SKILL.md +2 -2
  277. package/src/config/bundled-skills/outlook/tools/outlook-unsubscribe.ts +2 -2
  278. package/src/config/bundled-skills/phone-calls/SKILL.md +2 -2
  279. package/src/config/bundled-skills/phone-calls/references/CONFIG.md +27 -18
  280. package/src/config/bundled-skills/phone-calls/references/TROUBLESHOOTING.md +3 -3
  281. package/src/config/bundled-skills/settings/TOOLS.json +3 -3
  282. package/src/config/bundled-skills/settings/tools/voice-config-update.ts +26 -22
  283. package/src/config/bundled-skills/slack/SKILL.md +1 -0
  284. package/src/config/bundled-skills/transcribe/SKILL.md +9 -14
  285. package/src/config/bundled-skills/transcribe/TOOLS.json +2 -7
  286. package/src/config/bundled-skills/transcribe/tools/transcribe-media.test.ts +256 -0
  287. package/src/config/bundled-skills/transcribe/tools/transcribe-media.ts +38 -188
  288. package/src/config/bundled-tool-registry.ts +8 -0
  289. package/src/config/env-registry.ts +24 -0
  290. package/src/config/env.ts +34 -10
  291. package/src/config/feature-flag-registry.json +46 -14
  292. package/src/config/loader.ts +26 -12
  293. package/src/config/schema.ts +35 -10
  294. package/src/config/schemas/__tests__/stt.test.ts +43 -0
  295. package/src/config/schemas/analysis.ts +51 -0
  296. package/src/config/schemas/backup.ts +72 -0
  297. package/src/config/schemas/calls.ts +1 -26
  298. package/src/config/schemas/elevenlabs.ts +0 -59
  299. package/src/config/schemas/filing.ts +47 -7
  300. package/src/config/schemas/heartbeat.ts +27 -5
  301. package/src/config/schemas/host-browser.ts +47 -1
  302. package/src/config/schemas/inference.ts +1 -1
  303. package/src/config/schemas/memory-lifecycle.ts +14 -2
  304. package/src/config/schemas/services.ts +44 -0
  305. package/src/config/schemas/stt.ts +59 -0
  306. package/src/config/schemas/tts.ts +230 -0
  307. package/src/config/schemas/updates.ts +14 -0
  308. package/src/config/skills.ts +4 -0
  309. package/src/config/types.ts +4 -0
  310. package/src/contacts/contact-store.ts +56 -11
  311. package/src/contacts/contacts-write.ts +38 -1
  312. package/src/context/post-turn-tool-result-truncation.ts +3 -2
  313. package/src/context/tool-result-truncation.ts +2 -1
  314. package/src/context/window-manager.ts +45 -12
  315. package/src/credential-execution/executable-discovery.ts +12 -2
  316. package/src/credential-execution/process-manager.ts +33 -2
  317. package/src/credential-health/credential-health-service.ts +366 -0
  318. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +324 -0
  319. package/src/daemon/__tests__/conversation-surfaces-launch.test.ts +497 -0
  320. package/src/daemon/__tests__/conversation-tool-setup.test.ts +17 -8
  321. package/src/daemon/__tests__/lifecycle-startup-ordering.test.ts +127 -0
  322. package/src/daemon/config-watcher.ts +99 -5
  323. package/src/daemon/conversation-agent-loop-handlers.ts +6 -0
  324. package/src/daemon/conversation-agent-loop.ts +101 -24
  325. package/src/daemon/conversation-error.ts +11 -0
  326. package/src/daemon/conversation-history.ts +40 -6
  327. package/src/daemon/conversation-launch.ts +220 -0
  328. package/src/daemon/conversation-lifecycle.ts +59 -9
  329. package/src/daemon/conversation-messaging.ts +37 -3
  330. package/src/daemon/conversation-notifiers.ts +5 -0
  331. package/src/daemon/conversation-process.ts +581 -19
  332. package/src/daemon/conversation-queue-manager.ts +24 -0
  333. package/src/daemon/conversation-runtime-assembly.ts +11 -1
  334. package/src/daemon/conversation-slash.ts +36 -0
  335. package/src/daemon/conversation-surfaces.ts +94 -4
  336. package/src/daemon/conversation-tool-setup.ts +25 -0
  337. package/src/daemon/conversation-usage.ts +7 -4
  338. package/src/daemon/conversation.ts +86 -28
  339. package/src/daemon/handlers/config-slack-channel.ts +269 -94
  340. package/src/daemon/handlers/conversations.ts +4 -1
  341. package/src/daemon/handlers/shared.ts +22 -0
  342. package/src/daemon/handlers/skills.ts +321 -77
  343. package/src/daemon/host-browser-proxy.ts +2 -1
  344. package/src/daemon/lifecycle.ts +122 -25
  345. package/src/daemon/message-protocol.ts +6 -0
  346. package/src/daemon/message-types/conversations.ts +34 -1
  347. package/src/daemon/message-types/home.ts +40 -0
  348. package/src/daemon/message-types/meet.ts +143 -0
  349. package/src/daemon/message-types/messages.ts +14 -0
  350. package/src/daemon/message-types/schedules.ts +34 -2
  351. package/src/daemon/message-types/skills.ts +16 -0
  352. package/src/daemon/message-types/surfaces.ts +2 -0
  353. package/src/daemon/server.ts +347 -2
  354. package/src/daemon/shutdown-handlers.ts +32 -4
  355. package/src/daemon/shutdown-registry.ts +40 -0
  356. package/src/daemon/tool-side-effects.ts +9 -0
  357. package/src/email/html-renderer.ts +76 -0
  358. package/src/heartbeat/heartbeat-service.ts +93 -7
  359. package/src/home/__tests__/assistant-feed-authoring.test.ts +156 -0
  360. package/src/home/__tests__/emit-feed-event.test.ts +169 -0
  361. package/src/home/__tests__/feed-scheduler.test.ts +194 -0
  362. package/src/home/__tests__/feed-types.test.ts +275 -0
  363. package/src/home/__tests__/feed-writer.test.ts +688 -0
  364. package/src/home/__tests__/phase5-exit-criteria.test.ts +212 -0
  365. package/src/home/__tests__/platform-gmail-digest.test.ts +222 -0
  366. package/src/home/__tests__/progress-formula.test.ts +213 -0
  367. package/src/home/__tests__/relationship-state-writer.test.ts +740 -0
  368. package/src/home/__tests__/rollup-producer.test.ts +398 -0
  369. package/src/home/assistant-feed-authoring.ts +124 -0
  370. package/src/home/emit-feed-event.ts +158 -0
  371. package/src/home/feed-scheduler.ts +247 -0
  372. package/src/home/feed-types.ts +181 -0
  373. package/src/home/feed-writer.ts +469 -0
  374. package/src/home/platform-gmail-digest.ts +163 -0
  375. package/src/home/progress-formula.ts +86 -0
  376. package/src/home/relationship-state-writer.ts +824 -0
  377. package/src/home/relationship-state.ts +143 -0
  378. package/src/home/rollup-producer.ts +384 -0
  379. package/src/hooks/runner.ts +7 -0
  380. package/src/inbound/platform-callback-registration.ts +12 -3
  381. package/src/inbound/public-ingress-urls.ts +12 -0
  382. package/src/instrument.ts +1 -1
  383. package/src/ipc/__tests__/cli-ipc.test.ts +200 -0
  384. package/src/ipc/cli-client.ts +151 -0
  385. package/src/ipc/cli-server.ts +234 -0
  386. package/src/ipc/gateway-client.ts +180 -0
  387. package/src/ipc/routes/index.ts +5 -0
  388. package/src/ipc/routes/wake-conversation.ts +19 -0
  389. package/src/memory/__tests__/auto-analysis-enqueue.test.ts +356 -0
  390. package/src/memory/__tests__/auto-analysis-guard.test.ts +57 -0
  391. package/src/memory/__tests__/conversation-analyze-job.test.ts +232 -0
  392. package/src/memory/__tests__/find-analysis-conversation.test.ts +196 -0
  393. package/src/memory/app-store.ts +1 -1
  394. package/src/memory/attachments-store.ts +70 -0
  395. package/src/memory/auto-analysis-enqueue.ts +127 -0
  396. package/src/memory/auto-analysis-guard.ts +27 -0
  397. package/src/memory/cleanup-schedule-state.ts +37 -0
  398. package/src/memory/conversation-analyze-job.ts +73 -0
  399. package/src/memory/conversation-crud.ts +99 -0
  400. package/src/memory/conversation-disk-view.ts +7 -0
  401. package/src/memory/conversation-group-migration.ts +34 -2
  402. package/src/memory/conversation-queries.ts +6 -5
  403. package/src/memory/db-init.ts +6 -0
  404. package/src/memory/db-maintenance.ts +108 -0
  405. package/src/memory/db.ts +1 -0
  406. package/src/memory/graph/conversation-graph-memory.ts +15 -0
  407. package/src/memory/graph/extraction.test.ts +23 -0
  408. package/src/memory/graph/extraction.ts +8 -0
  409. package/src/memory/graph/retriever.ts +27 -18
  410. package/src/memory/graph/scoring.test.ts +186 -0
  411. package/src/memory/graph/scoring.ts +31 -1
  412. package/src/memory/graph/tools.ts +1 -1
  413. package/src/memory/group-crud.ts +6 -1
  414. package/src/memory/indexer.ts +95 -16
  415. package/src/memory/job-handlers/cleanup.ts +11 -8
  416. package/src/memory/job-handlers/conversation-starters.ts +16 -10
  417. package/src/memory/jobs-store.ts +64 -4
  418. package/src/memory/jobs-worker.ts +22 -9
  419. package/src/memory/llm-usage-store.ts +92 -56
  420. package/src/memory/migrations/219-oauth-providers-token-exchange-body-format.ts +15 -0
  421. package/src/memory/migrations/220-normalize-user-file-by-principal.ts +190 -0
  422. package/src/memory/migrations/221-conversations-archived-at.ts +16 -0
  423. package/src/memory/migrations/index.ts +6 -0
  424. package/src/memory/migrations/registry.ts +8 -0
  425. package/src/memory/qdrant-manager.ts +43 -16
  426. package/src/memory/schema/conversations.ts +2 -0
  427. package/src/memory/schema/oauth.ts +3 -0
  428. package/src/memory/usage-buckets.ts +396 -0
  429. package/src/messaging/providers/gmail/client.ts +57 -6
  430. package/src/messaging/providers/slack/__tests__/adapter-token-routing.test.ts +282 -0
  431. package/src/messaging/providers/slack/adapter.ts +143 -38
  432. package/src/messaging/providers/slack/client.ts +16 -0
  433. package/src/messaging/providers/slack/types.ts +4 -0
  434. package/src/notifications/decision-engine.ts +3 -3
  435. package/src/notifications/signal.ts +5 -0
  436. package/src/oauth/__tests__/identity-verifier.test.ts +1 -0
  437. package/src/oauth/byo-connection.test.ts +18 -1
  438. package/src/oauth/byo-connection.ts +3 -1
  439. package/src/oauth/connect-orchestrator.ts +2 -0
  440. package/src/oauth/connection-resolver.ts +6 -2
  441. package/src/oauth/connection.ts +2 -0
  442. package/src/oauth/oauth-store.ts +9 -0
  443. package/src/oauth/platform-connection.test.ts +98 -0
  444. package/src/oauth/platform-connection.ts +52 -31
  445. package/src/oauth/seed-providers.ts +7 -0
  446. package/src/permissions/checker.ts +16 -6
  447. package/src/permissions/defaults.ts +49 -1
  448. package/src/permissions/trust-store.ts +3 -3
  449. package/src/permissions/workspace-policy.ts +3 -0
  450. package/src/platform/client.test.ts +10 -0
  451. package/src/platform/sync-identity.ts +129 -0
  452. package/src/prompts/persona-resolver.ts +126 -2
  453. package/src/prompts/system-prompt.ts +59 -18
  454. package/src/prompts/templates/BOOTSTRAP.md +5 -5
  455. package/src/prompts/templates/SOUL.md +3 -1
  456. package/src/prompts/templates/UPDATES.md +12 -0
  457. package/src/prompts/templates/channels/slack.md +20 -0
  458. package/src/prompts/update-bulletin-format.ts +26 -9
  459. package/src/prompts/update-bulletin.ts +34 -23
  460. package/src/prompts/user-reference.ts +20 -17
  461. package/src/providers/__tests__/provider-secret-catalog.test.ts +42 -0
  462. package/src/providers/anthropic/client.ts +157 -61
  463. package/src/providers/fireworks/client.ts +2 -2
  464. package/src/providers/gemini/client.ts +9 -1
  465. package/src/providers/model-catalog.ts +6 -0
  466. package/src/providers/model-intents.ts +4 -4
  467. package/src/providers/ollama/client.ts +2 -2
  468. package/src/providers/openai/chat-completions-provider.ts +474 -0
  469. package/src/providers/openai/client.ts +25 -440
  470. package/src/providers/openai/responses-provider.ts +502 -0
  471. package/src/providers/openrouter/client.ts +101 -4
  472. package/src/providers/provider-secret-catalog.ts +139 -0
  473. package/src/providers/registry.ts +2 -2
  474. package/src/providers/retry.ts +14 -3
  475. package/src/providers/speech-to-text/__tests__/provider-catalog.test.ts +251 -0
  476. package/src/providers/speech-to-text/__tests__/resolve.test.ts +828 -0
  477. package/src/providers/speech-to-text/deepgram-realtime.test.ts +980 -0
  478. package/src/providers/speech-to-text/deepgram-realtime.ts +767 -0
  479. package/src/providers/speech-to-text/deepgram.test.ts +332 -0
  480. package/src/providers/speech-to-text/deepgram.ts +115 -0
  481. package/src/providers/speech-to-text/google-gemini-live-stream.test.ts +743 -0
  482. package/src/providers/speech-to-text/google-gemini-live-stream.ts +625 -0
  483. package/src/providers/speech-to-text/google-gemini.test.ts +226 -0
  484. package/src/providers/speech-to-text/google-gemini.ts +101 -0
  485. package/src/providers/speech-to-text/openai-whisper-stream.test.ts +564 -0
  486. package/src/providers/speech-to-text/openai-whisper-stream.ts +381 -0
  487. package/src/providers/speech-to-text/openai-whisper.test.ts +1 -37
  488. package/src/providers/speech-to-text/openai-whisper.ts +63 -33
  489. package/src/providers/speech-to-text/provider-catalog.ts +306 -0
  490. package/src/providers/speech-to-text/resolve.ts +386 -6
  491. package/src/providers/types.ts +9 -0
  492. package/src/runtime/AGENTS.md +43 -1
  493. package/src/runtime/__tests__/agent-wake.test.ts +831 -0
  494. package/src/runtime/__tests__/runtime-mode.test.ts +62 -0
  495. package/src/runtime/__tests__/slack-block-formatting.test.ts +481 -0
  496. package/src/runtime/agent-wake.ts +512 -0
  497. package/src/runtime/auth/__tests__/route-policy.test.ts +40 -0
  498. package/src/runtime/auth/route-policy.ts +30 -5
  499. package/src/runtime/auth/token-service.ts +56 -1
  500. package/src/runtime/btw-sidechain.ts +2 -0
  501. package/src/runtime/capability-tokens.ts +10 -10
  502. package/src/runtime/channel-invite-transport.ts +1 -1
  503. package/src/runtime/channel-invite-transports/email.ts +14 -6
  504. package/src/runtime/channel-readiness-service.ts +12 -22
  505. package/src/runtime/chrome-extension-registry.ts +38 -2
  506. package/src/runtime/http-server.ts +395 -10
  507. package/src/runtime/http-types.ts +6 -2
  508. package/src/runtime/migrations/__tests__/vbundle-import-credentials.test.ts +36 -0
  509. package/src/runtime/migrations/__tests__/vbundle-legacy-user-md.test.ts +360 -0
  510. package/src/runtime/migrations/migration-transport.ts +1 -0
  511. package/src/runtime/migrations/migration-wizard.ts +1 -0
  512. package/src/runtime/migrations/vbundle-import-analyzer.ts +77 -1
  513. package/src/runtime/migrations/vbundle-importer.ts +34 -0
  514. package/src/runtime/pending-interactions.ts +0 -11
  515. package/src/runtime/routes/__tests__/backup-routes.test.ts +967 -0
  516. package/src/runtime/routes/__tests__/home-feed-routes.test.ts +507 -0
  517. package/src/runtime/routes/__tests__/migration-import-credential-filter.test.ts +208 -0
  518. package/src/runtime/routes/__tests__/stt-routes.test.ts +406 -0
  519. package/src/runtime/routes/__tests__/tts-routes.test.ts +474 -0
  520. package/src/runtime/routes/__tests__/user-route-dispatcher.test.ts +148 -17
  521. package/src/runtime/routes/app-management-routes.ts +12 -18
  522. package/src/runtime/routes/attachment-routes.test.ts +9 -3
  523. package/src/runtime/routes/attachment-routes.ts +216 -17
  524. package/src/runtime/routes/backup-routes.ts +519 -0
  525. package/src/runtime/routes/browser-extension-pair-routes.ts +82 -23
  526. package/src/runtime/routes/btw-routes.ts +8 -6
  527. package/src/runtime/routes/contact-routes.test.ts +298 -0
  528. package/src/runtime/routes/contact-routes.ts +132 -5
  529. package/src/runtime/routes/conversation-analysis-routes.ts +22 -142
  530. package/src/runtime/routes/conversation-management-routes.ts +115 -0
  531. package/src/runtime/routes/conversation-routes.ts +367 -146
  532. package/src/runtime/routes/filing-routes.ts +93 -0
  533. package/src/runtime/routes/home-feed-routes.ts +334 -0
  534. package/src/runtime/routes/home-state-routes.ts +138 -0
  535. package/src/runtime/routes/host-browser-routes.ts +3 -14
  536. package/src/runtime/routes/identity-intro-cache.ts +7 -3
  537. package/src/runtime/routes/identity-routes.ts +3 -17
  538. package/src/runtime/routes/inbound-stages/transcribe-audio.test.ts +46 -39
  539. package/src/runtime/routes/inbound-stages/transcribe-audio.ts +15 -15
  540. package/src/runtime/routes/integrations/slack/__tests__/channel.test.ts +137 -0
  541. package/src/runtime/routes/integrations/slack/__tests__/share.test.ts +179 -0
  542. package/src/runtime/routes/integrations/slack/channel.ts +11 -3
  543. package/src/runtime/routes/integrations/slack/share.ts +45 -7
  544. package/src/runtime/routes/llm-context-normalization.ts +303 -0
  545. package/src/runtime/routes/memory-item-routes.test.ts +3 -2
  546. package/src/runtime/routes/migration-routes.ts +40 -5
  547. package/src/runtime/routes/settings-routes.ts +22 -5
  548. package/src/runtime/routes/skills-routes.ts +76 -7
  549. package/src/runtime/routes/stt-routes.ts +233 -0
  550. package/src/runtime/routes/surface-action-routes.ts +41 -2
  551. package/src/runtime/routes/tts-routes.ts +108 -24
  552. package/src/runtime/routes/usage-routes.ts +30 -2
  553. package/src/runtime/routes/user-route-dispatcher.ts +50 -5
  554. package/src/runtime/routes/user-routes.ts +13 -1
  555. package/src/runtime/routes/work-items-routes.ts +8 -1
  556. package/src/runtime/runtime-mode.ts +33 -0
  557. package/src/runtime/services/__tests__/analyze-conversation.test.ts +444 -0
  558. package/src/runtime/services/__tests__/analyze-deps-singleton.test.ts +67 -0
  559. package/src/runtime/services/__tests__/auto-analysis-prompt.test.ts +53 -0
  560. package/src/runtime/services/__tests__/manual-analysis-prompt.test.ts +41 -0
  561. package/src/runtime/services/analyze-conversation.ts +344 -0
  562. package/src/runtime/services/analyze-deps-singleton.ts +32 -0
  563. package/src/runtime/services/auto-analysis-prompt.ts +55 -0
  564. package/src/runtime/skill-route-registry.ts +49 -0
  565. package/src/runtime/slack-block-formatting.ts +437 -10
  566. package/src/schedule/scheduler.ts +50 -0
  567. package/src/security/oauth2.ts +26 -4
  568. package/src/security/secure-keys.ts +25 -2
  569. package/src/security/token-manager.ts +8 -0
  570. package/src/sequence/engine.ts +23 -0
  571. package/src/sequence/types.ts +1 -1
  572. package/src/skills/catalog-files.ts +64 -2
  573. package/src/skills/category-inference.ts +122 -0
  574. package/src/skills/clawhub-files.ts +213 -0
  575. package/src/skills/clawhub.ts +84 -23
  576. package/src/skills/skill-file-provider.ts +40 -0
  577. package/src/skills/skillssh-files.ts +395 -0
  578. package/src/skills/skillssh-registry.ts +4 -4
  579. package/src/stt/__tests__/daemon-batch-transcriber.test.ts +392 -0
  580. package/src/stt/__tests__/types.test.ts +89 -0
  581. package/src/stt/daemon-batch-transcriber.ts +195 -0
  582. package/src/stt/stt-stream-session.ts +499 -0
  583. package/src/stt/types.ts +330 -0
  584. package/src/stt/wav-encoder.test.ts +373 -0
  585. package/src/stt/wav-encoder.ts +175 -0
  586. package/src/subagent/manager.ts +38 -14
  587. package/src/tools/browser/__tests__/browser-mode.test.ts +119 -0
  588. package/src/tools/browser/__tests__/browser-status.test.ts +123 -0
  589. package/src/tools/browser/browser-execution.ts +1163 -23
  590. package/src/tools/browser/browser-manager.ts +45 -0
  591. package/src/tools/browser/browser-mode-constants.ts +12 -0
  592. package/src/tools/browser/browser-mode.ts +92 -0
  593. package/src/tools/browser/browser-status-constants.ts +33 -0
  594. package/src/tools/browser/cdp-client/__tests__/cdp-inspect-client.test.ts +393 -0
  595. package/src/tools/browser/cdp-client/__tests__/extension-cdp-client.test.ts +29 -0
  596. package/src/tools/browser/cdp-client/__tests__/factory.test.ts +1648 -32
  597. package/src/tools/browser/cdp-client/cdp-inspect/__tests__/discovery.test.ts +264 -0
  598. package/src/tools/browser/cdp-client/cdp-inspect/discovery.ts +183 -17
  599. package/src/tools/browser/cdp-client/cdp-inspect-client.ts +254 -21
  600. package/src/tools/browser/cdp-client/errors.ts +15 -0
  601. package/src/tools/browser/cdp-client/extension-cdp-client.ts +39 -16
  602. package/src/tools/browser/cdp-client/factory.ts +797 -87
  603. package/src/tools/browser/cdp-client/index.ts +16 -2
  604. package/src/tools/browser/cdp-client/types.ts +68 -0
  605. package/src/tools/credentials/vault.ts +35 -6
  606. package/src/tools/network/web-fetch.ts +5 -2
  607. package/src/tools/network/web-search.ts +5 -2
  608. package/src/tools/shared/shell-output.ts +3 -1
  609. package/src/tools/side-effects.ts +2 -0
  610. package/src/tools/skills/sandbox-runner.ts +3 -2
  611. package/src/tools/terminal/safe-env.ts +10 -2
  612. package/src/tools/terminal/shell.ts +15 -4
  613. package/src/tools/tool-manifest.ts +21 -0
  614. package/src/tools/types.ts +17 -0
  615. package/src/tools/ui-surface/definitions.ts +6 -1
  616. package/src/tts/__tests__/provider-adapters.test.ts +834 -0
  617. package/src/tts/__tests__/provider-catalog-consistency.test.ts +196 -0
  618. package/src/tts/__tests__/provider-catalog.test.ts +183 -0
  619. package/src/tts/__tests__/provider-registry.test.ts +90 -0
  620. package/src/tts/provider-catalog.ts +201 -0
  621. package/src/tts/provider-registry.ts +73 -0
  622. package/src/tts/providers/deepgram-provider.ts +219 -0
  623. package/src/tts/providers/elevenlabs-provider.ts +211 -0
  624. package/src/tts/providers/fish-audio-provider.ts +183 -0
  625. package/src/tts/providers/index.ts +42 -0
  626. package/src/tts/providers/register-builtins.ts +130 -0
  627. package/src/tts/synthesize-text.ts +110 -0
  628. package/src/tts/tts-config-resolver.ts +78 -0
  629. package/src/tts/types.ts +153 -0
  630. package/src/types/onboarding-context.ts +7 -0
  631. package/src/util/abort-reasons.ts +58 -0
  632. package/src/util/device-id.ts +32 -16
  633. package/src/util/errors.ts +9 -1
  634. package/src/util/platform.ts +54 -10
  635. package/src/util/pricing.ts +66 -3
  636. package/src/util/spawn.ts +1 -1
  637. package/src/util/truncate.ts +4 -2
  638. package/src/util/unicode.ts +201 -0
  639. package/src/version.ts +19 -24
  640. package/src/watcher/engine.ts +23 -0
  641. package/src/watcher/watcher-store.ts +31 -0
  642. package/src/workspace/migrations/003-seed-device-id.ts +9 -3
  643. package/src/workspace/migrations/017-seed-persona-dirs.ts +68 -4
  644. package/src/workspace/migrations/029-seed-pkb.ts +1 -1
  645. package/src/workspace/migrations/031-drop-user-md.ts +317 -0
  646. package/src/workspace/migrations/031-llm-log-retention-zero-to-null.ts +73 -0
  647. package/src/workspace/migrations/032-tts-provider-unification.ts +227 -0
  648. package/src/workspace/migrations/033-stt-service-explicit-config.ts +122 -0
  649. package/src/workspace/migrations/034-remove-calls-voice-transcription-provider.ts +215 -0
  650. package/src/workspace/migrations/035-seed-slack-channel-persona.ts +50 -0
  651. package/src/workspace/migrations/036-update-pkb-index-bar.ts +37 -0
  652. package/src/workspace/migrations/037-create-meets-dir.ts +61 -0
  653. package/src/workspace/migrations/registry.ts +16 -0
  654. package/src/workspace/top-level-renderer.ts +13 -1
  655. package/src/workspace/turn-commit.ts +31 -0
  656. package/src/__tests__/email-cli.test.ts +0 -297
  657. package/src/__tests__/email-service-config-fallback.test.ts +0 -102
  658. package/src/cli/commands/browser-relay.ts +0 -466
  659. package/src/email/guardrails.ts +0 -221
  660. package/src/email/provider.ts +0 -117
  661. package/src/email/providers/agentmail.ts +0 -361
  662. package/src/email/providers/index.ts +0 -65
  663. package/src/email/service.ts +0 -384
  664. package/src/email/types.ts +0 -126
  665. package/src/prompts/templates/USER.md +0 -13
  666. package/src/providers/speech-to-text/types.ts +0 -17
  667. package/src/runtime/routes/browser-cdp-routes.ts +0 -229
@@ -105,6 +105,20 @@ mock.module("../config/loader.js", () => ({
105
105
 
106
106
  const mockedConversationHostAccess = new Map<string, boolean>();
107
107
 
108
+ const capturedAddMessages: Array<{
109
+ id: string;
110
+ role: string;
111
+ content: string;
112
+ metadata?: Record<string, unknown>;
113
+ }> = [];
114
+
115
+ /**
116
+ * Content substrings that should cause `addMessage` to throw — used to
117
+ * simulate a mid-batch persist failure (e.g. a DB error on a specific
118
+ * tail message while its siblings succeed).
119
+ */
120
+ const addMessageShouldThrowForContent = new Set<string>();
121
+
108
122
  mock.module("../prompts/system-prompt.js", () => ({
109
123
  buildSystemPrompt: () => "system prompt",
110
124
  }));
@@ -133,6 +147,7 @@ mock.module("../security/secret-allowlist.js", () => ({
133
147
  mock.module("../memory/conversation-crud.js", () => ({
134
148
  getConversationType: () => "default",
135
149
  setConversationOriginChannelIfUnset: () => {},
150
+ setConversationOriginInterfaceIfUnset: () => {},
136
151
  updateConversationContextWindow: () => {},
137
152
  getConversationHostAccess: (conversationId: string) =>
138
153
  mockedConversationHostAccess.get(conversationId) ?? false,
@@ -159,11 +174,28 @@ mock.module("../memory/conversation-crud.js", () => ({
159
174
  totalEstimatedCost: 0,
160
175
  }),
161
176
  createConversation: () => ({ id: "conv-1" }),
162
- addMessage: (_convId: string, _role: string, _content: string) => {
163
- return { id: `msg-${Date.now()}` };
177
+ addMessage: (
178
+ _convId: string,
179
+ role: string,
180
+ content: string,
181
+ metadata?: Record<string, unknown>,
182
+ ) => {
183
+ // Simulate a persist failure for tests that need to exercise the
184
+ // tail-persist-failed path in drainBatch. Triggered by matching any
185
+ // registered substring against the serialized content payload.
186
+ for (const needle of addMessageShouldThrowForContent) {
187
+ if (content.includes(needle)) {
188
+ throw new Error(`Simulated addMessage failure for content: ${needle}`);
189
+ }
190
+ }
191
+ const id = `msg-${Date.now()}-${capturedAddMessages.length}`;
192
+ capturedAddMessages.push({ id, role, content, metadata });
193
+ return { id };
164
194
  },
165
195
  updateConversationUsage: () => {},
166
196
  updateConversationTitle: () => {},
197
+ getMessageById: () => null,
198
+ getLastUserTimestampBefore: () => 0,
167
199
  }));
168
200
 
169
201
  mock.module("../memory/conversation-queries.js", () => ({
@@ -456,6 +488,7 @@ beforeEach(() => {
456
488
  turnCommitCalls.length = 0;
457
489
  turnCommitHangForever = false;
458
490
  linkAttachmentShouldThrow = false;
491
+ addMessageShouldThrowForContent.clear();
459
492
  });
460
493
 
461
494
  afterAll(() => {
@@ -521,44 +554,73 @@ describe("Conversation message queue", () => {
521
554
  await new Promise((r) => setTimeout(r, 10));
522
555
  });
523
556
 
524
- test("[experimental] queued messages are processed in FIFO order", async () => {
557
+ test("[experimental] queued passthrough siblings drain as a single batched run", async () => {
525
558
  const conversation = makeConversation();
526
559
  await conversation.loadFromDb();
527
560
 
528
- const processedOrder: string[] = [];
529
-
530
- const makeHandler = (label: string) => (e: ServerMessage) => {
531
- if (e.type === "message_complete") processedOrder.push(label);
532
- };
561
+ const events1: ServerMessage[] = [];
562
+ const events2: ServerMessage[] = [];
563
+ const events3: ServerMessage[] = [];
533
564
 
534
565
  // Start first message
535
566
  const p1 = conversation.processMessage(
536
567
  "msg-1",
537
568
  [],
538
- makeHandler("msg-1"),
569
+ (e) => events1.push(e),
539
570
  "req-1",
540
571
  );
541
572
  await waitForPendingRun(1);
542
573
 
543
- // Enqueue two more
544
- conversation.enqueueMessage("msg-2", [], makeHandler("msg-2"), "req-2");
545
- conversation.enqueueMessage("msg-3", [], makeHandler("msg-3"), "req-3");
574
+ // Enqueue two more sibling passthrough messages
575
+ conversation.enqueueMessage("msg-2", [], (e) => events2.push(e), "req-2");
576
+ conversation.enqueueMessage("msg-3", [], (e) => events3.push(e), "req-3");
546
577
  expect(conversation.getQueueDepth()).toBe(2);
547
578
 
548
- // Complete first → triggers second
579
+ // Complete run 0 → drain pulls msg-2 and msg-3 into ONE batched run.
549
580
  resolveRun(0);
550
581
  await p1;
551
582
  await waitForPendingRun(2);
552
583
 
553
- // Complete second → triggers third
554
- resolveRun(1);
555
- await waitForPendingRun(3);
584
+ // Exactly two runs total (not three): run 0 = msg-1, run 1 = batched [msg-2, msg-3]
585
+ expect(pendingRuns.length).toBe(2);
556
586
 
557
- // Complete third
558
- resolveRun(2);
587
+ // Each batched client saw its own message_dequeued tagged with its own requestId.
588
+ const dequeued2 = events2.filter((e) => e.type === "message_dequeued");
589
+ expect(dequeued2).toHaveLength(1);
590
+ expect(dequeued2[0]).toEqual({
591
+ type: "message_dequeued",
592
+ conversationId: "conv-1",
593
+ requestId: "req-2",
594
+ });
595
+ const dequeued3 = events3.filter((e) => e.type === "message_dequeued");
596
+ expect(dequeued3).toHaveLength(1);
597
+ expect(dequeued3[0]).toEqual({
598
+ type: "message_dequeued",
599
+ conversationId: "conv-1",
600
+ requestId: "req-3",
601
+ });
602
+
603
+ // The batched run's captured history carries both siblings. Either as
604
+ // separate user entries (raw history) or merged into one user entry
605
+ // (after history-repair's alternation enforcement — required by the
606
+ // Anthropic API). Either way, both msg-2 and msg-3 text must appear.
607
+ const batchedHistory = pendingRuns[1].messages;
608
+ const userMessages = batchedHistory.filter((m) => m.role === "user");
609
+ const textOf = (m: Message) =>
610
+ (Array.isArray(m.content) ? m.content : [])
611
+ .filter((b) => b.type === "text")
612
+ .map((b) => (b as { text: string }).text)
613
+ .join("\n");
614
+ const combinedUserText = userMessages.map(textOf).join("\n");
615
+ expect(combinedUserText).toContain("msg-2");
616
+ expect(combinedUserText).toContain("msg-3");
617
+
618
+ // Resolve the batched run; message_complete must fan out to both clients.
619
+ resolveRun(1);
559
620
  await new Promise((r) => setTimeout(r, 10));
560
621
 
561
- expect(processedOrder).toEqual(["msg-1", "msg-2", "msg-3"]);
622
+ expect(events2.some((e) => e.type === "message_complete")).toBe(true);
623
+ expect(events3.some((e) => e.type === "message_complete")).toBe(true);
562
624
  });
563
625
 
564
626
  test("message_queued and message_dequeued events are emitted", async () => {
@@ -699,27 +761,17 @@ describe("Conversation message queue", () => {
699
761
  conversation.enqueueMessage("msg-4", [], () => {}, "req-4");
700
762
  expect(conversation.getQueueDepth()).toBe(3);
701
763
 
702
- // Complete first → drains one from queue
764
+ // Complete first → drain pulls all three same-interface passthroughs
765
+ // into a single batched run (depth → 0, runs → 2 total).
703
766
  resolveRun(0);
704
767
  await p1;
705
768
  await waitForPendingRun(2);
706
769
 
707
- expect(conversation.getQueueDepth()).toBe(2);
708
-
709
- // Complete second → drains another
710
- resolveRun(1);
711
- await waitForPendingRun(3);
712
-
713
- expect(conversation.getQueueDepth()).toBe(1);
714
-
715
- // Complete third → drains last
716
- resolveRun(2);
717
- await waitForPendingRun(4);
718
-
719
770
  expect(conversation.getQueueDepth()).toBe(0);
771
+ expect(pendingRuns.length).toBe(2);
720
772
 
721
- // Complete fourth (final queued message)
722
- resolveRun(3);
773
+ // Complete the batched run; conversation finishes cleanly.
774
+ resolveRun(1);
723
775
  await new Promise((r) => setTimeout(r, 10));
724
776
  });
725
777
 
@@ -773,6 +825,763 @@ describe("Conversation message queue", () => {
773
825
  });
774
826
  });
775
827
 
828
+ // ---------------------------------------------------------------------------
829
+ // Batched drain — mixed-interface, slash-in-middle, attachments, byte budget
830
+ // ---------------------------------------------------------------------------
831
+
832
+ describe("Batched drain", () => {
833
+ beforeEach(() => {
834
+ pendingRuns = [];
835
+ });
836
+
837
+ test("mixed-interface queue splits into multiple batches at each interface boundary", async () => {
838
+ const conversation = makeConversation();
839
+ await conversation.loadFromDb();
840
+
841
+ const events2: ServerMessage[] = [];
842
+ const events3: ServerMessage[] = [];
843
+ const events4: ServerMessage[] = [];
844
+ const events5: ServerMessage[] = [];
845
+
846
+ // Start in-flight message (msg-1)
847
+ const p1 = conversation.processMessage("msg-1", [], () => {}, "req-1");
848
+ await waitForPendingRun(1);
849
+
850
+ // Enqueue 4 messages with interfaces [macos, macos, cli, macos].
851
+ // Expected drain: [macos batch of 2] → [cli single] → [macos single].
852
+ const meta = (iface: string) => ({
853
+ userMessageInterface: iface,
854
+ assistantMessageInterface: iface,
855
+ });
856
+ conversation.enqueueMessage(
857
+ "msg-2",
858
+ [],
859
+ (e) => events2.push(e),
860
+ "req-2",
861
+ undefined,
862
+ undefined,
863
+ meta("macos"),
864
+ );
865
+ conversation.enqueueMessage(
866
+ "msg-3",
867
+ [],
868
+ (e) => events3.push(e),
869
+ "req-3",
870
+ undefined,
871
+ undefined,
872
+ meta("macos"),
873
+ );
874
+ conversation.enqueueMessage(
875
+ "msg-4",
876
+ [],
877
+ (e) => events4.push(e),
878
+ "req-4",
879
+ undefined,
880
+ undefined,
881
+ meta("cli"),
882
+ );
883
+ conversation.enqueueMessage(
884
+ "msg-5",
885
+ [],
886
+ (e) => events5.push(e),
887
+ "req-5",
888
+ undefined,
889
+ undefined,
890
+ meta("macos"),
891
+ );
892
+ expect(conversation.getQueueDepth()).toBe(4);
893
+
894
+ // Resolve msg-1 → batched run pulls macos msg-2 + msg-3.
895
+ resolveRun(0);
896
+ await p1;
897
+ await waitForPendingRun(2);
898
+
899
+ // Batched run's history must contain both macos messages (either as
900
+ // separate user entries or merged into one after history-repair).
901
+ const macosBatchedHistory = pendingRuns[1].messages;
902
+ const macosUserMessages = macosBatchedHistory.filter(
903
+ (m) => m.role === "user",
904
+ );
905
+ const textOf = (m: Message) =>
906
+ (Array.isArray(m.content) ? m.content : [])
907
+ .filter((b) => b.type === "text")
908
+ .map((b) => (b as { text: string }).text)
909
+ .join("\n");
910
+ const combinedMacosText = macosUserMessages.map(textOf).join("\n");
911
+ expect(combinedMacosText).toContain("msg-2");
912
+ expect(combinedMacosText).toContain("msg-3");
913
+
914
+ // Both msg-2 and msg-3 received their own dequeue event.
915
+ expect(events2.filter((e) => e.type === "message_dequeued")).toHaveLength(
916
+ 1,
917
+ );
918
+ expect(events3.filter((e) => e.type === "message_dequeued")).toHaveLength(
919
+ 1,
920
+ );
921
+
922
+ // Resolve the batched run → drain pulls the cli single-message run.
923
+ resolveRun(1);
924
+ await waitForPendingRun(3);
925
+
926
+ // cli run contains msg-4 as a single-message run.
927
+ const cliHistory = pendingRuns[2].messages;
928
+ const cliUserText = cliHistory
929
+ .filter((m) => m.role === "user")
930
+ .map(textOf)
931
+ .join("\n");
932
+ expect(cliUserText).toContain("msg-4");
933
+ expect(events4.filter((e) => e.type === "message_dequeued")).toHaveLength(
934
+ 1,
935
+ );
936
+
937
+ // Resolve the cli run → drain pulls the final macos single-message run.
938
+ resolveRun(2);
939
+ await waitForPendingRun(4);
940
+ const finalHistory = pendingRuns[3].messages;
941
+ const finalUserText = finalHistory
942
+ .filter((m) => m.role === "user")
943
+ .map(textOf)
944
+ .join("\n");
945
+ expect(finalUserText).toContain("msg-5");
946
+ expect(events5.filter((e) => e.type === "message_dequeued")).toHaveLength(
947
+ 1,
948
+ );
949
+
950
+ // Four total runs: msg-1, batched [msg-2, msg-3], msg-4, msg-5.
951
+ expect(pendingRuns.length).toBe(4);
952
+
953
+ resolveRun(3);
954
+ await new Promise((r) => setTimeout(r, 10));
955
+ });
956
+
957
+ test("slash-in-middle splits the queue at the slash boundary", async () => {
958
+ const conversation = makeConversation();
959
+ await conversation.loadFromDb();
960
+
961
+ const eventsHello: ServerMessage[] = [];
962
+ const eventsSlash: ServerMessage[] = [];
963
+ const eventsWorld: ServerMessage[] = [];
964
+
965
+ // Start in-flight message
966
+ const p1 = conversation.processMessage("msg-1", [], () => {}, "req-1");
967
+ await waitForPendingRun(1);
968
+
969
+ // Enqueue ["hello", "/compact", "world"]. /compact resolves to a non-
970
+ // passthrough slash, so the batch builder stops at "hello" (length 1),
971
+ // then /compact takes the single-message /compact short-circuit path
972
+ // (no new runAgentLoop invocation), then "world" drains as its own run.
973
+ conversation.enqueueMessage(
974
+ "hello",
975
+ [],
976
+ (e) => eventsHello.push(e),
977
+ "req-hello",
978
+ );
979
+ conversation.enqueueMessage(
980
+ "/compact",
981
+ [],
982
+ (e) => eventsSlash.push(e),
983
+ "req-slash",
984
+ );
985
+ conversation.enqueueMessage(
986
+ "world",
987
+ [],
988
+ (e) => eventsWorld.push(e),
989
+ "req-world",
990
+ );
991
+ expect(conversation.getQueueDepth()).toBe(3);
992
+
993
+ // Resolve msg-1 → drain pulls "hello" as its own run (batch stops at
994
+ // /compact boundary).
995
+ resolveRun(0);
996
+ await p1;
997
+ await waitForPendingRun(2);
998
+
999
+ expect(pendingRuns.length).toBe(2);
1000
+ expect(eventsHello.some((e) => e.type === "message_dequeued")).toBe(true);
1001
+ // /compact and "world" are still queued.
1002
+ expect(conversation.getQueueDepth()).toBe(2);
1003
+
1004
+ // Resolve "hello" → drain pops /compact via the builder-rejected path,
1005
+ // runs its short-circuit (no new runAgentLoop), then drains "world".
1006
+ resolveRun(1);
1007
+ await waitForPendingRun(3);
1008
+
1009
+ // /compact should have emitted its own message_complete via the short-
1010
+ // circuit path (not via a runAgentLoop run).
1011
+ expect(eventsSlash.some((e) => e.type === "message_complete")).toBe(true);
1012
+ expect(eventsWorld.some((e) => e.type === "message_dequeued")).toBe(true);
1013
+ expect(pendingRuns.length).toBe(3);
1014
+
1015
+ resolveRun(2);
1016
+ await new Promise((r) => setTimeout(r, 10));
1017
+ });
1018
+
1019
+ test("unknown-slash in middle splits the queue at the unknown-slash boundary", async () => {
1020
+ // Covers the `kind: "unknown"` short-circuit branch in drainSingleMessage
1021
+ // specifically. The sibling /compact-in-middle test covers the `kind:
1022
+ // "compact"` short-circuit (via a different code path), so this test
1023
+ // exists to guarantee the batch builder also stops at unknown-kind
1024
+ // boundaries and that the unknown-slash drain path does NOT invoke a new
1025
+ // runAgentLoop run.
1026
+ //
1027
+ // We use `/status`, which the real `resolveSlash` returns as
1028
+ // `{ kind: "unknown", message: <status report> }` when a SlashContext is
1029
+ // present (always true for queued drains via buildSlashContext).
1030
+ const conversation = makeConversation();
1031
+ await conversation.loadFromDb();
1032
+
1033
+ const eventsPlainA: ServerMessage[] = [];
1034
+ const eventsSlash: ServerMessage[] = [];
1035
+ const eventsPlainB: ServerMessage[] = [];
1036
+
1037
+ // Start in-flight message
1038
+ const p1 = conversation.processMessage("msg-1", [], () => {}, "req-1");
1039
+ await waitForPendingRun(1);
1040
+
1041
+ // Enqueue ["plain-a", "/status", "plain-b"]. /status resolves to a non-
1042
+ // passthrough slash (kind: "unknown"), so the batch builder stops at
1043
+ // "plain-a" (length-1 batch → drainSingleMessage), then /status takes the
1044
+ // unknown-slash short-circuit path (no new runAgentLoop invocation — it
1045
+ // emits assistant_text_delta + message_complete inline), then "plain-b"
1046
+ // drains as its own run.
1047
+ conversation.enqueueMessage(
1048
+ "plain-a",
1049
+ [],
1050
+ (e) => eventsPlainA.push(e),
1051
+ "req-plain-a",
1052
+ );
1053
+ conversation.enqueueMessage(
1054
+ "/status",
1055
+ [],
1056
+ (e) => eventsSlash.push(e),
1057
+ "req-slash",
1058
+ );
1059
+ conversation.enqueueMessage(
1060
+ "plain-b",
1061
+ [],
1062
+ (e) => eventsPlainB.push(e),
1063
+ "req-plain-b",
1064
+ );
1065
+ expect(conversation.getQueueDepth()).toBe(3);
1066
+
1067
+ // Resolve msg-1 → drain pulls "plain-a" as its own run (batch stops at
1068
+ // the /status boundary).
1069
+ resolveRun(0);
1070
+ await p1;
1071
+ await waitForPendingRun(2);
1072
+
1073
+ expect(pendingRuns.length).toBe(2);
1074
+ expect(eventsPlainA.some((e) => e.type === "message_dequeued")).toBe(true);
1075
+ // /status and "plain-b" are still queued.
1076
+ expect(conversation.getQueueDepth()).toBe(2);
1077
+
1078
+ // Resolve "plain-a" → drain pops /status via the builder-rejected path,
1079
+ // runs its unknown-slash short-circuit (no new runAgentLoop, emits
1080
+ // assistant_text_delta + message_complete inline), then drains "plain-b"
1081
+ // as its own run.
1082
+ resolveRun(1);
1083
+ await waitForPendingRun(3);
1084
+
1085
+ // /status should have emitted its own assistant_text_delta + message_complete
1086
+ // via the unknown-slash short-circuit path (not via a runAgentLoop run).
1087
+ expect(eventsSlash.some((e) => e.type === "assistant_text_delta")).toBe(
1088
+ true,
1089
+ );
1090
+ expect(eventsSlash.some((e) => e.type === "message_complete")).toBe(true);
1091
+ expect(eventsPlainB.some((e) => e.type === "message_dequeued")).toBe(true);
1092
+ // Only three runs total: msg-1, "plain-a", "plain-b". /status short-circuits
1093
+ // without a runAgentLoop invocation.
1094
+ expect(pendingRuns.length).toBe(3);
1095
+
1096
+ resolveRun(2);
1097
+ await new Promise((r) => setTimeout(r, 10));
1098
+ });
1099
+
1100
+ test("attachments are preserved across a batched drain", async () => {
1101
+ capturedAddMessages.length = 0;
1102
+ const conversation = makeConversation();
1103
+ await conversation.loadFromDb();
1104
+
1105
+ // Start in-flight message
1106
+ const p1 = conversation.processMessage("msg-1", [], () => {}, "req-1");
1107
+ await waitForPendingRun(1);
1108
+
1109
+ // Two sibling messages, each with a distinct image attachment.
1110
+ const attachA = [
1111
+ {
1112
+ id: "att-a",
1113
+ filename: "a.png",
1114
+ mimeType: "image/png",
1115
+ data: Buffer.from("imageA").toString("base64"),
1116
+ filePath: "/tmp/a.png",
1117
+ },
1118
+ ];
1119
+ const attachB = [
1120
+ {
1121
+ id: "att-b",
1122
+ filename: "b.png",
1123
+ mimeType: "image/png",
1124
+ data: Buffer.from("imageB").toString("base64"),
1125
+ filePath: "/tmp/b.png",
1126
+ },
1127
+ ];
1128
+ conversation.enqueueMessage("with-A", attachA, () => {}, "req-A");
1129
+ conversation.enqueueMessage("with-B", attachB, () => {}, "req-B");
1130
+ expect(conversation.getQueueDepth()).toBe(2);
1131
+
1132
+ resolveRun(0);
1133
+ await p1;
1134
+ await waitForPendingRun(2);
1135
+
1136
+ // Two persisted user rows in the DB (one per batched message), each with
1137
+ // its own imageSourcePaths metadata keyed by the right filename.
1138
+ const userRows = capturedAddMessages.filter(
1139
+ (m) => m.role === "user" && m.content.includes('"image"'),
1140
+ );
1141
+ expect(userRows).toHaveLength(2);
1142
+ const pathsA = (userRows[0].metadata as Record<string, unknown>)
1143
+ ?.imageSourcePaths as Record<string, string> | undefined;
1144
+ expect(pathsA).toBeDefined();
1145
+ expect(pathsA!["0:a.png"]).toBe("/tmp/a.png");
1146
+ const pathsB = (userRows[1].metadata as Record<string, unknown>)
1147
+ ?.imageSourcePaths as Record<string, string> | undefined;
1148
+ expect(pathsB).toBeDefined();
1149
+ expect(pathsB!["0:b.png"]).toBe("/tmp/b.png");
1150
+
1151
+ // The batched run's in-memory history also reflects both image sources
1152
+ // (enrichMessageWithSourcePaths injects file:// references for images).
1153
+ const batchedHistory = pendingRuns[1].messages;
1154
+ const userMessages = batchedHistory.filter((m) => m.role === "user");
1155
+ const allText = userMessages
1156
+ .map((m) =>
1157
+ (Array.isArray(m.content) ? m.content : [])
1158
+ .filter((b) => b.type === "text")
1159
+ .map((b) => (b as { text: string }).text)
1160
+ .join("\n"),
1161
+ )
1162
+ .join("\n");
1163
+ expect(allText).toContain("a.png");
1164
+ expect(allText).toContain("b.png");
1165
+
1166
+ resolveRun(1);
1167
+ await new Promise((r) => setTimeout(r, 10));
1168
+ });
1169
+
1170
+ test("byte-budget accounting is unchanged by shiftN-based batching", async () => {
1171
+ // Uses a small budget so we can observe reclamation after drain.
1172
+ // Each ~500-char message ≈ 1512 bytes.
1173
+ const conversation = makeConversation();
1174
+ await conversation.loadFromDb();
1175
+
1176
+ const budget = 4000;
1177
+ (conversation as unknown as { queue: MessageQueue }).queue =
1178
+ new MessageQueue(budget);
1179
+
1180
+ // Start in-flight so subsequent enqueues are queued (not processed).
1181
+ const p1 = conversation.processMessage("msg-1", [], () => {}, "req-1");
1182
+ await waitForPendingRun(1);
1183
+
1184
+ // Fill to just-under budget: two ~500-char messages (1512+1512 = 3024 bytes).
1185
+ const accepted1 = conversation.enqueueMessage(
1186
+ "x".repeat(500),
1187
+ [],
1188
+ () => {},
1189
+ "req-big-1",
1190
+ );
1191
+ const accepted2 = conversation.enqueueMessage(
1192
+ "y".repeat(500),
1193
+ [],
1194
+ () => {},
1195
+ "req-big-2",
1196
+ );
1197
+ expect(accepted1.queued).toBe(true);
1198
+ expect(accepted2.queued).toBe(true);
1199
+ // A third would push the queue over budget → rejected. Capture its
1200
+ // onEvent callback so we can verify the queue_full error event reaches
1201
+ // the rejected caller (not just the synchronous return value).
1202
+ const rejectedEvents: ServerMessage[] = [];
1203
+ const rejected = conversation.enqueueMessage(
1204
+ "z".repeat(500),
1205
+ [],
1206
+ (e) => rejectedEvents.push(e),
1207
+ "req-over",
1208
+ );
1209
+ expect(rejected.queued).toBe(false);
1210
+ expect(rejected.rejected).toBe(true);
1211
+ expect(conversation.getQueueDepth()).toBe(2);
1212
+
1213
+ // The rejected caller must have received a `queue_full` error event on
1214
+ // its own onEvent callback — event emission is part of the public
1215
+ // contract, not just the return value.
1216
+ const queueFullErr = rejectedEvents.find(
1217
+ (e) => e.type === "error" && e.category === "queue_full",
1218
+ );
1219
+ expect(queueFullErr).toBeDefined();
1220
+ if (queueFullErr && queueFullErr.type === "error") {
1221
+ expect(queueFullErr.category).toBe("queue_full");
1222
+ expect(typeof queueFullErr.message).toBe("string");
1223
+ expect(queueFullErr.message.length).toBeGreaterThan(0);
1224
+ }
1225
+
1226
+ // Complete in-flight → drain pulls both queued passthroughs as ONE batched run.
1227
+ resolveRun(0);
1228
+ await p1;
1229
+ await waitForPendingRun(2);
1230
+ expect(conversation.getQueueDepth()).toBe(0);
1231
+
1232
+ // Resolve the batched run.
1233
+ resolveRun(1);
1234
+ await new Promise((r) => setTimeout(r, 10));
1235
+
1236
+ // After the full drain, the byte budget must be fully reclaimed — a fresh
1237
+ // round of enqueues up to the budget should succeed again. Spin up another
1238
+ // in-flight message to reach the queueing state.
1239
+ const p2 = conversation.processMessage("msg-2", [], () => {}, "req-2");
1240
+ await waitForPendingRun(3);
1241
+ expect(
1242
+ conversation.enqueueMessage("a".repeat(500), [], () => {}, "req-a")
1243
+ .queued,
1244
+ ).toBe(true);
1245
+ expect(
1246
+ conversation.enqueueMessage("b".repeat(500), [], () => {}, "req-b")
1247
+ .queued,
1248
+ ).toBe(true);
1249
+
1250
+ resolveRun(2);
1251
+ await p2;
1252
+ await waitForPendingRun(4);
1253
+ resolveRun(3);
1254
+ await new Promise((r) => setTimeout(r, 10));
1255
+ });
1256
+ });
1257
+
1258
+ // ---------------------------------------------------------------------------
1259
+ // Batched drain — correctness fixes (surface exclusion, abort, last-successful
1260
+ // tracking, single activity-state emission)
1261
+ // ---------------------------------------------------------------------------
1262
+
1263
+ describe("Batched drain correctness fixes", () => {
1264
+ beforeEach(() => {
1265
+ pendingRuns = [];
1266
+ capturedAddMessages.length = 0;
1267
+ });
1268
+
1269
+ test("surface-action messages are not batched with regular passthroughs", async () => {
1270
+ const conversation = makeConversation();
1271
+ await conversation.loadFromDb();
1272
+
1273
+ const eventsSurface: ServerMessage[] = [];
1274
+ const eventsRegular: ServerMessage[] = [];
1275
+
1276
+ // Start in-flight message
1277
+ const p1 = conversation.processMessage("msg-1", [], () => {}, "req-1");
1278
+ await waitForPendingRun(1);
1279
+
1280
+ // Enqueue a surface-action message (activeSurfaceId set + tracked in
1281
+ // surfaceActionRequestIds) followed by a regular passthrough from the
1282
+ // same interface. The batch builder must reject the surface-action head
1283
+ // so each drains as its own run.
1284
+ conversation.surfaceActionRequestIds.add("req-surface");
1285
+ conversation.enqueueMessage(
1286
+ "surface action response",
1287
+ [],
1288
+ (e) => eventsSurface.push(e),
1289
+ "req-surface",
1290
+ "surface-1", // activeSurfaceId
1291
+ );
1292
+ conversation.enqueueMessage(
1293
+ "regular follow-up",
1294
+ [],
1295
+ (e) => eventsRegular.push(e),
1296
+ "req-regular",
1297
+ );
1298
+ expect(conversation.getQueueDepth()).toBe(2);
1299
+
1300
+ // Complete run 0 → drain must NOT batch the surface-action with the
1301
+ // regular passthrough. Expect the surface-action to drain as a single
1302
+ // run first.
1303
+ resolveRun(0);
1304
+ await p1;
1305
+ await waitForPendingRun(2);
1306
+
1307
+ // The second run is the surface-action single-message run.
1308
+ const surfaceUserRowsAfterRun2 = capturedAddMessages.filter(
1309
+ (m) => m.role === "user" && m.content.includes("surface action response"),
1310
+ );
1311
+ expect(surfaceUserRowsAfterRun2).toHaveLength(1);
1312
+ expect(eventsSurface.filter((e) => e.type === "message_dequeued")).toHaveLength(
1313
+ 1,
1314
+ );
1315
+
1316
+ // Complete the surface-action run; drain pulls the regular passthrough
1317
+ // as its own separate run.
1318
+ resolveRun(1);
1319
+ await waitForPendingRun(3);
1320
+ expect(pendingRuns.length).toBe(3);
1321
+ expect(eventsRegular.filter((e) => e.type === "message_dequeued")).toHaveLength(
1322
+ 1,
1323
+ );
1324
+
1325
+ // Total runs = 3: msg-1, surface-action, regular — NOT 2 (would mean
1326
+ // they were batched).
1327
+ resolveRun(2);
1328
+ await new Promise((r) => setTimeout(r, 10));
1329
+ });
1330
+
1331
+ test("abort mid-batch stops tail persists", async () => {
1332
+ const conversation = makeConversation();
1333
+ await conversation.loadFromDb();
1334
+
1335
+ const events1: ServerMessage[] = [];
1336
+ const events2: ServerMessage[] = [];
1337
+ const events3: ServerMessage[] = [];
1338
+ const events4: ServerMessage[] = [];
1339
+
1340
+ // Start in-flight message
1341
+ const p1 = conversation.processMessage(
1342
+ "msg-1",
1343
+ [],
1344
+ (e) => events1.push(e),
1345
+ "req-1",
1346
+ );
1347
+ await waitForPendingRun(1);
1348
+
1349
+ // Enqueue three sibling passthroughs (msg-2 = head, msg-3 = mid,
1350
+ // msg-4 = tail). We trigger abort from msg-3's dequeue callback —
1351
+ // by the time that fires, msg-2 has already been persisted (which
1352
+ // REPLACED the abortController, since persistUserMessage creates a
1353
+ // fresh one). Calling abort() now aborts that fresh controller, and
1354
+ // the drainBatch loop's abort check after msg-3's persist will break,
1355
+ // so msg-4 never persists.
1356
+ conversation.enqueueMessage("msg-2", [], (e) => events2.push(e), "req-2");
1357
+
1358
+ // Install a one-shot abort trigger on msg-3's dequeue event. We do
1359
+ // this before enqueueing so the wrapped callback is what drainBatch
1360
+ // invokes.
1361
+ let aborted = false;
1362
+ const onMsg3Event = (e: ServerMessage) => {
1363
+ events3.push(e);
1364
+ if (!aborted && e.type === "message_dequeued") {
1365
+ aborted = true;
1366
+ conversation.abort();
1367
+ }
1368
+ };
1369
+ conversation.enqueueMessage("msg-3", [], onMsg3Event, "req-3");
1370
+ conversation.enqueueMessage("msg-4", [], (e) => events4.push(e), "req-4");
1371
+ expect(conversation.getQueueDepth()).toBe(3);
1372
+
1373
+ const persistedUserRowCountBefore = capturedAddMessages.filter(
1374
+ (m) => m.role === "user",
1375
+ ).length;
1376
+
1377
+ // Complete run 0 → drain pulls the sibling batch.
1378
+ resolveRun(0);
1379
+ await p1;
1380
+
1381
+ // Give the drain loop a chance to iterate. Abort happens on msg-3's
1382
+ // dequeue (between msg-2's persist and msg-3's persist), so msg-3 may
1383
+ // still persist before the abort check at the end of its iteration.
1384
+ // Either way, msg-4 must NOT persist.
1385
+ await new Promise((r) => setTimeout(r, 30));
1386
+
1387
+ const userRowsAfter = capturedAddMessages
1388
+ .slice(persistedUserRowCountBefore)
1389
+ .filter((m) => m.role === "user");
1390
+ const contents = userRowsAfter.map((r) => r.content).join("||");
1391
+ expect(contents).toContain("msg-2");
1392
+ expect(contents).not.toContain("msg-4");
1393
+ expect(
1394
+ events4.filter((e) => e.type === "message_dequeued"),
1395
+ ).toHaveLength(0);
1396
+ });
1397
+
1398
+ test("failed tail persist uses last-successful requestId", async () => {
1399
+ const conversation = makeConversation();
1400
+ await conversation.loadFromDb();
1401
+
1402
+ const events1: ServerMessage[] = [];
1403
+ const events2: ServerMessage[] = [];
1404
+ const events3: ServerMessage[] = [];
1405
+ const events4: ServerMessage[] = [];
1406
+
1407
+ // Start in-flight message
1408
+ const p1 = conversation.processMessage(
1409
+ "msg-1",
1410
+ [],
1411
+ (e) => events1.push(e),
1412
+ "req-1",
1413
+ );
1414
+ await waitForPendingRun(1);
1415
+
1416
+ // Enqueue three siblings. Configure addMessage to throw for the second
1417
+ // tail (msg-mid) but succeed for msg-head and msg-tail. This simulates
1418
+ // a middle tail persist failure — currentRequestId should end up as
1419
+ // msg-tail's requestId (the LAST successful persist), not msg-mid's.
1420
+ addMessageShouldThrowForContent.add("msg-mid-unique-marker");
1421
+
1422
+ conversation.enqueueMessage(
1423
+ "msg-head",
1424
+ [],
1425
+ (e) => events2.push(e),
1426
+ "req-head",
1427
+ );
1428
+ conversation.enqueueMessage(
1429
+ "msg-mid-unique-marker",
1430
+ [],
1431
+ (e) => events3.push(e),
1432
+ "req-mid",
1433
+ );
1434
+ conversation.enqueueMessage(
1435
+ "msg-tail",
1436
+ [],
1437
+ (e) => events4.push(e),
1438
+ "req-tail",
1439
+ );
1440
+ expect(conversation.getQueueDepth()).toBe(3);
1441
+
1442
+ // Complete run 0 → batched drain.
1443
+ resolveRun(0);
1444
+ await p1;
1445
+ await waitForPendingRun(2);
1446
+
1447
+ // mid should have emitted an error event via persist failure.
1448
+ const errMid = events3.find((e) => e.type === "error");
1449
+ expect(errMid).toBeDefined();
1450
+
1451
+ // The agent loop should have been invoked with the tail's userMessageId
1452
+ // (last SUCCESSFUL persist), not the mid's. We check via currentRequestId
1453
+ // on the conversation which drainBatch assigns after the loop.
1454
+ expect(
1455
+ (conversation as unknown as { currentRequestId?: string }).currentRequestId,
1456
+ ).toBe("req-tail");
1457
+
1458
+ // Cleanup: resolve the batched run.
1459
+ resolveRun(1);
1460
+ await new Promise((r) => setTimeout(r, 20));
1461
+ });
1462
+
1463
+ test("failed tail persist is excluded from fanOutOnEvent agent events", async () => {
1464
+ const conversation = makeConversation();
1465
+ await conversation.loadFromDb();
1466
+
1467
+ const events1: ServerMessage[] = [];
1468
+ const events2: ServerMessage[] = [];
1469
+ const events3: ServerMessage[] = [];
1470
+ const events4: ServerMessage[] = [];
1471
+
1472
+ const p1 = conversation.processMessage(
1473
+ "msg-1",
1474
+ [],
1475
+ (e) => events1.push(e),
1476
+ "req-1",
1477
+ );
1478
+ await waitForPendingRun(1);
1479
+
1480
+ // Mid tail will fail to persist. After the batched run resolves,
1481
+ // message_complete (broadcast via fanOutOnEvent) must NOT land on the
1482
+ // failed mid tail — it already received an error event and persisting
1483
+ // the assistant reply for a user message that has no DB row would
1484
+ // desync the client.
1485
+ addMessageShouldThrowForContent.add("fanout-mid-marker");
1486
+
1487
+ conversation.enqueueMessage(
1488
+ "fanout-head",
1489
+ [],
1490
+ (e) => events2.push(e),
1491
+ "req-fanout-head",
1492
+ );
1493
+ conversation.enqueueMessage(
1494
+ "fanout-mid-marker",
1495
+ [],
1496
+ (e) => events3.push(e),
1497
+ "req-fanout-mid",
1498
+ );
1499
+ conversation.enqueueMessage(
1500
+ "fanout-tail",
1501
+ [],
1502
+ (e) => events4.push(e),
1503
+ "req-fanout-tail",
1504
+ );
1505
+
1506
+ resolveRun(0);
1507
+ await p1;
1508
+ await waitForPendingRun(2);
1509
+
1510
+ // Drive the batched run to emit message_complete via fanOutOnEvent.
1511
+ resolveRun(1);
1512
+ await new Promise((r) => setTimeout(r, 20));
1513
+
1514
+ expect(events3.find((e) => e.type === "error")).toBeDefined();
1515
+ expect(events3.find((e) => e.type === "message_complete")).toBeUndefined();
1516
+
1517
+ expect(events2.find((e) => e.type === "message_complete")).toBeDefined();
1518
+ expect(events4.find((e) => e.type === "message_complete")).toBeDefined();
1519
+ });
1520
+
1521
+ test("drainBatch emits exactly one activity-state event for the whole batch", async () => {
1522
+ const activityStates: ServerMessage[] = [];
1523
+ const conversation = makeConversation((msg) => {
1524
+ if ("type" in msg && msg.type === "assistant_activity_state") {
1525
+ activityStates.push(msg);
1526
+ }
1527
+ });
1528
+ await conversation.loadFromDb();
1529
+
1530
+ // Start in-flight message
1531
+ const p1 = conversation.processMessage("msg-1", [], () => {}, "req-1");
1532
+ await waitForPendingRun(1);
1533
+
1534
+ // Snapshot the count before drain so we only compare batch-emitted
1535
+ // transitions (msg-1's processMessage already fired one).
1536
+ const baseline = activityStates.length;
1537
+
1538
+ // Enqueue three sibling passthroughs.
1539
+ conversation.enqueueMessage("msg-2", [], () => {}, "req-2");
1540
+ conversation.enqueueMessage("msg-3", [], () => {}, "req-3");
1541
+ conversation.enqueueMessage("msg-4", [], () => {}, "req-4");
1542
+
1543
+ // Complete run 0 → drain pulls the batched siblings as ONE run.
1544
+ resolveRun(0);
1545
+ await p1;
1546
+ await waitForPendingRun(2);
1547
+
1548
+ // Filter for "message_dequeued" reasons emitted by the batched drain.
1549
+ const batchEmissions = activityStates
1550
+ .slice(baseline)
1551
+ .filter(
1552
+ (m) =>
1553
+ "type" in m &&
1554
+ m.type === "assistant_activity_state" &&
1555
+ (m as { reason?: string }).reason === "message_dequeued",
1556
+ );
1557
+ expect(batchEmissions).toHaveLength(1);
1558
+ expect(batchEmissions[0]).toMatchObject({
1559
+ type: "assistant_activity_state",
1560
+ reason: "message_dequeued",
1561
+ requestId: "req-2", // head's requestId, per the fix
1562
+ });
1563
+
1564
+ resolveRun(1);
1565
+ await new Promise((r) => setTimeout(r, 10));
1566
+ });
1567
+
1568
+ // Defensive recovery path: buildPassthroughBatch is designed to make
1569
+ // the invariant throw unreachable in practice, so neither the head
1570
+ // branch (re-dispatch batch.slice(1) to drainBatch/drainSingleMessage/
1571
+ // drainQueue) nor the tail branch (skip + continue) can fire in normal
1572
+ // operation. Left as a todo so the harness contract is documented
1573
+ // without wedging mainline CI. Covering this would require either
1574
+ // (a) reflecting into drainBatch to short-circuit resolveSlash for a
1575
+ // specific batch entry, or (b) exposing a seam on SlashContext — both
1576
+ // are more invasive than the safety-net value justifies.
1577
+ test.todo(
1578
+ "invariant violation in persist loop triggers error event + recovery, not stranded state",
1579
+ async () => {
1580
+ // no-op: see comment above.
1581
+ },
1582
+ );
1583
+ });
1584
+
776
1585
  // ---------------------------------------------------------------------------
777
1586
  // Queue policy primitives
778
1587
  // ---------------------------------------------------------------------------
@@ -962,32 +1771,31 @@ describe("Conversation checkpoint handoff", () => {
962
1771
  await p1;
963
1772
  });
964
1773
 
965
- test("[experimental] FIFO ordering is preserved through checkpoint handoff", async () => {
1774
+ test("[experimental] checkpoint handoff pulls a batched run for all queued siblings", async () => {
966
1775
  const conversation = makeConversation();
967
1776
  await conversation.loadFromDb();
968
1777
 
969
- const processedOrder: string[] = [];
970
-
971
- const makeHandler = (label: string) => (e: ServerMessage) => {
972
- if (e.type === "message_complete" || e.type === "generation_handoff")
973
- processedOrder.push(label);
974
- };
1778
+ const events1: ServerMessage[] = [];
1779
+ const events2: ServerMessage[] = [];
1780
+ const events3: ServerMessage[] = [];
1781
+ const events4: ServerMessage[] = [];
975
1782
 
976
- // Start first message
1783
+ // Start first message (mid-tool-use — will yield at the next checkpoint)
977
1784
  const p1 = conversation.processMessage(
978
1785
  "msg-1",
979
1786
  [],
980
- makeHandler("msg-1"),
1787
+ (e) => events1.push(e),
981
1788
  "req-1",
982
1789
  );
983
1790
  await waitForPendingRun(1);
984
1791
 
985
- // Enqueue two messages
986
- conversation.enqueueMessage("msg-2", [], makeHandler("msg-2"), "req-2");
987
- conversation.enqueueMessage("msg-3", [], makeHandler("msg-3"), "req-3");
988
- expect(conversation.getQueueDepth()).toBe(2);
1792
+ // Enqueue three sibling passthroughs while msg-1 is mid-turn
1793
+ conversation.enqueueMessage("msg-2", [], (e) => events2.push(e), "req-2");
1794
+ conversation.enqueueMessage("msg-3", [], (e) => events3.push(e), "req-3");
1795
+ conversation.enqueueMessage("msg-4", [], (e) => events4.push(e), "req-4");
1796
+ expect(conversation.getQueueDepth()).toBe(3);
989
1797
 
990
- // Simulate the agent loop yielding at the checkpoint (first run)
1798
+ // Simulate the agent loop yielding at the checkpoint (first run is mid-tool-use)
991
1799
  const run0 = pendingRuns[0];
992
1800
  expect(run0.onCheckpoint).toBeDefined();
993
1801
  const decision = run0.onCheckpoint!({
@@ -1002,19 +1810,23 @@ describe("Conversation checkpoint handoff", () => {
1002
1810
  resolveRun(0);
1003
1811
  await p1;
1004
1812
 
1005
- // msg-2 should be draining next
1813
+ // The yielded drain pulls ALL THREE queued siblings as ONE batched run —
1814
+ // not three separate runs.
1006
1815
  await waitForPendingRun(2);
1816
+ expect(pendingRuns.length).toBe(2);
1007
1817
 
1008
- // Complete second run (msg-2)
1009
- resolveRun(1);
1010
- await waitForPendingRun(3);
1818
+ // Each client saw its own message_dequeued tagged with its own requestId.
1819
+ expect(events2.some((e) => e.type === "message_dequeued")).toBe(true);
1820
+ expect(events3.some((e) => e.type === "message_dequeued")).toBe(true);
1821
+ expect(events4.some((e) => e.type === "message_dequeued")).toBe(true);
1011
1822
 
1012
- // Complete third run (msg-3)
1013
- resolveRun(2);
1823
+ // Resolve the batched run — message_complete fans out to all three clients.
1824
+ resolveRun(1);
1014
1825
  await new Promise((r) => setTimeout(r, 10));
1015
1826
 
1016
- // FIFO order: msg-1 completes first, then msg-2, then msg-3
1017
- expect(processedOrder).toEqual(["msg-1", "msg-2", "msg-3"]);
1827
+ expect(events2.some((e) => e.type === "message_complete")).toBe(true);
1828
+ expect(events3.some((e) => e.type === "message_complete")).toBe(true);
1829
+ expect(events4.some((e) => e.type === "message_complete")).toBe(true);
1018
1830
  });
1019
1831
 
1020
1832
  test("[experimental] active run with repeated tool turns + queued message triggers checkpoint handoff", async () => {
@@ -1100,10 +1912,39 @@ describe("Conversation checkpoint handoff", () => {
1100
1912
  );
1101
1913
  await waitForPendingRun(1);
1102
1914
 
1103
- // Enqueue messages B, C, D
1104
- conversation.enqueueMessage("msg-B", [], makeHandler("B"), "req-B");
1105
- conversation.enqueueMessage("msg-C", [], makeHandler("C"), "req-C");
1106
- conversation.enqueueMessage("msg-D", [], makeHandler("D"), "req-D");
1915
+ // Enqueue messages B, C, D — each on a distinct userMessageInterface so the
1916
+ // batch builder stops at each boundary and we see one run per message.
1917
+ const meta = (iface: string) => ({
1918
+ userMessageInterface: iface,
1919
+ assistantMessageInterface: iface,
1920
+ });
1921
+ conversation.enqueueMessage(
1922
+ "msg-B",
1923
+ [],
1924
+ makeHandler("B"),
1925
+ "req-B",
1926
+ undefined,
1927
+ undefined,
1928
+ meta("macos"),
1929
+ );
1930
+ conversation.enqueueMessage(
1931
+ "msg-C",
1932
+ [],
1933
+ makeHandler("C"),
1934
+ "req-C",
1935
+ undefined,
1936
+ undefined,
1937
+ meta("cli"),
1938
+ );
1939
+ conversation.enqueueMessage(
1940
+ "msg-D",
1941
+ [],
1942
+ makeHandler("D"),
1943
+ "req-D",
1944
+ undefined,
1945
+ undefined,
1946
+ meta("vellum"),
1947
+ );
1107
1948
  expect(conversation.getQueueDepth()).toBe(3);
1108
1949
 
1109
1950
  // Handoff from A -> B