@vellumai/assistant 0.4.49 → 0.4.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (353)
  1. package/ARCHITECTURE.md +24 -33
  2. package/README.md +3 -3
  3. package/docs/architecture/integrations.md +2 -2
  4. package/docs/architecture/keychain-broker.md +6 -6
  5. package/docs/architecture/memory.md +180 -119
  6. package/knip.json +32 -0
  7. package/package.json +3 -2
  8. package/src/__tests__/agent-loop.test.ts +3 -1
  9. package/src/__tests__/anthropic-provider.test.ts +114 -23
  10. package/src/__tests__/approval-cascade.test.ts +1 -15
  11. package/src/__tests__/approval-routes-http.test.ts +2 -0
  12. package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
  13. package/src/__tests__/btw-routes.test.ts +61 -5
  14. package/src/__tests__/canonical-guardian-store.test.ts +95 -0
  15. package/src/__tests__/checker.test.ts +13 -0
  16. package/src/__tests__/config-schema.test.ts +1 -68
  17. package/src/__tests__/config-watcher.test.ts +8 -0
  18. package/src/__tests__/context-memory-e2e.test.ts +11 -100
  19. package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
  20. package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
  21. package/src/__tests__/credential-security-e2e.test.ts +1 -0
  22. package/src/__tests__/credential-security-invariants.test.ts +8 -7
  23. package/src/__tests__/credential-vault-unit.test.ts +23 -18
  24. package/src/__tests__/credential-vault.test.ts +30 -18
  25. package/src/__tests__/credentials-cli.test.ts +257 -82
  26. package/src/__tests__/cu-unified-flow.test.ts +532 -0
  27. package/src/__tests__/date-context.test.ts +93 -77
  28. package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
  29. package/src/__tests__/guardian-routing-invariants.test.ts +93 -0
  30. package/src/__tests__/history-repair.test.ts +245 -0
  31. package/src/__tests__/host-cu-proxy.test.ts +165 -3
  32. package/src/__tests__/http-user-message-parity.test.ts +1 -0
  33. package/src/__tests__/inbound-invite-redemption.test.ts +36 -7
  34. package/src/__tests__/integration-status.test.ts +31 -30
  35. package/src/__tests__/invite-redemption-service.test.ts +166 -13
  36. package/src/__tests__/invite-routes-http.test.ts +166 -5
  37. package/src/__tests__/keychain-broker-client.test.ts +4 -4
  38. package/src/__tests__/list-messages-attachments.test.ts +193 -0
  39. package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
  40. package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
  41. package/src/__tests__/memory-recall-quality.test.ts +244 -407
  42. package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
  43. package/src/__tests__/memory-regressions.test.ts +477 -2841
  44. package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
  45. package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
  46. package/src/__tests__/mime-builder.test.ts +28 -0
  47. package/src/__tests__/native-web-search.test.ts +1 -0
  48. package/src/__tests__/oauth-cli.test.ts +824 -31
  49. package/src/__tests__/oauth-provider-profiles.test.ts +1 -1
  50. package/src/__tests__/oauth-store.test.ts +363 -17
  51. package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
  52. package/src/__tests__/registry.test.ts +0 -1
  53. package/src/__tests__/relay-server.test.ts +55 -1
  54. package/src/__tests__/schedule-tools.test.ts +32 -0
  55. package/src/__tests__/script-proxy-certs.test.ts +1 -1
  56. package/src/__tests__/secret-onetime-send.test.ts +1 -0
  57. package/src/__tests__/secret-routes-managed-proxy.test.ts +183 -0
  58. package/src/__tests__/secure-keys.test.ts +78 -18
  59. package/src/__tests__/send-endpoint-busy.test.ts +3 -0
  60. package/src/__tests__/server-history-render.test.ts +2 -2
  61. package/src/__tests__/session-abort-tool-results.test.ts +1 -14
  62. package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
  63. package/src/__tests__/session-agent-loop.test.ts +19 -15
  64. package/src/__tests__/session-confirmation-signals.test.ts +1 -15
  65. package/src/__tests__/session-error.test.ts +124 -2
  66. package/src/__tests__/session-history-web-search.test.ts +918 -0
  67. package/src/__tests__/session-pre-run-repair.test.ts +1 -14
  68. package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
  69. package/src/__tests__/session-queue.test.ts +37 -27
  70. package/src/__tests__/session-runtime-assembly.test.ts +54 -0
  71. package/src/__tests__/session-slash-known.test.ts +1 -15
  72. package/src/__tests__/session-slash-queue.test.ts +1 -15
  73. package/src/__tests__/session-slash-unknown.test.ts +1 -15
  74. package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
  75. package/src/__tests__/session-workspace-injection.test.ts +3 -37
  76. package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
  77. package/src/__tests__/skills-install-extract.test.ts +93 -0
  78. package/src/__tests__/skills.test.ts +2 -2
  79. package/src/__tests__/skillssh-registry.test.ts +451 -0
  80. package/src/__tests__/slack-channel-config.test.ts +10 -8
  81. package/src/__tests__/trust-store.test.ts +15 -0
  82. package/src/__tests__/twilio-config.test.ts +11 -10
  83. package/src/__tests__/twilio-provider.test.ts +9 -4
  84. package/src/__tests__/voice-invite-redemption.test.ts +85 -5
  85. package/src/agent/ax-tree-compaction.test.ts +51 -0
  86. package/src/agent/loop.ts +39 -12
  87. package/src/approvals/AGENTS.md +1 -1
  88. package/src/approvals/guardian-request-resolvers.ts +14 -2
  89. package/src/bundler/compiler-tools.ts +66 -2
  90. package/src/calls/call-domain.ts +134 -3
  91. package/src/calls/call-store.ts +6 -0
  92. package/src/calls/relay-server.ts +44 -6
  93. package/src/calls/relay-setup-router.ts +17 -1
  94. package/src/calls/twilio-config.ts +5 -4
  95. package/src/calls/twilio-provider.ts +14 -9
  96. package/src/calls/twilio-rest.ts +10 -7
  97. package/src/calls/types.ts +3 -1
  98. package/src/cli/commands/config.ts +14 -9
  99. package/src/cli/commands/contacts.ts +3 -0
  100. package/src/cli/commands/credentials.ts +170 -174
  101. package/src/cli/commands/doctor.ts +11 -8
  102. package/src/cli/commands/keys.ts +9 -9
  103. package/src/cli/commands/mcp.ts +46 -59
  104. package/src/cli/commands/memory.ts +16 -165
  105. package/src/cli/commands/oauth/apps.ts +68 -10
  106. package/src/cli/commands/oauth/connections.ts +475 -105
  107. package/src/cli/commands/oauth/index.ts +3 -3
  108. package/src/cli/commands/oauth/providers.ts +18 -4
  109. package/src/cli/commands/sessions.ts +5 -2
  110. package/src/cli/commands/skills.ts +173 -1
  111. package/src/cli/http-client.ts +0 -20
  112. package/src/cli/main-screen.tsx +2 -2
  113. package/src/cli/program.ts +5 -6
  114. package/src/cli.ts +20 -22
  115. package/src/config/__tests__/feature-flag-registry-bundled.test.ts +39 -0
  116. package/src/config/bundled-skills/computer-use/TOOLS.json +1 -1
  117. package/src/config/bundled-skills/computer-use/tools/computer-use-observe.ts +12 -0
  118. package/src/config/bundled-skills/contacts/SKILL.md +35 -11
  119. package/src/config/bundled-skills/contacts/tools/google-contacts.ts +1 -1
  120. package/src/config/bundled-skills/gmail/SKILL.md +1 -1
  121. package/src/config/bundled-skills/gmail/TOOLS.json +52 -0
  122. package/src/config/bundled-skills/gmail/tools/gmail-archive.ts +13 -3
  123. package/src/config/bundled-skills/gmail/tools/gmail-attachments.ts +9 -2
  124. package/src/config/bundled-skills/gmail/tools/gmail-draft.ts +5 -1
  125. package/src/config/bundled-skills/gmail/tools/gmail-filters.ts +5 -1
  126. package/src/config/bundled-skills/gmail/tools/gmail-follow-up.ts +5 -1
  127. package/src/config/bundled-skills/gmail/tools/gmail-forward.ts +5 -1
  128. package/src/config/bundled-skills/gmail/tools/gmail-label.ts +9 -2
  129. package/src/config/bundled-skills/gmail/tools/gmail-outreach-scan.ts +5 -1
  130. package/src/config/bundled-skills/gmail/tools/gmail-send-draft.ts +5 -1
  131. package/src/config/bundled-skills/gmail/tools/gmail-sender-digest.ts +5 -1
  132. package/src/config/bundled-skills/gmail/tools/gmail-trash.ts +5 -1
  133. package/src/config/bundled-skills/gmail/tools/gmail-unsubscribe.ts +5 -1
  134. package/src/config/bundled-skills/gmail/tools/gmail-vacation.ts +5 -1
  135. package/src/config/bundled-skills/google-calendar/TOOLS.json +20 -0
  136. package/src/config/bundled-skills/google-calendar/tools/calendar-check-availability.ts +2 -1
  137. package/src/config/bundled-skills/google-calendar/tools/calendar-create-event.ts +2 -1
  138. package/src/config/bundled-skills/google-calendar/tools/calendar-get-event.ts +2 -1
  139. package/src/config/bundled-skills/google-calendar/tools/calendar-list-events.ts +2 -1
  140. package/src/config/bundled-skills/google-calendar/tools/calendar-rsvp.ts +2 -1
  141. package/src/config/bundled-skills/google-calendar/tools/shared.ts +8 -2
  142. package/src/config/bundled-skills/messaging/SKILL.md +1 -1
  143. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +2 -2
  144. package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +2 -2
  145. package/src/config/bundled-skills/messaging/tools/messaging-auth-test.ts +2 -2
  146. package/src/config/bundled-skills/messaging/tools/messaging-list-conversations.ts +2 -2
  147. package/src/config/bundled-skills/messaging/tools/messaging-mark-read.ts +2 -2
  148. package/src/config/bundled-skills/messaging/tools/messaging-read.ts +2 -2
  149. package/src/config/bundled-skills/messaging/tools/messaging-search.ts +2 -2
  150. package/src/config/bundled-skills/messaging/tools/messaging-send.ts +2 -2
  151. package/src/config/bundled-skills/messaging/tools/messaging-sender-digest.ts +2 -2
  152. package/src/config/bundled-skills/messaging/tools/shared.ts +7 -5
  153. package/src/config/bundled-skills/slack/tools/shared.ts +1 -1
  154. package/src/config/bundled-skills/slack/tools/slack-add-reaction.ts +1 -1
  155. package/src/config/bundled-skills/slack/tools/slack-channel-details.ts +1 -1
  156. package/src/config/bundled-skills/slack/tools/slack-delete-message.ts +1 -1
  157. package/src/config/bundled-skills/slack/tools/slack-edit-message.ts +1 -1
  158. package/src/config/bundled-skills/slack/tools/slack-leave-channel.ts +1 -1
  159. package/src/config/bundled-skills/slack/tools/slack-scan-digest.ts +1 -1
  160. package/src/config/bundled-tool-registry.ts +2 -5
  161. package/src/config/loader.ts +6 -42
  162. package/src/config/schema.ts +1 -12
  163. package/src/config/schemas/memory-lifecycle.ts +0 -9
  164. package/src/config/schemas/memory-processing.ts +0 -180
  165. package/src/config/schemas/memory-retrieval.ts +32 -104
  166. package/src/config/schemas/memory.ts +0 -10
  167. package/src/config/types.ts +0 -4
  168. package/src/contacts/contact-store.ts +39 -2
  169. package/src/contacts/contacts-write.ts +9 -0
  170. package/src/context/window-manager.ts +4 -1
  171. package/src/daemon/config-watcher.ts +55 -2
  172. package/src/daemon/daemon-control.ts +1 -1
  173. package/src/daemon/date-context.ts +114 -31
  174. package/src/daemon/handlers/config-ingress.ts +2 -2
  175. package/src/daemon/handlers/config-slack-channel.ts +59 -39
  176. package/src/daemon/handlers/config-telegram.ts +23 -14
  177. package/src/daemon/handlers/session-history.ts +1 -358
  178. package/src/daemon/handlers/sessions.ts +18 -13
  179. package/src/daemon/handlers/shared.ts +3 -17
  180. package/src/daemon/handlers/skills.ts +20 -1
  181. package/src/daemon/history-repair.ts +72 -8
  182. package/src/daemon/host-cu-proxy.ts +55 -26
  183. package/src/daemon/lifecycle.ts +39 -4
  184. package/src/daemon/mcp-reload-service.ts +2 -2
  185. package/src/daemon/message-types/computer-use.ts +1 -12
  186. package/src/daemon/message-types/memory.ts +4 -16
  187. package/src/daemon/message-types/messages.ts +1 -0
  188. package/src/daemon/message-types/sessions.ts +4 -42
  189. package/src/daemon/server.ts +6 -1
  190. package/src/daemon/session-agent-loop-handlers.ts +38 -0
  191. package/src/daemon/session-agent-loop.ts +334 -48
  192. package/src/daemon/session-error.ts +89 -6
  193. package/src/daemon/session-history.ts +17 -7
  194. package/src/daemon/session-media-retry.ts +6 -2
  195. package/src/daemon/session-memory.ts +69 -149
  196. package/src/daemon/session-process.ts +10 -1
  197. package/src/daemon/session-runtime-assembly.ts +49 -19
  198. package/src/daemon/session-slash.ts +3 -5
  199. package/src/daemon/session-surfaces.ts +4 -1
  200. package/src/daemon/session-tool-setup.ts +7 -1
  201. package/src/daemon/session.ts +12 -2
  202. package/src/email/providers/index.ts +2 -2
  203. package/src/instrument.ts +61 -1
  204. package/src/media/avatar-router.ts +1 -1
  205. package/src/memory/admin.ts +2 -191
  206. package/src/memory/canonical-guardian-store.ts +38 -2
  207. package/src/memory/conversation-crud.ts +0 -33
  208. package/src/memory/conversation-queries.ts +25 -83
  209. package/src/memory/db-init.ts +32 -0
  210. package/src/memory/embedding-backend.ts +84 -8
  211. package/src/memory/embedding-types.ts +9 -1
  212. package/src/memory/indexer.ts +7 -46
  213. package/src/memory/invite-store.ts +19 -0
  214. package/src/memory/items-extractor.ts +274 -76
  215. package/src/memory/job-handlers/backfill.ts +2 -127
  216. package/src/memory/job-handlers/cleanup.ts +2 -16
  217. package/src/memory/job-handlers/extraction.ts +2 -138
  218. package/src/memory/job-handlers/index-maintenance.ts +1 -6
  219. package/src/memory/job-handlers/summarization.ts +3 -148
  220. package/src/memory/job-utils.ts +21 -59
  221. package/src/memory/jobs-store.ts +1 -159
  222. package/src/memory/jobs-worker.ts +9 -52
  223. package/src/memory/migrations/104-core-indexes.ts +3 -3
  224. package/src/memory/migrations/149-oauth-tables.ts +2 -0
  225. package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
  226. package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
  227. package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
  228. package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
  229. package/src/memory/migrations/154-drop-fts.ts +20 -0
  230. package/src/memory/migrations/155-drop-conflicts.ts +7 -0
  231. package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
  232. package/src/memory/migrations/157-invite-contact-id.ts +104 -0
  233. package/src/memory/migrations/index.ts +8 -0
  234. package/src/memory/migrations/registry.ts +6 -0
  235. package/src/memory/qdrant-client.ts +148 -51
  236. package/src/memory/raw-query.ts +1 -1
  237. package/src/memory/retriever.test.ts +294 -273
  238. package/src/memory/retriever.ts +421 -645
  239. package/src/memory/schema/calls.ts +2 -0
  240. package/src/memory/schema/contacts.ts +1 -0
  241. package/src/memory/schema/memory-core.ts +3 -48
  242. package/src/memory/schema/oauth.ts +2 -0
  243. package/src/memory/search/formatting.ts +263 -176
  244. package/src/memory/search/lexical.ts +1 -254
  245. package/src/memory/search/ranking.ts +0 -455
  246. package/src/memory/search/semantic.ts +100 -14
  247. package/src/memory/search/staleness.ts +47 -0
  248. package/src/memory/search/tier-classifier.ts +21 -0
  249. package/src/memory/search/types.ts +15 -77
  250. package/src/memory/task-memory-cleanup.ts +4 -6
  251. package/src/messaging/provider.ts +1 -1
  252. package/src/messaging/providers/gmail/adapter.ts +1 -1
  253. package/src/messaging/providers/gmail/mime-builder.ts +17 -7
  254. package/src/messaging/providers/telegram-bot/adapter.ts +17 -8
  255. package/src/messaging/providers/whatsapp/adapter.ts +13 -9
  256. package/src/messaging/registry.ts +9 -5
  257. package/src/oauth/byo-connection.test.ts +40 -25
  258. package/src/oauth/connect-orchestrator.ts +4 -10
  259. package/src/oauth/connection-resolver.ts +20 -6
  260. package/src/oauth/manual-token-connection.ts +5 -5
  261. package/src/oauth/oauth-store.ts +183 -31
  262. package/src/oauth/platform-connection.test.ts +1 -1
  263. package/src/oauth/provider-behaviors.ts +503 -4
  264. package/src/oauth/seed-providers.ts +214 -8
  265. package/src/oauth/token-persistence.ts +31 -16
  266. package/src/permissions/defaults.ts +1 -0
  267. package/src/permissions/trust-store.ts +23 -1
  268. package/src/playbooks/playbook-compiler.ts +1 -1
  269. package/src/prompts/system-prompt.ts +18 -2
  270. package/src/providers/anthropic/client.ts +56 -126
  271. package/src/providers/types.ts +7 -1
  272. package/src/runtime/AGENTS.md +9 -0
  273. package/src/runtime/auth/route-policy.ts +6 -3
  274. package/src/runtime/channel-readiness-service.ts +48 -40
  275. package/src/runtime/guardian-reply-router.ts +24 -22
  276. package/src/runtime/http-server.ts +2 -2
  277. package/src/runtime/http-types.ts +2 -0
  278. package/src/runtime/invite-redemption-service.ts +72 -12
  279. package/src/runtime/invite-service.ts +43 -0
  280. package/src/runtime/middleware/twilio-validation.ts +1 -1
  281. package/src/runtime/pending-interactions.ts +2 -2
  282. package/src/runtime/routes/brain-graph-routes.ts +10 -90
  283. package/src/runtime/routes/btw-routes.ts +10 -5
  284. package/src/runtime/routes/conversation-routes.ts +56 -11
  285. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
  286. package/src/runtime/routes/integrations/slack/channel.ts +2 -2
  287. package/src/runtime/routes/integrations/telegram.ts +2 -2
  288. package/src/runtime/routes/integrations/twilio.ts +17 -17
  289. package/src/runtime/routes/invite-routes.ts +29 -4
  290. package/src/runtime/routes/memory-item-routes.test.ts +754 -0
  291. package/src/runtime/routes/memory-item-routes.ts +503 -0
  292. package/src/runtime/routes/secret-routes.ts +17 -0
  293. package/src/runtime/routes/session-management-routes.ts +3 -3
  294. package/src/runtime/routes/settings-routes.ts +3 -3
  295. package/src/runtime/routes/trust-rules-routes.ts +14 -0
  296. package/src/runtime/routes/workspace-routes.ts +9 -4
  297. package/src/runtime/routes/workspace-utils.ts +8 -2
  298. package/src/schedule/integration-status.ts +26 -19
  299. package/src/security/keychain-broker-client.ts +17 -4
  300. package/src/security/oauth2.ts +6 -7
  301. package/src/security/secure-keys.ts +44 -19
  302. package/src/security/token-manager.ts +46 -39
  303. package/src/services/vercel-deploy.ts +0 -24
  304. package/src/signals/confirm.ts +78 -0
  305. package/src/signals/mcp-reload.ts +18 -0
  306. package/src/skills/catalog-install.ts +74 -18
  307. package/src/skills/skillssh-registry.ts +503 -0
  308. package/src/tools/assets/search.ts +5 -1
  309. package/src/tools/computer-use/definitions.ts +0 -10
  310. package/src/tools/computer-use/registry.ts +1 -1
  311. package/src/tools/credentials/vault.ts +22 -7
  312. package/src/tools/memory/definitions.ts +4 -13
  313. package/src/tools/memory/handlers.test.ts +83 -103
  314. package/src/tools/memory/handlers.ts +50 -85
  315. package/src/tools/network/script-proxy/session-manager.ts +8 -8
  316. package/src/tools/schedule/create.ts +10 -3
  317. package/src/tools/schedule/update.ts +8 -1
  318. package/src/tools/skills/load.ts +25 -2
  319. package/src/watcher/provider-types.ts +1 -1
  320. package/src/watcher/providers/github.ts +1 -1
  321. package/src/watcher/providers/gmail.ts +3 -3
  322. package/src/watcher/providers/google-calendar.ts +3 -3
  323. package/src/watcher/providers/linear.ts +1 -1
  324. package/src/__tests__/clarification-resolver.test.ts +0 -193
  325. package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
  326. package/src/__tests__/conflict-policy.test.ts +0 -269
  327. package/src/__tests__/conflict-store.test.ts +0 -372
  328. package/src/__tests__/contradiction-checker.test.ts +0 -361
  329. package/src/__tests__/entity-extractor.test.ts +0 -211
  330. package/src/__tests__/entity-search.test.ts +0 -1117
  331. package/src/__tests__/profile-compiler.test.ts +0 -392
  332. package/src/__tests__/session-conflict-gate.test.ts +0 -1228
  333. package/src/__tests__/session-profile-injection.test.ts +0 -557
  334. package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
  335. package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
  336. package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
  337. package/src/daemon/session-conflict-gate.ts +0 -167
  338. package/src/daemon/session-dynamic-profile.ts +0 -77
  339. package/src/memory/clarification-resolver.ts +0 -417
  340. package/src/memory/conflict-intent.ts +0 -205
  341. package/src/memory/conflict-policy.ts +0 -127
  342. package/src/memory/conflict-store.ts +0 -410
  343. package/src/memory/contradiction-checker.ts +0 -508
  344. package/src/memory/entity-extractor.ts +0 -535
  345. package/src/memory/format-recall.ts +0 -47
  346. package/src/memory/fts-reconciler.ts +0 -165
  347. package/src/memory/job-handlers/conflict.ts +0 -200
  348. package/src/memory/profile-compiler.ts +0 -195
  349. package/src/memory/recall-cache.ts +0 -117
  350. package/src/memory/search/entity.ts +0 -535
  351. package/src/memory/search/query-expansion.test.ts +0 -70
  352. package/src/memory/search/query-expansion.ts +0 -118
  353. package/src/runtime/routes/mcp-routes.ts +0 -20
@@ -0,0 +1,532 @@
1
+ /**
2
+ * End-to-end tests for the unified CU proxy flow.
3
+ *
4
+ * Tests the surfaceProxyResolver's CU tool routing — the integration
5
+ * point between the agent loop and the HostCuProxy.
6
+ */
7
+
8
+ import { afterEach, describe, expect, test } from "bun:test";
9
+
10
+ import { HostCuProxy } from "../daemon/host-cu-proxy.js";
11
+ import type { SurfaceSessionContext } from "../daemon/session-surfaces.js";
12
+ import { surfaceProxyResolver } from "../daemon/session-surfaces.js";
13
+
14
+ // ---------------------------------------------------------------------------
15
+ // Test helpers
16
+ // ---------------------------------------------------------------------------
17
+
18
+ /**
19
+ * Build a minimal SurfaceSessionContext (conversationId "test-session") with optional hostCuProxy; the proxy is passed through as given and is undefined when the argument is omitted.
20
+ * Only the fields required by the CU routing path are populated: every Map/Set/array starts empty and every callback is an inert stub (emit/sendToClient do nothing, isProcessing returns false, enqueueMessage reports not queued, getQueueDepth returns 0, processMessage resolves to "", withSurface just calls fn).
21
+ */
22
+ function buildMockContext(hostCuProxy?: HostCuProxy): SurfaceSessionContext {
23
+ return {
24
+ conversationId: "test-session",
25
+ traceEmitter: { emit: () => {} },
26
+ sendToClient: () => {},
27
+ pendingSurfaceActions: new Map(),
28
+ lastSurfaceAction: new Map(),
29
+ surfaceState: new Map(),
30
+ surfaceUndoStacks: new Map(),
31
+ surfaceActionRequestIds: new Set(),
32
+ currentTurnSurfaces: [],
33
+ hostCuProxy, // the only caller-controlled field; tests omit it to simulate "no proxy"
34
+ isProcessing: () => false,
35
+ enqueueMessage: () => ({ queued: false, requestId: "r1" }),
36
+ getQueueDepth: () => 0,
37
+ processMessage: async () => "",
38
+ withSurface: async (_id, fn) => fn(), // ignores the surface id and runs the callback directly
39
+ };
40
+ }
41
+
42
+ // ---------------------------------------------------------------------------
43
+ // Tests
44
+ // ---------------------------------------------------------------------------
45
+
46
+ describe("surfaceProxyResolver — CU tool routing", () => {
47
+ let sentMessages: unknown[];
48
+ let proxy: HostCuProxy;
49
+
50
+ function setupProxy(maxSteps?: number): SurfaceSessionContext { // Resets sentMessages, assigns the shared `proxy` to a new connected HostCuProxy that records outgoing messages, and returns a mock context wired to it.
51
+ sentMessages = []; // fresh capture buffer for each call
52
+ const sendToClient = (msg: unknown) => sentMessages.push(msg); // every message the proxy sends is appended to sentMessages
53
+ proxy = new HostCuProxy(sendToClient as never, undefined, maxSteps); // maxSteps is forwarded as the third constructor argument; the second is left undefined
54
+ // Mark client as connected so requests are sent (the `true` argument; the not-connected test in this file notes clientConnected defaults to false)
55
+ proxy.updateSender(sendToClient as never, true);
56
+ return buildMockContext(proxy);
57
+ }
58
+
59
+ afterEach(() => {
60
+ proxy?.dispose();
61
+ });
62
+
63
+ // -------------------------------------------------------------------------
64
+ // No desktop client connected
65
+ // -------------------------------------------------------------------------
66
+
67
+ describe("no desktop client connected", () => {
68
+ test("returns error when hostCuProxy is undefined", async () => {
69
+ const ctx = buildMockContext(/* no proxy */);
70
+ const result = await surfaceProxyResolver(ctx, "computer_use_click", {
71
+ element_id: 42,
72
+ reasoning: "click the button",
73
+ });
74
+
75
+ expect(result.isError).toBe(true);
76
+ expect(result.content).toContain("not available");
77
+ expect(result.content).toContain("no desktop client");
78
+ });
79
+
80
+ test("returns error for screenshot tool when no proxy", async () => {
81
+ const ctx = buildMockContext();
82
+ const result = await surfaceProxyResolver(
83
+ ctx,
84
+ "computer_use_screenshot",
85
+ {},
86
+ );
87
+
88
+ expect(result.isError).toBe(true);
89
+ expect(result.content).toContain("not available");
90
+ });
91
+
92
+ test("returns error when proxy exists but client not connected", async () => {
93
+ const sendToClient = () => {};
94
+ const proxyObj = new HostCuProxy(sendToClient as never);
95
+ // Default clientConnected is false — do NOT call updateSender with true. A local proxy is used here instead of setupProxy() because setupProxy() always marks the client as connected.
96
+ const ctx = buildMockContext(proxyObj);
97
+ const result = await surfaceProxyResolver(ctx, "computer_use_click", {
98
+ element_id: 1,
99
+ });
100
+
101
+ expect(result.isError).toBe(true);
102
+ expect(result.content).toContain("not available");
103
+ proxyObj.dispose(); // NOTE(review): not reached if an expectation above throws, and the afterEach hook only disposes the shared `proxy`, not this local one — a try/finally would guarantee cleanup; confirm whether that matters for HostCuProxy
104
+ });
105
+
106
+ test("returns error for terminal tools when no proxy", async () => {
107
+ const ctx = buildMockContext();
108
+
109
+ const doneResult = await surfaceProxyResolver(ctx, "computer_use_done", {
110
+ summary: "finished",
111
+ });
112
+ expect(doneResult.isError).toBe(true);
113
+
114
+ const respondResult = await surfaceProxyResolver(
115
+ ctx,
116
+ "computer_use_respond",
117
+ { answer: "42" },
118
+ );
119
+ expect(respondResult.isError).toBe(true);
120
+ });
121
+ });
122
+
123
+ // -------------------------------------------------------------------------
124
+ // Terminal tools (computer_use_done, computer_use_respond)
125
+ // -------------------------------------------------------------------------
126
+
127
+ describe("terminal tools resolve immediately", () => {
128
+ test("computer_use_done resets proxy and returns summary", async () => {
129
+ const ctx = setupProxy();
130
+
131
+ // Record some actions first to verify reset
132
+ proxy.recordAction("computer_use_click", { element_id: 1 });
133
+ proxy.recordAction("computer_use_click", { element_id: 2 });
134
+ expect(proxy.stepCount).toBe(2);
135
+
136
+ const result = await surfaceProxyResolver(ctx, "computer_use_done", {
137
+ summary: "Completed the file upload",
138
+ });
139
+
140
+ expect(result.isError).toBe(false);
141
+ expect(result.content).toBe("Completed the file upload");
142
+ // No message sent to client for terminal tools
143
+ expect(sentMessages).toHaveLength(0);
144
+ // Proxy state should be reset
145
+ expect(proxy.stepCount).toBe(0);
146
+ expect(proxy.actionHistory).toHaveLength(0);
147
+ });
148
+
149
+ test("computer_use_respond resets proxy and returns answer", async () => {
150
+ const ctx = setupProxy();
151
+
152
+ proxy.recordAction("computer_use_click", { element_id: 1 });
153
+
154
+ const result = await surfaceProxyResolver(ctx, "computer_use_respond", {
155
+ answer: "The price is $42",
156
+ reasoning: "Found the price on the page",
157
+ });
158
+
159
+ expect(result.isError).toBe(false);
160
+ expect(result.content).toBe("The price is $42");
161
+ expect(sentMessages).toHaveLength(0);
162
+ expect(proxy.stepCount).toBe(0);
163
+ });
164
+
165
+ test("computer_use_done uses default when no summary provided", async () => {
166
+ const ctx = setupProxy();
167
+
168
+ const result = await surfaceProxyResolver(ctx, "computer_use_done", {});
169
+
170
+ expect(result.isError).toBe(false);
171
+ expect(result.content).toBe("Task complete");
172
+ });
173
+
174
+ test("computer_use_respond falls back to summary then default", async () => {
175
+ const ctx = setupProxy();
176
+
177
+ // computer_use_done called with a summary returns that summary verbatim. NOTE(review): despite the test title, computer_use_respond is never called here without an answer, so the "falls back to summary then default" path is not exercised — confirm intent or add those cases.
178
+ const r1 = await surfaceProxyResolver(ctx, "computer_use_done", {
179
+ summary: "All done",
180
+ });
181
+ expect(r1.content).toBe("All done");
182
+
183
+ // computer_use_respond called with an answer field returns that answer verbatim
184
+ const r2 = await surfaceProxyResolver(ctx, "computer_use_respond", {
185
+ answer: "The answer is 7",
186
+ });
187
+ expect(r2.content).toBe("The answer is 7");
188
+ });
189
+ });
190
+
191
+ // -------------------------------------------------------------------------
192
+ // Action tools (computer_use_click, screenshot, etc.) — proxy to client
193
+ // -------------------------------------------------------------------------
194
+
195
+ describe("action tools proxy to client", () => {
196
+ test("computer_use_click routes through proxy and returns observation", async () => {
197
+ const ctx = setupProxy();
198
+
199
+ const resultPromise = surfaceProxyResolver(ctx, "computer_use_click", {
200
+ element_id: 42,
201
+ reasoning: "Click the submit button",
202
+ });
203
+
204
+ // Verify the proxy sent a request to the client
205
+ expect(sentMessages).toHaveLength(1);
206
+ const sent = sentMessages[0] as Record<string, unknown>;
207
+ expect(sent.type).toBe("host_cu_request");
208
+ expect(sent.toolName).toBe("computer_use_click");
209
+ expect(sent.input).toEqual({
210
+ element_id: 42,
211
+ reasoning: "Click the submit button",
212
+ });
213
+ expect(sent.sessionId).toBe("test-session");
214
+
215
+ // Action was recorded
216
+ expect(proxy.stepCount).toBe(1);
217
+ expect(proxy.actionHistory).toHaveLength(1);
218
+ expect(proxy.actionHistory[0].toolName).toBe("computer_use_click");
219
+
220
+ // Simulate client resolving with observation
221
+ const requestId = sent.requestId as string;
222
+ proxy.resolve(requestId, {
223
+ axTree: "SubmitButton [1]\nTextField [2]",
224
+ executionResult: "Clicked element 42",
225
+ });
226
+
227
+ const result = await resultPromise;
228
+ expect(result.isError).toBe(false);
229
+ expect(result.content).toContain("Clicked element 42");
230
+ expect(result.content).toContain("<ax-tree>");
231
+ expect(result.content).toContain("SubmitButton [1]");
232
+ });
233
+
234
+ test("computer_use_screenshot routes through proxy", async () => {
235
+ const ctx = setupProxy();
236
+
237
+ const resultPromise = surfaceProxyResolver(
238
+ ctx,
239
+ "computer_use_screenshot",
240
+ { reasoning: "Capture current state" },
241
+ );
242
+
243
+ expect(sentMessages).toHaveLength(1);
244
+ const sent = sentMessages[0] as Record<string, unknown>;
245
+ expect(sent.type).toBe("host_cu_request");
246
+ expect(sent.toolName).toBe("computer_use_screenshot");
247
+
248
+ proxy.resolve(sent.requestId as string, {
249
+ axTree: "Window [1]",
250
+ screenshot: "base64screenshot",
251
+ screenshotWidthPx: 1920,
252
+ screenshotHeightPx: 1080,
253
+ });
254
+
255
+ const result = await resultPromise;
256
+ expect(result.isError).toBe(false);
257
+ expect(result.content).toContain("1920x1080 px");
258
+ expect(result.contentBlocks).toHaveLength(1);
259
+ expect(result.contentBlocks![0]).toEqual({
260
+ type: "image",
261
+ source: {
262
+ type: "base64",
263
+ media_type: "image/jpeg",
264
+ data: "base64screenshot",
265
+ },
266
+ });
267
+ });
268
+
269
// A type-text call should carry its input through to the captured message
// unchanged and surface the resolved executionResult in the result content.
test("computer_use_type_text routes through proxy", async () => {
  const ctx = setupProxy();

  const pending = surfaceProxyResolver(ctx, "computer_use_type_text", {
    text: "Hello world",
    reasoning: "Type into search box",
  });

  const outbound = sentMessages[0] as Record<string, unknown>;
  expect(outbound.toolName).toBe("computer_use_type_text");
  expect(outbound.input).toEqual({
    text: "Hello world",
    reasoning: "Type into search box",
  });

  // Answer the pending request as if the text had been typed.
  const requestId = outbound.requestId as string;
  proxy.resolve(requestId, {
    axTree: "SearchBox [1] value='Hello world'",
    executionResult: "Typed text",
  });

  const outcome = await pending;
  expect(outcome.isError).toBe(false);
  expect(outcome.content).toContain("Typed text");
});
294
+ });
295
+
296
+ // -------------------------------------------------------------------------
297
+ // Full proxy lifecycle (observe → click → done)
298
+ // -------------------------------------------------------------------------
299
+
300
// Walks one proxy through a screenshot, a click and a terminal
// computer_use_done, checking step count and history at each stage.
describe("full proxy lifecycle", () => {
  test("observe → click → done sequence", async () => {
    const ctx = setupProxy();

    // Reads the message captured at the given position in sentMessages.
    const sentAt = (index: number) =>
      sentMessages[index] as Record<string, unknown>;

    // Step 1: observe (screenshot)
    const observePending = surfaceProxyResolver(
      ctx,
      "computer_use_screenshot",
      { reasoning: "Check what's on screen" },
    );
    const observeRequestId = sentAt(0).requestId as string;
    proxy.resolve(observeRequestId, {
      axTree: "LoginButton [1]\nUsernameField [2]",
    });
    const observed = await observePending;
    expect(observed.isError).toBe(false);
    expect(observed.content).toContain("LoginButton [1]");
    expect(proxy.stepCount).toBe(1);

    // Step 2: click
    const clickPending = surfaceProxyResolver(ctx, "computer_use_click", {
      element_id: 1,
      reasoning: "Click login button",
    });
    const clickRequestId = sentAt(1).requestId as string;
    proxy.resolve(clickRequestId, {
      axTree: "PasswordField [1]\nSubmitButton [2]",
      axDiff: "+ PasswordField [1]\n+ SubmitButton [2]\n- LoginButton [1]",
      executionResult: "Clicked element 1",
    });
    const clicked = await clickPending;
    expect(clicked.isError).toBe(false);
    expect(clicked.content).toContain("Clicked element 1");
    expect(clicked.content).toContain("PasswordField [1]");
    expect(proxy.stepCount).toBe(2);

    // Step 3: done — awaited directly, no proxy.resolve is issued for it
    const finished = await surfaceProxyResolver(ctx, "computer_use_done", {
      summary: "Logged in successfully",
    });
    expect(finished.isError).toBe(false);
    expect(finished.content).toBe("Logged in successfully");

    // Proxy state is clean after done
    expect(proxy.stepCount).toBe(0);
    expect(proxy.actionHistory).toHaveLength(0);
    // Only 2 messages sent to client (screenshot + click; done is terminal)
    expect(sentMessages).toHaveLength(2);
  });
});
348
+
349
+ // -------------------------------------------------------------------------
350
+ // Step limit enforced through resolver
351
+ // -------------------------------------------------------------------------
352
+
353
// Covers the resolver once more actions are recorded than maxSteps allows:
// action tools are rejected, computer_use_done still completes.
describe("step limit enforcement through resolver", () => {
  // Records three clicks directly on the proxy, one more than the maxSteps of
  // 2 that both tests configure.
  const recordThreeClicks = (): void => {
    for (const element_id of [1, 2, 3]) {
      proxy.recordAction("computer_use_click", { element_id });
    }
  };

  test("rejects action tools when step limit exceeded", async () => {
    const ctx = setupProxy(2); // maxSteps = 2

    // Record enough actions to exceed the limit
    recordThreeClicks();
    expect(proxy.stepCount).toBe(3);

    // The surfaceProxyResolver calls proxy.request, which checks step limit
    const rejected = await surfaceProxyResolver(ctx, "computer_use_click", {
      element_id: 4,
      reasoning: "click",
    });

    expect(rejected.isError).toBe(true);
    expect(rejected.content).toContain("Step limit");
    expect(rejected.content).toContain("computer_use_done");
  });

  test("terminal tools still work after step limit exceeded", async () => {
    const ctx = setupProxy(2);

    recordThreeClicks();

    // computer_use_done should still work (terminal, resolves immediately)
    const finished = await surfaceProxyResolver(ctx, "computer_use_done", {
      summary: "Stopped because step limit",
    });

    expect(finished.isError).toBe(false);
    expect(finished.content).toBe("Stopped because step limit");
    expect(proxy.stepCount).toBe(0);
  });
});
391
+
392
+ // -------------------------------------------------------------------------
393
+ // Error from client
394
+ // -------------------------------------------------------------------------
395
+
396
// An observation that carries executionError must come back from the resolver
// as an error result that includes the client's message.
describe("error from client observation", () => {
  test("returns error result when client reports execution error", async () => {
    const ctx = setupProxy();

    const pending = surfaceProxyResolver(ctx, "computer_use_click", {
      element_id: 999,
      reasoning: "click missing element",
    });

    // Answer the pending request with a failure instead of a success.
    const outbound = sentMessages[0] as Record<string, unknown>;
    const requestId = outbound.requestId as string;
    proxy.resolve(requestId, {
      executionError: "Element 999 not found in AX tree",
      axTree: "Window [1]",
    });

    const outcome = await pending;
    expect(outcome.isError).toBe(true);
    expect(outcome.content).toContain("Action failed");
    expect(outcome.content).toContain("Element 999 not found");
  });
});
417
+
418
+ // -------------------------------------------------------------------------
419
+ // Reasoning propagation
420
+ // -------------------------------------------------------------------------
421
+
422
// Checks that the `reasoning` field of a tool input reaches both the captured
// outbound message and the proxy's action history.
describe("reasoning propagation", () => {
  test("reasoning from input is passed to proxy request", async () => {
    const ctx = setupProxy();

    const resultPromise = surfaceProxyResolver(ctx, "computer_use_key", {
      key: "Enter",
      reasoning: "Submit the form",
    });

    const sent = sentMessages[0] as Record<string, unknown>;
    expect(sent.reasoning).toBe("Submit the form");

    // Resolve to avoid unhandled rejection on dispose
    proxy.resolve(sent.requestId as string, { axTree: "..." });
    await resultPromise;
  });

  test("reasoning is recorded in action history", async () => {
    const ctx = setupProxy();

    // Keep the promise: leaving it floating lets the test finish while the
    // request is still settling, so a late rejection would go unreported or
    // be attributed to another test.
    const resultPromise = surfaceProxyResolver(ctx, "computer_use_scroll", {
      direction: "down",
      amount: 3,
      reasoning: "Scroll to see more",
    });

    // The history entry is asserted before the request is resolved.
    expect(proxy.actionHistory[0].reasoning).toBe("Scroll to see more");

    // Resolve the pending request, then wait for it to settle before the
    // test returns.
    const sent = sentMessages[0] as Record<string, unknown>;
    proxy.resolve(sent.requestId as string, { axTree: "..." });
    await resultPromise;
  });
});
455
+
456
+ // -------------------------------------------------------------------------
457
+ // Non-CU tools are not handled by CU routing
458
+ // -------------------------------------------------------------------------
459
+
460
// Tool names outside the computer_use_* family: ui_show must not produce the
// CU-specific messages, and an unrecognised name must yield an error.
describe("non-CU tools are not handled by CU routing", () => {
  test("ui_show is not affected by CU routing", async () => {
    const ctx = setupProxy();

    const outcome = await surfaceProxyResolver(ctx, "ui_show", {
      surface_type: "confirmation",
      data: { message: "Are you sure?" },
    });

    // ui_show goes through its own path, not the CU path
    for (const cuOnlyPhrase of ["not available", "desktop client"]) {
      expect(outcome.content).not.toContain(cuOnlyPhrase);
    }
  });

  test("unknown tool returns error", async () => {
    const ctx = setupProxy();

    const outcome = await surfaceProxyResolver(ctx, "not_a_real_tool", {});

    expect(outcome.isError).toBe(true);
    expect(outcome.content).toContain("Unknown proxy tool");
  });
});
483
+
484
+ // -------------------------------------------------------------------------
485
+ // Multiple sequential CU actions accumulate state
486
+ // -------------------------------------------------------------------------
487
+
488
// Runs three different action tools back to back on one proxy and checks that
// the step counter and action history grow by one entry per action.
describe("state accumulation across actions", () => {
  test("step count increments across multiple actions", async () => {
    const ctx = setupProxy();

    // One row per action: the tool, its input, and the axTree used to resolve it.
    const actions = [
      {
        toolName: "computer_use_click",
        input: { element_id: 1, reasoning: "first" },
        axTree: "A",
      },
      {
        toolName: "computer_use_type_text",
        input: { text: "hello", reasoning: "second" },
        axTree: "B",
      },
      {
        toolName: "computer_use_scroll",
        input: { direction: "down", amount: 1, reasoning: "third" },
        axTree: "C",
      },
    ];

    // The actions must run strictly one after another, so await inside the loop.
    for (const [index, action] of actions.entries()) {
      const pending = surfaceProxyResolver(ctx, action.toolName, action.input);
      const outbound = sentMessages[index] as Record<string, unknown>;
      proxy.resolve(outbound.requestId as string, { axTree: action.axTree });
      await pending;
      expect(proxy.stepCount).toBe(index + 1);
    }

    // History has all 3
    expect(proxy.actionHistory).toHaveLength(3);
    expect(proxy.actionHistory.map((entry) => entry.toolName)).toEqual([
      "computer_use_click",
      "computer_use_type_text",
      "computer_use_scroll",
    ]);
  });
});
532
+ });