@vellumai/assistant 0.4.48 → 0.4.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (423)
  1. package/ARCHITECTURE.md +26 -35
  2. package/README.md +5 -26
  3. package/docs/architecture/integrations.md +45 -41
  4. package/docs/architecture/keychain-broker.md +3 -3
  5. package/docs/architecture/memory.md +180 -119
  6. package/docs/runbook-trusted-contacts.md +3 -8
  7. package/hook-templates/debug-prompt-logger/hook.json +1 -1
  8. package/hook-templates/debug-prompt-logger/run.sh +1 -3
  9. package/package.json +2 -2
  10. package/src/__tests__/actor-token-service.test.ts +0 -1
  11. package/src/__tests__/agent-loop.test.ts +3 -1
  12. package/src/__tests__/anthropic-provider.test.ts +249 -2
  13. package/src/__tests__/approval-cascade.test.ts +796 -0
  14. package/src/__tests__/approval-primitive.test.ts +0 -1
  15. package/src/__tests__/approval-routes-http.test.ts +4 -0
  16. package/src/__tests__/assistant-attachments.test.ts +12 -34
  17. package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
  18. package/src/__tests__/assistant-feature-flag-guardrails.test.ts +76 -0
  19. package/src/__tests__/assistant-feature-flags-integration.test.ts +0 -1
  20. package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +2 -2
  21. package/src/__tests__/canonical-guardian-store.test.ts +95 -0
  22. package/src/__tests__/channel-guardian.test.ts +0 -2
  23. package/src/__tests__/channel-readiness-routes.test.ts +15 -6
  24. package/src/__tests__/channel-readiness-service.test.ts +10 -9
  25. package/src/__tests__/checker.test.ts +13 -20
  26. package/src/__tests__/computer-use-skill-manifest-regression.test.ts +1 -1
  27. package/src/__tests__/computer-use-tools.test.ts +2 -19
  28. package/src/__tests__/config-schema.test.ts +1 -68
  29. package/src/__tests__/config-watcher.test.ts +0 -1
  30. package/src/__tests__/confirmation-request-guardian-bridge.test.ts +0 -1
  31. package/src/__tests__/context-image-dimensions.test.ts +332 -0
  32. package/src/__tests__/context-memory-e2e.test.ts +11 -100
  33. package/src/__tests__/context-token-estimator.test.ts +196 -13
  34. package/src/__tests__/conversation-attention-store.test.ts +0 -1
  35. package/src/__tests__/conversation-attention-telegram.test.ts +0 -1
  36. package/src/__tests__/conversation-routes-guardian-reply.test.ts +152 -0
  37. package/src/__tests__/conversation-routes-slash-commands.test.ts +2 -0
  38. package/src/__tests__/credential-metadata-store.test.ts +64 -73
  39. package/src/__tests__/credential-security-e2e.test.ts +1 -0
  40. package/src/__tests__/credential-security-invariants.test.ts +13 -7
  41. package/src/__tests__/credential-vault-unit.test.ts +284 -49
  42. package/src/__tests__/credential-vault.test.ts +150 -16
  43. package/src/__tests__/credentials-cli.test.ts +71 -0
  44. package/src/__tests__/cu-unified-flow.test.ts +532 -0
  45. package/src/__tests__/date-context.test.ts +93 -77
  46. package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
  47. package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +0 -1
  48. package/src/__tests__/ephemeral-permissions.test.ts +3 -3
  49. package/src/__tests__/gateway-only-guard.test.ts +0 -1
  50. package/src/__tests__/guardian-action-grant-mint-consume.test.ts +0 -1
  51. package/src/__tests__/guardian-decision-primitive-canonical.test.ts +0 -1
  52. package/src/__tests__/guardian-routing-invariants.test.ts +93 -1
  53. package/src/__tests__/guardian-verification-voice-binding.test.ts +0 -1
  54. package/src/__tests__/handlers-user-message-approval-consumption.test.ts +0 -39
  55. package/src/__tests__/heartbeat-service.test.ts +0 -1
  56. package/src/__tests__/history-repair.test.ts +245 -0
  57. package/src/__tests__/host-cu-proxy.test.ts +791 -0
  58. package/src/__tests__/host-shell-tool.test.ts +27 -15
  59. package/src/__tests__/http-user-message-parity.test.ts +2 -0
  60. package/src/__tests__/ingress-url-consistency.test.ts +14 -21
  61. package/src/__tests__/integration-status.test.ts +32 -51
  62. package/src/__tests__/intent-routing.test.ts +0 -1
  63. package/src/__tests__/invite-redemption-service.test.ts +65 -1
  64. package/src/__tests__/invite-routes-http.test.ts +10 -9
  65. package/src/__tests__/keychain-broker-client.test.ts +14 -46
  66. package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
  67. package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
  68. package/src/__tests__/memory-recall-quality.test.ts +244 -407
  69. package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
  70. package/src/__tests__/memory-regressions.test.ts +477 -2841
  71. package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
  72. package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
  73. package/src/__tests__/mime-builder.test.ts +28 -0
  74. package/src/__tests__/native-web-search.test.ts +1 -0
  75. package/src/__tests__/notification-routing-intent.test.ts +0 -1
  76. package/src/__tests__/oauth-cli.test.ts +941 -15
  77. package/src/__tests__/oauth-provider-profiles.test.ts +9 -9
  78. package/src/__tests__/oauth-scope-policy.test.ts +4 -6
  79. package/src/__tests__/oauth-store.test.ts +870 -0
  80. package/src/__tests__/onboarding-starter-tasks.test.ts +0 -1
  81. package/src/__tests__/provider-error-scenarios.test.ts +0 -1
  82. package/src/__tests__/provider-streaming.benchmark.test.ts +0 -1
  83. package/src/__tests__/public-ingress-urls.test.ts +15 -21
  84. package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
  85. package/src/__tests__/recording-handler.test.ts +3 -4
  86. package/src/__tests__/registry.test.ts +2 -3
  87. package/src/__tests__/relay-server.test.ts +46 -1
  88. package/src/__tests__/runtime-events-sse.test.ts +55 -7
  89. package/src/__tests__/schedule-store.test.ts +0 -1
  90. package/src/__tests__/schedule-tools.test.ts +32 -0
  91. package/src/__tests__/scheduler-recurrence.test.ts +0 -1
  92. package/src/__tests__/scoped-approval-grants.test.ts +0 -1
  93. package/src/__tests__/scoped-grant-security-matrix.test.ts +0 -1
  94. package/src/__tests__/script-proxy-certs.test.ts +1 -1
  95. package/src/__tests__/secret-ingress-handler.test.ts +0 -1
  96. package/src/__tests__/secret-onetime-send.test.ts +1 -0
  97. package/src/__tests__/secure-keys.test.ts +7 -2
  98. package/src/__tests__/send-endpoint-busy.test.ts +24 -6
  99. package/src/__tests__/sequence-store.test.ts +0 -1
  100. package/src/__tests__/session-abort-tool-results.test.ts +1 -14
  101. package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
  102. package/src/__tests__/session-agent-loop.test.ts +19 -15
  103. package/src/__tests__/session-confirmation-signals.test.ts +1 -15
  104. package/src/__tests__/session-error.test.ts +124 -2
  105. package/src/__tests__/session-history-web-search.test.ts +918 -0
  106. package/src/__tests__/session-init.benchmark.test.ts +4 -5
  107. package/src/__tests__/session-pre-run-repair.test.ts +1 -14
  108. package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
  109. package/src/__tests__/session-queue.test.ts +37 -27
  110. package/src/__tests__/session-runtime-assembly.test.ts +54 -0
  111. package/src/__tests__/session-slash-known.test.ts +1 -15
  112. package/src/__tests__/session-slash-queue.test.ts +1 -15
  113. package/src/__tests__/session-slash-unknown.test.ts +1 -15
  114. package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
  115. package/src/__tests__/session-workspace-injection.test.ts +3 -37
  116. package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
  117. package/src/__tests__/skill-include-graph.test.ts +66 -0
  118. package/src/__tests__/skill-load-feature-flag.test.ts +0 -1
  119. package/src/__tests__/skill-load-tool.test.ts +149 -1
  120. package/src/__tests__/skill-projection-feature-flag.test.ts +0 -1
  121. package/src/__tests__/skills-install-extract.test.ts +93 -0
  122. package/src/__tests__/skills-uninstall.test.ts +1 -1
  123. package/src/__tests__/skills.test.ts +3 -3
  124. package/src/__tests__/skillssh-registry.test.ts +451 -0
  125. package/src/__tests__/slack-channel-config.test.ts +67 -3
  126. package/src/__tests__/slack-share-routes.test.ts +17 -19
  127. package/src/__tests__/system-prompt.test.ts +0 -1
  128. package/src/__tests__/telegram-invite-adapter.test.ts +18 -22
  129. package/src/__tests__/terminal-tools.test.ts +4 -3
  130. package/src/__tests__/test-support/computer-use-skill-harness.ts +3 -2
  131. package/src/__tests__/tool-approval-handler.test.ts +0 -1
  132. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -1
  133. package/src/__tests__/tool-executor-lifecycle-events.test.ts +0 -1
  134. package/src/__tests__/tool-executor-shell-integration.test.ts +0 -1
  135. package/src/__tests__/tool-executor.test.ts +0 -1
  136. package/src/__tests__/tool-grant-request-escalation.test.ts +0 -1
  137. package/src/__tests__/trust-store-pattern-matches.test.ts +29 -0
  138. package/src/__tests__/trust-store.test.ts +7 -13
  139. package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
  140. package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +0 -1
  141. package/src/__tests__/twilio-routes.test.ts +0 -16
  142. package/src/__tests__/verification-control-plane-policy.test.ts +0 -1
  143. package/src/__tests__/voice-invite-redemption.test.ts +32 -1
  144. package/src/__tests__/voice-scoped-grant-consumer.test.ts +0 -1
  145. package/src/agent/ax-tree-compaction.test.ts +286 -0
  146. package/src/agent/loop.ts +104 -131
  147. package/src/approvals/AGENTS.md +1 -1
  148. package/src/approvals/guardian-request-resolvers.ts +14 -2
  149. package/src/bundler/compiler-tools.ts +66 -2
  150. package/src/calls/call-domain.ts +133 -6
  151. package/src/calls/call-store.ts +6 -0
  152. package/src/calls/relay-server.ts +52 -18
  153. package/src/calls/relay-setup-router.ts +17 -1
  154. package/src/calls/twilio-config.ts +3 -8
  155. package/src/calls/twilio-routes.ts +1 -2
  156. package/src/calls/types.ts +3 -1
  157. package/src/calls/voice-ingress-preflight.ts +1 -1
  158. package/src/cli/commands/browser-relay.ts +18 -12
  159. package/src/cli/commands/completions.ts +0 -3
  160. package/src/cli/commands/credentials.ts +101 -15
  161. package/src/cli/commands/doctor.ts +4 -3
  162. package/src/cli/commands/mcp.ts +46 -59
  163. package/src/cli/commands/memory.ts +16 -165
  164. package/src/cli/commands/oauth/apps.ts +284 -0
  165. package/src/cli/commands/oauth/connections.ts +633 -0
  166. package/src/cli/commands/oauth/index.ts +52 -0
  167. package/src/cli/commands/oauth/providers.ts +256 -0
  168. package/src/cli/commands/sessions.ts +5 -2
  169. package/src/cli/commands/skills.ts +177 -339
  170. package/src/cli/http-client.ts +0 -20
  171. package/src/cli/main-screen.tsx +2 -2
  172. package/src/cli/program.ts +6 -11
  173. package/src/cli/reference.ts +1 -3
  174. package/src/cli.ts +4 -10
  175. package/src/config/assistant-feature-flags.ts +0 -3
  176. package/src/config/bundled-skills/_shared/CLI_RETRIEVAL_PATTERN.md +1 -1
  177. package/src/config/bundled-skills/computer-use/SKILL.md +3 -6
  178. package/src/config/bundled-skills/computer-use/TOOLS.json +23 -5
  179. package/src/config/bundled-skills/computer-use/tools/{computer-use-request-control.ts → computer-use-observe.ts} +1 -5
  180. package/src/config/bundled-skills/google-calendar/calendar-client.ts +21 -16
  181. package/src/config/bundled-skills/messaging/tools/shared.ts +1 -4
  182. package/src/config/bundled-skills/settings/SKILL.md +1 -1
  183. package/src/config/bundled-skills/settings/TOOLS.json +2 -8
  184. package/src/config/bundled-skills/settings/tools/voice-config-update.ts +5 -33
  185. package/src/config/bundled-tool-registry.ts +2 -5
  186. package/src/config/env-registry.ts +14 -83
  187. package/src/config/env.ts +11 -50
  188. package/src/config/feature-flag-registry.json +16 -16
  189. package/src/config/loader.ts +0 -6
  190. package/src/config/schema.ts +4 -13
  191. package/src/config/schemas/memory-lifecycle.ts +0 -9
  192. package/src/config/schemas/memory-processing.ts +0 -180
  193. package/src/config/schemas/memory-retrieval.ts +32 -104
  194. package/src/config/schemas/memory.ts +0 -10
  195. package/src/config/skills.ts +21 -2
  196. package/src/config/types.ts +0 -4
  197. package/src/context/image-dimensions.ts +229 -0
  198. package/src/context/token-estimator.ts +75 -12
  199. package/src/context/window-manager.ts +53 -11
  200. package/src/daemon/assistant-attachments.ts +1 -13
  201. package/src/daemon/config-watcher.ts +61 -3
  202. package/src/daemon/daemon-control.ts +1 -1
  203. package/src/daemon/date-context.ts +114 -31
  204. package/src/daemon/handlers/config-ingress.ts +8 -33
  205. package/src/daemon/handlers/config-slack-channel.ts +49 -46
  206. package/src/daemon/handlers/config-telegram.ts +32 -16
  207. package/src/daemon/handlers/sessions.ts +27 -36
  208. package/src/daemon/handlers/shared.ts +0 -130
  209. package/src/daemon/handlers/skills.ts +20 -1
  210. package/src/daemon/history-repair.ts +72 -8
  211. package/src/daemon/host-cu-proxy.ts +430 -0
  212. package/src/daemon/lifecycle.ts +67 -71
  213. package/src/daemon/mcp-reload-service.ts +2 -2
  214. package/src/daemon/message-protocol.ts +3 -0
  215. package/src/daemon/message-types/computer-use.ts +1 -129
  216. package/src/daemon/message-types/host-cu.ts +19 -0
  217. package/src/daemon/message-types/memory.ts +4 -16
  218. package/src/daemon/message-types/messages.ts +4 -0
  219. package/src/daemon/message-types/sessions.ts +4 -0
  220. package/src/daemon/server.ts +25 -21
  221. package/src/daemon/session-agent-loop-handlers.ts +40 -0
  222. package/src/daemon/session-agent-loop.ts +334 -48
  223. package/src/daemon/session-attachments.ts +1 -2
  224. package/src/daemon/session-error.ts +89 -6
  225. package/src/daemon/session-history.ts +17 -7
  226. package/src/daemon/session-media-retry.ts +6 -2
  227. package/src/daemon/session-memory.ts +69 -149
  228. package/src/daemon/session-process.ts +10 -1
  229. package/src/daemon/session-runtime-assembly.ts +49 -19
  230. package/src/daemon/session-slash.ts +1 -1
  231. package/src/daemon/session-surfaces.ts +43 -28
  232. package/src/daemon/session-tool-setup.ts +9 -10
  233. package/src/daemon/session.ts +150 -17
  234. package/src/daemon/tool-side-effects.ts +2 -8
  235. package/src/daemon/watch-handler.ts +2 -2
  236. package/src/events/tool-metrics-listener.ts +2 -2
  237. package/src/hooks/manager.ts +1 -4
  238. package/src/inbound/public-ingress-urls.ts +7 -7
  239. package/src/instrument.ts +61 -1
  240. package/src/logfire.ts +16 -5
  241. package/src/memory/admin.ts +2 -191
  242. package/src/memory/canonical-guardian-store.ts +38 -2
  243. package/src/memory/conversation-crud.ts +0 -33
  244. package/src/memory/conversation-key-store.ts +21 -0
  245. package/src/memory/conversation-queries.ts +22 -3
  246. package/src/memory/db-init.ts +32 -0
  247. package/src/memory/embedding-backend.ts +84 -8
  248. package/src/memory/embedding-types.ts +9 -1
  249. package/src/memory/indexer.ts +7 -46
  250. package/src/memory/items-extractor.ts +274 -76
  251. package/src/memory/job-handlers/backfill.ts +2 -127
  252. package/src/memory/job-handlers/cleanup.ts +2 -16
  253. package/src/memory/job-handlers/extraction.ts +2 -138
  254. package/src/memory/job-handlers/index-maintenance.ts +1 -6
  255. package/src/memory/job-handlers/summarization.ts +3 -148
  256. package/src/memory/job-utils.ts +21 -59
  257. package/src/memory/jobs-store.ts +1 -159
  258. package/src/memory/jobs-worker.ts +9 -52
  259. package/src/memory/migrations/104-core-indexes.ts +3 -3
  260. package/src/memory/migrations/149-oauth-tables.ts +62 -0
  261. package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
  262. package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
  263. package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
  264. package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
  265. package/src/memory/migrations/154-drop-fts.ts +20 -0
  266. package/src/memory/migrations/155-drop-conflicts.ts +7 -0
  267. package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
  268. package/src/memory/migrations/index.ts +8 -0
  269. package/src/memory/qdrant-client.ts +148 -51
  270. package/src/memory/raw-query.ts +1 -1
  271. package/src/memory/retriever.test.ts +294 -273
  272. package/src/memory/retriever.ts +421 -645
  273. package/src/memory/schema/calls.ts +2 -0
  274. package/src/memory/schema/index.ts +1 -0
  275. package/src/memory/schema/memory-core.ts +3 -48
  276. package/src/memory/schema/oauth.ts +67 -0
  277. package/src/memory/search/formatting.ts +263 -176
  278. package/src/memory/search/lexical.ts +1 -254
  279. package/src/memory/search/ranking.ts +0 -455
  280. package/src/memory/search/semantic.ts +100 -14
  281. package/src/memory/search/staleness.ts +47 -0
  282. package/src/memory/search/tier-classifier.ts +21 -0
  283. package/src/memory/search/types.ts +15 -77
  284. package/src/memory/task-memory-cleanup.ts +4 -6
  285. package/src/messaging/provider.ts +4 -4
  286. package/src/messaging/providers/gmail/client.ts +82 -2
  287. package/src/messaging/providers/gmail/mime-builder.ts +17 -7
  288. package/src/messaging/providers/gmail/people-client.ts +10 -10
  289. package/src/messaging/providers/telegram-bot/adapter.ts +17 -17
  290. package/src/messaging/providers/whatsapp/adapter.ts +11 -8
  291. package/src/messaging/registry.ts +2 -32
  292. package/src/notifications/copy-composer.ts +0 -5
  293. package/src/notifications/signal.ts +4 -5
  294. package/src/oauth/byo-connection.test.ts +133 -25
  295. package/src/oauth/byo-connection.ts +22 -6
  296. package/src/oauth/connect-orchestrator.ts +113 -57
  297. package/src/oauth/connect-types.ts +17 -23
  298. package/src/oauth/connection-resolver.ts +35 -11
  299. package/src/oauth/connection.ts +1 -1
  300. package/src/oauth/manual-token-connection.ts +104 -0
  301. package/src/oauth/oauth-store.ts +582 -0
  302. package/src/oauth/platform-connection.test.ts +29 -0
  303. package/src/oauth/platform-connection.ts +6 -5
  304. package/src/oauth/provider-behaviors.ts +124 -0
  305. package/src/oauth/scope-policy.ts +9 -2
  306. package/src/oauth/seed-providers.ts +167 -0
  307. package/src/oauth/token-persistence.ts +81 -77
  308. package/src/permissions/checker.ts +3 -3
  309. package/src/permissions/defaults.ts +1 -1
  310. package/src/permissions/prompter.ts +10 -1
  311. package/src/permissions/trust-store.ts +36 -1
  312. package/src/playbooks/playbook-compiler.ts +1 -1
  313. package/src/prompts/__tests__/build-cli-reference-section.test.ts +3 -1
  314. package/src/prompts/system-prompt.ts +46 -42
  315. package/src/providers/anthropic/client.ts +59 -20
  316. package/src/providers/retry.ts +1 -27
  317. package/src/providers/types.ts +7 -1
  318. package/src/runtime/AGENTS.md +9 -0
  319. package/src/runtime/auth/route-policy.ts +6 -6
  320. package/src/runtime/channel-reply-delivery.ts +0 -40
  321. package/src/runtime/gateway-client.ts +0 -7
  322. package/src/runtime/guardian-reply-router.ts +24 -22
  323. package/src/runtime/http-server.ts +10 -8
  324. package/src/runtime/http-types.ts +2 -2
  325. package/src/runtime/invite-redemption-service.ts +19 -1
  326. package/src/runtime/invite-service.ts +25 -0
  327. package/src/runtime/middleware/twilio-validation.ts +1 -11
  328. package/src/runtime/pending-interactions.ts +14 -12
  329. package/src/runtime/routes/brain-graph-routes.ts +10 -90
  330. package/src/runtime/routes/channel-delivery-routes.ts +0 -1
  331. package/src/runtime/routes/conversation-routes.ts +81 -19
  332. package/src/runtime/routes/events-routes.ts +21 -11
  333. package/src/runtime/routes/host-cu-routes.ts +97 -0
  334. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
  335. package/src/runtime/routes/inbound-stages/background-dispatch.ts +12 -111
  336. package/src/runtime/routes/integrations/slack/share.ts +6 -7
  337. package/src/runtime/routes/log-export-routes.ts +126 -8
  338. package/src/runtime/routes/memory-item-routes.test.ts +754 -0
  339. package/src/runtime/routes/memory-item-routes.ts +503 -0
  340. package/src/runtime/routes/session-management-routes.ts +3 -3
  341. package/src/runtime/routes/settings-routes.ts +55 -48
  342. package/src/runtime/routes/surface-action-routes.ts +1 -1
  343. package/src/runtime/routes/trust-rules-routes.ts +14 -0
  344. package/src/runtime/routes/watch-routes.ts +128 -0
  345. package/src/runtime/routes/workspace-routes.ts +2 -1
  346. package/src/schedule/integration-status.ts +10 -9
  347. package/src/security/credential-key.ts +0 -156
  348. package/src/security/keychain-broker-client.ts +22 -10
  349. package/src/security/oauth2.ts +1 -1
  350. package/src/security/secure-keys.ts +25 -3
  351. package/src/security/token-manager.ts +137 -64
  352. package/src/skills/catalog-install.ts +414 -0
  353. package/src/skills/include-graph.ts +32 -0
  354. package/src/skills/skillssh-registry.ts +503 -0
  355. package/src/telegram/bot-username.ts +2 -3
  356. package/src/tools/assets/search.ts +5 -1
  357. package/src/tools/browser/network-recorder.ts +1 -1
  358. package/src/tools/browser/network-recording-types.ts +1 -1
  359. package/src/tools/computer-use/definitions.ts +36 -11
  360. package/src/tools/computer-use/registry.ts +5 -6
  361. package/src/tools/credentials/broker.ts +1 -2
  362. package/src/tools/credentials/metadata-store.ts +17 -121
  363. package/src/tools/credentials/vault.ts +92 -167
  364. package/src/tools/memory/definitions.ts +4 -13
  365. package/src/tools/memory/handlers.test.ts +83 -103
  366. package/src/tools/memory/handlers.ts +50 -85
  367. package/src/tools/registry.ts +2 -7
  368. package/src/tools/schedule/create.ts +8 -1
  369. package/src/tools/schedule/update.ts +8 -1
  370. package/src/tools/skills/load.ts +85 -3
  371. package/src/tools/watch/watch-state.ts +0 -12
  372. package/src/util/logger.ts +7 -41
  373. package/src/util/platform.ts +9 -28
  374. package/src/watcher/providers/google-calendar.ts +2 -1
  375. package/src/__tests__/clarification-resolver.test.ts +0 -193
  376. package/src/__tests__/computer-use-session-compaction.test.ts +0 -143
  377. package/src/__tests__/computer-use-session-lifecycle.test.ts +0 -322
  378. package/src/__tests__/computer-use-session-working-dir.test.ts +0 -166
  379. package/src/__tests__/computer-use-skill-baseline.test.ts +0 -78
  380. package/src/__tests__/computer-use-skill-endstate.test.ts +0 -105
  381. package/src/__tests__/computer-use-skill-lifecycle-cleanup.test.ts +0 -249
  382. package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
  383. package/src/__tests__/conflict-policy.test.ts +0 -269
  384. package/src/__tests__/conflict-store.test.ts +0 -372
  385. package/src/__tests__/contradiction-checker.test.ts +0 -361
  386. package/src/__tests__/entity-extractor.test.ts +0 -211
  387. package/src/__tests__/entity-search.test.ts +0 -1117
  388. package/src/__tests__/profile-compiler.test.ts +0 -392
  389. package/src/__tests__/ride-shotgun-handler.test.ts +0 -452
  390. package/src/__tests__/session-conflict-gate.test.ts +0 -1228
  391. package/src/__tests__/session-profile-injection.test.ts +0 -557
  392. package/src/cli/commands/dev.ts +0 -129
  393. package/src/cli/commands/map.ts +0 -391
  394. package/src/cli/commands/oauth.ts +0 -77
  395. package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
  396. package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
  397. package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
  398. package/src/daemon/computer-use-session.ts +0 -1026
  399. package/src/daemon/ride-shotgun-handler.ts +0 -569
  400. package/src/daemon/session-conflict-gate.ts +0 -167
  401. package/src/daemon/session-dynamic-profile.ts +0 -77
  402. package/src/memory/clarification-resolver.ts +0 -417
  403. package/src/memory/conflict-intent.ts +0 -205
  404. package/src/memory/conflict-policy.ts +0 -127
  405. package/src/memory/conflict-store.ts +0 -410
  406. package/src/memory/contradiction-checker.ts +0 -508
  407. package/src/memory/entity-extractor.ts +0 -535
  408. package/src/memory/format-recall.ts +0 -47
  409. package/src/memory/fts-reconciler.ts +0 -165
  410. package/src/memory/job-handlers/conflict.ts +0 -200
  411. package/src/memory/profile-compiler.ts +0 -195
  412. package/src/memory/recall-cache.ts +0 -117
  413. package/src/memory/search/entity.ts +0 -535
  414. package/src/memory/search/query-expansion.test.ts +0 -70
  415. package/src/memory/search/query-expansion.ts +0 -118
  416. package/src/oauth/provider-base-urls.ts +0 -21
  417. package/src/oauth/provider-profiles.ts +0 -192
  418. package/src/prompts/computer-use-prompt.ts +0 -98
  419. package/src/runtime/routes/computer-use-routes.ts +0 -641
  420. package/src/runtime/routes/mcp-routes.ts +0 -20
  421. package/src/runtime/telegram-streaming-delivery.test.ts +0 -729
  422. package/src/runtime/telegram-streaming-delivery.ts +0 -393
  423. package/src/tools/computer-use/request-computer-control.ts +0 -56
@@ -164,6 +164,26 @@ mock.module("../tools/credentials/metadata-store.js", () => ({
164
164
  },
165
165
  }));
166
166
 
167
+ // ---------------------------------------------------------------------------
168
+ // Mock oauth-store
169
+ // ---------------------------------------------------------------------------
170
+
171
+ let disconnectOAuthProviderCalls: string[] = [];
172
+ let disconnectOAuthProviderResult: "disconnected" | "not-found" | "error" =
173
+ "not-found";
174
+
175
+ mock.module("../oauth/oauth-store.js", () => ({
176
+ disconnectOAuthProvider: async (
177
+ providerKey: string,
178
+ ): Promise<"disconnected" | "not-found" | "error"> => {
179
+ disconnectOAuthProviderCalls.push(providerKey);
180
+ return disconnectOAuthProviderResult;
181
+ },
182
+ getConnectionByProvider: (): undefined => undefined,
183
+ listConnections: (): never[] => [],
184
+ deleteConnection: (): boolean => false,
185
+ }));
186
+
167
187
  // ---------------------------------------------------------------------------
168
188
  // Import the module under test (after mocks are registered)
169
189
  // ---------------------------------------------------------------------------
@@ -281,6 +301,8 @@ describe("assistant credentials CLI", () => {
281
301
  _listMetadataCalls = 0;
282
302
  _getMetadataCalls = 0;
283
303
  _getMetadataByIdCalls = 0;
304
+ disconnectOAuthProviderCalls = [];
305
+ disconnectOAuthProviderResult = "not-found";
284
306
  process.exitCode = 0;
285
307
  });
286
308
 
@@ -611,6 +633,55 @@ describe("assistant credentials CLI", () => {
611
633
  ),
612
634
  ).toBeUndefined();
613
635
  });
636
+
637
+ test("calls disconnectOAuthProvider for OAuth cleanup", async () => {
638
+ seedCredential("gmail", "access_token", "ya29.token_value");
639
+
640
+ const result = await runCli(["delete", "gmail:access_token", "--json"]);
641
+ expect(result.exitCode).toBe(0);
642
+ const parsed = JSON.parse(result.stdout);
643
+ expect(parsed.ok).toBe(true);
644
+
645
+ // disconnectOAuthProvider should have been called with the service name
646
+ expect(disconnectOAuthProviderCalls).toEqual(["gmail"]);
647
+ });
648
+
649
+ test("succeeds when only OAuth connection exists (no legacy credential)", async () => {
650
+ // No legacy credential seeded — only the OAuth disconnect finds something
651
+ disconnectOAuthProviderResult = "disconnected";
652
+
653
+ const result = await runCli(["delete", "gmail:access_token", "--json"]);
654
+ expect(result.exitCode).toBe(0);
655
+ const parsed = JSON.parse(result.stdout);
656
+ expect(parsed.ok).toBe(true);
657
+ expect(parsed.service).toBe("gmail");
658
+ expect(parsed.field).toBe("access_token");
659
+
660
+ expect(disconnectOAuthProviderCalls).toEqual(["gmail"]);
661
+ });
662
+
663
+ test("demonstrates colon-in-service-name parsing limitation with integration:gmail", async () => {
664
+ // parseCredentialName("integration:gmail:access_token") splits on the
665
+ // first colon, yielding service="integration" and field="gmail:access_token".
666
+ // This is incorrect for the intended service "integration:gmail". The fix
667
+ // for this limitation is addressed by introducing a dedicated `disconnect`
668
+ // subcommand (PR 5).
669
+ const result = await runCli([
670
+ "delete",
671
+ "integration:gmail:access_token",
672
+ "--json",
673
+ ]);
674
+ // The command parses as service="integration", field="gmail:access_token"
675
+ // which finds nothing and reports not-found.
676
+ expect(result.exitCode).toBe(1);
677
+ const parsed = JSON.parse(result.stdout);
678
+ expect(parsed.ok).toBe(false);
679
+ expect(parsed.error).toContain("not found");
680
+
681
+ // disconnectOAuthProvider was called with "integration" (wrong) instead
682
+ // of "integration:gmail" (intended).
683
+ expect(disconnectOAuthProviderCalls).toEqual(["integration"]);
684
+ });
614
685
  });
615
686
 
616
687
  // =========================================================================
@@ -0,0 +1,532 @@
1
+ /**
2
+ * End-to-end tests for the unified CU proxy flow.
3
+ *
4
+ * Tests the surfaceProxyResolver's CU tool routing — the integration
5
+ * point between the agent loop and the HostCuProxy.
6
+ */
7
+
8
+ import { afterEach, describe, expect, test } from "bun:test";
9
+
10
+ import { HostCuProxy } from "../daemon/host-cu-proxy.js";
11
+ import type { SurfaceSessionContext } from "../daemon/session-surfaces.js";
12
+ import { surfaceProxyResolver } from "../daemon/session-surfaces.js";
13
+
14
+ // ---------------------------------------------------------------------------
15
+ // Test helpers
16
+ // ---------------------------------------------------------------------------
17
+
18
/**
 * Create a bare-bones SurfaceSessionContext for the CU routing tests.
 *
 * Every collection starts empty and every callback is a no-op stub; only
 * `hostCuProxy` varies between tests.
 *
 * @param hostCuProxy Proxy to attach to the context; omit it to model a
 *   session that has no proxy at all.
 * @returns A context with a fixed conversation id of "test-session".
 */
function buildMockContext(hostCuProxy?: HostCuProxy): SurfaceSessionContext {
  const context: SurfaceSessionContext = {
    conversationId: "test-session",
    hostCuProxy,

    // Inert I/O stubs.
    traceEmitter: { emit: () => {} },
    sendToClient: () => {},

    // Empty surface bookkeeping.
    pendingSurfaceActions: new Map(),
    lastSurfaceAction: new Map(),
    surfaceState: new Map(),
    surfaceUndoStacks: new Map(),
    surfaceActionRequestIds: new Set(),
    currentTurnSurfaces: [],

    // Message-processing stubs.
    isProcessing: () => false,
    enqueueMessage: () => ({ queued: false, requestId: "r1" }),
    getQueueDepth: () => 0,
    processMessage: async () => "",
    withSurface: async (_id, fn) => fn(),
  };
  return context;
}
41
+
42
+ // ---------------------------------------------------------------------------
43
+ // Tests
44
+ // ---------------------------------------------------------------------------
45
+
46
+ describe("surfaceProxyResolver — CU tool routing", () => {
47
// Messages the proxy handed to the fake client sender during the current test.
let sentMessages: unknown[];
// Proxy created by the most recent setupProxy() call.
let proxy: HostCuProxy;

/**
 * Reset the captured messages, create a HostCuProxy whose sender records
 * into `sentMessages`, flag the client as connected, and wrap the proxy in
 * a mock session context.
 *
 * @param maxSteps Optional third constructor argument for HostCuProxy.
 */
function setupProxy(maxSteps?: number): SurfaceSessionContext {
  sentMessages = [];
  const captureMessage = (msg: unknown) => sentMessages.push(msg);

  proxy = new HostCuProxy(captureMessage as never, undefined, maxSteps);
  // Without the connected flag the proxy would not send requests.
  proxy.updateSender(captureMessage as never, true);

  const ctx = buildMockContext(proxy);
  return ctx;
}

// Dispose whatever proxy is currently held once each test finishes.
afterEach(() => {
  if (proxy) {
    proxy.dispose();
  }
});
62
+
63
+ // -------------------------------------------------------------------------
64
+ // No desktop client connected
65
+ // -------------------------------------------------------------------------
66
+
67
// CU tools must fail fast with an error result when there is no usable
// proxy: either the context has none, or the proxy's client is not connected.
describe("no desktop client connected", () => {
  test("returns error when hostCuProxy is undefined", async () => {
    const ctx = buildMockContext(/* no proxy */);
    const result = await surfaceProxyResolver(ctx, "computer_use_click", {
      element_id: 42,
      reasoning: "click the button",
    });

    expect(result.isError).toBe(true);
    expect(result.content).toContain("not available");
    expect(result.content).toContain("no desktop client");
  });

  test("returns error for screenshot tool when no proxy", async () => {
    const ctx = buildMockContext();
    const result = await surfaceProxyResolver(
      ctx,
      "computer_use_screenshot",
      {},
    );

    expect(result.isError).toBe(true);
    expect(result.content).toContain("not available");
  });

  test("returns error when proxy exists but client not connected", async () => {
    const sendToClient = () => {};
    const proxyObj = new HostCuProxy(sendToClient as never);
    // This proxy is local to the test, so the shared afterEach hook does
    // not know about it. Dispose it in `finally` so a failing assertion
    // cannot leak it into later tests.
    try {
      // Default clientConnected is false — do NOT call updateSender with true
      const ctx = buildMockContext(proxyObj);
      const result = await surfaceProxyResolver(ctx, "computer_use_click", {
        element_id: 1,
      });

      expect(result.isError).toBe(true);
      expect(result.content).toContain("not available");
    } finally {
      proxyObj.dispose();
    }
  });

  test("returns error for terminal tools when no proxy", async () => {
    const ctx = buildMockContext();

    const doneResult = await surfaceProxyResolver(ctx, "computer_use_done", {
      summary: "finished",
    });
    expect(doneResult.isError).toBe(true);

    const respondResult = await surfaceProxyResolver(
      ctx,
      "computer_use_respond",
      { answer: "42" },
    );
    expect(respondResult.isError).toBe(true);
  });
});
122
+
123
+ // -------------------------------------------------------------------------
124
+ // Terminal tools (computer_use_done, computer_use_respond)
125
+ // -------------------------------------------------------------------------
126
+
127
// Terminal tools (computer_use_done / computer_use_respond) are expected to
// resolve without sending anything to the client and to reset the proxy.
describe("terminal tools resolve immediately", () => {
  test("computer_use_done resets proxy and returns summary", async () => {
    const ctx = setupProxy();

    // Record some actions first to verify reset
    proxy.recordAction("computer_use_click", { element_id: 1 });
    proxy.recordAction("computer_use_click", { element_id: 2 });
    expect(proxy.stepCount).toBe(2);

    const result = await surfaceProxyResolver(ctx, "computer_use_done", {
      summary: "Completed the file upload",
    });

    expect(result.isError).toBe(false);
    expect(result.content).toBe("Completed the file upload");
    // No message sent to client for terminal tools
    expect(sentMessages).toHaveLength(0);
    // Proxy state should be reset
    expect(proxy.stepCount).toBe(0);
    expect(proxy.actionHistory).toHaveLength(0);
  });

  test("computer_use_respond resets proxy and returns answer", async () => {
    const ctx = setupProxy();

    proxy.recordAction("computer_use_click", { element_id: 1 });

    const result = await surfaceProxyResolver(ctx, "computer_use_respond", {
      answer: "The price is $42",
      reasoning: "Found the price on the page",
    });

    expect(result.isError).toBe(false);
    expect(result.content).toBe("The price is $42");
    expect(sentMessages).toHaveLength(0);
    expect(proxy.stepCount).toBe(0);
  });

  test("computer_use_done uses default when no summary provided", async () => {
    const ctx = setupProxy();

    const result = await surfaceProxyResolver(ctx, "computer_use_done", {});

    expect(result.isError).toBe(false);
    expect(result.content).toBe("Task complete");
  });

  // Previously titled "computer_use_respond falls back to summary then
  // default", although the body never called computer_use_respond without
  // an `answer`. The title now states what is actually asserted.
  // TODO(review): add cases for computer_use_respond with only `summary`
  // and with empty input once the intended fallback order is confirmed
  // against surfaceProxyResolver.
  test("computer_use_done returns summary and computer_use_respond returns answer on the same proxy", async () => {
    const ctx = setupProxy();

    // done tool echoes the summary it was given
    const r1 = await surfaceProxyResolver(ctx, "computer_use_done", {
      summary: "All done",
    });
    expect(r1.content).toBe("All done");

    // respond tool echoes the answer it was given
    const r2 = await surfaceProxyResolver(ctx, "computer_use_respond", {
      answer: "The answer is 7",
    });
    expect(r2.content).toBe("The answer is 7");
  });
});
190
+
191
+ // -------------------------------------------------------------------------
192
+ // Action tools (computer_use_click, screenshot, etc.) — proxy to client
193
+ // -------------------------------------------------------------------------
194
+
195
// Action tools are forwarded to the client as "host_cu_request" messages;
// the resolver's promise settles once the test resolves that request.
describe("action tools proxy to client", () => {
  // Typed view of the n-th message captured from the proxy's sender.
  const sentAt = (index: number) =>
    sentMessages[index] as Record<string, unknown>;

  test("computer_use_click routes through proxy and returns observation", async () => {
    const ctx = setupProxy();

    const pending = surfaceProxyResolver(ctx, "computer_use_click", {
      element_id: 42,
      reasoning: "Click the submit button",
    });

    // Exactly one request went out, carrying the tool name, input and session.
    expect(sentMessages).toHaveLength(1);
    const request = sentAt(0);
    expect(request.type).toBe("host_cu_request");
    expect(request.toolName).toBe("computer_use_click");
    expect(request.input).toEqual({
      element_id: 42,
      reasoning: "Click the submit button",
    });
    expect(request.sessionId).toBe("test-session");

    // The action is already counted while the request is still pending.
    expect(proxy.stepCount).toBe(1);
    expect(proxy.actionHistory).toHaveLength(1);
    expect(proxy.actionHistory[0].toolName).toBe("computer_use_click");

    // Play the client's part: answer the request with an observation.
    proxy.resolve(request.requestId as string, {
      axTree: "SubmitButton [1]\nTextField [2]",
      executionResult: "Clicked element 42",
    });

    const outcome = await pending;
    expect(outcome.isError).toBe(false);
    expect(outcome.content).toContain("Clicked element 42");
    expect(outcome.content).toContain("<ax-tree>");
    expect(outcome.content).toContain("SubmitButton [1]");
  });

  test("computer_use_screenshot routes through proxy", async () => {
    const ctx = setupProxy();

    const pending = surfaceProxyResolver(ctx, "computer_use_screenshot", {
      reasoning: "Capture current state",
    });

    expect(sentMessages).toHaveLength(1);
    const request = sentAt(0);
    expect(request.type).toBe("host_cu_request");
    expect(request.toolName).toBe("computer_use_screenshot");

    proxy.resolve(request.requestId as string, {
      axTree: "Window [1]",
      screenshot: "base64screenshot",
      screenshotWidthPx: 1920,
      screenshotHeightPx: 1080,
    });

    const outcome = await pending;
    expect(outcome.isError).toBe(false);
    expect(outcome.content).toContain("1920x1080 px");

    // The screenshot comes back as a single base64 JPEG image block.
    const expectedImageBlock = {
      type: "image",
      source: {
        type: "base64",
        media_type: "image/jpeg",
        data: "base64screenshot",
      },
    };
    expect(outcome.contentBlocks).toHaveLength(1);
    expect(outcome.contentBlocks![0]).toEqual(expectedImageBlock);
  });

  test("computer_use_type_text routes through proxy", async () => {
    const ctx = setupProxy();

    const pending = surfaceProxyResolver(ctx, "computer_use_type_text", {
      text: "Hello world",
      reasoning: "Type into search box",
    });

    const request = sentAt(0);
    expect(request.toolName).toBe("computer_use_type_text");
    expect(request.input).toEqual({
      text: "Hello world",
      reasoning: "Type into search box",
    });

    proxy.resolve(request.requestId as string, {
      axTree: "SearchBox [1] value='Hello world'",
      executionResult: "Typed text",
    });

    const outcome = await pending;
    expect(outcome.isError).toBe(false);
    expect(outcome.content).toContain("Typed text");
  });
});
295
+
296
+ // -------------------------------------------------------------------------
297
+ // Full proxy lifecycle (observe → click → done)
298
+ // -------------------------------------------------------------------------
299
+
300
// Walks one complete session: an observation, an action, then the terminal
// done call that clears the proxy's accumulated state.
describe("full proxy lifecycle", () => {
  test("observe → click → done sequence", async () => {
    const ctx = setupProxy();

    // --- observe: take a screenshot ---
    const screenshotCall = surfaceProxyResolver(
      ctx,
      "computer_use_screenshot",
      { reasoning: "Check what's on screen" },
    );
    const screenshotRequest = sentMessages[0] as Record<string, unknown>;
    proxy.resolve(screenshotRequest.requestId as string, {
      axTree: "LoginButton [1]\nUsernameField [2]",
    });
    const observed = await screenshotCall;
    expect(observed.isError).toBe(false);
    expect(observed.content).toContain("LoginButton [1]");
    expect(proxy.stepCount).toBe(1);

    // --- act: click the login button ---
    const clickCall = surfaceProxyResolver(ctx, "computer_use_click", {
      element_id: 1,
      reasoning: "Click login button",
    });
    const clickRequest = sentMessages[1] as Record<string, unknown>;
    proxy.resolve(clickRequest.requestId as string, {
      axTree: "PasswordField [1]\nSubmitButton [2]",
      axDiff: "+ PasswordField [1]\n+ SubmitButton [2]\n- LoginButton [1]",
      executionResult: "Clicked element 1",
    });
    const clicked = await clickCall;
    expect(clicked.isError).toBe(false);
    expect(clicked.content).toContain("Clicked element 1");
    expect(clicked.content).toContain("PasswordField [1]");
    expect(proxy.stepCount).toBe(2);

    // --- finish: terminal done call ---
    const finished = await surfaceProxyResolver(ctx, "computer_use_done", {
      summary: "Logged in successfully",
    });
    expect(finished.isError).toBe(false);
    expect(finished.content).toBe("Logged in successfully");

    // After done the proxy holds no steps or history...
    expect(proxy.stepCount).toBe(0);
    expect(proxy.actionHistory).toHaveLength(0);
    // ...and only the screenshot and click ever reached the client.
    expect(sentMessages).toHaveLength(2);
  });
});
348
+
349
+ // -------------------------------------------------------------------------
350
+ // Step limit enforced through resolver
351
+ // -------------------------------------------------------------------------
352
+
353
// With maxSteps = 2 and three actions already recorded, action tools must
// be refused while terminal tools keep working.
describe("step limit enforcement through resolver", () => {
  // Record three clicks (element ids 1..3) directly on the shared proxy.
  const recordThreeClicks = () => {
    for (const elementId of [1, 2, 3]) {
      proxy.recordAction("computer_use_click", { element_id: elementId });
    }
  };

  test("rejects action tools when step limit exceeded", async () => {
    const ctx = setupProxy(2); // maxSteps = 2

    recordThreeClicks();
    expect(proxy.stepCount).toBe(3);

    // The resolver goes through proxy.request, where the limit is checked.
    const outcome = await surfaceProxyResolver(ctx, "computer_use_click", {
      element_id: 4,
      reasoning: "click",
    });

    expect(outcome.isError).toBe(true);
    expect(outcome.content).toContain("Step limit");
    expect(outcome.content).toContain("computer_use_done");
  });

  test("terminal tools still work after step limit exceeded", async () => {
    const ctx = setupProxy(2);

    recordThreeClicks();

    // computer_use_done is terminal, so it resolves immediately.
    const outcome = await surfaceProxyResolver(ctx, "computer_use_done", {
      summary: "Stopped because step limit",
    });

    expect(outcome.isError).toBe(false);
    expect(outcome.content).toBe("Stopped because step limit");
    expect(proxy.stepCount).toBe(0);
  });
});
391
+
392
+ // -------------------------------------------------------------------------
393
+ // Error from client
394
+ // -------------------------------------------------------------------------
395
+
396
// An observation that carries `executionError` must come back from the
// resolver as an error result containing the client's message.
describe("error from client observation", () => {
  test("returns error result when client reports execution error", async () => {
    const ctx = setupProxy();

    const pending = surfaceProxyResolver(ctx, "computer_use_click", {
      element_id: 999,
      reasoning: "click missing element",
    });

    // Answer the forwarded request with a failure observation.
    const request = sentMessages[0] as Record<string, unknown>;
    const failedObservation = {
      executionError: "Element 999 not found in AX tree",
      axTree: "Window [1]",
    };
    proxy.resolve(request.requestId as string, failedObservation);

    const outcome = await pending;
    expect(outcome.isError).toBe(true);
    expect(outcome.content).toContain("Action failed");
    expect(outcome.content).toContain("Element 999 not found");
  });
});
417
+
418
+ // -------------------------------------------------------------------------
419
+ // Reasoning propagation
420
+ // -------------------------------------------------------------------------
421
+
422
// The `reasoning` field of a tool input must reach both the request sent to
// the client and the proxy's recorded action history.
describe("reasoning propagation", () => {
  test("reasoning from input is passed to proxy request", async () => {
    const ctx = setupProxy();

    const resultPromise = surfaceProxyResolver(ctx, "computer_use_key", {
      key: "Enter",
      reasoning: "Submit the form",
    });

    const sent = sentMessages[0] as Record<string, unknown>;
    expect(sent.reasoning).toBe("Submit the form");

    // Resolve to avoid unhandled rejection on dispose
    proxy.resolve(sent.requestId as string, { axTree: "..." });
    await resultPromise;
  });

  test("reasoning is recorded in action history", async () => {
    const ctx = setupProxy();

    // Keep the promise so it can be awaited below. Left floating, the test
    // could finish before the resolver settles and any rejection would be
    // unhandled.
    const resultPromise = surfaceProxyResolver(ctx, "computer_use_scroll", {
      direction: "down",
      amount: 3,
      reasoning: "Scroll to see more",
    });

    expect(proxy.actionHistory[0].reasoning).toBe("Scroll to see more");

    // Resolve the pending request, then wait for the resolver to finish
    const sent = sentMessages[0] as Record<string, unknown>;
    proxy.resolve(sent.requestId as string, { axTree: "..." });
    await resultPromise;
  });
});
455
+
456
+ // -------------------------------------------------------------------------
457
+ // Non-CU tools are not handled by CU routing
458
+ // -------------------------------------------------------------------------
459
+
460
// Tool names outside the computer_use_* family must not produce the CU
// "not available" error; unrecognised names get their own error instead.
describe("non-CU tools are not handled by CU routing", () => {
  test("ui_show is not affected by CU routing", async () => {
    const ctx = setupProxy();
    const showInput = {
      surface_type: "confirmation",
      data: { message: "Are you sure?" },
    };

    const outcome = await surfaceProxyResolver(ctx, "ui_show", showInput);

    // Neither CU-specific error phrase may appear in the ui_show result.
    for (const cuPhrase of ["not available", "desktop client"]) {
      expect(outcome.content).not.toContain(cuPhrase);
    }
  });

  test("unknown tool returns error", async () => {
    const ctx = setupProxy();

    const outcome = await surfaceProxyResolver(ctx, "not_a_real_tool", {});

    expect(outcome.isError).toBe(true);
    expect(outcome.content).toContain("Unknown proxy tool");
  });
});
483
+
484
+ // -------------------------------------------------------------------------
485
+ // Multiple sequential CU actions accumulate state
486
+ // -------------------------------------------------------------------------
487
+
488
// Without a terminal tool in between, consecutive actions keep adding to
// the proxy's step count and action history.
describe("state accumulation across actions", () => {
  test("step count increments across multiple actions", async () => {
    const ctx = setupProxy();

    // Each entry: the tool to call, its input, and the axTree the fake
    // client answers with.
    const steps = [
      {
        tool: "computer_use_click",
        input: { element_id: 1, reasoning: "first" },
        axTree: "A",
      },
      {
        tool: "computer_use_type_text",
        input: { text: "hello", reasoning: "second" },
        axTree: "B",
      },
      {
        tool: "computer_use_scroll",
        input: { direction: "down", amount: 1, reasoning: "third" },
        axTree: "C",
      },
    ];

    for (const [index, step] of steps.entries()) {
      const pending = surfaceProxyResolver(ctx, step.tool, step.input);
      // The i-th action produces the i-th forwarded message.
      const request = sentMessages[index] as Record<string, unknown>;
      proxy.resolve(request.requestId as string, { axTree: step.axTree });
      await pending;
      expect(proxy.stepCount).toBe(index + 1);
    }

    // All three actions are in the history, in call order.
    expect(proxy.actionHistory).toHaveLength(3);
    expect(proxy.actionHistory.map((a) => a.toolName)).toEqual([
      "computer_use_click",
      "computer_use_type_text",
      "computer_use_scroll",
    ]);
  });
});
532
+ });