@vellumai/assistant 0.6.1 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (463)
  1. package/bun.lock +40 -40
  2. package/bunfig.toml +3 -0
  3. package/docker-entrypoint.sh +12 -2
  4. package/docs/architecture/memory.md +1 -1
  5. package/node_modules/@vellumai/ces-contracts/src/handles.ts +7 -9
  6. package/node_modules/@vellumai/ces-contracts/src/rpc.ts +42 -0
  7. package/openapi.yaml +184 -69
  8. package/package.json +41 -41
  9. package/scripts/generate-openapi.ts +1 -2
  10. package/src/__tests__/acp-session.test.ts +43 -0
  11. package/src/__tests__/app-builder-tool-scripts.test.ts +1 -0
  12. package/src/__tests__/app-executors.test.ts +1 -0
  13. package/src/__tests__/app-source-watcher.test.ts +37 -11
  14. package/src/__tests__/approval-routes-http.test.ts +178 -1
  15. package/src/__tests__/assistant-event-hub.test.ts +30 -0
  16. package/src/__tests__/browser-fill-credential.test.ts +229 -94
  17. package/src/__tests__/browser-manager.test.ts +40 -27
  18. package/src/__tests__/catalog-files.test.ts +862 -0
  19. package/src/__tests__/channel-approvals.test.ts +53 -0
  20. package/src/__tests__/checker.test.ts +104 -170
  21. package/src/__tests__/cli-command-risk-guard.test.ts +1 -1
  22. package/src/__tests__/config-managed-gemini-defaults.test.ts +326 -0
  23. package/src/__tests__/config-schema-cmd.test.ts +2 -2
  24. package/src/__tests__/config-schema.test.ts +125 -48
  25. package/src/__tests__/confirmation-request-guardian-bridge.test.ts +23 -0
  26. package/src/__tests__/context-overflow-approval.test.ts +21 -6
  27. package/src/__tests__/conversation-agent-loop-overflow.test.ts +1 -1
  28. package/src/__tests__/conversation-agent-loop.test.ts +1 -1
  29. package/src/__tests__/conversation-analysis-routes.test.ts +169 -0
  30. package/src/__tests__/conversation-attachments.test.ts +80 -4
  31. package/src/__tests__/conversation-confirmation-signals.test.ts +155 -0
  32. package/src/__tests__/conversation-directories-parse.test.ts +105 -0
  33. package/src/__tests__/conversation-fork-crud.test.ts +17 -0
  34. package/src/__tests__/conversation-history-web-search.test.ts +1 -0
  35. package/src/__tests__/conversation-host-access-routes.test.ts +229 -0
  36. package/src/__tests__/conversation-inject-context.test.ts +103 -0
  37. package/src/__tests__/conversation-queue.test.ts +45 -2
  38. package/src/__tests__/conversation-routes-disk-view.test.ts +5 -0
  39. package/src/__tests__/conversation-routes-guardian-reply.test.ts +16 -0
  40. package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
  41. package/src/__tests__/conversation-runtime-assembly.test.ts +269 -46
  42. package/src/__tests__/conversation-starter-routes.test.ts +126 -0
  43. package/src/__tests__/conversation-starters-cadence.test.ts +161 -0
  44. package/src/__tests__/conversation-store.test.ts +195 -0
  45. package/src/__tests__/conversation-workspace-cache-state.test.ts +193 -0
  46. package/src/__tests__/credential-execution-approval-bridge.test.ts +32 -3
  47. package/src/__tests__/credential-security-invariants.test.ts +1 -0
  48. package/src/__tests__/credential-vault-unit.test.ts +4 -4
  49. package/src/__tests__/credential-vault.test.ts +152 -13
  50. package/src/__tests__/credentials-cli.test.ts +2 -2
  51. package/src/__tests__/date-context.test.ts +4 -4
  52. package/src/__tests__/embedding-managed-proxy-selection.test.ts +256 -0
  53. package/src/__tests__/extension-id-sync-guard.test.ts +155 -0
  54. package/src/__tests__/fixtures/mock-chrome-extension.ts +375 -0
  55. package/src/__tests__/gateway-only-guard.test.ts +3 -0
  56. package/src/__tests__/gemini-provider.test.ts +2 -2
  57. package/src/__tests__/guardian-routing-invariants.test.ts +70 -2
  58. package/src/__tests__/headless-browser-interactions.test.ts +707 -371
  59. package/src/__tests__/headless-browser-navigate.test.ts +389 -47
  60. package/src/__tests__/headless-browser-read-tools.test.ts +266 -103
  61. package/src/__tests__/headless-browser-snapshot.test.ts +240 -77
  62. package/src/__tests__/host-bash-proxy.test.ts +150 -1
  63. package/src/__tests__/host-browser-e2e-cloud.test.ts +462 -0
  64. package/src/__tests__/host-browser-e2e-self-hosted-capability.test.ts +286 -0
  65. package/src/__tests__/host-browser-e2e-self-hosted.test.ts +374 -0
  66. package/src/__tests__/host-browser-event-routes.test.ts +350 -0
  67. package/src/__tests__/host-browser-proxy.test.ts +444 -0
  68. package/src/__tests__/host-browser-routes.test.ts +198 -0
  69. package/src/__tests__/host-browser-ws-events-e2e.test.ts +320 -0
  70. package/src/__tests__/host-cu-proxy.test.ts +171 -1
  71. package/src/__tests__/host-file-proxy.test.ts +185 -1
  72. package/src/__tests__/host-file-read-tool.test.ts +52 -0
  73. package/src/__tests__/host-proxy-interface.test.ts +165 -0
  74. package/src/__tests__/host-shell-tool.test.ts +1 -11
  75. package/src/__tests__/http-user-message-parity.test.ts +1 -0
  76. package/src/__tests__/init-feature-flag-overrides.test.ts +167 -0
  77. package/src/__tests__/inline-command-runner.test.ts +7 -5
  78. package/src/__tests__/integration-status.test.ts +6 -7
  79. package/src/__tests__/list-messages-tool-merge.test.ts +37 -12
  80. package/src/__tests__/log-export-workspace.test.ts +190 -0
  81. package/src/__tests__/managed-credential-catalog-cli.test.ts +12 -14
  82. package/src/__tests__/mcp-client-auth.test.ts +40 -4
  83. package/src/__tests__/mcp-health-check.test.ts +10 -3
  84. package/src/__tests__/migration-cross-version-compatibility.test.ts +3 -1
  85. package/src/__tests__/migration-export-http.test.ts +61 -2
  86. package/src/__tests__/migration-export-streaming.test.ts +66 -0
  87. package/src/__tests__/migration-import-commit-http.test.ts +101 -1
  88. package/src/__tests__/native-host-marker-sync-guard.test.ts +157 -0
  89. package/src/__tests__/navigate-settings-tab.test.ts +14 -1
  90. package/src/__tests__/notification-broadcaster.test.ts +65 -0
  91. package/src/__tests__/oauth-apps-routes.test.ts +17 -12
  92. package/src/__tests__/oauth-cli.test.ts +707 -60
  93. package/src/__tests__/oauth-connect-orchestrator.test.ts +116 -24
  94. package/src/__tests__/oauth-provider-seed-logos.test.ts +23 -0
  95. package/src/__tests__/oauth-provider-serializer.test.ts +146 -10
  96. package/src/__tests__/oauth-provider-visibility.test.ts +19 -21
  97. package/src/__tests__/oauth-providers-routes.test.ts +50 -14
  98. package/src/__tests__/oauth-store.test.ts +1386 -182
  99. package/src/__tests__/oauth2-gateway-transport.test.ts +211 -20
  100. package/src/__tests__/onboarding-template-contract.test.ts +74 -55
  101. package/src/__tests__/openai-provider.test.ts +2 -2
  102. package/src/__tests__/outlook-categories.test.ts +1 -1
  103. package/src/__tests__/outlook-client-automation.test.ts +1 -1
  104. package/src/__tests__/outlook-compose-tools.test.ts +1 -1
  105. package/src/__tests__/outlook-email-watcher.test.ts +1 -1
  106. package/src/__tests__/outlook-follow-up.test.ts +1 -1
  107. package/src/__tests__/outlook-messaging-provider.test.ts +2 -2
  108. package/src/__tests__/outlook-trash.test.ts +1 -1
  109. package/src/__tests__/outlook-unsubscribe.test.ts +1 -1
  110. package/src/__tests__/permission-checker-host-gate.test.ts +74 -14
  111. package/src/__tests__/permission-mode.test.ts +28 -56
  112. package/src/__tests__/pkb-autoinject.test.ts +96 -0
  113. package/src/__tests__/platform-callback-registration.test.ts +19 -0
  114. package/src/__tests__/post-turn-tool-result-truncation.test.ts +296 -0
  115. package/src/__tests__/proxy-approval-callback.test.ts +18 -0
  116. package/src/__tests__/require-fresh-approval.test.ts +40 -3
  117. package/src/__tests__/sandbox-diagnostics.test.ts +1 -32
  118. package/src/__tests__/sanitize-config-for-transfer.test.ts +132 -0
  119. package/src/__tests__/schedule-routes.test.ts +162 -0
  120. package/src/__tests__/secret-detection-handler.test.ts +84 -0
  121. package/src/__tests__/secret-ingress-http.test.ts +1 -0
  122. package/src/__tests__/send-endpoint-busy.test.ts +3 -0
  123. package/src/__tests__/set-permission-mode.test.ts +13 -250
  124. package/src/__tests__/skills-file-content-endpoint.test.ts +670 -0
  125. package/src/__tests__/skills-files-catalog-fallback.test.ts +450 -0
  126. package/src/__tests__/slack-channel-config.test.ts +12 -15
  127. package/src/__tests__/subagent-detail.test.ts +44 -2
  128. package/src/__tests__/subagent-disposal.test.ts +1 -0
  129. package/src/__tests__/subagent-fork-notifications.test.ts +291 -0
  130. package/src/__tests__/subagent-fork-spawn.test.ts +384 -0
  131. package/src/__tests__/subagent-manager-notify.test.ts +1 -0
  132. package/src/__tests__/subagent-notify-parent.test.ts +1 -0
  133. package/src/__tests__/subagent-spawn-tool-fork.test.ts +411 -0
  134. package/src/__tests__/subagent-tools.test.ts +1 -0
  135. package/src/__tests__/subagent-types.test.ts +1 -0
  136. package/src/__tests__/system-prompt-ask-mode.test.ts +27 -71
  137. package/src/__tests__/system-prompt.test.ts +72 -1
  138. package/src/__tests__/task-scheduler.test.ts +32 -6
  139. package/src/__tests__/telegram-config.test.ts +10 -13
  140. package/src/__tests__/terminal-sandbox.test.ts +1 -1
  141. package/src/__tests__/terminal-tools.test.ts +11 -5
  142. package/src/__tests__/test-preload.ts +14 -0
  143. package/src/__tests__/tool-approval-handler.test.ts +73 -0
  144. package/src/__tests__/tool-domain-event-publisher.test.ts +0 -1
  145. package/src/__tests__/tool-executor-lifecycle-events.test.ts +1 -8
  146. package/src/__tests__/tool-executor.test.ts +0 -1
  147. package/src/__tests__/tool-side-effects-slack-dm.test.ts +22 -0
  148. package/src/__tests__/top-level-renderer.test.ts +73 -1
  149. package/src/__tests__/transport-hints-queue.test.ts +62 -0
  150. package/src/__tests__/trust-store.test.ts +4 -4
  151. package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +109 -0
  152. package/src/__tests__/v2-consent-policy.test.ts +103 -0
  153. package/src/__tests__/workspace-migration-030-seed-pkb-autoinject.test.ts +168 -0
  154. package/src/__tests__/workspace-policy.test.ts +2 -7
  155. package/src/acp/client-handler.ts +30 -4
  156. package/src/agent/loop.ts +12 -35
  157. package/src/approvals/guardian-request-resolvers.ts +21 -15
  158. package/src/browser-session/__tests__/manager.test.ts +297 -0
  159. package/src/browser-session/backends/cdp-inspect.ts +30 -0
  160. package/src/browser-session/backends/extension.ts +26 -0
  161. package/src/browser-session/backends/local.ts +24 -0
  162. package/src/browser-session/events.ts +164 -0
  163. package/src/browser-session/index.ts +27 -0
  164. package/src/browser-session/manager.ts +159 -0
  165. package/src/browser-session/types.ts +28 -0
  166. package/src/channels/__tests__/types.test.ts +134 -0
  167. package/src/channels/types.ts +55 -0
  168. package/src/cli/__tests__/run-assistant-command.ts +34 -7
  169. package/src/cli/__tests__/unknown-command.test.ts +33 -0
  170. package/src/cli/commands/browser-relay.ts +339 -409
  171. package/src/cli/commands/credentials.ts +3 -3
  172. package/src/cli/commands/default-action.ts +68 -1
  173. package/src/cli/commands/email.ts +18 -13
  174. package/src/cli/commands/mcp.ts +16 -4
  175. package/src/cli/commands/oauth/__tests__/connect.test.ts +68 -41
  176. package/src/cli/commands/oauth/__tests__/disconnect.test.ts +21 -21
  177. package/src/cli/commands/oauth/__tests__/mode.test.ts +17 -17
  178. package/src/cli/commands/oauth/__tests__/ping.test.ts +16 -16
  179. package/src/cli/commands/oauth/__tests__/providers-delete.test.ts +31 -33
  180. package/src/cli/commands/oauth/__tests__/providers-register.test.ts +329 -0
  181. package/src/cli/commands/oauth/__tests__/providers-update.test.ts +116 -12
  182. package/src/cli/commands/oauth/__tests__/status.test.ts +10 -10
  183. package/src/cli/commands/oauth/__tests__/token.test.ts +7 -7
  184. package/src/cli/commands/oauth/apps.ts +7 -4
  185. package/src/cli/commands/oauth/connect.ts +16 -2
  186. package/src/cli/commands/oauth/disconnect.ts +1 -1
  187. package/src/cli/commands/oauth/providers.ts +200 -36
  188. package/src/cli/commands/oauth/shared.ts +5 -5
  189. package/src/cli/commands/platform/__tests__/callback-routes-list.test.ts +259 -0
  190. package/src/cli/commands/platform/__tests__/connect.test.ts +1 -1
  191. package/src/cli/commands/platform/__tests__/disconnect.test.ts +1 -1
  192. package/src/cli/commands/platform/__tests__/status.test.ts +1 -1
  193. package/src/cli/commands/platform/index.ts +107 -10
  194. package/src/cli/commands/usage.ts +10 -9
  195. package/src/cli/lib/daemon-credential-client.ts +4 -0
  196. package/src/cli/program.ts +10 -3
  197. package/src/config/assistant-feature-flags.ts +59 -55
  198. package/src/config/bundled-skills/app-builder/SKILL.md +33 -173
  199. package/src/config/bundled-skills/app-builder/references/CUSTOM_ROUTES.md +105 -0
  200. package/src/config/bundled-skills/app-builder/references/INTERACTION_HOOKS.md +56 -0
  201. package/src/config/bundled-skills/app-builder/references/WIDGETS.md +125 -0
  202. package/src/config/bundled-skills/contacts/SKILL.md +3 -0
  203. package/src/config/bundled-skills/document/SKILL.md +4 -0
  204. package/src/config/bundled-skills/gmail/SKILL.md +12 -7
  205. package/src/config/bundled-skills/gmail/TOOLS.json +1 -1
  206. package/src/config/bundled-skills/gmail/tools/gmail-sender-digest.ts +2 -1
  207. package/src/config/bundled-skills/outlook/SKILL.md +7 -0
  208. package/src/config/bundled-skills/settings/TOOLS.json +1 -1
  209. package/src/config/bundled-skills/settings/tools/navigate-settings-tab.ts +8 -3
  210. package/src/config/bundled-skills/subagent/SKILL.md +21 -0
  211. package/src/config/bundled-skills/subagent/TOOLS.json +8 -4
  212. package/src/config/bundled-skills/tasks/SKILL.md +5 -0
  213. package/src/config/env-registry.ts +14 -0
  214. package/src/config/env.ts +21 -0
  215. package/src/config/feature-flag-registry.json +46 -7
  216. package/src/config/loader.ts +56 -1
  217. package/src/config/sanitize-for-transfer.ts +47 -0
  218. package/src/config/schema.ts +46 -5
  219. package/src/config/schemas/host-browser.ts +66 -0
  220. package/src/config/schemas/memory-lifecycle.ts +1 -1
  221. package/src/config/schemas/memory-retrieval.ts +103 -0
  222. package/src/config/schemas/security.ts +0 -6
  223. package/src/config/schemas/services.ts +16 -0
  224. package/src/config/types.ts +0 -1
  225. package/src/context/post-turn-tool-result-truncation.ts +176 -0
  226. package/src/context/window-manager.ts +19 -1
  227. package/src/credential-execution/approval-bridge.ts +49 -16
  228. package/src/credential-execution/managed-catalog.ts +3 -7
  229. package/src/daemon/__tests__/conversation-tool-setup.test.ts +186 -0
  230. package/src/daemon/app-source-watcher.ts +35 -0
  231. package/src/daemon/config-watcher.ts +6 -2
  232. package/src/daemon/context-overflow-approval.ts +5 -1
  233. package/src/daemon/conversation-agent-loop-handlers.ts +17 -2
  234. package/src/daemon/conversation-agent-loop.ts +74 -19
  235. package/src/daemon/conversation-attachments.ts +40 -1
  236. package/src/daemon/conversation-messaging.ts +3 -0
  237. package/src/daemon/conversation-process.ts +66 -3
  238. package/src/daemon/conversation-queue-manager.ts +8 -0
  239. package/src/daemon/conversation-runtime-assembly.ts +159 -20
  240. package/src/daemon/conversation-surfaces.ts +78 -12
  241. package/src/daemon/conversation-tool-setup.ts +74 -11
  242. package/src/daemon/conversation-workspace.ts +12 -0
  243. package/src/daemon/conversation.ts +227 -11
  244. package/src/daemon/date-context.ts +10 -10
  245. package/src/daemon/first-greeting.ts +3 -2
  246. package/src/daemon/handlers/conversations.ts +9 -139
  247. package/src/daemon/handlers/shared.ts +65 -0
  248. package/src/daemon/handlers/skills.ts +232 -37
  249. package/src/daemon/host-bash-proxy.ts +48 -13
  250. package/src/daemon/host-browser-proxy.ts +191 -0
  251. package/src/daemon/host-cu-proxy.ts +36 -11
  252. package/src/daemon/host-file-proxy.ts +57 -9
  253. package/src/daemon/lifecycle.ts +86 -12
  254. package/src/daemon/message-protocol.ts +7 -0
  255. package/src/daemon/message-types/conversations.ts +59 -13
  256. package/src/daemon/message-types/host-browser.ts +100 -0
  257. package/src/daemon/message-types/messages.ts +5 -6
  258. package/src/daemon/message-types/notifications.ts +12 -0
  259. package/src/daemon/message-types/settings.ts +12 -0
  260. package/src/daemon/message-types/skills.ts +10 -0
  261. package/src/daemon/message-types/subagents.ts +2 -0
  262. package/src/daemon/server.ts +112 -35
  263. package/src/daemon/tool-side-effects.ts +6 -0
  264. package/src/daemon/transport-hints.ts +14 -0
  265. package/src/inbound/platform-callback-registration.ts +18 -17
  266. package/src/index.ts +1 -1
  267. package/src/mcp/client.ts +59 -24
  268. package/src/memory/app-store.ts +31 -1
  269. package/src/memory/conversation-crud.ts +38 -10
  270. package/src/memory/conversation-directories.ts +39 -0
  271. package/src/memory/conversation-group-migration.ts +65 -5
  272. package/src/memory/conversation-starters-cadence.ts +76 -0
  273. package/src/memory/conversation-title-service.ts +5 -2
  274. package/src/memory/db-init.ts +12 -0
  275. package/src/memory/embedding-backend.test.ts +75 -0
  276. package/src/memory/embedding-backend.ts +131 -5
  277. package/src/memory/embedding-gemini.test.ts +54 -0
  278. package/src/memory/embedding-gemini.ts +20 -9
  279. package/src/memory/embedding-local.ts +177 -18
  280. package/src/memory/graph/capability-seed.ts +3 -5
  281. package/src/memory/graph/consolidation.ts +10 -23
  282. package/src/memory/graph/extraction-job.ts +15 -0
  283. package/src/memory/graph/retriever.ts +40 -22
  284. package/src/memory/graph/store.test.ts +7 -3
  285. package/src/memory/graph/store.ts +47 -12
  286. package/src/memory/group-crud.ts +25 -9
  287. package/src/memory/llm-usage-store.ts +45 -4
  288. package/src/memory/migrations/213-oauth-providers-scope-separator.ts +13 -0
  289. package/src/memory/migrations/214-oauth-providers-refresh-url.ts +11 -0
  290. package/src/memory/migrations/215-oauth-providers-revoke.ts +14 -0
  291. package/src/memory/migrations/216-oauth-providers-token-auth-method.ts +30 -0
  292. package/src/memory/migrations/217-conversation-host-access.ts +40 -0
  293. package/src/memory/migrations/218-oauth-providers-logo-url.ts +11 -0
  294. package/src/memory/migrations/index.ts +6 -0
  295. package/src/memory/migrations/registry.ts +8 -0
  296. package/src/memory/schema/conversations.ts +1 -0
  297. package/src/memory/schema/oauth.ts +18 -13
  298. package/src/messaging/provider.ts +1 -1
  299. package/src/notifications/broadcaster.ts +6 -0
  300. package/src/notifications/conversation-pairing.ts +12 -4
  301. package/src/notifications/emit-signal.ts +14 -0
  302. package/src/notifications/signal.ts +11 -0
  303. package/src/oauth/AGENTS.md +76 -0
  304. package/src/oauth/__tests__/identity-verifier.test.ts +24 -19
  305. package/src/oauth/__tests__/seed-providers-managed.test.ts +32 -0
  306. package/src/oauth/byo-connection.test.ts +8 -8
  307. package/src/oauth/byo-connection.ts +7 -7
  308. package/src/oauth/connect-orchestrator.ts +23 -21
  309. package/src/oauth/connect-types.ts +3 -3
  310. package/src/oauth/connection-resolver.test.ts +17 -4
  311. package/src/oauth/connection-resolver.ts +16 -16
  312. package/src/oauth/connection.ts +1 -1
  313. package/src/oauth/manual-token-connection.ts +13 -13
  314. package/src/oauth/oauth-store.ts +214 -100
  315. package/src/oauth/platform-connection.test.ts +5 -5
  316. package/src/oauth/platform-connection.ts +4 -4
  317. package/src/oauth/provider-serializer.ts +31 -5
  318. package/src/oauth/revoke.ts +76 -0
  319. package/src/oauth/seed-providers.ts +127 -87
  320. package/src/oauth/token-persistence.ts +1 -1
  321. package/src/permissions/checker.ts +3 -3
  322. package/src/permissions/defaults.ts +7 -8
  323. package/src/permissions/permission-mode.ts +4 -11
  324. package/src/permissions/prompter.ts +13 -3
  325. package/src/permissions/v2-consent-policy.ts +87 -0
  326. package/src/platform/client.ts +1 -1
  327. package/src/prompts/system-prompt.ts +18 -21
  328. package/src/prompts/templates/BOOTSTRAP-REFERENCE.md +3 -65
  329. package/src/prompts/templates/BOOTSTRAP.md +59 -96
  330. package/src/prompts/templates/SOUL.md +11 -11
  331. package/src/providers/anthropic/client.ts +1 -0
  332. package/src/providers/types.ts +1 -1
  333. package/src/runtime/AGENTS.md +23 -0
  334. package/src/runtime/__tests__/browser-extension-pair-routes.test.ts +715 -0
  335. package/src/runtime/__tests__/capability-tokens.test.ts +258 -0
  336. package/src/runtime/__tests__/chrome-extension-registry.test.ts +518 -0
  337. package/src/runtime/assistant-event-hub.ts +24 -2
  338. package/src/runtime/auth/__tests__/guard-tests.test.ts +1 -0
  339. package/src/runtime/auth/__tests__/middleware.test.ts +116 -1
  340. package/src/runtime/auth/__tests__/route-policy.test.ts +8 -0
  341. package/src/runtime/auth/middleware.ts +98 -0
  342. package/src/runtime/auth/route-policy.ts +6 -7
  343. package/src/runtime/auth/token-service.ts +8 -0
  344. package/src/runtime/capability-tokens.ts +414 -0
  345. package/src/runtime/channel-approvals.ts +18 -5
  346. package/src/runtime/chrome-extension-registry.ts +332 -0
  347. package/src/runtime/confirmation-request-guardian-bridge.ts +6 -0
  348. package/src/runtime/guardian-decision-types.ts +7 -0
  349. package/src/runtime/http-server.ts +425 -70
  350. package/src/runtime/migrations/__tests__/rebind-secrets-credentials.test.ts +172 -0
  351. package/src/runtime/migrations/__tests__/vbundle-builder-credentials.test.ts +276 -0
  352. package/src/runtime/migrations/__tests__/vbundle-import-credentials.test.ts +162 -0
  353. package/src/runtime/migrations/migration-transport.ts +6 -0
  354. package/src/runtime/migrations/migration-wizard.ts +22 -2
  355. package/src/runtime/migrations/rebind-secrets-screen.ts +76 -15
  356. package/src/runtime/migrations/vbundle-builder.ts +145 -38
  357. package/src/runtime/migrations/vbundle-import-analyzer.ts +19 -0
  358. package/src/runtime/migrations/vbundle-importer.ts +55 -5
  359. package/src/runtime/pending-interactions.ts +29 -13
  360. package/src/runtime/routes/approval-routes.ts +90 -16
  361. package/src/runtime/routes/browser-cdp-routes.ts +229 -0
  362. package/src/runtime/routes/browser-extension-pair-routes.ts +497 -0
  363. package/src/runtime/routes/conversation-analysis-routes.ts +18 -5
  364. package/src/runtime/routes/conversation-management-routes.ts +108 -0
  365. package/src/runtime/routes/conversation-routes.ts +308 -28
  366. package/src/runtime/routes/conversation-starter-routes.ts +78 -16
  367. package/src/runtime/routes/group-routes.ts +22 -8
  368. package/src/runtime/routes/guardian-action-routes.ts +24 -13
  369. package/src/runtime/routes/host-browser-routes.ts +279 -0
  370. package/src/runtime/routes/host-file-routes.ts +9 -1
  371. package/src/runtime/routes/identity-routes.ts +259 -16
  372. package/src/runtime/routes/log-export/AGENTS.md +104 -0
  373. package/src/runtime/routes/log-export/__tests__/workspace-allowlist-error-contract.test.ts +103 -0
  374. package/src/runtime/routes/log-export/__tests__/workspace-allowlist.test.ts +716 -0
  375. package/src/runtime/routes/log-export/workspace-allowlist.ts +458 -0
  376. package/src/runtime/routes/log-export-routes.ts +60 -25
  377. package/src/runtime/routes/memory-item-routes.ts +1 -7
  378. package/src/runtime/routes/migration-routes.ts +87 -2
  379. package/src/runtime/routes/oauth-apps.ts +15 -17
  380. package/src/runtime/routes/oauth-providers.ts +4 -0
  381. package/src/runtime/routes/schedule-routes.ts +24 -11
  382. package/src/runtime/routes/settings-routes.ts +9 -97
  383. package/src/runtime/routes/skills-routes.ts +52 -2
  384. package/src/runtime/routes/subagents-routes.ts +14 -10
  385. package/src/runtime/routes/usage-routes.ts +8 -7
  386. package/src/runtime/routes/workspace-routes.test.ts +22 -0
  387. package/src/runtime/routes/workspace-routes.ts +8 -1
  388. package/src/runtime/routes/workspace-utils.ts +2 -0
  389. package/src/schedule/scheduler.ts +7 -5
  390. package/src/security/ces-credential-client.ts +20 -0
  391. package/src/security/ces-rpc-credential-backend.ts +17 -0
  392. package/src/security/credential-backend.ts +5 -0
  393. package/src/security/oauth2.ts +42 -25
  394. package/src/security/secure-keys.ts +118 -25
  395. package/src/security/token-manager.ts +23 -10
  396. package/src/skills/catalog-files.ts +492 -0
  397. package/src/skills/inline-command-runner.ts +12 -14
  398. package/src/subagent/manager.ts +131 -26
  399. package/src/subagent/types.ts +19 -0
  400. package/src/tools/apps/executors.ts +11 -2
  401. package/src/tools/browser/__tests__/auth-detector.test.ts +202 -108
  402. package/src/tools/browser/auth-detector.ts +43 -12
  403. package/src/tools/browser/browser-execution.ts +645 -340
  404. package/src/tools/browser/browser-manager.ts +36 -12
  405. package/src/tools/browser/cdp-client/__tests__/accessibility-snapshot.test.ts +318 -0
  406. package/src/tools/browser/cdp-client/__tests__/cdp-dom-helpers.test.ts +1175 -0
  407. package/src/tools/browser/cdp-client/__tests__/cdp-inspect-client.test.ts +870 -0
  408. package/src/tools/browser/cdp-client/__tests__/extension-cdp-client.test.ts +330 -0
  409. package/src/tools/browser/cdp-client/__tests__/factory.test.ts +377 -0
  410. package/src/tools/browser/cdp-client/__tests__/fixtures/ax-tree-nested-frames.json +64 -0
  411. package/src/tools/browser/cdp-client/__tests__/fixtures/ax-tree-simple.json +69 -0
  412. package/src/tools/browser/cdp-client/__tests__/local-cdp-client.test.ts +310 -0
  413. package/src/tools/browser/cdp-client/__tests__/types.test.ts +96 -0
  414. package/src/tools/browser/cdp-client/accessibility-snapshot.ts +387 -0
  415. package/src/tools/browser/cdp-client/cdp-dom-helpers.ts +695 -0
  416. package/src/tools/browser/cdp-client/cdp-inspect/__tests__/discovery.test.ts +743 -0
  417. package/src/tools/browser/cdp-client/cdp-inspect/__tests__/ws-transport.test.ts +580 -0
  418. package/src/tools/browser/cdp-client/cdp-inspect/discovery.ts +578 -0
  419. package/src/tools/browser/cdp-client/cdp-inspect/ws-transport.ts +579 -0
  420. package/src/tools/browser/cdp-client/cdp-inspect-client.ts +635 -0
  421. package/src/tools/browser/cdp-client/errors.ts +34 -0
  422. package/src/tools/browser/cdp-client/extension-cdp-client.ts +125 -0
  423. package/src/tools/browser/cdp-client/factory.ts +204 -0
  424. package/src/tools/browser/cdp-client/index.ts +14 -0
  425. package/src/tools/browser/cdp-client/local-cdp-client.ts +187 -0
  426. package/src/tools/browser/cdp-client/types.ts +52 -0
  427. package/src/tools/filesystem/edit.ts +1 -1
  428. package/src/tools/filesystem/list.ts +1 -1
  429. package/src/tools/filesystem/read.ts +1 -1
  430. package/src/tools/filesystem/write.ts +2 -1
  431. package/src/tools/host-filesystem/edit.ts +1 -1
  432. package/src/tools/host-filesystem/read.ts +12 -15
  433. package/src/tools/host-filesystem/write.ts +1 -1
  434. package/src/tools/host-terminal/host-shell.ts +21 -16
  435. package/src/tools/permission-checker.ts +77 -100
  436. package/src/tools/registry.ts +0 -2
  437. package/src/tools/secret-detection-handler.ts +34 -1
  438. package/src/tools/shared/filesystem/image-read.ts +61 -40
  439. package/src/tools/skills/sandbox-runner.ts +3 -6
  440. package/src/tools/subagent/spawn.ts +47 -3
  441. package/src/tools/subagent/status.ts +2 -0
  442. package/src/tools/system/register.ts +2 -16
  443. package/src/tools/terminal/safe-env.ts +7 -0
  444. package/src/tools/terminal/sandbox-diagnostics.ts +4 -4
  445. package/src/tools/terminal/sandbox.ts +4 -1
  446. package/src/tools/terminal/shell.ts +24 -21
  447. package/src/tools/tool-approval-handler.ts +48 -2
  448. package/src/tools/types.ts +2 -3
  449. package/src/util/platform.ts +14 -19
  450. package/src/watcher/provider-types.ts +1 -1
  451. package/src/workspace/migrations/029-seed-pkb.ts +1 -0
  452. package/src/workspace/migrations/030-seed-pkb-autoinject.ts +73 -0
  453. package/src/workspace/migrations/registry.ts +2 -0
  454. package/src/workspace/top-level-renderer.ts +19 -1
  455. package/src/__tests__/chrome-cdp.test.ts +0 -419
  456. package/src/__tests__/permission-mode-sse.test.ts +0 -418
  457. package/src/__tests__/permission-mode-store.test.ts +0 -277
  458. package/src/browser-extension-relay/protocol.ts +0 -63
  459. package/src/browser-extension-relay/server.ts +0 -203
  460. package/src/config/schemas/sandbox.ts +0 -14
  461. package/src/permissions/permission-mode-store.ts +0 -180
  462. package/src/tools/browser/chrome-cdp.ts +0 -239
  463. package/src/tools/system/set-permission-mode.ts +0 -103
@@ -9,40 +9,92 @@ mock.module("../util/logger.js", () => ({
9
9
  }),
10
10
  }));
11
11
 
12
+ /**
13
+ * Fake CDP session used by every interaction tool that has been
14
+ * migrated to `CdpClient` (click, hover, type, press_key,
15
+ * select_option, scroll). Each `session.send(method, params)` call is
16
+ * recorded in `sendCalls` and routed to `sendHandler`, which tests
17
+ * configure per-case. The handler returns either a CDP response
18
+ * object or an `Error` to simulate transport failure. `detachCalls`
19
+ * counts `session.detach()` invocations so tests can assert that
20
+ * `CdpClient.dispose()` runs in the tool's `finally` block.
21
+ *
22
+ * The fake session is exposed via `mockPage.context().newCDPSession(
23
+ * page)` so the real `LocalCdpClient` drives it. Routing through the
24
+ * production client (instead of mocking the factory / cdp-client
25
+ * submodules) avoids polluting the global module cache that the CDP
26
+ * unit tests rely on.
27
+ */
28
+ interface SendCall {
29
+ method: string;
30
+ params: Record<string, unknown> | undefined;
31
+ }
32
+
33
+ let sendCalls: SendCall[];
34
+ let sendHandler: (
35
+ method: string,
36
+ params: Record<string, unknown> | undefined,
37
+ ) => unknown;
38
+ let detachCalls: number;
39
+
40
+ function resetCdpMock() {
41
+ sendCalls = [];
42
+ detachCalls = 0;
43
+ sendHandler = () => ({});
44
+ }
45
+
46
+ const fakeCdpSession = {
47
+ send: async (method: string, params?: Record<string, unknown>) => {
48
+ sendCalls.push({ method, params });
49
+ const value = sendHandler(method, params);
50
+ if (value instanceof Error) throw value;
51
+ return value;
52
+ },
53
+ detach: async () => {
54
+ detachCalls += 1;
55
+ },
56
+ };
57
+
58
+ /**
59
+ * The mock page only needs to expose `context().newCDPSession()` so
60
+ * the real `LocalCdpClient` can obtain a CDP session. All interaction
61
+ * tools now route through CDP, so no Playwright `page.*` surface is
62
+ * required.
63
+ */
12
64
  let mockPage: {
13
- click: ReturnType<typeof mock>;
14
- fill: ReturnType<typeof mock>;
15
- press: ReturnType<typeof mock>;
16
- evaluate: ReturnType<typeof mock>;
17
- title: ReturnType<typeof mock>;
18
- url: ReturnType<typeof mock>;
19
- goto: ReturnType<typeof mock>;
20
- screenshot: ReturnType<typeof mock>;
21
- selectOption: ReturnType<typeof mock>;
22
- hover: ReturnType<typeof mock>;
23
65
  close: () => Promise<void>;
24
66
  isClosed: () => boolean;
25
- keyboard: { press: ReturnType<typeof mock> };
26
- mouse: { wheel: ReturnType<typeof mock>; move: ReturnType<typeof mock> };
67
+ context: () => {
68
+ newCDPSession: (page: unknown) => Promise<typeof fakeCdpSession>;
69
+ };
27
70
  };
28
71
 
29
- let snapshotMaps: Map<string, Map<string, string>>;
72
+ let snapshotBackendNodeMaps: Map<string, Map<string, number>>;
30
73
 
31
74
  mock.module("../tools/browser/browser-manager.js", () => {
32
- snapshotMaps = new Map();
75
+ snapshotBackendNodeMaps = new Map();
33
76
  return {
34
77
  browserManager: {
35
78
  getOrCreateSessionPage: async () => mockPage,
36
79
  closeSessionPage: async () => {},
37
80
  closeAllPages: async () => {},
38
- storeSnapshotMap: (conversationId: string, map: Map<string, string>) => {
39
- snapshotMaps.set(conversationId, map);
81
+ storeSnapshotBackendNodeMap: (
82
+ conversationId: string,
83
+ map: Map<string, number>,
84
+ ) => {
85
+ snapshotBackendNodeMaps.set(conversationId, map);
40
86
  },
41
- resolveSnapshotSelector: (conversationId: string, elementId: string) => {
42
- const map = snapshotMaps.get(conversationId);
87
+ resolveSnapshotBackendNodeId: (
88
+ conversationId: string,
89
+ elementId: string,
90
+ ) => {
91
+ const map = snapshotBackendNodeMaps.get(conversationId);
43
92
  if (!map) return null;
44
93
  return map.get(elementId) ?? null;
45
94
  },
95
+ clearSnapshotBackendNodeMap: (conversationId: string) => {
96
+ snapshotBackendNodeMaps.delete(conversationId);
97
+ },
46
98
  },
47
99
  };
48
100
  });
@@ -65,13 +117,10 @@ mock.module("../tools/browser/browser-screencast.js", () => ({
65
117
  import {
66
118
  executeBrowserClick,
67
119
  executeBrowserClose,
68
- executeBrowserExtract,
69
120
  executeBrowserHover,
70
121
  executeBrowserPressKey,
71
- executeBrowserScreenshot,
72
122
  executeBrowserScroll,
73
123
  executeBrowserSelectOption,
74
- executeBrowserSnapshot,
75
124
  executeBrowserType,
76
125
  } from "../tools/browser/browser-execution.js";
77
126
  import type { ToolContext } from "../tools/types.js";
@@ -84,68 +133,234 @@ const ctx: ToolContext = {
84
133
 
85
134
  function resetMockPage() {
86
135
  mockPage = {
87
- click: mock(async () => {}),
88
- fill: mock(async () => {}),
89
- press: mock(async () => {}),
90
- evaluate: mock(async () => ""),
91
- title: mock(async () => "Test Page"),
92
- url: mock(() => "https://example.com/"),
93
- goto: mock(async () => ({
94
- status: () => 200,
95
- url: () => "https://example.com/",
96
- })),
97
- screenshot: mock(async () => Buffer.from("fake-jpeg-data")),
98
- selectOption: mock(async () => []),
99
- hover: mock(async () => {}),
100
136
  close: async () => {},
101
137
  isClosed: () => false,
102
- keyboard: { press: mock(async () => {}) },
103
- mouse: { wheel: mock(async () => {}), move: mock(async () => {}) },
138
+ // `LocalCdpClient.ensureSession()` calls `page.context().newCDPSession(
139
+ // page)` to obtain a CDP session. Return the in-file `fakeCdpSession`
140
+ // so tests can assert on the exact CDP method sequence.
141
+ context: () => ({
142
+ newCDPSession: async (_page: unknown) => fakeCdpSession,
143
+ }),
144
+ };
145
+ }
146
+
147
+ /**
148
+ * Default CDP send handler that answers the common plumbing calls
149
+ * used by the migrated tools (querySelectorBackendNodeId, DOM.focus,
150
+ * DOM.resolveNode, Runtime.callFunctionOn, Input.*, and
151
+ * Runtime.evaluate for viewport dimensions). Individual tests can
152
+ * override `sendHandler` to simulate failures or shape responses.
153
+ */
154
+ function defaultCdpHandler(
155
+ method: string,
156
+ _params: Record<string, unknown> | undefined,
157
+ ): unknown {
158
+ switch (method) {
159
+ case "DOM.getDocument":
160
+ return { root: { nodeId: 1 } };
161
+ case "DOM.querySelector":
162
+ return { nodeId: 42 };
163
+ case "DOM.describeNode":
164
+ return { node: { backendNodeId: 100 } };
165
+ case "DOM.resolveNode":
166
+ return { object: { objectId: "obj-1" } };
167
+ case "Runtime.evaluate":
168
+ return { result: { value: { w: 800, h: 600 } } };
169
+ case "Runtime.callFunctionOn":
170
+ // executeBrowserSelectOption invokes a function that returns
171
+ // a `matched` boolean — default to true so wrapper-contract
172
+ // tests don't need to know the inner select-option matching
173
+ // shape. Tests that exercise the no-match path override the
174
+ // handler explicitly.
175
+ return { result: { value: true } };
176
+ default:
177
+ return {};
178
+ }
179
+ }
180
+
181
+ /**
182
+ * Install a CDP `sendHandler` tuned for the click + hover DOM →
183
+ * Input.dispatchMouseEvent chain (`DOM.getDocument`,
184
+ * `DOM.querySelector`, `DOM.describeNode`,
185
+ * `DOM.scrollIntoViewIfNeeded`, `DOM.getBoxModel`,
186
+ * `Input.dispatchMouseEvent`). Tests can override `throwFrom` to make
187
+ * one method reject, or override `backendNodeId` to control what
188
+ * `querySelectorBackendNodeId` resolves to.
189
+ */
190
+ function installClickHoverCdpSend(
191
+ overrides: Partial<{
192
+ backendNodeId: number;
193
+ throwFrom: string;
194
+ }> = {},
195
+ ) {
196
+ const backendNodeId = overrides.backendNodeId ?? 1234;
197
+ const throwFrom = overrides.throwFrom;
198
+
199
+ sendHandler = (method, _params) => {
200
+ if (throwFrom === method) {
201
+ return new Error("cdp boom");
202
+ }
203
+ switch (method) {
204
+ case "DOM.getDocument":
205
+ return { root: { nodeId: 1 } };
206
+ case "DOM.querySelector":
207
+ return { nodeId: 2 };
208
+ case "DOM.describeNode":
209
+ return { node: { backendNodeId } };
210
+ case "DOM.scrollIntoViewIfNeeded":
211
+ return {};
212
+ case "DOM.getBoxModel":
213
+ // Flat 8-number quad: (10,20) (30,20) (30,40) (10,40)
214
+ // → center (20, 30).
215
+ return { model: { content: [10, 20, 30, 20, 30, 40, 10, 40] } };
216
+ case "Input.dispatchMouseEvent":
217
+ return {};
218
+ case "Runtime.evaluate":
219
+ // cdpWaitForSelector (used by click/hover selector branches)
220
+ // polls Runtime.evaluate with the visible-state probe and
221
+ // expects { result: { value: boolean } }. Returning true on
222
+ // the first poll lets the test resolve immediately instead
223
+ // of timing out after ACTION_TIMEOUT_MS.
224
+ return { result: { value: true } };
225
+ default:
226
+ return {};
227
+ }
104
228
  };
105
229
  }
106
230
 
107
231
  // ── browser_click ────────────────────────────────────────────────────
108
232
 
109
- describe("executeBrowserClick", () => {
233
+ describe("executeBrowserClick (CDP)", () => {
110
234
  beforeEach(() => {
111
235
  resetMockPage();
112
- snapshotMaps.clear();
236
+ resetCdpMock();
237
+ snapshotBackendNodeMaps.clear();
113
238
  });
114
239
 
115
- test("clicks by element_id via snapshot map", async () => {
116
- snapshotMaps.set(
117
- "test-conversation",
118
- new Map([["e1", '[data-vellum-eid="e1"]']]),
119
- );
120
- const result = await executeBrowserClick({ element_id: "e1" }, ctx);
240
+ test("clicks by selector: runs full DOM → Input.dispatchMouseEvent chain", async () => {
241
+ installClickHoverCdpSend({ backendNodeId: 5555 });
242
+ const result = await executeBrowserClick({ selector: "#submit-btn" }, ctx);
243
+
121
244
  expect(result.isError).toBe(false);
122
- expect(result.content).toContain("Clicked element");
123
- expect(mockPage.click).toHaveBeenCalledWith('[data-vellum-eid="e1"]', {
124
- timeout: 10000,
245
+ expect(result.content).toContain("Clicked element: #submit-btn");
246
+
247
+ // Expected CDP call sequence for the selector path. The leading
248
+ // Runtime.evaluate is the visible-state probe issued by
249
+ // cdpWaitForSelector before resolving the backend node — this
250
+ // matches Playwright's `page.click(selector, { timeout })`
251
+ // semantics and lets click work on async-hydrated pages.
252
+ const methods = sendCalls.map((c) => c.method);
253
+ expect(methods).toEqual([
254
+ "Runtime.evaluate",
255
+ "DOM.getDocument",
256
+ "DOM.querySelector",
257
+ "DOM.describeNode",
258
+ "DOM.scrollIntoViewIfNeeded",
259
+ "DOM.getBoxModel",
260
+ "Input.dispatchMouseEvent",
261
+ "Input.dispatchMouseEvent",
262
+ "Input.dispatchMouseEvent",
263
+ ]);
264
+
265
+ // The leading Runtime.evaluate is the visible-state probe.
266
+ const visibleProbe = sendCalls.find(
267
+ (c) => c.method === "Runtime.evaluate",
268
+ )!;
269
+ expect(
270
+ (visibleProbe.params as { expression: string }).expression,
271
+ ).toContain("getBoundingClientRect");
272
+
273
+ // Arguments threaded through correctly.
274
+ const qsCall = sendCalls.find((c) => c.method === "DOM.querySelector")!;
275
+ expect(qsCall.params).toMatchObject({ nodeId: 1, selector: "#submit-btn" });
276
+ const scrollCall = sendCalls.find(
277
+ (c) => c.method === "DOM.scrollIntoViewIfNeeded",
278
+ )!;
279
+ expect(scrollCall.params).toMatchObject({ backendNodeId: 5555 });
280
+ const boxCall = sendCalls.find((c) => c.method === "DOM.getBoxModel")!;
281
+ expect(boxCall.params).toMatchObject({ backendNodeId: 5555 });
282
+
283
+ // All three mouse events land on the quad midpoint (20, 30).
284
+ const mouseCalls = sendCalls.filter(
285
+ (c) => c.method === "Input.dispatchMouseEvent",
286
+ );
287
+ expect(mouseCalls).toHaveLength(3);
288
+ expect(mouseCalls[0]!.params).toMatchObject({
289
+ type: "mouseMoved",
290
+ x: 20,
291
+ y: 30,
292
+ button: "left",
293
+ clickCount: 1,
125
294
  });
295
+ expect(mouseCalls[1]!.params).toMatchObject({
296
+ type: "mousePressed",
297
+ x: 20,
298
+ y: 30,
299
+ button: "left",
300
+ clickCount: 1,
301
+ });
302
+ expect(mouseCalls[2]!.params).toMatchObject({
303
+ type: "mouseReleased",
304
+ x: 20,
305
+ y: 30,
306
+ button: "left",
307
+ clickCount: 1,
308
+ });
309
+
310
+ // CdpClient disposed in finally → session.detach called.
311
+ await new Promise((resolve) => setTimeout(resolve, 0));
312
+ expect(detachCalls).toBe(1);
126
313
  });
127
314
 
128
- test("clicks by raw selector", async () => {
129
- const result = await executeBrowserClick({ selector: "#submit-btn" }, ctx);
315
+ test("clicks by element_id (backend path): skips DOM.querySelector", async () => {
316
+ snapshotBackendNodeMaps.set("test-conversation", new Map([["e1", 42]]));
317
+ installClickHoverCdpSend();
318
+
319
+ const result = await executeBrowserClick({ element_id: "e1" }, ctx);
320
+
130
321
  expect(result.isError).toBe(false);
131
- expect(mockPage.click).toHaveBeenCalledWith("#submit-btn", {
132
- timeout: 10000,
133
- });
322
+ expect(result.content).toContain("Clicked element: eid=e1");
323
+
324
+ const methods = sendCalls.map((c) => c.method);
325
+ // Backend path jumps straight to scrollIntoViewIfNeeded — no
326
+ // DOM.getDocument / querySelector / describeNode round-trip.
327
+ expect(methods).not.toContain("DOM.getDocument");
328
+ expect(methods).not.toContain("DOM.querySelector");
329
+ expect(methods).not.toContain("DOM.describeNode");
330
+ expect(methods).toEqual([
331
+ "DOM.scrollIntoViewIfNeeded",
332
+ "DOM.getBoxModel",
333
+ "Input.dispatchMouseEvent",
334
+ "Input.dispatchMouseEvent",
335
+ "Input.dispatchMouseEvent",
336
+ ]);
337
+
338
+ // Backend node id threaded directly from the snapshot map.
339
+ const scrollCall = sendCalls.find(
340
+ (c) => c.method === "DOM.scrollIntoViewIfNeeded",
341
+ )!;
342
+ expect(scrollCall.params).toMatchObject({ backendNodeId: 42 });
343
+ const boxCall = sendCalls.find((c) => c.method === "DOM.getBoxModel")!;
344
+ expect(boxCall.params).toMatchObject({ backendNodeId: 42 });
345
+
346
+ await new Promise((resolve) => setTimeout(resolve, 0));
347
+ expect(detachCalls).toBe(1);
134
348
  });
135
349
 
136
- test("prefers element_id over selector", async () => {
137
- snapshotMaps.set(
138
- "test-conversation",
139
- new Map([["e1", '[data-vellum-eid="e1"]']]),
140
- );
350
+ test("prefers element_id over selector when both provided", async () => {
351
+ snapshotBackendNodeMaps.set("test-conversation", new Map([["e1", 77]]));
352
+ installClickHoverCdpSend();
353
+
141
354
  const result = await executeBrowserClick(
142
- { element_id: "e1", selector: "#other" },
355
+ { element_id: "e1", selector: "#ignored" },
143
356
  ctx,
144
357
  );
145
358
  expect(result.isError).toBe(false);
146
- expect(mockPage.click).toHaveBeenCalledWith('[data-vellum-eid="e1"]', {
147
- timeout: 10000,
148
- });
359
+ expect(result.content).toContain("eid=e1");
360
+
361
+ // DOM.querySelector must NOT have been called (selector ignored).
362
+ const methods = sendCalls.map((c) => c.method);
363
+ expect(methods).not.toContain("DOM.querySelector");
149
364
  });
150
365
 
151
366
  test("errors when neither element_id nor selector provided", async () => {
@@ -154,29 +369,89 @@ describe("executeBrowserClick", () => {
154
369
  expect(result.content).toContain(
155
370
  "Either element_id or selector is required",
156
371
  );
372
+ // No CDP session should have been opened at all.
373
+ expect(sendCalls).toHaveLength(0);
374
+ expect(detachCalls).toBe(0);
157
375
  });
158
376
 
159
377
  test("errors when element_id not found in snapshot map", async () => {
378
+ installClickHoverCdpSend();
160
379
  const result = await executeBrowserClick({ element_id: "e99" }, ctx);
161
380
  expect(result.isError).toBe(true);
162
381
  expect(result.content).toContain('element_id "e99" not found');
163
382
  expect(result.content).toContain("browser_snapshot");
383
+ // Resolution failed before acquiring a CdpClient.
384
+ expect(sendCalls).toHaveLength(0);
164
385
  });
165
386
 
166
- test("errors when snapshot map is missing for session", async () => {
387
+ test("errors when snapshot backend-node map is missing for session", async () => {
388
+ installClickHoverCdpSend();
167
389
  const result = await executeBrowserClick({ element_id: "e1" }, ctx);
168
390
  expect(result.isError).toBe(true);
169
391
  expect(result.content).toContain("not found");
392
+ expect(sendCalls).toHaveLength(0);
170
393
  });
171
394
 
172
- test("handles click error from page", async () => {
173
- mockPage.click = mock(async () => {
174
- throw new Error("Element not visible");
175
- });
176
- const result = await executeBrowserClick({ selector: "#hidden" }, ctx);
395
+ test("returns error + still disposes CdpClient when cdp.send throws", async () => {
396
+ installClickHoverCdpSend({ throwFrom: "Input.dispatchMouseEvent" });
397
+
398
+ const result = await executeBrowserClick({ selector: "#submit-btn" }, ctx);
399
+
177
400
  expect(result.isError).toBe(true);
178
401
  expect(result.content).toContain("Click failed");
179
- expect(result.content).toContain("Element not visible");
402
+ expect(result.content).toContain("cdp boom");
403
+
404
+ // finally { cdp.dispose() } must still fire → detach called.
405
+ await new Promise((resolve) => setTimeout(resolve, 0));
406
+ expect(detachCalls).toBe(1);
407
+ });
408
+
409
+ test("waits for selector that initially doesn't exist but becomes visible", async () => {
410
+ // Simulates a hydrating page: the visible-state probe returns
411
+ // false for the first 2 polls, then true on the 3rd. The click
412
+ // tool must wait through these polls (instead of failing
413
+ // immediately) and then complete the click as normal.
414
+ let visibleProbeCount = 0;
415
+ sendHandler = (method, _params) => {
416
+ switch (method) {
417
+ case "Runtime.evaluate":
418
+ visibleProbeCount++;
419
+ return { result: { value: visibleProbeCount >= 3 } };
420
+ case "DOM.getDocument":
421
+ return { root: { nodeId: 1 } };
422
+ case "DOM.querySelector":
423
+ return { nodeId: 2 };
424
+ case "DOM.describeNode":
425
+ return { node: { backendNodeId: 8888 } };
426
+ case "DOM.scrollIntoViewIfNeeded":
427
+ return {};
428
+ case "DOM.getBoxModel":
429
+ return { model: { content: [10, 20, 30, 20, 30, 40, 10, 40] } };
430
+ case "Input.dispatchMouseEvent":
431
+ return {};
432
+ default:
433
+ return {};
434
+ }
435
+ };
436
+
437
+ const result = await executeBrowserClick({ selector: "#hydrated" }, ctx);
438
+
439
+ expect(result.isError).toBe(false);
440
+ expect(result.content).toContain("Clicked element: #hydrated");
441
+ // The visible-state probe was polled at least 3 times before
442
+ // succeeding, then the rest of the click pipeline ran exactly
443
+ // once.
444
+ expect(visibleProbeCount).toBeGreaterThanOrEqual(3);
445
+ const mouseCalls = sendCalls.filter(
446
+ (c) => c.method === "Input.dispatchMouseEvent",
447
+ );
448
+ expect(mouseCalls).toHaveLength(3);
449
+ // querySelectorBackendNodeId only ran once at the end (after the
450
+ // probe returned true) — not on every polling iteration.
451
+ const describeCalls = sendCalls.filter(
452
+ (c) => c.method === "DOM.describeNode",
453
+ );
454
+ expect(describeCalls).toHaveLength(1);
180
455
  });
181
456
  });
182
457
 
@@ -185,51 +460,68 @@ describe("executeBrowserClick", () => {
185
460
  describe("executeBrowserType", () => {
186
461
  beforeEach(() => {
187
462
  resetMockPage();
188
- snapshotMaps.clear();
463
+ resetCdpMock();
464
+ snapshotBackendNodeMaps.clear();
465
+ sendHandler = defaultCdpHandler;
189
466
  });
190
467
 
191
468
  test("types with element_id and default clear_first=true", async () => {
192
- snapshotMaps.set(
193
- "test-conversation",
194
- new Map([["e3", '[data-vellum-eid="e3"]']]),
195
- );
469
+ snapshotBackendNodeMaps.set("test-conversation", new Map([["e3", 555]]));
196
470
  const result = await executeBrowserType(
197
471
  { element_id: "e3", text: "hello" },
198
472
  ctx,
199
473
  );
200
474
  expect(result.isError).toBe(false);
201
- expect(result.content).toContain("Typed into element");
475
+ expect(result.content).toContain('Typed into element: element_id "e3"');
202
476
  expect(result.content).toContain("cleared existing content");
203
- expect(mockPage.fill).toHaveBeenCalledWith(
204
- '[data-vellum-eid="e3"]',
205
- "hello",
206
- { timeout: 10000 },
207
- );
477
+
478
+ // Expected CDP sequence when resolving by backendNodeId + clearFirst:
479
+ // DOM.focus → DOM.resolveNode → Runtime.callFunctionOn (clear) →
480
+ // DOM.focus → Input.insertText
481
+ const methods = sendCalls.map((c) => c.method);
482
+ expect(methods).toEqual([
483
+ "DOM.focus",
484
+ "DOM.resolveNode",
485
+ "Runtime.callFunctionOn",
486
+ "DOM.focus",
487
+ "Input.insertText",
488
+ ]);
489
+ const focusCall = sendCalls[0]!;
490
+ expect(focusCall.params).toEqual({ backendNodeId: 555 });
491
+ const insertCall = sendCalls[sendCalls.length - 1]!;
492
+ expect(insertCall.params).toEqual({ text: "hello" });
208
493
  });
209
494
 
210
- test("types with raw selector", async () => {
495
+ test("types with raw selector (resolves via DOM.querySelector)", async () => {
211
496
  const result = await executeBrowserType(
212
497
  { selector: 'input[name="email"]', text: "test" },
213
498
  ctx,
214
499
  );
215
500
  expect(result.isError).toBe(false);
216
- expect(mockPage.fill).toHaveBeenCalledWith('input[name="email"]', "test", {
217
- timeout: 10000,
218
- });
501
+ expect(result.content).toContain('Typed into element: input[name="email"]');
502
+ // Raw-selector path must resolve the backendNodeId first.
503
+ const methods = sendCalls.map((c) => c.method);
504
+ expect(methods[0]).toBe("DOM.getDocument");
505
+ expect(methods[1]).toBe("DOM.querySelector");
506
+ expect(methods[2]).toBe("DOM.describeNode");
507
+ expect(methods).toContain("Input.insertText");
219
508
  });
220
509
 
221
510
  test("appends text when clear_first=false", async () => {
222
- mockPage.evaluate = mock(async () => "existing");
223
511
  const result = await executeBrowserType(
224
512
  { selector: "#input", text: " more", clear_first: false },
225
513
  ctx,
226
514
  );
227
515
  expect(result.isError).toBe(false);
228
- expect(mockPage.evaluate).toHaveBeenCalled();
229
- expect(mockPage.fill).toHaveBeenCalledWith("#input", "existing more", {
230
- timeout: 10000,
231
- });
232
516
  expect(result.content).not.toContain("cleared");
517
+ // clear_first=false skips DOM.resolveNode + Runtime.callFunctionOn
518
+ // and the re-focus call, so we should see focus + insertText only.
519
+ const methods = sendCalls.map((c) => c.method);
520
+ expect(methods).not.toContain("DOM.resolveNode");
521
+ expect(methods).not.toContain("Runtime.callFunctionOn");
522
+ const focusCount = methods.filter((m) => m === "DOM.focus").length;
523
+ expect(focusCount).toBe(1);
524
+ expect(methods).toContain("Input.insertText");
233
525
  });
234
526
 
235
527
  test("presses Enter after typing when press_enter=true", async () => {
@@ -239,16 +531,32 @@ describe("executeBrowserType", () => {
239
531
  );
240
532
  expect(result.isError).toBe(false);
241
533
  expect(result.content).toContain("pressed Enter");
242
- expect(mockPage.fill).toHaveBeenCalledWith("#search", "query", {
243
- timeout: 10000,
244
- });
245
- expect(mockPage.press).toHaveBeenCalledWith("#search", "Enter");
534
+ const methods = sendCalls.map((c) => c.method);
535
+ // Input.insertText must come before the Enter keyDown/char/keyUp.
536
+ const insertIdx = methods.indexOf("Input.insertText");
537
+ const keyDownIdx = methods.findIndex(
538
+ (m, i) =>
539
+ m === "Input.dispatchKeyEvent" &&
540
+ (sendCalls[i]!.params as { type: string }).type === "keyDown",
541
+ );
542
+ expect(insertIdx).toBeGreaterThanOrEqual(0);
543
+ expect(keyDownIdx).toBeGreaterThan(insertIdx);
544
+ // Enter is text-producing → keyDown + char + keyUp.
545
+ const keyEvents = sendCalls.filter(
546
+ (c) => c.method === "Input.dispatchKeyEvent",
547
+ );
548
+ expect(keyEvents).toHaveLength(3);
549
+ expect((keyEvents[0]!.params as { key: string }).key).toBe("Enter");
550
+ expect((keyEvents[0]!.params as { type: string }).type).toBe("keyDown");
551
+ expect((keyEvents[1]!.params as { type: string }).type).toBe("char");
552
+ expect((keyEvents[2]!.params as { type: string }).type).toBe("keyUp");
246
553
  });
247
554
 
248
555
  test("errors when text is missing", async () => {
249
556
  const result = await executeBrowserType({ selector: "#input" }, ctx);
250
557
  expect(result.isError).toBe(true);
251
558
  expect(result.content).toContain("text is required");
559
+ expect(sendCalls).toHaveLength(0);
252
560
  });
253
561
 
254
562
  test("errors when text is empty string", async () => {
@@ -258,6 +566,7 @@ describe("executeBrowserType", () => {
258
566
  );
259
567
  expect(result.isError).toBe(true);
260
568
  expect(result.content).toContain("text is required");
569
+ expect(sendCalls).toHaveLength(0);
261
570
  });
262
571
 
263
572
  test("errors when neither element_id nor selector provided", async () => {
@@ -266,6 +575,7 @@ describe("executeBrowserType", () => {
266
575
  expect(result.content).toContain(
267
576
  "Either element_id or selector is required",
268
577
  );
578
+ expect(sendCalls).toHaveLength(0);
269
579
  });
270
580
 
271
581
  test("errors when element_id not found", async () => {
@@ -275,143 +585,33 @@ describe("executeBrowserType", () => {
275
585
  );
276
586
  expect(result.isError).toBe(true);
277
587
  expect(result.content).toContain('element_id "e99" not found');
588
+ expect(sendCalls).toHaveLength(0);
278
589
  });
279
590
 
280
- test("handles type error from page", async () => {
281
- mockPage.fill = mock(async () => {
282
- throw new Error("Element is not an input");
283
- });
591
+ test("surfaces CDP failure as a type error", async () => {
592
+ sendHandler = () => new Error("focus failed");
284
593
  const result = await executeBrowserType(
285
594
  { selector: "#div", text: "hello" },
286
595
  ctx,
287
596
  );
288
597
  expect(result.isError).toBe(true);
289
598
  expect(result.content).toContain("Type failed");
290
- expect(result.content).toContain("Element is not an input");
599
+ expect(result.content).toContain("focus failed");
291
600
  });
292
601
  });
293
602
 
294
- // ── browser_snapshot ──────────────────────────────────────────────────
295
-
296
- describe("executeBrowserSnapshot", () => {
297
- beforeEach(() => {
298
- resetMockPage();
299
- snapshotMaps.clear();
300
- });
301
-
302
- test("returns element list with eid format", async () => {
303
- const sampleElements = [
304
- { eid: "e1", tag: "a", attrs: { href: "/about" }, text: "About Us" },
305
- { eid: "e2", tag: "button", attrs: { type: "submit" }, text: "Submit" },
306
- {
307
- eid: "e3",
308
- tag: "input",
309
- attrs: { type: "text", name: "email", placeholder: "Enter email" },
310
- text: "",
311
- },
312
- ];
313
- mockPage.evaluate = mock(async () => sampleElements);
314
- const result = await executeBrowserSnapshot({}, ctx);
315
- expect(result.isError).toBe(false);
316
- expect(result.content).toContain("[e1]");
317
- expect(result.content).toContain("[e2]");
318
- expect(result.content).toContain("[e3]");
319
- expect(result.content).toContain("<a");
320
- expect(result.content).toContain("<button");
321
- expect(result.content).toContain("<input");
322
- expect(result.content).toContain("3 interactive elements found");
323
- });
324
-
325
- test("stores snapshot map for later element resolution", async () => {
326
- const sampleElements = [
327
- { eid: "e1", tag: "a", attrs: { href: "/" }, text: "Home" },
328
- ];
329
- mockPage.evaluate = mock(async () => sampleElements);
330
- await executeBrowserSnapshot({}, ctx);
331
- const map = snapshotMaps.get("test-conversation");
332
- expect(map).toBeDefined();
333
- expect(map!.get("e1")).toBe('[data-vellum-eid="e1"]');
334
- });
335
-
336
- test("reports no interactive elements when page is empty", async () => {
337
- mockPage.evaluate = mock(async () => []);
338
- const result = await executeBrowserSnapshot({}, ctx);
339
- expect(result.isError).toBe(false);
340
- expect(result.content).toContain("no interactive elements found");
341
- });
342
-
343
- test("includes page URL and title", async () => {
344
- mockPage.evaluate = mock(async () => []);
345
- const result = await executeBrowserSnapshot({}, ctx);
346
- expect(result.content).toContain("URL: https://example.com/");
347
- expect(result.content).toContain("Title: Test Page");
348
- });
349
-
350
- test("handles snapshot error from page", async () => {
351
- mockPage.evaluate = mock(async () => {
352
- throw new Error("Page crashed");
353
- });
354
- const result = await executeBrowserSnapshot({}, ctx);
355
- expect(result.isError).toBe(true);
356
- expect(result.content).toContain("Snapshot failed");
357
- expect(result.content).toContain("Page crashed");
358
- });
359
- });
603
+ // NOTE: executeBrowserSnapshot tests live in
604
+ // `headless-browser-snapshot.test.ts`.
360
605
 
361
- // ── browser_screenshot ───────────────────────────────────────────────
362
-
363
- describe("executeBrowserScreenshot", () => {
364
- beforeEach(() => {
365
- resetMockPage();
366
- });
367
-
368
- test("captures and returns image content", async () => {
369
- const fakeBuffer = Buffer.from("fake-jpeg-screenshot-data");
370
- mockPage.screenshot = mock(async () => fakeBuffer);
371
- const result = await executeBrowserScreenshot({}, ctx);
372
- expect(result.isError).toBe(false);
373
- expect(result.content).toContain("Screenshot captured");
374
- expect(result.content).toContain(`${fakeBuffer.length} bytes`);
375
- expect(result.content).toContain("viewport");
376
- expect(result.contentBlocks).toBeDefined();
377
- expect(result.contentBlocks!.length).toBe(1);
378
- const imageBlock = result.contentBlocks![0] as {
379
- type: string;
380
- source: { type: string; media_type: string; data: string };
381
- };
382
- expect(imageBlock.type).toBe("image");
383
- expect(imageBlock.source.media_type).toBe("image/jpeg");
384
- expect(imageBlock.source.data).toBe(fakeBuffer.toString("base64"));
385
- });
386
-
387
- test("supports full_page mode", async () => {
388
- mockPage.screenshot = mock(async () => Buffer.from("full"));
389
- const result = await executeBrowserScreenshot({ full_page: true }, ctx);
390
- expect(result.isError).toBe(false);
391
- expect(result.content).toContain("full page");
392
- expect(mockPage.screenshot).toHaveBeenCalledWith({
393
- type: "jpeg",
394
- quality: 80,
395
- fullPage: true,
396
- });
397
- });
398
-
399
- test("handles screenshot error from page", async () => {
400
- mockPage.screenshot = mock(async () => {
401
- throw new Error("Render failed");
402
- });
403
- const result = await executeBrowserScreenshot({}, ctx);
404
- expect(result.isError).toBe(true);
405
- expect(result.content).toContain("Screenshot failed");
406
- expect(result.content).toContain("Render failed");
407
- });
408
- });
606
+ // browser_screenshot tests live in headless-browser-read-tools.test.ts
607
+ // (alongside browser_extract / browser_wait_for).
409
608
 
410
609
  // ── browser_close ────────────────────────────────────────────────────
411
610
 
412
611
  describe("executeBrowserClose", () => {
413
612
  beforeEach(() => {
414
613
  resetMockPage();
614
+ resetCdpMock();
415
615
  });
416
616
 
417
617
  test("closes session page", async () => {
@@ -429,92 +629,63 @@ describe("executeBrowserClose", () => {
429
629
  });
430
630
  });
431
631
 
432
- // ── browser_extract ──────────────────────────────────────────────────
433
-
434
- describe("executeBrowserExtract", () => {
435
- beforeEach(() => {
436
- resetMockPage();
437
- });
438
-
439
- test("extracts text content from page", async () => {
440
- mockPage.evaluate = mock(
441
- async () => "Hello, this is the page text content.",
442
- );
443
- const result = await executeBrowserExtract({}, ctx);
444
- expect(result.isError).toBe(false);
445
- expect(result.content).toContain("URL: https://example.com/");
446
- expect(result.content).toContain("Title: Test Page");
447
- expect(result.content).toContain("Hello, this is the page text content.");
448
- });
449
-
450
- test("includes links when include_links=true", async () => {
451
- // First call returns text content, second returns link list
452
- let callCount = 0;
453
- mockPage.evaluate = mock(async () => {
454
- callCount++;
455
- if (callCount === 1) return "Some text";
456
- return [
457
- { text: "Example Link", href: "https://example.com/link1" },
458
- { text: "Another", href: "https://example.com/link2" },
459
- ];
460
- });
461
- const result = await executeBrowserExtract({ include_links: true }, ctx);
462
- expect(result.isError).toBe(false);
463
- expect(result.content).toContain("Links:");
464
- expect(result.content).toContain(
465
- "[Example Link](https://example.com/link1)",
466
- );
467
- expect(result.content).toContain("[Another](https://example.com/link2)");
468
- });
469
-
470
- test("handles empty page", async () => {
471
- mockPage.evaluate = mock(async () => "");
472
- const result = await executeBrowserExtract({}, ctx);
473
- expect(result.isError).toBe(false);
474
- expect(result.content).toContain("(empty page)");
475
- });
476
-
477
- test("handles extract error from page", async () => {
478
- mockPage.evaluate = mock(async () => {
479
- throw new Error("Page not loaded");
480
- });
481
- const result = await executeBrowserExtract({}, ctx);
482
- expect(result.isError).toBe(true);
483
- expect(result.content).toContain("Extract failed");
484
- expect(result.content).toContain("Page not loaded");
485
- });
486
- });
632
+ // browser_extract tests live in headless-browser-read-tools.test.ts
633
+ // because it drives CDP via getCdpClient() rather than the
634
+ // Playwright page mock this file uses.
487
635
 
488
636
  // ── browser_press_key ────────────────────────────────────────────────
489
637
 
490
638
  describe("executeBrowserPressKey", () => {
491
639
  beforeEach(() => {
492
640
  resetMockPage();
493
- snapshotMaps.clear();
641
+ resetCdpMock();
642
+ snapshotBackendNodeMaps.clear();
643
+ sendHandler = defaultCdpHandler;
494
644
  });
495
645
 
496
- test("presses key on page (focused element) when no target", async () => {
646
+ test("presses key on focused element when no target", async () => {
497
647
  const result = await executeBrowserPressKey({ key: "Enter" }, ctx);
498
648
  expect(result.isError).toBe(false);
499
649
  expect(result.content).toContain('Pressed "Enter"');
500
- expect(mockPage.keyboard.press).toHaveBeenCalledWith("Enter");
650
+ // No target => no DOM.focus, no selector resolution. Enter is a
651
+ // text-producing key (text "\r") so dispatchKeyPress emits
652
+ // keyDown + char + keyUp.
653
+ const methods = sendCalls.map((c) => c.method);
654
+ expect(methods).toEqual([
655
+ "Input.dispatchKeyEvent",
656
+ "Input.dispatchKeyEvent",
657
+ "Input.dispatchKeyEvent",
658
+ ]);
659
+ const keyDown = sendCalls[0]!.params as Record<string, unknown>;
660
+ const charEvt = sendCalls[1]!.params as Record<string, unknown>;
661
+ const keyUp = sendCalls[2]!.params as Record<string, unknown>;
662
+ expect(keyDown.type).toBe("keyDown");
663
+ expect(keyDown.key).toBe("Enter");
664
+ expect(keyDown.windowsVirtualKeyCode).toBe(13);
665
+ expect(charEvt.type).toBe("char");
666
+ expect(keyUp.type).toBe("keyUp");
667
+ expect(keyUp.key).toBe("Enter");
501
668
  });
502
669
 
503
670
  test("presses key on targeted element via element_id", async () => {
504
- snapshotMaps.set(
505
- "test-conversation",
506
- new Map([["e5", '[data-vellum-eid="e5"]']]),
507
- );
671
+ snapshotBackendNodeMaps.set("test-conversation", new Map([["e5", 555]]));
508
672
  const result = await executeBrowserPressKey(
509
673
  { key: "Tab", element_id: "e5" },
510
674
  ctx,
511
675
  );
512
676
  expect(result.isError).toBe(false);
513
677
  expect(result.content).toContain('Pressed "Tab" on element');
514
- expect(mockPage.press).toHaveBeenCalledWith(
515
- '[data-vellum-eid="e5"]',
516
- "Tab",
517
- );
678
+ expect(result.content).toContain('element_id "e5"');
679
+ // Backend-resolved path: focus → dispatchKeyEvent × 3 (Tab is
680
+ // text-producing so we also dispatch a char event).
681
+ const methods = sendCalls.map((c) => c.method);
682
+ expect(methods).toEqual([
683
+ "DOM.focus",
684
+ "Input.dispatchKeyEvent",
685
+ "Input.dispatchKeyEvent",
686
+ "Input.dispatchKeyEvent",
687
+ ]);
688
+ expect(sendCalls[0]!.params).toEqual({ backendNodeId: 555 });
518
689
  });
519
690
 
520
691
  test("presses key on targeted element via selector", async () => {
@@ -524,13 +695,24 @@ describe("executeBrowserPressKey", () => {
524
695
  );
525
696
  expect(result.isError).toBe(false);
526
697
  expect(result.content).toContain('Pressed "Escape" on element');
527
- expect(mockPage.press).toHaveBeenCalledWith("#dialog", "Escape");
698
+ // Selector path: DOM.getDocument → DOM.querySelector → DOM.describeNode
699
+ // → DOM.focus → dispatchKeyEvent × 2 (Escape has no text, so no char event).
700
+ const methods = sendCalls.map((c) => c.method);
701
+ expect(methods).toEqual([
702
+ "DOM.getDocument",
703
+ "DOM.querySelector",
704
+ "DOM.describeNode",
705
+ "DOM.focus",
706
+ "Input.dispatchKeyEvent",
707
+ "Input.dispatchKeyEvent",
708
+ ]);
528
709
  });
529
710
 
530
711
  test("errors when key is missing", async () => {
531
712
  const result = await executeBrowserPressKey({}, ctx);
532
713
  expect(result.isError).toBe(true);
533
714
  expect(result.content).toContain("key is required");
715
+ expect(sendCalls).toHaveLength(0);
534
716
  });
535
717
 
536
718
  test("errors when element_id not found", async () => {
@@ -540,12 +722,11 @@ describe("executeBrowserPressKey", () => {
540
722
  );
541
723
  expect(result.isError).toBe(true);
542
724
  expect(result.content).toContain('element_id "e99" not found');
725
+ expect(sendCalls).toHaveLength(0);
543
726
  });
544
727
 
545
- test("handles press key error from page", async () => {
546
- mockPage.keyboard.press = mock(async () => {
547
- throw new Error("Key not recognized");
548
- });
728
+ test("surfaces CDP failure as a press-key error", async () => {
729
+ sendHandler = () => new Error("Key not recognized");
549
730
  const result = await executeBrowserPressKey({ key: "InvalidKey" }, ctx);
550
731
  expect(result.isError).toBe(true);
551
732
  expect(result.content).toContain("Press key failed");
@@ -558,14 +739,32 @@ describe("executeBrowserPressKey", () => {
558
739
  describe("executeBrowserScroll", () => {
559
740
  beforeEach(() => {
560
741
  resetMockPage();
561
- snapshotMaps.clear();
742
+ resetCdpMock();
743
+ sendHandler = defaultCdpHandler;
562
744
  });
563
745
 
564
746
  test("scrolls down by default amount", async () => {
565
747
  const result = await executeBrowserScroll({ direction: "down" }, ctx);
566
748
  expect(result.isError).toBe(false);
567
749
  expect(result.content).toContain("Scrolled down by 500px");
568
- expect(mockPage.mouse.wheel).toHaveBeenCalledWith(0, 500);
750
+ // Runtime.evaluate for viewport dimensions, then a single
751
+ // Input.dispatchMouseEvent mouseWheel at the viewport center.
752
+ const evaluateCall = sendCalls.find((c) => c.method === "Runtime.evaluate");
753
+ expect(evaluateCall).toBeDefined();
754
+ expect((evaluateCall!.params as { expression: string }).expression).toBe(
755
+ "({ w: window.innerWidth, h: window.innerHeight })",
756
+ );
757
+ const wheelCall = sendCalls.find(
758
+ (c) => c.method === "Input.dispatchMouseEvent",
759
+ );
760
+ expect(wheelCall).toBeDefined();
761
+ expect(wheelCall!.params).toEqual({
762
+ type: "mouseWheel",
763
+ x: 400,
764
+ y: 300,
765
+ deltaX: 0,
766
+ deltaY: 500,
767
+ });
569
768
  });
570
769
 
571
770
  test("scrolls up by custom amount", async () => {
@@ -575,7 +774,16 @@ describe("executeBrowserScroll", () => {
575
774
  );
576
775
  expect(result.isError).toBe(false);
577
776
  expect(result.content).toContain("Scrolled up by 300px");
578
- expect(mockPage.mouse.wheel).toHaveBeenCalledWith(0, -300);
777
+ const wheelCall = sendCalls.find(
778
+ (c) => c.method === "Input.dispatchMouseEvent",
779
+ );
780
+ expect(wheelCall!.params).toEqual({
781
+ type: "mouseWheel",
782
+ x: 400,
783
+ y: 300,
784
+ deltaX: 0,
785
+ deltaY: -300,
786
+ });
579
787
  });
580
788
 
581
789
  test("scrolls left", async () => {
@@ -584,7 +792,16 @@ describe("executeBrowserScroll", () => {
584
792
  ctx,
585
793
  );
586
794
  expect(result.isError).toBe(false);
587
- expect(mockPage.mouse.wheel).toHaveBeenCalledWith(-200, 0);
795
+ const wheelCall = sendCalls.find(
796
+ (c) => c.method === "Input.dispatchMouseEvent",
797
+ );
798
+ expect(wheelCall!.params).toEqual({
799
+ type: "mouseWheel",
800
+ x: 400,
801
+ y: 300,
802
+ deltaX: -200,
803
+ deltaY: 0,
804
+ });
588
805
  });
589
806
 
590
807
  test("scrolls right", async () => {
@@ -593,35 +810,79 @@ describe("executeBrowserScroll", () => {
593
810
  ctx,
594
811
  );
595
812
  expect(result.isError).toBe(false);
596
- expect(mockPage.mouse.wheel).toHaveBeenCalledWith(200, 0);
813
+ const wheelCall = sendCalls.find(
814
+ (c) => c.method === "Input.dispatchMouseEvent",
815
+ );
816
+ expect(wheelCall!.params).toEqual({
817
+ type: "mouseWheel",
818
+ x: 400,
819
+ y: 300,
820
+ deltaX: 200,
821
+ deltaY: 0,
822
+ });
597
823
  });
598
824
 
599
825
  test("errors when direction is missing", async () => {
600
826
  const result = await executeBrowserScroll({}, ctx);
601
827
  expect(result.isError).toBe(true);
602
828
  expect(result.content).toContain("direction is required");
829
+ expect(sendCalls).toHaveLength(0);
603
830
  });
604
831
 
605
832
  test("errors when direction is invalid", async () => {
606
833
  const result = await executeBrowserScroll({ direction: "diagonal" }, ctx);
607
834
  expect(result.isError).toBe(true);
608
835
  expect(result.content).toContain("direction is required");
836
+ expect(sendCalls).toHaveLength(0);
837
+ });
838
+
839
+ test("surfaces CDP failure as a scroll error", async () => {
840
+ sendHandler = () => new Error("viewport unavailable");
841
+ const result = await executeBrowserScroll({ direction: "down" }, ctx);
842
+ expect(result.isError).toBe(true);
843
+ expect(result.content).toContain("Scroll failed");
844
+ expect(result.content).toContain("viewport unavailable");
609
845
  });
610
846
  });
611
847
 
612
848
  // ── browser_select_option ────────────────────────────────────────────
613
849
 
850
+ /**
851
+ * Default handler tuned for select-option tests. The Runtime.callFunctionOn
852
+ * call now returns whether an option matched; tests assert on this
853
+ * via `result.value`.
854
+ */
855
+ function selectOptionHandler(
856
+ matched = true,
857
+ ): (method: string, params?: Record<string, unknown>) => unknown {
858
+ return (method, _params) => {
859
+ switch (method) {
860
+ case "DOM.getDocument":
861
+ return { root: { nodeId: 1 } };
862
+ case "DOM.querySelector":
863
+ return { nodeId: 42 };
864
+ case "DOM.describeNode":
865
+ return { node: { backendNodeId: 100 } };
866
+ case "DOM.resolveNode":
867
+ return { object: { objectId: "obj-1" } };
868
+ case "Runtime.callFunctionOn":
869
+ return { result: { value: matched } };
870
+ default:
871
+ return {};
872
+ }
873
+ };
874
+ }
875
+
614
876
  describe("executeBrowserSelectOption", () => {
615
877
  beforeEach(() => {
616
878
  resetMockPage();
617
- snapshotMaps.clear();
879
+ resetCdpMock();
880
+ snapshotBackendNodeMaps.clear();
881
+ sendHandler = selectOptionHandler();
618
882
  });
619
883
 
620
884
  test("selects by value via element_id", async () => {
621
- snapshotMaps.set(
622
- "test-conversation",
623
- new Map([["e4", '[data-vellum-eid="e4"]']]),
624
- );
885
+ snapshotBackendNodeMaps.set("test-conversation", new Map([["e4", 777]]));
625
886
  const result = await executeBrowserSelectOption(
626
887
  { element_id: "e4", value: "ca" },
627
888
  ctx,
@@ -629,10 +890,26 @@ describe("executeBrowserSelectOption", () => {
629
890
  expect(result.isError).toBe(false);
630
891
  expect(result.content).toContain("Selected option");
631
892
  expect(result.content).toContain('value="ca"');
632
- expect(mockPage.selectOption).toHaveBeenCalledWith(
633
- '[data-vellum-eid="e4"]',
893
+ expect(result.content).toContain('element_id "e4"');
894
+
895
+ // Expected CDP sequence: DOM.resolveNode → Runtime.callFunctionOn
896
+ const methods = sendCalls.map((c) => c.method);
897
+ expect(methods).toEqual(["DOM.resolveNode", "Runtime.callFunctionOn"]);
898
+ expect(sendCalls[0]!.params).toEqual({ backendNodeId: 777 });
899
+ const callFn = sendCalls[1]!.params as {
900
+ objectId: string;
901
+ arguments: Array<{ value: unknown }>;
902
+ returnByValue?: boolean;
903
+ };
904
+ expect(callFn.objectId).toBe("obj-1");
905
+ expect(callFn.arguments).toEqual([
634
906
  { value: "ca" },
635
- );
907
+ { value: null },
908
+ { value: null },
909
+ ]);
910
+ // returnByValue must be true so the matched boolean comes back
911
+ // primitive instead of as a RemoteObject reference.
912
+ expect(callFn.returnByValue).toBe(true);
636
913
  });
637
914
 
638
915
  test("selects by label", async () => {
@@ -642,9 +919,23 @@ describe("executeBrowserSelectOption", () => {
642
919
  );
643
920
  expect(result.isError).toBe(false);
644
921
  expect(result.content).toContain('label="California"');
645
- expect(mockPage.selectOption).toHaveBeenCalledWith("#state", {
646
- label: "California",
647
- });
922
+ // Selector path: querySelectorBackendNodeId sequence + DOM.resolveNode + Runtime.callFunctionOn
923
+ const methods = sendCalls.map((c) => c.method);
924
+ expect(methods).toEqual([
925
+ "DOM.getDocument",
926
+ "DOM.querySelector",
927
+ "DOM.describeNode",
928
+ "DOM.resolveNode",
929
+ "Runtime.callFunctionOn",
930
+ ]);
931
+ const callFn = sendCalls[4]!.params as {
932
+ arguments: Array<{ value: unknown }>;
933
+ };
934
+ expect(callFn.arguments).toEqual([
935
+ { value: null },
936
+ { value: "California" },
937
+ { value: null },
938
+ ]);
648
939
  });
649
940
 
650
941
  test("selects by index", async () => {
@@ -654,7 +945,39 @@ describe("executeBrowserSelectOption", () => {
654
945
  );
655
946
  expect(result.isError).toBe(false);
656
947
  expect(result.content).toContain("index=2");
657
- expect(mockPage.selectOption).toHaveBeenCalledWith("#state", { index: 2 });
948
+ const callFn = sendCalls.find((c) => c.method === "Runtime.callFunctionOn")!
949
+ .params as { arguments: Array<{ value: unknown }> };
950
+ expect(callFn.arguments).toEqual([
951
+ { value: null },
952
+ { value: null },
953
+ { value: 2 },
954
+ ]);
955
+ });
956
+
957
+ test("returns error when no option matches", async () => {
958
+ sendHandler = selectOptionHandler(false);
959
+ const result = await executeBrowserSelectOption(
960
+ { selector: "#state", value: "nope" },
961
+ ctx,
962
+ );
963
+ expect(result.isError).toBe(true);
964
+ expect(result.content).toContain("Select option failed");
965
+ expect(result.content).toContain("no option matched");
966
+ expect(result.content).toContain('value="nope"');
967
+ });
968
+
969
+ test("dispatches input + change events via the function declaration", async () => {
970
+ await executeBrowserSelectOption({ selector: "#state", value: "ca" }, ctx);
971
+ const callFn = sendCalls.find((c) => c.method === "Runtime.callFunctionOn")!
972
+ .params as { functionDeclaration: string };
973
+ // The function body must dispatch BOTH input and change events
974
+ // (HTML spec order: input fires before change for <select>).
975
+ expect(callFn.functionDeclaration).toContain('new Event("input"');
976
+ expect(callFn.functionDeclaration).toContain('new Event("change"');
977
+ const inputIdx = callFn.functionDeclaration.indexOf('new Event("input"');
978
+ const changeIdx = callFn.functionDeclaration.indexOf('new Event("change"');
979
+ expect(inputIdx).toBeGreaterThanOrEqual(0);
980
+ expect(changeIdx).toBeGreaterThan(inputIdx);
658
981
  });
659
982
 
660
983
  test("errors when no option specifier provided", async () => {
@@ -666,6 +989,7 @@ describe("executeBrowserSelectOption", () => {
666
989
  expect(result.content).toContain(
667
990
  "One of value, label, or index is required",
668
991
  );
992
+ expect(sendCalls).toHaveLength(0);
669
993
  });
670
994
 
671
995
  test("errors when neither element_id nor selector provided", async () => {
@@ -674,12 +998,11 @@ describe("executeBrowserSelectOption", () => {
674
998
  expect(result.content).toContain(
675
999
  "Either element_id or selector is required",
676
1000
  );
1001
+ expect(sendCalls).toHaveLength(0);
677
1002
  });
678
1003
 
679
- test("handles select option error from page", async () => {
680
- mockPage.selectOption = mock(async () => {
681
- throw new Error("Not a select element");
682
- });
1004
+ test("surfaces CDP failure as a select-option error", async () => {
1005
+ sendHandler = () => new Error("Not a select element");
683
1006
  const result = await executeBrowserSelectOption(
684
1007
  { selector: "#div", value: "x" },
685
1008
  ctx,
@@ -692,34 +1015,72 @@ describe("executeBrowserSelectOption", () => {
692
1015
 
693
1016
  // ── browser_hover ────────────────────────────────────────────────────
694
1017
 
695
- describe("executeBrowserHover", () => {
1018
+ describe("executeBrowserHover (CDP)", () => {
696
1019
  beforeEach(() => {
697
1020
  resetMockPage();
698
- snapshotMaps.clear();
699
- });
700
-
701
- test("hovers by element_id via snapshot map", async () => {
702
- snapshotMaps.set(
703
- "test-conversation",
704
- new Map([["e2", '[data-vellum-eid="e2"]']]),
705
- );
706
- const result = await executeBrowserHover({ element_id: "e2" }, ctx);
707
- expect(result.isError).toBe(false);
708
- expect(result.content).toContain("Hovered element");
709
- expect(mockPage.hover).toHaveBeenCalledWith('[data-vellum-eid="e2"]', {
710
- timeout: 10000,
711
- });
1021
+ resetCdpMock();
1022
+ snapshotBackendNodeMaps.clear();
712
1023
  });
713
1024
 
714
- test("hovers by raw selector", async () => {
1025
+ test("hovers by selector: emits a single mouseMoved event", async () => {
1026
+ installClickHoverCdpSend({ backendNodeId: 9000 });
715
1027
  const result = await executeBrowserHover(
716
1028
  { selector: ".menu-trigger" },
717
1029
  ctx,
718
1030
  );
719
1031
  expect(result.isError).toBe(false);
720
- expect(mockPage.hover).toHaveBeenCalledWith(".menu-trigger", {
721
- timeout: 10000,
1032
+ expect(result.content).toContain("Hovered element: .menu-trigger");
1033
+
1034
+ // Selector path waits for the element to become visible via
1035
+ // cdpWaitForSelector before resolving the backend node.
1036
+ const methods = sendCalls.map((c) => c.method);
1037
+ expect(methods).toEqual([
1038
+ "Runtime.evaluate",
1039
+ "DOM.getDocument",
1040
+ "DOM.querySelector",
1041
+ "DOM.describeNode",
1042
+ "DOM.scrollIntoViewIfNeeded",
1043
+ "DOM.getBoxModel",
1044
+ "Input.dispatchMouseEvent",
1045
+ ]);
1046
+
1047
+ // Exactly ONE mouseMoved event (no press/release) → hover semantics.
1048
+ const mouseCalls = sendCalls.filter(
1049
+ (c) => c.method === "Input.dispatchMouseEvent",
1050
+ );
1051
+ expect(mouseCalls).toHaveLength(1);
1052
+ expect(mouseCalls[0]!.params).toMatchObject({
1053
+ type: "mouseMoved",
1054
+ x: 20,
1055
+ y: 30,
1056
+ button: "none",
722
1057
  });
1058
+
1059
+ await new Promise((resolve) => setTimeout(resolve, 0));
1060
+ expect(detachCalls).toBe(1);
1061
+ });
1062
+
1063
+ test("hovers by element_id (backend path): skips DOM.querySelector", async () => {
1064
+ snapshotBackendNodeMaps.set("test-conversation", new Map([["e2", 12]]));
1065
+ installClickHoverCdpSend();
1066
+
1067
+ const result = await executeBrowserHover({ element_id: "e2" }, ctx);
1068
+
1069
+ expect(result.isError).toBe(false);
1070
+ expect(result.content).toContain("Hovered element: eid=e2");
1071
+
1072
+ const methods = sendCalls.map((c) => c.method);
1073
+ expect(methods).not.toContain("DOM.querySelector");
1074
+ expect(methods).toEqual([
1075
+ "DOM.scrollIntoViewIfNeeded",
1076
+ "DOM.getBoxModel",
1077
+ "Input.dispatchMouseEvent",
1078
+ ]);
1079
+
1080
+ const scrollCall = sendCalls.find(
1081
+ (c) => c.method === "DOM.scrollIntoViewIfNeeded",
1082
+ )!;
1083
+ expect(scrollCall.params).toMatchObject({ backendNodeId: 12 });
723
1084
  });
724
1085
 
725
1086
  test("errors when neither element_id nor selector provided", async () => {
@@ -728,22 +1089,28 @@ describe("executeBrowserHover", () => {
728
1089
  expect(result.content).toContain(
729
1090
  "Either element_id or selector is required",
730
1091
  );
1092
+ expect(sendCalls).toHaveLength(0);
731
1093
  });
732
1094
 
733
1095
  test("errors when element_id not found in snapshot map", async () => {
1096
+ installClickHoverCdpSend();
734
1097
  const result = await executeBrowserHover({ element_id: "e99" }, ctx);
735
1098
  expect(result.isError).toBe(true);
736
1099
  expect(result.content).toContain('element_id "e99" not found');
1100
+ expect(sendCalls).toHaveLength(0);
737
1101
  });
738
1102
 
739
- test("handles hover error from page", async () => {
740
- mockPage.hover = mock(async () => {
741
- throw new Error("Element detached");
742
- });
1103
+ test("returns error + still disposes CdpClient when cdp.send throws", async () => {
1104
+ installClickHoverCdpSend({ throwFrom: "DOM.getBoxModel" });
1105
+
743
1106
  const result = await executeBrowserHover({ selector: "#gone" }, ctx);
1107
+
744
1108
  expect(result.isError).toBe(true);
745
1109
  expect(result.content).toContain("Hover failed");
746
- expect(result.content).toContain("Element detached");
1110
+ expect(result.content).toContain("cdp boom");
1111
+
1112
+ await new Promise((resolve) => setTimeout(resolve, 0));
1113
+ expect(detachCalls).toBe(1);
747
1114
  });
748
1115
  });
749
1116
 
@@ -754,14 +1121,14 @@ describe("executeBrowserHover", () => {
754
1121
  describe("browser execution wrapper contract", () => {
755
1122
  beforeEach(() => {
756
1123
  resetMockPage();
757
- snapshotMaps.clear();
1124
+ resetCdpMock();
1125
+ sendHandler = defaultCdpHandler;
1126
+ snapshotBackendNodeMaps.clear();
758
1127
  });
759
1128
 
760
1129
  test("executeBrowserClick matches wrapper contract (input, context) → result", async () => {
761
- snapshotMaps.set(
762
- "test-conversation",
763
- new Map([["e1", '[data-vellum-eid="e1"]']]),
764
- );
1130
+ installClickHoverCdpSend();
1131
+ snapshotBackendNodeMaps.set("test-conversation", new Map([["e1", 1]]));
765
1132
  const result = await executeBrowserClick({ element_id: "e1" }, ctx);
766
1133
  expect(result).toHaveProperty("content");
767
1134
  expect(result).toHaveProperty("isError");
@@ -771,10 +1138,7 @@ describe("browser execution wrapper contract", () => {
771
1138
  });
772
1139
 
773
1140
  test("executeBrowserType matches wrapper contract", async () => {
774
- snapshotMaps.set(
775
- "test-conversation",
776
- new Map([["e3", '[data-vellum-eid="e3"]']]),
777
- );
1141
+ snapshotBackendNodeMaps.set("test-conversation", new Map([["e3", 555]]));
778
1142
  const result = await executeBrowserType(
779
1143
  { element_id: "e3", text: "hello" },
780
1144
  ctx,
@@ -784,27 +1148,12 @@ describe("browser execution wrapper contract", () => {
784
1148
  expect(result.isError).toBe(false);
785
1149
  });
786
1150
 
787
- test("executeBrowserSnapshot matches wrapper contract", async () => {
788
- mockPage.evaluate = mock(async () => [
789
- { eid: "e1", tag: "button", attrs: {}, text: "Click me" },
790
- ]);
791
- mockPage.title = mock(async () => "Test");
792
- mockPage.url = mock(() => "https://example.com");
793
- const result = await executeBrowserSnapshot({}, ctx);
794
- expect(result).toHaveProperty("content");
795
- expect(result).toHaveProperty("isError");
796
- expect(result.isError).toBe(false);
797
- });
1151
+ // executeBrowserSnapshot wrapper-contract check lives in
1152
+ // `headless-browser-snapshot.test.ts`.
798
1153
 
799
- test("executeBrowserExtract matches wrapper contract", async () => {
800
- mockPage.evaluate = mock(async () => "Page text content");
801
- mockPage.title = mock(async () => "Test");
802
- mockPage.url = mock(() => "https://example.com");
803
- const result = await executeBrowserExtract({}, ctx);
804
- expect(result).toHaveProperty("content");
805
- expect(result).toHaveProperty("isError");
806
- expect(result.isError).toBe(false);
807
- });
1154
+ // wrapper contract for executeBrowserExtract and
1155
+ // executeBrowserScreenshot lives in
1156
+ // headless-browser-read-tools.test.ts.
808
1157
 
809
1158
  test("executeBrowserPressKey matches wrapper contract", async () => {
810
1159
  const result = await executeBrowserPressKey({ key: "Enter" }, ctx);
@@ -813,14 +1162,6 @@ describe("browser execution wrapper contract", () => {
813
1162
  expect(result.isError).toBe(false);
814
1163
  });
815
1164
 
816
- test("executeBrowserScreenshot matches wrapper contract", async () => {
817
- mockPage.screenshot = mock(async () => Buffer.from("fake-image"));
818
- const result = await executeBrowserScreenshot({}, ctx);
819
- expect(result).toHaveProperty("content");
820
- expect(result).toHaveProperty("isError");
821
- expect(result.isError).toBe(false);
822
- });
823
-
824
1165
  test("executeBrowserClose matches wrapper contract", async () => {
825
1166
  const result = await executeBrowserClose({}, ctx);
826
1167
  expect(result).toHaveProperty("content");
@@ -836,10 +1177,7 @@ describe("browser execution wrapper contract", () => {
836
1177
  });
837
1178
 
838
1179
  test("executeBrowserSelectOption matches wrapper contract", async () => {
839
- snapshotMaps.set(
840
- "test-conversation",
841
- new Map([["e4", '[data-vellum-eid="e4"]']]),
842
- );
1180
+ snapshotBackendNodeMaps.set("test-conversation", new Map([["e4", 777]]));
843
1181
  const result = await executeBrowserSelectOption(
844
1182
  { element_id: "e4", value: "opt1" },
845
1183
  ctx,
@@ -850,10 +1188,8 @@ describe("browser execution wrapper contract", () => {
850
1188
  });
851
1189
 
852
1190
  test("executeBrowserHover matches wrapper contract", async () => {
853
- snapshotMaps.set(
854
- "test-conversation",
855
- new Map([["e2", '[data-vellum-eid="e2"]']]),
856
- );
1191
+ installClickHoverCdpSend();
1192
+ snapshotBackendNodeMaps.set("test-conversation", new Map([["e2", 2]]));
857
1193
  const result = await executeBrowserHover({ element_id: "e2" }, ctx);
858
1194
  expect(result).toHaveProperty("content");
859
1195
  expect(result).toHaveProperty("isError");