@vellumai/assistant 0.6.2 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (396)
  1. package/bun.lock +40 -40
  2. package/bunfig.toml +3 -0
  3. package/docs/architecture/memory.md +1 -1
  4. package/node_modules/@vellumai/ces-contracts/src/rpc.ts +42 -0
  5. package/openapi.yaml +184 -69
  6. package/package.json +41 -41
  7. package/scripts/generate-openapi.ts +1 -2
  8. package/src/__tests__/acp-session.test.ts +43 -0
  9. package/src/__tests__/app-builder-tool-scripts.test.ts +1 -0
  10. package/src/__tests__/app-executors.test.ts +1 -0
  11. package/src/__tests__/app-source-watcher.test.ts +37 -11
  12. package/src/__tests__/approval-routes-http.test.ts +178 -1
  13. package/src/__tests__/browser-fill-credential.test.ts +229 -94
  14. package/src/__tests__/browser-manager.test.ts +40 -27
  15. package/src/__tests__/catalog-files.test.ts +862 -0
  16. package/src/__tests__/channel-approvals.test.ts +53 -0
  17. package/src/__tests__/config-managed-gemini-defaults.test.ts +326 -0
  18. package/src/__tests__/config-schema-cmd.test.ts +2 -2
  19. package/src/__tests__/config-schema.test.ts +125 -48
  20. package/src/__tests__/confirmation-request-guardian-bridge.test.ts +23 -0
  21. package/src/__tests__/context-overflow-approval.test.ts +16 -1
  22. package/src/__tests__/conversation-agent-loop-overflow.test.ts +1 -1
  23. package/src/__tests__/conversation-agent-loop.test.ts +1 -1
  24. package/src/__tests__/conversation-analysis-routes.test.ts +2 -2
  25. package/src/__tests__/conversation-attachments.test.ts +80 -4
  26. package/src/__tests__/conversation-confirmation-signals.test.ts +155 -0
  27. package/src/__tests__/conversation-fork-crud.test.ts +17 -0
  28. package/src/__tests__/conversation-history-web-search.test.ts +1 -0
  29. package/src/__tests__/conversation-host-access-routes.test.ts +229 -0
  30. package/src/__tests__/conversation-inject-context.test.ts +103 -0
  31. package/src/__tests__/conversation-queue.test.ts +45 -2
  32. package/src/__tests__/conversation-routes-disk-view.test.ts +5 -0
  33. package/src/__tests__/conversation-routes-guardian-reply.test.ts +16 -0
  34. package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
  35. package/src/__tests__/conversation-runtime-assembly.test.ts +269 -46
  36. package/src/__tests__/conversation-starter-routes.test.ts +126 -0
  37. package/src/__tests__/conversation-starters-cadence.test.ts +161 -0
  38. package/src/__tests__/conversation-store.test.ts +195 -0
  39. package/src/__tests__/conversation-workspace-cache-state.test.ts +193 -0
  40. package/src/__tests__/credential-execution-approval-bridge.test.ts +32 -1
  41. package/src/__tests__/credential-security-invariants.test.ts +1 -0
  42. package/src/__tests__/credential-vault-unit.test.ts +4 -4
  43. package/src/__tests__/credential-vault.test.ts +152 -13
  44. package/src/__tests__/credentials-cli.test.ts +2 -2
  45. package/src/__tests__/date-context.test.ts +4 -4
  46. package/src/__tests__/embedding-managed-proxy-selection.test.ts +256 -0
  47. package/src/__tests__/extension-id-sync-guard.test.ts +155 -0
  48. package/src/__tests__/fixtures/mock-chrome-extension.ts +375 -0
  49. package/src/__tests__/gateway-only-guard.test.ts +3 -0
  50. package/src/__tests__/gemini-provider.test.ts +2 -2
  51. package/src/__tests__/guardian-routing-invariants.test.ts +70 -2
  52. package/src/__tests__/headless-browser-interactions.test.ts +707 -371
  53. package/src/__tests__/headless-browser-navigate.test.ts +389 -47
  54. package/src/__tests__/headless-browser-read-tools.test.ts +266 -103
  55. package/src/__tests__/headless-browser-snapshot.test.ts +240 -77
  56. package/src/__tests__/host-bash-proxy.test.ts +150 -1
  57. package/src/__tests__/host-browser-e2e-cloud.test.ts +462 -0
  58. package/src/__tests__/host-browser-e2e-self-hosted-capability.test.ts +286 -0
  59. package/src/__tests__/host-browser-e2e-self-hosted.test.ts +374 -0
  60. package/src/__tests__/host-browser-event-routes.test.ts +350 -0
  61. package/src/__tests__/host-browser-proxy.test.ts +444 -0
  62. package/src/__tests__/host-browser-routes.test.ts +198 -0
  63. package/src/__tests__/host-browser-ws-events-e2e.test.ts +320 -0
  64. package/src/__tests__/host-cu-proxy.test.ts +171 -1
  65. package/src/__tests__/host-file-proxy.test.ts +185 -1
  66. package/src/__tests__/host-file-read-tool.test.ts +52 -0
  67. package/src/__tests__/host-proxy-interface.test.ts +165 -0
  68. package/src/__tests__/host-shell-tool.test.ts +1 -11
  69. package/src/__tests__/http-user-message-parity.test.ts +1 -0
  70. package/src/__tests__/integration-status.test.ts +6 -7
  71. package/src/__tests__/list-messages-tool-merge.test.ts +37 -12
  72. package/src/__tests__/mcp-client-auth.test.ts +40 -4
  73. package/src/__tests__/mcp-health-check.test.ts +10 -3
  74. package/src/__tests__/migration-cross-version-compatibility.test.ts +3 -1
  75. package/src/__tests__/migration-export-http.test.ts +61 -2
  76. package/src/__tests__/migration-export-streaming.test.ts +66 -0
  77. package/src/__tests__/migration-import-commit-http.test.ts +101 -1
  78. package/src/__tests__/native-host-marker-sync-guard.test.ts +157 -0
  79. package/src/__tests__/oauth-apps-routes.test.ts +17 -12
  80. package/src/__tests__/oauth-cli.test.ts +707 -60
  81. package/src/__tests__/oauth-connect-orchestrator.test.ts +116 -24
  82. package/src/__tests__/oauth-provider-seed-logos.test.ts +23 -0
  83. package/src/__tests__/oauth-provider-serializer.test.ts +146 -10
  84. package/src/__tests__/oauth-provider-visibility.test.ts +19 -21
  85. package/src/__tests__/oauth-providers-routes.test.ts +50 -14
  86. package/src/__tests__/oauth-store.test.ts +1386 -182
  87. package/src/__tests__/oauth2-gateway-transport.test.ts +211 -20
  88. package/src/__tests__/onboarding-template-contract.test.ts +75 -57
  89. package/src/__tests__/openai-provider.test.ts +2 -2
  90. package/src/__tests__/outlook-categories.test.ts +1 -1
  91. package/src/__tests__/outlook-client-automation.test.ts +1 -1
  92. package/src/__tests__/outlook-compose-tools.test.ts +1 -1
  93. package/src/__tests__/outlook-email-watcher.test.ts +1 -1
  94. package/src/__tests__/outlook-follow-up.test.ts +1 -1
  95. package/src/__tests__/outlook-messaging-provider.test.ts +2 -2
  96. package/src/__tests__/outlook-trash.test.ts +1 -1
  97. package/src/__tests__/outlook-unsubscribe.test.ts +1 -1
  98. package/src/__tests__/permission-checker-host-gate.test.ts +74 -14
  99. package/src/__tests__/permission-mode.test.ts +28 -56
  100. package/src/__tests__/platform-callback-registration.test.ts +19 -0
  101. package/src/__tests__/post-turn-tool-result-truncation.test.ts +296 -0
  102. package/src/__tests__/proxy-approval-callback.test.ts +18 -0
  103. package/src/__tests__/require-fresh-approval.test.ts +40 -1
  104. package/src/__tests__/sanitize-config-for-transfer.test.ts +132 -0
  105. package/src/__tests__/schedule-routes.test.ts +162 -0
  106. package/src/__tests__/secret-detection-handler.test.ts +84 -0
  107. package/src/__tests__/secret-ingress-http.test.ts +1 -0
  108. package/src/__tests__/send-endpoint-busy.test.ts +3 -0
  109. package/src/__tests__/set-permission-mode.test.ts +13 -250
  110. package/src/__tests__/skills-file-content-endpoint.test.ts +670 -0
  111. package/src/__tests__/skills-files-catalog-fallback.test.ts +450 -0
  112. package/src/__tests__/slack-channel-config.test.ts +12 -15
  113. package/src/__tests__/subagent-detail.test.ts +44 -2
  114. package/src/__tests__/subagent-disposal.test.ts +1 -0
  115. package/src/__tests__/subagent-fork-notifications.test.ts +291 -0
  116. package/src/__tests__/subagent-fork-spawn.test.ts +384 -0
  117. package/src/__tests__/subagent-manager-notify.test.ts +1 -0
  118. package/src/__tests__/subagent-notify-parent.test.ts +1 -0
  119. package/src/__tests__/subagent-spawn-tool-fork.test.ts +411 -0
  120. package/src/__tests__/subagent-tools.test.ts +1 -0
  121. package/src/__tests__/subagent-types.test.ts +1 -0
  122. package/src/__tests__/system-prompt-ask-mode.test.ts +27 -71
  123. package/src/__tests__/system-prompt.test.ts +72 -1
  124. package/src/__tests__/task-scheduler.test.ts +32 -6
  125. package/src/__tests__/telegram-config.test.ts +10 -13
  126. package/src/__tests__/terminal-tools.test.ts +9 -0
  127. package/src/__tests__/tool-approval-handler.test.ts +73 -0
  128. package/src/__tests__/tool-side-effects-slack-dm.test.ts +22 -0
  129. package/src/__tests__/top-level-renderer.test.ts +73 -1
  130. package/src/__tests__/transport-hints-queue.test.ts +14 -29
  131. package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +109 -0
  132. package/src/__tests__/v2-consent-policy.test.ts +103 -0
  133. package/src/acp/client-handler.ts +30 -4
  134. package/src/agent/loop.ts +12 -6
  135. package/src/approvals/guardian-request-resolvers.ts +21 -15
  136. package/src/browser-session/__tests__/manager.test.ts +297 -0
  137. package/src/browser-session/backends/cdp-inspect.ts +30 -0
  138. package/src/browser-session/backends/extension.ts +26 -0
  139. package/src/browser-session/backends/local.ts +24 -0
  140. package/src/browser-session/events.ts +164 -0
  141. package/src/browser-session/index.ts +27 -0
  142. package/src/browser-session/manager.ts +159 -0
  143. package/src/browser-session/types.ts +28 -0
  144. package/src/channels/__tests__/types.test.ts +134 -0
  145. package/src/channels/types.ts +53 -3
  146. package/src/cli/commands/browser-relay.ts +339 -409
  147. package/src/cli/commands/credentials.ts +3 -3
  148. package/src/cli/commands/email.ts +18 -13
  149. package/src/cli/commands/mcp.ts +16 -4
  150. package/src/cli/commands/oauth/__tests__/connect.test.ts +44 -44
  151. package/src/cli/commands/oauth/__tests__/disconnect.test.ts +21 -21
  152. package/src/cli/commands/oauth/__tests__/mode.test.ts +17 -17
  153. package/src/cli/commands/oauth/__tests__/ping.test.ts +16 -16
  154. package/src/cli/commands/oauth/__tests__/providers-delete.test.ts +31 -33
  155. package/src/cli/commands/oauth/__tests__/providers-register.test.ts +329 -0
  156. package/src/cli/commands/oauth/__tests__/providers-update.test.ts +116 -12
  157. package/src/cli/commands/oauth/__tests__/status.test.ts +10 -10
  158. package/src/cli/commands/oauth/__tests__/token.test.ts +7 -7
  159. package/src/cli/commands/oauth/apps.ts +7 -4
  160. package/src/cli/commands/oauth/connect.ts +6 -3
  161. package/src/cli/commands/oauth/disconnect.ts +1 -1
  162. package/src/cli/commands/oauth/providers.ts +200 -36
  163. package/src/cli/commands/oauth/shared.ts +5 -5
  164. package/src/cli/commands/platform/__tests__/callback-routes-list.test.ts +259 -0
  165. package/src/cli/commands/platform/index.ts +107 -10
  166. package/src/cli/commands/usage.ts +10 -9
  167. package/src/cli/lib/daemon-credential-client.ts +4 -0
  168. package/src/cli/program.ts +1 -1
  169. package/src/config/bundled-skills/app-builder/SKILL.md +26 -249
  170. package/src/config/bundled-skills/app-builder/references/CUSTOM_ROUTES.md +105 -0
  171. package/src/config/bundled-skills/app-builder/references/INTERACTION_HOOKS.md +56 -0
  172. package/src/config/bundled-skills/app-builder/references/WIDGETS.md +125 -0
  173. package/src/config/bundled-skills/contacts/SKILL.md +3 -0
  174. package/src/config/bundled-skills/document/SKILL.md +4 -0
  175. package/src/config/bundled-skills/gmail/SKILL.md +1 -1
  176. package/src/config/bundled-skills/outlook/SKILL.md +7 -0
  177. package/src/config/bundled-skills/subagent/SKILL.md +21 -0
  178. package/src/config/bundled-skills/subagent/TOOLS.json +8 -4
  179. package/src/config/bundled-skills/tasks/SKILL.md +5 -0
  180. package/src/config/env-registry.ts +14 -0
  181. package/src/config/env.ts +21 -0
  182. package/src/config/feature-flag-registry.json +44 -5
  183. package/src/config/loader.ts +56 -1
  184. package/src/config/sanitize-for-transfer.ts +47 -0
  185. package/src/config/schema.ts +46 -5
  186. package/src/config/schemas/host-browser.ts +66 -0
  187. package/src/config/schemas/memory-lifecycle.ts +1 -1
  188. package/src/config/schemas/memory-retrieval.ts +103 -0
  189. package/src/config/schemas/security.ts +0 -6
  190. package/src/config/schemas/services.ts +8 -0
  191. package/src/config/types.ts +0 -1
  192. package/src/context/post-turn-tool-result-truncation.ts +176 -0
  193. package/src/context/window-manager.ts +19 -1
  194. package/src/credential-execution/approval-bridge.ts +49 -15
  195. package/src/daemon/__tests__/conversation-tool-setup.test.ts +186 -0
  196. package/src/daemon/app-source-watcher.ts +35 -0
  197. package/src/daemon/context-overflow-approval.ts +5 -0
  198. package/src/daemon/conversation-agent-loop-handlers.ts +17 -2
  199. package/src/daemon/conversation-agent-loop.ts +58 -24
  200. package/src/daemon/conversation-attachments.ts +40 -0
  201. package/src/daemon/conversation-process.ts +48 -1
  202. package/src/daemon/conversation-runtime-assembly.ts +118 -36
  203. package/src/daemon/conversation-surfaces.ts +37 -36
  204. package/src/daemon/conversation-tool-setup.ts +74 -8
  205. package/src/daemon/conversation-workspace.ts +12 -0
  206. package/src/daemon/conversation.ts +226 -8
  207. package/src/daemon/date-context.ts +10 -10
  208. package/src/daemon/first-greeting.ts +3 -2
  209. package/src/daemon/handlers/conversations.ts +9 -140
  210. package/src/daemon/handlers/shared.ts +58 -0
  211. package/src/daemon/handlers/skills.ts +232 -37
  212. package/src/daemon/host-bash-proxy.ts +48 -13
  213. package/src/daemon/host-browser-proxy.ts +191 -0
  214. package/src/daemon/host-cu-proxy.ts +36 -11
  215. package/src/daemon/host-file-proxy.ts +57 -9
  216. package/src/daemon/lifecycle.ts +65 -11
  217. package/src/daemon/message-protocol.ts +7 -0
  218. package/src/daemon/message-types/conversations.ts +55 -13
  219. package/src/daemon/message-types/host-browser.ts +100 -0
  220. package/src/daemon/message-types/messages.ts +5 -5
  221. package/src/daemon/message-types/skills.ts +10 -0
  222. package/src/daemon/message-types/subagents.ts +2 -0
  223. package/src/daemon/server.ts +92 -12
  224. package/src/daemon/tool-side-effects.ts +6 -0
  225. package/src/daemon/transport-hints.ts +5 -24
  226. package/src/inbound/platform-callback-registration.ts +18 -17
  227. package/src/mcp/client.ts +59 -24
  228. package/src/memory/app-store.ts +31 -1
  229. package/src/memory/conversation-crud.ts +23 -0
  230. package/src/memory/conversation-starters-cadence.ts +76 -0
  231. package/src/memory/conversation-title-service.ts +5 -2
  232. package/src/memory/db-init.ts +12 -0
  233. package/src/memory/embedding-backend.test.ts +75 -0
  234. package/src/memory/embedding-backend.ts +131 -5
  235. package/src/memory/embedding-gemini.test.ts +54 -0
  236. package/src/memory/embedding-gemini.ts +20 -9
  237. package/src/memory/embedding-local.ts +176 -17
  238. package/src/memory/graph/consolidation.ts +10 -23
  239. package/src/memory/graph/extraction-job.ts +15 -0
  240. package/src/memory/graph/retriever.ts +40 -22
  241. package/src/memory/graph/store.test.ts +7 -3
  242. package/src/memory/graph/store.ts +47 -12
  243. package/src/memory/llm-usage-store.ts +45 -4
  244. package/src/memory/migrations/213-oauth-providers-scope-separator.ts +13 -0
  245. package/src/memory/migrations/214-oauth-providers-refresh-url.ts +11 -0
  246. package/src/memory/migrations/215-oauth-providers-revoke.ts +14 -0
  247. package/src/memory/migrations/216-oauth-providers-token-auth-method.ts +30 -0
  248. package/src/memory/migrations/217-conversation-host-access.ts +40 -0
  249. package/src/memory/migrations/218-oauth-providers-logo-url.ts +11 -0
  250. package/src/memory/migrations/index.ts +6 -0
  251. package/src/memory/migrations/registry.ts +8 -0
  252. package/src/memory/schema/conversations.ts +1 -0
  253. package/src/memory/schema/oauth.ts +18 -13
  254. package/src/oauth/AGENTS.md +76 -0
  255. package/src/oauth/__tests__/identity-verifier.test.ts +24 -19
  256. package/src/oauth/__tests__/seed-providers-managed.test.ts +32 -0
  257. package/src/oauth/byo-connection.test.ts +8 -8
  258. package/src/oauth/byo-connection.ts +7 -7
  259. package/src/oauth/connect-orchestrator.ts +23 -21
  260. package/src/oauth/connect-types.ts +3 -3
  261. package/src/oauth/connection-resolver.test.ts +17 -4
  262. package/src/oauth/connection-resolver.ts +16 -16
  263. package/src/oauth/connection.ts +1 -1
  264. package/src/oauth/manual-token-connection.ts +13 -13
  265. package/src/oauth/oauth-store.ts +214 -100
  266. package/src/oauth/platform-connection.test.ts +3 -3
  267. package/src/oauth/platform-connection.ts +4 -4
  268. package/src/oauth/provider-serializer.ts +31 -5
  269. package/src/oauth/revoke.ts +76 -0
  270. package/src/oauth/seed-providers.ts +126 -87
  271. package/src/oauth/token-persistence.ts +1 -1
  272. package/src/permissions/permission-mode.ts +4 -11
  273. package/src/permissions/prompter.ts +13 -1
  274. package/src/permissions/v2-consent-policy.ts +87 -0
  275. package/src/prompts/system-prompt.ts +18 -21
  276. package/src/prompts/templates/BOOTSTRAP-REFERENCE.md +3 -65
  277. package/src/prompts/templates/BOOTSTRAP.md +59 -105
  278. package/src/providers/anthropic/client.ts +1 -0
  279. package/src/providers/types.ts +1 -1
  280. package/src/runtime/AGENTS.md +23 -0
  281. package/src/runtime/__tests__/browser-extension-pair-routes.test.ts +715 -0
  282. package/src/runtime/__tests__/capability-tokens.test.ts +258 -0
  283. package/src/runtime/__tests__/chrome-extension-registry.test.ts +518 -0
  284. package/src/runtime/assistant-event-hub.ts +2 -2
  285. package/src/runtime/auth/__tests__/guard-tests.test.ts +1 -0
  286. package/src/runtime/auth/__tests__/middleware.test.ts +116 -1
  287. package/src/runtime/auth/__tests__/route-policy.test.ts +8 -0
  288. package/src/runtime/auth/middleware.ts +98 -0
  289. package/src/runtime/auth/route-policy.ts +6 -7
  290. package/src/runtime/capability-tokens.ts +414 -0
  291. package/src/runtime/channel-approvals.ts +18 -5
  292. package/src/runtime/chrome-extension-registry.ts +332 -0
  293. package/src/runtime/confirmation-request-guardian-bridge.ts +6 -0
  294. package/src/runtime/guardian-decision-types.ts +7 -0
  295. package/src/runtime/http-server.ts +425 -70
  296. package/src/runtime/migrations/__tests__/rebind-secrets-credentials.test.ts +172 -0
  297. package/src/runtime/migrations/__tests__/vbundle-builder-credentials.test.ts +276 -0
  298. package/src/runtime/migrations/__tests__/vbundle-import-credentials.test.ts +162 -0
  299. package/src/runtime/migrations/migration-transport.ts +6 -0
  300. package/src/runtime/migrations/migration-wizard.ts +22 -2
  301. package/src/runtime/migrations/rebind-secrets-screen.ts +76 -15
  302. package/src/runtime/migrations/vbundle-builder.ts +145 -38
  303. package/src/runtime/migrations/vbundle-import-analyzer.ts +19 -0
  304. package/src/runtime/migrations/vbundle-importer.ts +55 -5
  305. package/src/runtime/pending-interactions.ts +29 -13
  306. package/src/runtime/routes/approval-routes.ts +90 -16
  307. package/src/runtime/routes/browser-cdp-routes.ts +229 -0
  308. package/src/runtime/routes/browser-extension-pair-routes.ts +497 -0
  309. package/src/runtime/routes/conversation-analysis-routes.ts +2 -1
  310. package/src/runtime/routes/conversation-management-routes.ts +108 -0
  311. package/src/runtime/routes/conversation-routes.ts +301 -27
  312. package/src/runtime/routes/conversation-starter-routes.ts +78 -16
  313. package/src/runtime/routes/guardian-action-routes.ts +24 -13
  314. package/src/runtime/routes/host-browser-routes.ts +279 -0
  315. package/src/runtime/routes/host-file-routes.ts +9 -1
  316. package/src/runtime/routes/identity-routes.ts +259 -16
  317. package/src/runtime/routes/log-export-routes.ts +42 -22
  318. package/src/runtime/routes/memory-item-routes.ts +1 -7
  319. package/src/runtime/routes/migration-routes.ts +87 -2
  320. package/src/runtime/routes/oauth-apps.ts +15 -17
  321. package/src/runtime/routes/oauth-providers.ts +4 -0
  322. package/src/runtime/routes/schedule-routes.ts +24 -11
  323. package/src/runtime/routes/settings-routes.ts +9 -97
  324. package/src/runtime/routes/skills-routes.ts +52 -2
  325. package/src/runtime/routes/subagents-routes.ts +14 -10
  326. package/src/runtime/routes/usage-routes.ts +8 -7
  327. package/src/runtime/routes/workspace-routes.test.ts +22 -0
  328. package/src/runtime/routes/workspace-routes.ts +8 -1
  329. package/src/runtime/routes/workspace-utils.ts +2 -0
  330. package/src/schedule/scheduler.ts +7 -5
  331. package/src/security/ces-credential-client.ts +20 -0
  332. package/src/security/ces-rpc-credential-backend.ts +17 -0
  333. package/src/security/credential-backend.ts +5 -0
  334. package/src/security/oauth2.ts +42 -25
  335. package/src/security/secure-keys.ts +118 -25
  336. package/src/security/token-manager.ts +23 -10
  337. package/src/skills/catalog-files.ts +492 -0
  338. package/src/subagent/manager.ts +131 -26
  339. package/src/subagent/types.ts +19 -0
  340. package/src/tools/apps/executors.ts +11 -2
  341. package/src/tools/browser/__tests__/auth-detector.test.ts +202 -108
  342. package/src/tools/browser/auth-detector.ts +43 -12
  343. package/src/tools/browser/browser-execution.ts +645 -340
  344. package/src/tools/browser/browser-manager.ts +36 -12
  345. package/src/tools/browser/cdp-client/__tests__/accessibility-snapshot.test.ts +318 -0
  346. package/src/tools/browser/cdp-client/__tests__/cdp-dom-helpers.test.ts +1175 -0
  347. package/src/tools/browser/cdp-client/__tests__/cdp-inspect-client.test.ts +870 -0
  348. package/src/tools/browser/cdp-client/__tests__/extension-cdp-client.test.ts +330 -0
  349. package/src/tools/browser/cdp-client/__tests__/factory.test.ts +377 -0
  350. package/src/tools/browser/cdp-client/__tests__/fixtures/ax-tree-nested-frames.json +64 -0
  351. package/src/tools/browser/cdp-client/__tests__/fixtures/ax-tree-simple.json +69 -0
  352. package/src/tools/browser/cdp-client/__tests__/local-cdp-client.test.ts +310 -0
  353. package/src/tools/browser/cdp-client/__tests__/types.test.ts +96 -0
  354. package/src/tools/browser/cdp-client/accessibility-snapshot.ts +387 -0
  355. package/src/tools/browser/cdp-client/cdp-dom-helpers.ts +695 -0
  356. package/src/tools/browser/cdp-client/cdp-inspect/__tests__/discovery.test.ts +743 -0
  357. package/src/tools/browser/cdp-client/cdp-inspect/__tests__/ws-transport.test.ts +580 -0
  358. package/src/tools/browser/cdp-client/cdp-inspect/discovery.ts +578 -0
  359. package/src/tools/browser/cdp-client/cdp-inspect/ws-transport.ts +579 -0
  360. package/src/tools/browser/cdp-client/cdp-inspect-client.ts +635 -0
  361. package/src/tools/browser/cdp-client/errors.ts +34 -0
  362. package/src/tools/browser/cdp-client/extension-cdp-client.ts +125 -0
  363. package/src/tools/browser/cdp-client/factory.ts +204 -0
  364. package/src/tools/browser/cdp-client/index.ts +14 -0
  365. package/src/tools/browser/cdp-client/local-cdp-client.ts +187 -0
  366. package/src/tools/browser/cdp-client/types.ts +52 -0
  367. package/src/tools/filesystem/edit.ts +1 -1
  368. package/src/tools/filesystem/list.ts +1 -1
  369. package/src/tools/filesystem/read.ts +1 -1
  370. package/src/tools/filesystem/write.ts +2 -1
  371. package/src/tools/host-filesystem/edit.ts +1 -1
  372. package/src/tools/host-filesystem/read.ts +12 -15
  373. package/src/tools/host-filesystem/write.ts +1 -1
  374. package/src/tools/host-terminal/host-shell.ts +21 -16
  375. package/src/tools/permission-checker.ts +77 -82
  376. package/src/tools/registry.ts +0 -2
  377. package/src/tools/secret-detection-handler.ts +34 -0
  378. package/src/tools/shared/filesystem/image-read.ts +61 -40
  379. package/src/tools/subagent/spawn.ts +47 -3
  380. package/src/tools/subagent/status.ts +2 -0
  381. package/src/tools/system/register.ts +2 -16
  382. package/src/tools/terminal/safe-env.ts +7 -0
  383. package/src/tools/terminal/shell.ts +21 -16
  384. package/src/tools/tool-approval-handler.ts +48 -2
  385. package/src/tools/types.ts +2 -0
  386. package/src/util/platform.ts +14 -19
  387. package/src/workspace/top-level-renderer.ts +19 -1
  388. package/src/__tests__/chrome-cdp.test.ts +0 -419
  389. package/src/__tests__/permission-mode-sse.test.ts +0 -418
  390. package/src/__tests__/permission-mode-store.test.ts +0 -277
  391. package/src/browser-extension-relay/protocol.ts +0 -63
  392. package/src/browser-extension-relay/server.ts +0 -203
  393. package/src/config/schemas/sandbox.ts +0 -14
  394. package/src/permissions/permission-mode-store.ts +0 -180
  395. package/src/tools/browser/chrome-cdp.ts +0 -239
  396. package/src/tools/system/set-permission-mode.ts +0 -103
@@ -15,7 +15,7 @@ import {
15
15
  detectCaptchaChallenge,
16
16
  formatAuthChallenge,
17
17
  } from "./auth-detector.js";
18
- import type { PageResponse, RouteHandler } from "./browser-manager.js";
18
+ import type { RouteHandler } from "./browser-manager.js";
19
19
  import { browserManager } from "./browser-manager.js";
20
20
  import {
21
21
  ensureScreencast,
@@ -23,6 +23,30 @@ import {
23
23
  stopAllScreencasts,
24
24
  stopBrowserScreencast,
25
25
  } from "./browser-screencast.js";
26
+ import {
27
+ formatAxSnapshot,
28
+ transformAxTree,
29
+ } from "./cdp-client/accessibility-snapshot.js";
30
+ import {
31
+ captureScreenshotJpeg,
32
+ dispatchClickAt,
33
+ dispatchHoverAt,
34
+ dispatchInsertText,
35
+ dispatchKeyPress,
36
+ dispatchWheelScroll,
37
+ evaluateExpression,
38
+ focusElement,
39
+ getCenterPoint,
40
+ getCurrentUrl,
41
+ getPageTitle,
42
+ navigateAndWait,
43
+ querySelectorBackendNodeId,
44
+ scrollIntoViewIfNeeded,
45
+ waitForSelector as cdpWaitForSelector,
46
+ waitForText as cdpWaitForText,
47
+ } from "./cdp-client/cdp-dom-helpers.js";
48
+ import { getCdpClient } from "./cdp-client/factory.js";
49
+ import type { CdpClient } from "./cdp-client/types.js";
26
50
 
27
51
  const log = getLogger("headless-browser");
28
52
 
@@ -32,43 +56,79 @@ export const NAVIGATE_TIMEOUT_MS = 15_000;
32
56
 
33
57
  export const ACTION_TIMEOUT_MS = 10_000;
34
58
 
35
- export const MAX_SNAPSHOT_ELEMENTS = 150;
36
-
37
- export const INTERACTIVE_SELECTOR = [
38
- "a[href]",
39
- "button",
40
- "input",
41
- "select",
42
- "textarea",
43
- '[role="button"]',
44
- '[role="link"]',
45
- '[role="checkbox"]',
46
- '[role="radio"]',
47
- '[role="tab"]',
48
- '[role="menuitem"]',
49
- '[role="option"]',
50
- '[role="combobox"]',
51
- '[role="listbox"]',
52
- '[contenteditable="true"]',
53
- ].join(", ");
54
-
55
- export type SnapshotElement = {
56
- eid: string;
57
- tag: string;
58
- attrs: Record<string, string>;
59
- text: string;
60
- };
61
-
62
59
  export const MAX_WAIT_MS = 30_000;
63
60
 
64
61
  export const MAX_EXTRACT_LENGTH = 50_000;
65
62
 
63
+ /**
64
+ * IIFE evaluated inside the page via `Runtime.evaluate` to auto-dismiss
65
+ * common blocker modals (regulatory notices, cookie banners) that
66
+ * aren't exposed in the accessibility tree. Runs silently - if no
67
+ * matching modal is present the expression is a no-op.
68
+ */
69
+ const DISMISS_MODALS_EXPRESSION = `(() => {
70
+ const dismissPatterns = /^(got it|accept|ok|dismiss|i understand|close)$/i;
71
+ const buttons = document.querySelectorAll('button, [role="button"], input[type="submit"]');
72
+ for (const btn of buttons) {
73
+ const text = (btn.textContent || '').trim();
74
+ if (dismissPatterns.test(text)) {
75
+ const modal = btn.closest('[role="dialog"], [class*="modal"], [class*="Modal"], [class*="overlay"], [class*="Overlay"]');
76
+ if (modal) {
77
+ btn.click();
78
+ break;
79
+ }
80
+ }
81
+ }
82
+ })()`;
83
+
84
+ /**
85
+ * IIFE evaluated by {@link executeBrowserExtract} when `include_links`
86
+ * is true. Walks `document.querySelectorAll('a[href]')`, caps at 200
87
+ * anchors, and shapes each entry as `{ text, href }`. Extracted to a
88
+ * module-level constant so the expression is shared between the
89
+ * runtime call site and any future refactors / tests that need to
90
+ * reason about the evaluated source.
91
+ */
92
+ export const EXTRACT_LINKS_EXPRESSION = `
93
+ (() => {
94
+ const anchors = Array.from(document.querySelectorAll('a[href]'));
95
+ return anchors.slice(0, 200).map(a => ({
96
+ text: (a.textContent || '').trim().slice(0, 80),
97
+ href: a.href,
98
+ }));
99
+ })()
100
+ `;
101
+
66
102
  // ── Shared element resolution ────────────────────────────────────────
67
103
 
68
- export function resolveSelector(
104
+ /**
105
+ * Discriminated union returned by {@link resolveElement}. The
106
+ * `"backend"` variant is produced when an `element_id` from the most
107
+ * recent AX-tree snapshot is resolved to a CDP `backendNodeId`; the
108
+ * `"selector"` variant is produced when the caller passed a raw CSS
109
+ * `selector` that should be resolved via `DOM.querySelector` at
110
+ * send-time by the individual tool.
111
+ *
112
+ * Consumed by CDP-native interaction tools (click, hover, type, …)
113
+ * that talk to CDP directly.
114
+ */
115
+ export type ResolvedElement =
116
+ | { kind: "backend"; backendNodeId: number; eid: string }
117
+ | { kind: "selector"; selector: string };
118
+
119
+ /**
120
+ * Resolve an element reference (either `element_id` from a prior
121
+ * snapshot or a raw `selector`) for CDP-native tools. Returns a
122
+ * {@link ResolvedElement} discriminated union so callers can branch
123
+ * on whether a backendNodeId was recovered from the snapshot map.
124
+ * Returns `{ resolved: null, error: "Error: …" }` on invalid input
125
+ * or when an `element_id` is provided but the snapshot map is
126
+ * empty/stale.
127
+ */
128
+ export function resolveElement(
69
129
  conversationId: string,
70
130
  input: Record<string, unknown>,
71
- ): { selector: string | null; error: string | null } {
131
+ ): { resolved: ResolvedElement | null; error: string | null } {
72
132
  const elementId =
73
133
  typeof input.element_id === "string" ? input.element_id : null;
74
134
  const rawSelector =
@@ -76,26 +136,32 @@ export function resolveSelector(
76
136
 
77
137
  if (!elementId && !rawSelector) {
78
138
  return {
79
- selector: null,
139
+ resolved: null,
80
140
  error: "Error: Either element_id or selector is required.",
81
141
  };
82
142
  }
83
143
 
84
144
  if (elementId) {
85
- const resolved = browserManager.resolveSnapshotSelector(
145
+ const backendNodeId = browserManager.resolveSnapshotBackendNodeId(
86
146
  conversationId,
87
147
  elementId,
88
148
  );
89
- if (!resolved) {
149
+ if (backendNodeId !== null) {
90
150
  return {
91
- selector: null,
92
- error: `Error: element_id "${elementId}" not found. Run browser_snapshot first to get current element IDs.`,
151
+ resolved: { kind: "backend", backendNodeId, eid: elementId },
152
+ error: null,
93
153
  };
94
154
  }
95
- return { selector: resolved, error: null };
155
+ return {
156
+ resolved: null,
157
+ error: `Error: element_id "${elementId}" not found. Run browser_snapshot first to get current element IDs.`,
158
+ };
96
159
  }
97
160
 
98
- return { selector: rawSelector!, error: null };
161
+ return {
162
+ resolved: { kind: "selector", selector: rawSelector! },
163
+ error: null,
164
+ };
99
165
  }
100
166
 
101
167
  // ── browser_navigate ─────────────────────────────────────────────────
@@ -122,7 +188,8 @@ export async function executeBrowserNavigate(
122
188
  const allowPrivateNetwork = input.allow_private_network === true;
123
189
  const safeRequestedUrl = sanitizeUrlForOutput(parsedUrl);
124
190
 
125
- // Block private/local targets by default
191
+ // Block private/local targets by default. Runs before any CDP session
192
+ // is opened so we fail fast on obviously invalid URLs.
126
193
  if (!allowPrivateNetwork && isPrivateOrLocalHost(parsedUrl.hostname)) {
127
194
  return {
128
195
  content: `Error: Refusing to navigate to local/private network target (${parsedUrl.hostname}). Set allow_private_network=true if you explicitly need it.`,
@@ -130,7 +197,7 @@ export async function executeBrowserNavigate(
130
197
  };
131
198
  }
132
199
 
133
- // DNS resolution check for non-literal hostnames
200
+ // DNS resolution check for non-literal hostnames.
134
201
  if (!allowPrivateNetwork) {
135
202
  const resolution = await resolveRequestAddress(
136
203
  parsedUrl.hostname,
@@ -145,29 +212,35 @@ export async function executeBrowserNavigate(
145
212
  }
146
213
  }
147
214
 
148
- let routeHandler: RouteHandler | null = null;
149
- let blockedUrl: string | null = null;
215
+ const cdp = getCdpClient(context);
150
216
 
151
- // Start screencast if a sender is registered for this conversation
152
- const sender = getSender(context.conversationId);
153
- if (sender) {
217
+ // Screencast + handoff are Playwright-backed and only meaningful
218
+ // for the local sacrificial-profile path. On the extension path the
219
+ // user already has their own Chrome window, so both are no-ops.
220
+ const sender =
221
+ cdp.kind === "local" ? getSender(context.conversationId) : null;
222
+ if (cdp.kind === "local" && sender) {
154
223
  await ensureScreencast(context.conversationId);
155
224
  }
156
225
 
226
+ // SSRF route interception is a Playwright-specific affordance used on
227
+ // the local path to block redirect-time requests to private networks.
228
+ // On the extension path we rely on the pre-CDP URL validation above;
229
+ // see phase3-cdp-migration.md PR 7 for the rationale.
230
+ let routeHandler: RouteHandler | null = null;
231
+ let blockedUrl: string | null = null;
232
+
157
233
  try {
158
- const page = await browserManager.getOrCreateSessionPage(
159
- context.conversationId,
160
- );
161
234
  log.debug(
162
235
  { url: safeRequestedUrl, conversationId: context.conversationId },
163
236
  "Navigating",
164
237
  );
165
238
 
166
- // Install request interception to block redirects/sub-requests to private networks.
167
- // This prevents SSRF bypass via server-side redirects and DNS rebinding attacks,
168
- // since Playwright follows redirects internally and performs its own DNS resolution.
169
- // Only skip for connectOverCDP browsers where page.route() is unreliable.
170
- if (!allowPrivateNetwork && browserManager.supportsRouteInterception) {
239
+ if (
240
+ cdp.kind === "local" &&
241
+ !allowPrivateNetwork &&
242
+ browserManager.supportsRouteInterception
243
+ ) {
171
244
  // Cache DNS results per-hostname to avoid redundant lookups on subrequests
172
245
  // (heavy sites like DoorDash fire hundreds of requests to the same CDN hostnames).
173
246
  // Use a short TTL to mitigate DNS rebinding attacks where a hostname first
@@ -242,47 +315,60 @@ export async function executeBrowserNavigate(
242
315
  );
243
316
  }
244
317
  };
318
+ // Bridge through browserManager to reach the Playwright Page for
319
+ // route installation. The route handler intercepts redirect-time
320
+ // requests before Page.navigate's network fetches can hit them.
321
+ const page = await browserManager.getOrCreateSessionPage(
322
+ context.conversationId,
323
+ );
245
324
  await page.route("**/*", routeHandler);
246
325
  }
247
326
 
248
- // Use domcontentloaded but with a shorter timeout - if it times out,
249
- // the page is likely still usable (heavy SPAs like DoorDash keep loading
250
- // scripts after DOMContentLoaded). Fall back gracefully instead of failing.
251
- let response: PageResponse | null = null;
252
- let navigationTimedOut = false;
253
- const urlBeforeNav = page.url();
254
- try {
255
- response = await page.goto(parsedUrl.href, {
256
- waitUntil: "domcontentloaded",
257
- timeout: NAVIGATE_TIMEOUT_MS,
258
- });
259
- } catch (navErr) {
260
- const navMsg = navErr instanceof Error ? navErr.message : String(navErr);
261
- if (navMsg.includes("Timeout") || navMsg.includes("timeout")) {
262
- // If the page URL never changed from before navigation, the page
263
- // never actually loaded - re-throw instead of reporting success.
264
- if (page.url() === urlBeforeNav && urlBeforeNav !== parsedUrl.href) {
265
- throw navErr;
266
- }
267
- navigationTimedOut = true;
268
- log.info(
269
- { url: safeRequestedUrl },
270
- "Navigation timed out waiting for domcontentloaded, continuing with partial load",
327
+ // Read the current URL BEFORE calling navigateAndWait so we can
328
+ // detect the "page never moved" case on timeout.
329
+ const urlBeforeNav = await getCurrentUrl(cdp, context.signal);
330
+
331
+ // Navigate via CDP Page.navigate + document.readyState polling.
332
+ // navigateAndWait returns { finalUrl, timedOut }; HTTP status is
333
+ // not available on the CDP path because Page.navigate does not
334
+ // surface the response status.
335
+ const { finalUrl, timedOut: navigationTimedOut } = await navigateAndWait(
336
+ cdp,
337
+ parsedUrl.href,
338
+ { timeoutMs: NAVIGATE_TIMEOUT_MS },
339
+ context.signal,
340
+ );
341
+ if (navigationTimedOut) {
342
+ // If the page URL never changed from before navigation, the page
343
+ // never actually loaded - re-throw instead of reporting success.
344
+ if (finalUrl === urlBeforeNav && urlBeforeNav !== parsedUrl.href) {
345
+ throw new Error(
346
+ `Navigation to ${parsedUrl.href} timed out after ${NAVIGATE_TIMEOUT_MS}ms`,
271
347
  );
272
- } else {
273
- throw navErr;
274
348
  }
349
+ log.info(
350
+ { url: safeRequestedUrl },
351
+ "Navigation timed out waiting for document.readyState, continuing with partial load",
352
+ );
275
353
  }
276
354
 
277
- // Remove the route handler now that navigation is complete
355
+ // Remove the Playwright route handler now that navigation is
356
+ // complete (local path only).
278
357
  if (routeHandler) {
358
+ const page = await browserManager.getOrCreateSessionPage(
359
+ context.conversationId,
360
+ );
279
361
  await page.unroute("**/*", routeHandler);
280
362
  routeHandler = null;
281
363
  }
282
364
 
283
- // Reposition the browser window after navigation so the user can watch.
284
- // positionWindowSidebar() is a no-op when browserCdpSession is unavailable.
285
- if (!browserManager.isInteractive(context.conversationId)) {
365
+ // Window positioning is a Playwright-internal affordance - on the
366
+ // extension path the user owns their Chrome window, so positioning
367
+ // is a no-op.
368
+ if (
369
+ cdp.kind === "local" &&
370
+ !browserManager.isInteractive(context.conversationId)
371
+ ) {
286
372
  await browserManager.positionWindowSidebar();
287
373
  }
288
374
 
@@ -293,38 +379,34 @@ export async function executeBrowserNavigate(
293
379
  };
294
380
  }
295
381
 
296
- // Navigation changed the page content, so clear stale snapshot mappings.
297
- // Without this, element IDs from a previous page could resolve and cause
298
- // confusing Playwright timeout errors instead of the actionable
299
- // "run browser_snapshot first" message.
300
- browserManager.clearSnapshotMap(context.conversationId);
382
+ // Navigation changed the page content, so clear stale snapshot
383
+ // mappings regardless of backend. The backendNodeId map is shared
384
+ // per-conversation state that needs to be invalidated on any nav.
385
+ browserManager.clearSnapshotBackendNodeMap(context.conversationId);
301
386
 
302
- // Auto-dismiss common blocker modals (regulatory notices, cookie banners)
303
- // that aren't exposed in the accessibility tree. Runs silently - if no
304
- // modal is present the evaluate is a no-op.
387
+ // Auto-dismiss common blocker modals (regulatory notices, cookie
388
+ // banners) that aren't exposed in the accessibility tree. Runs
389
+ // silently - if no modal is present the evaluate is a no-op.
305
390
  try {
306
- await page.evaluate(`(() => {
307
- const dismissPatterns = /^(got it|accept|ok|dismiss|i understand|close)$/i;
308
- const buttons = document.querySelectorAll('button, [role="button"], input[type="submit"]');
309
- for (const btn of buttons) {
310
- const text = (btn.textContent || '').trim();
311
- if (dismissPatterns.test(text)) {
312
- const modal = btn.closest('[role="dialog"], [class*="modal"], [class*="Modal"], [class*="overlay"], [class*="Overlay"]');
313
- if (modal) {
314
- btn.click();
315
- break;
316
- }
317
- }
318
- }
319
- })()`);
391
+ await evaluateExpression(
392
+ cdp,
393
+ DISMISS_MODALS_EXPRESSION,
394
+ {},
395
+ context.signal,
396
+ );
320
397
  } catch {
321
398
  // Page may have navigated during evaluate - safe to ignore
322
399
  }
323
400
 
324
- const finalUrl = page.url();
325
401
  const safeFinalUrl = sanitizeUrlForOutput(new URL(finalUrl));
326
- const title = await page.title();
327
- const status = response?.status() ?? null;
402
+ const title = await getPageTitle(cdp, context.signal);
403
+ // HTTP status is not available on the CDP path: `Page.navigate`
404
+ // resolves the frame id and (on failure) an error text, but does
405
+ // not carry the response status code. Both the local and extension
406
+ // paths therefore print "unknown" here. A future phase may subscribe
407
+ // to `Network.responseReceived` events during the navigation window
408
+ // if the status is needed again.
409
+ const status: number | null = null;
328
410
 
329
411
  const lines: string[] = [
330
412
  `Requested URL: ${safeRequestedUrl}`,
@@ -335,7 +417,7 @@ export async function executeBrowserNavigate(
335
417
 
336
418
  if (navigationTimedOut) {
337
419
  lines.push(
338
- `Note: Page is still loading (domcontentloaded timed out). The page should still be interactive - use browser_snapshot to check.`,
420
+ `Note: Page is still loading (document.readyState timed out). The page should still be interactive - use browser_snapshot to check.`,
339
421
  );
340
422
  }
341
423
 
@@ -343,10 +425,14 @@ export async function executeBrowserNavigate(
343
425
  lines.push(`Note: Page redirected from the requested URL.`);
344
426
  }
345
427
 
346
- // Detect auth challenges (login pages, 2FA, OAuth consent) and CAPTCHA challenges
428
+ // Detect auth challenges (login pages, 2FA, OAuth consent) and CAPTCHA
429
+ // challenges via the CDP-migrated auth-detector helpers.
347
430
  try {
348
- const authChallenge = await detectAuthChallenge(page);
349
- const captchaChallenge = await detectCaptchaChallenge(page);
431
+ const authChallenge = await detectAuthChallenge(cdp, context.signal);
432
+ const captchaChallenge = await detectCaptchaChallenge(
433
+ cdp,
434
+ context.signal,
435
+ );
350
436
  // CAPTCHA takes priority - it blocks all interaction including login
351
437
  let challenge = captchaChallenge ?? authChallenge;
352
438
 
@@ -359,12 +445,12 @@ export async function executeBrowserNavigate(
359
445
  return { content: "Navigation cancelled.", isError: true };
360
446
  }
361
447
  await new Promise((r) => setTimeout(r, 1000));
362
- const still = await detectCaptchaChallenge(page);
448
+ const still = await detectCaptchaChallenge(cdp, context.signal);
363
449
  if (!still) {
364
450
  log.info("CAPTCHA auto-resolved");
365
451
  // Re-check for auth challenge now that CAPTCHA is gone -
366
452
  // the page may have loaded a login form behind it.
367
- challenge = await detectAuthChallenge(page);
453
+ challenge = await detectAuthChallenge(cdp, context.signal);
368
454
  break;
369
455
  }
370
456
  }
@@ -373,7 +459,11 @@ export async function executeBrowserNavigate(
373
459
  if (challenge) {
374
460
  if (challenge.type === "captcha") {
375
461
  // CAPTCHA persisted after auto-resolve wait - hand off to user
376
- if (sender) {
462
+ // only when we have a local Playwright-managed Chrome window
463
+ // AND a sender is registered. The extension path falls back
464
+ // to the text-only "solve manually" branch because the user
465
+ // already owns their Chrome window.
466
+ if (cdp.kind === "local" && sender) {
377
467
  const { startHandoff } = await import("./browser-handoff.js");
378
468
  await startHandoff(context.conversationId, {
379
469
  reason: "captcha",
@@ -381,15 +471,18 @@ export async function executeBrowserNavigate(
381
471
  "Cloudflare verification detected. Please solve the CAPTCHA in the Chrome window. The browser will automatically detect when you're done and resume.",
382
472
  bringToFront: true,
383
473
  });
384
- const newUrl = page.url();
385
- const newTitle = await page.title();
474
+ const newUrl = await getCurrentUrl(cdp, context.signal);
475
+ const newTitle = await getPageTitle(cdp, context.signal);
386
476
  lines.push("");
387
477
  lines.push(
388
478
  `CAPTCHA solved by user. Current page: ${newTitle} (${newUrl})`,
389
479
  );
390
480
 
391
481
  // Re-check for auth challenges - the page behind the CAPTCHA may have a login form
392
- const postCaptchaAuth = await detectAuthChallenge(page);
482
+ const postCaptchaAuth = await detectAuthChallenge(
483
+ cdp,
484
+ context.signal,
485
+ );
393
486
  if (postCaptchaAuth) {
394
487
  lines.push("");
395
488
  lines.push(formatAuthChallenge(postCaptchaAuth));
@@ -448,7 +541,7 @@ export async function executeBrowserNavigate(
448
541
 
449
542
  return { content: lines.join("\n"), isError: false };
450
543
  } catch (err) {
451
- // Best-effort cleanup of route handler on error
544
+ // Best-effort cleanup of route handler on error (local path only)
452
545
  if (routeHandler) {
453
546
  try {
454
547
  const page = await browserManager.getOrCreateSessionPage(
@@ -461,8 +554,8 @@ export async function executeBrowserNavigate(
461
554
  }
462
555
 
463
556
  // If the route handler blocked a redirect to a private network address,
464
- // page.goto() throws. Return the clear security message instead of the
465
- // raw Playwright error (which could leak credentials from the URL).
557
+ // Page.navigate throws. Return the clear security message instead of
558
+ // the raw underlying error (which could leak credentials from the URL).
466
559
  if (blockedUrl) {
467
560
  return {
468
561
  content: `Error: Navigation blocked. A request targeted a local/private network address (${blockedUrl}). Set allow_private_network=true if you explicitly need it.`,
@@ -473,6 +566,8 @@ export async function executeBrowserNavigate(
473
566
  const msg = err instanceof Error ? err.message : String(err);
474
567
  log.error({ err, url: safeRequestedUrl }, "Navigation failed");
475
568
  return { content: `Error: Navigation failed: ${msg}`, isError: true };
569
+ } finally {
570
+ cdp.dispose();
476
571
  }
477
572
  }
478
573
 
@@ -482,79 +577,42 @@ export async function executeBrowserSnapshot(
482
577
  _input: Record<string, unknown>,
483
578
  context: ToolContext,
484
579
  ): Promise<ToolExecutionResult> {
580
+ const cdp = getCdpClient(context);
485
581
  try {
486
- const page = await browserManager.getOrCreateSessionPage(
487
- context.conversationId,
582
+ const currentUrl = await getCurrentUrl(cdp, context.signal);
583
+ const title = await getPageTitle(cdp, context.signal);
584
+
585
+ // Pull the full accessibility tree via CDP and fold it into typed
586
+ // interactive elements + an `eid → backendNodeId` map. Interaction
587
+ // tools (click, hover, type, …) resolve element_id against this map
588
+ // and jump straight to CDP DOM commands without another round-trip
589
+ // through any selector engine.
590
+ await cdp.send("Accessibility.enable", {}, context.signal);
591
+ const rawTree = await cdp.send(
592
+ "Accessibility.getFullAXTree",
593
+ {},
594
+ context.signal,
488
595
  );
489
- const currentUrl = page.url();
490
- const title = await page.title();
491
-
492
- const elements = (await page.evaluate(`
493
- (() => {
494
- const SELECTOR = ${JSON.stringify(INTERACTIVE_SELECTOR)};
495
- const MAX = ${MAX_SNAPSHOT_ELEMENTS};
496
- // Clear stale eid attributes from previous snapshots
497
- document.querySelectorAll('[data-vellum-eid]').forEach(el => el.removeAttribute('data-vellum-eid'));
498
- const els = Array.from(document.querySelectorAll(SELECTOR));
499
- const visible = els.filter(el => {
500
- const rect = el.getBoundingClientRect();
501
- return rect.width > 0 && rect.height > 0;
502
- });
503
- return visible.slice(0, MAX).map((el, i) => {
504
- const eid = 'e' + (i + 1);
505
- el.setAttribute('data-vellum-eid', eid);
506
- const tag = el.tagName.toLowerCase();
507
- const attrs = {};
508
- for (const attr of ['type', 'name', 'placeholder', 'href', 'value', 'role', 'aria-label', 'id']) {
509
- if (el.hasAttribute(attr)) attrs[attr] = el.getAttribute(attr);
510
- }
511
- const text = (el.textContent || '').trim().slice(0, 80);
512
- return { eid, tag, attrs, text };
513
- });
514
- })()
515
- `)) as SnapshotElement[];
516
-
517
- // Build and store selector map
518
- const selectorMap = new Map<string, string>();
519
- for (const el of elements) {
520
- selectorMap.set(el.eid, `[data-vellum-eid="${el.eid}"]`);
521
- }
522
- browserManager.storeSnapshotMap(context.conversationId, selectorMap);
596
+ const { elements, selectorMap: backendNodeMap } = transformAxTree(rawTree);
523
597
 
524
- // Format output
525
- const lines: string[] = [
526
- `URL: ${currentUrl}`,
527
- `Title: ${title || "(none)"}`,
528
- "",
529
- ];
530
-
531
- if (elements.length === 0) {
532
- lines.push("(no interactive elements found)");
533
- } else {
534
- for (const el of elements) {
535
- let desc = `<${el.tag}`;
536
- for (const [key, val] of Object.entries(el.attrs)) {
537
- desc += ` ${key}="${val}"`;
538
- }
539
- desc += ">";
540
- if (el.text) {
541
- desc += ` ${el.text}`;
542
- }
543
- lines.push(`[${el.eid}] ${desc}`);
544
- }
545
- lines.push("");
546
- lines.push(
547
- `${elements.length} interactive element${
548
- elements.length === 1 ? "" : "s"
549
- } found.`,
550
- );
551
- }
598
+ browserManager.storeSnapshotBackendNodeMap(
599
+ context.conversationId,
600
+ backendNodeMap,
601
+ );
552
602
 
553
- return { content: lines.join("\n"), isError: false };
603
+ return {
604
+ content: formatAxSnapshot(
605
+ { elements, selectorMap: backendNodeMap },
606
+ { url: currentUrl, title },
607
+ ),
608
+ isError: false,
609
+ };
554
610
  } catch (err) {
555
611
  const msg = err instanceof Error ? err.message : String(err);
556
612
  log.error({ err }, "Snapshot failed");
557
613
  return { content: `Error: Snapshot failed: ${msg}`, isError: true };
614
+ } finally {
615
+ cdp.dispose();
558
616
  }
559
617
  }
560
618
 
@@ -566,15 +624,13 @@ export async function executeBrowserScreenshot(
566
624
  ): Promise<ToolExecutionResult> {
567
625
  const fullPage = input.full_page === true;
568
626
 
627
+ const cdp = getCdpClient(context);
569
628
  try {
570
- const page = await browserManager.getOrCreateSessionPage(
571
- context.conversationId,
629
+ const buffer = await captureScreenshotJpeg(
630
+ cdp,
631
+ { quality: 80, fullPage },
632
+ context.signal,
572
633
  );
573
- const buffer = await page.screenshot({
574
- type: "jpeg",
575
- quality: 80,
576
- fullPage,
577
- });
578
634
  const base64Data = buffer.toString("base64");
579
635
 
580
636
  const imageBlock: ImageContent = {
@@ -597,6 +653,8 @@ export async function executeBrowserScreenshot(
597
653
  const msg = err instanceof Error ? err.message : String(err);
598
654
  log.error({ err }, "Screenshot failed");
599
655
  return { content: `Error: Screenshot failed: ${msg}`, isError: true };
656
+ } finally {
657
+ cdp.dispose();
600
658
  }
601
659
  }
602
660
 
@@ -606,29 +664,46 @@ export async function executeBrowserClose(
606
664
  input: Record<string, unknown>,
607
665
  context: ToolContext,
608
666
  ): Promise<ToolExecutionResult> {
667
+ const cdp = getCdpClient(context);
609
668
  try {
610
- const sender = getSender(context.conversationId);
611
- if (sender) {
612
- await stopBrowserScreencast(context.conversationId);
613
- }
669
+ if (cdp.kind === "local") {
670
+ // Local/sacrificial-profile path: tear down the Playwright page,
671
+ // screencast, and associated CDP state for this conversation.
672
+ const sender = getSender(context.conversationId);
673
+ if (sender) {
674
+ await stopBrowserScreencast(context.conversationId);
675
+ }
614
676
 
615
- if (input.close_all_pages === true) {
616
- await stopAllScreencasts();
617
- await browserManager.closeAllPages();
677
+ if (input.close_all_pages === true) {
678
+ await stopAllScreencasts();
679
+ await browserManager.closeAllPages();
680
+ return {
681
+ content: "All browser pages and context closed.",
682
+ isError: false,
683
+ };
684
+ }
685
+ await browserManager.closeSessionPage(context.conversationId);
618
686
  return {
619
- content: "All browser pages and context closed.",
687
+ content: "Browser page closed for this conversation.",
620
688
  isError: false,
621
689
  };
622
690
  }
623
- await browserManager.closeSessionPage(context.conversationId);
691
+
692
+ // Extension path: the user owns their Chrome tab — we must not
693
+ // close it. Only drop the cached snapshot state so stale eids
694
+ // from prior snapshots cannot be resolved by later tool calls.
695
+ browserManager.clearSnapshotBackendNodeMap(context.conversationId);
624
696
  return {
625
- content: "Browser page closed for this conversation.",
697
+ content:
698
+ "Browser session cleared. (Your Chrome tab was not closed — close it yourself if desired.)",
626
699
  isError: false,
627
700
  };
628
701
  } catch (err) {
629
702
  const msg = err instanceof Error ? err.message : String(err);
630
703
  log.error({ err }, "Close failed");
631
704
  return { content: `Error: Close failed: ${msg}`, isError: true };
705
+ } finally {
706
+ cdp.dispose();
632
707
  }
633
708
  }
634
709
 
@@ -638,32 +713,106 @@ export async function executeBrowserClick(
638
713
  input: Record<string, unknown>,
639
714
  context: ToolContext,
640
715
  ): Promise<ToolExecutionResult> {
641
- const { selector, error } = resolveSelector(context.conversationId, input);
716
+ const { resolved, error } = resolveElement(context.conversationId, input);
642
717
  if (error) return { content: error, isError: true };
643
718
 
644
- const timeout =
645
- typeof input.timeout === "number" ? input.timeout : ACTION_TIMEOUT_MS;
646
-
719
+ const cdp = getCdpClient(context);
647
720
  try {
648
- const page = await browserManager.getOrCreateSessionPage(
649
- context.conversationId,
650
- );
651
- await page.click(selector!, { timeout });
652
- return { content: `Clicked element: ${selector}`, isError: false };
721
+ let backendNodeId: number;
722
+ if (resolved!.kind === "backend") {
723
+ backendNodeId = resolved!.backendNodeId;
724
+ } else {
725
+ // Wait until the selector matches a visible element. Mirrors
726
+ // Playwright's `page.click(selector, { timeout })` semantics
727
+ // and lets click work on async-hydrated pages where the
728
+ // target may not yet exist when the tool is invoked.
729
+ // cdpWaitForSelector returns the backendNodeId so we don't
730
+ // need a separate querySelectorBackendNodeId round-trip.
731
+ backendNodeId = await cdpWaitForSelector(
732
+ cdp,
733
+ resolved!.selector,
734
+ ACTION_TIMEOUT_MS,
735
+ context.signal,
736
+ );
737
+ }
738
+ await scrollIntoViewIfNeeded(cdp, backendNodeId, context.signal);
739
+ const point = await getCenterPoint(cdp, backendNodeId, context.signal);
740
+ await dispatchClickAt(cdp, point, context.signal);
741
+ const desc =
742
+ resolved!.kind === "backend"
743
+ ? `eid=${resolved!.eid}`
744
+ : resolved!.selector;
745
+ return { content: `Clicked element: ${desc}`, isError: false };
653
746
  } catch (err) {
654
747
  const msg = err instanceof Error ? err.message : String(err);
655
- log.error({ err, selector }, "Click failed");
748
+ log.error({ err }, "Click failed");
656
749
  return { content: `Error: Click failed: ${msg}`, isError: true };
750
+ } finally {
751
+ cdp.dispose();
657
752
  }
658
753
  }
659
754
 
755
+ // ── Shared input helpers ─────────────────────────────────────────────
756
+
757
+ /**
758
+ * Focus an element, clear its existing value (handling both
759
+ * `<input>`/`<textarea>` and `contentEditable` targets), re-focus
760
+ * (sites sometimes blur on a programmatic value reset), and insert
761
+ * the requested text via `Input.insertText`.
762
+ *
763
+ * Used by both `executeBrowserType` and `executeBrowserFillCredential`
764
+ * so credential fills cannot append to autofilled / pre-populated
765
+ * fields — appending would leak the existing value into the broker
766
+ * payload and corrupt the resulting password.
767
+ */
768
+ async function clearAndInsertText(
769
+ cdp: CdpClient,
770
+ backendNodeId: number,
771
+ value: string,
772
+ signal?: AbortSignal,
773
+ ): Promise<void> {
774
+ await focusElement(cdp, backendNodeId, signal);
775
+
776
+ // Resolve the node to a Runtime.RemoteObject so we can invoke a
777
+ // function on the element itself via Runtime.callFunctionOn. This
778
+ // is more reliable than a keyboard select-all + delete sequence
779
+ // across input, textarea, and contenteditable targets.
780
+ const { object } = await cdp.send<{ object: { objectId: string } }>(
781
+ "DOM.resolveNode",
782
+ { backendNodeId },
783
+ signal,
784
+ );
785
+ await cdp.send(
786
+ "Runtime.callFunctionOn",
787
+ {
788
+ objectId: object.objectId,
789
+ functionDeclaration: `function() {
790
+ if (typeof this.value === "string") {
791
+ this.value = "";
792
+ } else if (this.isContentEditable) {
793
+ this.textContent = "";
794
+ }
795
+ this.dispatchEvent(new Event("input", { bubbles: true }));
796
+ }`,
797
+ arguments: [],
798
+ },
799
+ signal,
800
+ );
801
+
802
+ // Re-focus after clearing — some sites move focus when the value
803
+ // property is reassigned programmatically.
804
+ await focusElement(cdp, backendNodeId, signal);
805
+
806
+ await dispatchInsertText(cdp, value, signal);
807
+ }
808
+
660
809
  // ── browser_type ─────────────────────────────────────────────────────
661
810
 
662
811
  export async function executeBrowserType(
663
812
  input: Record<string, unknown>,
664
813
  context: ToolContext,
665
814
  ): Promise<ToolExecutionResult> {
666
- const { selector, error } = resolveSelector(context.conversationId, input);
815
+ const { resolved, error } = resolveElement(context.conversationId, input);
667
816
  if (error) return { content: error, isError: true };
668
817
 
669
818
  const text = typeof input.text === "string" ? input.text : "";
@@ -674,40 +823,45 @@ export async function executeBrowserType(
674
823
  const clearFirst = input.clear_first !== false; // default true
675
824
  const pressEnter = input.press_enter === true;
676
825
 
677
- try {
678
- const page = await browserManager.getOrCreateSessionPage(
679
- context.conversationId,
680
- );
826
+ const targetDescription =
827
+ resolved!.kind === "backend"
828
+ ? `element_id "${resolved!.eid}"`
829
+ : resolved!.selector;
681
830
 
682
- const fillTimeout =
683
- typeof input.timeout === "number" ? input.timeout : ACTION_TIMEOUT_MS;
831
+ const cdp = getCdpClient(context);
832
+ try {
833
+ let backendNodeId: number;
834
+ if (resolved!.kind === "backend") {
835
+ backendNodeId = resolved!.backendNodeId;
836
+ } else {
837
+ backendNodeId = await querySelectorBackendNodeId(
838
+ cdp,
839
+ resolved!.selector,
840
+ context.signal,
841
+ );
842
+ }
684
843
 
685
844
  if (clearFirst) {
686
- await page.fill(selector!, text, { timeout: fillTimeout });
845
+ await clearAndInsertText(cdp, backendNodeId, text, context.signal);
687
846
  } else {
688
- // Read existing content before appending. Use .value for form inputs,
689
- // with fallback to .innerText for contenteditable elements (preserves
690
- // visual line breaks from <br> and block elements, unlike textContent).
691
- const currentValue = (await page.evaluate(
692
- `(() => { const el = document.querySelector(${JSON.stringify(
693
- selector!,
694
- )}); if (!el) return ''; if (typeof el.value === 'string') return el.value; return el.innerText ?? ''; })()`,
695
- )) as string;
696
- await page.fill(selector!, currentValue + text, { timeout: fillTimeout });
847
+ await focusElement(cdp, backendNodeId, context.signal);
848
+ await dispatchInsertText(cdp, text, context.signal);
697
849
  }
698
850
 
699
851
  if (pressEnter) {
700
- await page.press(selector!, "Enter");
852
+ await dispatchKeyPress(cdp, "Enter", context.signal);
701
853
  }
702
854
 
703
- const lines = [`Typed into element: ${selector}`];
855
+ const lines = [`Typed into element: ${targetDescription}`];
704
856
  if (clearFirst) lines.push("(cleared existing content first)");
705
857
  if (pressEnter) lines.push("(pressed Enter after typing)");
706
858
  return { content: lines.join("\n"), isError: false };
707
859
  } catch (err) {
708
860
  const msg = err instanceof Error ? err.message : String(err);
709
- log.error({ err, selector }, "Type failed");
861
+ log.error({ err, target: targetDescription }, "Type failed");
710
862
  return { content: `Error: Type failed: ${msg}`, isError: true };
863
+ } finally {
864
+ cdp.dispose();
711
865
  }
712
866
  }
713
867
 
@@ -722,39 +876,56 @@ export async function executeBrowserPressKey(
722
876
  return { content: "Error: key is required.", isError: true };
723
877
  }
724
878
 
725
- try {
726
- const page = await browserManager.getOrCreateSessionPage(
727
- context.conversationId,
728
- );
729
-
730
- // If element_id or selector is provided, press key on that element
731
- const elementId =
732
- typeof input.element_id === "string" ? input.element_id : null;
733
- const rawSelector =
734
- typeof input.selector === "string" ? input.selector : null;
879
+ const elementId =
880
+ typeof input.element_id === "string" ? input.element_id : null;
881
+ const rawSelector =
882
+ typeof input.selector === "string" ? input.selector : null;
883
+ const hasTarget = elementId !== null || rawSelector !== null;
884
+
885
+ let targetDescription: string | null = null;
886
+ let resolved: ResolvedElement | null = null;
887
+ if (hasTarget) {
888
+ const res = resolveElement(context.conversationId, input);
889
+ if (res.error) {
890
+ return { content: res.error, isError: true };
891
+ }
892
+ resolved = res.resolved;
893
+ targetDescription =
894
+ resolved!.kind === "backend"
895
+ ? `element_id "${resolved!.eid}"`
896
+ : resolved!.selector;
897
+ }
735
898
 
736
- if (elementId || rawSelector) {
737
- const { selector, error } = resolveSelector(
738
- context.conversationId,
739
- input,
740
- );
741
- if (error) {
742
- return { content: error, isError: true };
899
+ const cdp = getCdpClient(context);
900
+ try {
901
+ if (resolved) {
902
+ let backendNodeId: number;
903
+ if (resolved.kind === "backend") {
904
+ backendNodeId = resolved.backendNodeId;
905
+ } else {
906
+ backendNodeId = await querySelectorBackendNodeId(
907
+ cdp,
908
+ resolved.selector,
909
+ context.signal,
910
+ );
743
911
  }
744
- await page.press(selector!, key);
912
+ await focusElement(cdp, backendNodeId, context.signal);
913
+ await dispatchKeyPress(cdp, key, context.signal);
745
914
  return {
746
- content: `Pressed "${key}" on element: ${selector}`,
915
+ content: `Pressed "${key}" on element: ${targetDescription}`,
747
916
  isError: false,
748
917
  };
749
918
  }
750
919
 
751
- // No target -> press key on the page (focused element)
752
- await page.keyboard.press(key);
920
+ // No target -> press key on the currently focused element
921
+ await dispatchKeyPress(cdp, key, context.signal);
753
922
  return { content: `Pressed "${key}"`, isError: false };
754
923
  } catch (err) {
755
924
  const msg = err instanceof Error ? err.message : String(err);
756
925
  log.error({ err, key }, "Press key failed");
757
926
  return { content: `Error: Press key failed: ${msg}`, isError: true };
927
+ } finally {
928
+ cdp.dispose();
758
929
  }
759
930
  }
760
931
 
@@ -776,35 +947,49 @@ export async function executeBrowserScroll(
776
947
  const amount =
777
948
  typeof input.amount === "number" ? Math.abs(input.amount) : 500;
778
949
 
950
+ let deltaX = 0;
951
+ let deltaY = 0;
952
+ switch (direction) {
953
+ case "up":
954
+ deltaY = -amount;
955
+ break;
956
+ case "down":
957
+ deltaY = amount;
958
+ break;
959
+ case "left":
960
+ deltaX = -amount;
961
+ break;
962
+ case "right":
963
+ deltaX = amount;
964
+ break;
965
+ }
966
+
967
+ const cdp = getCdpClient(context);
779
968
  try {
780
- const page = await browserManager.getOrCreateSessionPage(
781
- context.conversationId,
969
+ // Fetch viewport dimensions so we can dispatch the wheel event at
970
+ // the viewport center — scrolling from (0, 0) misses sticky
971
+ // headers and overflow containers on many sites.
972
+ const { w, h } = await evaluateExpression<{ w: number; h: number }>(
973
+ cdp,
974
+ "({ w: window.innerWidth, h: window.innerHeight })",
975
+ {},
976
+ context.signal,
782
977
  );
783
978
 
784
- let deltaX = 0;
785
- let deltaY = 0;
786
- switch (direction) {
787
- case "up":
788
- deltaY = -amount;
789
- break;
790
- case "down":
791
- deltaY = amount;
792
- break;
793
- case "left":
794
- deltaX = -amount;
795
- break;
796
- case "right":
797
- deltaX = amount;
798
- break;
799
- }
800
-
801
- await page.mouse.wheel(deltaX, deltaY);
979
+ await dispatchWheelScroll(
980
+ cdp,
981
+ { x: w / 2, y: h / 2 },
982
+ { deltaX, deltaY },
983
+ context.signal,
984
+ );
802
985
 
803
986
  return { content: `Scrolled ${direction} by ${amount}px`, isError: false };
804
987
  } catch (err) {
805
988
  const msg = err instanceof Error ? err.message : String(err);
806
989
  log.error({ err, direction }, "Scroll failed");
807
990
  return { content: `Error: Scroll failed: ${msg}`, isError: true };
991
+ } finally {
992
+ cdp.dispose();
808
993
  }
809
994
  }
810
995
 
@@ -814,7 +999,7 @@ export async function executeBrowserSelectOption(
814
999
  input: Record<string, unknown>,
815
1000
  context: ToolContext,
816
1001
  ): Promise<ToolExecutionResult> {
817
- const { selector, error } = resolveSelector(context.conversationId, input);
1002
+ const { resolved, error } = resolveElement(context.conversationId, input);
818
1003
  if (error) return { content: error, isError: true };
819
1004
 
820
1005
  const value = typeof input.value === "string" ? input.value : undefined;
@@ -828,32 +1013,106 @@ export async function executeBrowserSelectOption(
828
1013
  };
829
1014
  }
830
1015
 
831
- try {
832
- const page = await browserManager.getOrCreateSessionPage(
833
- context.conversationId,
834
- );
1016
+ const targetDescription =
1017
+ resolved!.kind === "backend"
1018
+ ? `element_id "${resolved!.eid}"`
1019
+ : resolved!.selector;
835
1020
 
836
- const option: Record<string, string | number> = {};
837
- if (value !== undefined) option.value = value;
838
- else if (label !== undefined) option.label = label;
839
- else if (index !== undefined) option.index = index;
1021
+ const cdp = getCdpClient(context);
1022
+ try {
1023
+ let backendNodeId: number;
1024
+ if (resolved!.kind === "backend") {
1025
+ backendNodeId = resolved!.backendNodeId;
1026
+ } else {
1027
+ backendNodeId = await querySelectorBackendNodeId(
1028
+ cdp,
1029
+ resolved!.selector,
1030
+ context.signal,
1031
+ );
1032
+ }
840
1033
 
841
- await page.selectOption(selector!, option);
1034
+ // CDP does not expose a native "set select value" command, so we
1035
+ // resolve the node to a Runtime.RemoteObject and invoke a function
1036
+ // on it that applies value/label/index and dispatches `input`
1037
+ // followed by `change` (HTML spec order — Angular's
1038
+ // DefaultValueAccessor listens for `input`, so missing it breaks
1039
+ // form bindings on Angular sites).
1040
+ const { object } = await cdp.send<{ object: { objectId: string } }>(
1041
+ "DOM.resolveNode",
1042
+ { backendNodeId },
1043
+ context.signal,
1044
+ );
1045
+ const callResult = await cdp.send<{
1046
+ result?: { value?: boolean };
1047
+ }>(
1048
+ "Runtime.callFunctionOn",
1049
+ {
1050
+ objectId: object.objectId,
1051
+ functionDeclaration: `function(value, label, index) {
1052
+ let matched = false;
1053
+ if (value !== null && value !== undefined) {
1054
+ for (const opt of this.options) {
1055
+ if (opt.value === value) {
1056
+ this.value = value;
1057
+ matched = true;
1058
+ break;
1059
+ }
1060
+ }
1061
+ } else if (label !== null && label !== undefined) {
1062
+ for (const opt of this.options) {
1063
+ if (opt.label === label) {
1064
+ this.value = opt.value;
1065
+ matched = true;
1066
+ break;
1067
+ }
1068
+ }
1069
+ } else if (index !== null && index !== undefined) {
1070
+ if (index >= 0 && index < this.options.length) {
1071
+ this.selectedIndex = index;
1072
+ matched = true;
1073
+ }
1074
+ }
1075
+ if (matched) {
1076
+ this.dispatchEvent(new Event("input", { bubbles: true }));
1077
+ this.dispatchEvent(new Event("change", { bubbles: true }));
1078
+ }
1079
+ return matched;
1080
+ }`,
1081
+ arguments: [
1082
+ { value: value ?? null },
1083
+ { value: label ?? null },
1084
+ { value: index ?? null },
1085
+ ],
1086
+ returnByValue: true,
1087
+ },
1088
+ context.signal,
1089
+ );
842
1090
 
1091
+ const matched = callResult?.result?.value === true;
843
1092
  const desc =
844
1093
  value !== undefined
845
1094
  ? `value="${value}"`
846
1095
  : label !== undefined
847
1096
  ? `label="${label}"`
848
1097
  : `index=${index}`;
1098
+
1099
+ if (!matched) {
1100
+ return {
1101
+ content: `Error: Select option failed: no option matched ${desc} on ${targetDescription}.`,
1102
+ isError: true,
1103
+ };
1104
+ }
1105
+
849
1106
  return {
850
- content: `Selected option (${desc}) on element: ${selector}`,
1107
+ content: `Selected option (${desc}) on element: ${targetDescription}`,
851
1108
  isError: false,
852
1109
  };
853
1110
  } catch (err) {
854
1111
  const msg = err instanceof Error ? err.message : String(err);
855
- log.error({ err, selector }, "Select option failed");
1112
+ log.error({ err, target: targetDescription }, "Select option failed");
856
1113
  return { content: `Error: Select option failed: ${msg}`, isError: true };
1114
+ } finally {
1115
+ cdp.dispose();
857
1116
  }
858
1117
  }
859
1118
 
@@ -863,20 +1122,39 @@ export async function executeBrowserHover(
863
1122
  input: Record<string, unknown>,
864
1123
  context: ToolContext,
865
1124
  ): Promise<ToolExecutionResult> {
866
- const { selector, error } = resolveSelector(context.conversationId, input);
1125
+ const { resolved, error } = resolveElement(context.conversationId, input);
867
1126
  if (error) return { content: error, isError: true };
868
1127
 
1128
+ const cdp = getCdpClient(context);
869
1129
  try {
870
- const page = await browserManager.getOrCreateSessionPage(
871
- context.conversationId,
872
- );
873
- await page.hover(selector!, { timeout: ACTION_TIMEOUT_MS });
874
-
875
- return { content: `Hovered element: ${selector}`, isError: false };
1130
+ let backendNodeId: number;
1131
+ if (resolved!.kind === "backend") {
1132
+ backendNodeId = resolved!.backendNodeId;
1133
+ } else {
1134
+ // Wait until the selector matches a visible element. See the
1135
+ // matching note in executeBrowserClick — async-hydrated pages
1136
+ // need this to behave like Playwright's hover-with-timeout.
1137
+ backendNodeId = await cdpWaitForSelector(
1138
+ cdp,
1139
+ resolved!.selector,
1140
+ ACTION_TIMEOUT_MS,
1141
+ context.signal,
1142
+ );
1143
+ }
1144
+ await scrollIntoViewIfNeeded(cdp, backendNodeId, context.signal);
1145
+ const point = await getCenterPoint(cdp, backendNodeId, context.signal);
1146
+ await dispatchHoverAt(cdp, point, context.signal);
1147
+ const desc =
1148
+ resolved!.kind === "backend"
1149
+ ? `eid=${resolved!.eid}`
1150
+ : resolved!.selector;
1151
+ return { content: `Hovered element: ${desc}`, isError: false };
876
1152
  } catch (err) {
877
1153
  const msg = err instanceof Error ? err.message : String(err);
878
- log.error({ err, selector }, "Hover failed");
1154
+ log.error({ err }, "Hover failed");
879
1155
  return { content: `Error: Hover failed: ${msg}`, isError: true };
1156
+ } finally {
1157
+ cdp.dispose();
880
1158
  }
881
1159
  }
882
1160
 
@@ -917,39 +1195,43 @@ export async function executeBrowserWaitFor(
917
1195
  ? Math.min(input.timeout, MAX_WAIT_MS)
918
1196
  : MAX_WAIT_MS;
919
1197
 
920
- try {
921
- const page = await browserManager.getOrCreateSessionPage(
922
- context.conversationId,
923
- );
1198
+ // Duration mode has no CDP interaction — handle without acquiring
1199
+ // a CdpClient so the common "sleep" path stays transport-agnostic.
1200
+ if (duration != null) {
1201
+ const waitMs = Math.min(duration, MAX_WAIT_MS);
1202
+ await new Promise((r) => setTimeout(r, waitMs));
1203
+ return { content: `Waited ${waitMs}ms.`, isError: false };
1204
+ }
924
1205
 
1206
+ const cdp = getCdpClient(context);
1207
+ try {
925
1208
  if (selector) {
926
- await page.waitForSelector(selector, { timeout });
1209
+ // browser_wait_for selector mode is "did this node appear at
1210
+ // all" — preserve the existing semantics by polling for DOM
1211
+ // attachment, not full visibility. Tools that need
1212
+ // visible-state polling (click/hover) get it via the default
1213
+ // state in cdpWaitForSelector.
1214
+ await cdpWaitForSelector(cdp, selector, timeout, context.signal, {
1215
+ state: "attached",
1216
+ });
927
1217
  return {
928
1218
  content: `Element matching "${selector}" appeared.`,
929
1219
  isError: false,
930
1220
  };
931
1221
  }
932
1222
 
933
- if (text) {
934
- const escaped = JSON.stringify(text);
935
- await page.waitForFunction(
936
- `document.body?.innerText?.includes(${escaped})`,
937
- { timeout },
938
- );
939
- return {
940
- content: `Text "${truncate(text, 80)}" appeared on page.`,
941
- isError: false,
942
- };
943
- }
944
-
945
- // duration mode (milliseconds)
946
- const waitMs = Math.min(duration!, MAX_WAIT_MS);
947
- await new Promise((r) => setTimeout(r, waitMs));
948
- return { content: `Waited ${waitMs}ms.`, isError: false };
1223
+ // text mode (validated above — modeCount === 1 means text is set)
1224
+ await cdpWaitForText(cdp, text!, timeout, context.signal);
1225
+ return {
1226
+ content: `Text "${truncate(text!, 80)}" appeared on page.`,
1227
+ isError: false,
1228
+ };
949
1229
  } catch (err) {
950
1230
  const msg = err instanceof Error ? err.message : String(err);
951
1231
  log.error({ err }, "Wait failed");
952
1232
  return { content: `Error: Wait failed: ${msg}`, isError: true };
1233
+ } finally {
1234
+ cdp.dispose();
953
1235
  }
954
1236
  }
955
1237
 
@@ -961,16 +1243,17 @@ export async function executeBrowserExtract(
961
1243
  ): Promise<ToolExecutionResult> {
962
1244
  const includeLinks = input.include_links === true;
963
1245
 
1246
+ const cdp = getCdpClient(context);
964
1247
  try {
965
- const page = await browserManager.getOrCreateSessionPage(
966
- context.conversationId,
1248
+ const currentUrl = await getCurrentUrl(cdp, context.signal);
1249
+ const title = await getPageTitle(cdp, context.signal);
1250
+
1251
+ let textContent = await evaluateExpression<string>(
1252
+ cdp,
1253
+ "document.body?.innerText ?? ''",
1254
+ {},
1255
+ context.signal,
967
1256
  );
968
- const currentUrl = page.url();
969
- const title = await page.title();
970
-
971
- let textContent = (await page.evaluate(
972
- `document.body?.innerText ?? ''`,
973
- )) as string;
974
1257
 
975
1258
  if (textContent.length > MAX_EXTRACT_LENGTH) {
976
1259
  textContent =
@@ -985,15 +1268,9 @@ export async function executeBrowserExtract(
985
1268
  ];
986
1269
 
987
1270
  if (includeLinks) {
988
- const links = (await page.evaluate(`
989
- (() => {
990
- const anchors = Array.from(document.querySelectorAll('a[href]'));
991
- return anchors.slice(0, 200).map(a => ({
992
- text: (a.textContent || '').trim().slice(0, 80),
993
- href: a.href,
994
- }));
995
- })()
996
- `)) as Array<{ text: string; href: string }>;
1271
+ const links = await evaluateExpression<
1272
+ Array<{ text: string; href: string }>
1273
+ >(cdp, EXTRACT_LINKS_EXPRESSION, {}, context.signal);
997
1274
 
998
1275
  if (links.length > 0) {
999
1276
  lines.push("");
@@ -1009,6 +1286,8 @@ export async function executeBrowserExtract(
1009
1286
  const msg = err instanceof Error ? err.message : String(err);
1010
1287
  log.error({ err }, "Extract failed");
1011
1288
  return { content: `Error: Extract failed: ${msg}`, isError: true };
1289
+ } finally {
1290
+ cdp.dispose();
1012
1291
  }
1013
1292
  }
1014
1293
 
@@ -1028,26 +1307,41 @@ export async function executeBrowserFillCredential(
1028
1307
  return { content: "Error: field is required.", isError: true };
1029
1308
  }
1030
1309
 
1031
- const { selector, error } = resolveSelector(context.conversationId, input);
1310
+ const { resolved, error } = resolveElement(context.conversationId, input);
1032
1311
  if (error) return { content: error, isError: true };
1033
1312
 
1034
1313
  const pressEnter = input.press_enter === true;
1314
+ const targetDescription =
1315
+ resolved!.kind === "backend"
1316
+ ? `element_id "${resolved!.eid}"`
1317
+ : resolved!.selector;
1035
1318
 
1319
+ const cdp = getCdpClient(context);
1036
1320
  try {
1037
- const page = await browserManager.getOrCreateSessionPage(
1038
- context.conversationId,
1039
- );
1321
+ let backendNodeId: number;
1322
+ if (resolved!.kind === "backend") {
1323
+ backendNodeId = resolved!.backendNodeId;
1324
+ } else {
1325
+ backendNodeId = await querySelectorBackendNodeId(
1326
+ cdp,
1327
+ resolved!.selector,
1328
+ context.signal,
1329
+ );
1330
+ }
1040
1331
 
1041
- // Extract domain from the current page for domain policy enforcement
1332
+ // Extract the current page's hostname for broker domain policy
1333
+ // enforcement. Failures here (pre-navigation, about:blank, malformed
1334
+ // URL) fall through with pageDomain undefined; if the credential
1335
+ // has a domain policy the broker will deny the fill.
1042
1336
  let pageDomain: string | undefined;
1043
1337
  try {
1044
- const pageUrl = page.url();
1338
+ const pageUrl = await getCurrentUrl(cdp, context.signal);
1045
1339
  if (pageUrl && pageUrl !== "about:blank") {
1046
1340
  const parsed = new URL(pageUrl);
1047
1341
  pageDomain = parsed.hostname;
1048
1342
  }
1049
1343
  } catch {
1050
- // Invalid URL - pageDomain stays undefined, broker will deny if domain policy exists
1344
+ // pageDomain stays undefined
1051
1345
  }
1052
1346
 
1053
1347
  const result = await credentialBroker.browserFill({
@@ -1056,7 +1350,13 @@ export async function executeBrowserFillCredential(
1056
1350
  toolName: "browser_fill_credential",
1057
1351
  domain: pageDomain,
1058
1352
  fill: async (value) => {
1059
- await page.fill(selector!, value);
1353
+ // Clear-then-focus-then-insert via the shared helper. We
1354
+ // MUST clear first: Input.insertText writes at the cursor,
1355
+ // so on autofilled / pre-populated fields a bare insert
1356
+ // would append the credential to the existing value,
1357
+ // producing a corrupted password and leaking partial state
1358
+ // back into the page.
1359
+ await clearAndInsertText(cdp, backendNodeId, value, context.signal);
1060
1360
  },
1061
1361
  });
1062
1362
 
@@ -1086,7 +1386,10 @@ export async function executeBrowserFillCredential(
1086
1386
  isError: true,
1087
1387
  };
1088
1388
  }
1089
- log.error({ selector, reason }, "Fill credential failed");
1389
+ log.error(
1390
+ { target: targetDescription, reason },
1391
+ "Fill credential failed",
1392
+ );
1090
1393
  return {
1091
1394
  content: `Error: Fill credential failed: ${reason}`,
1092
1395
  isError: true,
@@ -1094,7 +1397,7 @@ export async function executeBrowserFillCredential(
1094
1397
  }
1095
1398
 
1096
1399
  if (pressEnter) {
1097
- await page.press(selector!, "Enter");
1400
+ await dispatchKeyPress(cdp, "Enter", context.signal);
1098
1401
  }
1099
1402
 
1100
1403
  return {
@@ -1105,5 +1408,7 @@ export async function executeBrowserFillCredential(
1105
1408
  const msg = err instanceof Error ? err.message : String(err);
1106
1409
  log.error({ err }, "Fill credential failed");
1107
1410
  return { content: `Error: Fill credential failed: ${msg}`, isError: true };
1411
+ } finally {
1412
+ cdp.dispose();
1108
1413
  }
1109
1414
  }