@browserbasehq/orca 3.2.0-preview.4 → 3.2.1-preview.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (308) hide show
  1. package/README.md +2 -2
  2. package/dist/cjs/lib/inference.d.ts +3 -1
  3. package/dist/cjs/lib/inference.js +3 -3
  4. package/dist/cjs/lib/inference.js.map +1 -1
  5. package/dist/cjs/lib/prompt.d.ts +1 -1
  6. package/dist/cjs/lib/prompt.js +24 -18
  7. package/dist/cjs/lib/prompt.js.map +1 -1
  8. package/dist/cjs/lib/v3/agent/AgentClient.d.ts +8 -0
  9. package/dist/cjs/lib/v3/agent/AgentClient.js +13 -0
  10. package/dist/cjs/lib/v3/agent/AgentClient.js.map +1 -1
  11. package/dist/cjs/lib/v3/agent/AgentProvider.js +1 -0
  12. package/dist/cjs/lib/v3/agent/AgentProvider.js.map +1 -1
  13. package/dist/cjs/lib/v3/agent/AnthropicCUAClient.d.ts +3 -1
  14. package/dist/cjs/lib/v3/agent/AnthropicCUAClient.js +61 -9
  15. package/dist/cjs/lib/v3/agent/AnthropicCUAClient.js.map +1 -1
  16. package/dist/cjs/lib/v3/agent/GoogleCUAClient.js +1 -0
  17. package/dist/cjs/lib/v3/agent/GoogleCUAClient.js.map +1 -1
  18. package/dist/cjs/lib/v3/agent/MicrosoftCUAClient.js +16 -0
  19. package/dist/cjs/lib/v3/agent/MicrosoftCUAClient.js.map +1 -1
  20. package/dist/cjs/lib/v3/agent/OpenAICUAClient.d.ts +12 -6
  21. package/dist/cjs/lib/v3/agent/OpenAICUAClient.js +164 -49
  22. package/dist/cjs/lib/v3/agent/OpenAICUAClient.js.map +1 -1
  23. package/dist/cjs/lib/v3/agent/prompts/agentSystemPrompt.d.ts +2 -2
  24. package/dist/cjs/lib/v3/agent/prompts/agentSystemPrompt.js +10 -11
  25. package/dist/cjs/lib/v3/agent/prompts/agentSystemPrompt.js.map +1 -1
  26. package/dist/cjs/lib/v3/agent/tools/fillform.d.ts +0 -1
  27. package/dist/cjs/lib/v3/agent/tools/fillform.js +7 -10
  28. package/dist/cjs/lib/v3/agent/tools/fillform.js.map +1 -1
  29. package/dist/cjs/lib/v3/agent/tools/index.js +1 -1
  30. package/dist/cjs/lib/v3/agent/tools/index.js.map +1 -1
  31. package/dist/cjs/lib/v3/agent/tools/keys.d.ts +2 -1
  32. package/dist/cjs/lib/v3/agent/tools/keys.js +57 -49
  33. package/dist/cjs/lib/v3/agent/tools/keys.js.map +1 -1
  34. package/dist/cjs/lib/v3/agent/utils/captchaSolver.d.ts +76 -0
  35. package/dist/cjs/lib/v3/agent/utils/captchaSolver.js +175 -0
  36. package/dist/cjs/lib/v3/agent/utils/captchaSolver.js.map +1 -0
  37. package/dist/cjs/lib/v3/agent/utils/coordinateNormalization.js +3 -5
  38. package/dist/cjs/lib/v3/agent/utils/coordinateNormalization.js.map +1 -1
  39. package/dist/cjs/lib/v3/agent/utils/variables.d.ts +5 -0
  40. package/dist/cjs/lib/v3/agent/utils/variables.js +9 -0
  41. package/dist/cjs/lib/v3/agent/utils/variables.js.map +1 -1
  42. package/dist/cjs/lib/v3/api.d.ts +5 -3
  43. package/dist/cjs/lib/v3/api.js +5 -15
  44. package/dist/cjs/lib/v3/api.js.map +1 -1
  45. package/dist/cjs/lib/v3/cache/AgentCache.js +5 -3
  46. package/dist/cjs/lib/v3/cache/AgentCache.js.map +1 -1
  47. package/dist/cjs/lib/v3/flowlogger/EventStore.js +1 -1
  48. package/dist/cjs/lib/v3/flowlogger/EventStore.js.map +1 -1
  49. package/dist/cjs/lib/v3/handlers/observeHandler.js +2 -1
  50. package/dist/cjs/lib/v3/handlers/observeHandler.js.map +1 -1
  51. package/dist/cjs/lib/v3/handlers/v3AgentHandler.d.ts +2 -1
  52. package/dist/cjs/lib/v3/handlers/v3AgentHandler.js +110 -46
  53. package/dist/cjs/lib/v3/handlers/v3AgentHandler.js.map +1 -1
  54. package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.d.ts +5 -0
  55. package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.js +131 -16
  56. package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.js.map +1 -1
  57. package/dist/cjs/lib/v3/index.d.ts +1 -1
  58. package/dist/cjs/lib/v3/llm/LLMProvider.d.ts +5 -2
  59. package/dist/cjs/lib/v3/llm/LLMProvider.js +14 -6
  60. package/dist/cjs/lib/v3/llm/LLMProvider.js.map +1 -1
  61. package/dist/cjs/lib/v3/llm/OpenAIClient.js +1 -0
  62. package/dist/cjs/lib/v3/llm/OpenAIClient.js.map +1 -1
  63. package/dist/cjs/lib/v3/llm/aisdk.d.ts +3 -1
  64. package/dist/cjs/lib/v3/llm/aisdk.js +67 -17
  65. package/dist/cjs/lib/v3/llm/aisdk.js.map +1 -1
  66. package/dist/cjs/lib/v3/types/private/cache.d.ts +0 -1
  67. package/dist/cjs/lib/v3/types/private/cache.js.map +1 -1
  68. package/dist/cjs/lib/v3/types/private/handlers.d.ts +1 -0
  69. package/dist/cjs/lib/v3/types/private/handlers.js.map +1 -1
  70. package/dist/cjs/lib/v3/types/public/agent.d.ts +8 -3
  71. package/dist/cjs/lib/v3/types/public/agent.js +1 -0
  72. package/dist/cjs/lib/v3/types/public/agent.js.map +1 -1
  73. package/dist/cjs/lib/v3/types/public/api.d.ts +54 -7
  74. package/dist/cjs/lib/v3/types/public/api.js +47 -16
  75. package/dist/cjs/lib/v3/types/public/api.js.map +1 -1
  76. package/dist/cjs/lib/v3/types/public/methods.d.ts +1 -0
  77. package/dist/cjs/lib/v3/types/public/methods.js.map +1 -1
  78. package/dist/cjs/lib/v3/types/public/model.d.ts +32 -2
  79. package/dist/cjs/lib/v3/types/public/model.js.map +1 -1
  80. package/dist/cjs/lib/v3/types/public/variables.d.ts +7 -0
  81. package/dist/cjs/lib/v3/types/public/variables.js +22 -0
  82. package/dist/cjs/lib/v3/types/public/variables.js.map +1 -0
  83. package/dist/cjs/lib/v3/understudy/context.js +11 -3
  84. package/dist/cjs/lib/v3/understudy/context.js.map +1 -1
  85. package/dist/cjs/lib/v3/understudy/page.js +1 -1
  86. package/dist/cjs/lib/v3/understudy/page.js.map +1 -1
  87. package/dist/cjs/lib/v3/v3.d.ts +23 -2
  88. package/dist/cjs/lib/v3/v3.js +111 -13
  89. package/dist/cjs/lib/v3/v3.js.map +1 -1
  90. package/dist/cjs/lib/version.d.ts +1 -1
  91. package/dist/cjs/lib/version.js +1 -1
  92. package/dist/cjs/lib/version.js.map +1 -1
  93. package/dist/cjs/tests/integration/agent-captcha-autosolve.spec.d.ts +1 -0
  94. package/dist/cjs/tests/integration/agent-captcha-autosolve.spec.js +56 -0
  95. package/dist/cjs/tests/integration/agent-captcha-autosolve.spec.js.map +1 -0
  96. package/dist/cjs/tests/integration/agent-hybrid-mode.spec.js +6 -6
  97. package/dist/cjs/tests/integration/agent-hybrid-mode.spec.js.map +1 -1
  98. package/dist/cjs/tests/integration/chrome-newtab-page-tracking.spec.d.ts +1 -0
  99. package/dist/cjs/tests/integration/chrome-newtab-page-tracking.spec.js +56 -0
  100. package/dist/cjs/tests/integration/chrome-newtab-page-tracking.spec.js.map +1 -0
  101. package/dist/cjs/tests/integration/timeouts.spec.js +1 -1
  102. package/dist/cjs/tests/integration/timeouts.spec.js.map +1 -1
  103. package/dist/cjs/tests/unit/agent-captcha-hooks.test.d.ts +1 -0
  104. package/dist/cjs/tests/unit/agent-captcha-hooks.test.js +341 -0
  105. package/dist/cjs/tests/unit/agent-captcha-hooks.test.js.map +1 -0
  106. package/dist/cjs/tests/unit/agent-execution-model.test.js +25 -3
  107. package/dist/cjs/tests/unit/agent-execution-model.test.js.map +1 -1
  108. package/dist/cjs/tests/unit/agent-metrics.test.d.ts +1 -0
  109. package/dist/cjs/tests/unit/agent-metrics.test.js +112 -0
  110. package/dist/cjs/tests/unit/agent-metrics.test.js.map +1 -0
  111. package/dist/cjs/tests/unit/agent-system-prompt-variables.test.d.ts +1 -0
  112. package/dist/cjs/tests/unit/agent-system-prompt-variables.test.js +23 -0
  113. package/dist/cjs/tests/unit/agent-system-prompt-variables.test.js.map +1 -0
  114. package/dist/cjs/tests/unit/aisdk-clients.test.d.ts +1 -0
  115. package/dist/cjs/tests/unit/aisdk-clients.test.js +90 -0
  116. package/dist/cjs/tests/unit/aisdk-clients.test.js.map +1 -0
  117. package/dist/cjs/tests/unit/anthropic-cua-adaptive-thinking.test.d.ts +1 -0
  118. package/dist/cjs/tests/unit/anthropic-cua-adaptive-thinking.test.js +250 -0
  119. package/dist/cjs/tests/unit/anthropic-cua-adaptive-thinking.test.js.map +1 -0
  120. package/dist/cjs/tests/unit/api-client-observe-variables.test.d.ts +1 -0
  121. package/dist/cjs/tests/unit/api-client-observe-variables.test.js +86 -0
  122. package/dist/cjs/tests/unit/api-client-observe-variables.test.js.map +1 -0
  123. package/dist/cjs/tests/unit/api-optional-model-api-key.test.d.ts +1 -0
  124. package/dist/cjs/tests/unit/api-optional-model-api-key.test.js +95 -0
  125. package/dist/cjs/tests/unit/api-optional-model-api-key.test.js.map +1 -0
  126. package/dist/cjs/tests/unit/api-variables-schema.test.d.ts +1 -0
  127. package/dist/cjs/tests/unit/api-variables-schema.test.js +37 -0
  128. package/dist/cjs/tests/unit/api-variables-schema.test.js.map +1 -0
  129. package/dist/cjs/tests/unit/browserbase-session-accessors.test.js +40 -0
  130. package/dist/cjs/tests/unit/browserbase-session-accessors.test.js.map +1 -1
  131. package/dist/cjs/tests/unit/captcha-solver.test.d.ts +1 -0
  132. package/dist/cjs/tests/unit/captcha-solver.test.js +154 -0
  133. package/dist/cjs/tests/unit/captcha-solver.test.js.map +1 -0
  134. package/dist/cjs/tests/unit/flowlogger-eventstore.test.js +1 -1
  135. package/dist/cjs/tests/unit/flowlogger-eventstore.test.js.map +1 -1
  136. package/dist/cjs/tests/unit/llm-middleware.test.d.ts +1 -0
  137. package/dist/cjs/tests/unit/llm-middleware.test.js +495 -0
  138. package/dist/cjs/tests/unit/llm-middleware.test.js.map +1 -0
  139. package/dist/cjs/tests/unit/microsoft-cua-client.test.d.ts +1 -0
  140. package/dist/cjs/tests/unit/microsoft-cua-client.test.js +86 -0
  141. package/dist/cjs/tests/unit/microsoft-cua-client.test.js.map +1 -0
  142. package/dist/cjs/tests/unit/openai-cua-client.test.d.ts +1 -0
  143. package/dist/cjs/tests/unit/openai-cua-client.test.js +71 -0
  144. package/dist/cjs/tests/unit/openai-cua-client.test.js.map +1 -0
  145. package/dist/cjs/tests/unit/prompt-observe-variables.test.d.ts +1 -0
  146. package/dist/cjs/tests/unit/prompt-observe-variables.test.js +19 -0
  147. package/dist/cjs/tests/unit/prompt-observe-variables.test.js.map +1 -0
  148. package/dist/cjs/tests/unit/public-api/llm-and-agents.test.js +1 -0
  149. package/dist/cjs/tests/unit/public-api/llm-and-agents.test.js.map +1 -1
  150. package/dist/cjs/tests/unit/public-api/public-types.test.js.map +1 -1
  151. package/dist/cjs/tests/unit/timeout-handlers.test.js +50 -0
  152. package/dist/cjs/tests/unit/timeout-handlers.test.js.map +1 -1
  153. package/dist/esm/lib/inference.d.ts +3 -1
  154. package/dist/esm/lib/inference.js +3 -3
  155. package/dist/esm/lib/inference.js.map +1 -1
  156. package/dist/esm/lib/prompt.d.ts +1 -1
  157. package/dist/esm/lib/prompt.js +24 -18
  158. package/dist/esm/lib/prompt.js.map +1 -1
  159. package/dist/esm/lib/v3/agent/AgentClient.d.ts +8 -0
  160. package/dist/esm/lib/v3/agent/AgentClient.js +13 -0
  161. package/dist/esm/lib/v3/agent/AgentClient.js.map +1 -1
  162. package/dist/esm/lib/v3/agent/AgentProvider.js +1 -0
  163. package/dist/esm/lib/v3/agent/AgentProvider.js.map +1 -1
  164. package/dist/esm/lib/v3/agent/AnthropicCUAClient.d.ts +3 -1
  165. package/dist/esm/lib/v3/agent/AnthropicCUAClient.js +61 -9
  166. package/dist/esm/lib/v3/agent/AnthropicCUAClient.js.map +1 -1
  167. package/dist/esm/lib/v3/agent/GoogleCUAClient.js +1 -0
  168. package/dist/esm/lib/v3/agent/GoogleCUAClient.js.map +1 -1
  169. package/dist/esm/lib/v3/agent/MicrosoftCUAClient.js +16 -0
  170. package/dist/esm/lib/v3/agent/MicrosoftCUAClient.js.map +1 -1
  171. package/dist/esm/lib/v3/agent/OpenAICUAClient.d.ts +12 -6
  172. package/dist/esm/lib/v3/agent/OpenAICUAClient.js +164 -49
  173. package/dist/esm/lib/v3/agent/OpenAICUAClient.js.map +1 -1
  174. package/dist/esm/lib/v3/agent/prompts/agentSystemPrompt.d.ts +2 -2
  175. package/dist/esm/lib/v3/agent/prompts/agentSystemPrompt.js +10 -11
  176. package/dist/esm/lib/v3/agent/prompts/agentSystemPrompt.js.map +1 -1
  177. package/dist/esm/lib/v3/agent/tools/fillform.d.ts +0 -1
  178. package/dist/esm/lib/v3/agent/tools/fillform.js +7 -10
  179. package/dist/esm/lib/v3/agent/tools/fillform.js.map +1 -1
  180. package/dist/esm/lib/v3/agent/tools/index.js +1 -1
  181. package/dist/esm/lib/v3/agent/tools/index.js.map +1 -1
  182. package/dist/esm/lib/v3/agent/tools/keys.d.ts +2 -1
  183. package/dist/esm/lib/v3/agent/tools/keys.js +57 -49
  184. package/dist/esm/lib/v3/agent/tools/keys.js.map +1 -1
  185. package/dist/esm/lib/v3/agent/utils/captchaSolver.d.ts +76 -0
  186. package/dist/esm/lib/v3/agent/utils/captchaSolver.js +171 -0
  187. package/dist/esm/lib/v3/agent/utils/captchaSolver.js.map +1 -0
  188. package/dist/esm/lib/v3/agent/utils/coordinateNormalization.js +3 -5
  189. package/dist/esm/lib/v3/agent/utils/coordinateNormalization.js.map +1 -1
  190. package/dist/esm/lib/v3/agent/utils/variables.d.ts +5 -0
  191. package/dist/esm/lib/v3/agent/utils/variables.js +8 -0
  192. package/dist/esm/lib/v3/agent/utils/variables.js.map +1 -1
  193. package/dist/esm/lib/v3/api.d.ts +5 -3
  194. package/dist/esm/lib/v3/api.js +5 -15
  195. package/dist/esm/lib/v3/api.js.map +1 -1
  196. package/dist/esm/lib/v3/cache/AgentCache.js +5 -3
  197. package/dist/esm/lib/v3/cache/AgentCache.js.map +1 -1
  198. package/dist/esm/lib/v3/flowlogger/EventStore.js +1 -1
  199. package/dist/esm/lib/v3/flowlogger/EventStore.js.map +1 -1
  200. package/dist/esm/lib/v3/handlers/observeHandler.js +2 -1
  201. package/dist/esm/lib/v3/handlers/observeHandler.js.map +1 -1
  202. package/dist/esm/lib/v3/handlers/v3AgentHandler.d.ts +2 -1
  203. package/dist/esm/lib/v3/handlers/v3AgentHandler.js +110 -46
  204. package/dist/esm/lib/v3/handlers/v3AgentHandler.js.map +1 -1
  205. package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.d.ts +5 -0
  206. package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.js +131 -16
  207. package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.js.map +1 -1
  208. package/dist/esm/lib/v3/index.d.ts +1 -1
  209. package/dist/esm/lib/v3/llm/LLMProvider.d.ts +5 -2
  210. package/dist/esm/lib/v3/llm/LLMProvider.js +15 -7
  211. package/dist/esm/lib/v3/llm/LLMProvider.js.map +1 -1
  212. package/dist/esm/lib/v3/llm/OpenAIClient.js +1 -0
  213. package/dist/esm/lib/v3/llm/OpenAIClient.js.map +1 -1
  214. package/dist/esm/lib/v3/llm/aisdk.d.ts +3 -1
  215. package/dist/esm/lib/v3/llm/aisdk.js +67 -17
  216. package/dist/esm/lib/v3/llm/aisdk.js.map +1 -1
  217. package/dist/esm/lib/v3/types/private/cache.d.ts +0 -1
  218. package/dist/esm/lib/v3/types/private/cache.js.map +1 -1
  219. package/dist/esm/lib/v3/types/private/handlers.d.ts +1 -0
  220. package/dist/esm/lib/v3/types/private/handlers.js.map +1 -1
  221. package/dist/esm/lib/v3/types/public/agent.d.ts +8 -3
  222. package/dist/esm/lib/v3/types/public/agent.js +1 -0
  223. package/dist/esm/lib/v3/types/public/agent.js.map +1 -1
  224. package/dist/esm/lib/v3/types/public/api.d.ts +54 -7
  225. package/dist/esm/lib/v3/types/public/api.js +42 -14
  226. package/dist/esm/lib/v3/types/public/api.js.map +1 -1
  227. package/dist/esm/lib/v3/types/public/methods.d.ts +1 -0
  228. package/dist/esm/lib/v3/types/public/methods.js.map +1 -1
  229. package/dist/esm/lib/v3/types/public/model.d.ts +32 -2
  230. package/dist/esm/lib/v3/types/public/model.js.map +1 -1
  231. package/dist/esm/lib/v3/types/public/variables.d.ts +7 -0
  232. package/dist/esm/lib/v3/types/public/variables.js +19 -0
  233. package/dist/esm/lib/v3/types/public/variables.js.map +1 -0
  234. package/dist/esm/lib/v3/understudy/context.js +11 -3
  235. package/dist/esm/lib/v3/understudy/context.js.map +1 -1
  236. package/dist/esm/lib/v3/understudy/page.js +1 -1
  237. package/dist/esm/lib/v3/understudy/page.js.map +1 -1
  238. package/dist/esm/lib/v3/v3.d.ts +23 -2
  239. package/dist/esm/lib/v3/v3.js +111 -14
  240. package/dist/esm/lib/v3/v3.js.map +1 -1
  241. package/dist/esm/lib/version.d.ts +1 -1
  242. package/dist/esm/lib/version.js +1 -1
  243. package/dist/esm/lib/version.js.map +1 -1
  244. package/dist/esm/tests/integration/agent-captcha-autosolve.spec.d.ts +1 -0
  245. package/dist/esm/tests/integration/agent-captcha-autosolve.spec.js +54 -0
  246. package/dist/esm/tests/integration/agent-captcha-autosolve.spec.js.map +1 -0
  247. package/dist/esm/tests/integration/agent-hybrid-mode.spec.js +6 -6
  248. package/dist/esm/tests/integration/agent-hybrid-mode.spec.js.map +1 -1
  249. package/dist/esm/tests/integration/chrome-newtab-page-tracking.spec.d.ts +1 -0
  250. package/dist/esm/tests/integration/chrome-newtab-page-tracking.spec.js +54 -0
  251. package/dist/esm/tests/integration/chrome-newtab-page-tracking.spec.js.map +1 -0
  252. package/dist/esm/tests/integration/timeouts.spec.js +1 -1
  253. package/dist/esm/tests/integration/timeouts.spec.js.map +1 -1
  254. package/dist/esm/tests/unit/agent-captcha-hooks.test.d.ts +1 -0
  255. package/dist/esm/tests/unit/agent-captcha-hooks.test.js +339 -0
  256. package/dist/esm/tests/unit/agent-captcha-hooks.test.js.map +1 -0
  257. package/dist/esm/tests/unit/agent-execution-model.test.js +25 -3
  258. package/dist/esm/tests/unit/agent-execution-model.test.js.map +1 -1
  259. package/dist/esm/tests/unit/agent-metrics.test.d.ts +1 -0
  260. package/dist/esm/tests/unit/agent-metrics.test.js +110 -0
  261. package/dist/esm/tests/unit/agent-metrics.test.js.map +1 -0
  262. package/dist/esm/tests/unit/agent-system-prompt-variables.test.d.ts +1 -0
  263. package/dist/esm/tests/unit/agent-system-prompt-variables.test.js +21 -0
  264. package/dist/esm/tests/unit/agent-system-prompt-variables.test.js.map +1 -0
  265. package/dist/esm/tests/unit/aisdk-clients.test.d.ts +1 -0
  266. package/dist/esm/tests/unit/aisdk-clients.test.js +88 -0
  267. package/dist/esm/tests/unit/aisdk-clients.test.js.map +1 -0
  268. package/dist/esm/tests/unit/anthropic-cua-adaptive-thinking.test.d.ts +1 -0
  269. package/dist/esm/tests/unit/anthropic-cua-adaptive-thinking.test.js +245 -0
  270. package/dist/esm/tests/unit/anthropic-cua-adaptive-thinking.test.js.map +1 -0
  271. package/dist/esm/tests/unit/api-client-observe-variables.test.d.ts +1 -0
  272. package/dist/esm/tests/unit/api-client-observe-variables.test.js +84 -0
  273. package/dist/esm/tests/unit/api-client-observe-variables.test.js.map +1 -0
  274. package/dist/esm/tests/unit/api-optional-model-api-key.test.d.ts +1 -0
  275. package/dist/esm/tests/unit/api-optional-model-api-key.test.js +93 -0
  276. package/dist/esm/tests/unit/api-optional-model-api-key.test.js.map +1 -0
  277. package/dist/esm/tests/unit/api-variables-schema.test.d.ts +1 -0
  278. package/dist/esm/tests/unit/api-variables-schema.test.js +35 -0
  279. package/dist/esm/tests/unit/api-variables-schema.test.js.map +1 -0
  280. package/dist/esm/tests/unit/browserbase-session-accessors.test.js +40 -0
  281. package/dist/esm/tests/unit/browserbase-session-accessors.test.js.map +1 -1
  282. package/dist/esm/tests/unit/captcha-solver.test.d.ts +1 -0
  283. package/dist/esm/tests/unit/captcha-solver.test.js +152 -0
  284. package/dist/esm/tests/unit/captcha-solver.test.js.map +1 -0
  285. package/dist/esm/tests/unit/flowlogger-eventstore.test.js +1 -1
  286. package/dist/esm/tests/unit/flowlogger-eventstore.test.js.map +1 -1
  287. package/dist/esm/tests/unit/llm-middleware.test.d.ts +1 -0
  288. package/dist/esm/tests/unit/llm-middleware.test.js +460 -0
  289. package/dist/esm/tests/unit/llm-middleware.test.js.map +1 -0
  290. package/dist/esm/tests/unit/microsoft-cua-client.test.d.ts +1 -0
  291. package/dist/esm/tests/unit/microsoft-cua-client.test.js +84 -0
  292. package/dist/esm/tests/unit/microsoft-cua-client.test.js.map +1 -0
  293. package/dist/esm/tests/unit/openai-cua-client.test.d.ts +1 -0
  294. package/dist/esm/tests/unit/openai-cua-client.test.js +69 -0
  295. package/dist/esm/tests/unit/openai-cua-client.test.js.map +1 -0
  296. package/dist/esm/tests/unit/prompt-observe-variables.test.d.ts +1 -0
  297. package/dist/esm/tests/unit/prompt-observe-variables.test.js +17 -0
  298. package/dist/esm/tests/unit/prompt-observe-variables.test.js.map +1 -0
  299. package/dist/esm/tests/unit/public-api/llm-and-agents.test.js +1 -0
  300. package/dist/esm/tests/unit/public-api/llm-and-agents.test.js.map +1 -1
  301. package/dist/esm/tests/unit/public-api/public-types.test.js.map +1 -1
  302. package/dist/esm/tests/unit/timeout-handlers.test.js +50 -0
  303. package/dist/esm/tests/unit/timeout-handlers.test.js.map +1 -1
  304. package/package.json +5 -9
  305. package/dist/cjs/lib/v3/dom/build/rerender-index.js +0 -1
  306. package/dist/cjs/lib/v3/dom/build/v3-index.js +0 -1
  307. package/dist/esm/lib/v3/dom/build/rerender-index.js +0 -1
  308. package/dist/esm/lib/v3/dom/build/v3-index.js +0 -1
@@ -1,13 +1,13 @@
1
+ import type { EasyInputMessage } from "openai/resources/responses/responses";
1
2
  import { LogLine } from "../types/public/logs.js";
2
3
  import { AgentAction, AgentResult, AgentType, AgentExecutionOptions, ResponseInputItem, ResponseItem, SafetyConfirmationHandler } from "../types/public/agent.js";
3
4
  import { ClientOptions } from "../types/public/model.js";
4
5
  import { AgentClient } from "./AgentClient.js";
5
6
  import { ToolSet } from "ai";
6
- /**
7
- * Client for OpenAI's Computer Use Assistant API
8
- * This implementation uses the official OpenAI Responses API for Computer Use
9
- */
7
+ type OpenAIRequestInputItem = ResponseInputItem | EasyInputMessage;
10
8
  export declare class OpenAICUAClient extends AgentClient {
9
+ private pendingContextNotes;
10
+ private captchaSolvedToolActive;
11
11
  private apiKey;
12
12
  private organization?;
13
13
  private baseURL;
@@ -21,6 +21,7 @@ export declare class OpenAICUAClient extends AgentClient {
21
21
  private environment;
22
22
  private tools?;
23
23
  private safetyConfirmationHandler?;
24
+ private get usesNewComputerTool();
24
25
  constructor(type: AgentType, modelName: string, userProvidedInstructions?: string, clientOptions?: ClientOptions, tools?: ToolSet);
25
26
  setViewport(width: number, height: number): void;
26
27
  setCurrentUrl(url: string): void;
@@ -28,6 +29,7 @@ export declare class OpenAICUAClient extends AgentClient {
28
29
  setActionHandler(handler: (action: AgentAction) => Promise<void>): void;
29
30
  setTools(tools: ToolSet): void;
30
31
  setSafetyConfirmationHandler(handler?: SafetyConfirmationHandler): void;
32
+ addContextNote(note: string): void;
31
33
  /**
32
34
  * Execute a task with the OpenAI CUA
33
35
  * This is the main entry point for the agent
@@ -38,7 +40,7 @@ export declare class OpenAICUAClient extends AgentClient {
38
40
  * Execute a single step of the agent
39
41
  * This coordinates the flow: Request → Get Action → Execute Action
40
42
  */
41
- executeStep(inputItems: ResponseInputItem[], previousResponseId: string | undefined, logger: (message: LogLine) => void): Promise<{
43
+ executeStep(inputItems: OpenAIRequestInputItem[], previousResponseId: string | undefined, logger: (message: LogLine) => void): Promise<{
42
44
  actions: AgentAction[];
43
45
  message: string;
44
46
  completed: boolean;
@@ -54,16 +56,20 @@ export declare class OpenAICUAClient extends AgentClient {
54
56
  private handleSafetyConfirmation;
55
57
  private isFunctionCallItem;
56
58
  private createInitialInputItems;
57
- getAction(inputItems: ResponseInputItem[], previousResponseId?: string): Promise<{
59
+ getAction(inputItems: OpenAIRequestInputItem[], previousResponseId?: string): Promise<{
58
60
  output: ResponseItem[];
59
61
  responseId: string;
60
62
  usage: Record<string, number>;
61
63
  }>;
62
64
  takeAction(output: ResponseItem[], logger: (message: LogLine) => void): Promise<ResponseInputItem[]>;
63
65
  private convertComputerCallToAction;
66
+ private drainContextNotes;
67
+ private captureInitialScreenshot;
68
+ private convertComputerCallToActions;
64
69
  private convertFunctionCallToAction;
65
70
  captureScreenshot(options?: {
66
71
  base64Image?: string;
67
72
  currentUrl?: string;
68
73
  }): Promise<string>;
69
74
  }
75
+ export {};
@@ -7,7 +7,10 @@ import { v7 as uuidv7 } from "uuid";
7
7
  * Client for OpenAI's Computer Use Assistant API
8
8
  * This implementation uses the official OpenAI Responses API for Computer Use
9
9
  */
10
+ const CAPTCHA_PROCEED_TOOL = "captchaSolvedProceed";
10
11
  export class OpenAICUAClient extends AgentClient {
12
+ pendingContextNotes = [];
13
+ captchaSolvedToolActive = false;
11
14
  apiKey;
12
15
  organization;
13
16
  baseURL;
@@ -21,6 +24,9 @@ export class OpenAICUAClient extends AgentClient {
21
24
  environment = "browser"; // "browser", "mac", "windows", or "ubuntu"
22
25
  tools;
23
26
  safetyConfirmationHandler;
27
+ get usesNewComputerTool() {
28
+ return this.modelName.startsWith("gpt-5");
29
+ }
24
30
  constructor(type, modelName, userProvidedInstructions, clientOptions, tools) {
25
31
  super(type, modelName, userProvidedInstructions);
26
32
  // Process client options
@@ -63,6 +69,15 @@ export class OpenAICUAClient extends AgentClient {
63
69
  setSafetyConfirmationHandler(handler) {
64
70
  this.safetyConfirmationHandler = handler;
65
71
  }
72
+ addContextNote(note) {
73
+ this.pendingContextNotes.push(note);
74
+ // When a captcha-related note arrives, expose a tool that the model can
75
+ // call instead of asking the user for confirmation. This replaces
76
+ // fragile English-phrase parsing with a structured tool call.
77
+ if (note.toLowerCase().includes("captcha")) {
78
+ this.captchaSolvedToolActive = true;
79
+ }
80
+ }
66
81
  /**
67
82
  * Execute a task with the OpenAI CUA
68
83
  * This is the main entry point for the agent
@@ -79,7 +94,7 @@ export class OpenAICUAClient extends AgentClient {
79
94
  let finalMessage = "";
80
95
  this.reasoningItems.clear(); // Clear any previous reasoning items
81
96
  // Start with the initial instruction
82
- let inputItems = this.createInitialInputItems(instruction);
97
+ let inputItems = await this.createInitialInputItems(instruction);
83
98
  let previousResponseId = undefined;
84
99
  let totalInputTokens = 0;
85
100
  let totalOutputTokens = 0;
@@ -87,6 +102,7 @@ export class OpenAICUAClient extends AgentClient {
87
102
  try {
88
103
  // Execute steps until completion or max steps reached
89
104
  while (!completed && currentStep < maxSteps) {
105
+ await this.preStepHook?.();
90
106
  logger({
91
107
  category: "agent",
92
108
  message: `Executing step ${currentStep + 1}/${maxSteps}`,
@@ -105,6 +121,16 @@ export class OpenAICUAClient extends AgentClient {
105
121
  // Update the input items for the next step if we're continuing
106
122
  if (!completed) {
107
123
  inputItems = result.nextInputItems;
124
+ const contextNotes = this.drainContextNotes();
125
+ if (contextNotes.length > 0) {
126
+ inputItems = [
127
+ ...inputItems,
128
+ ...contextNotes.map((note) => ({
129
+ role: "user",
130
+ content: note,
131
+ })),
132
+ ];
133
+ }
108
134
  }
109
135
  // Record any message for this step
110
136
  if (result.message) {
@@ -179,15 +205,15 @@ export class OpenAICUAClient extends AgentClient {
179
205
  if (item.type === "computer_call" && this.isComputerCallItem(item)) {
180
206
  logger({
181
207
  category: "agent",
182
- message: `Found computer_call: ${item.action.type}, call_id: ${item.call_id}`,
208
+ message: `Found computer_call with call_id: ${item.call_id}`,
183
209
  level: 2,
184
210
  });
185
- const action = this.convertComputerCallToAction(item);
186
- if (action) {
211
+ const actions = this.convertComputerCallToActions(item);
212
+ for (const action of actions) {
187
213
  stepActions.push(action);
188
214
  logger({
189
215
  category: "agent",
190
- message: `Converted computer_call to action: ${action.type}`,
216
+ message: `Found computer_call action: ${action.type}, payload: ${JSON.stringify(action)}, call_id: ${item.call_id}`,
191
217
  level: 2,
192
218
  });
193
219
  }
@@ -260,8 +286,8 @@ export class OpenAICUAClient extends AgentClient {
260
286
  isComputerCallItem(item) {
261
287
  return (item.type === "computer_call" &&
262
288
  "call_id" in item &&
263
- "action" in item &&
264
- typeof item.action === "object");
289
+ (("action" in item && typeof item.action === "object") ||
290
+ ("actions" in item && Array.isArray(item.actions))));
265
291
  }
266
292
  async handleSafetyConfirmation(pendingSafetyChecks, logger) {
267
293
  if (this.safetyConfirmationHandler) {
@@ -301,34 +327,54 @@ export class OpenAICUAClient extends AgentClient {
301
327
  "name" in item &&
302
328
  "arguments" in item);
303
329
  }
304
- createInitialInputItems(instruction) {
305
- // For the initial request, we use a simple array with the user's instruction
306
- return [
307
- {
330
+ async createInitialInputItems(instruction) {
331
+ const inputItems = [];
332
+ if (this.userProvidedInstructions) {
333
+ const systemMessage = {
308
334
  role: "system",
309
335
  content: this.userProvidedInstructions,
310
- },
311
- {
312
- role: "user",
313
- content: instruction,
314
- },
336
+ };
337
+ inputItems.push(systemMessage);
338
+ }
339
+ const textInput = {
340
+ type: "input_text",
341
+ text: instruction,
342
+ };
343
+ const userContent = [
344
+ textInput,
315
345
  ];
346
+ const initialScreenshot = await this.captureInitialScreenshot();
347
+ if (initialScreenshot) {
348
+ const screenshotInput = {
349
+ type: "input_image",
350
+ image_url: initialScreenshot,
351
+ detail: "high",
352
+ };
353
+ userContent.push(screenshotInput);
354
+ }
355
+ const userMessage = {
356
+ role: "user",
357
+ content: userContent,
358
+ };
359
+ inputItems.push(userMessage);
360
+ return inputItems;
316
361
  }
317
362
  async getAction(inputItems, previousResponseId) {
318
363
  try {
319
- // Create the request parameters
364
+ // Create the request parameters, branching on tool format
365
+ const computerTool = this.usesNewComputerTool
366
+ ? { type: "computer" }
367
+ : {
368
+ type: "computer_use_preview",
369
+ display_width: this.currentViewport.width,
370
+ display_height: this.currentViewport.height,
371
+ environment: this.environment,
372
+ };
320
373
  const requestParams = {
321
374
  model: this.modelName,
322
- tools: [
323
- {
324
- type: "computer_use_preview",
325
- display_width: this.currentViewport.width,
326
- display_height: this.currentViewport.height,
327
- environment: this.environment,
328
- },
329
- ],
375
+ tools: [computerTool],
330
376
  input: inputItems,
331
- truncation: "auto",
377
+ ...(this.usesNewComputerTool ? {} : { truncation: "auto" }),
332
378
  };
333
379
  // Add custom tools if available
334
380
  if (this.tools && Object.keys(this.tools).length > 0) {
@@ -346,6 +392,25 @@ export class OpenAICUAClient extends AgentClient {
346
392
  ...customTools,
347
393
  ];
348
394
  }
395
+ // When a captcha was just solved, expose a tool the model can call
396
+ // to confirm it should proceed. This avoids fragile English-phrase
397
+ // parsing and works regardless of the model's output language.
398
+ if (this.captchaSolvedToolActive) {
399
+ requestParams.tools = [
400
+ ...requestParams.tools,
401
+ {
402
+ type: "function",
403
+ name: CAPTCHA_PROCEED_TOOL,
404
+ function: {
405
+ name: CAPTCHA_PROCEED_TOOL,
406
+ description: "The captcha on this page was solved automatically. " +
407
+ "Call this tool to confirm and continue with your task " +
408
+ "instead of asking the user for permission.",
409
+ parameters: { type: "object", properties: {}, required: [] },
410
+ },
411
+ },
412
+ ];
413
+ }
349
414
  // Add previous_response_id if available
350
415
  if (previousResponseId) {
351
416
  requestParams.previous_response_id = previousResponseId;
@@ -396,26 +461,34 @@ export class OpenAICUAClient extends AgentClient {
396
461
  // Process each output item
397
462
  for (const item of output) {
398
463
  if (item.type === "computer_call" && this.isComputerCallItem(item)) {
399
- // Handle computer calls
464
+ // Handle computer calls (both single-action and batched-actions formats)
400
465
  try {
401
- const action = this.convertComputerCallToAction(item);
402
- if (action && this.actionHandler) {
403
- logger({
404
- category: "agent",
405
- message: `Executing computer action: ${action.type}`,
406
- level: 1,
407
- });
408
- await this.actionHandler(action);
466
+ const actions = this.convertComputerCallToActions(item);
467
+ if (this.actionHandler) {
468
+ for (const action of actions) {
469
+ logger({
470
+ category: "agent",
471
+ message: `Executing computer action: ${action.type}`,
472
+ level: 1,
473
+ });
474
+ await this.actionHandler(action);
475
+ }
409
476
  }
410
- // Capture a screenshot
477
+ // Capture a screenshot after all actions in the batch
411
478
  const screenshot = await this.captureScreenshot();
412
- // Create a computer_call_output for the next request
479
+ // Build the output — use "computer_screenshot" for new format, "input_image" for legacy
480
+ const outputType = this.usesNewComputerTool
481
+ ? "computer_screenshot"
482
+ : "input_image";
413
483
  const outputItem = {
414
484
  type: "computer_call_output",
415
485
  call_id: item.call_id,
416
486
  output: {
417
- type: "input_image",
487
+ type: outputType,
418
488
  image_url: screenshot,
489
+ ...(this.usesNewComputerTool
490
+ ? { detail: "original" }
491
+ : {}),
419
492
  },
420
493
  };
421
494
  logger({
@@ -423,8 +496,8 @@ export class OpenAICUAClient extends AgentClient {
423
496
  message: `Added computer_call_output for call_id: ${item.call_id}`,
424
497
  level: 2,
425
498
  });
426
- // Add current URL if available
427
- if (this.currentUrl) {
499
+ // Legacy format supports current_url on the output; new format does not
500
+ if (!this.usesNewComputerTool && this.currentUrl) {
428
501
  const computerCallOutput = outputItem;
429
502
  computerCallOutput.output.current_url = this.currentUrl;
430
503
  }
@@ -450,19 +523,23 @@ export class OpenAICUAClient extends AgentClient {
450
523
  level: 0,
451
524
  });
452
525
  try {
453
- // Capture a screenshot even on error
454
526
  const screenshot = await this.captureScreenshot();
527
+ const outputType = this.usesNewComputerTool
528
+ ? "computer_screenshot"
529
+ : "input_image";
455
530
  const errorOutputItem = {
456
531
  type: "computer_call_output",
457
532
  call_id: item.call_id,
458
533
  output: {
459
- type: "input_image",
534
+ type: outputType,
460
535
  image_url: screenshot,
461
536
  error: errorMessage,
537
+ ...(this.usesNewComputerTool
538
+ ? { detail: "original" }
539
+ : {}),
462
540
  },
463
541
  };
464
- // Add current URL if available
465
- if (this.currentUrl) {
542
+ if (!this.usesNewComputerTool && this.currentUrl) {
466
543
  const computerCallOutput = errorOutputItem;
467
544
  computerCallOutput.output.current_url = this.currentUrl;
468
545
  }
@@ -481,13 +558,11 @@ export class OpenAICUAClient extends AgentClient {
481
558
  if (screenshotError instanceof StagehandClosedError) {
482
559
  throw screenshotError;
483
560
  }
484
- // If we can't capture a screenshot, just send the error
485
561
  logger({
486
562
  category: "agent",
487
563
  message: `Error capturing screenshot: ${String(screenshotError)}`,
488
564
  level: 0,
489
565
  });
490
- // For error cases without a screenshot, we need to use a string output
491
566
  nextInputItems.push({
492
567
  type: "computer_call_output",
493
568
  call_id: item.call_id,
@@ -498,6 +573,17 @@ export class OpenAICUAClient extends AgentClient {
498
573
  }
499
574
  else if (item.type === "function_call" &&
500
575
  this.isFunctionCallItem(item)) {
576
+ // Handle the captcha-proceed tool — just return a confirmation and
577
+ // deactivate the tool so it doesn't appear on subsequent steps.
578
+ if (item.name === CAPTCHA_PROCEED_TOOL) {
579
+ this.captchaSolvedToolActive = false;
580
+ nextInputItems.push({
581
+ type: "function_call_output",
582
+ call_id: item.call_id,
583
+ output: "Confirmed. The captcha is solved. Continue completing the original task autonomously without asking for further confirmation.",
584
+ });
585
+ continue;
586
+ }
501
587
  // Handle function calls (tool calls)
502
588
  try {
503
589
  const action = this.convertFunctionCallToAction(item);
@@ -570,13 +656,42 @@ export class OpenAICUAClient extends AgentClient {
570
656
  }
571
657
  convertComputerCallToAction(call) {
572
658
  const { action } = call;
573
- // Instead of wrapping the action in a params object, spread the action properties directly
574
- // This ensures properties like x, y, button, etc. are directly accessible on the AgentAction
659
+ if (!action)
660
+ return null;
575
661
  return {
576
662
  type: action.type,
577
- ...action, // Spread all properties from the action
663
+ ...action,
578
664
  };
579
665
  }
666
+ drainContextNotes() {
667
+ if (this.pendingContextNotes.length === 0) {
668
+ return [];
669
+ }
670
+ const notes = [...this.pendingContextNotes];
671
+ this.pendingContextNotes = [];
672
+ return notes;
673
+ }
674
+ async captureInitialScreenshot() {
675
+ if (!this.screenshotProvider) {
676
+ return undefined;
677
+ }
678
+ try {
679
+ return await this.captureScreenshot();
680
+ }
681
+ catch {
682
+ return undefined;
683
+ }
684
+ }
685
+ convertComputerCallToActions(call) {
686
+ if (call.actions && Array.isArray(call.actions)) {
687
+ return call.actions.map((action) => ({
688
+ type: action.type,
689
+ ...action,
690
+ }));
691
+ }
692
+ const single = this.convertComputerCallToAction(call);
693
+ return single ? [single] : [];
694
+ }
580
695
  convertFunctionCallToAction(call) {
581
696
  try {
582
697
  const args = JSON.parse(call.arguments);