@browserbasehq/orca 3.2.0-preview.4 → 3.2.1-preview.0

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (308)
  1. package/README.md +2 -2
  2. package/dist/cjs/lib/inference.d.ts +3 -1
  3. package/dist/cjs/lib/inference.js +3 -3
  4. package/dist/cjs/lib/inference.js.map +1 -1
  5. package/dist/cjs/lib/prompt.d.ts +1 -1
  6. package/dist/cjs/lib/prompt.js +24 -18
  7. package/dist/cjs/lib/prompt.js.map +1 -1
  8. package/dist/cjs/lib/v3/agent/AgentClient.d.ts +8 -0
  9. package/dist/cjs/lib/v3/agent/AgentClient.js +13 -0
  10. package/dist/cjs/lib/v3/agent/AgentClient.js.map +1 -1
  11. package/dist/cjs/lib/v3/agent/AgentProvider.js +1 -0
  12. package/dist/cjs/lib/v3/agent/AgentProvider.js.map +1 -1
  13. package/dist/cjs/lib/v3/agent/AnthropicCUAClient.d.ts +3 -1
  14. package/dist/cjs/lib/v3/agent/AnthropicCUAClient.js +61 -9
  15. package/dist/cjs/lib/v3/agent/AnthropicCUAClient.js.map +1 -1
  16. package/dist/cjs/lib/v3/agent/GoogleCUAClient.js +1 -0
  17. package/dist/cjs/lib/v3/agent/GoogleCUAClient.js.map +1 -1
  18. package/dist/cjs/lib/v3/agent/MicrosoftCUAClient.js +16 -0
  19. package/dist/cjs/lib/v3/agent/MicrosoftCUAClient.js.map +1 -1
  20. package/dist/cjs/lib/v3/agent/OpenAICUAClient.d.ts +12 -6
  21. package/dist/cjs/lib/v3/agent/OpenAICUAClient.js +164 -49
  22. package/dist/cjs/lib/v3/agent/OpenAICUAClient.js.map +1 -1
  23. package/dist/cjs/lib/v3/agent/prompts/agentSystemPrompt.d.ts +2 -2
  24. package/dist/cjs/lib/v3/agent/prompts/agentSystemPrompt.js +10 -11
  25. package/dist/cjs/lib/v3/agent/prompts/agentSystemPrompt.js.map +1 -1
  26. package/dist/cjs/lib/v3/agent/tools/fillform.d.ts +0 -1
  27. package/dist/cjs/lib/v3/agent/tools/fillform.js +7 -10
  28. package/dist/cjs/lib/v3/agent/tools/fillform.js.map +1 -1
  29. package/dist/cjs/lib/v3/agent/tools/index.js +1 -1
  30. package/dist/cjs/lib/v3/agent/tools/index.js.map +1 -1
  31. package/dist/cjs/lib/v3/agent/tools/keys.d.ts +2 -1
  32. package/dist/cjs/lib/v3/agent/tools/keys.js +57 -49
  33. package/dist/cjs/lib/v3/agent/tools/keys.js.map +1 -1
  34. package/dist/cjs/lib/v3/agent/utils/captchaSolver.d.ts +76 -0
  35. package/dist/cjs/lib/v3/agent/utils/captchaSolver.js +175 -0
  36. package/dist/cjs/lib/v3/agent/utils/captchaSolver.js.map +1 -0
  37. package/dist/cjs/lib/v3/agent/utils/coordinateNormalization.js +3 -5
  38. package/dist/cjs/lib/v3/agent/utils/coordinateNormalization.js.map +1 -1
  39. package/dist/cjs/lib/v3/agent/utils/variables.d.ts +5 -0
  40. package/dist/cjs/lib/v3/agent/utils/variables.js +9 -0
  41. package/dist/cjs/lib/v3/agent/utils/variables.js.map +1 -1
  42. package/dist/cjs/lib/v3/api.d.ts +5 -3
  43. package/dist/cjs/lib/v3/api.js +5 -15
  44. package/dist/cjs/lib/v3/api.js.map +1 -1
  45. package/dist/cjs/lib/v3/cache/AgentCache.js +5 -3
  46. package/dist/cjs/lib/v3/cache/AgentCache.js.map +1 -1
  47. package/dist/cjs/lib/v3/flowlogger/EventStore.js +1 -1
  48. package/dist/cjs/lib/v3/flowlogger/EventStore.js.map +1 -1
  49. package/dist/cjs/lib/v3/handlers/observeHandler.js +2 -1
  50. package/dist/cjs/lib/v3/handlers/observeHandler.js.map +1 -1
  51. package/dist/cjs/lib/v3/handlers/v3AgentHandler.d.ts +2 -1
  52. package/dist/cjs/lib/v3/handlers/v3AgentHandler.js +110 -46
  53. package/dist/cjs/lib/v3/handlers/v3AgentHandler.js.map +1 -1
  54. package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.d.ts +5 -0
  55. package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.js +131 -16
  56. package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.js.map +1 -1
  57. package/dist/cjs/lib/v3/index.d.ts +1 -1
  58. package/dist/cjs/lib/v3/llm/LLMProvider.d.ts +5 -2
  59. package/dist/cjs/lib/v3/llm/LLMProvider.js +14 -6
  60. package/dist/cjs/lib/v3/llm/LLMProvider.js.map +1 -1
  61. package/dist/cjs/lib/v3/llm/OpenAIClient.js +1 -0
  62. package/dist/cjs/lib/v3/llm/OpenAIClient.js.map +1 -1
  63. package/dist/cjs/lib/v3/llm/aisdk.d.ts +3 -1
  64. package/dist/cjs/lib/v3/llm/aisdk.js +67 -17
  65. package/dist/cjs/lib/v3/llm/aisdk.js.map +1 -1
  66. package/dist/cjs/lib/v3/types/private/cache.d.ts +0 -1
  67. package/dist/cjs/lib/v3/types/private/cache.js.map +1 -1
  68. package/dist/cjs/lib/v3/types/private/handlers.d.ts +1 -0
  69. package/dist/cjs/lib/v3/types/private/handlers.js.map +1 -1
  70. package/dist/cjs/lib/v3/types/public/agent.d.ts +8 -3
  71. package/dist/cjs/lib/v3/types/public/agent.js +1 -0
  72. package/dist/cjs/lib/v3/types/public/agent.js.map +1 -1
  73. package/dist/cjs/lib/v3/types/public/api.d.ts +54 -7
  74. package/dist/cjs/lib/v3/types/public/api.js +47 -16
  75. package/dist/cjs/lib/v3/types/public/api.js.map +1 -1
  76. package/dist/cjs/lib/v3/types/public/methods.d.ts +1 -0
  77. package/dist/cjs/lib/v3/types/public/methods.js.map +1 -1
  78. package/dist/cjs/lib/v3/types/public/model.d.ts +32 -2
  79. package/dist/cjs/lib/v3/types/public/model.js.map +1 -1
  80. package/dist/cjs/lib/v3/types/public/variables.d.ts +7 -0
  81. package/dist/cjs/lib/v3/types/public/variables.js +22 -0
  82. package/dist/cjs/lib/v3/types/public/variables.js.map +1 -0
  83. package/dist/cjs/lib/v3/understudy/context.js +11 -3
  84. package/dist/cjs/lib/v3/understudy/context.js.map +1 -1
  85. package/dist/cjs/lib/v3/understudy/page.js +1 -1
  86. package/dist/cjs/lib/v3/understudy/page.js.map +1 -1
  87. package/dist/cjs/lib/v3/v3.d.ts +23 -2
  88. package/dist/cjs/lib/v3/v3.js +111 -13
  89. package/dist/cjs/lib/v3/v3.js.map +1 -1
  90. package/dist/cjs/lib/version.d.ts +1 -1
  91. package/dist/cjs/lib/version.js +1 -1
  92. package/dist/cjs/lib/version.js.map +1 -1
  93. package/dist/cjs/tests/integration/agent-captcha-autosolve.spec.d.ts +1 -0
  94. package/dist/cjs/tests/integration/agent-captcha-autosolve.spec.js +56 -0
  95. package/dist/cjs/tests/integration/agent-captcha-autosolve.spec.js.map +1 -0
  96. package/dist/cjs/tests/integration/agent-hybrid-mode.spec.js +6 -6
  97. package/dist/cjs/tests/integration/agent-hybrid-mode.spec.js.map +1 -1
  98. package/dist/cjs/tests/integration/chrome-newtab-page-tracking.spec.d.ts +1 -0
  99. package/dist/cjs/tests/integration/chrome-newtab-page-tracking.spec.js +56 -0
  100. package/dist/cjs/tests/integration/chrome-newtab-page-tracking.spec.js.map +1 -0
  101. package/dist/cjs/tests/integration/timeouts.spec.js +1 -1
  102. package/dist/cjs/tests/integration/timeouts.spec.js.map +1 -1
  103. package/dist/cjs/tests/unit/agent-captcha-hooks.test.d.ts +1 -0
  104. package/dist/cjs/tests/unit/agent-captcha-hooks.test.js +341 -0
  105. package/dist/cjs/tests/unit/agent-captcha-hooks.test.js.map +1 -0
  106. package/dist/cjs/tests/unit/agent-execution-model.test.js +25 -3
  107. package/dist/cjs/tests/unit/agent-execution-model.test.js.map +1 -1
  108. package/dist/cjs/tests/unit/agent-metrics.test.d.ts +1 -0
  109. package/dist/cjs/tests/unit/agent-metrics.test.js +112 -0
  110. package/dist/cjs/tests/unit/agent-metrics.test.js.map +1 -0
  111. package/dist/cjs/tests/unit/agent-system-prompt-variables.test.d.ts +1 -0
  112. package/dist/cjs/tests/unit/agent-system-prompt-variables.test.js +23 -0
  113. package/dist/cjs/tests/unit/agent-system-prompt-variables.test.js.map +1 -0
  114. package/dist/cjs/tests/unit/aisdk-clients.test.d.ts +1 -0
  115. package/dist/cjs/tests/unit/aisdk-clients.test.js +90 -0
  116. package/dist/cjs/tests/unit/aisdk-clients.test.js.map +1 -0
  117. package/dist/cjs/tests/unit/anthropic-cua-adaptive-thinking.test.d.ts +1 -0
  118. package/dist/cjs/tests/unit/anthropic-cua-adaptive-thinking.test.js +250 -0
  119. package/dist/cjs/tests/unit/anthropic-cua-adaptive-thinking.test.js.map +1 -0
  120. package/dist/cjs/tests/unit/api-client-observe-variables.test.d.ts +1 -0
  121. package/dist/cjs/tests/unit/api-client-observe-variables.test.js +86 -0
  122. package/dist/cjs/tests/unit/api-client-observe-variables.test.js.map +1 -0
  123. package/dist/cjs/tests/unit/api-optional-model-api-key.test.d.ts +1 -0
  124. package/dist/cjs/tests/unit/api-optional-model-api-key.test.js +95 -0
  125. package/dist/cjs/tests/unit/api-optional-model-api-key.test.js.map +1 -0
  126. package/dist/cjs/tests/unit/api-variables-schema.test.d.ts +1 -0
  127. package/dist/cjs/tests/unit/api-variables-schema.test.js +37 -0
  128. package/dist/cjs/tests/unit/api-variables-schema.test.js.map +1 -0
  129. package/dist/cjs/tests/unit/browserbase-session-accessors.test.js +40 -0
  130. package/dist/cjs/tests/unit/browserbase-session-accessors.test.js.map +1 -1
  131. package/dist/cjs/tests/unit/captcha-solver.test.d.ts +1 -0
  132. package/dist/cjs/tests/unit/captcha-solver.test.js +154 -0
  133. package/dist/cjs/tests/unit/captcha-solver.test.js.map +1 -0
  134. package/dist/cjs/tests/unit/flowlogger-eventstore.test.js +1 -1
  135. package/dist/cjs/tests/unit/flowlogger-eventstore.test.js.map +1 -1
  136. package/dist/cjs/tests/unit/llm-middleware.test.d.ts +1 -0
  137. package/dist/cjs/tests/unit/llm-middleware.test.js +495 -0
  138. package/dist/cjs/tests/unit/llm-middleware.test.js.map +1 -0
  139. package/dist/cjs/tests/unit/microsoft-cua-client.test.d.ts +1 -0
  140. package/dist/cjs/tests/unit/microsoft-cua-client.test.js +86 -0
  141. package/dist/cjs/tests/unit/microsoft-cua-client.test.js.map +1 -0
  142. package/dist/cjs/tests/unit/openai-cua-client.test.d.ts +1 -0
  143. package/dist/cjs/tests/unit/openai-cua-client.test.js +71 -0
  144. package/dist/cjs/tests/unit/openai-cua-client.test.js.map +1 -0
  145. package/dist/cjs/tests/unit/prompt-observe-variables.test.d.ts +1 -0
  146. package/dist/cjs/tests/unit/prompt-observe-variables.test.js +19 -0
  147. package/dist/cjs/tests/unit/prompt-observe-variables.test.js.map +1 -0
  148. package/dist/cjs/tests/unit/public-api/llm-and-agents.test.js +1 -0
  149. package/dist/cjs/tests/unit/public-api/llm-and-agents.test.js.map +1 -1
  150. package/dist/cjs/tests/unit/public-api/public-types.test.js.map +1 -1
  151. package/dist/cjs/tests/unit/timeout-handlers.test.js +50 -0
  152. package/dist/cjs/tests/unit/timeout-handlers.test.js.map +1 -1
  153. package/dist/esm/lib/inference.d.ts +3 -1
  154. package/dist/esm/lib/inference.js +3 -3
  155. package/dist/esm/lib/inference.js.map +1 -1
  156. package/dist/esm/lib/prompt.d.ts +1 -1
  157. package/dist/esm/lib/prompt.js +24 -18
  158. package/dist/esm/lib/prompt.js.map +1 -1
  159. package/dist/esm/lib/v3/agent/AgentClient.d.ts +8 -0
  160. package/dist/esm/lib/v3/agent/AgentClient.js +13 -0
  161. package/dist/esm/lib/v3/agent/AgentClient.js.map +1 -1
  162. package/dist/esm/lib/v3/agent/AgentProvider.js +1 -0
  163. package/dist/esm/lib/v3/agent/AgentProvider.js.map +1 -1
  164. package/dist/esm/lib/v3/agent/AnthropicCUAClient.d.ts +3 -1
  165. package/dist/esm/lib/v3/agent/AnthropicCUAClient.js +61 -9
  166. package/dist/esm/lib/v3/agent/AnthropicCUAClient.js.map +1 -1
  167. package/dist/esm/lib/v3/agent/GoogleCUAClient.js +1 -0
  168. package/dist/esm/lib/v3/agent/GoogleCUAClient.js.map +1 -1
  169. package/dist/esm/lib/v3/agent/MicrosoftCUAClient.js +16 -0
  170. package/dist/esm/lib/v3/agent/MicrosoftCUAClient.js.map +1 -1
  171. package/dist/esm/lib/v3/agent/OpenAICUAClient.d.ts +12 -6
  172. package/dist/esm/lib/v3/agent/OpenAICUAClient.js +164 -49
  173. package/dist/esm/lib/v3/agent/OpenAICUAClient.js.map +1 -1
  174. package/dist/esm/lib/v3/agent/prompts/agentSystemPrompt.d.ts +2 -2
  175. package/dist/esm/lib/v3/agent/prompts/agentSystemPrompt.js +10 -11
  176. package/dist/esm/lib/v3/agent/prompts/agentSystemPrompt.js.map +1 -1
  177. package/dist/esm/lib/v3/agent/tools/fillform.d.ts +0 -1
  178. package/dist/esm/lib/v3/agent/tools/fillform.js +7 -10
  179. package/dist/esm/lib/v3/agent/tools/fillform.js.map +1 -1
  180. package/dist/esm/lib/v3/agent/tools/index.js +1 -1
  181. package/dist/esm/lib/v3/agent/tools/index.js.map +1 -1
  182. package/dist/esm/lib/v3/agent/tools/keys.d.ts +2 -1
  183. package/dist/esm/lib/v3/agent/tools/keys.js +57 -49
  184. package/dist/esm/lib/v3/agent/tools/keys.js.map +1 -1
  185. package/dist/esm/lib/v3/agent/utils/captchaSolver.d.ts +76 -0
  186. package/dist/esm/lib/v3/agent/utils/captchaSolver.js +171 -0
  187. package/dist/esm/lib/v3/agent/utils/captchaSolver.js.map +1 -0
  188. package/dist/esm/lib/v3/agent/utils/coordinateNormalization.js +3 -5
  189. package/dist/esm/lib/v3/agent/utils/coordinateNormalization.js.map +1 -1
  190. package/dist/esm/lib/v3/agent/utils/variables.d.ts +5 -0
  191. package/dist/esm/lib/v3/agent/utils/variables.js +8 -0
  192. package/dist/esm/lib/v3/agent/utils/variables.js.map +1 -1
  193. package/dist/esm/lib/v3/api.d.ts +5 -3
  194. package/dist/esm/lib/v3/api.js +5 -15
  195. package/dist/esm/lib/v3/api.js.map +1 -1
  196. package/dist/esm/lib/v3/cache/AgentCache.js +5 -3
  197. package/dist/esm/lib/v3/cache/AgentCache.js.map +1 -1
  198. package/dist/esm/lib/v3/flowlogger/EventStore.js +1 -1
  199. package/dist/esm/lib/v3/flowlogger/EventStore.js.map +1 -1
  200. package/dist/esm/lib/v3/handlers/observeHandler.js +2 -1
  201. package/dist/esm/lib/v3/handlers/observeHandler.js.map +1 -1
  202. package/dist/esm/lib/v3/handlers/v3AgentHandler.d.ts +2 -1
  203. package/dist/esm/lib/v3/handlers/v3AgentHandler.js +110 -46
  204. package/dist/esm/lib/v3/handlers/v3AgentHandler.js.map +1 -1
  205. package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.d.ts +5 -0
  206. package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.js +131 -16
  207. package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.js.map +1 -1
  208. package/dist/esm/lib/v3/index.d.ts +1 -1
  209. package/dist/esm/lib/v3/llm/LLMProvider.d.ts +5 -2
  210. package/dist/esm/lib/v3/llm/LLMProvider.js +15 -7
  211. package/dist/esm/lib/v3/llm/LLMProvider.js.map +1 -1
  212. package/dist/esm/lib/v3/llm/OpenAIClient.js +1 -0
  213. package/dist/esm/lib/v3/llm/OpenAIClient.js.map +1 -1
  214. package/dist/esm/lib/v3/llm/aisdk.d.ts +3 -1
  215. package/dist/esm/lib/v3/llm/aisdk.js +67 -17
  216. package/dist/esm/lib/v3/llm/aisdk.js.map +1 -1
  217. package/dist/esm/lib/v3/types/private/cache.d.ts +0 -1
  218. package/dist/esm/lib/v3/types/private/cache.js.map +1 -1
  219. package/dist/esm/lib/v3/types/private/handlers.d.ts +1 -0
  220. package/dist/esm/lib/v3/types/private/handlers.js.map +1 -1
  221. package/dist/esm/lib/v3/types/public/agent.d.ts +8 -3
  222. package/dist/esm/lib/v3/types/public/agent.js +1 -0
  223. package/dist/esm/lib/v3/types/public/agent.js.map +1 -1
  224. package/dist/esm/lib/v3/types/public/api.d.ts +54 -7
  225. package/dist/esm/lib/v3/types/public/api.js +42 -14
  226. package/dist/esm/lib/v3/types/public/api.js.map +1 -1
  227. package/dist/esm/lib/v3/types/public/methods.d.ts +1 -0
  228. package/dist/esm/lib/v3/types/public/methods.js.map +1 -1
  229. package/dist/esm/lib/v3/types/public/model.d.ts +32 -2
  230. package/dist/esm/lib/v3/types/public/model.js.map +1 -1
  231. package/dist/esm/lib/v3/types/public/variables.d.ts +7 -0
  232. package/dist/esm/lib/v3/types/public/variables.js +19 -0
  233. package/dist/esm/lib/v3/types/public/variables.js.map +1 -0
  234. package/dist/esm/lib/v3/understudy/context.js +11 -3
  235. package/dist/esm/lib/v3/understudy/context.js.map +1 -1
  236. package/dist/esm/lib/v3/understudy/page.js +1 -1
  237. package/dist/esm/lib/v3/understudy/page.js.map +1 -1
  238. package/dist/esm/lib/v3/v3.d.ts +23 -2
  239. package/dist/esm/lib/v3/v3.js +111 -14
  240. package/dist/esm/lib/v3/v3.js.map +1 -1
  241. package/dist/esm/lib/version.d.ts +1 -1
  242. package/dist/esm/lib/version.js +1 -1
  243. package/dist/esm/lib/version.js.map +1 -1
  244. package/dist/esm/tests/integration/agent-captcha-autosolve.spec.d.ts +1 -0
  245. package/dist/esm/tests/integration/agent-captcha-autosolve.spec.js +54 -0
  246. package/dist/esm/tests/integration/agent-captcha-autosolve.spec.js.map +1 -0
  247. package/dist/esm/tests/integration/agent-hybrid-mode.spec.js +6 -6
  248. package/dist/esm/tests/integration/agent-hybrid-mode.spec.js.map +1 -1
  249. package/dist/esm/tests/integration/chrome-newtab-page-tracking.spec.d.ts +1 -0
  250. package/dist/esm/tests/integration/chrome-newtab-page-tracking.spec.js +54 -0
  251. package/dist/esm/tests/integration/chrome-newtab-page-tracking.spec.js.map +1 -0
  252. package/dist/esm/tests/integration/timeouts.spec.js +1 -1
  253. package/dist/esm/tests/integration/timeouts.spec.js.map +1 -1
  254. package/dist/esm/tests/unit/agent-captcha-hooks.test.d.ts +1 -0
  255. package/dist/esm/tests/unit/agent-captcha-hooks.test.js +339 -0
  256. package/dist/esm/tests/unit/agent-captcha-hooks.test.js.map +1 -0
  257. package/dist/esm/tests/unit/agent-execution-model.test.js +25 -3
  258. package/dist/esm/tests/unit/agent-execution-model.test.js.map +1 -1
  259. package/dist/esm/tests/unit/agent-metrics.test.d.ts +1 -0
  260. package/dist/esm/tests/unit/agent-metrics.test.js +110 -0
  261. package/dist/esm/tests/unit/agent-metrics.test.js.map +1 -0
  262. package/dist/esm/tests/unit/agent-system-prompt-variables.test.d.ts +1 -0
  263. package/dist/esm/tests/unit/agent-system-prompt-variables.test.js +21 -0
  264. package/dist/esm/tests/unit/agent-system-prompt-variables.test.js.map +1 -0
  265. package/dist/esm/tests/unit/aisdk-clients.test.d.ts +1 -0
  266. package/dist/esm/tests/unit/aisdk-clients.test.js +88 -0
  267. package/dist/esm/tests/unit/aisdk-clients.test.js.map +1 -0
  268. package/dist/esm/tests/unit/anthropic-cua-adaptive-thinking.test.d.ts +1 -0
  269. package/dist/esm/tests/unit/anthropic-cua-adaptive-thinking.test.js +245 -0
  270. package/dist/esm/tests/unit/anthropic-cua-adaptive-thinking.test.js.map +1 -0
  271. package/dist/esm/tests/unit/api-client-observe-variables.test.d.ts +1 -0
  272. package/dist/esm/tests/unit/api-client-observe-variables.test.js +84 -0
  273. package/dist/esm/tests/unit/api-client-observe-variables.test.js.map +1 -0
  274. package/dist/esm/tests/unit/api-optional-model-api-key.test.d.ts +1 -0
  275. package/dist/esm/tests/unit/api-optional-model-api-key.test.js +93 -0
  276. package/dist/esm/tests/unit/api-optional-model-api-key.test.js.map +1 -0
  277. package/dist/esm/tests/unit/api-variables-schema.test.d.ts +1 -0
  278. package/dist/esm/tests/unit/api-variables-schema.test.js +35 -0
  279. package/dist/esm/tests/unit/api-variables-schema.test.js.map +1 -0
  280. package/dist/esm/tests/unit/browserbase-session-accessors.test.js +40 -0
  281. package/dist/esm/tests/unit/browserbase-session-accessors.test.js.map +1 -1
  282. package/dist/esm/tests/unit/captcha-solver.test.d.ts +1 -0
  283. package/dist/esm/tests/unit/captcha-solver.test.js +152 -0
  284. package/dist/esm/tests/unit/captcha-solver.test.js.map +1 -0
  285. package/dist/esm/tests/unit/flowlogger-eventstore.test.js +1 -1
  286. package/dist/esm/tests/unit/flowlogger-eventstore.test.js.map +1 -1
  287. package/dist/esm/tests/unit/llm-middleware.test.d.ts +1 -0
  288. package/dist/esm/tests/unit/llm-middleware.test.js +460 -0
  289. package/dist/esm/tests/unit/llm-middleware.test.js.map +1 -0
  290. package/dist/esm/tests/unit/microsoft-cua-client.test.d.ts +1 -0
  291. package/dist/esm/tests/unit/microsoft-cua-client.test.js +84 -0
  292. package/dist/esm/tests/unit/microsoft-cua-client.test.js.map +1 -0
  293. package/dist/esm/tests/unit/openai-cua-client.test.d.ts +1 -0
  294. package/dist/esm/tests/unit/openai-cua-client.test.js +69 -0
  295. package/dist/esm/tests/unit/openai-cua-client.test.js.map +1 -0
  296. package/dist/esm/tests/unit/prompt-observe-variables.test.d.ts +1 -0
  297. package/dist/esm/tests/unit/prompt-observe-variables.test.js +17 -0
  298. package/dist/esm/tests/unit/prompt-observe-variables.test.js.map +1 -0
  299. package/dist/esm/tests/unit/public-api/llm-and-agents.test.js +1 -0
  300. package/dist/esm/tests/unit/public-api/llm-and-agents.test.js.map +1 -1
  301. package/dist/esm/tests/unit/public-api/public-types.test.js.map +1 -1
  302. package/dist/esm/tests/unit/timeout-handlers.test.js +50 -0
  303. package/dist/esm/tests/unit/timeout-handlers.test.js.map +1 -1
  304. package/package.json +5 -9
  305. package/dist/cjs/lib/v3/dom/build/rerender-index.js +0 -1
  306. package/dist/cjs/lib/v3/dom/build/v3-index.js +0 -1
  307. package/dist/esm/lib/v3/dom/build/rerender-index.js +0 -1
  308. package/dist/esm/lib/v3/dom/build/v3-index.js +0 -1
@@ -1,13 +1,13 @@
1
+ import type { EasyInputMessage } from "openai/resources/responses/responses";
1
2
  import { LogLine } from "../types/public/logs.js";
2
3
  import { AgentAction, AgentResult, AgentType, AgentExecutionOptions, ResponseInputItem, ResponseItem, SafetyConfirmationHandler } from "../types/public/agent.js";
3
4
  import { ClientOptions } from "../types/public/model.js";
4
5
  import { AgentClient } from "./AgentClient.js";
5
6
  import { ToolSet } from "ai";
6
- /**
7
- * Client for OpenAI's Computer Use Assistant API
8
- * This implementation uses the official OpenAI Responses API for Computer Use
9
- */
7
+ type OpenAIRequestInputItem = ResponseInputItem | EasyInputMessage;
10
8
  export declare class OpenAICUAClient extends AgentClient {
9
+ private pendingContextNotes;
10
+ private captchaSolvedToolActive;
11
11
  private apiKey;
12
12
  private organization?;
13
13
  private baseURL;
@@ -21,6 +21,7 @@ export declare class OpenAICUAClient extends AgentClient {
21
21
  private environment;
22
22
  private tools?;
23
23
  private safetyConfirmationHandler?;
24
+ private get usesNewComputerTool();
24
25
  constructor(type: AgentType, modelName: string, userProvidedInstructions?: string, clientOptions?: ClientOptions, tools?: ToolSet);
25
26
  setViewport(width: number, height: number): void;
26
27
  setCurrentUrl(url: string): void;
@@ -28,6 +29,7 @@ export declare class OpenAICUAClient extends AgentClient {
28
29
  setActionHandler(handler: (action: AgentAction) => Promise<void>): void;
29
30
  setTools(tools: ToolSet): void;
30
31
  setSafetyConfirmationHandler(handler?: SafetyConfirmationHandler): void;
32
+ addContextNote(note: string): void;
31
33
  /**
32
34
  * Execute a task with the OpenAI CUA
33
35
  * This is the main entry point for the agent
@@ -38,7 +40,7 @@ export declare class OpenAICUAClient extends AgentClient {
38
40
  * Execute a single step of the agent
39
41
  * This coordinates the flow: Request → Get Action → Execute Action
40
42
  */
41
- executeStep(inputItems: ResponseInputItem[], previousResponseId: string | undefined, logger: (message: LogLine) => void): Promise<{
43
+ executeStep(inputItems: OpenAIRequestInputItem[], previousResponseId: string | undefined, logger: (message: LogLine) => void): Promise<{
42
44
  actions: AgentAction[];
43
45
  message: string;
44
46
  completed: boolean;
@@ -54,16 +56,20 @@ export declare class OpenAICUAClient extends AgentClient {
54
56
  private handleSafetyConfirmation;
55
57
  private isFunctionCallItem;
56
58
  private createInitialInputItems;
57
- getAction(inputItems: ResponseInputItem[], previousResponseId?: string): Promise<{
59
+ getAction(inputItems: OpenAIRequestInputItem[], previousResponseId?: string): Promise<{
58
60
  output: ResponseItem[];
59
61
  responseId: string;
60
62
  usage: Record<string, number>;
61
63
  }>;
62
64
  takeAction(output: ResponseItem[], logger: (message: LogLine) => void): Promise<ResponseInputItem[]>;
63
65
  private convertComputerCallToAction;
66
+ private drainContextNotes;
67
+ private captureInitialScreenshot;
68
+ private convertComputerCallToActions;
64
69
  private convertFunctionCallToAction;
65
70
  captureScreenshot(options?: {
66
71
  base64Image?: string;
67
72
  currentUrl?: string;
68
73
  }): Promise<string>;
69
74
  }
75
+ export {};
@@ -13,7 +13,10 @@ const uuid_1 = require("uuid");
13
13
  * Client for OpenAI's Computer Use Assistant API
14
14
  * This implementation uses the official OpenAI Responses API for Computer Use
15
15
  */
16
+ const CAPTCHA_PROCEED_TOOL = "captchaSolvedProceed";
16
17
  class OpenAICUAClient extends AgentClient_js_1.AgentClient {
18
+ pendingContextNotes = [];
19
+ captchaSolvedToolActive = false;
17
20
  apiKey;
18
21
  organization;
19
22
  baseURL;
@@ -27,6 +30,9 @@ class OpenAICUAClient extends AgentClient_js_1.AgentClient {
27
30
  environment = "browser"; // "browser", "mac", "windows", or "ubuntu"
28
31
  tools;
29
32
  safetyConfirmationHandler;
33
+ get usesNewComputerTool() {
34
+ return this.modelName.startsWith("gpt-5");
35
+ }
30
36
  constructor(type, modelName, userProvidedInstructions, clientOptions, tools) {
31
37
  super(type, modelName, userProvidedInstructions);
32
38
  // Process client options
@@ -69,6 +75,15 @@ class OpenAICUAClient extends AgentClient_js_1.AgentClient {
69
75
  setSafetyConfirmationHandler(handler) {
70
76
  this.safetyConfirmationHandler = handler;
71
77
  }
78
+ addContextNote(note) {
79
+ this.pendingContextNotes.push(note);
80
+ // When a captcha-related note arrives, expose a tool that the model can
81
+ // call instead of asking the user for confirmation. This replaces
82
+ // fragile English-phrase parsing with a structured tool call.
83
+ if (note.toLowerCase().includes("captcha")) {
84
+ this.captchaSolvedToolActive = true;
85
+ }
86
+ }
72
87
  /**
73
88
  * Execute a task with the OpenAI CUA
74
89
  * This is the main entry point for the agent
@@ -85,7 +100,7 @@ class OpenAICUAClient extends AgentClient_js_1.AgentClient {
85
100
  let finalMessage = "";
86
101
  this.reasoningItems.clear(); // Clear any previous reasoning items
87
102
  // Start with the initial instruction
88
- let inputItems = this.createInitialInputItems(instruction);
103
+ let inputItems = await this.createInitialInputItems(instruction);
89
104
  let previousResponseId = undefined;
90
105
  let totalInputTokens = 0;
91
106
  let totalOutputTokens = 0;
@@ -93,6 +108,7 @@ class OpenAICUAClient extends AgentClient_js_1.AgentClient {
93
108
  try {
94
109
  // Execute steps until completion or max steps reached
95
110
  while (!completed && currentStep < maxSteps) {
111
+ await this.preStepHook?.();
96
112
  logger({
97
113
  category: "agent",
98
114
  message: `Executing step ${currentStep + 1}/${maxSteps}`,
@@ -111,6 +127,16 @@ class OpenAICUAClient extends AgentClient_js_1.AgentClient {
111
127
  // Update the input items for the next step if we're continuing
112
128
  if (!completed) {
113
129
  inputItems = result.nextInputItems;
130
+ const contextNotes = this.drainContextNotes();
131
+ if (contextNotes.length > 0) {
132
+ inputItems = [
133
+ ...inputItems,
134
+ ...contextNotes.map((note) => ({
135
+ role: "user",
136
+ content: note,
137
+ })),
138
+ ];
139
+ }
114
140
  }
115
141
  // Record any message for this step
116
142
  if (result.message) {
@@ -185,15 +211,15 @@ class OpenAICUAClient extends AgentClient_js_1.AgentClient {
185
211
  if (item.type === "computer_call" && this.isComputerCallItem(item)) {
186
212
  logger({
187
213
  category: "agent",
188
- message: `Found computer_call: ${item.action.type}, call_id: ${item.call_id}`,
214
+ message: `Found computer_call with call_id: ${item.call_id}`,
189
215
  level: 2,
190
216
  });
191
- const action = this.convertComputerCallToAction(item);
192
- if (action) {
217
+ const actions = this.convertComputerCallToActions(item);
218
+ for (const action of actions) {
193
219
  stepActions.push(action);
194
220
  logger({
195
221
  category: "agent",
196
- message: `Converted computer_call to action: ${action.type}`,
222
+ message: `Found computer_call action: ${action.type}, payload: ${JSON.stringify(action)}, call_id: ${item.call_id}`,
197
223
  level: 2,
198
224
  });
199
225
  }
@@ -266,8 +292,8 @@ class OpenAICUAClient extends AgentClient_js_1.AgentClient {
266
292
  isComputerCallItem(item) {
267
293
  return (item.type === "computer_call" &&
268
294
  "call_id" in item &&
269
- "action" in item &&
270
- typeof item.action === "object");
295
+ (("action" in item && typeof item.action === "object") ||
296
+ ("actions" in item && Array.isArray(item.actions))));
271
297
  }
272
298
  async handleSafetyConfirmation(pendingSafetyChecks, logger) {
273
299
  if (this.safetyConfirmationHandler) {
@@ -307,34 +333,54 @@ class OpenAICUAClient extends AgentClient_js_1.AgentClient {
307
333
  "name" in item &&
308
334
  "arguments" in item);
309
335
  }
310
- createInitialInputItems(instruction) {
311
- // For the initial request, we use a simple array with the user's instruction
312
- return [
313
- {
336
+ async createInitialInputItems(instruction) {
337
+ const inputItems = [];
338
+ if (this.userProvidedInstructions) {
339
+ const systemMessage = {
314
340
  role: "system",
315
341
  content: this.userProvidedInstructions,
316
- },
317
- {
318
- role: "user",
319
- content: instruction,
320
- },
342
+ };
343
+ inputItems.push(systemMessage);
344
+ }
345
+ const textInput = {
346
+ type: "input_text",
347
+ text: instruction,
348
+ };
349
+ const userContent = [
350
+ textInput,
321
351
  ];
352
+ const initialScreenshot = await this.captureInitialScreenshot();
353
+ if (initialScreenshot) {
354
+ const screenshotInput = {
355
+ type: "input_image",
356
+ image_url: initialScreenshot,
357
+ detail: "high",
358
+ };
359
+ userContent.push(screenshotInput);
360
+ }
361
+ const userMessage = {
362
+ role: "user",
363
+ content: userContent,
364
+ };
365
+ inputItems.push(userMessage);
366
+ return inputItems;
322
367
  }
323
368
  async getAction(inputItems, previousResponseId) {
324
369
  try {
325
- // Create the request parameters
370
+ // Create the request parameters, branching on tool format
371
+ const computerTool = this.usesNewComputerTool
372
+ ? { type: "computer" }
373
+ : {
374
+ type: "computer_use_preview",
375
+ display_width: this.currentViewport.width,
376
+ display_height: this.currentViewport.height,
377
+ environment: this.environment,
378
+ };
326
379
  const requestParams = {
327
380
  model: this.modelName,
328
- tools: [
329
- {
330
- type: "computer_use_preview",
331
- display_width: this.currentViewport.width,
332
- display_height: this.currentViewport.height,
333
- environment: this.environment,
334
- },
335
- ],
381
+ tools: [computerTool],
336
382
  input: inputItems,
337
- truncation: "auto",
383
+ ...(this.usesNewComputerTool ? {} : { truncation: "auto" }),
338
384
  };
339
385
  // Add custom tools if available
340
386
  if (this.tools && Object.keys(this.tools).length > 0) {
@@ -352,6 +398,25 @@ class OpenAICUAClient extends AgentClient_js_1.AgentClient {
352
398
  ...customTools,
353
399
  ];
354
400
  }
401
+ // When a captcha was just solved, expose a tool the model can call
402
+ // to confirm it should proceed. This avoids fragile English-phrase
403
+ // parsing and works regardless of the model's output language.
404
+ if (this.captchaSolvedToolActive) {
405
+ requestParams.tools = [
406
+ ...requestParams.tools,
407
+ {
408
+ type: "function",
409
+ name: CAPTCHA_PROCEED_TOOL,
410
+ function: {
411
+ name: CAPTCHA_PROCEED_TOOL,
412
+ description: "The captcha on this page was solved automatically. " +
413
+ "Call this tool to confirm and continue with your task " +
414
+ "instead of asking the user for permission.",
415
+ parameters: { type: "object", properties: {}, required: [] },
416
+ },
417
+ },
418
+ ];
419
+ }
355
420
  // Add previous_response_id if available
356
421
  if (previousResponseId) {
357
422
  requestParams.previous_response_id = previousResponseId;
@@ -402,26 +467,34 @@ class OpenAICUAClient extends AgentClient_js_1.AgentClient {
402
467
  // Process each output item
403
468
  for (const item of output) {
404
469
  if (item.type === "computer_call" && this.isComputerCallItem(item)) {
405
- // Handle computer calls
470
+ // Handle computer calls (both single-action and batched-actions formats)
406
471
  try {
407
- const action = this.convertComputerCallToAction(item);
408
- if (action && this.actionHandler) {
409
- logger({
410
- category: "agent",
411
- message: `Executing computer action: ${action.type}`,
412
- level: 1,
413
- });
414
- await this.actionHandler(action);
472
+ const actions = this.convertComputerCallToActions(item);
473
+ if (this.actionHandler) {
474
+ for (const action of actions) {
475
+ logger({
476
+ category: "agent",
477
+ message: `Executing computer action: ${action.type}`,
478
+ level: 1,
479
+ });
480
+ await this.actionHandler(action);
481
+ }
415
482
  }
416
- // Capture a screenshot
483
+ // Capture a screenshot after all actions in the batch
417
484
  const screenshot = await this.captureScreenshot();
418
- // Create a computer_call_output for the next request
485
+ // Build the output — use "computer_screenshot" for new format, "input_image" for legacy
486
+ const outputType = this.usesNewComputerTool
487
+ ? "computer_screenshot"
488
+ : "input_image";
419
489
  const outputItem = {
420
490
  type: "computer_call_output",
421
491
  call_id: item.call_id,
422
492
  output: {
423
- type: "input_image",
493
+ type: outputType,
424
494
  image_url: screenshot,
495
+ ...(this.usesNewComputerTool
496
+ ? { detail: "original" }
497
+ : {}),
425
498
  },
426
499
  };
427
500
  logger({
@@ -429,8 +502,8 @@ class OpenAICUAClient extends AgentClient_js_1.AgentClient {
429
502
  message: `Added computer_call_output for call_id: ${item.call_id}`,
430
503
  level: 2,
431
504
  });
432
- // Add current URL if available
433
- if (this.currentUrl) {
505
+ // Legacy format supports current_url on the output; new format does not
506
+ if (!this.usesNewComputerTool && this.currentUrl) {
434
507
  const computerCallOutput = outputItem;
435
508
  computerCallOutput.output.current_url = this.currentUrl;
436
509
  }
@@ -456,19 +529,23 @@ class OpenAICUAClient extends AgentClient_js_1.AgentClient {
456
529
  level: 0,
457
530
  });
458
531
  try {
459
- // Capture a screenshot even on error
460
532
  const screenshot = await this.captureScreenshot();
533
+ const outputType = this.usesNewComputerTool
534
+ ? "computer_screenshot"
535
+ : "input_image";
461
536
  const errorOutputItem = {
462
537
  type: "computer_call_output",
463
538
  call_id: item.call_id,
464
539
  output: {
465
- type: "input_image",
540
+ type: outputType,
466
541
  image_url: screenshot,
467
542
  error: errorMessage,
543
+ ...(this.usesNewComputerTool
544
+ ? { detail: "original" }
545
+ : {}),
468
546
  },
469
547
  };
470
- // Add current URL if available
471
- if (this.currentUrl) {
548
+ if (!this.usesNewComputerTool && this.currentUrl) {
472
549
  const computerCallOutput = errorOutputItem;
473
550
  computerCallOutput.output.current_url = this.currentUrl;
474
551
  }
@@ -487,13 +564,11 @@ class OpenAICUAClient extends AgentClient_js_1.AgentClient {
487
564
  if (screenshotError instanceof sdkErrors_js_1.StagehandClosedError) {
488
565
  throw screenshotError;
489
566
  }
490
- // If we can't capture a screenshot, just send the error
491
567
  logger({
492
568
  category: "agent",
493
569
  message: `Error capturing screenshot: ${String(screenshotError)}`,
494
570
  level: 0,
495
571
  });
496
- // For error cases without a screenshot, we need to use a string output
497
572
  nextInputItems.push({
498
573
  type: "computer_call_output",
499
574
  call_id: item.call_id,
@@ -504,6 +579,17 @@ class OpenAICUAClient extends AgentClient_js_1.AgentClient {
504
579
  }
505
580
  else if (item.type === "function_call" &&
506
581
  this.isFunctionCallItem(item)) {
582
+ // Handle the captcha-proceed tool — just return a confirmation and
583
+ // deactivate the tool so it doesn't appear on subsequent steps.
584
+ if (item.name === CAPTCHA_PROCEED_TOOL) {
585
+ this.captchaSolvedToolActive = false;
586
+ nextInputItems.push({
587
+ type: "function_call_output",
588
+ call_id: item.call_id,
589
+ output: "Confirmed. The captcha is solved. Continue completing the original task autonomously without asking for further confirmation.",
590
+ });
591
+ continue;
592
+ }
507
593
  // Handle function calls (tool calls)
508
594
  try {
509
595
  const action = this.convertFunctionCallToAction(item);
@@ -576,13 +662,42 @@ class OpenAICUAClient extends AgentClient_js_1.AgentClient {
576
662
  }
577
663
  convertComputerCallToAction(call) {
578
664
  const { action } = call;
579
- // Instead of wrapping the action in a params object, spread the action properties directly
580
- // This ensures properties like x, y, button, etc. are directly accessible on the AgentAction
665
+ if (!action)
666
+ return null;
581
667
  return {
582
668
  type: action.type,
583
- ...action, // Spread all properties from the action
669
+ ...action,
584
670
  };
585
671
  }
672
+ drainContextNotes() {
673
+ if (this.pendingContextNotes.length === 0) {
674
+ return [];
675
+ }
676
+ const notes = [...this.pendingContextNotes];
677
+ this.pendingContextNotes = [];
678
+ return notes;
679
+ }
680
+ async captureInitialScreenshot() {
681
+ if (!this.screenshotProvider) {
682
+ return undefined;
683
+ }
684
+ try {
685
+ return await this.captureScreenshot();
686
+ }
687
+ catch {
688
+ return undefined;
689
+ }
690
+ }
691
+ convertComputerCallToActions(call) {
692
+ if (call.actions && Array.isArray(call.actions)) {
693
+ return call.actions.map((action) => ({
694
+ type: action.type,
695
+ ...action,
696
+ }));
697
+ }
698
+ const single = this.convertComputerCallToAction(call);
699
+ return single ? [single] : [];
700
+ }
586
701
  convertFunctionCallToAction(call) {
587
702
  try {
588
703
  const args = JSON.parse(call.arguments);