@browserbasehq/stagehand 2.5.3-alpha-4994eabfa8016347ec001b40ceadb0c905403470 → 4.0.0-alpha-49bc5b68e4fbeb28a120b301b51c31a36db7d922

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (286) hide show
  1. package/dist/index.d.ts +1367 -751
  2. package/dist/index.js +37087 -22062
  3. package/package.json +54 -78
  4. package/README.md +0 -165
  5. package/dist/evals/args.d.ts +0 -15
  6. package/dist/evals/cli.d.ts +0 -2
  7. package/dist/evals/core/summary.d.ts +0 -2
  8. package/dist/evals/datasets/osworld/adapter.d.ts +0 -22
  9. package/dist/evals/datasets/osworld/index.d.ts +0 -9
  10. package/dist/evals/datasets/osworld/types.d.ts +0 -54
  11. package/dist/evals/deterministic/bb.playwright.config.d.ts +0 -5
  12. package/dist/evals/deterministic/e2e.playwright.config.d.ts +0 -5
  13. package/dist/evals/deterministic/local.playwright.config.d.ts +0 -5
  14. package/dist/evals/deterministic/stagehand.config.d.ts +0 -3
  15. package/dist/evals/deterministic/tests/BrowserContext/addInitScript.test.d.ts +0 -1
  16. package/dist/evals/deterministic/tests/BrowserContext/cookies.test.d.ts +0 -1
  17. package/dist/evals/deterministic/tests/BrowserContext/multiPage.test.d.ts +0 -1
  18. package/dist/evals/deterministic/tests/BrowserContext/page.test.d.ts +0 -1
  19. package/dist/evals/deterministic/tests/BrowserContext/routing.test.d.ts +0 -1
  20. package/dist/evals/deterministic/tests/Errors/apiKeyError.test.d.ts +0 -1
  21. package/dist/evals/deterministic/tests/browserbase/contexts.test.d.ts +0 -1
  22. package/dist/evals/deterministic/tests/browserbase/downloads.test.d.ts +0 -1
  23. package/dist/evals/deterministic/tests/browserbase/sessions.test.d.ts +0 -1
  24. package/dist/evals/deterministic/tests/browserbase/uploads.test.d.ts +0 -1
  25. package/dist/evals/deterministic/tests/local/create.test.d.ts +0 -1
  26. package/dist/evals/deterministic/tests/local/downloads.test.d.ts +0 -1
  27. package/dist/evals/deterministic/tests/page/addInitScript.test.d.ts +0 -1
  28. package/dist/evals/deterministic/tests/page/addRemoveLocatorHandler.test.d.ts +0 -1
  29. package/dist/evals/deterministic/tests/page/addTags.test.d.ts +0 -1
  30. package/dist/evals/deterministic/tests/page/bringToFront.test.d.ts +0 -1
  31. package/dist/evals/deterministic/tests/page/content.test.d.ts +0 -1
  32. package/dist/evals/deterministic/tests/page/evaluate.test.d.ts +0 -1
  33. package/dist/evals/deterministic/tests/page/expose.test.d.ts +0 -1
  34. package/dist/evals/deterministic/tests/page/frames.test.d.ts +0 -1
  35. package/dist/evals/deterministic/tests/page/getBy.test.d.ts +0 -1
  36. package/dist/evals/deterministic/tests/page/livePageProxy.test.d.ts +0 -1
  37. package/dist/evals/deterministic/tests/page/navigation.test.d.ts +0 -1
  38. package/dist/evals/deterministic/tests/page/on.test.d.ts +0 -1
  39. package/dist/evals/deterministic/tests/page/pageContext.test.d.ts +0 -1
  40. package/dist/evals/deterministic/tests/page/reload.test.d.ts +0 -1
  41. package/dist/evals/deterministic/tests/page/waitFor.test.d.ts +0 -1
  42. package/dist/evals/env.d.ts +0 -13
  43. package/dist/evals/evaluator.d.ts +0 -29
  44. package/dist/evals/index.eval.d.ts +0 -1
  45. package/dist/evals/initStagehand.d.ts +0 -37
  46. package/dist/evals/llm_clients/hn_aisdk.d.ts +0 -2
  47. package/dist/evals/llm_clients/hn_customOpenAI.d.ts +0 -2
  48. package/dist/evals/llm_clients/hn_langchain.d.ts +0 -2
  49. package/dist/evals/logger.d.ts +0 -50
  50. package/dist/evals/scoring.d.ts +0 -25
  51. package/dist/evals/suites/gaia.d.ts +0 -2
  52. package/dist/evals/suites/onlineMind2Web.d.ts +0 -2
  53. package/dist/evals/suites/osworld.d.ts +0 -2
  54. package/dist/evals/suites/webbench.d.ts +0 -2
  55. package/dist/evals/suites/webvoyager.d.ts +0 -2
  56. package/dist/evals/taskConfig.d.ts +0 -33
  57. package/dist/evals/tasks/agent/all_recipes.d.ts +0 -2
  58. package/dist/evals/tasks/agent/apple_trade_in.d.ts +0 -2
  59. package/dist/evals/tasks/agent/apple_tv.d.ts +0 -2
  60. package/dist/evals/tasks/agent/arxiv_gpt_report.d.ts +0 -2
  61. package/dist/evals/tasks/agent/gaia.d.ts +0 -9
  62. package/dist/evals/tasks/agent/github.d.ts +0 -2
  63. package/dist/evals/tasks/agent/github_react_version.d.ts +0 -2
  64. package/dist/evals/tasks/agent/google_flights.d.ts +0 -2
  65. package/dist/evals/tasks/agent/google_maps.d.ts +0 -2
  66. package/dist/evals/tasks/agent/google_maps_2.d.ts +0 -2
  67. package/dist/evals/tasks/agent/google_maps_3.d.ts +0 -2
  68. package/dist/evals/tasks/agent/google_shopping.d.ts +0 -2
  69. package/dist/evals/tasks/agent/hotel_booking.d.ts +0 -2
  70. package/dist/evals/tasks/agent/hugging_face.d.ts +0 -2
  71. package/dist/evals/tasks/agent/iframe_form.d.ts +0 -2
  72. package/dist/evals/tasks/agent/iframe_form_multiple.d.ts +0 -2
  73. package/dist/evals/tasks/agent/kayak.d.ts +0 -2
  74. package/dist/evals/tasks/agent/kith.d.ts +0 -2
  75. package/dist/evals/tasks/agent/nba_trades.d.ts +0 -2
  76. package/dist/evals/tasks/agent/onlineMind2Web.d.ts +0 -2
  77. package/dist/evals/tasks/agent/osworld.d.ts +0 -2
  78. package/dist/evals/tasks/agent/sf_library_card.d.ts +0 -2
  79. package/dist/evals/tasks/agent/sf_library_card_multiple.d.ts +0 -2
  80. package/dist/evals/tasks/agent/sign_in.d.ts +0 -2
  81. package/dist/evals/tasks/agent/steam_games.d.ts +0 -2
  82. package/dist/evals/tasks/agent/trivago.d.ts +0 -2
  83. package/dist/evals/tasks/agent/ubereats.d.ts +0 -2
  84. package/dist/evals/tasks/agent/webbench.d.ts +0 -2
  85. package/dist/evals/tasks/agent/webvoyager.d.ts +0 -2
  86. package/dist/evals/tasks/agent/youtube.d.ts +0 -2
  87. package/dist/evals/tasks/allrecipes.d.ts +0 -2
  88. package/dist/evals/tasks/amazon_add_to_cart.d.ts +0 -2
  89. package/dist/evals/tasks/apple.d.ts +0 -2
  90. package/dist/evals/tasks/arxiv.d.ts +0 -2
  91. package/dist/evals/tasks/bidnet.d.ts +0 -2
  92. package/dist/evals/tasks/checkboxes.d.ts +0 -2
  93. package/dist/evals/tasks/combination_sauce.d.ts +0 -2
  94. package/dist/evals/tasks/costar.d.ts +0 -2
  95. package/dist/evals/tasks/csr_in_oopif.d.ts +0 -2
  96. package/dist/evals/tasks/csr_in_spif.d.ts +0 -2
  97. package/dist/evals/tasks/custom_dropdown.d.ts +0 -2
  98. package/dist/evals/tasks/dropdown.d.ts +0 -2
  99. package/dist/evals/tasks/expect_act_timeout.d.ts +0 -2
  100. package/dist/evals/tasks/extract_aigrant_companies.d.ts +0 -2
  101. package/dist/evals/tasks/extract_aigrant_targeted.d.ts +0 -2
  102. package/dist/evals/tasks/extract_aigrant_targeted_2.d.ts +0 -2
  103. package/dist/evals/tasks/extract_apartments.d.ts +0 -2
  104. package/dist/evals/tasks/extract_area_codes.d.ts +0 -2
  105. package/dist/evals/tasks/extract_baptist_health.d.ts +0 -2
  106. package/dist/evals/tasks/extract_capacitor_info.d.ts +0 -2
  107. package/dist/evals/tasks/extract_collaborators.d.ts +0 -2
  108. package/dist/evals/tasks/extract_csa.d.ts +0 -2
  109. package/dist/evals/tasks/extract_geniusee.d.ts +0 -2
  110. package/dist/evals/tasks/extract_geniusee_2.d.ts +0 -2
  111. package/dist/evals/tasks/extract_github_commits.d.ts +0 -2
  112. package/dist/evals/tasks/extract_github_stars.d.ts +0 -2
  113. package/dist/evals/tasks/extract_hamilton_weather.d.ts +0 -2
  114. package/dist/evals/tasks/extract_jfk_links.d.ts +0 -2
  115. package/dist/evals/tasks/extract_jstor_news.d.ts +0 -2
  116. package/dist/evals/tasks/extract_memorial_healthcare.d.ts +0 -2
  117. package/dist/evals/tasks/extract_nhl_stats.d.ts +0 -2
  118. package/dist/evals/tasks/extract_partners.d.ts +0 -2
  119. package/dist/evals/tasks/extract_press_releases.d.ts +0 -2
  120. package/dist/evals/tasks/extract_professional_info.d.ts +0 -2
  121. package/dist/evals/tasks/extract_public_notices.d.ts +0 -2
  122. package/dist/evals/tasks/extract_recipe.d.ts +0 -2
  123. package/dist/evals/tasks/extract_regulations_table.d.ts +0 -2
  124. package/dist/evals/tasks/extract_repo_name.d.ts +0 -2
  125. package/dist/evals/tasks/extract_resistor_info.d.ts +0 -2
  126. package/dist/evals/tasks/extract_rockauto.d.ts +0 -2
  127. package/dist/evals/tasks/extract_single_link.d.ts +0 -2
  128. package/dist/evals/tasks/extract_snowshoeing_destinations.d.ts +0 -2
  129. package/dist/evals/tasks/extract_staff_members.d.ts +0 -2
  130. package/dist/evals/tasks/extract_zillow.d.ts +0 -2
  131. package/dist/evals/tasks/google_flights.d.ts +0 -11
  132. package/dist/evals/tasks/heal_custom_dropdown.d.ts +0 -2
  133. package/dist/evals/tasks/heal_scroll_50.d.ts +0 -2
  134. package/dist/evals/tasks/heal_simple_google_search.d.ts +0 -2
  135. package/dist/evals/tasks/hidden_input_dropdown.d.ts +0 -2
  136. package/dist/evals/tasks/history.d.ts +0 -2
  137. package/dist/evals/tasks/homedepot.d.ts +0 -2
  138. package/dist/evals/tasks/iframe_form_filling.d.ts +0 -2
  139. package/dist/evals/tasks/iframe_hn.d.ts +0 -2
  140. package/dist/evals/tasks/iframe_same_proc.d.ts +0 -2
  141. package/dist/evals/tasks/iframe_scroll.d.ts +0 -2
  142. package/dist/evals/tasks/iframes_nested.d.ts +0 -2
  143. package/dist/evals/tasks/imdb_movie_details.d.ts +0 -2
  144. package/dist/evals/tasks/instructions.d.ts +0 -2
  145. package/dist/evals/tasks/ionwave.d.ts +0 -2
  146. package/dist/evals/tasks/ionwave_observe.d.ts +0 -2
  147. package/dist/evals/tasks/login.d.ts +0 -2
  148. package/dist/evals/tasks/multi_tab.d.ts +0 -2
  149. package/dist/evals/tasks/namespace_xpath.d.ts +0 -2
  150. package/dist/evals/tasks/nested_iframes_2.d.ts +0 -2
  151. package/dist/evals/tasks/nextChunk.d.ts +0 -2
  152. package/dist/evals/tasks/no_js_click.d.ts +0 -2
  153. package/dist/evals/tasks/nonsense_action.d.ts +0 -2
  154. package/dist/evals/tasks/observe_amazon_add_to_cart.d.ts +0 -2
  155. package/dist/evals/tasks/observe_github.d.ts +0 -2
  156. package/dist/evals/tasks/observe_iframes1.d.ts +0 -2
  157. package/dist/evals/tasks/observe_iframes2.d.ts +0 -2
  158. package/dist/evals/tasks/observe_simple_google_search.d.ts +0 -2
  159. package/dist/evals/tasks/observe_taxes.d.ts +0 -2
  160. package/dist/evals/tasks/observe_vantechjournal.d.ts +0 -2
  161. package/dist/evals/tasks/observe_yc_startup.d.ts +0 -2
  162. package/dist/evals/tasks/oopif_in_csr.d.ts +0 -2
  163. package/dist/evals/tasks/oopif_in_osr.d.ts +0 -2
  164. package/dist/evals/tasks/os_dropdown.d.ts +0 -2
  165. package/dist/evals/tasks/osr_in_oopif.d.ts +0 -2
  166. package/dist/evals/tasks/osr_in_spif.d.ts +0 -2
  167. package/dist/evals/tasks/panamcs.d.ts +0 -2
  168. package/dist/evals/tasks/peeler_complex.d.ts +0 -2
  169. package/dist/evals/tasks/peeler_simple.d.ts +0 -2
  170. package/dist/evals/tasks/prevChunk.d.ts +0 -2
  171. package/dist/evals/tasks/radio_btn.d.ts +0 -2
  172. package/dist/evals/tasks/rakuten_jp.d.ts +0 -2
  173. package/dist/evals/tasks/sciquest.d.ts +0 -2
  174. package/dist/evals/tasks/screenshot_cdp_toggle.d.ts +0 -9
  175. package/dist/evals/tasks/scroll_50.d.ts +0 -2
  176. package/dist/evals/tasks/scroll_75.d.ts +0 -2
  177. package/dist/evals/tasks/shadow_dom.d.ts +0 -2
  178. package/dist/evals/tasks/simple_google_search.d.ts +0 -2
  179. package/dist/evals/tasks/spif_in_csr.d.ts +0 -2
  180. package/dist/evals/tasks/spif_in_osr.d.ts +0 -2
  181. package/dist/evals/tasks/stock_x.d.ts +0 -2
  182. package/dist/evals/tasks/tab_handling.d.ts +0 -2
  183. package/dist/evals/tasks/ted_talk.d.ts +0 -2
  184. package/dist/evals/tasks/vanta_h.d.ts +0 -2
  185. package/dist/evals/tasks/vantechjournal.d.ts +0 -2
  186. package/dist/evals/tasks/wichita.d.ts +0 -2
  187. package/dist/evals/tasks/wikipedia.d.ts +0 -2
  188. package/dist/evals/utils/ScreenshotCollector.d.ts +0 -34
  189. package/dist/evals/utils/imageUtils.d.ts +0 -1
  190. package/dist/evals/utils.d.ts +0 -61
  191. package/dist/examples/2048.d.ts +0 -1
  192. package/dist/examples/actionable_observe_example.d.ts +0 -14
  193. package/dist/examples/cua-example.d.ts +0 -1
  194. package/dist/examples/custom_client_aisdk.d.ts +0 -1
  195. package/dist/examples/custom_client_langchain.d.ts +0 -1
  196. package/dist/examples/custom_client_openai.d.ts +0 -1
  197. package/dist/examples/example.d.ts +0 -1
  198. package/dist/examples/external_clients/aisdk.d.ts +0 -20
  199. package/dist/examples/external_clients/customOpenAI.d.ts +0 -18
  200. package/dist/examples/external_clients/langchain.d.ts +0 -9
  201. package/dist/examples/form_filling_sensible.d.ts +0 -1
  202. package/dist/examples/google_enter.d.ts +0 -6
  203. package/dist/examples/instructions.d.ts +0 -1
  204. package/dist/examples/integrations/exa.d.ts +0 -1
  205. package/dist/examples/integrations/supabase.d.ts +0 -1
  206. package/dist/examples/operator-example.d.ts +0 -8
  207. package/dist/examples/parameterizeApiKey.d.ts +0 -1
  208. package/dist/examples/popup.d.ts +0 -6
  209. package/dist/examples/wordle.d.ts +0 -1
  210. package/dist/lib/StagehandContext.d.ts +0 -25
  211. package/dist/lib/StagehandPage.d.ts +0 -103
  212. package/dist/lib/a11y/utils.d.ts +0 -144
  213. package/dist/lib/agent/AgentClient.d.ts +0 -20
  214. package/dist/lib/agent/AgentProvider.d.ts +0 -19
  215. package/dist/lib/agent/AnthropicCUAClient.d.ts +0 -56
  216. package/dist/lib/agent/GoogleCUAClient.d.ts +0 -63
  217. package/dist/lib/agent/OpenAICUAClient.d.ts +0 -65
  218. package/dist/lib/agent/StagehandAgent.d.ts +0 -15
  219. package/dist/lib/agent/tools/act.d.ts +0 -59
  220. package/dist/lib/agent/tools/ariaTree.d.ts +0 -11
  221. package/dist/lib/agent/tools/close.d.ts +0 -22
  222. package/dist/lib/agent/tools/extract.d.ts +0 -38
  223. package/dist/lib/agent/tools/fillform.d.ts +0 -37
  224. package/dist/lib/agent/tools/goto.d.ts +0 -29
  225. package/dist/lib/agent/tools/index.d.ts +0 -257
  226. package/dist/lib/agent/tools/navback.d.ts +0 -17
  227. package/dist/lib/agent/tools/screenshot.d.ts +0 -13
  228. package/dist/lib/agent/tools/scroll.d.ts +0 -23
  229. package/dist/lib/agent/tools/wait.d.ts +0 -18
  230. package/dist/lib/agent/utils/cuaKeyMapping.d.ts +0 -10
  231. package/dist/lib/agent/utils/imageCompression.d.ts +0 -53
  232. package/dist/lib/agent/utils/messageProcessing.d.ts +0 -13
  233. package/dist/lib/api.d.ts +0 -23
  234. package/dist/lib/browserbaseDefaults.d.ts +0 -9
  235. package/dist/lib/cache/ActionCache.d.ts +0 -62
  236. package/dist/lib/cache/BaseCache.d.ts +0 -66
  237. package/dist/lib/cache/LLMCache.d.ts +0 -22
  238. package/dist/lib/cache.d.ts +0 -29
  239. package/dist/lib/dom/build/scriptContent.d.ts +0 -1
  240. package/dist/lib/dom/elementCheckUtils.d.ts +0 -2
  241. package/dist/lib/dom/genDomScripts.d.ts +0 -1
  242. package/dist/lib/dom/index.d.ts +0 -2
  243. package/dist/lib/dom/process.d.ts +0 -17
  244. package/dist/lib/dom/utils.d.ts +0 -7
  245. package/dist/lib/dom/xpathUtils.d.ts +0 -14
  246. package/dist/lib/handlers/actHandler.d.ts +0 -33
  247. package/dist/lib/handlers/cuaAgentHandler.d.ts +0 -58
  248. package/dist/lib/handlers/extractHandler.d.ts +0 -54
  249. package/dist/lib/handlers/handlerUtils/actHandlerUtils.d.ts +0 -21
  250. package/dist/lib/handlers/observeHandler.d.ts +0 -40
  251. package/dist/lib/handlers/stagehandAgentHandler.d.ts +0 -27
  252. package/dist/lib/index.d.ts +0 -94
  253. package/dist/lib/inference.d.ts +0 -61
  254. package/dist/lib/inferenceLogUtils.d.ts +0 -12
  255. package/dist/lib/llm/AnthropicClient.d.ts +0 -21
  256. package/dist/lib/llm/CerebrasClient.d.ts +0 -22
  257. package/dist/lib/llm/GoogleClient.d.ts +0 -24
  258. package/dist/lib/llm/GroqClient.d.ts +0 -22
  259. package/dist/lib/llm/LLMClient.d.ts +0 -99
  260. package/dist/lib/llm/LLMProvider.d.ts +0 -13
  261. package/dist/lib/llm/OpenAIClient.d.ts +0 -20
  262. package/dist/lib/llm/aisdk.d.ts +0 -20
  263. package/dist/lib/logger.d.ts +0 -54
  264. package/dist/lib/mcp/connection.d.ts +0 -11
  265. package/dist/lib/mcp/utils.d.ts +0 -3
  266. package/dist/lib/prompt.d.ts +0 -14
  267. package/dist/lib/utils.d.ts +0 -68
  268. package/dist/lib/version.d.ts +0 -5
  269. package/dist/scripts/gen-version.d.ts +0 -1
  270. package/dist/scripts/run-evals.d.ts +0 -1
  271. package/dist/stagehand.config.d.ts +0 -3
  272. package/dist/types/act.d.ts +0 -50
  273. package/dist/types/agent.d.ts +0 -144
  274. package/dist/types/api.d.ts +0 -40
  275. package/dist/types/browser.d.ts +0 -10
  276. package/dist/types/context.d.ts +0 -117
  277. package/dist/types/evals.d.ts +0 -95
  278. package/dist/types/evaluator.d.ts +0 -40
  279. package/dist/types/llm.d.ts +0 -11
  280. package/dist/types/log.d.ts +0 -23
  281. package/dist/types/model.d.ts +0 -17
  282. package/dist/types/page.d.ts +0 -38
  283. package/dist/types/playwright.d.ts +0 -12
  284. package/dist/types/stagehand.d.ts +0 -330
  285. package/dist/types/stagehandApiErrors.d.ts +0 -18
  286. package/dist/types/stagehandErrors.d.ts +0 -104
package/dist/index.d.ts CHANGED
@@ -1,25 +1,47 @@
1
- import { z, ZodType, ZodError } from 'zod/v3';
2
- import Browserbase from '@browserbasehq/sdk';
3
- import { Client } from '@modelcontextprotocol/sdk/dist/esm/client';
4
- import { generateObject, generateText, streamText, streamObject, experimental_generateImage, embed, embedMany, experimental_transcribe, experimental_generateSpeech, LanguageModel, ToolSet } from 'ai';
5
- import { Cookie, Page as Page$1, PageScreenshotOptions, Browser as Browser$1, BrowserContext as BrowserContext$1, Frame, CDPSession } from 'playwright';
1
+ import z, { ZodType, z as z$1, ZodError, ZodTypeAny } from 'zod/v3';
6
2
  import { ClientOptions as ClientOptions$2 } from '@anthropic-ai/sdk';
3
+ import { LanguageModelV2 } from '@ai-sdk/provider';
7
4
  import { ClientOptions as ClientOptions$1 } from 'openai';
8
- import { ClientOptions as ClientOptions$3, Client as Client$1 } from '@modelcontextprotocol/sdk/client/index.js';
9
- import { ChatCompletion } from 'openai/resources';
5
+ import { generateObject, generateText, streamText, streamObject, experimental_generateImage, embed, embedMany, experimental_transcribe, experimental_generateSpeech, ToolSet } from 'ai';
6
+ import { Client, ClientOptions as ClientOptions$3 } from '@modelcontextprotocol/sdk/client/index.js';
7
+ import { Page as Page$1 } from 'playwright-core';
8
+ export { Page as PlaywrightPage } from 'playwright-core';
9
+ import { Page as Page$2 } from 'puppeteer-core';
10
+ export { Page as PuppeteerPage } from 'puppeteer-core';
11
+ import { Page as Page$3 } from 'patchright-core';
12
+ export { Page as PatchrightPage } from 'patchright-core';
13
+ import { Protocol } from 'devtools-protocol';
14
+ import { Buffer as Buffer$1 } from 'buffer';
15
+ import Browserbase from '@browserbasehq/sdk';
16
+ import { ToolSet as ToolSet$1 } from 'ai/dist';
17
+ import { Schema } from '@google/genai';
10
18
 
11
- declare class PlaywrightCommandException extends Error {
12
- constructor(message: string);
13
- }
14
- declare class PlaywrightCommandMethodNotSupportedException extends Error {
15
- constructor(message: string);
16
- }
17
- interface GotoOptions {
18
- timeout?: number;
19
- waitUntil?: "load" | "domcontentloaded" | "networkidle" | "commit";
20
- referer?: string;
21
- frameId?: string;
19
+ type AnthropicJsonSchemaObject = {
20
+ definitions?: {
21
+ MySchema?: {
22
+ properties?: Record<string, unknown>;
23
+ required?: string[];
24
+ };
25
+ };
26
+ properties?: Record<string, unknown>;
27
+ required?: string[];
28
+ } & Record<string, unknown>;
29
+ interface LLMTool {
30
+ type: "function";
31
+ name: string;
32
+ description: string;
33
+ parameters: Record<string, unknown>;
22
34
  }
35
+ type AISDKProvider = (modelName: string) => LanguageModelV2;
36
+ type AISDKCustomProvider = (options: {
37
+ apiKey: string;
38
+ }) => AISDKProvider;
39
+ type AvailableModel = "gpt-4.1" | "gpt-4.1-mini" | "gpt-4.1-nano" | "o4-mini" | "o3" | "o3-mini" | "o1" | "o1-mini" | "gpt-4o" | "gpt-4o-mini" | "gpt-4o-2024-08-06" | "gpt-4.5-preview" | "o1-preview" | "claude-3-5-sonnet-latest" | "claude-3-5-sonnet-20241022" | "claude-3-5-sonnet-20240620" | "claude-3-7-sonnet-latest" | "claude-3-7-sonnet-20250219" | "cerebras-llama-3.3-70b" | "cerebras-llama-3.1-8b" | "groq-llama-3.3-70b-versatile" | "groq-llama-3.3-70b-specdec" | "gemini-1.5-flash" | "gemini-1.5-pro" | "gemini-1.5-flash-8b" | "gemini-2.0-flash-lite" | "gemini-2.0-flash" | "gemini-2.5-flash-preview-04-17" | "gemini-2.5-pro-preview-03-25" | string;
40
+ type ModelProvider = "openai" | "anthropic" | "cerebras" | "groq" | "google" | "aisdk";
41
+ type ClientOptions = ClientOptions$1 | ClientOptions$2;
42
+ type ModelConfiguration = AvailableModel | (ClientOptions & {
43
+ modelName: AvailableModel;
44
+ });
23
45
 
24
46
  type LogLevel = 0 | 1 | 2;
25
47
  /**
@@ -45,28 +67,6 @@ type LogLine = {
45
67
  };
46
68
  type Logger = (logLine: LogLine) => void;
47
69
 
48
- interface LLMTool {
49
- type: "function";
50
- name: string;
51
- description: string;
52
- parameters: Record<string, unknown>;
53
- }
54
-
55
- declare const AvailableModelSchema: z.ZodEnum<["gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o4-mini", "o3", "o3-mini", "o1", "o1-mini", "gpt-4o", "gpt-4o-mini", "gpt-4o-2024-08-06", "gpt-4.5-preview", "o1-preview", "claude-3-5-sonnet-latest", "claude-3-5-sonnet-20241022", "claude-3-5-sonnet-20240620", "claude-3-7-sonnet-latest", "claude-3-7-sonnet-20250219", "cerebras-llama-3.3-70b", "cerebras-llama-3.1-8b", "groq-llama-3.3-70b-versatile", "groq-llama-3.3-70b-specdec", "gemini-1.5-flash", "gemini-1.5-pro", "gemini-1.5-flash-8b", "gemini-2.0-flash-lite", "gemini-2.0-flash", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25"]>;
56
- type AvailableModel = z.infer<typeof AvailableModelSchema> | string;
57
- type ModelProvider = "openai" | "anthropic" | "cerebras" | "groq" | "google" | "aisdk";
58
- type ClientOptions = ClientOptions$1 | ClientOptions$2;
59
- interface AnthropicJsonSchemaObject {
60
- definitions?: {
61
- MySchema?: {
62
- properties?: Record<string, unknown>;
63
- required?: string[];
64
- };
65
- };
66
- properties?: Record<string, unknown>;
67
- required?: string[];
68
- }
69
-
70
70
  interface ChatMessage {
71
71
  role: "system" | "user" | "assistant";
72
72
  content: ChatMessageContent;
@@ -105,7 +105,7 @@ interface ChatCompletionOptions {
105
105
  };
106
106
  tools?: LLMTool[];
107
107
  tool_choice?: "auto" | "none" | "required";
108
- maxTokens?: number;
108
+ maxOutputTokens?: number;
109
109
  requestId?: string;
110
110
  }
111
111
  type LLMResponse = {
@@ -159,393 +159,757 @@ declare abstract class LLMClient {
159
159
  embedMany: typeof embedMany;
160
160
  transcribe: typeof experimental_transcribe;
161
161
  generateSpeech: typeof experimental_generateSpeech;
162
- getLanguageModel?(): LanguageModel;
162
+ getLanguageModel?(): LanguageModelV2;
163
163
  }
164
164
 
165
- declare class LLMProvider {
166
- private logger;
167
- private enableCaching;
168
- private cache;
169
- constructor(logger: (message: LogLine) => void, enableCaching: boolean);
170
- cleanRequestCache(requestId: string): void;
171
- getClient(modelName: AvailableModel, clientOptions?: ClientOptions): LLMClient;
172
- static getModelProvider(modelName: AvailableModel): ModelProvider;
165
+ /**
166
+ * CDP transport & session multiplexer
167
+ *
168
+ * Owns the browser WebSocket and multiplexes flattened Target sessions.
169
+ * Tracks inflight CDP calls, routes responses to the right session, and forwards events.
170
+ *
171
+ * This does not interpret Page/DOM/Runtime semantics — callers own that logic.
172
+ */
173
+ interface CDPSessionLike {
174
+ send<R = unknown>(method: string, params?: object): Promise<R>;
175
+ on<P = unknown>(event: string, handler: (params: P) => void): void;
176
+ off<P = unknown>(event: string, handler: (params: P) => void): void;
177
+ close(): Promise<void>;
178
+ readonly id: string | null;
179
+ }
180
+ type EventHandler = (params: unknown) => void;
181
+ declare class CdpConnection implements CDPSessionLike {
182
+ private ws;
183
+ private nextId;
184
+ private inflight;
185
+ private eventHandlers;
186
+ private sessions;
187
+ readonly id: string | null;
188
+ private transportCloseHandlers;
189
+ onTransportClosed(handler: (why: string) => void): void;
190
+ offTransportClosed(handler: (why: string) => void): void;
191
+ private emitTransportClosed;
192
+ private constructor();
193
+ static connect(wsUrl: string): Promise<CdpConnection>;
194
+ enableAutoAttach(): Promise<void>;
195
+ send<R = unknown>(method: string, params?: object): Promise<R>;
196
+ on<P = unknown>(event: string, handler: (params: P) => void): void;
197
+ off<P = unknown>(event: string, handler: (params: P) => void): void;
198
+ close(): Promise<void>;
199
+ getSession(sessionId: string): CdpSession | undefined;
200
+ attachToTarget(targetId: string): Promise<CdpSession>;
201
+ getTargets(): Promise<Array<{
202
+ targetId: string;
203
+ type: string;
204
+ url: string;
205
+ }>>;
206
+ private onMessage;
207
+ _sendViaSession<R = unknown>(sessionId: string, method: string, params?: object): Promise<R>;
208
+ _onSessionEvent(sessionId: string, event: string, handler: EventHandler): void;
209
+ _offSessionEvent(sessionId: string, event: string, handler: EventHandler): void;
210
+ _dispatchToSession(sessionId: string, event: string, params: unknown): void;
211
+ }
212
+ declare class CdpSession implements CDPSessionLike {
213
+ private readonly root;
214
+ readonly id: string;
215
+ constructor(root: CdpConnection, id: string);
216
+ send<R = unknown>(method: string, params?: object): Promise<R>;
217
+ on<P = unknown>(event: string, handler: (params: P) => void): void;
218
+ off<P = unknown>(event: string, handler: (params: P) => void): void;
219
+ close(): Promise<void>;
220
+ dispatch(event: string, params: unknown): void;
173
221
  }
174
222
 
175
- interface ConstructorParams {
223
+ interface FrameManager {
224
+ session: CDPSessionLike;
225
+ frameId: string;
226
+ pageId: string;
227
+ }
228
+ /**
229
+ * Frame
230
+ *
231
+ * A thin, session-bound handle to a specific DOM frame (by frameId).
232
+ * All CDP calls in this class go through `this.session`, which MUST be the
233
+ * owning session for `this.frameId`. Page is responsible for constructing
234
+ * Frames with the correct session.
235
+ */
236
+ declare class Frame implements FrameManager {
237
+ session: CDPSessionLike;
238
+ frameId: string;
239
+ pageId: string;
240
+ /** Owning CDP session id (useful for logs); null for root connection (should not happen for targets) */
241
+ readonly sessionId: string | null;
242
+ constructor(session: CDPSessionLike, frameId: string, pageId: string);
243
+ /** DOM.getNodeForLocation → DOM.describeNode */
244
+ getNodeAtLocation(x: number, y: number): Promise<Protocol.DOM.Node>;
245
+ /** CSS selector → DOM.querySelector → DOM.getBoxModel */
246
+ getLocationForSelector(selector: string): Promise<{
247
+ x: number;
248
+ y: number;
249
+ width: number;
250
+ height: number;
251
+ }>;
252
+ /** Accessibility.getFullAXTree (+ recurse into child frames if requested) */
253
+ getAccessibilityTree(withFrames?: boolean): Promise<Protocol.Accessibility.AXNode[]>;
176
254
  /**
177
- * The environment to use for Stagehand
255
+ * Evaluate a function or expression in this frame's isolated world.
256
+ * - If a string is provided, treated as a JS expression.
257
+ * - If a function is provided, it is stringified and invoked with the optional argument.
178
258
  */
179
- env: "LOCAL" | "BROWSERBASE";
259
+ evaluate<R = unknown, Arg = unknown>(pageFunctionOrExpression: string | ((arg: Arg) => R | Promise<R>), arg?: Arg): Promise<R>;
260
+ /** Page.captureScreenshot (frame-scoped session) */
261
+ screenshot(options?: {
262
+ fullPage?: boolean;
263
+ clip?: {
264
+ x: number;
265
+ y: number;
266
+ width: number;
267
+ height: number;
268
+ };
269
+ }): Promise<Buffer>;
270
+ /** Child frames via Page.getFrameTree */
271
+ childFrames(): Promise<Frame[]>;
272
+ /** Wait for a lifecycle state (load/domcontentloaded/networkidle) */
273
+ waitForLoadState(state?: "load" | "domcontentloaded" | "networkidle"): Promise<void>;
274
+ /** Simple placeholder for your own locator abstraction */
275
+ locator(selector: string, options?: {
276
+ deep?: boolean;
277
+ depth?: number;
278
+ }): Locator;
279
+ /** Create/get an isolated world for this frame and return its executionContextId */
280
+ private getExecutionContextId;
281
+ }
282
+
283
+ type MouseButton = "left" | "right" | "middle";
284
+ /**
285
+ * Locator
286
+ *
287
+ * Purpose:
288
+ * A small, CDP-based element interaction helper scoped to a specific `Frame`.
289
+ * It resolves a CSS/XPath selector inside the frame’s **isolated world**, and then
290
+ * performs low-level actions (click, type, select) using DOM/Runtime/Input
291
+ * protocol domains with minimal abstraction.
292
+ *
293
+ * Key change:
294
+ * - Prefer **objectId**-based CDP calls (scroll, geometry) to avoid brittle
295
+ * frontend nodeId mappings. nodeId is resolved on a best-effort basis and
296
+ * returned for compatibility, but actions do not depend on it.
297
+ *
298
+ * Notes:
299
+ * - Resolution is lazy: every action resolves the selector again.
300
+ * - Uses `Page.createIsolatedWorld` so evaluation is isolated from page scripts.
301
+ * - Releases remote objects (`Runtime.releaseObject`) where appropriate.
302
+ */
303
+ declare class Locator {
304
+ private readonly frame;
305
+ private readonly selector;
306
+ private readonly options?;
307
+ private readonly selectorResolver;
308
+ private readonly selectorQuery;
309
+ private readonly nthIndex;
310
+ constructor(frame: Frame, selector: string, options?: {
311
+ deep?: boolean;
312
+ depth?: number;
313
+ }, nthIndex?: number);
314
+ /** Return the owning Frame for this locator (typed accessor, no private access). */
315
+ getFrame(): Frame;
180
316
  /**
181
- * Your Browserbase API key
317
+ * Set files on an <input type="file"> element.
318
+ *
319
+ * Mirrors Playwright's Locator.setInputFiles basics:
320
+ * - Accepts file path(s) or payload object(s) { name, mimeType, buffer }.
321
+ * - Uses CDP DOM.setFileInputFiles under the hood.
322
+ * - Best‑effort dispatches change/input via CDP (Chrome does by default).
323
+ * - Passing an empty array clears the selection.
182
324
  */
183
- apiKey?: string;
325
+ setInputFiles(files: string | string[] | {
326
+ name: string;
327
+ mimeType: string;
328
+ buffer: ArrayBuffer | Uint8Array | Buffer$1 | string;
329
+ } | Array<{
330
+ name: string;
331
+ mimeType: string;
332
+ buffer: ArrayBuffer | Uint8Array | Buffer$1 | string;
333
+ }>): Promise<void>;
184
334
  /**
185
- * Your Browserbase project ID
335
+ * Return the DOM backendNodeId for this locator's target element.
336
+ * Useful for identity comparisons without needing element handles.
186
337
  */
187
- projectId?: string;
338
+ backendNodeId(): Promise<Protocol.DOM.BackendNodeId>;
339
+ /** Return how many nodes the current selector resolves to. */
340
+ count(): Promise<number>;
188
341
  /**
189
- * The verbosity of the Stagehand logger
190
- * 0 - No logs
191
- * 1 - Only errors
192
- * 2 - All logs
342
+ * Return the center of the element's bounding box in the owning frame's viewport
343
+ * (CSS pixels), rounded to integers. Scrolls into view best-effort.
193
344
  */
194
- verbose?: 0 | 1 | 2;
345
+ centroid(): Promise<{
346
+ x: number;
347
+ y: number;
348
+ }>;
195
349
  /**
196
- * The LLM provider to use for Stagehand
197
- * See
350
+ * Highlight the element's bounding box using the CDP Overlay domain.
351
+ * - Scrolls element into view best-effort.
352
+ * - Shows a semi-transparent overlay briefly, then hides it.
198
353
  */
199
- llmProvider?: LLMProvider;
354
+ highlight(options?: {
355
+ durationMs?: number;
356
+ borderColor?: {
357
+ r: number;
358
+ g: number;
359
+ b: number;
360
+ a?: number;
361
+ };
362
+ contentColor?: {
363
+ r: number;
364
+ g: number;
365
+ b: number;
366
+ a?: number;
367
+ };
368
+ }): Promise<void>;
200
369
  /**
201
- * The logger to use for Stagehand
370
+ * Move the mouse cursor to the element's visual center without clicking.
371
+ * - Scrolls into view best-effort, resolves geometry, then dispatches a mouse move.
202
372
  */
203
- logger?: (message: LogLine) => void | Promise<void>;
373
+ hover(): Promise<void>;
204
374
  /**
205
- * The timeout to use for the DOM to settle
206
- * @default 10000
375
+ * Click the element at its visual center.
376
+ * Steps:
377
+ * 1) Resolve selector to { objectId } in the frame world.
378
+ * 2) Scroll into view via `DOM.scrollIntoViewIfNeeded({ objectId })`.
379
+ * 3) Read geometry via `DOM.getBoxModel({ objectId })` → compute a center point.
380
+ * 4) Synthesize mouse press + release via `Input.dispatchMouseEvent`.
207
381
  */
208
- domSettleTimeoutMs?: number;
382
+ click(options?: {
383
+ button?: MouseButton;
384
+ clickCount?: number;
385
+ }): Promise<void>;
209
386
  /**
210
- * The parameters to use for creating a Browserbase session
211
- * See https://docs.browserbase.com/reference/api/create-a-session
212
- * Note: projectId is optional here as it will use the main projectId parameter if not provided
387
+ * Dispatch a DOM 'click' MouseEvent on the element itself.
388
+ * - Does not synthesize real pointer input; directly dispatches an event.
389
+ * - Useful for elements that rely on click handlers without needing hit-testing.
213
390
  */
214
- browserbaseSessionCreateParams?: Omit<Browserbase.Sessions.SessionCreateParams, "projectId"> & {
215
- projectId?: string;
216
- };
391
+ sendClickEvent(options?: {
392
+ bubbles?: boolean;
393
+ cancelable?: boolean;
394
+ composed?: boolean;
395
+ detail?: number;
396
+ }): Promise<void>;
217
397
  /**
218
- * Enable caching of LLM responses
219
- * @default true
398
+ * Scroll the element vertically to a given percentage (0–100).
399
+ * - If the element is <html> or <body>, scrolls the window/document.
400
+ * - Otherwise, scrolls the element itself via element.scrollTo.
220
401
  */
221
- enableCaching?: boolean;
402
+ scrollTo(percent: number | string): Promise<void>;
222
403
  /**
223
- * The ID of a Browserbase session to resume
404
+ * Fill an input/textarea/contenteditable element.
405
+ * Mirrors Playwright semantics: the DOM helper either applies the native
406
+ * value setter (for special input types) or asks us to type text via the CDP
407
+ * Input domain after focusing/selecting.
224
408
  */
225
- browserbaseSessionID?: string;
409
+ fill(value: string): Promise<void>;
226
410
  /**
227
- * The model to use for Stagehand
411
+ * Type text into the element (focuses first).
412
+ * - Focus via element.focus() in page JS (no DOM.focus(nodeId)).
413
+ * - If no delay, uses `Input.insertText` for efficiency.
414
+ * - With delay, synthesizes `keyDown`/`keyUp` per character.
228
415
  */
229
- modelName?: AvailableModel;
416
+ type(text: string, options?: {
417
+ delay?: number;
418
+ }): Promise<void>;
230
419
  /**
231
- * The LLM client to use for Stagehand
420
+ * Select one or more options on a `<select>` element.
421
+ * Returns the values actually selected after the operation.
232
422
  */
233
- llmClient?: LLMClient;
423
+ selectOption(values: string | string[]): Promise<string[]>;
234
424
  /**
235
- * The parameters to use for the LLM client
236
- * Useful for parameterizing LLM API Keys
425
+ * Return true if the element is attached and visible (rough heuristic).
237
426
  */
238
- modelClientOptions?: ClientOptions;
427
+ isVisible(): Promise<boolean>;
239
428
  /**
240
- * Customize the Stagehand system prompt
429
+ * Return true if the element is an input[type=checkbox|radio] and is checked.
430
+ * Also considers aria-checked for ARIA widgets.
241
431
  */
242
- systemPrompt?: string;
432
+ isChecked(): Promise<boolean>;
243
433
  /**
244
- * Offload Stagehand method calls to the Stagehand API.
245
- * Must have a valid API key to use
434
+ * Return the element's input value (for input/textarea/select/contenteditable).
246
435
  */
247
- useAPI?: boolean;
436
+ inputValue(): Promise<string>;
248
437
  /**
249
- * Wait for captchas to be solved after navigation when using Browserbase environment.
250
- *
251
- * @default false
438
+ * Return the element's textContent (raw, not innerText).
252
439
  */
253
- waitForCaptchaSolves?: boolean;
440
+ textContent(): Promise<string>;
254
441
  /**
255
- * The parameters to use for launching a local browser
442
+ * Return the element's innerHTML string.
256
443
  */
257
- localBrowserLaunchOptions?: LocalBrowserLaunchOptions;
444
+ innerHtml(): Promise<string>;
258
445
  /**
259
- * Log the inference to a file
446
+ * Return the element's innerText (layout-aware, visible text).
260
447
  */
261
- logInferenceToFile?: boolean;
262
- selfHeal?: boolean;
448
+ innerText(): Promise<string>;
263
449
  /**
264
- * Disable Pino (helpful for Next.js or test environments)
450
+ * For API parity, returns the same locator (querySelector already returns the first match).
265
451
  */
266
- disablePino?: boolean;
452
+ first(): Locator;
453
+ /** Return a locator narrowed to the element at the given zero-based index. */
454
+ nth(index: number): Locator;
267
455
  /**
268
- * Experimental Flag: Enables the latest experimental features
456
+ * Resolve `this.selector` within the frame to `{ objectId, nodeId? }`:
457
+ * Delegates to a shared selector resolver so all selector logic stays in sync.
269
458
  */
270
- experimental?: boolean;
459
+ resolveNode(): Promise<{
460
+ nodeId: Protocol.DOM.NodeId | null;
461
+ objectId: Protocol.Runtime.RemoteObjectId;
462
+ }>;
463
+ /** Compute a center point from a BoxModel content quad */
464
+ private centerFromBoxContent;
271
465
  }
272
- interface InitResult {
273
- debugUrl: string;
274
- sessionUrl: string;
275
- sessionId: string;
466
+
467
+ /**
468
+ * DeepLocatorDelegate: a lightweight wrapper that looks like a Locator and
469
+ * resolves to the correct frame/element on each call using hop/deep-XPath logic.
470
+ *
471
+ * Returned by `page.deepLocator()` for ergonomic, await-free chaining:
472
+ * page.deepLocator('iframe#ifrA >> #btn').click()
473
+ */
474
+ declare class DeepLocatorDelegate {
475
+ private readonly page;
476
+ private readonly root;
477
+ private readonly selector;
478
+ private readonly nthIndex;
479
+ constructor(page: Page, root: Frame, selector: string, nthIndex?: number);
480
+ private real;
481
+ click(options?: {
482
+ button?: "left" | "right" | "middle";
483
+ clickCount?: number;
484
+ }): Promise<void>;
485
+ count(): Promise<number>;
486
+ hover(): Promise<void>;
487
+ fill(value: string): Promise<void>;
488
+ type(text: string, options?: {
489
+ delay?: number;
490
+ }): Promise<void>;
491
+ selectOption(values: string | string[]): Promise<string[]>;
492
+ scrollTo(percent: number | string): Promise<void>;
493
+ isVisible(): Promise<boolean>;
494
+ isChecked(): Promise<boolean>;
495
+ inputValue(): Promise<string>;
496
+ textContent(): Promise<string>;
497
+ innerHtml(): Promise<string>;
498
+ innerText(): Promise<string>;
499
+ centroid(): Promise<{
500
+ x: number;
501
+ y: number;
502
+ }>;
503
+ backendNodeId(): Promise<number>;
504
+ highlight(options?: {
505
+ durationMs?: number;
506
+ borderColor?: {
507
+ r: number;
508
+ g: number;
509
+ b: number;
510
+ a?: number;
511
+ };
512
+ contentColor?: {
513
+ r: number;
514
+ g: number;
515
+ b: number;
516
+ a?: number;
517
+ };
518
+ }): Promise<void>;
519
+ sendClickEvent(options?: {
520
+ bubbles?: boolean;
521
+ cancelable?: boolean;
522
+ composed?: boolean;
523
+ detail?: number;
524
+ }): Promise<void>;
525
+ setInputFiles(files: string | string[] | {
526
+ name: string;
527
+ mimeType: string;
528
+ buffer: ArrayBuffer | Uint8Array | Buffer | string;
529
+ } | Array<{
530
+ name: string;
531
+ mimeType: string;
532
+ buffer: ArrayBuffer | Uint8Array | Buffer | string;
533
+ }>): Promise<void>;
534
+ first(): DeepLocatorDelegate;
535
+ nth(index: number): DeepLocatorDelegate;
276
536
  }
277
- interface ActOptions {
278
- action: string;
279
- modelName?: AvailableModel;
280
- modelClientOptions?: ClientOptions;
281
- variables?: Record<string, string>;
282
- domSettleTimeoutMs?: number;
283
- timeoutMs?: number;
284
- iframes?: boolean;
285
- frameId?: string;
537
+
538
+ /**
539
+ * FrameLocator: resolves iframe elements to their child Frames and allows
540
+ * creating locators scoped to that frame. Supports chaining.
541
+ */
542
+ declare class FrameLocator {
543
+ private readonly parent?;
544
+ private readonly selector;
545
+ private readonly page;
546
+ private readonly root?;
547
+ constructor(page: Page, selector: string, parent?: FrameLocator, root?: Frame);
548
+ /** Create a nested FrameLocator under this one. */
549
+ frameLocator(selector: string): FrameLocator;
550
+ /** Resolve to the concrete Frame for this FrameLocator chain. */
551
+ resolveFrame(): Promise<Frame>;
552
+ /** Return a Locator scoped to this frame. Methods delegate to the frame lazily. */
553
+ locator(selector: string): LocatorDelegate;
554
+ }
555
+ /** A small delegating wrapper that resolves the frame lazily per call. */
556
+ declare class LocatorDelegate {
557
+ private readonly fl;
558
+ private readonly sel;
559
+ constructor(fl: FrameLocator, sel: string);
560
+ private real;
561
+ click(options?: {
562
+ button?: "left" | "right" | "middle";
563
+ clickCount?: number;
564
+ }): Promise<void>;
565
+ hover(): Promise<void>;
566
+ fill(value: string): Promise<void>;
567
+ type(text: string, options?: {
568
+ delay?: number;
569
+ }): Promise<void>;
570
+ selectOption(values: string | string[]): Promise<string[]>;
571
+ scrollTo(percent: number | string): Promise<void>;
572
+ isVisible(): Promise<boolean>;
573
+ isChecked(): Promise<boolean>;
574
+ inputValue(): Promise<string>;
575
+ textContent(): Promise<string>;
576
+ innerHtml(): Promise<string>;
577
+ innerText(): Promise<string>;
578
+ count(): Promise<number>;
579
+ first(): LocatorDelegate;
286
580
  }
287
- interface ActResult {
288
- success: boolean;
289
- message: string;
290
- action: string;
581
+
582
+ type AnyPage = Page$1 | Page$2 | Page$3 | Page;
583
+ type LoadState = "load" | "domcontentloaded" | "networkidle";
584
+
585
+ declare class StagehandAPIClient {
586
+ private apiKey;
587
+ private projectId;
588
+ private sessionId?;
589
+ private modelApiKey;
590
+ private logger;
591
+ private fetchWithCookies;
592
+ constructor({ apiKey, projectId, logger }: StagehandAPIConstructorParams);
593
+ init({ modelName, modelApiKey, domSettleTimeoutMs, verbose, systemPrompt, selfHeal, browserbaseSessionCreateParams, browserbaseSessionID, }: StartSessionParams): Promise<StartSessionResult>;
594
+ act({ input, options, frameId }: APIActParameters): Promise<ActResult>;
595
+ extract<T extends z.AnyZodObject>({ instruction, schema: zodSchema, options, frameId, }: APIExtractParameters): Promise<ExtractResult<T>>;
596
+ observe({ instruction, options, frameId, }: APIObserveParameters): Promise<Action[]>;
597
+ goto(url: string, options?: {
598
+ waitUntil?: "load" | "domcontentloaded" | "networkidle";
599
+ }, frameId?: string): Promise<void>;
600
+ agentExecute(agentConfig: AgentConfig, executeOptions: AgentExecuteOptions | string, frameId?: string): Promise<AgentResult>;
601
+ end(): Promise<Response>;
602
+ private execute;
603
+ private request;
291
604
  }
292
- interface ExtractOptions<T extends z.AnyZodObject> {
293
- instruction?: string;
294
- schema?: T;
295
- modelName?: AvailableModel;
296
- modelClientOptions?: ClientOptions;
297
- domSettleTimeoutMs?: number;
605
+
606
+ declare class Page {
607
+ private readonly conn;
608
+ private readonly mainSession;
609
+ private readonly _targetId;
610
+ /** Every CDP child session this page owns (top-level + adopted OOPIF sessions). */
611
+ private readonly sessions;
612
+ /** Unified truth for frame topology + ownership. */
613
+ private readonly registry;
614
+ /** A convenience wrapper bound to the current main frame id (top-level session). */
615
+ private mainFrameWrapper;
616
+ /** Compact ordinal per frameId (used by snapshot encoding). */
617
+ private frameOrdinals;
618
+ private nextOrdinal;
619
+ /** cache Frames per frameId so everyone uses the same one */
620
+ private readonly frameCache;
621
+ /** Stable id for Frames created by this Page (use top-level TargetId). */
622
+ private readonly pageId;
623
+ /** Cached current URL for synchronous page.url() */
624
+ private _currentUrl;
625
+ private navigationCommandSeq;
626
+ private latestNavigationCommandId;
627
+ private readonly networkManager;
628
+ /** Optional API client for routing page operations to the API */
629
+ private readonly apiClient;
630
+ private constructor();
631
+ private cursorEnabled;
632
+ private ensureCursorScript;
633
+ enableCursorOverlay(): Promise<void>;
634
+ private updateCursor;
298
635
  /**
299
- * @deprecated The `useTextExtract` parameter has no effect in this version of Stagehand and will be removed in later versions.
636
+ * Factory: create Page and seed registry with the shallow tree from Page.getFrameTree.
637
+ * Assumes Page domain is already enabled on the session passed in.
300
638
  */
301
- useTextExtract?: boolean;
302
- selector?: string;
303
- iframes?: boolean;
304
- frameId?: string;
305
- }
306
- type ExtractResult<T extends z.AnyZodObject> = z.infer<T>;
307
- interface ObserveOptions {
308
- instruction?: string;
309
- modelName?: AvailableModel;
310
- modelClientOptions?: ClientOptions;
311
- domSettleTimeoutMs?: number;
312
- returnAction?: boolean;
639
+ static create(conn: CdpConnection, session: CDPSessionLike, targetId: string, apiClient?: StagehandAPIClient | null, localBrowserLaunchOptions?: LocalBrowserLaunchOptions | null): Promise<Page>;
313
640
  /**
314
- * @deprecated The `onlyVisible` parameter has no effect in this version of Stagehand and will be removed in later versions.
641
+ * Parent/child session emitted a `frameAttached`.
642
+ * Topology update + ownership stamped to **emitting session**.
315
643
  */
316
- onlyVisible?: boolean;
317
- drawOverlay?: boolean;
318
- iframes?: boolean;
319
- frameId?: string;
320
- }
321
- interface ObserveResult$1 {
322
- selector: string;
323
- description: string;
324
- backendNodeId?: number;
325
- method?: string;
326
- arguments?: string[];
327
- }
328
- interface LocalBrowserLaunchOptions {
329
- args?: string[];
330
- chromiumSandbox?: boolean;
331
- devtools?: boolean;
332
- env?: {
333
- [key: string]: string | undefined;
334
- };
335
- executablePath?: string;
336
- handleSIGHUP?: boolean;
337
- handleSIGINT?: boolean;
338
- handleSIGTERM?: boolean;
339
- headless?: boolean;
340
- ignoreDefaultArgs?: boolean | Array<string>;
341
- proxy?: {
342
- server: string;
343
- bypass?: string;
344
- username?: string;
345
- password?: string;
346
- };
347
- tracesDir?: string;
348
- userDataDir?: string;
349
- preserveUserDataDir?: boolean;
350
- acceptDownloads?: boolean;
351
- downloadsPath?: string;
352
- extraHTTPHeaders?: Record<string, string>;
353
- geolocation?: {
354
- latitude: number;
355
- longitude: number;
356
- accuracy?: number;
357
- };
358
- hasTouch?: boolean;
359
- ignoreHTTPSErrors?: boolean;
360
- locale?: string;
361
- permissions?: Array<string>;
362
- recordHar?: {
363
- omitContent?: boolean;
364
- content?: "omit" | "embed" | "attach";
365
- path: string;
366
- mode?: "full" | "minimal";
367
- urlFilter?: string | RegExp;
368
- };
369
- recordVideo?: {
370
- dir: string;
371
- size?: {
372
- width: number;
373
- height: number;
374
- };
375
- };
376
- viewport?: {
377
- width: number;
378
- height: number;
379
- };
380
- deviceScaleFactor?: number;
381
- timezoneId?: string;
382
- bypassCSP?: boolean;
383
- cookies?: Cookie[];
384
- cdpUrl?: string;
385
- }
386
- interface StagehandMetrics {
387
- actPromptTokens: number;
388
- actCompletionTokens: number;
389
- actInferenceTimeMs: number;
390
- extractPromptTokens: number;
391
- extractCompletionTokens: number;
392
- extractInferenceTimeMs: number;
393
- observePromptTokens: number;
394
- observeCompletionTokens: number;
395
- observeInferenceTimeMs: number;
396
- agentPromptTokens: number;
397
- agentCompletionTokens: number;
398
- agentInferenceTimeMs: number;
399
- totalPromptTokens: number;
400
- totalCompletionTokens: number;
401
- totalInferenceTimeMs: number;
402
- }
403
- /**
404
- * Options for executing a task with an agent
405
- */
406
- interface AgentExecuteParams {
644
+ onFrameAttached(frameId: string, parentId: string | null, session: CDPSessionLike): void;
407
645
  /**
408
- * The instruction to execute with the agent
646
+ * Parent/child session emitted a `frameDetached`.
409
647
  */
410
- instruction: string;
648
+ onFrameDetached(frameId: string, reason?: "remove" | "swap" | string): void;
411
649
  /**
412
- * Maximum number of steps the agent can take to complete the task
413
- * @default 10
650
+ * Parent/child session emitted a `frameNavigated`.
651
+ * Topology + ownership update. Handles root swaps.
414
652
  */
415
- maxSteps?: number;
653
+ onFrameNavigated(frame: Protocol.Page.Frame, session: CDPSessionLike): void;
654
+ onNavigatedWithinDocument(frameId: string, url: string, session: CDPSessionLike): void;
416
655
  /**
417
- * Take a screenshot automatically before each agent step
418
- * @default true
656
+ * An OOPIF child session whose **main** frame id equals the parent iframe’s frameId
657
+ * has been attached; adopt the session into this Page and seed ownership for its subtree.
419
658
  */
420
- autoScreenshot?: boolean;
659
+ adoptOopifSession(childSession: CDPSessionLike, childMainFrameId: string): void;
660
+ /** Detach an adopted child session and prune its subtree */
661
+ detachOopifSession(sessionId: string): void;
662
+ /** Return the owning CDP session for a frameId (falls back to main session) */
663
+ getSessionForFrame(frameId: string): CDPSessionLike;
664
+ /** Always returns a Frame bound to the owning session */
665
+ frameForId(frameId: string): Frame;
666
+ /** Expose a session by id (used by snapshot to resolve session id -> session) */
667
+ getSessionById(id: string): CDPSessionLike | undefined;
668
+ registerSessionForNetwork(session: CDPSessionLike): void;
669
+ unregisterSessionForNetwork(sessionId: string | undefined): void;
670
+ targetId(): string;
671
+ /** Seed the cached URL before navigation events converge. */
672
+ seedCurrentUrl(url: string | undefined | null): void;
673
+ mainFrameId(): string;
674
+ mainFrame(): Frame;
421
675
  /**
422
- * Wait time in milliseconds between agent actions
423
- * @default 0
676
+ * Close this top-level page (tab). Best-effort via Target.closeTarget.
424
677
  */
425
- waitBetweenActions?: number;
678
+ close(): Promise<void>;
679
+ getFullFrameTree(): Protocol.Page.FrameTree;
680
+ asProtocolFrameTree(rootMainFrameId: string): Protocol.Page.FrameTree;
681
+ private ensureOrdinal;
682
+ /** Public getter for snapshot code / handlers. */
683
+ getOrdinal(frameId: string): number;
684
+ listAllFrameIds(): string[];
426
685
  /**
427
- * Additional context to provide to the agent
686
+ * Navigate the page; optionally wait for a lifecycle state.
687
+ * Waits on the **current** main frame and follows root swaps during navigation.
428
688
  */
429
- context?: string;
430
- }
431
- /**
432
- * Configuration for agent functionality
433
- */
434
- interface AgentConfig {
689
+ goto(url: string, options?: {
690
+ waitUntil?: LoadState;
691
+ timeoutMs?: number;
692
+ }): Promise<void>;
435
693
  /**
436
- * The provider to use for agent functionality
694
+ * Reload the page; optionally wait for a lifecycle state.
437
695
  */
438
- provider?: AgentProviderType;
696
+ reload(options?: {
697
+ waitUntil?: LoadState;
698
+ timeoutMs?: number;
699
+ ignoreCache?: boolean;
700
+ }): Promise<void>;
439
701
  /**
440
- * The model to use for agent functionality
702
+ * Navigate back in history if possible; optionally wait for a lifecycle state.
441
703
  */
442
- model?: string;
704
+ goBack(options?: {
705
+ waitUntil?: LoadState;
706
+ timeoutMs?: number;
707
+ }): Promise<void>;
443
708
  /**
444
- * The model to use for tool execution (observe/act calls within agent tools).
445
- * If not specified, inherits from the main model configuration.
446
- * Format: "provider/model" (e.g., "openai/gpt-4o-mini", "google/gemini-2.0-flash-exp")
709
+ * Navigate forward in history if possible; optionally wait for a lifecycle state.
447
710
  */
448
- executionModel?: string;
711
+ goForward(options?: {
712
+ waitUntil?: LoadState;
713
+ timeoutMs?: number;
714
+ }): Promise<void>;
449
715
  /**
450
- * Custom instructions to provide to the agent
716
+ * Return the current page URL (synchronous, cached from navigation events).
451
717
  */
452
- instructions?: string;
718
+ url(): string;
719
+ private beginNavigationCommand;
720
+ isCurrentNavigationCommand(id: number): boolean;
453
721
  /**
454
- * Additional options to pass to the agent client
722
+ * Return the current page title.
723
+ * Prefers reading from the active document via Runtime.evaluate to reflect dynamic changes.
724
+ * Falls back to navigation history title if evaluation is unavailable.
455
725
  */
456
- options?: Record<string, unknown>;
726
+ title(): Promise<string>;
457
727
  /**
458
- * MCP integrations - Array of Client objects
728
+ * Capture a screenshot (delegated to the current main frame).
459
729
  */
460
- integrations?: (Client | string)[];
730
+ screenshot(options?: {
731
+ fullPage?: boolean;
732
+ }): Promise<Buffer>;
461
733
  /**
462
- * Tools passed to the agent client
734
+ * Create a locator bound to the current main frame.
463
735
  */
464
- tools?: ToolSet;
465
- }
466
- declare enum StagehandFunctionName {
467
- ACT = "ACT",
468
- EXTRACT = "EXTRACT",
469
- OBSERVE = "OBSERVE",
470
- AGENT = "AGENT"
471
- }
472
- interface HistoryEntry {
473
- method: "act" | "extract" | "observe" | "navigate";
474
- parameters: unknown;
475
- result: unknown;
476
- timestamp: string;
477
- }
478
- /**
479
- * Represents a path through a Zod schema from the root object down to a
480
- * particular field. The `segments` array describes the chain of keys/indices.
481
- *
482
- * - **String** segments indicate object property names.
483
- * - **Number** segments indicate array indices.
484
- *
485
- * For example, `["users", 0, "homepage"]` might describe reaching
486
- * the `homepage` field in `schema.users[0].homepage`.
487
- */
488
- interface ZodPathSegments {
736
+ locator(selector: string): ReturnType<Frame["locator"]>;
489
737
  /**
490
- * The ordered list of keys/indices leading from the schema root
491
- * to the targeted field.
738
+ * Deep locator that supports cross-iframe traversal.
739
+ * - Recognizes '>>' hop notation to enter iframe contexts.
740
+ * - Supports deep XPath that includes iframe steps (e.g., '/html/body/iframe[2]//div').
741
+ * Returns a Locator scoped to the appropriate frame.
492
742
  */
493
- segments: Array<string | number>;
494
- }
495
-
496
- interface ActToolResult {
497
- success: boolean;
498
- action?: string;
499
- error?: string;
500
- isIframe?: boolean;
501
- playwrightArguments?: ObserveResult$1 | null;
502
- }
503
- interface AgentAction$1 {
504
- type: string;
505
- reasoning?: string;
506
- taskCompleted?: boolean;
507
- action?: string;
508
- timeMs?: number;
509
- pageText?: string;
510
- timestamp?: number;
511
- pageUrl?: string;
512
- instruction?: string;
513
- playwrightArguments?: ObserveResult$1 | null;
514
- [key: string]: unknown;
515
- }
516
- interface AgentResult$1 {
517
- success: boolean;
518
- message: string;
519
- actions: AgentAction$1[];
520
- completed: boolean;
521
- metadata?: Record<string, unknown>;
522
- usage?: {
523
- input_tokens: number;
524
- output_tokens: number;
525
- inference_time_ms: number;
526
- };
527
- }
528
- interface AgentOptions$1 {
529
- maxSteps?: number;
530
- autoScreenshot?: boolean;
531
- waitBetweenActions?: number;
532
- context?: string;
533
- highlightCursor?: boolean;
534
- }
535
- interface AgentExecuteOptions$1 extends AgentOptions$1 {
536
- instruction: string;
743
+ deepLocator(selector: string): DeepLocatorDelegate;
744
+ /**
745
+ * Frame locator similar to Playwright: targets iframe elements and scopes
746
+ * subsequent locators to that frame. Supports chaining.
747
+ */
748
+ frameLocator(selector: string): FrameLocator;
749
+ /**
750
+ * List all frames belonging to this page as Frame objects bound to their owning sessions.
751
+ * The list is ordered by a stable ordinal assigned during the page lifetime.
752
+ */
753
+ frames(): Frame[];
754
+ /**
755
+ * Wait until the page reaches a lifecycle state on the current main frame.
756
+ * Mirrors Playwright's API signatures.
757
+ */
758
+ waitForLoadState(state: LoadState, timeoutMs?: number): Promise<void>;
759
+ /**
760
+ * Evaluate a function or expression in the current main frame's isolated world.
761
+ * - If a string is provided, it is treated as a JS expression.
762
+ * - If a function is provided, it is stringified and invoked with the optional argument.
763
+ * - The return value should be JSON-serializable. Non-serializable objects will
764
+ * best-effort serialize via JSON.stringify inside the page context.
765
+ */
766
+ evaluate<R = unknown, Arg = unknown>(pageFunctionOrExpression: string | ((arg: Arg) => R | Promise<R>), arg?: Arg): Promise<R>;
767
+ /**
768
+ * Force the page viewport to an exact CSS size and device scale factor.
769
+ * Ensures screenshots match width x height pixels when deviceScaleFactor = 1.
770
+ */
771
+ setViewportSize(width: number, height: number, options?: {
772
+ deviceScaleFactor?: number;
773
+ }): Promise<void>;
774
+ /**
775
+ * Click at absolute page coordinates (CSS pixels).
776
+ * Dispatches mouseMoved → mousePressed → mouseReleased via CDP Input domain
777
+ * on the top-level page target's session. Coordinates are relative to the
778
+ * viewport origin (top-left). Does not scroll.
779
+ */
780
+ click(x: number, y: number, options: {
781
+ button?: "left" | "right" | "middle";
782
+ clickCount?: number;
783
+ returnXpath: true;
784
+ }): Promise<string>;
785
+ click(x: number, y: number, options?: {
786
+ button?: "left" | "right" | "middle";
787
+ clickCount?: number;
788
+ returnXpath?: false;
789
+ }): Promise<void>;
790
+ click(x: number, y: number, options: {
791
+ button?: "left" | "right" | "middle";
792
+ clickCount?: number;
793
+ returnXpath: boolean;
794
+ }): Promise<void | string>;
795
+ scroll(x: number, y: number, deltaX: number, deltaY: number, options: {
796
+ returnXpath: true;
797
+ }): Promise<string>;
798
+ scroll(x: number, y: number, deltaX: number, deltaY: number, options?: {
799
+ returnXpath?: false;
800
+ }): Promise<void>;
801
+ scroll(x: number, y: number, deltaX: number, deltaY: number, options: {
802
+ returnXpath: boolean;
803
+ }): Promise<void | string>;
804
+ /**
805
+ * Drag from (fromX, fromY) to (toX, toY) using mouse events.
806
+ * Sends mouseMoved → mousePressed → mouseMoved (steps) → mouseReleased.
807
+ */
808
+ dragAndDrop(fromX: number, fromY: number, toX: number, toY: number, options: {
809
+ button?: "left" | "right" | "middle";
810
+ steps?: number;
811
+ delay?: number;
812
+ returnXpath: true;
813
+ }): Promise<[string, string]>;
814
+ dragAndDrop(fromX: number, fromY: number, toX: number, toY: number, options?: {
815
+ button?: "left" | "right" | "middle";
816
+ steps?: number;
817
+ delay?: number;
818
+ returnXpath?: false;
819
+ }): Promise<void>;
820
+ dragAndDrop(fromX: number, fromY: number, toX: number, toY: number, options: {
821
+ button?: "left" | "right" | "middle";
822
+ steps?: number;
823
+ delay?: number;
824
+ returnXpath: boolean;
825
+ }): Promise<void | [string, string]>;
826
+ /**
827
+ * Type a string by dispatching keyDown/keyUp events per character.
828
+ * Focus must already be on the desired element. Uses CDP Input.dispatchKeyEvent
829
+ * and never falls back to Input.insertText. Optional delay applies between
830
+ * successive characters.
831
+ */
832
+ type(text: string, options?: {
833
+ delay?: number;
834
+ withMistakes?: boolean;
835
+ }): Promise<void>;
836
+ /**
837
+ * Press a single key or key combination (keyDown then keyUp).
838
+ * For printable characters, uses the text path on keyDown; for named keys, sets key/code/VK.
839
+ * Supports key combinations with modifiers like "Cmd+A", "Ctrl+C", "Shift+Tab", etc.
840
+ */
841
+ keyPress(key: string, options?: {
842
+ delay?: number;
843
+ }): Promise<void>;
844
+ private _pressedModifiers;
845
+ /** Press a key down without releasing it */
846
+ private keyDown;
847
+ /** Release a pressed key */
848
+ private keyUp;
849
+ /** Normalize modifier key names to match CDP expectations */
850
+ private normalizeModifierKey;
851
+ /**
852
+ * Get the map of named keys with their properties
853
+ */
854
+ private getNamedKeys;
855
+ /**
856
+ * Minimal description for printable keys (letters/digits/space) to provide code and VK.
857
+ * Used when non-Shift modifiers are pressed to avoid sending text while keeping accelerator info.
858
+ */
859
+ private describePrintableKey;
860
+ private isMacOS;
861
+ /**
862
+ * Return Chromium mac editing commands (without trailing ':') for a given code like 'KeyA'
863
+ * Only used on macOS to trigger system editing shortcuts (e.g., selectAll, copy, paste...).
864
+ */
865
+ private macCommandsFor;
866
+ /**
867
+ * Create an isolated world for the **current** main frame and return its context id.
868
+ */
869
+ private createIsolatedWorldForCurrentMain;
870
+ /**
871
+ * Wait until the **current** main frame reaches a lifecycle state.
872
+ * - Fast path via `document.readyState`.
873
+ * - Event path listens at the session level and compares incoming `frameId`
874
+ * to `mainFrameId()` **at event time** to follow root swaps.
875
+ */
876
+ waitForMainLoadState(state: LoadState, timeoutMs?: number): Promise<void>;
537
877
  }
538
- type AgentProviderType = "openai" | "anthropic" | "google";
539
- interface AgentClientOptions {
540
- apiKey: string;
541
- organization?: string;
542
- baseURL?: string;
543
- defaultMaxSteps?: number;
878
+
879
+ interface AgentAction {
880
+ type: string;
881
+ reasoning?: string;
882
+ taskCompleted?: boolean;
883
+ action?: string;
884
+ timeMs?: number;
885
+ pageText?: string;
886
+ pageUrl?: string;
887
+ instruction?: string;
544
888
  [key: string]: unknown;
545
889
  }
890
+ interface AgentResult {
891
+ success: boolean;
892
+ message: string;
893
+ actions: AgentAction[];
894
+ completed: boolean;
895
+ metadata?: Record<string, unknown>;
896
+ usage?: {
897
+ input_tokens: number;
898
+ output_tokens: number;
899
+ inference_time_ms: number;
900
+ };
901
+ }
902
+ interface AgentExecuteOptions {
903
+ instruction: string;
904
+ maxSteps?: number;
905
+ page?: Page$1 | Page$2 | Page$3 | Page;
906
+ highlightCursor?: boolean;
907
+ }
546
908
  type AgentType = "openai" | "anthropic" | "google";
547
- interface AgentExecutionOptions {
548
- options: AgentExecuteOptions$1;
909
+ declare const AVAILABLE_CUA_MODELS: readonly ["openai/computer-use-preview", "openai/computer-use-preview-2025-03-11", "anthropic/claude-3-7-sonnet-latest", "anthropic/claude-haiku-4-5-20251001", "anthropic/claude-sonnet-4-20250514", "anthropic/claude-sonnet-4-5-20250929", "google/gemini-2.5-computer-use-preview-10-2025"];
910
+ type AvailableCuaModel = (typeof AVAILABLE_CUA_MODELS)[number];
911
+ interface AgentExecutionOptions<TOptions extends AgentExecuteOptions = AgentExecuteOptions> {
912
+ options: TOptions;
549
913
  logger: (message: LogLine) => void;
550
914
  retries?: number;
551
915
  }
@@ -553,7 +917,6 @@ interface AgentHandlerOptions {
553
917
  modelName: string;
554
918
  clientOptions?: Record<string, unknown>;
555
919
  userProvidedInstructions?: string;
556
- agentType: AgentType;
557
920
  experimental?: boolean;
558
921
  }
559
922
  interface ActionExecutionResult {
@@ -632,276 +995,40 @@ type ResponseInputItem = {
632
995
  output: string;
633
996
  };
634
997
  interface AgentInstance {
635
- execute: (instructionOrOptions: string | AgentExecuteOptions$1) => Promise<AgentResult$1>;
636
- setScreenshotCollector?: (collector: unknown) => void;
998
+ execute: (instructionOrOptions: string | AgentExecuteOptions) => Promise<AgentResult>;
637
999
  }
638
-
639
- declare const defaultExtractSchema: z.ZodObject<{
640
- extraction: z.ZodString;
641
- }, "strip", z.ZodTypeAny, {
642
- extraction?: string;
643
- }, {
644
- extraction?: string;
645
- }>;
646
- declare const pageTextSchema: z.ZodObject<{
647
- page_text: z.ZodString;
648
- }, "strip", z.ZodTypeAny, {
649
- page_text?: string;
650
- }, {
651
- page_text?: string;
652
- }>;
653
- interface StagehandScreenshotOptions extends PageScreenshotOptions {
654
- /** Controls whether to use CDP for screenshots in Browserbase environment. Defaults to true. */
655
- useCDP?: boolean;
656
- }
657
- interface Page extends Omit<Page$1, "on" | "screenshot"> {
658
- act(action: string): Promise<ActResult>;
659
- act(options: ActOptions): Promise<ActResult>;
660
- act(observation: ObserveResult$1): Promise<ActResult>;
661
- extract(instruction: string): Promise<ExtractResult<typeof defaultExtractSchema>>;
662
- extract<T extends z.AnyZodObject>(options: ExtractOptions<T>): Promise<ExtractResult<T>>;
663
- extract(): Promise<ExtractResult<typeof pageTextSchema>>;
664
- observe(): Promise<ObserveResult$1[]>;
665
- observe(instruction: string): Promise<ObserveResult$1[]>;
666
- observe(options?: ObserveOptions): Promise<ObserveResult$1[]>;
667
- screenshot(options?: StagehandScreenshotOptions): Promise<Buffer>;
668
- on: {
669
- (event: "popup", listener: (page: Page) => unknown): Page;
670
- } & Page$1["on"];
671
- }
672
- type BrowserContext = BrowserContext$1;
673
- type Browser = Browser$1;
674
-
675
- interface EnhancedContext extends Omit<BrowserContext$1, "newPage" | "pages"> {
676
- newPage(): Promise<Page>;
677
- pages(): Page[];
678
- }
679
- type EncodedId = `${number}-${number}`;
680
-
681
- interface StagehandAPIConstructorParams {
682
- apiKey: string;
683
- projectId: string;
684
- logger: (message: LogLine) => void;
685
- }
686
- interface StartSessionParams {
687
- modelName: string;
688
- modelApiKey: string;
689
- domSettleTimeoutMs: number;
690
- verbose: number;
691
- debugDom: boolean;
1000
+ type AgentProviderType = AgentType;
1001
+ type AgentModelConfig<TModelName extends string = string> = {
1002
+ modelName: TModelName;
1003
+ } & Record<string, unknown>;
1004
+ type AgentConfig = {
1005
+ /**
1006
+ * Custom system prompt to provide to the agent. Overrides the default system prompt.
1007
+ */
692
1008
  systemPrompt?: string;
693
- browserbaseSessionCreateParams?: Omit<Browserbase.Sessions.SessionCreateParams, "projectId"> & {
694
- projectId?: string;
695
- };
696
- selfHeal?: boolean;
697
- waitForCaptchaSolves?: boolean;
698
- actionTimeoutMs?: number;
699
- browserbaseSessionID?: string;
700
- }
701
- interface StartSessionResult {
702
- sessionId: string;
703
- available?: boolean;
704
- }
705
-
706
- interface ObserveResult {
707
- selector: string;
708
- description: string;
709
- backendNodeId?: number;
710
- method?: string;
711
- arguments?: string[];
712
- }
713
- interface AgentAction {
714
- type: string;
715
- reasoning?: string;
716
- taskCompleted?: boolean;
717
- action?: string;
718
- timeMs?: number;
719
- pageText?: string;
720
- timestamp?: number;
721
- pageUrl?: string;
722
- instruction?: string;
723
- playwrightArguments?: ObserveResult | null;
724
- [key: string]: unknown;
725
- }
726
- interface AgentResult {
727
- success: boolean;
728
- message: string;
729
- actions: AgentAction[];
730
- completed: boolean;
731
- metadata?: Record<string, unknown>;
732
- usage?: {
733
- input_tokens: number;
734
- output_tokens: number;
735
- inference_time_ms: number;
736
- };
737
- }
738
- interface AgentOptions {
739
- maxSteps?: number;
740
- autoScreenshot?: boolean;
741
- waitBetweenActions?: number;
742
- context?: string;
743
- highlightCursor?: boolean;
744
- }
745
- interface AgentExecuteOptions extends AgentOptions {
746
- instruction: string;
747
- }
748
-
749
- declare class StagehandAPI {
750
- private apiKey;
751
- private projectId;
752
- private sessionId?;
753
- private modelApiKey;
754
- private logger;
755
- private fetchWithCookies;
756
- constructor({ apiKey, projectId, logger }: StagehandAPIConstructorParams);
757
- init({ modelName, modelApiKey, domSettleTimeoutMs, verbose, debugDom, systemPrompt, selfHeal, waitForCaptchaSolves, actionTimeoutMs, browserbaseSessionCreateParams, browserbaseSessionID, }: StartSessionParams): Promise<StartSessionResult>;
758
- act(options: ActOptions | ObserveResult$1): Promise<ActResult>;
759
- extract<T extends z.AnyZodObject>(options: ExtractOptions<T>): Promise<ExtractResult<T>>;
760
- observe(options?: ObserveOptions): Promise<ObserveResult$1[]>;
761
- goto(url: string, options?: GotoOptions): Promise<void>;
762
- agentExecute(agentConfig: AgentConfig, executeOptions: AgentExecuteOptions): Promise<AgentResult>;
763
- end(): Promise<Response>;
764
- private execute;
765
- private request;
766
- }
767
-
768
- declare class StagehandContext {
769
- private readonly stagehand;
770
- private readonly intContext;
771
- private pageMap;
772
- private activeStagehandPage;
773
- private readonly frameIdMap;
774
- private constructor();
775
- private createStagehandPage;
776
- static init(context: BrowserContext$1, stagehand: Stagehand): Promise<StagehandContext>;
777
- get frameIdLookup(): ReadonlyMap<string, StagehandPage>;
778
- registerFrameId(frameId: string, page: StagehandPage): void;
779
- unregisterFrameId(frameId: string): void;
780
- getStagehandPageByFrameId(frameId: string): StagehandPage | undefined;
781
- get context(): EnhancedContext;
782
- getStagehandPage(page: Page$1): Promise<StagehandPage>;
783
- getStagehandPages(): Promise<StagehandPage[]>;
784
- setActivePage(page: StagehandPage): void;
785
- getActivePage(): StagehandPage | null;
786
- private handleNewPlaywrightPage;
787
- private attachFrameNavigatedListener;
788
- }
789
-
790
- declare class StagehandPage {
791
- private stagehand;
792
- private rawPage;
793
- private intPage;
794
- private intContext;
795
- private actHandler;
796
- private extractHandler;
797
- private observeHandler;
798
- private llmClient;
799
- private cdpClient;
800
- private api;
801
- private userProvidedInstructions?;
802
- private waitForCaptchaSolves;
803
- private initialized;
804
- private readonly cdpClients;
805
- private fidOrdinals;
806
- private rootFrameId;
807
- get frameId(): string;
808
- updateRootFrameId(newId: string): void;
809
- constructor(page: Page$1, stagehand: Stagehand, context: StagehandContext, llmClient: LLMClient, userProvidedInstructions?: string, api?: StagehandAPI, waitForCaptchaSolves?: boolean);
810
- ordinalForFrameId(fid: string | undefined): number;
811
- encodeWithFrameId(fid: string | undefined, backendId: number): EncodedId;
812
- resetFrameOrdinals(): void;
813
- private ensureStagehandScript;
814
- /** Register the custom selector engine that pierces open/closed shadow roots. */
815
- private ensureStagehandSelectorEngine;
816
- /**
817
- * Waits for a captcha to be solved when using Browserbase environment.
818
- *
819
- * @param timeoutMs - Optional timeout in milliseconds. If provided, the promise will reject if the captcha solving hasn't started within the given time.
820
- * @throws StagehandEnvironmentError if called in a LOCAL environment
821
- * @throws CaptchaTimeoutError if the timeout is reached before captcha solving starts
822
- * @returns Promise that resolves when the captcha is solved
823
- */
824
- waitForCaptchaSolve(timeoutMs?: number): Promise<void>;
825
- init(): Promise<StagehandPage>;
826
- get page(): Page;
827
- get context(): EnhancedContext;
828
- /**
829
- * `_waitForSettledDom` waits until the DOM is settled, and therefore is
830
- * ready for actions to be taken.
831
- *
832
- * **Definition of "settled"**
833
- * • No in-flight network requests (except WebSocket / Server-Sent-Events).
834
- * • That idle state lasts for at least **500 ms** (the "quiet-window").
835
- *
836
- * **How it works**
837
- * 1. Subscribes to CDP Network and Page events for the main target and all
838
- * out-of-process iframes (via `Target.setAutoAttach { flatten:true }`).
839
- * 2. Every time `Network.requestWillBeSent` fires, the request ID is added
840
- * to an **`inflight`** `Set`.
841
- * 3. When the request finishes—`loadingFinished`, `loadingFailed`,
842
- * `requestServedFromCache`, or a *data:* response—the request ID is
843
- * removed.
844
- * 4. *Document* requests are also mapped **frameId → requestId**; when
845
- * `Page.frameStoppedLoading` fires the corresponding Document request is
846
- * removed immediately (covers iframes whose network events never close).
847
- * 5. A **stalled-request sweep timer** runs every 500 ms. If a *Document*
848
- * request has been open for ≥ 2 s it is forcibly removed; this prevents
849
- * ad/analytics iframes from blocking the wait forever.
850
- * 6. When `inflight` becomes empty the helper starts a 500 ms timer.
851
- * If no new request appears before the timer fires, the promise
852
- * resolves → **DOM is considered settled**.
853
- * 7. A global guard (`timeoutMs` or `stagehand.domSettleTimeoutMs`,
854
- * default ≈ 30 s) ensures we always resolve; if it fires we log how many
855
- * requests were still outstanding.
856
- *
857
- * @param timeoutMs – Optional hard cap (ms). Defaults to
858
- * `this.stagehand.domSettleTimeoutMs`.
1009
+ /**
1010
+ * MCP integrations - Array of Client objects
859
1011
  */
860
- _waitForSettledDom(timeoutMs?: number): Promise<void>;
861
- act(actionOrOptions: string | ActOptions | ObserveResult$1): Promise<ActResult>;
862
- extract<T extends z.AnyZodObject = typeof defaultExtractSchema>(instructionOrOptions?: string | ExtractOptions<T>): Promise<ExtractResult<T>>;
863
- observe(instructionOrOptions?: string | ObserveOptions): Promise<ObserveResult$1[]>;
1012
+ integrations?: (Client | string)[];
864
1013
  /**
865
- * Get or create a CDP session for the given target.
866
- * @param target The Page or (OOPIF) Frame you want to talk to.
1014
+ * Tools passed to the agent client
867
1015
  */
868
- getCDPClient(target?: Page$1 | Frame): Promise<CDPSession>;
1016
+ tools?: ToolSet;
869
1017
  /**
870
- * Send a CDP command to the chosen DevTools target.
871
- *
872
- * @param method Any valid CDP method, e.g. `"DOM.getDocument"`.
873
- * @param params Command parameters (optional).
874
- * @param target A `Page` or OOPIF `Frame`. Defaults to the main page.
875
- *
876
- * @typeParam T Expected result shape (defaults to `unknown`).
1018
+ * Indicates CUA is disabled for this configuration
877
1019
  */
878
- sendCDP<T = unknown>(method: string, params?: Record<string, unknown>, target?: Page$1 | Frame): Promise<T>;
879
- /** Enable a CDP domain (e.g. `"Network"` or `"DOM"`) on the chosen target. */
880
- enableCDP(domain: string, target?: Page$1 | Frame): Promise<void>;
881
- /** Disable a CDP domain on the chosen target. */
882
- disableCDP(domain: string, target?: Page$1 | Frame): Promise<void>;
883
- }
884
-
885
- interface ConnectToMCPServerOptions {
886
- serverUrl: string | URL;
887
- clientOptions?: ClientOptions$3;
888
- }
889
- interface StdioServerConfig {
890
- command: string;
891
- args?: string[];
892
- env?: Record<string, string>;
893
- }
894
- declare const connectToMCPServer: (serverConfig: string | URL | StdioServerConfig | ConnectToMCPServerOptions) => Promise<Client$1>;
895
-
896
- interface BrowserResult {
897
- env: "LOCAL" | "BROWSERBASE";
898
- browser?: Browser;
899
- context: BrowserContext;
900
- debugUrl?: string;
901
- sessionUrl?: string;
902
- contextPath?: string;
903
- sessionId?: string;
904
- }
1020
+ cua?: boolean;
1021
+ /**
1022
+ * The model to use for agent functionality
1023
+ */
1024
+ model?: string | AgentModelConfig<string>;
1025
+ /**
1026
+ * The model to use for tool execution (observe/act calls within agent tools).
1027
+ * If not specified, inherits from the main model configuration.
1028
+ * Format: "provider/model" (e.g., "openai/gpt-4o-mini", "google/gemini-2.0-flash-exp")
1029
+ */
1030
+ executionModel?: string | AgentModelConfig<string>;
1031
+ };
905
1032
 
906
1033
  declare class StagehandAPIError extends Error {
907
1034
  constructor(message: string);
@@ -922,6 +1049,144 @@ declare class StagehandResponseParseError extends StagehandAPIError {
922
1049
  constructor(message: string);
923
1050
  }
924
1051
 
1052
+ interface ActOptions {
1053
+ model?: ModelConfiguration;
1054
+ variables?: Record<string, string>;
1055
+ timeout?: number;
1056
+ page?: Page$1 | Page$2 | Page$3 | Page;
1057
+ }
1058
+ interface ActResult {
1059
+ success: boolean;
1060
+ message: string;
1061
+ actionDescription: string;
1062
+ actions: Action[];
1063
+ }
1064
+ type ExtractResult<T extends z$1.AnyZodObject> = z$1.infer<T>;
1065
+ interface Action {
1066
+ selector: string;
1067
+ description: string;
1068
+ method?: string;
1069
+ arguments?: string[];
1070
+ }
1071
+ interface HistoryEntry {
1072
+ method: "act" | "extract" | "observe" | "navigate" | "agent";
1073
+ parameters: unknown;
1074
+ result: unknown;
1075
+ timestamp: string;
1076
+ }
1077
+ interface ExtractOptions {
1078
+ model?: ModelConfiguration;
1079
+ timeout?: number;
1080
+ selector?: string;
1081
+ page?: Page$1 | Page$2 | Page$3 | Page;
1082
+ }
1083
+ declare const defaultExtractSchema: z$1.ZodObject<{
1084
+ extraction: z$1.ZodString;
1085
+ }, "strip", z$1.ZodTypeAny, {
1086
+ extraction?: string;
1087
+ }, {
1088
+ extraction?: string;
1089
+ }>;
1090
+ declare const pageTextSchema: z$1.ZodObject<{
1091
+ pageText: z$1.ZodString;
1092
+ }, "strip", z$1.ZodTypeAny, {
1093
+ pageText?: string;
1094
+ }, {
1095
+ pageText?: string;
1096
+ }>;
1097
+ interface ObserveOptions {
1098
+ model?: ModelConfiguration;
1099
+ timeout?: number;
1100
+ selector?: string;
1101
+ page?: Page$1 | Page$2 | Page$3 | Page;
1102
+ }
1103
+ declare enum V3FunctionName {
1104
+ ACT = "ACT",
1105
+ EXTRACT = "EXTRACT",
1106
+ OBSERVE = "OBSERVE",
1107
+ AGENT = "AGENT"
1108
+ }
1109
+
1110
+ interface StagehandMetrics {
1111
+ actPromptTokens: number;
1112
+ actCompletionTokens: number;
1113
+ actInferenceTimeMs: number;
1114
+ extractPromptTokens: number;
1115
+ extractCompletionTokens: number;
1116
+ extractInferenceTimeMs: number;
1117
+ observePromptTokens: number;
1118
+ observeCompletionTokens: number;
1119
+ observeInferenceTimeMs: number;
1120
+ agentPromptTokens: number;
1121
+ agentCompletionTokens: number;
1122
+ agentInferenceTimeMs: number;
1123
+ totalPromptTokens: number;
1124
+ totalCompletionTokens: number;
1125
+ totalInferenceTimeMs: number;
1126
+ }
1127
+
1128
+ type V3Env = "LOCAL" | "BROWSERBASE";
1129
+ /** Local launch options for V3 (chrome-launcher + CDP).
1130
+ * Matches v2 shape where feasible; unsupported fields are accepted but ignored.
1131
+ */
1132
+ interface LocalBrowserLaunchOptions {
1133
+ args?: string[];
1134
+ executablePath?: string;
1135
+ userDataDir?: string;
1136
+ preserveUserDataDir?: boolean;
1137
+ headless?: boolean;
1138
+ devtools?: boolean;
1139
+ chromiumSandbox?: boolean;
1140
+ ignoreDefaultArgs?: boolean | string[];
1141
+ proxy?: {
1142
+ server: string;
1143
+ bypass?: string;
1144
+ username?: string;
1145
+ password?: string;
1146
+ };
1147
+ locale?: string;
1148
+ viewport?: {
1149
+ width: number;
1150
+ height: number;
1151
+ };
1152
+ deviceScaleFactor?: number;
1153
+ hasTouch?: boolean;
1154
+ ignoreHTTPSErrors?: boolean;
1155
+ cdpUrl?: string;
1156
+ connectTimeoutMs?: number;
1157
+ downloadsPath?: string;
1158
+ acceptDownloads?: boolean;
1159
+ }
1160
+ /** Constructor options for V3 */
1161
+ interface V3Options {
1162
+ env: V3Env;
1163
+ apiKey?: string;
1164
+ projectId?: string;
1165
+ /**
1166
+ * Optional: fine-tune Browserbase session creation or resume an existing session.
1167
+ */
1168
+ browserbaseSessionCreateParams?: Omit<Browserbase.Sessions.SessionCreateParams, "projectId"> & {
1169
+ projectId?: string;
1170
+ };
1171
+ browserbaseSessionID?: string;
1172
+ localBrowserLaunchOptions?: LocalBrowserLaunchOptions;
1173
+ model?: ModelConfiguration;
1174
+ llmClient?: LLMClient;
1175
+ systemPrompt?: string;
1176
+ logInferenceToFile?: boolean;
1177
+ experimental?: boolean;
1178
+ verbose?: 0 | 1 | 2;
1179
+ selfHeal?: boolean;
1180
+ /** Disable pino logging backend (useful for tests or minimal environments). */
1181
+ disablePino?: boolean;
1182
+ /** Optional external logger hook for integrating with host apps. */
1183
+ logger?: (line: LogLine) => void;
1184
+ /** Directory used to persist cached actions for act(). */
1185
+ cacheDir?: string;
1186
+ domSettleTimeout?: number;
1187
+ disableAPI?: boolean;
1188
+ }
1189
+
925
1190
  declare class StagehandError extends Error {
926
1191
  constructor(message: string);
927
1192
  }
@@ -1026,178 +1291,529 @@ declare class StagehandShadowSegmentNotFoundError extends StagehandError {
1026
1291
  constructor(segment: string, hint?: string);
1027
1292
  }
1028
1293
 
1029
- interface CacheEntry {
1030
- timestamp: number;
1031
- data: unknown;
1032
- requestId: string;
1033
- }
1034
- interface CacheStore {
1035
- [key: string]: CacheEntry;
1036
- }
1037
- declare class BaseCache<T extends CacheEntry> {
1038
- private readonly CACHE_MAX_AGE_MS;
1039
- private readonly CLEANUP_PROBABILITY;
1040
- protected cacheDir: string;
1041
- protected cacheFile: string;
1042
- protected lockFile: string;
1043
- protected logger: (message: LogLine) => void;
1044
- private readonly LOCK_TIMEOUT_MS;
1045
- protected lockAcquired: boolean;
1046
- protected lockAcquireFailures: number;
1047
- protected requestIdToUsedHashes: {
1048
- [key: string]: string[];
1294
+ interface StagehandAPIConstructorParams {
1295
+ apiKey: string;
1296
+ projectId: string;
1297
+ logger: (message: LogLine) => void;
1298
+ }
1299
+ interface StartSessionParams {
1300
+ modelName: string;
1301
+ modelApiKey: string;
1302
+ domSettleTimeoutMs: number;
1303
+ verbose: number;
1304
+ systemPrompt?: string;
1305
+ browserbaseSessionCreateParams?: Omit<Browserbase.Sessions.SessionCreateParams, "projectId"> & {
1306
+ projectId?: string;
1049
1307
  };
1050
- constructor(logger: (message: LogLine) => void, cacheDir?: string, cacheFile?: string);
1051
- private setupProcessHandlers;
1052
- protected ensureCacheDirectory(): void;
1053
- protected createHash(data: unknown): string;
1054
- protected sleep(ms: number): Promise<void>;
1055
- acquireLock(): Promise<boolean>;
1056
- releaseLock(): void;
1308
+ selfHeal?: boolean;
1309
+ browserbaseSessionID?: string;
1310
+ }
1311
+ interface StartSessionResult {
1312
+ sessionId: string;
1313
+ available?: boolean;
1314
+ }
1315
+ interface APIActParameters {
1316
+ input: string | Action;
1317
+ options?: ActOptions;
1318
+ frameId?: string;
1319
+ }
1320
+ interface APIExtractParameters {
1321
+ instruction?: string;
1322
+ schema?: ZodTypeAny;
1323
+ options?: ExtractOptions;
1324
+ frameId?: string;
1325
+ }
1326
+ interface APIObserveParameters {
1327
+ instruction?: string;
1328
+ options?: ObserveOptions;
1329
+ frameId?: string;
1330
+ }
1331
+
1332
+ /**
1333
+ * Represents a path through a Zod schema from the root object down to a
1334
+ * particular field. The `segments` array describes the chain of keys/indices.
1335
+ *
1336
+ * - **String** segments indicate object property names.
1337
+ * - **Number** segments indicate array indices.
1338
+ *
1339
+ * For example, `["users", 0, "homepage"]` might describe reaching
1340
+ * the `homepage` field in `schema.users[0].homepage`.
1341
+ */
1342
+ interface ZodPathSegments {
1057
1343
  /**
1058
- * Cleans up stale cache entries that exceed the maximum age.
1344
+ * The ordered list of keys/indices leading from the schema root
1345
+ * to the targeted field.
1059
1346
  */
1060
- cleanupStaleEntries(): Promise<void>;
1061
- protected readCache(): CacheStore;
1062
- protected writeCache(cache: CacheStore): void;
1347
+ segments: Array<string | number>;
1348
+ }
1349
+
1350
+ type EvaluateOptions = {
1351
+ /** The question to ask about the task state */
1352
+ question: string;
1353
+ /** The answer to the question */
1354
+ answer?: string;
1355
+ /** Whether to take a screenshot of the task state, or array of screenshots to evaluate */
1356
+ screenshot?: boolean | Buffer[];
1357
+ /** Custom system prompt for the evaluator */
1358
+ systemPrompt?: string;
1359
+ /** Delay in milliseconds before taking the screenshot @default 250 */
1360
+ screenshotDelayMs?: number;
1361
+ /** The agent's reasoning/thought process for completing the task */
1362
+ agentReasoning?: string;
1363
+ };
1364
+ type BatchAskOptions = {
1365
+ /** Array of questions with optional answers */
1366
+ questions: Array<{
1367
+ question: string;
1368
+ answer?: string;
1369
+ }>;
1370
+ /** Whether to take a screenshot of the task state */
1371
+ screenshot?: boolean;
1372
+ /** Custom system prompt for the evaluator */
1373
+ systemPrompt?: string;
1374
+ /** Delay in milliseconds before taking the screenshot @default 1000 */
1375
+ screenshotDelayMs?: number;
1376
+ };
1377
+ /**
1378
+ * Result of an evaluation
1379
+ */
1380
+ interface EvaluationResult {
1063
1381
  /**
1064
- * Retrieves data from the cache based on the provided options.
1065
- * @param hashObj - The options used to generate the cache key.
1066
- * @param requestId - The identifier for the current request.
1067
- * @returns The cached data if available, otherwise null.
1382
+ * The evaluation result ('YES', 'NO', or 'INVALID' if parsing failed or value was unexpected)
1068
1383
  */
1069
- get(hashObj: Record<string, unknown> | string, requestId: string): Promise<T["data"] | null>;
1384
+ evaluation: "YES" | "NO" | "INVALID";
1070
1385
  /**
1071
- * Stores data in the cache based on the provided options and requestId.
1072
- * @param hashObj - The options used to generate the cache key.
1073
- * @param data - The data to be cached.
1074
- * @param requestId - The identifier for the cache entry.
1386
+ * The reasoning behind the evaluation
1075
1387
  */
1076
- set(hashObj: Record<string, unknown>, data: T["data"], requestId: string): Promise<void>;
1077
- delete(hashObj: Record<string, unknown>): Promise<void>;
1388
+ reasoning: string;
1389
+ }
1390
+
1391
+ /**
1392
+ * V3Context
1393
+ *
1394
+ * Owns the root CDP connection and wires Target/Page events into Page.
1395
+ * Maintains one Page per top-level target, adopts OOPIF child sessions into the owner Page,
1396
+ * and tracks target→page and (root) frame→target mappings for lookups.
1397
+ *
1398
+ * IMPORTANT: FrameId → session ownership is managed inside Page (via its FrameRegistry).
1399
+ * Context never “guesses” owners; it simply forwards events (with the emitting session)
1400
+ * so Page can record the correct owner at event time.
1401
+ */
1402
+ declare class V3Context {
1403
+ readonly conn: CdpConnection;
1404
+ private readonly env;
1405
+ private readonly apiClient;
1406
+ private readonly localBrowserLaunchOptions;
1407
+ private constructor();
1408
+ private readonly _piercerInstalled;
1409
+ private _lastPopupSignalAt;
1410
+ private sessionKey;
1411
+ private readonly _sessionInit;
1412
+ private pagesByTarget;
1413
+ private mainFrameToTarget;
1414
+ private sessionOwnerPage;
1415
+ private frameOwnerPage;
1416
+ private pendingOopifByMainFrame;
1417
+ private createdAtByTarget;
1418
+ private typeByTarget;
1419
+ private _pageOrder;
1420
+ private pendingCreatedTargetUrl;
1078
1421
  /**
1079
- * Tracks the usage of a hash with a specific requestId.
1080
- * @param requestId - The identifier for the current request.
1081
- * @param hash - The cache key hash.
1422
+ * Create a Context for a given CDP websocket URL and bootstrap target wiring.
1082
1423
  */
1083
- protected trackRequestIdUsage(requestId: string, hash: string): void;
1424
+ static create(wsUrl: string, opts?: {
1425
+ env?: "LOCAL" | "BROWSERBASE";
1426
+ apiClient?: StagehandAPIClient | null;
1427
+ localBrowserLaunchOptions?: LocalBrowserLaunchOptions | null;
1428
+ }): Promise<V3Context>;
1084
1429
  /**
1085
- * Deletes all cache entries associated with a specific requestId.
1086
- * @param requestId - The identifier for the request whose cache entries should be deleted.
1430
+ * Wait until at least one top-level Page has been created and registered.
1431
+ * We poll internal maps that bootstrap/onAttachedToTarget populate.
1087
1432
  */
1088
- deleteCacheForRequestId(requestId: string): Promise<void>;
1433
+ private waitForFirstTopLevelPage;
1434
+ private ensurePiercer;
1435
+ /** Mark a page target as the most-recent one (active). */
1436
+ private _pushActive;
1437
+ /** Remove a page target from the recency list (used on close). */
1438
+ private _removeFromOrder;
1439
+ /** Return the current active Page (most-recent page that still exists). */
1440
+ activePage(): Page | undefined;
1441
+ /** Explicitly mark a known Page as the most-recent active page (and focus it). */
1442
+ setActivePage(page: Page): void;
1089
1443
  /**
1090
- * Resets the entire cache by clearing the cache file.
1444
+ * Return top-level `Page`s (oldest → newest). OOPIF targets are not included.
1091
1445
  */
1092
- resetCache(): void;
1093
- }
1094
-
1095
- declare class LLMCache extends BaseCache<CacheEntry> {
1096
- constructor(logger: (message: {
1097
- category?: string;
1098
- message: string;
1099
- level?: number;
1100
- }) => void, cacheDir?: string, cacheFile?: string);
1446
+ pages(): Page[];
1101
1447
  /**
1102
- * Overrides the get method to track used hashes by requestId.
1103
- * @param options - The options used to generate the cache key.
1104
- * @param requestId - The identifier for the current request.
1105
- * @returns The cached data if available, otherwise null.
1448
+ * Resolve an owning `Page` by the **top-level main frame id**.
1449
+ * Note: child (OOPIF) roots are intentionally not present in this mapping.
1106
1450
  */
1107
- get<T>(options: Record<string, unknown>, requestId: string): Promise<T | null>;
1451
+ resolvePageByMainFrameId(frameId: string): Page | undefined;
1108
1452
  /**
1109
- * Overrides the set method to include cache cleanup logic.
1110
- * @param options - The options used to generate the cache key.
1111
- * @param data - The data to be cached.
1112
- * @param requestId - The identifier for the current request.
1453
+ * Serialize the full frame tree for a given top-level main frame id.
1113
1454
  */
1114
- set(options: Record<string, unknown>, data: unknown, requestId: string): Promise<void>;
1455
+ getFullFrameTreeByMainFrameId(rootMainFrameId: string): Promise<Protocol.Page.FrameTree>;
1456
+ /**
1457
+ * Create a new top-level page (tab) with the given URL and return its Page object.
1458
+ * Waits until the target is attached and registered.
1459
+ */
1460
+ newPage(url?: string): Promise<Page>;
1461
+ /**
1462
+ * Close CDP and clear all mappings. Best-effort cleanup.
1463
+ */
1464
+ close(): Promise<void>;
1465
+ /**
1466
+ * Bootstrap target lifecycle:
1467
+ * - Attach to existing targets.
1468
+ * - Attach on `Target.targetCreated` (fallback for OOPIFs).
1469
+ * - Handle auto-attach events.
1470
+ * - Clean up on detach/destroy.
1471
+ */
1472
+ private bootstrap;
1473
+ /**
1474
+ * Handle a newly attached target (top-level or potential OOPIF):
1475
+ * - Enable Page domain and lifecycle events.
1476
+ * - If top-level → create Page, wire listeners, resume.
1477
+ * - Else → probe child root frame id via `Page.getFrameTree` and adopt immediately
1478
+ * if the parent is known; otherwise stage until parent `frameAttached`.
1479
+ * - Resume the target only after listeners are wired.
1480
+ */
1481
+ private onAttachedToTarget;
1482
+ /**
1483
+ * Detach handler:
1484
+ * - Remove child session ownership and prune its subtree.
1485
+ * - If a top-level target, cleanup its `Page` and mappings.
1486
+ * - Drop any staged child for this session.
1487
+ */
1488
+ private onDetachedFromTarget;
1489
+ /**
1490
+ * Cleanup a top-level Page by target id, removing its root and staged children.
1491
+ */
1492
+ private cleanupByTarget;
1493
+ /**
1494
+ * Wire Page-domain frame events for a session into the owning Page & mappings.
1495
+ * We forward the *emitting session* with every event so Page can stamp ownership precisely.
1496
+ */
1497
+ private installFrameEventBridges;
1498
+ /**
1499
+ * Register that a session belongs to a Page (used by event routing).
1500
+ */
1501
+ private wireSessionToOwnerPage;
1502
+ /**
1503
+ * Utility: reverse-lookup the top-level target id that owns a given Page.
1504
+ */
1505
+ private findTargetIdByPage;
1506
+ private _notePopupSignal;
1507
+ /**
1508
+ * Await the current active page, waiting briefly if a popup/open was just triggered.
1509
+ * Normal path returns immediately; popup path waits up to timeoutMs for the new page.
1510
+ */
1511
+ awaitActivePage(timeoutMs?: number): Promise<Page>;
1115
1512
  }
1116
1513
 
1117
- declare class AISdkClient extends LLMClient {
1118
- type: "aisdk";
1119
- private model;
1120
- private logger?;
1121
- private cache;
1122
- private enableCaching;
1123
- constructor({ model, logger, enableCaching, cache, }: {
1124
- model: LanguageModel;
1125
- logger?: (message: LogLine) => void;
1126
- enableCaching?: boolean;
1127
- cache?: LLMCache;
1128
- });
1129
- getLanguageModel(): LanguageModel;
1130
- createChatCompletion<T = ChatCompletion>({ options, }: CreateChatCompletionOptions): Promise<T>;
1514
+ type AgentReplayStep = AgentReplayActStep | AgentReplayFillFormStep | AgentReplayGotoStep | AgentReplayScrollStep | AgentReplayWaitStep | AgentReplayNavBackStep | {
1515
+ type: string;
1516
+ [key: string]: unknown;
1517
+ };
1518
+ interface AgentReplayActStep {
1519
+ type: "act";
1520
+ instruction: string;
1521
+ actions?: Action[];
1522
+ actionDescription?: string;
1523
+ message?: string;
1524
+ timeout?: number;
1131
1525
  }
1132
-
1133
- declare class Stagehand {
1134
- private stagehandPage;
1135
- private stagehandContext;
1136
- browserbaseSessionID?: string;
1137
- readonly domSettleTimeoutMs: number;
1138
- readonly debugDom: boolean;
1139
- readonly headless: boolean;
1140
- verbose: 0 | 1 | 2;
1141
- llmProvider: LLMProvider;
1142
- enableCaching: boolean;
1143
- protected apiKey: string | undefined;
1144
- private projectId;
1145
- private externalLogger?;
1146
- private browserbaseSessionCreateParams?;
1147
- variables: {
1148
- [key: string]: unknown;
1526
+ interface AgentReplayFillFormStep {
1527
+ type: "fillForm";
1528
+ fields?: Array<{
1529
+ action: string;
1530
+ value: string;
1531
+ }>;
1532
+ observeResults?: Action[];
1533
+ actions?: Action[];
1534
+ }
1535
+ interface AgentReplayGotoStep {
1536
+ type: "goto";
1537
+ url: string;
1538
+ waitUntil?: LoadState;
1539
+ }
1540
+ interface AgentReplayScrollStep {
1541
+ type: "scroll";
1542
+ deltaX?: number;
1543
+ deltaY?: number;
1544
+ anchor?: {
1545
+ x: number;
1546
+ y: number;
1149
1547
  };
1150
- private contextPath?;
1548
+ }
1549
+ interface AgentReplayWaitStep {
1550
+ type: "wait";
1551
+ timeMs: number;
1552
+ }
1553
+ interface AgentReplayNavBackStep {
1554
+ type: "navback";
1555
+ waitUntil?: LoadState;
1556
+ }
1557
+
1558
+ /**
1559
+ * V3
1560
+ *
1561
+ * Purpose:
1562
+ * A high-level orchestrator for Stagehand V3. Abstracts away whether the browser
1563
+ * runs **locally via Chrome** or remotely on **Browserbase**, and exposes simple
1564
+ * entrypoints (`act`, `extract`, `observe`) that delegate to the corresponding
1565
+ * handler classes.
1566
+ *
1567
+ * Responsibilities:
1568
+ * - Bootstraps Chrome or Browserbase, ensures a working CDP WebSocket, and builds a `V3Context`.
1569
+ * - Manages lifecycle: init, context access, cleanup.
1570
+ * - Bridges external page objects (Playwright/Puppeteer) into internal frameIds for handlers.
1571
+ * - Provides a stable API surface for downstream code regardless of runtime environment.
1572
+ */
1573
+ declare class V3 {
1574
+ private readonly opts;
1575
+ private state;
1576
+ private actHandler;
1577
+ private extractHandler;
1578
+ private observeHandler;
1579
+ private ctx;
1151
1580
  llmClient: LLMClient;
1152
- readonly userProvidedInstructions?: string;
1153
- private usingAPI;
1154
1581
  private modelName;
1155
- apiClient: StagehandAPI | undefined;
1156
- readonly waitForCaptchaSolves: boolean;
1157
- private localBrowserLaunchOptions?;
1158
- readonly selfHeal: boolean;
1159
- private cleanupCalled;
1160
- readonly actTimeoutMs: number;
1161
- readonly logInferenceToFile?: boolean;
1582
+ private modelClientOptions;
1583
+ private llmProvider;
1584
+ private overrideLlmClients;
1585
+ private readonly domSettleTimeoutMs?;
1586
+ private _isClosing;
1587
+ browserbaseSessionId?: string;
1588
+ private _onCdpClosed;
1589
+ readonly experimental: boolean;
1590
+ readonly logInferenceToFile: boolean;
1591
+ readonly disableAPI: boolean;
1592
+ private externalLogger?;
1593
+ verbose: 0 | 1 | 2;
1162
1594
  private stagehandLogger;
1163
- private disablePino;
1164
- protected modelClientOptions: ClientOptions;
1165
- private _env;
1166
- private _browser;
1167
- private _isClosed;
1168
1595
  private _history;
1169
- readonly experimental: boolean;
1170
- private _livePageProxy?;
1171
- private createLivePageProxy;
1172
- get history(): ReadonlyArray<HistoryEntry>;
1173
- protected setActivePage(page: StagehandPage): void;
1174
- get page(): Page;
1596
+ private readonly instanceId;
1597
+ private static _processGuardsInstalled;
1598
+ private static _instances;
1599
+ private cacheStorage;
1600
+ private actCache;
1601
+ private agentCache;
1602
+ private apiClient;
1175
1603
  stagehandMetrics: StagehandMetrics;
1176
- get metrics(): StagehandMetrics;
1177
- get isClosed(): boolean;
1178
- updateMetrics(functionName: StagehandFunctionName, promptTokens: number, completionTokens: number, inferenceTimeMs: number): void;
1604
+ constructor(opts: V3Options);
1605
+ /**
1606
+ * Async property for metrics so callers can `await v3.metrics`.
1607
+ * Returning a Promise future-proofs async aggregation/storage.
1608
+ */
1609
+ get metrics(): Promise<StagehandMetrics>;
1610
+ private resolveLlmClient;
1611
+ private beginAgentReplayRecording;
1612
+ private endAgentReplayRecording;
1613
+ private discardAgentReplayRecording;
1614
+ private isAgentReplayRecording;
1615
+ isAgentReplayActive(): boolean;
1616
+ recordAgentReplayStep(step: AgentReplayStep): void;
1617
+ /**
1618
+ * Async property for history so callers can `await v3.history`.
1619
+ * Returns a frozen copy to avoid external mutation.
1620
+ */
1621
+ get history(): Promise<ReadonlyArray<HistoryEntry>>;
1622
+ addToHistory(method: HistoryEntry["method"], parameters: unknown, result?: unknown): void;
1623
+ updateMetrics(functionName: V3FunctionName, promptTokens: number, completionTokens: number, inferenceTimeMs: number): void;
1179
1624
  private updateTotalMetrics;
1180
- constructor({ env, apiKey, projectId, verbose, llmProvider, llmClient, logger, browserbaseSessionCreateParams, domSettleTimeoutMs, enableCaching, browserbaseSessionID, modelName, modelClientOptions, systemPrompt, useAPI, localBrowserLaunchOptions, waitForCaptchaSolves, logInferenceToFile, selfHeal, disablePino, experimental, }?: ConstructorParams);
1181
- private registerSignalHandlers;
1625
+ private _immediateShutdown;
1626
+ private static _installProcessGuards;
1627
+ /**
1628
+ * Entrypoint: initializes handlers, launches Chrome or Browserbase,
1629
+ * and sets up a CDP context.
1630
+ */
1631
+ init(): Promise<void>;
1632
+ /** Apply post-connect local browser options that require CDP. */
1633
+ private _applyPostConnectLocalOptions;
1634
+ private _ensureBrowserbaseDownloadsEnabled;
1635
+ /**
1636
+ * Run an "act" instruction through the ActHandler.
1637
+ *
1638
+ * New API:
1639
+ * - act(instruction: string, options?: ActOptions)
1640
+ * - act(action: Action, options?: ActOptions)
1641
+ */
1642
+ act(instruction: string, options?: ActOptions): Promise<ActResult>;
1643
+ act(action: Action, options?: ActOptions): Promise<ActResult>;
1644
+ /**
1645
+ * Run an "extract" instruction through the ExtractHandler.
1646
+ *
1647
+ * Accepted forms:
1648
+ * - extract() → pageText
1649
+ * - extract(options) → pageText
1650
+ * - extract(instruction) → defaultExtractSchema
1651
+ * - extract(instruction, schema) → schema-inferred
1652
+ * - extract(instruction, schema, options)
1653
+ */
1654
+ extract(): Promise<z$1.infer<typeof pageTextSchema>>;
1655
+ extract(options: ExtractOptions): Promise<z$1.infer<typeof pageTextSchema>>;
1656
+ extract(instruction: string, options?: ExtractOptions): Promise<z$1.infer<typeof defaultExtractSchema>>;
1657
+ extract<T extends ZodTypeAny>(instruction: string, schema: T, options?: ExtractOptions): Promise<z$1.infer<T>>;
1658
+ /**
1659
+ * Run an "observe" instruction through the ObserveHandler.
1660
+ */
1661
+ observe(): Promise<Action[]>;
1662
+ observe(options: ObserveOptions): Promise<Action[]>;
1663
+ observe(instruction: string, options?: ObserveOptions): Promise<Action[]>;
1664
+ /** Return the browser-level CDP WebSocket endpoint. */
1665
+ connectURL(): string;
1666
+ /** Expose the current CDP-backed context. */
1667
+ get context(): V3Context;
1668
+ /** Best-effort cleanup of context and launched resources. */
1669
+ close(opts?: {
1670
+ force?: boolean;
1671
+ }): Promise<void>;
1672
+ /** Guard: ensure Browserbase credentials exist in options. */
1673
+ private requireBrowserbaseCreds;
1182
1674
  get logger(): (logLine: LogLine) => void;
1183
- get env(): "LOCAL" | "BROWSERBASE";
1184
- get downloadsPath(): string;
1185
- get context(): EnhancedContext;
1186
- init(): Promise<InitResult>;
1187
- log(logObj: LogLine): void;
1188
- close(): Promise<void>;
1189
- addToHistory(method: HistoryEntry["method"], parameters: ActOptions | ExtractOptions<z.AnyZodObject> | ObserveOptions | {
1190
- url: string;
1191
- options: GotoOptions;
1192
- } | string, result?: unknown): void;
1193
1675
  /**
1194
- * Create an agent instance that can be executed with different instructions
1195
- * @returns An agent instance with execute() method
1676
+ * Normalize a Playwright/Puppeteer page object into its top frame id,
1677
+ * so handlers can resolve it to a `Page` within our V3Context.
1678
+ */
1679
+ private resolveTopFrameId;
1680
+ private isPlaywrightPage;
1681
+ private isPatchrightPage;
1682
+ private isPuppeteerPage;
1683
+ /** Resolve an external page reference or fall back to the active V3 page. */
1684
+ private resolvePage;
1685
+ private normalizeToV3Page;
1686
+ /**
1687
+ * Create a v3 agent instance (AISDK tool-based) with execute().
1688
+ * Mirrors the v2 Stagehand.agent() tool mode (no CUA provider here).
1196
1689
  */
1197
1690
  agent(options?: AgentConfig): {
1198
- execute: (instructionOrOptions: string | AgentExecuteOptions$1) => Promise<AgentResult$1>;
1199
- setScreenshotCollector?: (collector: unknown) => void;
1691
+ execute: (instructionOrOptions: string | AgentExecuteOptions) => Promise<AgentResult>;
1200
1692
  };
1201
1693
  }
1202
1694
 
1203
- export { AISdkClient, type ActOptions, type ActResult, type ActToolResult, type ActionExecutionResult, type AgentAction$1 as AgentAction, type AgentClientOptions, type AgentConfig, type AgentExecuteOptions$1 as AgentExecuteOptions, type AgentExecuteParams, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentOptions$1 as AgentOptions, type AgentProviderType, type AgentResult$1 as AgentResult, AgentScreenshotProviderError, type AgentType, AnnotatedScreenshotText, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AvailableModel, AvailableModelSchema, type Browser, type BrowserContext, type BrowserResult, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ComputerCallItem, type ConstructorParams, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, type FunctionCallItem, type GotoOptions, HandlerNotInitializedError, type HistoryEntry, type InitResult, InvalidAISDKModelFormatError, LLMClient, type LLMResponse, LLMResponseError, LOG_LEVEL_NAMES, type LocalBrowserLaunchOptions, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelProvider, type ObserveOptions, type ObserveResult$1 as ObserveResult, type Page, PlaywrightCommandException, PlaywrightCommandMethodNotSupportedException, type ResponseInputItem, type ResponseItem, Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, StagehandEnvironmentError, StagehandError, StagehandEvalError, StagehandFunctionName, 
StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, type StagehandMetrics, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, StagehandResponseParseError, type StagehandScreenshotOptions, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, type ToolUseItem, UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, XPathResolutionError, type ZodPathSegments, ZodSchemaValidationError, connectToMCPServer, defaultExtractSchema, pageTextSchema };
1695
+ /**
1696
+ * Abstract base class for agent clients
1697
+ * This provides a common interface for all agent implementations
1698
+ */
1699
+ declare abstract class AgentClient {
1700
+ type: AgentType;
1701
+ modelName: string;
1702
+ clientOptions: Record<string, unknown>;
1703
+ userProvidedInstructions?: string;
1704
+ constructor(type: AgentType, modelName: string, userProvidedInstructions?: string);
1705
+ abstract execute(options: AgentExecutionOptions): Promise<AgentResult>;
1706
+ abstract captureScreenshot(options?: Record<string, unknown>): Promise<unknown>;
1707
+ abstract setViewport(width: number, height: number): void;
1708
+ abstract setCurrentUrl(url: string): void;
1709
+ abstract setScreenshotProvider(provider: () => Promise<string>): void;
1710
+ abstract setActionHandler(handler: (action: AgentAction) => Promise<void>): void;
1711
+ }
1712
+
1713
+ declare const modelToAgentProviderMap: Record<string, AgentProviderType>;
1714
+ /**
1715
+ * Provider for agent clients
1716
+ * This class is responsible for creating the appropriate agent client
1717
+ * based on the provider type
1718
+ */
1719
+ declare class AgentProvider {
1720
+ private logger;
1721
+ /**
1722
+ * Create a new agent provider
1723
+ */
1724
+ constructor(logger: (message: LogLine) => void);
1725
+ getClient(modelName: string, clientOptions?: Record<string, unknown>, userProvidedInstructions?: string, tools?: ToolSet$1): AgentClient;
1726
+ static getAgentProvider(modelName: string): AgentProviderType;
1727
+ }
1728
+
1729
+ declare function validateZodSchema(schema: z$1.ZodTypeAny, data: unknown): boolean;
1730
+ /**
1731
+ * Detects if the code is running in the Bun runtime environment.
1732
+ * @returns {boolean} True if running in Bun, false otherwise.
1733
+ */
1734
+ declare function isRunningInBun(): boolean;
1735
+ declare function toGeminiSchema(zodSchema: z$1.ZodTypeAny): Schema;
1736
+ declare function getZodType(schema: z$1.ZodTypeAny): string;
1737
+ /**
1738
+ * Recursively traverses a given Zod schema, scanning for any fields of type `z.string().url()`.
1739
+ * For each such field, it replaces the `z.string().url()` with `z.number()`.
1740
+ *
1741
+ * This function is used internally by higher-level utilities (e.g., transforming entire object schemas)
1742
+ * and handles nested objects, arrays, unions, intersections, optionals.
1743
+ *
1744
+ * @param schema - The Zod schema to transform.
1745
+ * @param currentPath - An array of string/number keys representing the current schema path (used internally for recursion).
1746
+ * @returns A two-element tuple:
1747
+ * 1. The updated Zod schema, with any `.url()` fields replaced by `z.number()`.
1748
+ * 2. An array of {@link ZodPathSegments} objects representing each replaced field, including the path segments.
1749
+ */
1750
+ declare function transformSchema(schema: z$1.ZodTypeAny, currentPath: Array<string | number>): [z$1.ZodTypeAny, ZodPathSegments[]];
1751
+ /**
1752
+ * Once we get the final extracted object that has numeric IDs in place of URLs,
1753
+ * use `injectUrls` to walk the object and replace numeric IDs
1754
+ * with the real URL strings from idToUrlMapping. The `path` may include `*`
1755
+ * for array indices (indicating "all items in the array").
1756
+ */
1757
+ declare function injectUrls(obj: unknown, path: Array<string | number>, idToUrlMapping: Record<string, string>): void;
1758
+ /**
1759
+ * Mapping from LLM provider names to their corresponding environment variable names for API keys.
1760
+ */
1761
+ declare const providerEnvVarMap: Partial<Record<ModelProvider | string, string | Array<string>>>;
1762
+ /**
1763
+ * Loads an API key for a provider, checking environment variables.
1764
+ * @param provider The name of the provider (e.g., 'openai', 'anthropic')
1765
+ * @param logger Optional logger for info/error messages
1766
+ * @returns The API key if found, undefined otherwise
1767
+ */
1768
+ declare function loadApiKeyFromEnv(provider: string | undefined, logger: (logLine: LogLine) => void): string | undefined;
1769
+ declare function trimTrailingTextNode(path: string | undefined): string | undefined;
1770
+ interface JsonSchemaProperty {
1771
+ type: string;
1772
+ enum?: unknown[];
1773
+ items?: JsonSchemaProperty;
1774
+ properties?: Record<string, JsonSchemaProperty>;
1775
+ required?: string[];
1776
+ minimum?: number;
1777
+ maximum?: number;
1778
+ description?: string;
1779
+ }
1780
+ interface JsonSchema extends JsonSchemaProperty {
1781
+ type: string;
1782
+ }
1783
+ /**
1784
+ * Converts a JSON Schema object to a Zod schema
1785
+ * @param schema The JSON Schema object to convert
1786
+ * @returns A Zod schema equivalent to the input JSON Schema
1787
+ */
1788
+ declare function jsonSchemaToZod(schema: JsonSchema): ZodTypeAny;
1789
+
1790
+ interface ConnectToMCPServerOptions {
1791
+ serverUrl: string | URL;
1792
+ clientOptions?: ClientOptions$3;
1793
+ }
1794
+ interface StdioServerConfig {
1795
+ command: string;
1796
+ args?: string[];
1797
+ env?: Record<string, string>;
1798
+ }
1799
+ declare const connectToMCPServer: (serverConfig: string | URL | StdioServerConfig | ConnectToMCPServerOptions) => Promise<Client>;
1800
+
1801
+ /**
1802
+ * V3Evaluator mirrors Evaluator but operates on a V3 instance instead of Stagehand.
1803
+ * It uses the V3 page/screenshot APIs and constructs an LLM client to run
1804
+ * structured evaluations (YES/NO with reasoning) on screenshots and/or text.
1805
+ */
1806
+
1807
+ declare class V3Evaluator {
1808
+ private v3;
1809
+ private modelName;
1810
+ private modelClientOptions;
1811
+ private silentLogger;
1812
+ constructor(v3: V3, modelName?: AvailableModel, modelClientOptions?: ClientOptions);
1813
+ private getClient;
1814
+ ask(options: EvaluateOptions): Promise<EvaluationResult>;
1815
+ batchAsk(options: BatchAskOptions): Promise<EvaluationResult[]>;
1816
+ private _evaluateWithMultipleScreenshots;
1817
+ }
1818
+
1819
+ export { type AISDKCustomProvider, type AISDKProvider, AVAILABLE_CUA_MODELS, type ActOptions, type ActResult, type Action, type ActionExecutionResult, type AgentAction, type AgentConfig, type AgentExecuteOptions, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentModelConfig, AgentProvider, type AgentProviderType, type AgentResult, AgentScreenshotProviderError, type AgentType, AnnotatedScreenshotText, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AnyPage, type AvailableCuaModel, type AvailableModel, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ComputerCallItem, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, type FunctionCallItem, HandlerNotInitializedError, type HistoryEntry, InvalidAISDKModelFormatError, type JsonSchema, type JsonSchemaProperty, LLMClient, type LLMResponse, LLMResponseError, type LLMTool, LOG_LEVEL_NAMES, type LoadState, type LocalBrowserLaunchOptions, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelConfiguration, type ModelProvider, type ObserveOptions, type ResponseInputItem, type ResponseItem, V3 as Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, StagehandEnvironmentError, StagehandError, StagehandEvalError, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, type StagehandMetrics, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, 
StagehandResponseParseError, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, type ToolUseItem, UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, V3, type V3Env, V3Evaluator, V3FunctionName, type V3Options, XPathResolutionError, ZodSchemaValidationError, connectToMCPServer, defaultExtractSchema, getZodType, injectUrls, isRunningInBun, jsonSchemaToZod, loadApiKeyFromEnv, modelToAgentProviderMap, pageTextSchema, providerEnvVarMap, toGeminiSchema, transformSchema, trimTrailingTextNode, validateZodSchema };