@browserbasehq/orca 3.0.0-preview.1 → 3.0.0-preview.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. package/dist/index.d.ts +806 -682
  2. package/dist/index.js +34207 -23774
  3. package/package.json +46 -83
  4. package/LICENSE +0 -21
  5. package/README.md +0 -165
  6. package/dist/lib/StagehandContext.d.ts +0 -25
  7. package/dist/lib/StagehandPage.d.ts +0 -103
  8. package/dist/lib/a11y/utils.d.ts +0 -144
  9. package/dist/lib/agent/AgentClient.d.ts +0 -20
  10. package/dist/lib/agent/AgentProvider.d.ts +0 -19
  11. package/dist/lib/agent/AnthropicCUAClient.d.ts +0 -56
  12. package/dist/lib/agent/GoogleCUAClient.d.ts +0 -63
  13. package/dist/lib/agent/OpenAICUAClient.d.ts +0 -65
  14. package/dist/lib/agent/StagehandAgent.d.ts +0 -15
  15. package/dist/lib/agent/tools/act.d.ts +0 -59
  16. package/dist/lib/agent/tools/ariaTree.d.ts +0 -11
  17. package/dist/lib/agent/tools/close.d.ts +0 -22
  18. package/dist/lib/agent/tools/extract.d.ts +0 -38
  19. package/dist/lib/agent/tools/fillform.d.ts +0 -37
  20. package/dist/lib/agent/tools/goto.d.ts +0 -29
  21. package/dist/lib/agent/tools/index.d.ts +0 -257
  22. package/dist/lib/agent/tools/navback.d.ts +0 -17
  23. package/dist/lib/agent/tools/screenshot.d.ts +0 -13
  24. package/dist/lib/agent/tools/scroll.d.ts +0 -23
  25. package/dist/lib/agent/tools/wait.d.ts +0 -18
  26. package/dist/lib/agent/utils/cuaKeyMapping.d.ts +0 -10
  27. package/dist/lib/agent/utils/imageCompression.d.ts +0 -53
  28. package/dist/lib/agent/utils/messageProcessing.d.ts +0 -13
  29. package/dist/lib/api.d.ts +0 -23
  30. package/dist/lib/browserbaseDefaults.d.ts +0 -9
  31. package/dist/lib/cache/ActionCache.d.ts +0 -62
  32. package/dist/lib/cache/BaseCache.d.ts +0 -66
  33. package/dist/lib/cache/LLMCache.d.ts +0 -22
  34. package/dist/lib/cache.d.ts +0 -29
  35. package/dist/lib/dom/build/scriptContent.d.ts +0 -1
  36. package/dist/lib/dom/elementCheckUtils.d.ts +0 -2
  37. package/dist/lib/dom/genDomScripts.d.ts +0 -1
  38. package/dist/lib/dom/index.d.ts +0 -2
  39. package/dist/lib/dom/process.d.ts +0 -17
  40. package/dist/lib/dom/utils.d.ts +0 -7
  41. package/dist/lib/dom/xpathUtils.d.ts +0 -14
  42. package/dist/lib/handlers/actHandler.d.ts +0 -33
  43. package/dist/lib/handlers/cuaAgentHandler.d.ts +0 -58
  44. package/dist/lib/handlers/extractHandler.d.ts +0 -54
  45. package/dist/lib/handlers/handlerUtils/actHandlerUtils.d.ts +0 -21
  46. package/dist/lib/handlers/observeHandler.d.ts +0 -40
  47. package/dist/lib/handlers/stagehandAgentHandler.d.ts +0 -27
  48. package/dist/lib/index.d.ts +0 -94
  49. package/dist/lib/inference.d.ts +0 -71
  50. package/dist/lib/inferenceLogUtils.d.ts +0 -12
  51. package/dist/lib/llm/AnthropicClient.d.ts +0 -21
  52. package/dist/lib/llm/CerebrasClient.d.ts +0 -22
  53. package/dist/lib/llm/GoogleClient.d.ts +0 -24
  54. package/dist/lib/llm/GroqClient.d.ts +0 -22
  55. package/dist/lib/llm/LLMClient.d.ts +0 -99
  56. package/dist/lib/llm/LLMProvider.d.ts +0 -13
  57. package/dist/lib/llm/OpenAIClient.d.ts +0 -20
  58. package/dist/lib/llm/aisdk.d.ts +0 -20
  59. package/dist/lib/logger.d.ts +0 -54
  60. package/dist/lib/mcp/connection.d.ts +0 -11
  61. package/dist/lib/mcp/utils.d.ts +0 -3
  62. package/dist/lib/prompt.d.ts +0 -12
  63. package/dist/lib/utils.d.ts +0 -65
  64. package/dist/lib/v3/agent/AgentClient.d.ts +0 -18
  65. package/dist/lib/v3/agent/AgentProvider.d.ts +0 -18
  66. package/dist/lib/v3/agent/AnthropicCUAClient.d.ts +0 -55
  67. package/dist/lib/v3/agent/OpenAICUAClient.d.ts +0 -64
  68. package/dist/lib/v3/agent/StagehandAgent.d.ts +0 -15
  69. package/dist/lib/v3/agent/tools/index.d.ts +0 -229
  70. package/dist/lib/v3/agent/tools/v3-act.d.ts +0 -29
  71. package/dist/lib/v3/agent/tools/v3-ariaTree.d.ts +0 -11
  72. package/dist/lib/v3/agent/tools/v3-close.d.ts +0 -24
  73. package/dist/lib/v3/agent/tools/v3-extract.d.ts +0 -38
  74. package/dist/lib/v3/agent/tools/v3-fillform.d.ts +0 -37
  75. package/dist/lib/v3/agent/tools/v3-goto.d.ts +0 -29
  76. package/dist/lib/v3/agent/tools/v3-navback.d.ts +0 -17
  77. package/dist/lib/v3/agent/tools/v3-screenshot.d.ts +0 -13
  78. package/dist/lib/v3/agent/tools/v3-scroll.d.ts +0 -23
  79. package/dist/lib/v3/agent/tools/v3-wait.d.ts +0 -19
  80. package/dist/lib/v3/agent/utils/cuaKeyMapping.d.ts +0 -10
  81. package/dist/lib/v3/agent/utils/imageCompression.d.ts +0 -18
  82. package/dist/lib/v3/agent/utils/messageProcessing.d.ts +0 -13
  83. package/dist/lib/v3/dom/build/scriptV3Content.d.ts +0 -1
  84. package/dist/lib/v3/dom/genDomScripts.d.ts +0 -1
  85. package/dist/lib/v3/dom/index.d.ts +0 -1
  86. package/dist/lib/v3/dom/piercer.entry.d.ts +0 -1
  87. package/dist/lib/v3/dom/piercer.runtime.d.ts +0 -25
  88. package/dist/lib/v3/handlers/actHandler.d.ts +0 -18
  89. package/dist/lib/v3/handlers/extractHandler.d.ts +0 -29
  90. package/dist/lib/v3/handlers/handlerUtils/actHandlerUtils.d.ts +0 -18
  91. package/dist/lib/v3/handlers/observeHandler.d.ts +0 -15
  92. package/dist/lib/v3/handlers/v3AgentHandler.d.ts +0 -17
  93. package/dist/lib/v3/handlers/v3CuaAgentHandler.d.ts +0 -26
  94. package/dist/lib/v3/index.d.ts +0 -10
  95. package/dist/lib/v3/launch/browserbase.d.ts +0 -8
  96. package/dist/lib/v3/launch/local.d.ts +0 -13
  97. package/dist/lib/v3/llm/AnthropicClient.d.ts +0 -16
  98. package/dist/lib/v3/llm/CerebrasClient.d.ts +0 -17
  99. package/dist/lib/v3/llm/GoogleClient.d.ts +0 -19
  100. package/dist/lib/v3/llm/GroqClient.d.ts +0 -17
  101. package/dist/lib/v3/llm/LLMClient.d.ts +0 -99
  102. package/dist/lib/v3/llm/LLMProvider.d.ts +0 -10
  103. package/dist/lib/v3/llm/OpenAIClient.d.ts +0 -15
  104. package/dist/lib/v3/llm/aisdk.d.ts +0 -15
  105. package/dist/lib/v3/logger.d.ts +0 -48
  106. package/dist/lib/v3/mcp/connection.d.ts +0 -11
  107. package/dist/lib/v3/mcp/utils.d.ts +0 -3
  108. package/dist/lib/v3/tests/default-page-tracking.spec.d.ts +0 -1
  109. package/dist/lib/v3/tests/downloads.spec.d.ts +0 -1
  110. package/dist/lib/v3/tests/perform-understudy-method.spec.d.ts +0 -1
  111. package/dist/lib/v3/tests/shadow-iframe.spec.d.ts +0 -1
  112. package/dist/lib/v3/tests/timeouts.spec.d.ts +0 -1
  113. package/dist/lib/v3/tests/v3.bb.config.d.ts +0 -4
  114. package/dist/lib/v3/tests/v3.config.d.ts +0 -4
  115. package/dist/lib/v3/tests/v3.playwright.config.d.ts +0 -2
  116. package/dist/lib/v3/tests/xpath-for-location-deep.spec.d.ts +0 -1
  117. package/dist/lib/v3/types/act.d.ts +0 -10
  118. package/dist/lib/v3/types/agent.d.ts +0 -132
  119. package/dist/lib/v3/types/api.d.ts +0 -40
  120. package/dist/lib/v3/types/cache.d.ts +0 -71
  121. package/dist/lib/v3/types/context.d.ts +0 -2
  122. package/dist/lib/v3/types/evals.d.ts +0 -71
  123. package/dist/lib/v3/types/evaluator.d.ts +0 -40
  124. package/dist/lib/v3/types/llm.d.ts +0 -11
  125. package/dist/lib/v3/types/log.d.ts +0 -23
  126. package/dist/lib/v3/types/model.d.ts +0 -20
  127. package/dist/lib/v3/types/playwright.d.ts +0 -6
  128. package/dist/lib/v3/types/stagehand.d.ts +0 -113
  129. package/dist/lib/v3/types/stagehandApiErrors.d.ts +0 -18
  130. package/dist/lib/v3/types/stagehandErrors.d.ts +0 -104
  131. package/dist/lib/v3/types.d.ts +0 -176
  132. package/dist/lib/v3/understudy/a11y/snapshot.d.ts +0 -71
  133. package/dist/lib/v3/understudy/cdp.d.ts +0 -58
  134. package/dist/lib/v3/understudy/context.d.ts +0 -120
  135. package/dist/lib/v3/understudy/deepLocator.d.ts +0 -69
  136. package/dist/lib/v3/understudy/executionContextRegistry.d.ts +0 -15
  137. package/dist/lib/v3/understudy/frame.d.ts +0 -63
  138. package/dist/lib/v3/understudy/frameLocator.d.ts +0 -46
  139. package/dist/lib/v3/understudy/frameRegistry.d.ts +0 -100
  140. package/dist/lib/v3/understudy/locator.d.ts +0 -196
  141. package/dist/lib/v3/understudy/page.d.ts +0 -241
  142. package/dist/lib/v3/understudy/piercer.d.ts +0 -4
  143. package/dist/lib/v3/v3.d.ts +0 -158
  144. package/dist/lib/version.d.ts +0 -5
  145. package/dist/stagehand.config.d.ts +0 -3
  146. package/dist/types/act.d.ts +0 -50
  147. package/dist/types/agent.d.ts +0 -143
  148. package/dist/types/api.d.ts +0 -40
  149. package/dist/types/browser.d.ts +0 -10
  150. package/dist/types/context.d.ts +0 -117
  151. package/dist/types/evals.d.ts +0 -94
  152. package/dist/types/evaluator.d.ts +0 -40
  153. package/dist/types/llm.d.ts +0 -11
  154. package/dist/types/log.d.ts +0 -23
  155. package/dist/types/model.d.ts +0 -17
  156. package/dist/types/page.d.ts +0 -38
  157. package/dist/types/playwright.d.ts +0 -12
  158. package/dist/types/stagehand.d.ts +0 -330
  159. package/dist/types/stagehandApiErrors.d.ts +0 -18
  160. package/dist/types/stagehandErrors.d.ts +0 -104
package/dist/index.d.ts CHANGED
@@ -1,15 +1,166 @@
1
- import * as puppeteer_core from 'puppeteer-core';
2
- import * as patchright_core from 'patchright-core';
3
- import * as playwright_core from 'playwright-core';
4
- import Browserbase from '@browserbasehq/sdk';
5
- import { Protocol } from 'devtools-protocol';
6
- import { Buffer as Buffer$1 } from 'buffer';
1
+ import { ZodType, z, ZodError, ZodTypeAny } from 'zod/v3';
7
2
  import { ClientOptions as ClientOptions$2 } from '@anthropic-ai/sdk';
3
+ import { LanguageModelV2 } from '@ai-sdk/provider';
8
4
  import { ClientOptions as ClientOptions$1 } from 'openai';
9
- import { z, ZodType, ZodTypeAny, ZodError } from 'zod/v3';
10
- import { generateObject, generateText, streamText, streamObject, experimental_generateImage, embed, embedMany, experimental_transcribe, experimental_generateSpeech, LanguageModel, ToolSet } from 'ai';
11
- import { Client } from '@modelcontextprotocol/sdk/dist/esm/client';
12
- import { ClientOptions as ClientOptions$3, Client as Client$1 } from '@modelcontextprotocol/sdk/client/index.js';
5
+ import { generateObject, generateText, streamText, streamObject, experimental_generateImage, embed, embedMany, experimental_transcribe, experimental_generateSpeech, ToolSet } from 'ai';
6
+ import { Protocol } from 'devtools-protocol';
7
+ import { Buffer as Buffer$1 } from 'buffer';
8
+ import { Page as Page$1 } from 'playwright-core';
9
+ export { Page as PlaywrightPage } from 'playwright-core';
10
+ import { Page as Page$3 } from 'patchright-core';
11
+ export { Page as PatchrightPage } from 'patchright-core';
12
+ import { Page as Page$2 } from 'puppeteer-core';
13
+ export { Page as PuppeteerPage } from 'puppeteer-core';
14
+ import { Client, ClientOptions as ClientOptions$3 } from '@modelcontextprotocol/sdk/client/index.js';
15
+ import Browserbase from '@browserbasehq/sdk';
16
+ import { ToolSet as ToolSet$1 } from 'ai/dist';
17
+ import { Schema } from '@google/genai';
18
+
19
+ type AnthropicJsonSchemaObject = {
20
+ definitions?: {
21
+ MySchema?: {
22
+ properties?: Record<string, unknown>;
23
+ required?: string[];
24
+ };
25
+ };
26
+ properties?: Record<string, unknown>;
27
+ required?: string[];
28
+ } & Record<string, unknown>;
29
+ interface LLMTool {
30
+ type: "function";
31
+ name: string;
32
+ description: string;
33
+ parameters: Record<string, unknown>;
34
+ }
35
+ type AISDKProvider = (modelName: string) => LanguageModelV2;
36
+ type AISDKCustomProvider = (options: {
37
+ apiKey: string;
38
+ }) => AISDKProvider;
39
+ type AvailableModel = "gpt-4.1" | "gpt-4.1-mini" | "gpt-4.1-nano" | "o4-mini" | "o3" | "o3-mini" | "o1" | "o1-mini" | "gpt-4o" | "gpt-4o-mini" | "gpt-4o-2024-08-06" | "gpt-4.5-preview" | "o1-preview" | "claude-3-5-sonnet-latest" | "claude-3-5-sonnet-20241022" | "claude-3-5-sonnet-20240620" | "claude-3-7-sonnet-latest" | "claude-3-7-sonnet-20250219" | "cerebras-llama-3.3-70b" | "cerebras-llama-3.1-8b" | "groq-llama-3.3-70b-versatile" | "groq-llama-3.3-70b-specdec" | "gemini-1.5-flash" | "gemini-1.5-pro" | "gemini-1.5-flash-8b" | "gemini-2.0-flash-lite" | "gemini-2.0-flash" | "gemini-2.5-flash-preview-04-17" | "gemini-2.5-pro-preview-03-25" | string;
40
+ type ModelProvider = "openai" | "anthropic" | "cerebras" | "groq" | "google" | "aisdk";
41
+ type ClientOptions = ClientOptions$1 | ClientOptions$2;
42
+ type ModelConfiguration = AvailableModel | (ClientOptions & {
43
+ modelName: AvailableModel;
44
+ });
45
+
46
+ type LogLevel = 0 | 1 | 2;
47
+ /**
48
+ * Mapping between numeric log levels and their names
49
+ *
50
+ * 0 - error/warn - Critical issues or important warnings
51
+ * 1 - info - Standard information messages
52
+ * 2 - debug - Detailed information for debugging
53
+ */
54
+ declare const LOG_LEVEL_NAMES: Record<LogLevel, string>;
55
+ type LogLine = {
56
+ id?: string;
57
+ category?: string;
58
+ message: string;
59
+ level?: LogLevel;
60
+ timestamp?: string;
61
+ auxiliary?: {
62
+ [key: string]: {
63
+ value: string;
64
+ type: "object" | "string" | "html" | "integer" | "float" | "boolean";
65
+ };
66
+ };
67
+ };
68
+ type Logger = (logLine: LogLine) => void;
69
+
70
+ interface ChatMessage {
71
+ role: "system" | "user" | "assistant";
72
+ content: ChatMessageContent;
73
+ }
74
+ type ChatMessageContent = string | (ChatMessageImageContent | ChatMessageTextContent)[];
75
+ interface ChatMessageImageContent {
76
+ type: string;
77
+ image_url?: {
78
+ url: string;
79
+ };
80
+ text?: string;
81
+ source?: {
82
+ type: string;
83
+ media_type: string;
84
+ data: string;
85
+ };
86
+ }
87
+ interface ChatMessageTextContent {
88
+ type: string;
89
+ text: string;
90
+ }
91
+ declare const AnnotatedScreenshotText = "This is a screenshot of the current page state with the elements annotated on it. Each element id is annotated with a number to the top left of it. Duplicate annotations at the same location are under each other vertically.";
92
+ interface ChatCompletionOptions {
93
+ messages: ChatMessage[];
94
+ temperature?: number;
95
+ top_p?: number;
96
+ frequency_penalty?: number;
97
+ presence_penalty?: number;
98
+ image?: {
99
+ buffer: Buffer;
100
+ description?: string;
101
+ };
102
+ response_model?: {
103
+ name: string;
104
+ schema: ZodType;
105
+ };
106
+ tools?: LLMTool[];
107
+ tool_choice?: "auto" | "none" | "required";
108
+ maxOutputTokens?: number;
109
+ requestId?: string;
110
+ }
111
+ type LLMResponse = {
112
+ id: string;
113
+ object: string;
114
+ created: number;
115
+ model: string;
116
+ choices: {
117
+ index: number;
118
+ message: {
119
+ role: string;
120
+ content: string | null;
121
+ tool_calls: {
122
+ id: string;
123
+ type: string;
124
+ function: {
125
+ name: string;
126
+ arguments: string;
127
+ };
128
+ }[];
129
+ };
130
+ finish_reason: string;
131
+ }[];
132
+ usage: {
133
+ prompt_tokens: number;
134
+ completion_tokens: number;
135
+ total_tokens: number;
136
+ };
137
+ };
138
+ interface CreateChatCompletionOptions {
139
+ options: ChatCompletionOptions;
140
+ logger: (message: LogLine) => void;
141
+ retries?: number;
142
+ }
143
+ declare abstract class LLMClient {
144
+ type: "openai" | "anthropic" | "cerebras" | "groq" | (string & {});
145
+ modelName: AvailableModel | (string & {});
146
+ hasVision: boolean;
147
+ clientOptions: ClientOptions;
148
+ userProvidedInstructions?: string;
149
+ constructor(modelName: AvailableModel, userProvidedInstructions?: string);
150
+ abstract createChatCompletion<T = LLMResponse & {
151
+ usage?: LLMResponse["usage"];
152
+ }>(options: CreateChatCompletionOptions): Promise<T>;
153
+ generateObject: typeof generateObject;
154
+ generateText: typeof generateText;
155
+ streamText: typeof streamText;
156
+ streamObject: typeof streamObject;
157
+ generateImage: typeof experimental_generateImage;
158
+ embed: typeof embed;
159
+ embedMany: typeof embedMany;
160
+ transcribe: typeof experimental_transcribe;
161
+ generateSpeech: typeof experimental_generateSpeech;
162
+ getLanguageModel?(): LanguageModelV2;
163
+ }
13
164
 
14
165
  /**
15
166
  * CDP transport & session multiplexer
@@ -115,7 +266,7 @@ declare class Frame implements FrameManager {
115
266
  width: number;
116
267
  height: number;
117
268
  };
118
- }): Promise<string>;
269
+ }): Promise<Buffer>;
119
270
  /** Child frames via Page.getFrameTree */
120
271
  childFrames(): Promise<Frame[]>;
121
272
  /** Wait for a lifecycle state (load/domcontentloaded/networkidle) */
@@ -153,10 +304,13 @@ declare class Locator {
153
304
  private readonly frame;
154
305
  private readonly selector;
155
306
  private readonly options?;
307
+ private readonly selectorResolver;
308
+ private readonly selectorQuery;
309
+ private readonly nthIndex;
156
310
  constructor(frame: Frame, selector: string, options?: {
157
311
  deep?: boolean;
158
312
  depth?: number;
159
- });
313
+ }, nthIndex?: number);
160
314
  /** Return the owning Frame for this locator (typed accessor, no private access). */
161
315
  getFrame(): Frame;
162
316
  /**
@@ -182,6 +336,8 @@ declare class Locator {
182
336
  * Useful for identity comparisons without needing element handles.
183
337
  */
184
338
  backendNodeId(): Promise<Protocol.DOM.BackendNodeId>;
339
+ /** Return how many nodes the current selector resolves to. */
340
+ count(): Promise<number>;
185
341
  /**
186
342
  * Return the center of the element's bounding box in the owning frame's viewport
187
343
  * (CSS pixels), rounded to integers. Scrolls into view best-effort.
@@ -210,6 +366,11 @@ declare class Locator {
210
366
  a?: number;
211
367
  };
212
368
  }): Promise<void>;
369
+ /**
370
+ * Move the mouse cursor to the element's visual center without clicking.
371
+ * - Scrolls into view best-effort, resolves geometry, then dispatches a mouse move.
372
+ */
373
+ hover(): Promise<void>;
213
374
  /**
214
375
  * Click the element at its visual center.
215
376
  * Steps:
@@ -241,9 +402,9 @@ declare class Locator {
241
402
  scrollTo(percent: number | string): Promise<void>;
242
403
  /**
243
404
  * Fill an input/textarea/contenteditable element.
244
- * - Sets the value/text directly in DOM.
245
- * - Dispatches `input` and `change` events to mimic user input.
246
- * - Releases the underlying `objectId` afterwards to avoid leaks.
405
+ * Mirrors Playwright semantics: the DOM helper either applies the native
406
+ * value setter (for special input types) or asks us to type text via the CDP
407
+ * Input domain after focusing/selecting.
247
408
  */
248
409
  fill(value: string): Promise<void>;
249
410
  /**
@@ -289,35 +450,16 @@ declare class Locator {
289
450
  * For API parity, returns the same locator (querySelector already returns the first match).
290
451
  */
291
452
  first(): Locator;
453
+ /** Return a locator narrowed to the element at the given zero-based index. */
454
+ nth(index: number): Locator;
292
455
  /**
293
456
  * Resolve `this.selector` within the frame to `{ objectId, nodeId? }`:
294
- * - Ensures Runtime/DOM are enabled.
295
- * - Creates (or reuses) an isolated world for this frame.
296
- * - Evaluates a CSS or XPath query in that isolated world.
297
- * - Best-effort: attempts to convert `objectId` to `nodeId`; failure is non-fatal.
298
- *
299
- * - For XPath: first try page-side resolver (__stagehandV3__.resolveSimpleXPath).
300
- * If it returns null (e.g. closed DSD not captured), fall back to CDP DOM with
301
- * `pierce: true` to traverse closed shadow roots and resolve by backendNodeId.
457
+ * Delegates to a shared selector resolver so all selector logic stays in sync.
302
458
  */
303
459
  resolveNode(): Promise<{
304
460
  nodeId: Protocol.DOM.NodeId | null;
305
461
  objectId: Protocol.Runtime.RemoteObjectId;
306
462
  }>;
307
- /**
308
- * CDP fallback for XPath resolution that needs to cross *closed* shadow roots
309
- * created via Declarative Shadow DOM (no attachShadow call to intercept).
310
- *
311
- * Strategy:
312
- * - Fetch full DOM with `pierce: true` so closed shadow roots are included.
313
- * - Run a small, tolerant XPath stepper over the CDP node tree:
314
- * • supports absolute paths like `/html/body/...`
315
- * • supports `//` descendant jumps
316
- * • supports `tag[n]` numeric predicates per sibling group
317
- * • supports `*`
318
- * - Resolve the winning backendNodeId to an objectId for downstream actions.
319
- */
320
- private resolveViaDomPierceXPath;
321
463
  /** Compute a center point from a BoxModel content quad */
322
464
  private centerFromBoxContent;
323
465
  }
@@ -333,12 +475,15 @@ declare class DeepLocatorDelegate {
333
475
  private readonly page;
334
476
  private readonly root;
335
477
  private readonly selector;
336
- constructor(page: Page, root: Frame, selector: string);
478
+ private readonly nthIndex;
479
+ constructor(page: Page, root: Frame, selector: string, nthIndex?: number);
337
480
  private real;
338
481
  click(options?: {
339
482
  button?: "left" | "right" | "middle";
340
483
  clickCount?: number;
341
484
  }): Promise<void>;
485
+ count(): Promise<number>;
486
+ hover(): Promise<void>;
342
487
  fill(value: string): Promise<void>;
343
488
  type(text: string, options?: {
344
489
  delay?: number;
@@ -377,7 +522,8 @@ declare class DeepLocatorDelegate {
377
522
  composed?: boolean;
378
523
  detail?: number;
379
524
  }): Promise<void>;
380
- first(): this;
525
+ first(): DeepLocatorDelegate;
526
+ nth(index: number): DeepLocatorDelegate;
381
527
  }
382
528
 
383
529
  /**
@@ -407,6 +553,7 @@ declare class LocatorDelegate {
407
553
  button?: "left" | "right" | "middle";
408
554
  clickCount?: number;
409
555
  }): Promise<void>;
556
+ hover(): Promise<void>;
410
557
  fill(value: string): Promise<void>;
411
558
  type(text: string, options?: {
412
559
  delay?: number;
@@ -419,9 +566,13 @@ declare class LocatorDelegate {
419
566
  textContent(): Promise<string>;
420
567
  innerHtml(): Promise<string>;
421
568
  innerText(): Promise<string>;
569
+ count(): Promise<number>;
422
570
  first(): LocatorDelegate;
423
571
  }
424
572
 
573
+ type AnyPage = Page$1 | Page$2 | Page$3 | Page;
574
+ type LoadState = "load" | "domcontentloaded" | "networkidle";
575
+
425
576
  declare class Page {
426
577
  private readonly conn;
427
578
  private readonly mainSession;
@@ -439,6 +590,9 @@ declare class Page {
439
590
  private readonly frameCache;
440
591
  /** Stable id for Frames created by this Page (use top-level TargetId). */
441
592
  private readonly pageId;
593
+ /** Cached current URL for synchronous page.url() */
594
+ private _currentUrl;
595
+ private readonly networkManager;
442
596
  private constructor();
443
597
  private cursorEnabled;
444
598
  private ensureCursorScript;
@@ -463,6 +617,7 @@ declare class Page {
463
617
  * Topology + ownership update. Handles root swaps.
464
618
  */
465
619
  onFrameNavigated(frame: Protocol.Page.Frame, session: CDPSessionLike): void;
620
+ onNavigatedWithinDocument(frameId: string, url: string, session: CDPSessionLike): void;
466
621
  /**
467
622
  * An OOPIF child session whose **main** frame id equals the parent iframe’s frameId
468
623
  * has been attached; adopt the session into this Page and seed ownership for its subtree.
@@ -476,7 +631,11 @@ declare class Page {
476
631
  frameForId(frameId: string): Frame;
477
632
  /** Expose a session by id (used by snapshot to resolve session id -> session) */
478
633
  getSessionById(id: string): CDPSessionLike | undefined;
634
+ registerSessionForNetwork(session: CDPSessionLike): void;
635
+ unregisterSessionForNetwork(sessionId: string | undefined): void;
479
636
  targetId(): string;
637
+ /** Seed the cached URL before navigation events converge. */
638
+ seedCurrentUrl(url: string | undefined | null): void;
480
639
  mainFrameId(): string;
481
640
  mainFrame(): Frame;
482
641
  /**
@@ -520,9 +679,9 @@ declare class Page {
520
679
  timeoutMs?: number;
521
680
  }): Promise<void>;
522
681
  /**
523
- * Return the current page URL (from navigation history).
682
+ * Return the current page URL (synchronous, cached from navigation events).
524
683
  */
525
- url(): Promise<string>;
684
+ url(): string;
526
685
  /**
527
686
  * Return the current page title.
528
687
  * Prefers reading from the active document via Runtime.evaluate to reflect dynamic changes.
@@ -534,7 +693,7 @@ declare class Page {
534
693
  */
535
694
  screenshot(options?: {
536
695
  fullPage?: boolean;
537
- }): Promise<string>;
696
+ }): Promise<Buffer>;
538
697
  /**
539
698
  * Create a locator bound to the current main frame.
540
699
  */
@@ -639,12 +798,35 @@ declare class Page {
639
798
  withMistakes?: boolean;
640
799
  }): Promise<void>;
641
800
  /**
642
- * Press a single key (keyDown then keyUp). For printable characters,
643
- * uses the text path on keyDown; for named keys, sets key/code/VK.
801
+ * Press a single key or key combination (keyDown then keyUp).
802
+ * For printable characters, uses the text path on keyDown; for named keys, sets key/code/VK.
803
+ * Supports key combinations with modifiers like "Cmd+A", "Ctrl+C", "Shift+Tab", etc.
644
804
  */
645
805
  keyPress(key: string, options?: {
646
806
  delay?: number;
647
807
  }): Promise<void>;
808
+ private _pressedModifiers;
809
+ /** Press a key down without releasing it */
810
+ private keyDown;
811
+ /** Release a pressed key */
812
+ private keyUp;
813
+ /** Normalize modifier key names to match CDP expectations */
814
+ private normalizeModifierKey;
815
+ /**
816
+ * Get the map of named keys with their properties
817
+ */
818
+ private getNamedKeys;
819
+ /**
820
+ * Minimal description for printable keys (letters/digits/space) to provide code and VK.
821
+ * Used when non-Shift modifiers are pressed to avoid sending text while keeping accelerator info.
822
+ */
823
+ private describePrintableKey;
824
+ private isMacOS;
825
+ /**
826
+ * Return Chromium mac editing commands (without trailing ':') for a given code like 'KeyA'
827
+ * Only used on macOS to trigger system editing shortcuts (e.g., selectAll, copy, paste...).
828
+ */
829
+ private macCommandsFor;
648
830
  /**
649
831
  * Create an isolated world for the **current** main frame and return its context id.
650
832
  */
@@ -655,151 +837,315 @@ declare class Page {
655
837
  * - Event path listens at the session level and compares incoming `frameId`
656
838
  * to `mainFrameId()` **at event time** to follow root swaps.
657
839
  */
658
- private waitForMainLoadState;
659
- }
660
-
661
- declare const AvailableModelSchema: z.ZodEnum<["gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o4-mini", "o3", "o3-mini", "o1", "o1-mini", "gpt-4o", "gpt-4o-mini", "gpt-4o-2024-08-06", "gpt-4.5-preview", "o1-preview", "claude-3-5-sonnet-latest", "claude-3-5-sonnet-20241022", "claude-3-5-sonnet-20240620", "claude-3-7-sonnet-latest", "claude-3-7-sonnet-20250219", "cerebras-llama-3.3-70b", "cerebras-llama-3.1-8b", "groq-llama-3.3-70b-versatile", "groq-llama-3.3-70b-specdec", "gemini-1.5-flash", "gemini-1.5-pro", "gemini-1.5-flash-8b", "gemini-2.0-flash-lite", "gemini-2.0-flash", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25"]>;
662
- type AvailableModel = z.infer<typeof AvailableModelSchema> | string;
663
- type ModelProvider = "openai" | "anthropic" | "cerebras" | "groq" | "google" | "aisdk";
664
- type ClientOptions = ClientOptions$1 | ClientOptions$2;
665
- type ModelConfiguration = AvailableModel | (ClientOptions & {
666
- modelName: AvailableModel;
667
- });
668
- interface AnthropicJsonSchemaObject {
669
- definitions?: {
670
- MySchema?: {
671
- properties?: Record<string, unknown>;
672
- required?: string[];
673
- };
674
- };
675
- properties?: Record<string, unknown>;
676
- required?: string[];
677
- }
678
-
679
- interface LLMTool {
680
- type: "function";
681
- name: string;
682
- description: string;
683
- parameters: Record<string, unknown>;
840
+ waitForMainLoadState(state: LoadState, timeoutMs?: number): Promise<void>;
684
841
  }
685
842
 
686
- type LogLevel = 0 | 1 | 2;
687
843
  /**
688
- * Mapping between numeric log levels and their names
844
+ * Represents a path through a Zod schema from the root object down to a
845
+ * particular field. The `segments` array describes the chain of keys/indices.
689
846
  *
690
- * 0 - error/warn - Critical issues or important warnings
691
- * 1 - info - Standard information messages
692
- * 2 - debug - Detailed information for debugging
847
+ * - **String** segments indicate object property names.
848
+ * - **Number** segments indicate array indices.
849
+ *
850
+ * For example, `["users", 0, "homepage"]` might describe reaching
851
+ * the `homepage` field in `schema.users[0].homepage`.
693
852
  */
694
- declare const LOG_LEVEL_NAMES: Record<LogLevel, string>;
695
- type LogLine = {
696
- id?: string;
697
- category?: string;
698
- message: string;
699
- level?: LogLevel;
700
- timestamp?: string;
701
- auxiliary?: {
702
- [key: string]: {
703
- value: string;
704
- type: "object" | "string" | "html" | "integer" | "float" | "boolean";
705
- };
706
- };
707
- };
708
- type Logger = (logLine: LogLine) => void;
709
-
710
- interface ChatMessage {
711
- role: "system" | "user" | "assistant";
712
- content: ChatMessageContent;
853
+ interface ZodPathSegments {
854
+ /**
855
+ * The ordered list of keys/indices leading from the schema root
856
+ * to the targeted field.
857
+ */
858
+ segments: Array<string | number>;
713
859
  }
714
- type ChatMessageContent = string | (ChatMessageImageContent | ChatMessageTextContent)[];
715
- interface ChatMessageImageContent {
716
- type: string;
717
- image_url?: {
718
- url: string;
719
- };
720
- text?: string;
721
- source?: {
722
- type: string;
723
- media_type: string;
724
- data: string;
725
- };
860
+
861
+ type EvaluateOptions = {
862
+ /** The question to ask about the task state */
863
+ question: string;
864
+ /** The answer to the question */
865
+ answer?: string;
866
+ /** Whether to take a screenshot of the task state, or array of screenshots to evaluate */
867
+ screenshot?: boolean | Buffer[];
868
+ /** Custom system prompt for the evaluator */
869
+ systemPrompt?: string;
870
+ /** Delay in milliseconds before taking the screenshot @default 250 */
871
+ screenshotDelayMs?: number;
872
+ /** The agent's reasoning/thought process for completing the task */
873
+ agentReasoning?: string;
874
+ };
875
+ type BatchAskOptions = {
876
+ /** Array of questions with optional answers */
877
+ questions: Array<{
878
+ question: string;
879
+ answer?: string;
880
+ }>;
881
+ /** Whether to take a screenshot of the task state */
882
+ screenshot?: boolean;
883
+ /** Custom system prompt for the evaluator */
884
+ systemPrompt?: string;
885
+ /** Delay in milliseconds before taking the screenshot @default 1000 */
886
+ screenshotDelayMs?: number;
887
+ };
888
+ /**
889
+ * Result of an evaluation
890
+ */
891
+ interface EvaluationResult {
892
+ /**
893
+ * The evaluation result ('YES', 'NO', or 'INVALID' if parsing failed or value was unexpected)
894
+ */
895
+ evaluation: "YES" | "NO" | "INVALID";
896
+ /**
897
+ * The reasoning behind the evaluation
898
+ */
899
+ reasoning: string;
726
900
  }
727
- interface ChatMessageTextContent {
901
+
902
+ interface AgentAction {
728
903
  type: string;
729
- text: string;
904
+ reasoning?: string;
905
+ taskCompleted?: boolean;
906
+ action?: string;
907
+ timeMs?: number;
908
+ pageText?: string;
909
+ pageUrl?: string;
910
+ instruction?: string;
911
+ [key: string]: unknown;
730
912
  }
731
- declare const AnnotatedScreenshotText = "This is a screenshot of the current page state with the elements annotated on it. Each element id is annotated with a number to the top left of it. Duplicate annotations at the same location are under each other vertically.";
732
- interface ChatCompletionOptions {
733
- messages: ChatMessage[];
734
- temperature?: number;
735
- top_p?: number;
736
- frequency_penalty?: number;
737
- presence_penalty?: number;
738
- image?: {
739
- buffer: Buffer;
740
- description?: string;
741
- };
742
- response_model?: {
743
- name: string;
744
- schema: ZodType;
913
+ interface AgentResult {
914
+ success: boolean;
915
+ message: string;
916
+ actions: AgentAction[];
917
+ completed: boolean;
918
+ metadata?: Record<string, unknown>;
919
+ usage?: {
920
+ input_tokens: number;
921
+ output_tokens: number;
922
+ inference_time_ms: number;
745
923
  };
746
- tools?: LLMTool[];
747
- tool_choice?: "auto" | "none" | "required";
748
- maxTokens?: number;
749
- requestId?: string;
750
924
  }
751
- type LLMResponse = {
752
- id: string;
753
- object: string;
754
- created: number;
755
- model: string;
756
- choices: {
757
- index: number;
758
- message: {
759
- role: string;
760
- content: string | null;
761
- tool_calls: {
762
- id: string;
763
- type: string;
764
- function: {
765
- name: string;
766
- arguments: string;
767
- };
768
- }[];
769
- };
770
- finish_reason: string;
771
- }[];
772
- usage: {
773
- prompt_tokens: number;
774
- completion_tokens: number;
775
- total_tokens: number;
776
- };
777
- };
778
- interface CreateChatCompletionOptions {
779
- options: ChatCompletionOptions;
925
+ interface AgentExecuteOptions {
926
+ instruction: string;
927
+ maxSteps?: number;
928
+ page?: Page$1 | Page$2 | Page$3 | Page;
929
+ highlightCursor?: boolean;
930
+ }
931
+ type AgentType = "openai" | "anthropic" | "google";
932
+ declare const AVAILABLE_CUA_MODELS: readonly ["openai/computer-use-preview", "openai/computer-use-preview-2025-03-11", "anthropic/claude-3-7-sonnet-latest", "anthropic/claude-haiku-4-5-20251001", "anthropic/claude-sonnet-4-20250514", "anthropic/claude-sonnet-4-5-20250929", "google/gemini-2.5-computer-use-preview-10-2025"];
933
+ type AvailableCuaModel = (typeof AVAILABLE_CUA_MODELS)[number];
934
+ interface AgentExecutionOptions<TOptions extends AgentExecuteOptions = AgentExecuteOptions> {
935
+ options: TOptions;
780
936
  logger: (message: LogLine) => void;
781
937
  retries?: number;
782
938
  }
783
- declare abstract class LLMClient {
784
- type: "openai" | "anthropic" | "cerebras" | "groq" | (string & {});
785
- modelName: AvailableModel | (string & {});
786
- hasVision: boolean;
787
- clientOptions: ClientOptions;
939
+ interface AgentHandlerOptions {
940
+ modelName: string;
941
+ clientOptions?: Record<string, unknown>;
788
942
  userProvidedInstructions?: string;
789
- constructor(modelName: AvailableModel, userProvidedInstructions?: string);
790
- abstract createChatCompletion<T = LLMResponse & {
791
- usage?: LLMResponse["usage"];
792
- }>(options: CreateChatCompletionOptions): Promise<T>;
793
- generateObject: typeof generateObject;
794
- generateText: typeof generateText;
795
- streamText: typeof streamText;
796
- streamObject: typeof streamObject;
797
- generateImage: typeof experimental_generateImage;
798
- embed: typeof embed;
799
- embedMany: typeof embedMany;
800
- transcribe: typeof experimental_transcribe;
801
- generateSpeech: typeof experimental_generateSpeech;
802
- getLanguageModel?(): LanguageModel;
943
+ experimental?: boolean;
944
+ }
945
+ interface ActionExecutionResult {
946
+ success: boolean;
947
+ error?: string;
948
+ data?: unknown;
949
+ }
950
+ interface ToolUseItem extends ResponseItem {
951
+ type: "tool_use";
952
+ id: string;
953
+ name: string;
954
+ input: Record<string, unknown>;
955
+ }
956
+ interface AnthropicMessage {
957
+ role: string;
958
+ content: string | Array<AnthropicContentBlock>;
959
+ }
960
+ interface AnthropicContentBlock {
961
+ type: string;
962
+ [key: string]: unknown;
963
+ }
964
+ interface AnthropicTextBlock extends AnthropicContentBlock {
965
+ type: "text";
966
+ text: string;
967
+ }
968
+ interface AnthropicToolResult {
969
+ type: "tool_result";
970
+ tool_use_id: string;
971
+ content: string | Array<AnthropicContentBlock>;
972
+ }
973
+ interface ResponseItem {
974
+ type: string;
975
+ id: string;
976
+ [key: string]: unknown;
977
+ }
978
+ interface ComputerCallItem extends ResponseItem {
979
+ type: "computer_call";
980
+ call_id: string;
981
+ action: {
982
+ type: string;
983
+ [key: string]: unknown;
984
+ };
985
+ pending_safety_checks?: Array<{
986
+ id: string;
987
+ code: string;
988
+ message: string;
989
+ }>;
990
+ }
991
+ interface FunctionCallItem extends ResponseItem {
992
+ type: "function_call";
993
+ call_id: string;
994
+ name: string;
995
+ arguments: string;
996
+ }
997
+ type ResponseInputItem = {
998
+ role: string;
999
+ content: string;
1000
+ } | {
1001
+ type: "computer_call_output";
1002
+ call_id: string;
1003
+ output: {
1004
+ type: "input_image";
1005
+ image_url: string;
1006
+ current_url?: string;
1007
+ error?: string;
1008
+ [key: string]: unknown;
1009
+ } | string;
1010
+ acknowledged_safety_checks?: Array<{
1011
+ id: string;
1012
+ code: string;
1013
+ message: string;
1014
+ }>;
1015
+ } | {
1016
+ type: "function_call_output";
1017
+ call_id: string;
1018
+ output: string;
1019
+ };
1020
+ interface AgentInstance {
1021
+ execute: (instructionOrOptions: string | AgentExecuteOptions) => Promise<AgentResult>;
1022
+ }
1023
+ type AgentProviderType = AgentType;
1024
+ type AgentModelConfig<TModelName extends string = string> = {
1025
+ modelName: TModelName;
1026
+ } & Record<string, unknown>;
1027
+ type AgentConfig = {
1028
+ /**
1029
+ * Custom system prompt to provide to the agent. Overrides the default system prompt.
1030
+ */
1031
+ systemPrompt?: string;
1032
+ /**
1033
+ * MCP integrations - Array of Client objects
1034
+ */
1035
+ integrations?: (Client | string)[];
1036
+ /**
1037
+ * Tools passed to the agent client
1038
+ */
1039
+ tools?: ToolSet;
1040
+ /**
1041
+ * Indicates CUA is disabled for this configuration
1042
+ */
1043
+ cua?: boolean;
1044
+ /**
1045
+ * The model to use for agent functionality
1046
+ */
1047
+ model?: string | AgentModelConfig<string>;
1048
+ /**
1049
+ * The model to use for tool execution (observe/act calls within agent tools).
1050
+ * If not specified, inherits from the main model configuration.
1051
+ * Format: "provider/model" (e.g., "openai/gpt-4o-mini", "google/gemini-2.0-flash-exp")
1052
+ */
1053
+ executionModel?: string | AgentModelConfig<string>;
1054
+ };
1055
+
1056
+ declare class StagehandAPIError extends Error {
1057
+ constructor(message: string);
1058
+ }
1059
+ declare class StagehandAPIUnauthorizedError extends StagehandAPIError {
1060
+ constructor(message?: string);
1061
+ }
1062
+ declare class StagehandHttpError extends StagehandAPIError {
1063
+ constructor(message: string);
1064
+ }
1065
+ declare class StagehandServerError extends StagehandAPIError {
1066
+ constructor(message: string);
1067
+ }
1068
+ declare class StagehandResponseBodyError extends StagehandAPIError {
1069
+ constructor();
1070
+ }
1071
+ declare class StagehandResponseParseError extends StagehandAPIError {
1072
+ constructor(message: string);
1073
+ }
1074
+
1075
+ interface ActOptions {
1076
+ model?: ModelConfiguration;
1077
+ variables?: Record<string, string>;
1078
+ timeout?: number;
1079
+ page?: Page$1 | Page$2 | Page$3 | Page;
1080
+ }
1081
+ interface ActResult {
1082
+ success: boolean;
1083
+ message: string;
1084
+ actionDescription: string;
1085
+ actions: Action[];
1086
+ }
1087
+ type ExtractResult<T extends z.AnyZodObject> = z.infer<T>;
1088
+ interface Action {
1089
+ selector: string;
1090
+ description: string;
1091
+ method?: string;
1092
+ arguments?: string[];
1093
+ }
1094
+ interface HistoryEntry {
1095
+ method: "act" | "extract" | "observe" | "navigate";
1096
+ parameters: unknown;
1097
+ result: unknown;
1098
+ timestamp: string;
1099
+ }
1100
+ interface ExtractOptions {
1101
+ model?: ModelConfiguration;
1102
+ timeout?: number;
1103
+ selector?: string;
1104
+ page?: Page$1 | Page$2 | Page$3 | Page;
1105
+ }
1106
+ declare const defaultExtractSchema: z.ZodObject<{
1107
+ extraction: z.ZodString;
1108
+ }, "strip", z.ZodTypeAny, {
1109
+ extraction?: string;
1110
+ }, {
1111
+ extraction?: string;
1112
+ }>;
1113
+ declare const pageTextSchema: z.ZodObject<{
1114
+ pageText: z.ZodString;
1115
+ }, "strip", z.ZodTypeAny, {
1116
+ pageText?: string;
1117
+ }, {
1118
+ pageText?: string;
1119
+ }>;
1120
+ interface ObserveOptions {
1121
+ model?: ModelConfiguration;
1122
+ timeout?: number;
1123
+ selector?: string;
1124
+ page?: Page$1 | Page$2 | Page$3 | Page;
1125
+ }
1126
+ declare enum V3FunctionName {
1127
+ ACT = "ACT",
1128
+ EXTRACT = "EXTRACT",
1129
+ OBSERVE = "OBSERVE",
1130
+ AGENT = "AGENT"
1131
+ }
1132
+
1133
+ interface V3Metrics {
1134
+ actPromptTokens: number;
1135
+ actCompletionTokens: number;
1136
+ actInferenceTimeMs: number;
1137
+ extractPromptTokens: number;
1138
+ extractCompletionTokens: number;
1139
+ extractInferenceTimeMs: number;
1140
+ observePromptTokens: number;
1141
+ observeCompletionTokens: number;
1142
+ observeInferenceTimeMs: number;
1143
+ agentPromptTokens: number;
1144
+ agentCompletionTokens: number;
1145
+ agentInferenceTimeMs: number;
1146
+ totalPromptTokens: number;
1147
+ totalCompletionTokens: number;
1148
+ totalInferenceTimeMs: number;
803
1149
  }
804
1150
 
805
1151
  type V3Env = "LOCAL" | "BROWSERBASE";
@@ -858,70 +1204,113 @@ interface V3Options {
858
1204
  disablePino?: boolean;
859
1205
  /** Optional external logger hook for integrating with host apps. */
860
1206
  logger?: (line: LogLine) => void;
861
- /** Show a visual cursor overlay that follows our mouse events. */
862
- includeCursor?: boolean;
863
1207
  /** Directory used to persist cached actions for act(). */
864
1208
  cacheDir?: string;
865
1209
  domSettleTimeout?: number;
866
1210
  }
867
- type PlaywrightPage = playwright_core.Page;
868
- type PatchrightPage = patchright_core.Page;
869
- type PuppeteerPage = puppeteer_core.Page;
870
- interface ActOptions$1 {
871
- model?: ModelConfiguration;
872
- variables?: Record<string, string>;
873
- timeout?: number;
874
- page?: PlaywrightPage | PuppeteerPage | PatchrightPage | Page;
1211
+
1212
+ declare class StagehandError extends Error {
1213
+ constructor(message: string);
875
1214
  }
876
- interface ExtractOptions$1 {
877
- model?: ModelConfiguration;
878
- timeout?: number;
879
- selector?: string;
880
- page?: PlaywrightPage | PuppeteerPage | PatchrightPage | Page;
1215
+ declare class StagehandDefaultError extends StagehandError {
1216
+ constructor(error?: unknown);
881
1217
  }
882
- declare const defaultExtractSchema: z.ZodObject<{
883
- extraction: z.ZodString;
884
- }, "strip", z.ZodTypeAny, {
885
- extraction?: string;
886
- }, {
887
- extraction?: string;
888
- }>;
889
- declare const pageTextSchema: z.ZodObject<{
890
- pageText: z.ZodString;
891
- }, "strip", z.ZodTypeAny, {
892
- pageText?: string;
893
- }, {
894
- pageText?: string;
895
- }>;
896
- interface ObserveOptions$1 {
897
- model?: ModelConfiguration;
898
- timeout?: number;
899
- selector?: string;
900
- page?: PlaywrightPage | PuppeteerPage | PatchrightPage | Page;
1218
+ declare class StagehandEnvironmentError extends StagehandError {
1219
+ constructor(currentEnvironment: string, requiredEnvironment: string, feature: string);
1220
+ }
1221
+ declare class MissingEnvironmentVariableError extends StagehandError {
1222
+ constructor(missingEnvironmentVariable: string, feature: string);
1223
+ }
1224
+ declare class UnsupportedModelError extends StagehandError {
1225
+ constructor(supportedModels: string[], feature?: string);
1226
+ }
1227
+ declare class UnsupportedModelProviderError extends StagehandError {
1228
+ constructor(supportedProviders: string[], feature?: string);
1229
+ }
1230
+ declare class UnsupportedAISDKModelProviderError extends StagehandError {
1231
+ constructor(provider: string, supportedProviders: string[]);
1232
+ }
1233
+ declare class InvalidAISDKModelFormatError extends StagehandError {
1234
+ constructor(modelName: string);
1235
+ }
1236
+ declare class StagehandNotInitializedError extends StagehandError {
1237
+ constructor(prop: string);
1238
+ }
1239
+ declare class BrowserbaseSessionNotFoundError extends StagehandError {
1240
+ constructor();
1241
+ }
1242
+ declare class CaptchaTimeoutError extends StagehandError {
1243
+ constructor();
1244
+ }
1245
+ declare class MissingLLMConfigurationError extends StagehandError {
1246
+ constructor();
1247
+ }
1248
+ declare class HandlerNotInitializedError extends StagehandError {
1249
+ constructor(handlerType: string);
1250
+ }
1251
+ declare class StagehandInvalidArgumentError extends StagehandError {
1252
+ constructor(message: string);
1253
+ }
1254
+ declare class StagehandElementNotFoundError extends StagehandError {
1255
+ constructor(xpaths: string[]);
1256
+ }
1257
+ declare class AgentScreenshotProviderError extends StagehandError {
1258
+ constructor(message: string);
1259
+ }
1260
+ declare class StagehandMissingArgumentError extends StagehandError {
1261
+ constructor(message: string);
1262
+ }
1263
+ declare class CreateChatCompletionResponseError extends StagehandError {
1264
+ constructor(message: string);
1265
+ }
1266
+ declare class StagehandEvalError extends StagehandError {
1267
+ constructor(message: string);
1268
+ }
1269
+ declare class StagehandDomProcessError extends StagehandError {
1270
+ constructor(message: string);
1271
+ }
1272
+ declare class StagehandClickError extends StagehandError {
1273
+ constructor(message: string, selector: string);
1274
+ }
1275
+ declare class LLMResponseError extends StagehandError {
1276
+ constructor(primitive: string, message: string);
1277
+ }
1278
+ declare class StagehandIframeError extends StagehandError {
1279
+ constructor(frameUrl: string, message: string);
1280
+ }
1281
+ declare class ContentFrameNotFoundError extends StagehandError {
1282
+ constructor(selector: string);
1283
+ }
1284
+ declare class XPathResolutionError extends StagehandError {
1285
+ constructor(xpath: string);
1286
+ }
1287
+ declare class ExperimentalApiConflictError extends StagehandError {
1288
+ constructor();
1289
+ }
1290
+ declare class ExperimentalNotConfiguredError extends StagehandError {
1291
+ constructor(featureName: string);
901
1292
  }
902
- type LoadState = "load" | "domcontentloaded" | "networkidle";
903
- interface V3Metrics {
904
- actPromptTokens: number;
905
- actCompletionTokens: number;
906
- actInferenceTimeMs: number;
907
- extractPromptTokens: number;
908
- extractCompletionTokens: number;
909
- extractInferenceTimeMs: number;
910
- observePromptTokens: number;
911
- observeCompletionTokens: number;
912
- observeInferenceTimeMs: number;
913
- agentPromptTokens: number;
914
- agentCompletionTokens: number;
915
- agentInferenceTimeMs: number;
916
- totalPromptTokens: number;
917
- totalCompletionTokens: number;
918
- totalInferenceTimeMs: number;
1293
+ declare class ZodSchemaValidationError extends Error {
1294
+ readonly received: unknown;
1295
+ readonly issues: ReturnType<ZodError["format"]>;
1296
+ constructor(received: unknown, issues: ReturnType<ZodError["format"]>);
919
1297
  }
920
- declare enum V3FunctionName {
921
- ACT = "ACT",
922
- EXTRACT = "EXTRACT",
923
- OBSERVE = "OBSERVE",
924
- AGENT = "AGENT"
1298
+ declare class StagehandInitError extends StagehandError {
1299
+ constructor(message: string);
1300
+ }
1301
+ declare class MCPConnectionError extends StagehandError {
1302
+ readonly serverUrl: string;
1303
+ readonly originalError: unknown;
1304
+ constructor(serverUrl: string, originalError: unknown);
1305
+ }
1306
+ declare class StagehandShadowRootMissingError extends StagehandError {
1307
+ constructor(detail?: string);
1308
+ }
1309
+ declare class StagehandShadowSegmentEmptyError extends StagehandError {
1310
+ constructor();
1311
+ }
1312
+ declare class StagehandShadowSegmentNotFoundError extends StagehandError {
1313
+ constructor(segment: string, hint?: string);
925
1314
  }
926
1315
 
927
1316
  /**
@@ -937,7 +1326,6 @@ declare enum V3FunctionName {
937
1326
  */
938
1327
  declare class V3Context {
939
1328
  readonly conn: CdpConnection;
940
- private readonly includeCursor;
941
1329
  private readonly env;
942
1330
  private constructor();
943
1331
  private readonly _piercerInstalled;
@@ -952,335 +1340,96 @@ declare class V3Context {
952
1340
  private createdAtByTarget;
953
1341
  private typeByTarget;
954
1342
  private _pageOrder;
1343
+ private pendingCreatedTargetUrl;
955
1344
  /**
956
1345
  * Create a Context for a given CDP websocket URL and bootstrap target wiring.
957
1346
  */
958
1347
  static create(wsUrl: string, opts?: {
959
- includeCursor?: boolean;
960
1348
  env?: "LOCAL" | "BROWSERBASE";
961
1349
  }): Promise<V3Context>;
962
1350
  /**
963
- * Wait until at least one top-level Page has been created and registered.
964
- * We poll internal maps that bootstrap/onAttachedToTarget populate.
965
- */
966
- private waitForFirstTopLevelPage;
967
- private ensurePiercer;
968
- /** Mark a page target as the most-recent one (active). */
969
- private _pushActive;
970
- /** Remove a page target from the recency list (used on close). */
971
- private _removeFromOrder;
972
- /** Return the current active Page (most-recent page that still exists). */
973
- activePage(): Page | undefined;
974
- /**
975
- * Return top-level `Page`s (oldest → newest). OOPIF targets are not included.
976
- */
977
- pages(): Page[];
978
- /**
979
- * Resolve an owning `Page` by the **top-level main frame id**.
980
- * Note: child (OOPIF) roots are intentionally not present in this mapping.
981
- */
982
- resolvePageByMainFrameId(frameId: string): Page | undefined;
983
- /**
984
- * Serialize the full frame tree for a given top-level main frame id.
985
- */
986
- getFullFrameTreeByMainFrameId(rootMainFrameId: string): Promise<Protocol.Page.FrameTree>;
987
- /**
988
- * Create a new top-level page (tab) with the given URL and return its Page object.
989
- * Waits until the target is attached and registered.
990
- */
991
- newPage(url?: string): Promise<Page>;
992
- /**
993
- * Close CDP and clear all mappings. Best-effort cleanup.
994
- */
995
- close(): Promise<void>;
996
- /**
997
- * Bootstrap target lifecycle:
998
- * - Attach to existing targets.
999
- * - Attach on `Target.targetCreated` (fallback for OOPIFs).
1000
- * - Handle auto-attach events.
1001
- * - Clean up on detach/destroy.
1002
- */
1003
- private bootstrap;
1004
- /**
1005
- * Handle a newly attached target (top-level or potential OOPIF):
1006
- * - Enable Page domain and lifecycle events.
1007
- * - If top-level → create Page, wire listeners, resume.
1008
- * - Else → probe child root frame id via `Page.getFrameTree` and adopt immediately
1009
- * if the parent is known; otherwise stage until parent `frameAttached`.
1010
- * - Resume the target only after listeners are wired.
1011
- */
1012
- private onAttachedToTarget;
1013
- /**
1014
- * Detach handler:
1015
- * - Remove child session ownership and prune its subtree.
1016
- * - If a top-level target, cleanup its `Page` and mappings.
1017
- * - Drop any staged child for this session.
1018
- */
1019
- private onDetachedFromTarget;
1020
- /**
1021
- * Cleanup a top-level Page by target id, removing its root and staged children.
1022
- */
1023
- private cleanupByTarget;
1024
- /**
1025
- * Wire Page-domain frame events for a session into the owning Page & mappings.
1026
- * We forward the *emitting session* with every event so Page can stamp ownership precisely.
1027
- */
1028
- private installFrameEventBridges;
1029
- /**
1030
- * Register that a session belongs to a Page (used by event routing).
1031
- */
1032
- private wireSessionToOwnerPage;
1033
- /**
1034
- * Utility: reverse-lookup the top-level target id that owns a given Page.
1035
- */
1036
- private findTargetIdByPage;
1037
- private _notePopupSignal;
1038
- /**
1039
- * Await the current active page, waiting briefly if a popup/open was just triggered.
1040
- * Normal path returns immediately; popup path waits up to timeoutMs for the new page.
1041
- */
1042
- awaitActivePage(timeoutMs?: number): Promise<Page>;
1043
- }
1044
-
1045
- interface AgentAction {
1046
- type: string;
1047
- reasoning?: string;
1048
- taskCompleted?: boolean;
1049
- action?: string;
1050
- timeMs?: number;
1051
- pageText?: string;
1052
- pageUrl?: string;
1053
- instruction?: string;
1054
- [key: string]: unknown;
1055
- }
1056
- interface AgentResult {
1057
- success: boolean;
1058
- message: string;
1059
- actions: AgentAction[];
1060
- completed: boolean;
1061
- metadata?: Record<string, unknown>;
1062
- usage?: {
1063
- input_tokens: number;
1064
- output_tokens: number;
1065
- inference_time_ms: number;
1066
- };
1067
- }
1068
- interface AgentOptions {
1069
- maxSteps?: number;
1070
- autoScreenshot?: boolean;
1071
- waitBetweenActions?: number;
1072
- context?: string;
1073
- }
1074
- interface AgentExecuteOptions extends AgentOptions {
1075
- instruction: string;
1076
- }
1077
- type AgentProviderType = "openai" | "anthropic";
1078
- interface AgentClientOptions {
1079
- apiKey: string;
1080
- organization?: string;
1081
- baseURL?: string;
1082
- defaultMaxSteps?: number;
1083
- [key: string]: unknown;
1084
- }
1085
- type AgentType = "openai" | "anthropic";
1086
- interface AgentExecutionOptions {
1087
- options: AgentExecuteOptions;
1088
- logger: (message: LogLine) => void;
1089
- retries?: number;
1090
- }
1091
- interface AgentHandlerOptions {
1092
- modelName: string;
1093
- clientOptions?: Record<string, unknown>;
1094
- userProvidedInstructions?: string;
1095
- agentType: AgentType;
1096
- experimental?: boolean;
1097
- }
1098
- interface ActionExecutionResult {
1099
- success: boolean;
1100
- error?: string;
1101
- data?: unknown;
1102
- }
1103
- interface ToolUseItem extends ResponseItem {
1104
- type: "tool_use";
1105
- id: string;
1106
- name: string;
1107
- input: Record<string, unknown>;
1108
- }
1109
- interface AnthropicMessage {
1110
- role: string;
1111
- content: string | Array<AnthropicContentBlock>;
1112
- }
1113
- interface AnthropicContentBlock {
1114
- type: string;
1115
- [key: string]: unknown;
1116
- }
1117
- interface AnthropicTextBlock extends AnthropicContentBlock {
1118
- type: "text";
1119
- text: string;
1120
- }
1121
- interface AnthropicToolResult {
1122
- type: "tool_result";
1123
- tool_use_id: string;
1124
- content: string | Array<AnthropicContentBlock>;
1125
- }
1126
- interface ResponseItem {
1127
- type: string;
1128
- id: string;
1129
- [key: string]: unknown;
1130
- }
1131
- interface ComputerCallItem extends ResponseItem {
1132
- type: "computer_call";
1133
- call_id: string;
1134
- action: {
1135
- type: string;
1136
- [key: string]: unknown;
1137
- };
1138
- pending_safety_checks?: Array<{
1139
- id: string;
1140
- code: string;
1141
- message: string;
1142
- }>;
1143
- }
1144
- interface FunctionCallItem extends ResponseItem {
1145
- type: "function_call";
1146
- call_id: string;
1147
- name: string;
1148
- arguments: string;
1149
- }
1150
- type ResponseInputItem = {
1151
- role: string;
1152
- content: string;
1153
- } | {
1154
- type: "computer_call_output";
1155
- call_id: string;
1156
- output: {
1157
- type: "input_image";
1158
- image_url: string;
1159
- current_url?: string;
1160
- error?: string;
1161
- [key: string]: unknown;
1162
- } | string;
1163
- acknowledged_safety_checks?: Array<{
1164
- id: string;
1165
- code: string;
1166
- message: string;
1167
- }>;
1168
- } | {
1169
- type: "function_call_output";
1170
- call_id: string;
1171
- output: string;
1172
- };
1173
- interface AgentInstance {
1174
- execute: (instructionOrOptions: string | AgentExecuteOptions) => Promise<AgentResult>;
1175
- }
1176
-
1177
- interface ActOptions {
1178
- action: string;
1179
- model?: ModelConfiguration;
1180
- variables?: Record<string, string>;
1181
- domSettleTimeoutMs?: number;
1182
- timeoutMs?: number;
1183
- iframes?: boolean;
1184
- frameId?: string;
1185
- }
1186
- interface ActResult {
1187
- success: boolean;
1188
- message: string;
1189
- actionDescription: string;
1190
- actions: Action[];
1191
- }
1192
- interface ExtractOptions<T extends z.AnyZodObject> {
1193
- instruction?: string;
1194
- schema?: T;
1195
- model?: ModelConfiguration;
1196
- domSettleTimeoutMs?: number;
1197
- /**
1198
- * @deprecated The `useTextExtract` parameter has no effect in this version of Stagehand and will be removed in later versions.
1351
+ * Wait until at least one top-level Page has been created and registered.
1352
+ * We poll internal maps that bootstrap/onAttachedToTarget populate.
1199
1353
  */
1200
- useTextExtract?: boolean;
1201
- selector?: string;
1202
- iframes?: boolean;
1203
- frameId?: string;
1204
- }
1205
- type ExtractResult<T extends z.AnyZodObject> = z.infer<T>;
1206
- interface ObserveOptions {
1207
- instruction?: string;
1208
- model?: ModelConfiguration;
1209
- domSettleTimeoutMs?: number;
1210
- returnAction?: boolean;
1211
- selector?: string;
1354
+ private waitForFirstTopLevelPage;
1355
+ private ensurePiercer;
1356
+ /** Mark a page target as the most-recent one (active). */
1357
+ private _pushActive;
1358
+ /** Remove a page target from the recency list (used on close). */
1359
+ private _removeFromOrder;
1360
+ /** Return the current active Page (most-recent page that still exists). */
1361
+ activePage(): Page | undefined;
1362
+ /** Explicitly mark a known Page as the most-recent active page (and focus it). */
1363
+ setActivePage(page: Page): void;
1212
1364
  /**
1213
- * @deprecated The `onlyVisible` parameter has no effect in this version of Stagehand and will be removed in later versions.
1365
+ * Return top-level `Page`s (oldest → newest). OOPIF targets are not included.
1214
1366
  */
1215
- onlyVisible?: boolean;
1216
- drawOverlay?: boolean;
1217
- iframes?: boolean;
1218
- frameId?: string;
1219
- }
1220
- interface Action {
1221
- selector: string;
1222
- description: string;
1223
- backendNodeId?: number;
1224
- method?: string;
1225
- arguments?: string[];
1226
- }
1227
- /**
1228
- * Configuration for agent functionality
1229
- */
1230
- interface AgentConfig {
1367
+ pages(): Page[];
1231
1368
  /**
1232
- * The provider to use for agent functionality
1369
+ * Resolve an owning `Page` by the **top-level main frame id**.
1370
+ * Note: child (OOPIF) roots are intentionally not present in this mapping.
1233
1371
  */
1234
- provider?: AgentProviderType;
1372
+ resolvePageByMainFrameId(frameId: string): Page | undefined;
1235
1373
  /**
1236
- * The model to use for agent functionality
1374
+ * Serialize the full frame tree for a given top-level main frame id.
1237
1375
  */
1238
- model?: string;
1376
+ getFullFrameTreeByMainFrameId(rootMainFrameId: string): Promise<Protocol.Page.FrameTree>;
1239
1377
  /**
1240
- * The model to use for tool execution (observe/act calls within agent tools).
1241
- * If not specified, inherits from the main model configuration.
1242
- * Format: "provider/model" (e.g., "openai/gpt-4o-mini", "google/gemini-2.0-flash-exp")
1378
+ * Create a new top-level page (tab) with the given URL and return its Page object.
1379
+ * Waits until the target is attached and registered.
1243
1380
  */
1244
- executionModel?: string;
1381
+ newPage(url?: string): Promise<Page>;
1245
1382
  /**
1246
- * Custom instructions to provide to the agent
1383
+ * Close CDP and clear all mappings. Best-effort cleanup.
1247
1384
  */
1248
- instructions?: string;
1385
+ close(): Promise<void>;
1249
1386
  /**
1250
- * Additional options to pass to the agent client
1387
+ * Bootstrap target lifecycle:
1388
+ * - Attach to existing targets.
1389
+ * - Attach on `Target.targetCreated` (fallback for OOPIFs).
1390
+ * - Handle auto-attach events.
1391
+ * - Clean up on detach/destroy.
1251
1392
  */
1252
- options?: Record<string, unknown>;
1393
+ private bootstrap;
1253
1394
  /**
1254
- * MCP integrations - Array of Client objects
1395
+ * Handle a newly attached target (top-level or potential OOPIF):
1396
+ * - Enable Page domain and lifecycle events.
1397
+ * - If top-level → create Page, wire listeners, resume.
1398
+ * - Else → probe child root frame id via `Page.getFrameTree` and adopt immediately
1399
+ * if the parent is known; otherwise stage until parent `frameAttached`.
1400
+ * - Resume the target only after listeners are wired.
1255
1401
  */
1256
- integrations?: (Client | string)[];
1402
+ private onAttachedToTarget;
1257
1403
  /**
1258
- * Tools passed to the agent client
1404
+ * Detach handler:
1405
+ * - Remove child session ownership and prune its subtree.
1406
+ * - If a top-level target, cleanup its `Page` and mappings.
1407
+ * - Drop any staged child for this session.
1259
1408
  */
1260
- tools?: ToolSet;
1261
- }
1262
- interface HistoryEntry {
1263
- method: "act" | "extract" | "observe" | "navigate";
1264
- parameters: unknown;
1265
- result: unknown;
1266
- timestamp: string;
1267
- }
1268
- /**
1269
- * Represents a path through a Zod schema from the root object down to a
1270
- * particular field. The `segments` array describes the chain of keys/indices.
1271
- *
1272
- * - **String** segments indicate object property names.
1273
- * - **Number** segments indicate array indices.
1274
- *
1275
- * For example, `["users", 0, "homepage"]` might describe reaching
1276
- * the `homepage` field in `schema.users[0].homepage`.
1277
- */
1278
- interface ZodPathSegments {
1409
+ private onDetachedFromTarget;
1279
1410
  /**
1280
- * The ordered list of keys/indices leading from the schema root
1281
- * to the targeted field.
1411
+ * Cleanup a top-level Page by target id, removing its root and staged children.
1282
1412
  */
1283
- segments: Array<string | number>;
1413
+ private cleanupByTarget;
1414
+ /**
1415
+ * Wire Page-domain frame events for a session into the owning Page & mappings.
1416
+ * We forward the *emitting session* with every event so Page can stamp ownership precisely.
1417
+ */
1418
+ private installFrameEventBridges;
1419
+ /**
1420
+ * Register that a session belongs to a Page (used by event routing).
1421
+ */
1422
+ private wireSessionToOwnerPage;
1423
+ /**
1424
+ * Utility: reverse-lookup the top-level target id that owns a given Page.
1425
+ */
1426
+ private findTargetIdByPage;
1427
+ private _notePopupSignal;
1428
+ /**
1429
+ * Await the current active page, waiting briefly if a popup/open was just triggered.
1430
+ * Normal path returns immediately; popup path waits up to timeoutMs for the new page.
1431
+ */
1432
+ awaitActivePage(timeoutMs?: number): Promise<Page>;
1284
1433
  }
1285
1434
 
1286
1435
  type AgentReplayStep = AgentReplayActStep | AgentReplayFillFormStep | AgentReplayGotoStep | AgentReplayScrollStep | AgentReplayWaitStep | AgentReplayNavBackStep | {
@@ -1353,6 +1502,7 @@ declare class V3 {
1353
1502
  private modelName;
1354
1503
  private modelClientOptions;
1355
1504
  private llmProvider;
1505
+ private overrideLlmClients;
1356
1506
  private readonly domSettleTimeoutMs?;
1357
1507
  private _isClosing;
1358
1508
  browserbaseSessionId?: string;
@@ -1365,15 +1515,16 @@ declare class V3 {
1365
1515
  private readonly instanceId;
1366
1516
  private static _processGuardsInstalled;
1367
1517
  private static _instances;
1368
- private cacheDir?;
1369
- private _agentReplayRecording;
1518
+ private cacheStorage;
1519
+ private actCache;
1520
+ private agentCache;
1370
1521
  v3Metrics: V3Metrics;
1371
1522
  /**
1372
1523
  * Async property for metrics so callers can `await v3.metrics`.
1373
1524
  * Returning a Promise future-proofs async aggregation/storage.
1374
1525
  */
1375
1526
  get metrics(): Promise<V3Metrics>;
1376
- private cloneForCache;
1527
+ private resolveLlmClient;
1377
1528
  private beginAgentReplayRecording;
1378
1529
  private endAgentReplayRecording;
1379
1530
  private discardAgentReplayRecording;
@@ -1406,8 +1557,8 @@ declare class V3 {
1406
1557
  * - act(instruction: string, options?: ActOptions)
1407
1558
  * - act(action: Action, options?: ActOptions)
1408
1559
  */
1409
- act(instruction: string, options?: ActOptions$1): Promise<ActResult>;
1410
- act(action: Action, options?: ActOptions$1): Promise<ActResult>;
1560
+ act(instruction: string, options?: ActOptions): Promise<ActResult>;
1561
+ act(action: Action, options?: ActOptions): Promise<ActResult>;
1411
1562
  /**
1412
1563
  * Run an "extract" instruction through the ExtractHandler.
1413
1564
  *
@@ -1419,15 +1570,15 @@ declare class V3 {
1419
1570
  * - extract(instruction, schema, options)
1420
1571
  */
1421
1572
  extract(): Promise<z.infer<typeof pageTextSchema>>;
1422
- extract(options: ExtractOptions$1): Promise<z.infer<typeof pageTextSchema>>;
1423
- extract(instruction: string, options?: ExtractOptions$1): Promise<z.infer<typeof defaultExtractSchema>>;
1424
- extract<T extends ZodTypeAny>(instruction: string, schema: T, options?: ExtractOptions$1): Promise<z.infer<T>>;
1573
+ extract(options: ExtractOptions): Promise<z.infer<typeof pageTextSchema>>;
1574
+ extract(instruction: string, options?: ExtractOptions): Promise<z.infer<typeof defaultExtractSchema>>;
1575
+ extract<T extends ZodTypeAny>(instruction: string, schema: T, options?: ExtractOptions): Promise<z.infer<T>>;
1425
1576
  /**
1426
1577
  * Run an "observe" instruction through the ObserveHandler.
1427
1578
  */
1428
1579
  observe(): Promise<Action[]>;
1429
- observe(options: ObserveOptions$1): Promise<Action[]>;
1430
- observe(instruction: string, options?: ObserveOptions$1): Promise<Action[]>;
1580
+ observe(options: ObserveOptions): Promise<Action[]>;
1581
+ observe(instruction: string, options?: ObserveOptions): Promise<Action[]>;
1431
1582
  /** Return the browser-level CDP WebSocket endpoint. */
1432
1583
  connectURL(): string;
1433
1584
  /** Expose the current CDP-backed context. */
@@ -1447,26 +1598,9 @@ declare class V3 {
1447
1598
  private isPlaywrightPage;
1448
1599
  private isPatchrightPage;
1449
1600
  private isPuppeteerPage;
1601
+ /** Resolve an external page reference or fall back to the active V3 page. */
1602
+ private resolvePage;
1450
1603
  private normalizeToV3Page;
1451
- private buildActCacheKey;
1452
- private safeGetPageUrl;
1453
- private readActCacheEntry;
1454
- private writeActCacheEntry;
1455
- private sanitizeAgentExecuteOptions;
1456
- private buildAgentCacheSignature;
1457
- private buildAgentCacheKey;
1458
- private readAgentCacheEntry;
1459
- private writeAgentCacheEntry;
1460
- private replayAgentCacheEntry;
1461
- private executeAgentReplayStep;
1462
- private replayAgentActStep;
1463
- private replayAgentFillFormStep;
1464
- private replayAgentGotoStep;
1465
- private replayAgentScrollStep;
1466
- private replayAgentWaitStep;
1467
- private replayAgentNavBackStep;
1468
- private replayCachedActions;
1469
- private runWithActTimeout;
1470
1604
  /**
1471
1605
  * Create a v3 agent instance (AISDK tool-based) with execute().
1472
1606
  * Mirrors the v2 Stagehand.agent() tool mode (no CUA provider here).
@@ -1476,128 +1610,100 @@ declare class V3 {
1476
1610
  };
1477
1611
  }
1478
1612
 
1479
- declare class StagehandAPIError extends Error {
1480
- constructor(message: string);
1481
- }
1482
- declare class StagehandAPIUnauthorizedError extends StagehandAPIError {
1483
- constructor(message?: string);
1484
- }
1485
- declare class StagehandHttpError extends StagehandAPIError {
1486
- constructor(message: string);
1487
- }
1488
- declare class StagehandServerError extends StagehandAPIError {
1489
- constructor(message: string);
1490
- }
1491
- declare class StagehandResponseBodyError extends StagehandAPIError {
1492
- constructor();
1493
- }
1494
- declare class StagehandResponseParseError extends StagehandAPIError {
1495
- constructor(message: string);
1613
+ /**
1614
+ * Abstract base class for agent clients
1615
+ * This provides a common interface for all agent implementations
1616
+ */
1617
+ declare abstract class AgentClient {
1618
+ type: AgentType;
1619
+ modelName: string;
1620
+ clientOptions: Record<string, unknown>;
1621
+ userProvidedInstructions?: string;
1622
+ constructor(type: AgentType, modelName: string, userProvidedInstructions?: string);
1623
+ abstract execute(options: AgentExecutionOptions): Promise<AgentResult>;
1624
+ abstract captureScreenshot(options?: Record<string, unknown>): Promise<unknown>;
1625
+ abstract setViewport(width: number, height: number): void;
1626
+ abstract setCurrentUrl(url: string): void;
1627
+ abstract setScreenshotProvider(provider: () => Promise<string>): void;
1628
+ abstract setActionHandler(handler: (action: AgentAction) => Promise<void>): void;
1496
1629
  }
1497
1630
 
1498
- declare class StagehandError extends Error {
1499
- constructor(message: string);
1500
- }
1501
- declare class StagehandDefaultError extends StagehandError {
1502
- constructor(error?: unknown);
1503
- }
1504
- declare class StagehandEnvironmentError extends StagehandError {
1505
- constructor(currentEnvironment: string, requiredEnvironment: string, feature: string);
1506
- }
1507
- declare class MissingEnvironmentVariableError extends StagehandError {
1508
- constructor(missingEnvironmentVariable: string, feature: string);
1509
- }
1510
- declare class UnsupportedModelError extends StagehandError {
1511
- constructor(supportedModels: string[], feature?: string);
1512
- }
1513
- declare class UnsupportedModelProviderError extends StagehandError {
1514
- constructor(supportedProviders: string[], feature?: string);
1515
- }
1516
- declare class UnsupportedAISDKModelProviderError extends StagehandError {
1517
- constructor(provider: string, supportedProviders: string[]);
1518
- }
1519
- declare class InvalidAISDKModelFormatError extends StagehandError {
1520
- constructor(modelName: string);
1521
- }
1522
- declare class StagehandNotInitializedError extends StagehandError {
1523
- constructor(prop: string);
1524
- }
1525
- declare class BrowserbaseSessionNotFoundError extends StagehandError {
1526
- constructor();
1527
- }
1528
- declare class CaptchaTimeoutError extends StagehandError {
1529
- constructor();
1530
- }
1531
- declare class MissingLLMConfigurationError extends StagehandError {
1532
- constructor();
1533
- }
1534
- declare class HandlerNotInitializedError extends StagehandError {
1535
- constructor(handlerType: string);
1536
- }
1537
- declare class StagehandInvalidArgumentError extends StagehandError {
1538
- constructor(message: string);
1539
- }
1540
- declare class StagehandElementNotFoundError extends StagehandError {
1541
- constructor(xpaths: string[]);
1542
- }
1543
- declare class AgentScreenshotProviderError extends StagehandError {
1544
- constructor(message: string);
1545
- }
1546
- declare class StagehandMissingArgumentError extends StagehandError {
1547
- constructor(message: string);
1548
- }
1549
- declare class CreateChatCompletionResponseError extends StagehandError {
1550
- constructor(message: string);
1551
- }
1552
- declare class StagehandEvalError extends StagehandError {
1553
- constructor(message: string);
1554
- }
1555
- declare class StagehandDomProcessError extends StagehandError {
1556
- constructor(message: string);
1557
- }
1558
- declare class StagehandClickError extends StagehandError {
1559
- constructor(message: string, selector: string);
1560
- }
1561
- declare class LLMResponseError extends StagehandError {
1562
- constructor(primitive: string, message: string);
1563
- }
1564
- declare class StagehandIframeError extends StagehandError {
1565
- constructor(frameUrl: string, message: string);
1566
- }
1567
- declare class ContentFrameNotFoundError extends StagehandError {
1568
- constructor(selector: string);
1569
- }
1570
- declare class XPathResolutionError extends StagehandError {
1571
- constructor(xpath: string);
1572
- }
1573
- declare class ExperimentalApiConflictError extends StagehandError {
1574
- constructor();
1575
- }
1576
- declare class ExperimentalNotConfiguredError extends StagehandError {
1577
- constructor(featureName: string);
1578
- }
1579
- declare class ZodSchemaValidationError extends Error {
1580
- readonly received: unknown;
1581
- readonly issues: ReturnType<ZodError["format"]>;
1582
- constructor(received: unknown, issues: ReturnType<ZodError["format"]>);
1583
- }
1584
- declare class StagehandInitError extends StagehandError {
1585
- constructor(message: string);
1586
- }
1587
- declare class MCPConnectionError extends StagehandError {
1588
- readonly serverUrl: string;
1589
- readonly originalError: unknown;
1590
- constructor(serverUrl: string, originalError: unknown);
1591
- }
1592
- declare class StagehandShadowRootMissingError extends StagehandError {
1593
- constructor(detail?: string);
1631
+ declare const modelToAgentProviderMap: Record<string, AgentProviderType>;
1632
+ /**
1633
+ * Provider for agent clients
1634
+ * This class is responsible for creating the appropriate agent client
1635
+ * based on the provider type
1636
+ */
1637
+ declare class AgentProvider {
1638
+ private logger;
1639
+ /**
1640
+ * Create a new agent provider
1641
+ */
1642
+ constructor(logger: (message: LogLine) => void);
1643
+ getClient(modelName: string, clientOptions?: Record<string, unknown>, userProvidedInstructions?: string, tools?: ToolSet$1): AgentClient;
1644
+ static getAgentProvider(modelName: string): AgentProviderType;
1594
1645
  }
1595
- declare class StagehandShadowSegmentEmptyError extends StagehandError {
1596
- constructor();
1646
+
1647
+ declare function validateZodSchema(schema: z.ZodTypeAny, data: unknown): boolean;
1648
+ /**
1649
+ * Detects if the code is running in the Bun runtime environment.
1650
+ * @returns {boolean} True if running in Bun, false otherwise.
1651
+ */
1652
+ declare function isRunningInBun(): boolean;
1653
+ declare function toGeminiSchema(zodSchema: z.ZodTypeAny): Schema;
1654
+ declare function getZodType(schema: z.ZodTypeAny): string;
1655
+ /**
1656
+ * Recursively traverses a given Zod schema, scanning for any fields of type `z.string().url()`.
1657
+ * For each such field, it replaces the `z.string().url()` with `z.number()`.
1658
+ *
1659
+ * This function is used internally by higher-level utilities (e.g., transforming entire object schemas)
1660
+ * and handles nested objects, arrays, unions, intersections, optionals.
1661
+ *
1662
+ * @param schema - The Zod schema to transform.
1663
+ * @param currentPath - An array of string/number keys representing the current schema path (used internally for recursion).
1664
+ * @returns A two-element tuple:
1665
+ * 1. The updated Zod schema, with any `.url()` fields replaced by `z.number()`.
1666
+ * 2. An array of {@link ZodPathSegments} objects representing each replaced field, including the path segments.
1667
+ */
1668
+ declare function transformSchema(schema: z.ZodTypeAny, currentPath: Array<string | number>): [z.ZodTypeAny, ZodPathSegments[]];
1669
+ /**
1670
+ * Once we get the final extracted object that has numeric IDs in place of URLs,
1671
+ * use `injectUrls` to walk the object and replace numeric IDs
1672
+ * with the real URL strings from idToUrlMapping. The `path` may include `*`
1673
+ * for array indices (indicating "all items in the array").
1674
+ */
1675
+ declare function injectUrls(obj: unknown, path: Array<string | number>, idToUrlMapping: Record<string, string>): void;
1676
+ /**
1677
+ * Mapping from LLM provider names to their corresponding environment variable names for API keys.
1678
+ */
1679
+ declare const providerEnvVarMap: Partial<Record<ModelProvider | string, string>>;
1680
+ /**
1681
+ * Loads an API key for a provider, checking environment variables.
1682
+ * @param provider The name of the provider (e.g., 'openai', 'anthropic')
1683
+ * @param logger Optional logger for info/error messages
1684
+ * @returns The API key if found, undefined otherwise
1685
+ */
1686
+ declare function loadApiKeyFromEnv(provider: string | undefined, logger: (logLine: LogLine) => void): string | undefined;
1687
+ declare function trimTrailingTextNode(path: string | undefined): string | undefined;
1688
+ interface JsonSchemaProperty {
1689
+ type: string;
1690
+ enum?: unknown[];
1691
+ items?: JsonSchemaProperty;
1692
+ properties?: Record<string, JsonSchemaProperty>;
1693
+ required?: string[];
1694
+ minimum?: number;
1695
+ maximum?: number;
1696
+ description?: string;
1597
1697
  }
1598
- declare class StagehandShadowSegmentNotFoundError extends StagehandError {
1599
- constructor(segment: string, hint?: string);
1698
+ interface JsonSchema extends JsonSchemaProperty {
1699
+ type: string;
1600
1700
  }
1701
+ /**
1702
+ * Converts a JSON Schema object to a Zod schema
1703
+ * @param schema The JSON Schema object to convert
1704
+ * @returns A Zod schema equivalent to the input JSON Schema
1705
+ */
1706
+ declare function jsonSchemaToZod(schema: JsonSchema): ZodTypeAny;
1601
1707
 
1602
1708
  interface ConnectToMCPServerOptions {
1603
1709
  serverUrl: string | URL;
@@ -1608,6 +1714,24 @@ interface StdioServerConfig {
1608
1714
  args?: string[];
1609
1715
  env?: Record<string, string>;
1610
1716
  }
1611
- declare const connectToMCPServer: (serverConfig: string | URL | StdioServerConfig | ConnectToMCPServerOptions) => Promise<Client$1>;
1717
+ declare const connectToMCPServer: (serverConfig: string | URL | StdioServerConfig | ConnectToMCPServerOptions) => Promise<Client>;
1718
+
1719
+ /**
1720
+ * V3Evaluator mirrors Evaluator but operates on a V3 instance instead of Stagehand.
1721
+ * It uses the V3 page/screenshot APIs and constructs an LLM client to run
1722
+ * structured evaluations (YES/NO with reasoning) on screenshots and/or text.
1723
+ */
1724
+
1725
+ declare class V3Evaluator {
1726
+ private v3;
1727
+ private modelName;
1728
+ private modelClientOptions;
1729
+ private silentLogger;
1730
+ constructor(v3: V3, modelName?: AvailableModel, modelClientOptions?: ClientOptions);
1731
+ private getClient;
1732
+ ask(options: EvaluateOptions): Promise<EvaluationResult>;
1733
+ batchAsk(options: BatchAskOptions): Promise<EvaluationResult[]>;
1734
+ private _evaluateWithMultipleScreenshots;
1735
+ }
1612
1736
 
1613
- export { type ActOptions, type ActResult, type Action, type ActionExecutionResult, type AgentAction, type AgentClientOptions, type AgentConfig, type AgentExecuteOptions, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentOptions, type AgentProviderType, type AgentResult, AgentScreenshotProviderError, type AgentType, AnnotatedScreenshotText, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AvailableModel, AvailableModelSchema, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ComputerCallItem, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, type FunctionCallItem, HandlerNotInitializedError, type HistoryEntry, InvalidAISDKModelFormatError, LLMClient, type LLMResponse, LLMResponseError, LOG_LEVEL_NAMES, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelConfiguration, type ModelProvider, type ObserveOptions, type ResponseInputItem, type ResponseItem, V3 as Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, StagehandEnvironmentError, StagehandError, StagehandEvalError, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, StagehandResponseParseError, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, type ToolUseItem, 
UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, V3, XPathResolutionError, type ZodPathSegments, ZodSchemaValidationError, connectToMCPServer };
1737
+ export { type AISDKCustomProvider, type AISDKProvider, AVAILABLE_CUA_MODELS, type ActOptions, type ActResult, type Action, type ActionExecutionResult, type AgentAction, type AgentConfig, type AgentExecuteOptions, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentModelConfig, AgentProvider, type AgentProviderType, type AgentResult, AgentScreenshotProviderError, type AgentType, AnnotatedScreenshotText, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AnyPage, type AvailableCuaModel, type AvailableModel, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ComputerCallItem, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, type FunctionCallItem, HandlerNotInitializedError, type HistoryEntry, InvalidAISDKModelFormatError, type JsonSchema, type JsonSchemaProperty, LLMClient, type LLMResponse, LLMResponseError, type LLMTool, LOG_LEVEL_NAMES, type LoadState, type LocalBrowserLaunchOptions, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelConfiguration, type ModelProvider, type ObserveOptions, type ResponseInputItem, type ResponseItem, V3 as Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, StagehandEnvironmentError, StagehandError, StagehandEvalError, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, 
StagehandResponseParseError, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, type ToolUseItem, UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, V3, type V3Env, V3Evaluator, V3FunctionName, type V3Metrics, type V3Options, XPathResolutionError, ZodSchemaValidationError, connectToMCPServer, defaultExtractSchema, getZodType, injectUrls, isRunningInBun, jsonSchemaToZod, loadApiKeyFromEnv, modelToAgentProviderMap, pageTextSchema, providerEnvVarMap, toGeminiSchema, transformSchema, trimTrailingTextNode, validateZodSchema };