@browserbasehq/orca 3.0.0-preview.1 → 3.0.0-preview.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. package/dist/index.d.ts +759 -593
  2. package/dist/index.js +25560 -24375
  3. package/package.json +35 -74
  4. package/README.md +0 -165
  5. package/dist/lib/StagehandContext.d.ts +0 -25
  6. package/dist/lib/StagehandPage.d.ts +0 -103
  7. package/dist/lib/a11y/utils.d.ts +0 -144
  8. package/dist/lib/agent/AgentClient.d.ts +0 -20
  9. package/dist/lib/agent/AgentProvider.d.ts +0 -19
  10. package/dist/lib/agent/AnthropicCUAClient.d.ts +0 -56
  11. package/dist/lib/agent/GoogleCUAClient.d.ts +0 -63
  12. package/dist/lib/agent/OpenAICUAClient.d.ts +0 -65
  13. package/dist/lib/agent/StagehandAgent.d.ts +0 -15
  14. package/dist/lib/agent/tools/act.d.ts +0 -59
  15. package/dist/lib/agent/tools/ariaTree.d.ts +0 -11
  16. package/dist/lib/agent/tools/close.d.ts +0 -22
  17. package/dist/lib/agent/tools/extract.d.ts +0 -38
  18. package/dist/lib/agent/tools/fillform.d.ts +0 -37
  19. package/dist/lib/agent/tools/goto.d.ts +0 -29
  20. package/dist/lib/agent/tools/index.d.ts +0 -257
  21. package/dist/lib/agent/tools/navback.d.ts +0 -17
  22. package/dist/lib/agent/tools/screenshot.d.ts +0 -13
  23. package/dist/lib/agent/tools/scroll.d.ts +0 -23
  24. package/dist/lib/agent/tools/wait.d.ts +0 -18
  25. package/dist/lib/agent/utils/cuaKeyMapping.d.ts +0 -10
  26. package/dist/lib/agent/utils/imageCompression.d.ts +0 -53
  27. package/dist/lib/agent/utils/messageProcessing.d.ts +0 -13
  28. package/dist/lib/api.d.ts +0 -23
  29. package/dist/lib/browserbaseDefaults.d.ts +0 -9
  30. package/dist/lib/cache/ActionCache.d.ts +0 -62
  31. package/dist/lib/cache/BaseCache.d.ts +0 -66
  32. package/dist/lib/cache/LLMCache.d.ts +0 -22
  33. package/dist/lib/cache.d.ts +0 -29
  34. package/dist/lib/dom/build/scriptContent.d.ts +0 -1
  35. package/dist/lib/dom/elementCheckUtils.d.ts +0 -2
  36. package/dist/lib/dom/genDomScripts.d.ts +0 -1
  37. package/dist/lib/dom/index.d.ts +0 -2
  38. package/dist/lib/dom/process.d.ts +0 -17
  39. package/dist/lib/dom/utils.d.ts +0 -7
  40. package/dist/lib/dom/xpathUtils.d.ts +0 -14
  41. package/dist/lib/handlers/actHandler.d.ts +0 -33
  42. package/dist/lib/handlers/cuaAgentHandler.d.ts +0 -58
  43. package/dist/lib/handlers/extractHandler.d.ts +0 -54
  44. package/dist/lib/handlers/handlerUtils/actHandlerUtils.d.ts +0 -21
  45. package/dist/lib/handlers/observeHandler.d.ts +0 -40
  46. package/dist/lib/handlers/stagehandAgentHandler.d.ts +0 -27
  47. package/dist/lib/index.d.ts +0 -94
  48. package/dist/lib/inference.d.ts +0 -71
  49. package/dist/lib/inferenceLogUtils.d.ts +0 -12
  50. package/dist/lib/llm/AnthropicClient.d.ts +0 -21
  51. package/dist/lib/llm/CerebrasClient.d.ts +0 -22
  52. package/dist/lib/llm/GoogleClient.d.ts +0 -24
  53. package/dist/lib/llm/GroqClient.d.ts +0 -22
  54. package/dist/lib/llm/LLMClient.d.ts +0 -99
  55. package/dist/lib/llm/LLMProvider.d.ts +0 -13
  56. package/dist/lib/llm/OpenAIClient.d.ts +0 -20
  57. package/dist/lib/llm/aisdk.d.ts +0 -20
  58. package/dist/lib/logger.d.ts +0 -54
  59. package/dist/lib/mcp/connection.d.ts +0 -11
  60. package/dist/lib/mcp/utils.d.ts +0 -3
  61. package/dist/lib/prompt.d.ts +0 -12
  62. package/dist/lib/utils.d.ts +0 -65
  63. package/dist/lib/v3/agent/AgentClient.d.ts +0 -18
  64. package/dist/lib/v3/agent/AgentProvider.d.ts +0 -18
  65. package/dist/lib/v3/agent/AnthropicCUAClient.d.ts +0 -55
  66. package/dist/lib/v3/agent/OpenAICUAClient.d.ts +0 -64
  67. package/dist/lib/v3/agent/StagehandAgent.d.ts +0 -15
  68. package/dist/lib/v3/agent/tools/index.d.ts +0 -229
  69. package/dist/lib/v3/agent/tools/v3-act.d.ts +0 -29
  70. package/dist/lib/v3/agent/tools/v3-ariaTree.d.ts +0 -11
  71. package/dist/lib/v3/agent/tools/v3-close.d.ts +0 -24
  72. package/dist/lib/v3/agent/tools/v3-extract.d.ts +0 -38
  73. package/dist/lib/v3/agent/tools/v3-fillform.d.ts +0 -37
  74. package/dist/lib/v3/agent/tools/v3-goto.d.ts +0 -29
  75. package/dist/lib/v3/agent/tools/v3-navback.d.ts +0 -17
  76. package/dist/lib/v3/agent/tools/v3-screenshot.d.ts +0 -13
  77. package/dist/lib/v3/agent/tools/v3-scroll.d.ts +0 -23
  78. package/dist/lib/v3/agent/tools/v3-wait.d.ts +0 -19
  79. package/dist/lib/v3/agent/utils/cuaKeyMapping.d.ts +0 -10
  80. package/dist/lib/v3/agent/utils/imageCompression.d.ts +0 -18
  81. package/dist/lib/v3/agent/utils/messageProcessing.d.ts +0 -13
  82. package/dist/lib/v3/dom/build/scriptV3Content.d.ts +0 -1
  83. package/dist/lib/v3/dom/genDomScripts.d.ts +0 -1
  84. package/dist/lib/v3/dom/index.d.ts +0 -1
  85. package/dist/lib/v3/dom/piercer.entry.d.ts +0 -1
  86. package/dist/lib/v3/dom/piercer.runtime.d.ts +0 -25
  87. package/dist/lib/v3/handlers/actHandler.d.ts +0 -18
  88. package/dist/lib/v3/handlers/extractHandler.d.ts +0 -29
  89. package/dist/lib/v3/handlers/handlerUtils/actHandlerUtils.d.ts +0 -18
  90. package/dist/lib/v3/handlers/observeHandler.d.ts +0 -15
  91. package/dist/lib/v3/handlers/v3AgentHandler.d.ts +0 -17
  92. package/dist/lib/v3/handlers/v3CuaAgentHandler.d.ts +0 -26
  93. package/dist/lib/v3/index.d.ts +0 -10
  94. package/dist/lib/v3/launch/browserbase.d.ts +0 -8
  95. package/dist/lib/v3/launch/local.d.ts +0 -13
  96. package/dist/lib/v3/llm/AnthropicClient.d.ts +0 -16
  97. package/dist/lib/v3/llm/CerebrasClient.d.ts +0 -17
  98. package/dist/lib/v3/llm/GoogleClient.d.ts +0 -19
  99. package/dist/lib/v3/llm/GroqClient.d.ts +0 -17
  100. package/dist/lib/v3/llm/LLMClient.d.ts +0 -99
  101. package/dist/lib/v3/llm/LLMProvider.d.ts +0 -10
  102. package/dist/lib/v3/llm/OpenAIClient.d.ts +0 -15
  103. package/dist/lib/v3/llm/aisdk.d.ts +0 -15
  104. package/dist/lib/v3/logger.d.ts +0 -48
  105. package/dist/lib/v3/mcp/connection.d.ts +0 -11
  106. package/dist/lib/v3/mcp/utils.d.ts +0 -3
  107. package/dist/lib/v3/tests/default-page-tracking.spec.d.ts +0 -1
  108. package/dist/lib/v3/tests/downloads.spec.d.ts +0 -1
  109. package/dist/lib/v3/tests/perform-understudy-method.spec.d.ts +0 -1
  110. package/dist/lib/v3/tests/shadow-iframe.spec.d.ts +0 -1
  111. package/dist/lib/v3/tests/timeouts.spec.d.ts +0 -1
  112. package/dist/lib/v3/tests/v3.bb.config.d.ts +0 -4
  113. package/dist/lib/v3/tests/v3.config.d.ts +0 -4
  114. package/dist/lib/v3/tests/v3.playwright.config.d.ts +0 -2
  115. package/dist/lib/v3/tests/xpath-for-location-deep.spec.d.ts +0 -1
  116. package/dist/lib/v3/types/act.d.ts +0 -10
  117. package/dist/lib/v3/types/agent.d.ts +0 -132
  118. package/dist/lib/v3/types/api.d.ts +0 -40
  119. package/dist/lib/v3/types/cache.d.ts +0 -71
  120. package/dist/lib/v3/types/context.d.ts +0 -2
  121. package/dist/lib/v3/types/evals.d.ts +0 -71
  122. package/dist/lib/v3/types/evaluator.d.ts +0 -40
  123. package/dist/lib/v3/types/llm.d.ts +0 -11
  124. package/dist/lib/v3/types/log.d.ts +0 -23
  125. package/dist/lib/v3/types/model.d.ts +0 -20
  126. package/dist/lib/v3/types/playwright.d.ts +0 -6
  127. package/dist/lib/v3/types/stagehand.d.ts +0 -113
  128. package/dist/lib/v3/types/stagehandApiErrors.d.ts +0 -18
  129. package/dist/lib/v3/types/stagehandErrors.d.ts +0 -104
  130. package/dist/lib/v3/types.d.ts +0 -176
  131. package/dist/lib/v3/understudy/a11y/snapshot.d.ts +0 -71
  132. package/dist/lib/v3/understudy/cdp.d.ts +0 -58
  133. package/dist/lib/v3/understudy/context.d.ts +0 -120
  134. package/dist/lib/v3/understudy/deepLocator.d.ts +0 -69
  135. package/dist/lib/v3/understudy/executionContextRegistry.d.ts +0 -15
  136. package/dist/lib/v3/understudy/frame.d.ts +0 -63
  137. package/dist/lib/v3/understudy/frameLocator.d.ts +0 -46
  138. package/dist/lib/v3/understudy/frameRegistry.d.ts +0 -100
  139. package/dist/lib/v3/understudy/locator.d.ts +0 -196
  140. package/dist/lib/v3/understudy/page.d.ts +0 -241
  141. package/dist/lib/v3/understudy/piercer.d.ts +0 -4
  142. package/dist/lib/v3/v3.d.ts +0 -158
  143. package/dist/lib/version.d.ts +0 -5
  144. package/dist/stagehand.config.d.ts +0 -3
  145. package/dist/types/act.d.ts +0 -50
  146. package/dist/types/agent.d.ts +0 -143
  147. package/dist/types/api.d.ts +0 -40
  148. package/dist/types/browser.d.ts +0 -10
  149. package/dist/types/context.d.ts +0 -117
  150. package/dist/types/evals.d.ts +0 -94
  151. package/dist/types/evaluator.d.ts +0 -40
  152. package/dist/types/llm.d.ts +0 -11
  153. package/dist/types/log.d.ts +0 -23
  154. package/dist/types/model.d.ts +0 -17
  155. package/dist/types/page.d.ts +0 -38
  156. package/dist/types/playwright.d.ts +0 -12
  157. package/dist/types/stagehand.d.ts +0 -330
  158. package/dist/types/stagehandApiErrors.d.ts +0 -18
  159. package/dist/types/stagehandErrors.d.ts +0 -104
package/dist/index.d.ts CHANGED
@@ -1,15 +1,165 @@
1
- import * as puppeteer_core from 'puppeteer-core';
2
- import * as patchright_core from 'patchright-core';
3
- import * as playwright_core from 'playwright-core';
4
- import Browserbase from '@browserbasehq/sdk';
5
- import { Protocol } from 'devtools-protocol';
6
- import { Buffer as Buffer$1 } from 'buffer';
1
+ import { ZodType, z, ZodTypeAny, ZodError } from 'zod/v3';
7
2
  import { ClientOptions as ClientOptions$2 } from '@anthropic-ai/sdk';
3
+ import { LanguageModel, generateObject, generateText, streamText, streamObject, experimental_generateImage, embed, embedMany, experimental_transcribe, experimental_generateSpeech, ToolSet } from 'ai';
8
4
  import { ClientOptions as ClientOptions$1 } from 'openai';
9
- import { z, ZodType, ZodTypeAny, ZodError } from 'zod/v3';
10
- import { generateObject, generateText, streamText, streamObject, experimental_generateImage, embed, embedMany, experimental_transcribe, experimental_generateSpeech, LanguageModel, ToolSet } from 'ai';
11
- import { Client } from '@modelcontextprotocol/sdk/dist/esm/client';
12
- import { ClientOptions as ClientOptions$3, Client as Client$1 } from '@modelcontextprotocol/sdk/client/index.js';
5
+ import { Client, ClientOptions as ClientOptions$3 } from '@modelcontextprotocol/sdk/client/index.js';
6
+ import { Page as Page$1 } from 'playwright-core';
7
+ export { Page as PlaywrightPage } from 'playwright-core';
8
+ import { Page as Page$2 } from 'puppeteer-core';
9
+ export { Page as PuppeteerPage } from 'puppeteer-core';
10
+ import { Page as Page$3 } from 'patchright-core';
11
+ export { Page as PatchrightPage } from 'patchright-core';
12
+ import { Protocol } from 'devtools-protocol';
13
+ import { Buffer as Buffer$1 } from 'buffer';
14
+ import Browserbase from '@browserbasehq/sdk';
15
+ import { ToolSet as ToolSet$1 } from 'ai/dist';
16
+ import { Schema } from '@google/genai';
17
+
18
+ type AnthropicJsonSchemaObject = {
19
+ definitions?: {
20
+ MySchema?: {
21
+ properties?: Record<string, unknown>;
22
+ required?: string[];
23
+ };
24
+ };
25
+ properties?: Record<string, unknown>;
26
+ required?: string[];
27
+ } & Record<string, unknown>;
28
+ interface LLMTool {
29
+ type: "function";
30
+ name: string;
31
+ description: string;
32
+ parameters: Record<string, unknown>;
33
+ }
34
+ type AISDKProvider = (modelName: string) => LanguageModel;
35
+ type AISDKCustomProvider = (options: {
36
+ apiKey: string;
37
+ }) => AISDKProvider;
38
+ type AvailableModel = "gpt-4.1" | "gpt-4.1-mini" | "gpt-4.1-nano" | "o4-mini" | "o3" | "o3-mini" | "o1" | "o1-mini" | "gpt-4o" | "gpt-4o-mini" | "gpt-4o-2024-08-06" | "gpt-4.5-preview" | "o1-preview" | "claude-3-5-sonnet-latest" | "claude-3-5-sonnet-20241022" | "claude-3-5-sonnet-20240620" | "claude-3-7-sonnet-latest" | "claude-3-7-sonnet-20250219" | "cerebras-llama-3.3-70b" | "cerebras-llama-3.1-8b" | "groq-llama-3.3-70b-versatile" | "groq-llama-3.3-70b-specdec" | "gemini-1.5-flash" | "gemini-1.5-pro" | "gemini-1.5-flash-8b" | "gemini-2.0-flash-lite" | "gemini-2.0-flash" | "gemini-2.5-flash-preview-04-17" | "gemini-2.5-pro-preview-03-25" | string;
39
+ type ModelProvider = "openai" | "anthropic" | "cerebras" | "groq" | "google" | "aisdk";
40
+ type ClientOptions = ClientOptions$1 | ClientOptions$2;
41
+ type ModelConfiguration = AvailableModel | (ClientOptions & {
42
+ modelName: AvailableModel;
43
+ });
44
+
45
+ type LogLevel = 0 | 1 | 2;
46
+ /**
47
+ * Mapping between numeric log levels and their names
48
+ *
49
+ * 0 - error/warn - Critical issues or important warnings
50
+ * 1 - info - Standard information messages
51
+ * 2 - debug - Detailed information for debugging
52
+ */
53
+ declare const LOG_LEVEL_NAMES: Record<LogLevel, string>;
54
+ type LogLine = {
55
+ id?: string;
56
+ category?: string;
57
+ message: string;
58
+ level?: LogLevel;
59
+ timestamp?: string;
60
+ auxiliary?: {
61
+ [key: string]: {
62
+ value: string;
63
+ type: "object" | "string" | "html" | "integer" | "float" | "boolean";
64
+ };
65
+ };
66
+ };
67
+ type Logger = (logLine: LogLine) => void;
68
+
69
+ interface ChatMessage {
70
+ role: "system" | "user" | "assistant";
71
+ content: ChatMessageContent;
72
+ }
73
+ type ChatMessageContent = string | (ChatMessageImageContent | ChatMessageTextContent)[];
74
+ interface ChatMessageImageContent {
75
+ type: string;
76
+ image_url?: {
77
+ url: string;
78
+ };
79
+ text?: string;
80
+ source?: {
81
+ type: string;
82
+ media_type: string;
83
+ data: string;
84
+ };
85
+ }
86
+ interface ChatMessageTextContent {
87
+ type: string;
88
+ text: string;
89
+ }
90
+ declare const AnnotatedScreenshotText = "This is a screenshot of the current page state with the elements annotated on it. Each element id is annotated with a number to the top left of it. Duplicate annotations at the same location are under each other vertically.";
91
+ interface ChatCompletionOptions {
92
+ messages: ChatMessage[];
93
+ temperature?: number;
94
+ top_p?: number;
95
+ frequency_penalty?: number;
96
+ presence_penalty?: number;
97
+ image?: {
98
+ buffer: Buffer;
99
+ description?: string;
100
+ };
101
+ response_model?: {
102
+ name: string;
103
+ schema: ZodType;
104
+ };
105
+ tools?: LLMTool[];
106
+ tool_choice?: "auto" | "none" | "required";
107
+ maxTokens?: number;
108
+ requestId?: string;
109
+ }
110
+ type LLMResponse = {
111
+ id: string;
112
+ object: string;
113
+ created: number;
114
+ model: string;
115
+ choices: {
116
+ index: number;
117
+ message: {
118
+ role: string;
119
+ content: string | null;
120
+ tool_calls: {
121
+ id: string;
122
+ type: string;
123
+ function: {
124
+ name: string;
125
+ arguments: string;
126
+ };
127
+ }[];
128
+ };
129
+ finish_reason: string;
130
+ }[];
131
+ usage: {
132
+ prompt_tokens: number;
133
+ completion_tokens: number;
134
+ total_tokens: number;
135
+ };
136
+ };
137
+ interface CreateChatCompletionOptions {
138
+ options: ChatCompletionOptions;
139
+ logger: (message: LogLine) => void;
140
+ retries?: number;
141
+ }
142
+ declare abstract class LLMClient {
143
+ type: "openai" | "anthropic" | "cerebras" | "groq" | (string & {});
144
+ modelName: AvailableModel | (string & {});
145
+ hasVision: boolean;
146
+ clientOptions: ClientOptions;
147
+ userProvidedInstructions?: string;
148
+ constructor(modelName: AvailableModel, userProvidedInstructions?: string);
149
+ abstract createChatCompletion<T = LLMResponse & {
150
+ usage?: LLMResponse["usage"];
151
+ }>(options: CreateChatCompletionOptions): Promise<T>;
152
+ generateObject: typeof generateObject;
153
+ generateText: typeof generateText;
154
+ streamText: typeof streamText;
155
+ streamObject: typeof streamObject;
156
+ generateImage: typeof experimental_generateImage;
157
+ embed: typeof embed;
158
+ embedMany: typeof embedMany;
159
+ transcribe: typeof experimental_transcribe;
160
+ generateSpeech: typeof experimental_generateSpeech;
161
+ getLanguageModel?(): LanguageModel;
162
+ }
13
163
 
14
164
  /**
15
165
  * CDP transport & session multiplexer
@@ -115,7 +265,7 @@ declare class Frame implements FrameManager {
115
265
  width: number;
116
266
  height: number;
117
267
  };
118
- }): Promise<string>;
268
+ }): Promise<Buffer>;
119
269
  /** Child frames via Page.getFrameTree */
120
270
  childFrames(): Promise<Frame[]>;
121
271
  /** Wait for a lifecycle state (load/domcontentloaded/networkidle) */
@@ -153,10 +303,13 @@ declare class Locator {
153
303
  private readonly frame;
154
304
  private readonly selector;
155
305
  private readonly options?;
306
+ private readonly selectorResolver;
307
+ private readonly selectorQuery;
308
+ private readonly nthIndex;
156
309
  constructor(frame: Frame, selector: string, options?: {
157
310
  deep?: boolean;
158
311
  depth?: number;
159
- });
312
+ }, nthIndex?: number);
160
313
  /** Return the owning Frame for this locator (typed accessor, no private access). */
161
314
  getFrame(): Frame;
162
315
  /**
@@ -182,6 +335,8 @@ declare class Locator {
182
335
  * Useful for identity comparisons without needing element handles.
183
336
  */
184
337
  backendNodeId(): Promise<Protocol.DOM.BackendNodeId>;
338
+ /** Return how many nodes the current selector resolves to. */
339
+ count(): Promise<number>;
185
340
  /**
186
341
  * Return the center of the element's bounding box in the owning frame's viewport
187
342
  * (CSS pixels), rounded to integers. Scrolls into view best-effort.
@@ -210,6 +365,11 @@ declare class Locator {
210
365
  a?: number;
211
366
  };
212
367
  }): Promise<void>;
368
+ /**
369
+ * Move the mouse cursor to the element's visual center without clicking.
370
+ * - Scrolls into view best-effort, resolves geometry, then dispatches a mouse move.
371
+ */
372
+ hover(): Promise<void>;
213
373
  /**
214
374
  * Click the element at its visual center.
215
375
  * Steps:
@@ -289,35 +449,16 @@ declare class Locator {
289
449
  * For API parity, returns the same locator (querySelector already returns the first match).
290
450
  */
291
451
  first(): Locator;
452
+ /** Return a locator narrowed to the element at the given zero-based index. */
453
+ nth(index: number): Locator;
292
454
  /**
293
455
  * Resolve `this.selector` within the frame to `{ objectId, nodeId? }`:
294
- * - Ensures Runtime/DOM are enabled.
295
- * - Creates (or reuses) an isolated world for this frame.
296
- * - Evaluates a CSS or XPath query in that isolated world.
297
- * - Best-effort: attempts to convert `objectId` to `nodeId`; failure is non-fatal.
298
- *
299
- * - For XPath: first try page-side resolver (__stagehandV3__.resolveSimpleXPath).
300
- * If it returns null (e.g. closed DSD not captured), fall back to CDP DOM with
301
- * `pierce: true` to traverse closed shadow roots and resolve by backendNodeId.
456
+ * Delegates to a shared selector resolver so all selector logic stays in sync.
302
457
  */
303
458
  resolveNode(): Promise<{
304
459
  nodeId: Protocol.DOM.NodeId | null;
305
460
  objectId: Protocol.Runtime.RemoteObjectId;
306
461
  }>;
307
- /**
308
- * CDP fallback for XPath resolution that needs to cross *closed* shadow roots
309
- * created via Declarative Shadow DOM (no attachShadow call to intercept).
310
- *
311
- * Strategy:
312
- * - Fetch full DOM with `pierce: true` so closed shadow roots are included.
313
- * - Run a small, tolerant XPath stepper over the CDP node tree:
314
- * • supports absolute paths like `/html/body/...`
315
- * • supports `//` descendant jumps
316
- * • supports `tag[n]` numeric predicates per sibling group
317
- * • supports `*`
318
- * - Resolve the winning backendNodeId to an objectId for downstream actions.
319
- */
320
- private resolveViaDomPierceXPath;
321
462
  /** Compute a center point from a BoxModel content quad */
322
463
  private centerFromBoxContent;
323
464
  }
@@ -333,12 +474,15 @@ declare class DeepLocatorDelegate {
333
474
  private readonly page;
334
475
  private readonly root;
335
476
  private readonly selector;
336
- constructor(page: Page, root: Frame, selector: string);
477
+ private readonly nthIndex;
478
+ constructor(page: Page, root: Frame, selector: string, nthIndex?: number);
337
479
  private real;
338
480
  click(options?: {
339
481
  button?: "left" | "right" | "middle";
340
482
  clickCount?: number;
341
483
  }): Promise<void>;
484
+ count(): Promise<number>;
485
+ hover(): Promise<void>;
342
486
  fill(value: string): Promise<void>;
343
487
  type(text: string, options?: {
344
488
  delay?: number;
@@ -377,7 +521,8 @@ declare class DeepLocatorDelegate {
377
521
  composed?: boolean;
378
522
  detail?: number;
379
523
  }): Promise<void>;
380
- first(): this;
524
+ first(): DeepLocatorDelegate;
525
+ nth(index: number): DeepLocatorDelegate;
381
526
  }
382
527
 
383
528
  /**
@@ -407,6 +552,7 @@ declare class LocatorDelegate {
407
552
  button?: "left" | "right" | "middle";
408
553
  clickCount?: number;
409
554
  }): Promise<void>;
555
+ hover(): Promise<void>;
410
556
  fill(value: string): Promise<void>;
411
557
  type(text: string, options?: {
412
558
  delay?: number;
@@ -419,9 +565,13 @@ declare class LocatorDelegate {
419
565
  textContent(): Promise<string>;
420
566
  innerHtml(): Promise<string>;
421
567
  innerText(): Promise<string>;
568
+ count(): Promise<number>;
422
569
  first(): LocatorDelegate;
423
570
  }
424
571
 
572
+ type AnyPage = Page$1 | Page$2 | Page$3 | Page;
573
+ type LoadState = "load" | "domcontentloaded" | "networkidle";
574
+
425
575
  declare class Page {
426
576
  private readonly conn;
427
577
  private readonly mainSession;
@@ -439,6 +589,8 @@ declare class Page {
439
589
  private readonly frameCache;
440
590
  /** Stable id for Frames created by this Page (use top-level TargetId). */
441
591
  private readonly pageId;
592
+ /** Cached current URL for synchronous page.url() */
593
+ private _currentUrl;
442
594
  private constructor();
443
595
  private cursorEnabled;
444
596
  private ensureCursorScript;
@@ -463,6 +615,7 @@ declare class Page {
463
615
  * Topology + ownership update. Handles root swaps.
464
616
  */
465
617
  onFrameNavigated(frame: Protocol.Page.Frame, session: CDPSessionLike): void;
618
+ onNavigatedWithinDocument(frameId: string, url: string, session: CDPSessionLike): void;
466
619
  /**
467
620
  * An OOPIF child session whose **main** frame id equals the parent iframe’s frameId
468
621
  * has been attached; adopt the session into this Page and seed ownership for its subtree.
@@ -477,6 +630,8 @@ declare class Page {
477
630
  /** Expose a session by id (used by snapshot to resolve session id -> session) */
478
631
  getSessionById(id: string): CDPSessionLike | undefined;
479
632
  targetId(): string;
633
+ /** Seed the cached URL before navigation events converge. */
634
+ seedCurrentUrl(url: string | undefined | null): void;
480
635
  mainFrameId(): string;
481
636
  mainFrame(): Frame;
482
637
  /**
@@ -520,9 +675,9 @@ declare class Page {
520
675
  timeoutMs?: number;
521
676
  }): Promise<void>;
522
677
  /**
523
- * Return the current page URL (from navigation history).
678
+ * Return the current page URL (synchronous, cached from navigation events).
524
679
  */
525
- url(): Promise<string>;
680
+ url(): string;
526
681
  /**
527
682
  * Return the current page title.
528
683
  * Prefers reading from the active document via Runtime.evaluate to reflect dynamic changes.
@@ -534,7 +689,7 @@ declare class Page {
534
689
  */
535
690
  screenshot(options?: {
536
691
  fullPage?: boolean;
537
- }): Promise<string>;
692
+ }): Promise<Buffer>;
538
693
  /**
539
694
  * Create a locator bound to the current main frame.
540
695
  */
@@ -658,441 +813,46 @@ declare class Page {
658
813
  private waitForMainLoadState;
659
814
  }
660
815
 
661
- declare const AvailableModelSchema: z.ZodEnum<["gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o4-mini", "o3", "o3-mini", "o1", "o1-mini", "gpt-4o", "gpt-4o-mini", "gpt-4o-2024-08-06", "gpt-4.5-preview", "o1-preview", "claude-3-5-sonnet-latest", "claude-3-5-sonnet-20241022", "claude-3-5-sonnet-20240620", "claude-3-7-sonnet-latest", "claude-3-7-sonnet-20250219", "cerebras-llama-3.3-70b", "cerebras-llama-3.1-8b", "groq-llama-3.3-70b-versatile", "groq-llama-3.3-70b-specdec", "gemini-1.5-flash", "gemini-1.5-pro", "gemini-1.5-flash-8b", "gemini-2.0-flash-lite", "gemini-2.0-flash", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25"]>;
662
- type AvailableModel = z.infer<typeof AvailableModelSchema> | string;
663
- type ModelProvider = "openai" | "anthropic" | "cerebras" | "groq" | "google" | "aisdk";
664
- type ClientOptions = ClientOptions$1 | ClientOptions$2;
665
- type ModelConfiguration = AvailableModel | (ClientOptions & {
666
- modelName: AvailableModel;
667
- });
668
- interface AnthropicJsonSchemaObject {
669
- definitions?: {
670
- MySchema?: {
671
- properties?: Record<string, unknown>;
672
- required?: string[];
673
- };
674
- };
675
- properties?: Record<string, unknown>;
676
- required?: string[];
677
- }
678
-
679
- interface LLMTool {
680
- type: "function";
681
- name: string;
682
- description: string;
683
- parameters: Record<string, unknown>;
816
+ interface AgentAction {
817
+ type: string;
818
+ reasoning?: string;
819
+ taskCompleted?: boolean;
820
+ action?: string;
821
+ timeMs?: number;
822
+ pageText?: string;
823
+ pageUrl?: string;
824
+ instruction?: string;
825
+ [key: string]: unknown;
684
826
  }
685
-
686
- type LogLevel = 0 | 1 | 2;
687
- /**
688
- * Mapping between numeric log levels and their names
689
- *
690
- * 0 - error/warn - Critical issues or important warnings
691
- * 1 - info - Standard information messages
692
- * 2 - debug - Detailed information for debugging
693
- */
694
- declare const LOG_LEVEL_NAMES: Record<LogLevel, string>;
695
- type LogLine = {
696
- id?: string;
697
- category?: string;
827
+ interface AgentResult {
828
+ success: boolean;
698
829
  message: string;
699
- level?: LogLevel;
700
- timestamp?: string;
701
- auxiliary?: {
702
- [key: string]: {
703
- value: string;
704
- type: "object" | "string" | "html" | "integer" | "float" | "boolean";
705
- };
830
+ actions: AgentAction[];
831
+ completed: boolean;
832
+ metadata?: Record<string, unknown>;
833
+ usage?: {
834
+ input_tokens: number;
835
+ output_tokens: number;
836
+ inference_time_ms: number;
706
837
  };
707
- };
708
- type Logger = (logLine: LogLine) => void;
709
-
710
- interface ChatMessage {
711
- role: "system" | "user" | "assistant";
712
- content: ChatMessageContent;
713
838
  }
714
- type ChatMessageContent = string | (ChatMessageImageContent | ChatMessageTextContent)[];
715
- interface ChatMessageImageContent {
716
- type: string;
717
- image_url?: {
718
- url: string;
719
- };
720
- text?: string;
721
- source?: {
722
- type: string;
723
- media_type: string;
724
- data: string;
725
- };
839
+ interface AgentExecuteOptions {
840
+ instruction: string;
841
+ maxSteps?: number;
842
+ highlightCursor?: boolean;
843
+ page?: Page$1 | Page$2 | Page$3 | Page;
726
844
  }
727
- interface ChatMessageTextContent {
728
- type: string;
729
- text: string;
730
- }
731
- declare const AnnotatedScreenshotText = "This is a screenshot of the current page state with the elements annotated on it. Each element id is annotated with a number to the top left of it. Duplicate annotations at the same location are under each other vertically.";
732
- interface ChatCompletionOptions {
733
- messages: ChatMessage[];
734
- temperature?: number;
735
- top_p?: number;
736
- frequency_penalty?: number;
737
- presence_penalty?: number;
738
- image?: {
739
- buffer: Buffer;
740
- description?: string;
741
- };
742
- response_model?: {
743
- name: string;
744
- schema: ZodType;
745
- };
746
- tools?: LLMTool[];
747
- tool_choice?: "auto" | "none" | "required";
748
- maxTokens?: number;
749
- requestId?: string;
750
- }
751
- type LLMResponse = {
752
- id: string;
753
- object: string;
754
- created: number;
755
- model: string;
756
- choices: {
757
- index: number;
758
- message: {
759
- role: string;
760
- content: string | null;
761
- tool_calls: {
762
- id: string;
763
- type: string;
764
- function: {
765
- name: string;
766
- arguments: string;
767
- };
768
- }[];
769
- };
770
- finish_reason: string;
771
- }[];
772
- usage: {
773
- prompt_tokens: number;
774
- completion_tokens: number;
775
- total_tokens: number;
776
- };
777
- };
778
- interface CreateChatCompletionOptions {
779
- options: ChatCompletionOptions;
780
- logger: (message: LogLine) => void;
781
- retries?: number;
782
- }
783
- declare abstract class LLMClient {
784
- type: "openai" | "anthropic" | "cerebras" | "groq" | (string & {});
785
- modelName: AvailableModel | (string & {});
786
- hasVision: boolean;
787
- clientOptions: ClientOptions;
788
- userProvidedInstructions?: string;
789
- constructor(modelName: AvailableModel, userProvidedInstructions?: string);
790
- abstract createChatCompletion<T = LLMResponse & {
791
- usage?: LLMResponse["usage"];
792
- }>(options: CreateChatCompletionOptions): Promise<T>;
793
- generateObject: typeof generateObject;
794
- generateText: typeof generateText;
795
- streamText: typeof streamText;
796
- streamObject: typeof streamObject;
797
- generateImage: typeof experimental_generateImage;
798
- embed: typeof embed;
799
- embedMany: typeof embedMany;
800
- transcribe: typeof experimental_transcribe;
801
- generateSpeech: typeof experimental_generateSpeech;
802
- getLanguageModel?(): LanguageModel;
803
- }
804
-
805
- type V3Env = "LOCAL" | "BROWSERBASE";
806
- /** Local launch options for V3 (chrome-launcher + CDP).
807
- * Matches v2 shape where feasible; unsupported fields are accepted but ignored.
808
- */
809
- interface LocalBrowserLaunchOptions {
810
- args?: string[];
811
- executablePath?: string;
812
- userDataDir?: string;
813
- preserveUserDataDir?: boolean;
814
- headless?: boolean;
815
- devtools?: boolean;
816
- chromiumSandbox?: boolean;
817
- ignoreDefaultArgs?: boolean | string[];
818
- proxy?: {
819
- server: string;
820
- bypass?: string;
821
- username?: string;
822
- password?: string;
823
- };
824
- locale?: string;
825
- viewport?: {
826
- width: number;
827
- height: number;
828
- };
829
- deviceScaleFactor?: number;
830
- hasTouch?: boolean;
831
- ignoreHTTPSErrors?: boolean;
832
- cdpUrl?: string;
833
- connectTimeoutMs?: number;
834
- downloadsPath?: string;
835
- acceptDownloads?: boolean;
836
- }
837
- /** Constructor options for V3 */
838
- interface V3Options {
839
- env: V3Env;
840
- apiKey?: string;
841
- projectId?: string;
842
- /**
843
- * Optional: fine-tune Browserbase session creation or resume an existing session.
844
- */
845
- browserbaseSessionCreateParams?: Omit<Browserbase.Sessions.SessionCreateParams, "projectId"> & {
846
- projectId?: string;
847
- };
848
- browserbaseSessionID?: string;
849
- localBrowserLaunchOptions?: LocalBrowserLaunchOptions;
850
- model?: ModelConfiguration;
851
- llmClient?: LLMClient;
852
- systemPrompt?: string;
853
- logInferenceToFile?: boolean;
854
- experimental?: boolean;
855
- verbose?: 0 | 1 | 2;
856
- selfHeal?: boolean;
857
- /** Disable pino logging backend (useful for tests or minimal environments). */
858
- disablePino?: boolean;
859
- /** Optional external logger hook for integrating with host apps. */
860
- logger?: (line: LogLine) => void;
861
- /** Show a visual cursor overlay that follows our mouse events. */
862
- includeCursor?: boolean;
863
- /** Directory used to persist cached actions for act(). */
864
- cacheDir?: string;
865
- domSettleTimeout?: number;
866
- }
867
- type PlaywrightPage = playwright_core.Page;
868
- type PatchrightPage = patchright_core.Page;
869
- type PuppeteerPage = puppeteer_core.Page;
870
- interface ActOptions$1 {
871
- model?: ModelConfiguration;
872
- variables?: Record<string, string>;
873
- timeout?: number;
874
- page?: PlaywrightPage | PuppeteerPage | PatchrightPage | Page;
875
- }
876
- interface ExtractOptions$1 {
877
- model?: ModelConfiguration;
878
- timeout?: number;
879
- selector?: string;
880
- page?: PlaywrightPage | PuppeteerPage | PatchrightPage | Page;
881
- }
882
- declare const defaultExtractSchema: z.ZodObject<{
883
- extraction: z.ZodString;
884
- }, "strip", z.ZodTypeAny, {
885
- extraction?: string;
886
- }, {
887
- extraction?: string;
888
- }>;
889
- declare const pageTextSchema: z.ZodObject<{
890
- pageText: z.ZodString;
891
- }, "strip", z.ZodTypeAny, {
892
- pageText?: string;
893
- }, {
894
- pageText?: string;
895
- }>;
896
- interface ObserveOptions$1 {
897
- model?: ModelConfiguration;
898
- timeout?: number;
899
- selector?: string;
900
- page?: PlaywrightPage | PuppeteerPage | PatchrightPage | Page;
901
- }
902
- type LoadState = "load" | "domcontentloaded" | "networkidle";
903
- interface V3Metrics {
904
- actPromptTokens: number;
905
- actCompletionTokens: number;
906
- actInferenceTimeMs: number;
907
- extractPromptTokens: number;
908
- extractCompletionTokens: number;
909
- extractInferenceTimeMs: number;
910
- observePromptTokens: number;
911
- observeCompletionTokens: number;
912
- observeInferenceTimeMs: number;
913
- agentPromptTokens: number;
914
- agentCompletionTokens: number;
915
- agentInferenceTimeMs: number;
916
- totalPromptTokens: number;
917
- totalCompletionTokens: number;
918
- totalInferenceTimeMs: number;
919
- }
920
- declare enum V3FunctionName {
921
- ACT = "ACT",
922
- EXTRACT = "EXTRACT",
923
- OBSERVE = "OBSERVE",
924
- AGENT = "AGENT"
925
- }
926
-
927
- /**
928
- * V3Context
929
- *
930
- * Owns the root CDP connection and wires Target/Page events into Page.
931
- * Maintains one Page per top-level target, adopts OOPIF child sessions into the owner Page,
932
- * and tracks target→page and (root) frame→target mappings for lookups.
933
- *
934
- * IMPORTANT: FrameId → session ownership is managed inside Page (via its FrameRegistry).
935
- * Context never “guesses” owners; it simply forwards events (with the emitting session)
936
- * so Page can record the correct owner at event time.
937
- */
938
- declare class V3Context {
939
- readonly conn: CdpConnection;
940
- private readonly includeCursor;
941
- private readonly env;
942
- private constructor();
943
- private readonly _piercerInstalled;
944
- private _lastPopupSignalAt;
945
- private sessionKey;
946
- private readonly _sessionInit;
947
- private pagesByTarget;
948
- private mainFrameToTarget;
949
- private sessionOwnerPage;
950
- private frameOwnerPage;
951
- private pendingOopifByMainFrame;
952
- private createdAtByTarget;
953
- private typeByTarget;
954
- private _pageOrder;
955
- /**
956
- * Create a Context for a given CDP websocket URL and bootstrap target wiring.
957
- */
958
- static create(wsUrl: string, opts?: {
959
- includeCursor?: boolean;
960
- env?: "LOCAL" | "BROWSERBASE";
961
- }): Promise<V3Context>;
962
- /**
963
- * Wait until at least one top-level Page has been created and registered.
964
- * We poll internal maps that bootstrap/onAttachedToTarget populate.
965
- */
966
- private waitForFirstTopLevelPage;
967
- private ensurePiercer;
968
- /** Mark a page target as the most-recent one (active). */
969
- private _pushActive;
970
- /** Remove a page target from the recency list (used on close). */
971
- private _removeFromOrder;
972
- /** Return the current active Page (most-recent page that still exists). */
973
- activePage(): Page | undefined;
974
- /**
975
- * Return top-level `Page`s (oldest → newest). OOPIF targets are not included.
976
- */
977
- pages(): Page[];
978
- /**
979
- * Resolve an owning `Page` by the **top-level main frame id**.
980
- * Note: child (OOPIF) roots are intentionally not present in this mapping.
981
- */
982
- resolvePageByMainFrameId(frameId: string): Page | undefined;
983
- /**
984
- * Serialize the full frame tree for a given top-level main frame id.
985
- */
986
- getFullFrameTreeByMainFrameId(rootMainFrameId: string): Promise<Protocol.Page.FrameTree>;
987
- /**
988
- * Create a new top-level page (tab) with the given URL and return its Page object.
989
- * Waits until the target is attached and registered.
990
- */
991
- newPage(url?: string): Promise<Page>;
992
- /**
993
- * Close CDP and clear all mappings. Best-effort cleanup.
994
- */
995
- close(): Promise<void>;
996
- /**
997
- * Bootstrap target lifecycle:
998
- * - Attach to existing targets.
999
- * - Attach on `Target.targetCreated` (fallback for OOPIFs).
1000
- * - Handle auto-attach events.
1001
- * - Clean up on detach/destroy.
1002
- */
1003
- private bootstrap;
1004
- /**
1005
- * Handle a newly attached target (top-level or potential OOPIF):
1006
- * - Enable Page domain and lifecycle events.
1007
- * - If top-level → create Page, wire listeners, resume.
1008
- * - Else → probe child root frame id via `Page.getFrameTree` and adopt immediately
1009
- * if the parent is known; otherwise stage until parent `frameAttached`.
1010
- * - Resume the target only after listeners are wired.
1011
- */
1012
- private onAttachedToTarget;
1013
- /**
1014
- * Detach handler:
1015
- * - Remove child session ownership and prune its subtree.
1016
- * - If a top-level target, cleanup its `Page` and mappings.
1017
- * - Drop any staged child for this session.
1018
- */
1019
- private onDetachedFromTarget;
1020
- /**
1021
- * Cleanup a top-level Page by target id, removing its root and staged children.
1022
- */
1023
- private cleanupByTarget;
1024
- /**
1025
- * Wire Page-domain frame events for a session into the owning Page & mappings.
1026
- * We forward the *emitting session* with every event so Page can stamp ownership precisely.
1027
- */
1028
- private installFrameEventBridges;
1029
- /**
1030
- * Register that a session belongs to a Page (used by event routing).
1031
- */
1032
- private wireSessionToOwnerPage;
1033
- /**
1034
- * Utility: reverse-lookup the top-level target id that owns a given Page.
1035
- */
1036
- private findTargetIdByPage;
1037
- private _notePopupSignal;
1038
- /**
1039
- * Await the current active page, waiting briefly if a popup/open was just triggered.
1040
- * Normal path returns immediately; popup path waits up to timeoutMs for the new page.
1041
- */
1042
- awaitActivePage(timeoutMs?: number): Promise<Page>;
1043
- }
1044
-
1045
- interface AgentAction {
1046
- type: string;
1047
- reasoning?: string;
1048
- taskCompleted?: boolean;
1049
- action?: string;
1050
- timeMs?: number;
1051
- pageText?: string;
1052
- pageUrl?: string;
1053
- instruction?: string;
1054
- [key: string]: unknown;
1055
- }
1056
- interface AgentResult {
1057
- success: boolean;
1058
- message: string;
1059
- actions: AgentAction[];
1060
- completed: boolean;
1061
- metadata?: Record<string, unknown>;
1062
- usage?: {
1063
- input_tokens: number;
1064
- output_tokens: number;
1065
- inference_time_ms: number;
1066
- };
1067
- }
1068
- interface AgentOptions {
1069
- maxSteps?: number;
1070
- autoScreenshot?: boolean;
1071
- waitBetweenActions?: number;
1072
- context?: string;
1073
- }
1074
- interface AgentExecuteOptions extends AgentOptions {
1075
- instruction: string;
1076
- }
1077
- type AgentProviderType = "openai" | "anthropic";
1078
- interface AgentClientOptions {
1079
- apiKey: string;
1080
- organization?: string;
1081
- baseURL?: string;
1082
- defaultMaxSteps?: number;
1083
- [key: string]: unknown;
1084
- }
1085
- type AgentType = "openai" | "anthropic";
1086
- interface AgentExecutionOptions {
1087
- options: AgentExecuteOptions;
1088
- logger: (message: LogLine) => void;
1089
- retries?: number;
845
+ type AgentType = "openai" | "anthropic" | "google";
846
+ type AvailableCuaModel = "openai/computer-use-preview" | "openai/computer-use-preview-2025-03-11" | "anthropic/claude-3-7-sonnet-latest" | "anthropic/claude-sonnet-4-20250514" | "anthropic/claude-sonnet-4-5-20250929" | "google/gemini-2.5-computer-use-preview-10-2025";
847
+ interface AgentExecutionOptions {
848
+ options: AgentExecuteOptions;
849
+ logger: (message: LogLine) => void;
850
+ retries?: number;
1090
851
  }
1091
852
  interface AgentHandlerOptions {
1092
853
  modelName: string;
1093
854
  clientOptions?: Record<string, unknown>;
1094
855
  userProvidedInstructions?: string;
1095
- agentType: AgentType;
1096
856
  experimental?: boolean;
1097
857
  }
1098
858
  interface ActionExecutionResult {
@@ -1173,15 +933,64 @@ type ResponseInputItem = {
1173
933
  interface AgentInstance {
1174
934
  execute: (instructionOrOptions: string | AgentExecuteOptions) => Promise<AgentResult>;
1175
935
  }
936
+ type AgentProviderType = AgentType;
937
+ type AgentModelConfig<TModelName extends string = string> = {
938
+ modelName: TModelName;
939
+ } & Record<string, unknown>;
940
+ type SharedAgentConfigFields = {
941
+ /**
942
+ * Custom system prompt to provide to the agent. Overrides the default system prompt.
943
+ */
944
+ systemPrompt?: string;
945
+ /**
946
+ * MCP integrations - Array of Client objects
947
+ */
948
+ integrations?: (Client | string)[];
949
+ /**
950
+ * Tools passed to the agent client
951
+ */
952
+ tools?: ToolSet;
953
+ };
954
+ type StandardAgentConfig = SharedAgentConfigFields & {
955
+ /**
956
+ * Indicates CUA is disabled for this configuration
957
+ */
958
+ cua?: false;
959
+ /**
960
+ * The model to use for agent functionality
961
+ */
962
+ model?: string | AgentModelConfig<string>;
963
+ /**
964
+ * The model to use for tool execution (observe/act calls within agent tools).
965
+ * If not specified, inherits from the main model configuration.
966
+ * Format: "provider/model" (e.g., "openai/gpt-4o-mini", "google/gemini-2.0-flash-exp")
967
+ */
968
+ executionModel?: string;
969
+ };
970
+ type CuaAgentConfig = SharedAgentConfigFields & {
971
+ /**
972
+ * Indicates CUA is enabled for this configuration
973
+ */
974
+ cua: true;
975
+ /**
976
+ * The model to use for agent functionality when CUA is enabled
977
+ */
978
+ model: AvailableCuaModel | AgentModelConfig<AvailableCuaModel>;
979
+ /**
980
+ * Execution models are not supported when CUA is enabled
981
+ */
982
+ executionModel?: never;
983
+ };
984
+ /**
985
+ * Configuration for agent functionality
986
+ */
987
+ type AgentConfig = StandardAgentConfig | CuaAgentConfig;
1176
988
 
1177
989
  interface ActOptions {
1178
- action: string;
1179
990
  model?: ModelConfiguration;
1180
991
  variables?: Record<string, string>;
1181
- domSettleTimeoutMs?: number;
1182
- timeoutMs?: number;
1183
- iframes?: boolean;
1184
- frameId?: string;
992
+ timeout?: number;
993
+ page?: Page$1 | Page$2 | Page$3 | Page;
1185
994
  }
1186
995
  interface ActResult {
1187
996
  success: boolean;
@@ -1189,142 +998,321 @@ interface ActResult {
1189
998
  actionDescription: string;
1190
999
  actions: Action[];
1191
1000
  }
1192
- interface ExtractOptions<T extends z.AnyZodObject> {
1193
- instruction?: string;
1194
- schema?: T;
1001
+ type ExtractResult<T extends z.AnyZodObject> = z.infer<T>;
1002
+ interface Action {
1003
+ selector: string;
1004
+ description: string;
1005
+ method?: string;
1006
+ arguments?: string[];
1007
+ }
1008
+ interface HistoryEntry {
1009
+ method: "act" | "extract" | "observe" | "navigate";
1010
+ parameters: unknown;
1011
+ result: unknown;
1012
+ timestamp: string;
1013
+ }
1014
+ interface ExtractOptions {
1015
+ model?: ModelConfiguration;
1016
+ timeout?: number;
1017
+ selector?: string;
1018
+ page?: Page$1 | Page$2 | Page$3 | Page;
1019
+ }
1020
+ declare const defaultExtractSchema: z.ZodObject<{
1021
+ extraction: z.ZodString;
1022
+ }, "strip", z.ZodTypeAny, {
1023
+ extraction?: string;
1024
+ }, {
1025
+ extraction?: string;
1026
+ }>;
1027
+ declare const pageTextSchema: z.ZodObject<{
1028
+ pageText: z.ZodString;
1029
+ }, "strip", z.ZodTypeAny, {
1030
+ pageText?: string;
1031
+ }, {
1032
+ pageText?: string;
1033
+ }>;
1034
+ interface ObserveOptions {
1035
+ model?: ModelConfiguration;
1036
+ timeout?: number;
1037
+ selector?: string;
1038
+ page?: Page$1 | Page$2 | Page$3 | Page;
1039
+ }
1040
+ declare enum V3FunctionName {
1041
+ ACT = "ACT",
1042
+ EXTRACT = "EXTRACT",
1043
+ OBSERVE = "OBSERVE",
1044
+ AGENT = "AGENT"
1045
+ }
1046
+
1047
+ interface CachedActEntry {
1048
+ version: 1;
1049
+ instruction: string;
1050
+ url: string;
1051
+ variables: Record<string, string>;
1052
+ actions: Action[];
1053
+ actionDescription?: string;
1054
+ message?: string;
1055
+ }
1056
+ type AgentReplayStep = AgentReplayActStep | AgentReplayFillFormStep | AgentReplayGotoStep | AgentReplayScrollStep | AgentReplayWaitStep | AgentReplayNavBackStep | {
1057
+ type: string;
1058
+ [key: string]: unknown;
1059
+ };
1060
+ interface AgentReplayActStep {
1061
+ type: "act";
1062
+ instruction: string;
1063
+ actions?: Action[];
1064
+ actionDescription?: string;
1065
+ message?: string;
1066
+ timeout?: number;
1067
+ }
1068
+ interface AgentReplayFillFormStep {
1069
+ type: "fillForm";
1070
+ fields?: Array<{
1071
+ action: string;
1072
+ value: string;
1073
+ }>;
1074
+ observeResults?: Action[];
1075
+ actions?: Action[];
1076
+ }
1077
+ interface AgentReplayGotoStep {
1078
+ type: "goto";
1079
+ url: string;
1080
+ waitUntil?: LoadState;
1081
+ }
1082
+ interface AgentReplayScrollStep {
1083
+ type: "scroll";
1084
+ deltaX?: number;
1085
+ deltaY?: number;
1086
+ anchor?: {
1087
+ x: number;
1088
+ y: number;
1089
+ };
1090
+ }
1091
+ interface AgentReplayWaitStep {
1092
+ type: "wait";
1093
+ timeMs: number;
1094
+ }
1095
+ interface AgentReplayNavBackStep {
1096
+ type: "navback";
1097
+ waitUntil?: LoadState;
1098
+ }
1099
+ interface SanitizedAgentExecuteOptions {
1100
+ maxSteps?: number;
1101
+ autoScreenshot?: boolean;
1102
+ waitBetweenActions?: number;
1103
+ context?: string;
1104
+ }
1105
+ interface CachedAgentEntry {
1106
+ version: 1;
1107
+ instruction: string;
1108
+ startUrl: string;
1109
+ options: SanitizedAgentExecuteOptions;
1110
+ configSignature: string;
1111
+ steps: AgentReplayStep[];
1112
+ result: AgentResult;
1113
+ timestamp: string;
1114
+ }
1115
+
1116
+ interface V3Metrics {
1117
+ actPromptTokens: number;
1118
+ actCompletionTokens: number;
1119
+ actInferenceTimeMs: number;
1120
+ extractPromptTokens: number;
1121
+ extractCompletionTokens: number;
1122
+ extractInferenceTimeMs: number;
1123
+ observePromptTokens: number;
1124
+ observeCompletionTokens: number;
1125
+ observeInferenceTimeMs: number;
1126
+ agentPromptTokens: number;
1127
+ agentCompletionTokens: number;
1128
+ agentInferenceTimeMs: number;
1129
+ totalPromptTokens: number;
1130
+ totalCompletionTokens: number;
1131
+ totalInferenceTimeMs: number;
1132
+ }
1133
+
1134
+ type V3Env = "LOCAL" | "BROWSERBASE";
1135
+ /** Local launch options for V3 (chrome-launcher + CDP).
1136
+ * Matches v2 shape where feasible; unsupported fields are accepted but ignored.
1137
+ */
1138
+ interface LocalBrowserLaunchOptions {
1139
+ args?: string[];
1140
+ executablePath?: string;
1141
+ userDataDir?: string;
1142
+ preserveUserDataDir?: boolean;
1143
+ headless?: boolean;
1144
+ devtools?: boolean;
1145
+ chromiumSandbox?: boolean;
1146
+ ignoreDefaultArgs?: boolean | string[];
1147
+ proxy?: {
1148
+ server: string;
1149
+ bypass?: string;
1150
+ username?: string;
1151
+ password?: string;
1152
+ };
1153
+ locale?: string;
1154
+ viewport?: {
1155
+ width: number;
1156
+ height: number;
1157
+ };
1158
+ deviceScaleFactor?: number;
1159
+ hasTouch?: boolean;
1160
+ ignoreHTTPSErrors?: boolean;
1161
+ cdpUrl?: string;
1162
+ connectTimeoutMs?: number;
1163
+ downloadsPath?: string;
1164
+ acceptDownloads?: boolean;
1165
+ }
1166
+ /** Constructor options for V3 */
1167
+ interface V3Options {
1168
+ env: V3Env;
1169
+ apiKey?: string;
1170
+ projectId?: string;
1171
+ /**
1172
+ * Optional: fine-tune Browserbase session creation or resume an existing session.
1173
+ */
1174
+ browserbaseSessionCreateParams?: Omit<Browserbase.Sessions.SessionCreateParams, "projectId"> & {
1175
+ projectId?: string;
1176
+ };
1177
+ browserbaseSessionID?: string;
1178
+ localBrowserLaunchOptions?: LocalBrowserLaunchOptions;
1195
1179
  model?: ModelConfiguration;
1196
- domSettleTimeoutMs?: number;
1180
+ llmClient?: LLMClient;
1181
+ systemPrompt?: string;
1182
+ logInferenceToFile?: boolean;
1183
+ experimental?: boolean;
1184
+ verbose?: 0 | 1 | 2;
1185
+ selfHeal?: boolean;
1186
+ /** Disable pino logging backend (useful for tests or minimal environments). */
1187
+ disablePino?: boolean;
1188
+ /** Optional external logger hook for integrating with host apps. */
1189
+ logger?: (line: LogLine) => void;
1190
+ /** Show a visual cursor overlay that follows our mouse events. */
1191
+ includeCursor?: boolean;
1192
+ /** Directory used to persist cached actions for act(). */
1193
+ cacheDir?: string;
1194
+ domSettleTimeout?: number;
1195
+ }
1196
+
1197
+ /**
1198
+ * V3Context
1199
+ *
1200
+ * Owns the root CDP connection and wires Target/Page events into Page.
1201
+ * Maintains one Page per top-level target, adopts OOPIF child sessions into the owner Page,
1202
+ * and tracks target→page and (root) frame→target mappings for lookups.
1203
+ *
1204
+ * IMPORTANT: FrameId → session ownership is managed inside Page (via its FrameRegistry).
1205
+ * Context never “guesses” owners; it simply forwards events (with the emitting session)
1206
+ * so Page can record the correct owner at event time.
1207
+ */
1208
+ declare class V3Context {
1209
+ readonly conn: CdpConnection;
1210
+ private readonly includeCursor;
1211
+ private readonly env;
1212
+ private constructor();
1213
+ private readonly _piercerInstalled;
1214
+ private _lastPopupSignalAt;
1215
+ private sessionKey;
1216
+ private readonly _sessionInit;
1217
+ private pagesByTarget;
1218
+ private mainFrameToTarget;
1219
+ private sessionOwnerPage;
1220
+ private frameOwnerPage;
1221
+ private pendingOopifByMainFrame;
1222
+ private createdAtByTarget;
1223
+ private typeByTarget;
1224
+ private _pageOrder;
1225
+ private pendingCreatedTargetUrl;
1197
1226
  /**
1198
- * @deprecated The `useTextExtract` parameter has no effect in this version of Stagehand and will be removed in later versions.
1227
+ * Create a Context for a given CDP websocket URL and bootstrap target wiring.
1199
1228
  */
1200
- useTextExtract?: boolean;
1201
- selector?: string;
1202
- iframes?: boolean;
1203
- frameId?: string;
1204
- }
1205
- type ExtractResult<T extends z.AnyZodObject> = z.infer<T>;
1206
- interface ObserveOptions {
1207
- instruction?: string;
1208
- model?: ModelConfiguration;
1209
- domSettleTimeoutMs?: number;
1210
- returnAction?: boolean;
1211
- selector?: string;
1229
+ static create(wsUrl: string, opts?: {
1230
+ includeCursor?: boolean;
1231
+ env?: "LOCAL" | "BROWSERBASE";
1232
+ }): Promise<V3Context>;
1212
1233
  /**
1213
- * @deprecated The `onlyVisible` parameter has no effect in this version of Stagehand and will be removed in later versions.
1234
+ * Wait until at least one top-level Page has been created and registered.
1235
+ * We poll internal maps that bootstrap/onAttachedToTarget populate.
1214
1236
  */
1215
- onlyVisible?: boolean;
1216
- drawOverlay?: boolean;
1217
- iframes?: boolean;
1218
- frameId?: string;
1219
- }
1220
- interface Action {
1221
- selector: string;
1222
- description: string;
1223
- backendNodeId?: number;
1224
- method?: string;
1225
- arguments?: string[];
1226
- }
1227
- /**
1228
- * Configuration for agent functionality
1229
- */
1230
- interface AgentConfig {
1237
+ private waitForFirstTopLevelPage;
1238
+ private ensurePiercer;
1239
+ /** Mark a page target as the most-recent one (active). */
1240
+ private _pushActive;
1241
+ /** Remove a page target from the recency list (used on close). */
1242
+ private _removeFromOrder;
1243
+ /** Return the current active Page (most-recent page that still exists). */
1244
+ activePage(): Page | undefined;
1245
+ /** Explicitly mark a known Page as the most-recent active page (and focus it). */
1246
+ setActivePage(page: Page): void;
1231
1247
  /**
1232
- * The provider to use for agent functionality
1248
+ * Return top-level `Page`s (oldest → newest). OOPIF targets are not included.
1233
1249
  */
1234
- provider?: AgentProviderType;
1250
+ pages(): Page[];
1235
1251
  /**
1236
- * The model to use for agent functionality
1252
+ * Resolve an owning `Page` by the **top-level main frame id**.
1253
+ * Note: child (OOPIF) roots are intentionally not present in this mapping.
1237
1254
  */
1238
- model?: string;
1255
+ resolvePageByMainFrameId(frameId: string): Page | undefined;
1239
1256
  /**
1240
- * The model to use for tool execution (observe/act calls within agent tools).
1241
- * If not specified, inherits from the main model configuration.
1242
- * Format: "provider/model" (e.g., "openai/gpt-4o-mini", "google/gemini-2.0-flash-exp")
1257
+ * Serialize the full frame tree for a given top-level main frame id.
1243
1258
  */
1244
- executionModel?: string;
1259
+ getFullFrameTreeByMainFrameId(rootMainFrameId: string): Promise<Protocol.Page.FrameTree>;
1245
1260
  /**
1246
- * Custom instructions to provide to the agent
1261
+ * Create a new top-level page (tab) with the given URL and return its Page object.
1262
+ * Waits until the target is attached and registered.
1247
1263
  */
1248
- instructions?: string;
1264
+ newPage(url?: string): Promise<Page>;
1249
1265
  /**
1250
- * Additional options to pass to the agent client
1266
+ * Close CDP and clear all mappings. Best-effort cleanup.
1251
1267
  */
1252
- options?: Record<string, unknown>;
1268
+ close(): Promise<void>;
1253
1269
  /**
1254
- * MCP integrations - Array of Client objects
1270
+ * Bootstrap target lifecycle:
1271
+ * - Attach to existing targets.
1272
+ * - Attach on `Target.targetCreated` (fallback for OOPIFs).
1273
+ * - Handle auto-attach events.
1274
+ * - Clean up on detach/destroy.
1255
1275
  */
1256
- integrations?: (Client | string)[];
1276
+ private bootstrap;
1257
1277
  /**
1258
- * Tools passed to the agent client
1278
+ * Handle a newly attached target (top-level or potential OOPIF):
1279
+ * - Enable Page domain and lifecycle events.
1280
+ * - If top-level → create Page, wire listeners, resume.
1281
+ * - Else → probe child root frame id via `Page.getFrameTree` and adopt immediately
1282
+ * if the parent is known; otherwise stage until parent `frameAttached`.
1283
+ * - Resume the target only after listeners are wired.
1259
1284
  */
1260
- tools?: ToolSet;
1261
- }
1262
- interface HistoryEntry {
1263
- method: "act" | "extract" | "observe" | "navigate";
1264
- parameters: unknown;
1265
- result: unknown;
1266
- timestamp: string;
1267
- }
1268
- /**
1269
- * Represents a path through a Zod schema from the root object down to a
1270
- * particular field. The `segments` array describes the chain of keys/indices.
1271
- *
1272
- * - **String** segments indicate object property names.
1273
- * - **Number** segments indicate array indices.
1274
- *
1275
- * For example, `["users", 0, "homepage"]` might describe reaching
1276
- * the `homepage` field in `schema.users[0].homepage`.
1277
- */
1278
- interface ZodPathSegments {
1285
+ private onAttachedToTarget;
1279
1286
  /**
1280
- * The ordered list of keys/indices leading from the schema root
1281
- * to the targeted field.
1287
+ * Detach handler:
1288
+ * - Remove child session ownership and prune its subtree.
1289
+ * - If a top-level target, cleanup its `Page` and mappings.
1290
+ * - Drop any staged child for this session.
1282
1291
  */
1283
- segments: Array<string | number>;
1284
- }
1285
-
1286
- type AgentReplayStep = AgentReplayActStep | AgentReplayFillFormStep | AgentReplayGotoStep | AgentReplayScrollStep | AgentReplayWaitStep | AgentReplayNavBackStep | {
1287
- type: string;
1288
- [key: string]: unknown;
1289
- };
1290
- interface AgentReplayActStep {
1291
- type: "act";
1292
- instruction: string;
1293
- actions?: Action[];
1294
- actionDescription?: string;
1295
- message?: string;
1296
- timeout?: number;
1297
- }
1298
- interface AgentReplayFillFormStep {
1299
- type: "fillForm";
1300
- fields?: Array<{
1301
- action: string;
1302
- value: string;
1303
- }>;
1304
- observeResults?: Action[];
1305
- actions?: Action[];
1306
- }
1307
- interface AgentReplayGotoStep {
1308
- type: "goto";
1309
- url: string;
1310
- waitUntil?: LoadState;
1311
- }
1312
- interface AgentReplayScrollStep {
1313
- type: "scroll";
1314
- deltaX?: number;
1315
- deltaY?: number;
1316
- anchor?: {
1317
- x: number;
1318
- y: number;
1319
- };
1320
- }
1321
- interface AgentReplayWaitStep {
1322
- type: "wait";
1323
- timeMs: number;
1324
- }
1325
- interface AgentReplayNavBackStep {
1326
- type: "navback";
1327
- waitUntil?: LoadState;
1292
+ private onDetachedFromTarget;
1293
+ /**
1294
+ * Cleanup a top-level Page by target id, removing its root and staged children.
1295
+ */
1296
+ private cleanupByTarget;
1297
+ /**
1298
+ * Wire Page-domain frame events for a session into the owning Page & mappings.
1299
+ * We forward the *emitting session* with every event so Page can stamp ownership precisely.
1300
+ */
1301
+ private installFrameEventBridges;
1302
+ /**
1303
+ * Register that a session belongs to a Page (used by event routing).
1304
+ */
1305
+ private wireSessionToOwnerPage;
1306
+ /**
1307
+ * Utility: reverse-lookup the top-level target id that owns a given Page.
1308
+ */
1309
+ private findTargetIdByPage;
1310
+ private _notePopupSignal;
1311
+ /**
1312
+ * Await the current active page, waiting briefly if a popup/open was just triggered.
1313
+ * Normal path returns immediately; popup path waits up to timeoutMs for the new page.
1314
+ */
1315
+ awaitActivePage(timeoutMs?: number): Promise<Page>;
1328
1316
  }
1329
1317
 
1330
1318
  /**
@@ -1353,6 +1341,7 @@ declare class V3 {
1353
1341
  private modelName;
1354
1342
  private modelClientOptions;
1355
1343
  private llmProvider;
1344
+ private overrideLlmClients;
1356
1345
  private readonly domSettleTimeoutMs?;
1357
1346
  private _isClosing;
1358
1347
  browserbaseSessionId?: string;
@@ -1374,6 +1363,7 @@ declare class V3 {
1374
1363
  */
1375
1364
  get metrics(): Promise<V3Metrics>;
1376
1365
  private cloneForCache;
1366
+ private resolveLlmClient;
1377
1367
  private beginAgentReplayRecording;
1378
1368
  private endAgentReplayRecording;
1379
1369
  private discardAgentReplayRecording;
@@ -1406,8 +1396,8 @@ declare class V3 {
1406
1396
  * - act(instruction: string, options?: ActOptions)
1407
1397
  * - act(action: Action, options?: ActOptions)
1408
1398
  */
1409
- act(instruction: string, options?: ActOptions$1): Promise<ActResult>;
1410
- act(action: Action, options?: ActOptions$1): Promise<ActResult>;
1399
+ act(instruction: string, options?: ActOptions): Promise<ActResult>;
1400
+ act(action: Action, options?: ActOptions): Promise<ActResult>;
1411
1401
  /**
1412
1402
  * Run an "extract" instruction through the ExtractHandler.
1413
1403
  *
@@ -1419,15 +1409,15 @@ declare class V3 {
1419
1409
  * - extract(instruction, schema, options)
1420
1410
  */
1421
1411
  extract(): Promise<z.infer<typeof pageTextSchema>>;
1422
- extract(options: ExtractOptions$1): Promise<z.infer<typeof pageTextSchema>>;
1423
- extract(instruction: string, options?: ExtractOptions$1): Promise<z.infer<typeof defaultExtractSchema>>;
1424
- extract<T extends ZodTypeAny>(instruction: string, schema: T, options?: ExtractOptions$1): Promise<z.infer<T>>;
1412
+ extract(options: ExtractOptions): Promise<z.infer<typeof pageTextSchema>>;
1413
+ extract(instruction: string, options?: ExtractOptions): Promise<z.infer<typeof defaultExtractSchema>>;
1414
+ extract<T extends ZodTypeAny>(instruction: string, schema: T, options?: ExtractOptions): Promise<z.infer<T>>;
1425
1415
  /**
1426
1416
  * Run an "observe" instruction through the ObserveHandler.
1427
1417
  */
1428
1418
  observe(): Promise<Action[]>;
1429
- observe(options: ObserveOptions$1): Promise<Action[]>;
1430
- observe(instruction: string, options?: ObserveOptions$1): Promise<Action[]>;
1419
+ observe(options: ObserveOptions): Promise<Action[]>;
1420
+ observe(instruction: string, options?: ObserveOptions): Promise<Action[]>;
1431
1421
  /** Return the browser-level CDP WebSocket endpoint. */
1432
1422
  connectURL(): string;
1433
1423
  /** Expose the current CDP-backed context. */
@@ -1453,6 +1443,10 @@ declare class V3 {
1453
1443
  private readActCacheEntry;
1454
1444
  private writeActCacheEntry;
1455
1445
  private sanitizeAgentExecuteOptions;
1446
+ private createLlmClientOverride;
1447
+ private inferProviderFromModelName;
1448
+ private extractAgentModel;
1449
+ private serializeAgentModelForCache;
1456
1450
  private buildAgentCacheSignature;
1457
1451
  private buildAgentCacheKey;
1458
1452
  private readAgentCacheEntry;
@@ -1599,6 +1593,119 @@ declare class StagehandShadowSegmentNotFoundError extends StagehandError {
1599
1593
  constructor(segment: string, hint?: string);
1600
1594
  }
1601
1595
 
1596
+ /**
1597
+ * Abstract base class for agent clients
1598
+ * This provides a common interface for all agent implementations
1599
+ */
1600
+ declare abstract class AgentClient {
1601
+ type: AgentType;
1602
+ modelName: string;
1603
+ clientOptions: Record<string, unknown>;
1604
+ userProvidedInstructions?: string;
1605
+ constructor(type: AgentType, modelName: string, userProvidedInstructions?: string);
1606
+ abstract execute(options: AgentExecutionOptions): Promise<AgentResult>;
1607
+ abstract captureScreenshot(options?: Record<string, unknown>): Promise<unknown>;
1608
+ abstract setViewport(width: number, height: number): void;
1609
+ abstract setCurrentUrl(url: string): void;
1610
+ abstract setScreenshotProvider(provider: () => Promise<string>): void;
1611
+ abstract setActionHandler(handler: (action: AgentAction) => Promise<void>): void;
1612
+ }
1613
+
1614
+ declare const modelToAgentProviderMap: Record<string, AgentProviderType>;
1615
+ /**
1616
+ * Provider for agent clients
1617
+ * This class is responsible for creating the appropriate agent client
1618
+ * based on the provider type
1619
+ */
1620
+ declare class AgentProvider {
1621
+ private logger;
1622
+ /**
1623
+ * Create a new agent provider
1624
+ */
1625
+ constructor(logger: (message: LogLine) => void);
1626
+ getClient(modelName: string, clientOptions?: Record<string, unknown>, userProvidedInstructions?: string, tools?: ToolSet$1): AgentClient;
1627
+ static getAgentProvider(modelName: string): AgentProviderType;
1628
+ }
1629
+
1630
+ /**
1631
+ * Represents a path through a Zod schema from the root object down to a
1632
+ * particular field. The `segments` array describes the chain of keys/indices.
1633
+ *
1634
+ * - **String** segments indicate object property names.
1635
+ * - **Number** segments indicate array indices.
1636
+ *
1637
+ * For example, `["users", 0, "homepage"]` might describe reaching
1638
+ * the `homepage` field in `schema.users[0].homepage`.
1639
+ */
1640
+ interface ZodPathSegments {
1641
+ /**
1642
+ * The ordered list of keys/indices leading from the schema root
1643
+ * to the targeted field.
1644
+ */
1645
+ segments: Array<string | number>;
1646
+ }
1647
+
1648
+ declare function validateZodSchema(schema: z.ZodTypeAny, data: unknown): boolean;
1649
+ /**
1650
+ * Detects if the code is running in the Bun runtime environment.
1651
+ * @returns {boolean} True if running in Bun, false otherwise.
1652
+ */
1653
+ declare function isRunningInBun(): boolean;
1654
+ declare function toGeminiSchema(zodSchema: z.ZodTypeAny): Schema;
1655
+ declare function getZodType(schema: z.ZodTypeAny): string;
1656
+ /**
1657
+ * Recursively traverses a given Zod schema, scanning for any fields of type `z.string().url()`.
1658
+ * For each such field, it replaces the `z.string().url()` with `z.number()`.
1659
+ *
1660
+ * This function is used internally by higher-level utilities (e.g., transforming entire object schemas)
1661
+ * and handles nested objects, arrays, unions, intersections, optionals.
1662
+ *
1663
+ * @param schema - The Zod schema to transform.
1664
+ * @param currentPath - An array of string/number keys representing the current schema path (used internally for recursion).
1665
+ * @returns A two-element tuple:
1666
+ * 1. The updated Zod schema, with any `.url()` fields replaced by `z.number()`.
1667
+ * 2. An array of {@link ZodPathSegments} objects representing each replaced field, including the path segments.
1668
+ */
1669
+ declare function transformSchema(schema: z.ZodTypeAny, currentPath: Array<string | number>): [z.ZodTypeAny, ZodPathSegments[]];
1670
+ /**
1671
+ * Once we get the final extracted object that has numeric IDs in place of URLs,
1672
+ * use `injectUrls` to walk the object and replace numeric IDs
1673
+ * with the real URL strings from idToUrlMapping. The `path` may include `*`
1674
+ * for array indices (indicating "all items in the array").
1675
+ */
1676
+ declare function injectUrls(obj: unknown, path: Array<string | number>, idToUrlMapping: Record<string, string>): void;
1677
+ /**
1678
+ * Mapping from LLM provider names to their corresponding environment variable names for API keys.
1679
+ */
1680
+ declare const providerEnvVarMap: Partial<Record<ModelProvider | string, string>>;
1681
+ /**
1682
+ * Loads an API key for a provider, checking environment variables.
1683
+ * @param provider The name of the provider (e.g., 'openai', 'anthropic')
1684
+ * @param logger Optional logger for info/error messages
1685
+ * @returns The API key if found, undefined otherwise
1686
+ */
1687
+ declare function loadApiKeyFromEnv(provider: string | undefined, logger: (logLine: LogLine) => void): string | undefined;
1688
+ declare function trimTrailingTextNode(path: string | undefined): string | undefined;
1689
+ interface JsonSchemaProperty {
1690
+ type: string;
1691
+ enum?: unknown[];
1692
+ items?: JsonSchemaProperty;
1693
+ properties?: Record<string, JsonSchemaProperty>;
1694
+ required?: string[];
1695
+ minimum?: number;
1696
+ maximum?: number;
1697
+ description?: string;
1698
+ }
1699
+ interface JsonSchema extends JsonSchemaProperty {
1700
+ type: string;
1701
+ }
1702
+ /**
1703
+ * Converts a JSON Schema object to a Zod schema
1704
+ * @param schema The JSON Schema object to convert
1705
+ * @returns A Zod schema equivalent to the input JSON Schema
1706
+ */
1707
+ declare function jsonSchemaToZod(schema: JsonSchema): ZodTypeAny;
1708
+
1602
1709
  interface ConnectToMCPServerOptions {
1603
1710
  serverUrl: string | URL;
1604
1711
  clientOptions?: ClientOptions$3;
@@ -1608,6 +1715,65 @@ interface StdioServerConfig {
1608
1715
  args?: string[];
1609
1716
  env?: Record<string, string>;
1610
1717
  }
1611
- declare const connectToMCPServer: (serverConfig: string | URL | StdioServerConfig | ConnectToMCPServerOptions) => Promise<Client$1>;
1718
+ declare const connectToMCPServer: (serverConfig: string | URL | StdioServerConfig | ConnectToMCPServerOptions) => Promise<Client>;
1719
+
1720
+ type EvaluateOptions = {
1721
+ /** The question to ask about the task state */
1722
+ question: string;
1723
+ /** The answer to the question */
1724
+ answer?: string;
1725
+ /** Whether to take a screenshot of the task state, or array of screenshots to evaluate */
1726
+ screenshot?: boolean | Buffer[];
1727
+ /** Custom system prompt for the evaluator */
1728
+ systemPrompt?: string;
1729
+ /** Delay in milliseconds before taking the screenshot @default 250 */
1730
+ screenshotDelayMs?: number;
1731
+ /** The agent's reasoning/thought process for completing the task */
1732
+ agentReasoning?: string;
1733
+ };
1734
+ type BatchAskOptions = {
1735
+ /** Array of questions with optional answers */
1736
+ questions: Array<{
1737
+ question: string;
1738
+ answer?: string;
1739
+ }>;
1740
+ /** Whether to take a screenshot of the task state */
1741
+ screenshot?: boolean;
1742
+ /** Custom system prompt for the evaluator */
1743
+ systemPrompt?: string;
1744
+ /** Delay in milliseconds before taking the screenshot @default 1000 */
1745
+ screenshotDelayMs?: number;
1746
+ };
1747
+ /**
1748
+ * Result of an evaluation
1749
+ */
1750
+ interface EvaluationResult {
1751
+ /**
1752
+ * The evaluation result ('YES', 'NO', or 'INVALID' if parsing failed or value was unexpected)
1753
+ */
1754
+ evaluation: "YES" | "NO" | "INVALID";
1755
+ /**
1756
+ * The reasoning behind the evaluation
1757
+ */
1758
+ reasoning: string;
1759
+ }
1760
+
1761
+ /**
1762
+ * V3Evaluator mirrors Evaluator but operates on a V3 instance instead of Stagehand.
1763
+ * It uses the V3 page/screenshot APIs and constructs an LLM client to run
1764
+ * structured evaluations (YES/NO with reasoning) on screenshots and/or text.
1765
+ */
1766
+
1767
+ declare class V3Evaluator {
1768
+ private v3;
1769
+ private modelName;
1770
+ private modelClientOptions;
1771
+ private silentLogger;
1772
+ constructor(v3: V3, modelName?: AvailableModel, modelClientOptions?: ClientOptions);
1773
+ private getClient;
1774
+ ask(options: EvaluateOptions): Promise<EvaluationResult>;
1775
+ batchAsk(options: BatchAskOptions): Promise<EvaluationResult[]>;
1776
+ private _evaluateWithMultipleScreenshots;
1777
+ }
1612
1778
 
1613
- export { type ActOptions, type ActResult, type Action, type ActionExecutionResult, type AgentAction, type AgentClientOptions, type AgentConfig, type AgentExecuteOptions, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentOptions, type AgentProviderType, type AgentResult, AgentScreenshotProviderError, type AgentType, AnnotatedScreenshotText, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AvailableModel, AvailableModelSchema, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ComputerCallItem, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, type FunctionCallItem, HandlerNotInitializedError, type HistoryEntry, InvalidAISDKModelFormatError, LLMClient, type LLMResponse, LLMResponseError, LOG_LEVEL_NAMES, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelConfiguration, type ModelProvider, type ObserveOptions, type ResponseInputItem, type ResponseItem, V3 as Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, StagehandEnvironmentError, StagehandError, StagehandEvalError, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, StagehandResponseParseError, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, type ToolUseItem, 
UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, V3, XPathResolutionError, type ZodPathSegments, ZodSchemaValidationError, connectToMCPServer };
1779
+ export { type AISDKCustomProvider, type AISDKProvider, type ActOptions, type ActResult, type Action, type ActionExecutionResult, type AgentAction, type AgentConfig, type AgentExecuteOptions, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentModelConfig, AgentProvider, type AgentProviderType, type AgentReplayActStep, type AgentReplayFillFormStep, type AgentReplayGotoStep, type AgentReplayNavBackStep, type AgentReplayScrollStep, type AgentReplayStep, type AgentReplayWaitStep, type AgentResult, AgentScreenshotProviderError, type AgentType, AnnotatedScreenshotText, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AnyPage, type AvailableCuaModel, type AvailableModel, BrowserbaseSessionNotFoundError, type CachedActEntry, type CachedAgentEntry, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ComputerCallItem, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, type FunctionCallItem, HandlerNotInitializedError, type HistoryEntry, InvalidAISDKModelFormatError, type JsonSchema, type JsonSchemaProperty, LLMClient, type LLMResponse, LLMResponseError, type LLMTool, LOG_LEVEL_NAMES, type LoadState, type LocalBrowserLaunchOptions, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelConfiguration, type ModelProvider, type ObserveOptions, type ResponseInputItem, type ResponseItem, type SanitizedAgentExecuteOptions, V3 as Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, StagehandEnvironmentError, 
StagehandError, StagehandEvalError, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, StagehandResponseParseError, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, type ToolUseItem, UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, V3, type V3Env, V3Evaluator, V3FunctionName, type V3Metrics, type V3Options, XPathResolutionError, ZodSchemaValidationError, connectToMCPServer, defaultExtractSchema, getZodType, injectUrls, isRunningInBun, jsonSchemaToZod, loadApiKeyFromEnv, modelToAgentProviderMap, pageTextSchema, providerEnvVarMap, toGeminiSchema, transformSchema, trimTrailingTextNode, validateZodSchema };