@browserbasehq/orca 3.0.0-preview.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +165 -0
  3. package/dist/index.d.ts +1611 -0
  4. package/dist/index.js +28681 -0
  5. package/dist/lib/api.d.ts +23 -0
  6. package/dist/lib/dom/build/scriptContent.d.ts +1 -0
  7. package/dist/lib/inference.d.ts +71 -0
  8. package/dist/lib/inferenceLogUtils.d.ts +12 -0
  9. package/dist/lib/logger.d.ts +54 -0
  10. package/dist/lib/prompt.d.ts +12 -0
  11. package/dist/lib/utils.d.ts +65 -0
  12. package/dist/lib/v3/agent/AgentClient.d.ts +18 -0
  13. package/dist/lib/v3/agent/AgentProvider.d.ts +18 -0
  14. package/dist/lib/v3/agent/AnthropicCUAClient.d.ts +55 -0
  15. package/dist/lib/v3/agent/OpenAICUAClient.d.ts +64 -0
  16. package/dist/lib/v3/agent/StagehandAgent.d.ts +15 -0
  17. package/dist/lib/v3/agent/tools/index.d.ts +229 -0
  18. package/dist/lib/v3/agent/tools/v3-act.d.ts +29 -0
  19. package/dist/lib/v3/agent/tools/v3-ariaTree.d.ts +11 -0
  20. package/dist/lib/v3/agent/tools/v3-close.d.ts +24 -0
  21. package/dist/lib/v3/agent/tools/v3-extract.d.ts +38 -0
  22. package/dist/lib/v3/agent/tools/v3-fillform.d.ts +37 -0
  23. package/dist/lib/v3/agent/tools/v3-goto.d.ts +29 -0
  24. package/dist/lib/v3/agent/tools/v3-navback.d.ts +17 -0
  25. package/dist/lib/v3/agent/tools/v3-screenshot.d.ts +13 -0
  26. package/dist/lib/v3/agent/tools/v3-scroll.d.ts +23 -0
  27. package/dist/lib/v3/agent/tools/v3-wait.d.ts +19 -0
  28. package/dist/lib/v3/agent/utils/cuaKeyMapping.d.ts +10 -0
  29. package/dist/lib/v3/agent/utils/imageCompression.d.ts +18 -0
  30. package/dist/lib/v3/agent/utils/messageProcessing.d.ts +13 -0
  31. package/dist/lib/v3/dom/build/scriptV3Content.d.ts +1 -0
  32. package/dist/lib/v3/dom/genDomScripts.d.ts +1 -0
  33. package/dist/lib/v3/dom/index.d.ts +1 -0
  34. package/dist/lib/v3/dom/piercer.entry.d.ts +1 -0
  35. package/dist/lib/v3/dom/piercer.runtime.d.ts +25 -0
  36. package/dist/lib/v3/handlers/actHandler.d.ts +18 -0
  37. package/dist/lib/v3/handlers/extractHandler.d.ts +29 -0
  38. package/dist/lib/v3/handlers/handlerUtils/actHandlerUtils.d.ts +18 -0
  39. package/dist/lib/v3/handlers/observeHandler.d.ts +15 -0
  40. package/dist/lib/v3/handlers/v3AgentHandler.d.ts +17 -0
  41. package/dist/lib/v3/handlers/v3CuaAgentHandler.d.ts +26 -0
  42. package/dist/lib/v3/index.d.ts +10 -0
  43. package/dist/lib/v3/launch/browserbase.d.ts +8 -0
  44. package/dist/lib/v3/launch/local.d.ts +13 -0
  45. package/dist/lib/v3/llm/AnthropicClient.d.ts +16 -0
  46. package/dist/lib/v3/llm/CerebrasClient.d.ts +17 -0
  47. package/dist/lib/v3/llm/GoogleClient.d.ts +19 -0
  48. package/dist/lib/v3/llm/GroqClient.d.ts +17 -0
  49. package/dist/lib/v3/llm/LLMClient.d.ts +99 -0
  50. package/dist/lib/v3/llm/LLMProvider.d.ts +10 -0
  51. package/dist/lib/v3/llm/OpenAIClient.d.ts +15 -0
  52. package/dist/lib/v3/llm/aisdk.d.ts +15 -0
  53. package/dist/lib/v3/logger.d.ts +48 -0
  54. package/dist/lib/v3/mcp/connection.d.ts +11 -0
  55. package/dist/lib/v3/mcp/utils.d.ts +3 -0
  56. package/dist/lib/v3/tests/default-page-tracking.spec.d.ts +1 -0
  57. package/dist/lib/v3/tests/perform-understudy-method.spec.d.ts +1 -0
  58. package/dist/lib/v3/tests/shadow-iframe.spec.d.ts +1 -0
  59. package/dist/lib/v3/tests/timeouts.spec.d.ts +1 -0
  60. package/dist/lib/v3/tests/v3.config.d.ts +4 -0
  61. package/dist/lib/v3/tests/v3.playwright.config.d.ts +2 -0
  62. package/dist/lib/v3/tests/xpath-for-location-deep.spec.d.ts +1 -0
  63. package/dist/lib/v3/types/act.d.ts +10 -0
  64. package/dist/lib/v3/types/agent.d.ts +132 -0
  65. package/dist/lib/v3/types/api.d.ts +40 -0
  66. package/dist/lib/v3/types/cache.d.ts +71 -0
  67. package/dist/lib/v3/types/context.d.ts +2 -0
  68. package/dist/lib/v3/types/evals.d.ts +71 -0
  69. package/dist/lib/v3/types/evaluator.d.ts +40 -0
  70. package/dist/lib/v3/types/llm.d.ts +11 -0
  71. package/dist/lib/v3/types/log.d.ts +23 -0
  72. package/dist/lib/v3/types/model.d.ts +20 -0
  73. package/dist/lib/v3/types/playwright.d.ts +6 -0
  74. package/dist/lib/v3/types/stagehand.d.ts +113 -0
  75. package/dist/lib/v3/types/stagehandApiErrors.d.ts +18 -0
  76. package/dist/lib/v3/types/stagehandErrors.d.ts +104 -0
  77. package/dist/lib/v3/types.d.ts +176 -0
  78. package/dist/lib/v3/understudy/a11y/snapshot.d.ts +71 -0
  79. package/dist/lib/v3/understudy/cdp.d.ts +58 -0
  80. package/dist/lib/v3/understudy/context.d.ts +120 -0
  81. package/dist/lib/v3/understudy/deepLocator.d.ts +69 -0
  82. package/dist/lib/v3/understudy/executionContextRegistry.d.ts +15 -0
  83. package/dist/lib/v3/understudy/frame.d.ts +63 -0
  84. package/dist/lib/v3/understudy/frameLocator.d.ts +46 -0
  85. package/dist/lib/v3/understudy/frameRegistry.d.ts +100 -0
  86. package/dist/lib/v3/understudy/locator.d.ts +196 -0
  87. package/dist/lib/v3/understudy/page.d.ts +241 -0
  88. package/dist/lib/v3/understudy/piercer.d.ts +4 -0
  89. package/dist/lib/v3/v3.d.ts +156 -0
  90. package/dist/lib/version.d.ts +5 -0
  91. package/package.json +130 -0
@@ -0,0 +1,1611 @@
1
+ import * as puppeteer_core from 'puppeteer-core';
2
+ import * as patchright_core from 'patchright-core';
3
+ import * as playwright_core from 'playwright-core';
4
+ import Browserbase from '@browserbasehq/sdk';
5
+ import { Protocol } from 'devtools-protocol';
6
+ import { Buffer as Buffer$1 } from 'buffer';
7
+ import { ClientOptions as ClientOptions$2 } from '@anthropic-ai/sdk';
8
+ import { ClientOptions as ClientOptions$1 } from 'openai';
9
+ import { z, ZodType, ZodTypeAny, ZodError } from 'zod/v3';
10
+ import { generateObject, generateText, streamText, streamObject, experimental_generateImage, embed, embedMany, experimental_transcribe, experimental_generateSpeech, LanguageModel, ToolSet } from 'ai';
11
+ import { Client } from '@modelcontextprotocol/sdk/dist/esm/client';
12
+ import { ClientOptions as ClientOptions$3, Client as Client$1 } from '@modelcontextprotocol/sdk/client/index.js';
13
+
14
+ /**
15
+ * CDP transport & session multiplexer
16
+ *
17
+ * Owns the browser WebSocket and multiplexes flattened Target sessions.
18
+ * Tracks inflight CDP calls, routes responses to the right session, and forwards events.
19
+ *
20
+ * This does not interpret Page/DOM/Runtime semantics — callers own that logic.
21
+ */
22
+ interface CDPSessionLike {
23
+ send<R = unknown>(method: string, params?: object): Promise<R>;
24
+ on<P = unknown>(event: string, handler: (params: P) => void): void;
25
+ off<P = unknown>(event: string, handler: (params: P) => void): void;
26
+ close(): Promise<void>;
27
+ readonly id: string | null;
28
+ }
29
+ type EventHandler = (params: unknown) => void;
30
+ declare class CdpConnection implements CDPSessionLike {
31
+ private ws;
32
+ private nextId;
33
+ private inflight;
34
+ private eventHandlers;
35
+ private sessions;
36
+ readonly id: string | null;
37
+ private transportCloseHandlers;
38
+ onTransportClosed(handler: (why: string) => void): void;
39
+ offTransportClosed(handler: (why: string) => void): void;
40
+ private emitTransportClosed;
41
+ private constructor();
42
+ static connect(wsUrl: string): Promise<CdpConnection>;
43
+ enableAutoAttach(): Promise<void>;
44
+ send<R = unknown>(method: string, params?: object): Promise<R>;
45
+ on<P = unknown>(event: string, handler: (params: P) => void): void;
46
+ off<P = unknown>(event: string, handler: (params: P) => void): void;
47
+ close(): Promise<void>;
48
+ getSession(sessionId: string): CdpSession | undefined;
49
+ attachToTarget(targetId: string): Promise<CdpSession>;
50
+ getTargets(): Promise<Array<{
51
+ targetId: string;
52
+ type: string;
53
+ url: string;
54
+ }>>;
55
+ private onMessage;
56
+ _sendViaSession<R = unknown>(sessionId: string, method: string, params?: object): Promise<R>;
57
+ _onSessionEvent(sessionId: string, event: string, handler: EventHandler): void;
58
+ _offSessionEvent(sessionId: string, event: string, handler: EventHandler): void;
59
+ _dispatchToSession(sessionId: string, event: string, params: unknown): void;
60
+ }
61
+ declare class CdpSession implements CDPSessionLike {
62
+ private readonly root;
63
+ readonly id: string;
64
+ constructor(root: CdpConnection, id: string);
65
+ send<R = unknown>(method: string, params?: object): Promise<R>;
66
+ on<P = unknown>(event: string, handler: (params: P) => void): void;
67
+ off<P = unknown>(event: string, handler: (params: P) => void): void;
68
+ close(): Promise<void>;
69
+ dispatch(event: string, params: unknown): void;
70
+ }
71
+
72
+ interface FrameManager {
73
+ session: CDPSessionLike;
74
+ frameId: string;
75
+ pageId: string;
76
+ }
77
+ /**
78
+ * Frame
79
+ *
80
+ * A thin, session-bound handle to a specific DOM frame (by frameId).
81
+ * All CDP calls in this class go through `this.session`, which MUST be the
82
+ * owning session for `this.frameId`. Page is responsible for constructing
83
+ * Frames with the correct session.
84
+ */
85
+ declare class Frame implements FrameManager {
86
+ session: CDPSessionLike;
87
+ frameId: string;
88
+ pageId: string;
89
+ /** Owning CDP session id (useful for logs); null for root connection (should not happen for targets) */
90
+ readonly sessionId: string | null;
91
+ constructor(session: CDPSessionLike, frameId: string, pageId: string);
92
+ /** DOM.getNodeForLocation → DOM.describeNode */
93
+ getNodeAtLocation(x: number, y: number): Promise<Protocol.DOM.Node>;
94
+ /** CSS selector → DOM.querySelector → DOM.getBoxModel */
95
+ getLocationForSelector(selector: string): Promise<{
96
+ x: number;
97
+ y: number;
98
+ width: number;
99
+ height: number;
100
+ }>;
101
+ /** Accessibility.getFullAXTree (+ recurse into child frames if requested) */
102
+ getAccessibilityTree(withFrames?: boolean): Promise<Protocol.Accessibility.AXNode[]>;
103
+ /**
104
+ * Evaluate a function or expression in this frame's isolated world.
105
+ * - If a string is provided, treated as a JS expression.
106
+ * - If a function is provided, it is stringified and invoked with the optional argument.
107
+ */
108
+ evaluate<R = unknown, Arg = unknown>(pageFunctionOrExpression: string | ((arg: Arg) => R | Promise<R>), arg?: Arg): Promise<R>;
109
+ /** Page.captureScreenshot (frame-scoped session) */
110
+ screenshot(options?: {
111
+ fullPage?: boolean;
112
+ clip?: {
113
+ x: number;
114
+ y: number;
115
+ width: number;
116
+ height: number;
117
+ };
118
+ }): Promise<string>;
119
+ /** Child frames via Page.getFrameTree */
120
+ childFrames(): Promise<Frame[]>;
121
+ /** Wait for a lifecycle state (load/domcontentloaded/networkidle) */
122
+ waitForLoadState(state?: "load" | "domcontentloaded" | "networkidle"): Promise<void>;
123
+ /** Create a Locator for `selector` scoped to this frame (optional `deep`/`depth` options are passed through) */
124
+ locator(selector: string, options?: {
125
+ deep?: boolean;
126
+ depth?: number;
127
+ }): Locator;
128
+ /** Create/get an isolated world for this frame and return its executionContextId */
129
+ private getExecutionContextId;
130
+ }
131
+
132
+ type MouseButton = "left" | "right" | "middle";
133
+ /**
134
+ * Locator
135
+ *
136
+ * Purpose:
137
+ * A small, CDP-based element interaction helper scoped to a specific `Frame`.
138
+ * It resolves a CSS/XPath selector inside the frame’s **isolated world**, and then
139
+ * performs low-level actions (click, type, select) using DOM/Runtime/Input
140
+ * protocol domains with minimal abstraction.
141
+ *
142
+ * Key change:
143
+ * - Prefer **objectId**-based CDP calls (scroll, geometry) to avoid brittle
144
+ * frontend nodeId mappings. nodeId is resolved on a best-effort basis and
145
+ * returned for compatibility, but actions do not depend on it.
146
+ *
147
+ * Notes:
148
+ * - Resolution is lazy: every action resolves the selector again.
149
+ * - Uses `Page.createIsolatedWorld` so evaluation is isolated from page scripts.
150
+ * - Releases remote objects (`Runtime.releaseObject`) where appropriate.
151
+ */
152
+ declare class Locator {
153
+ private readonly frame;
154
+ private readonly selector;
155
+ private readonly options?;
156
+ constructor(frame: Frame, selector: string, options?: {
157
+ deep?: boolean;
158
+ depth?: number;
159
+ });
160
+ /** Return the owning Frame for this locator (typed accessor, no private access). */
161
+ getFrame(): Frame;
162
+ /**
163
+ * Set files on an <input type="file"> element.
164
+ *
165
+ * Mirrors Playwright's Locator.setInputFiles basics:
166
+ * - Accepts file path(s) or payload object(s) { name, mimeType, buffer }.
167
+ * - Uses CDP DOM.setFileInputFiles under the hood.
168
+ * - Best-effort: dispatches change/input events via CDP (Chrome fires them by default).
169
+ * - Passing an empty array clears the selection.
170
+ */
171
+ setInputFiles(files: string | string[] | {
172
+ name: string;
173
+ mimeType: string;
174
+ buffer: ArrayBuffer | Uint8Array | Buffer$1 | string;
175
+ } | Array<{
176
+ name: string;
177
+ mimeType: string;
178
+ buffer: ArrayBuffer | Uint8Array | Buffer$1 | string;
179
+ }>): Promise<void>;
180
+ /**
181
+ * Return the DOM backendNodeId for this locator's target element.
182
+ * Useful for identity comparisons without needing element handles.
183
+ */
184
+ backendNodeId(): Promise<Protocol.DOM.BackendNodeId>;
185
+ /**
186
+ * Return the center of the element's bounding box in the owning frame's viewport
187
+ * (CSS pixels), rounded to integers. Scrolls into view best-effort.
188
+ */
189
+ centroid(): Promise<{
190
+ x: number;
191
+ y: number;
192
+ }>;
193
+ /**
194
+ * Highlight the element's bounding box using the CDP Overlay domain.
195
+ * - Scrolls element into view best-effort.
196
+ * - Shows a semi-transparent overlay briefly, then hides it.
197
+ */
198
+ highlight(options?: {
199
+ durationMs?: number;
200
+ borderColor?: {
201
+ r: number;
202
+ g: number;
203
+ b: number;
204
+ a?: number;
205
+ };
206
+ contentColor?: {
207
+ r: number;
208
+ g: number;
209
+ b: number;
210
+ a?: number;
211
+ };
212
+ }): Promise<void>;
213
+ /**
214
+ * Click the element at its visual center.
215
+ * Steps:
216
+ * 1) Resolve selector to { objectId } in the frame world.
217
+ * 2) Scroll into view via `DOM.scrollIntoViewIfNeeded({ objectId })`.
218
+ * 3) Read geometry via `DOM.getBoxModel({ objectId })` → compute a center point.
219
+ * 4) Synthesize mouse press + release via `Input.dispatchMouseEvent`.
220
+ */
221
+ click(options?: {
222
+ button?: MouseButton;
223
+ clickCount?: number;
224
+ }): Promise<void>;
225
+ /**
226
+ * Dispatch a DOM 'click' MouseEvent on the element itself.
227
+ * - Does not synthesize real pointer input; directly dispatches an event.
228
+ * - Useful for elements that rely on click handlers without needing hit-testing.
229
+ */
230
+ sendClickEvent(options?: {
231
+ bubbles?: boolean;
232
+ cancelable?: boolean;
233
+ composed?: boolean;
234
+ detail?: number;
235
+ }): Promise<void>;
236
+ /**
237
+ * Scroll the element vertically to a given percentage (0–100).
238
+ * - If the element is <html> or <body>, scrolls the window/document.
239
+ * - Otherwise, scrolls the element itself via element.scrollTo.
240
+ */
241
+ scrollTo(percent: number | string): Promise<void>;
242
+ /**
243
+ * Fill an input/textarea/contenteditable element.
244
+ * - Sets the value/text directly in DOM.
245
+ * - Dispatches `input` and `change` events to mimic user input.
246
+ * - Releases the underlying `objectId` afterwards to avoid leaks.
247
+ */
248
+ fill(value: string): Promise<void>;
249
+ /**
250
+ * Type text into the element (focuses first).
251
+ * - Focus via element.focus() in page JS (no DOM.focus(nodeId)).
252
+ * - If no delay, uses `Input.insertText` for efficiency.
253
+ * - With delay, synthesizes `keyDown`/`keyUp` per character.
254
+ */
255
+ type(text: string, options?: {
256
+ delay?: number;
257
+ }): Promise<void>;
258
+ /**
259
+ * Select one or more options on a `<select>` element.
260
+ * Returns the values actually selected after the operation.
261
+ */
262
+ selectOption(values: string | string[]): Promise<string[]>;
263
+ /**
264
+ * Return true if the element is attached and visible (rough heuristic).
265
+ */
266
+ isVisible(): Promise<boolean>;
267
+ /**
268
+ * Return true if the element is an input[type=checkbox|radio] and is checked.
269
+ * Also considers aria-checked for ARIA widgets.
270
+ */
271
+ isChecked(): Promise<boolean>;
272
+ /**
273
+ * Return the element's input value (for input/textarea/select/contenteditable).
274
+ */
275
+ inputValue(): Promise<string>;
276
+ /**
277
+ * Return the element's textContent (raw, not innerText).
278
+ */
279
+ textContent(): Promise<string>;
280
+ /**
281
+ * Return the element's innerHTML string.
282
+ */
283
+ innerHtml(): Promise<string>;
284
+ /**
285
+ * Return the element's innerText (layout-aware, visible text).
286
+ */
287
+ innerText(): Promise<string>;
288
+ /**
289
+ * For API parity, returns the same locator (querySelector already returns the first match).
290
+ */
291
+ first(): Locator;
292
+ /**
293
+ * Resolve `this.selector` within the frame to `{ objectId, nodeId? }`:
294
+ * - Ensures Runtime/DOM are enabled.
295
+ * - Creates (or reuses) an isolated world for this frame.
296
+ * - Evaluates a CSS or XPath query in that isolated world.
297
+ * - Best-effort: attempts to convert `objectId` to `nodeId`; failure is non-fatal.
298
+ *
299
+ * - For XPath: first try page-side resolver (__stagehandV3__.resolveSimpleXPath).
300
+ * If it returns null (e.g. a closed Declarative Shadow DOM root was not captured), fall back to CDP DOM with
301
+ * `pierce: true` to traverse closed shadow roots and resolve by backendNodeId.
302
+ */
303
+ resolveNode(): Promise<{
304
+ nodeId: Protocol.DOM.NodeId | null;
305
+ objectId: Protocol.Runtime.RemoteObjectId;
306
+ }>;
307
+ /**
308
+ * CDP fallback for XPath resolution that needs to cross *closed* shadow roots
309
+ * created via Declarative Shadow DOM (no attachShadow call to intercept).
310
+ *
311
+ * Strategy:
312
+ * - Fetch full DOM with `pierce: true` so closed shadow roots are included.
313
+ * - Run a small, tolerant XPath stepper over the CDP node tree:
314
+ * • supports absolute paths like `/html/body/...`
315
+ * • supports `//` descendant jumps
316
+ * • supports `tag[n]` numeric predicates per sibling group
317
+ * • supports `*`
318
+ * - Resolve the winning backendNodeId to an objectId for downstream actions.
319
+ */
320
+ private resolveViaDomPierceXPath;
321
+ /** Compute a center point from a BoxModel content quad */
322
+ private centerFromBoxContent;
323
+ }
324
+
325
+ /**
326
+ * DeepLocatorDelegate: a lightweight wrapper that looks like a Locator and
327
+ * resolves to the correct frame/element on each call using hop/deep-XPath logic.
328
+ *
329
+ * Returned by `page.deepLocator()` for ergonomic, await-free chaining:
330
+ * page.deepLocator('iframe#ifrA >> #btn').click()
331
+ */
332
+ declare class DeepLocatorDelegate {
333
+ private readonly page;
334
+ private readonly root;
335
+ private readonly selector;
336
+ constructor(page: Page, root: Frame, selector: string);
337
+ private real;
338
+ click(options?: {
339
+ button?: "left" | "right" | "middle";
340
+ clickCount?: number;
341
+ }): Promise<void>;
342
+ fill(value: string): Promise<void>;
343
+ type(text: string, options?: {
344
+ delay?: number;
345
+ }): Promise<void>;
346
+ selectOption(values: string | string[]): Promise<string[]>;
347
+ scrollTo(percent: number | string): Promise<void>;
348
+ isVisible(): Promise<boolean>;
349
+ isChecked(): Promise<boolean>;
350
+ inputValue(): Promise<string>;
351
+ textContent(): Promise<string>;
352
+ innerHtml(): Promise<string>;
353
+ innerText(): Promise<string>;
354
+ centroid(): Promise<{
355
+ x: number;
356
+ y: number;
357
+ }>;
358
+ backendNodeId(): Promise<number>;
359
+ highlight(options?: {
360
+ durationMs?: number;
361
+ borderColor?: {
362
+ r: number;
363
+ g: number;
364
+ b: number;
365
+ a?: number;
366
+ };
367
+ contentColor?: {
368
+ r: number;
369
+ g: number;
370
+ b: number;
371
+ a?: number;
372
+ };
373
+ }): Promise<void>;
374
+ sendClickEvent(options?: {
375
+ bubbles?: boolean;
376
+ cancelable?: boolean;
377
+ composed?: boolean;
378
+ detail?: number;
379
+ }): Promise<void>;
380
+ first(): this;
381
+ }
382
+
383
+ /**
384
+ * FrameLocator: resolves iframe elements to their child Frames and allows
385
+ * creating locators scoped to that frame. Supports chaining.
386
+ */
387
+ declare class FrameLocator {
388
+ private readonly parent?;
389
+ private readonly selector;
390
+ private readonly page;
391
+ private readonly root?;
392
+ constructor(page: Page, selector: string, parent?: FrameLocator, root?: Frame);
393
+ /** Create a nested FrameLocator under this one. */
394
+ frameLocator(selector: string): FrameLocator;
395
+ /** Resolve to the concrete Frame for this FrameLocator chain. */
396
+ resolveFrame(): Promise<Frame>;
397
+ /** Return a LocatorDelegate scoped to this frame. Its methods resolve the frame lazily on each call. */
398
+ locator(selector: string): LocatorDelegate;
399
+ }
400
+ /** A small delegating wrapper that resolves the frame lazily per call. */
401
+ declare class LocatorDelegate {
402
+ private readonly fl;
403
+ private readonly sel;
404
+ constructor(fl: FrameLocator, sel: string);
405
+ private real;
406
+ click(options?: {
407
+ button?: "left" | "right" | "middle";
408
+ clickCount?: number;
409
+ }): Promise<void>;
410
+ fill(value: string): Promise<void>;
411
+ type(text: string, options?: {
412
+ delay?: number;
413
+ }): Promise<void>;
414
+ selectOption(values: string | string[]): Promise<string[]>;
415
+ scrollTo(percent: number | string): Promise<void>;
416
+ isVisible(): Promise<boolean>;
417
+ isChecked(): Promise<boolean>;
418
+ inputValue(): Promise<string>;
419
+ textContent(): Promise<string>;
420
+ innerHtml(): Promise<string>;
421
+ innerText(): Promise<string>;
422
+ first(): LocatorDelegate;
423
+ }
424
+
425
+ declare class Page {
426
+ private readonly conn;
427
+ private readonly mainSession;
428
+ private readonly _targetId;
429
+ /** Every CDP child session this page owns (top-level + adopted OOPIF sessions). */
430
+ private readonly sessions;
431
+ /** Unified truth for frame topology + ownership. */
432
+ private readonly registry;
433
+ /** A convenience wrapper bound to the current main frame id (top-level session). */
434
+ private mainFrameWrapper;
435
+ /** Compact ordinal per frameId (used by snapshot encoding). */
436
+ private frameOrdinals;
437
+ private nextOrdinal;
438
+ /** cache Frames per frameId so everyone uses the same one */
439
+ private readonly frameCache;
440
+ /** Stable id for Frames created by this Page (use top-level TargetId). */
441
+ private readonly pageId;
442
+ private constructor();
443
+ private cursorEnabled;
444
+ private ensureCursorScript;
445
+ enableCursorOverlay(): Promise<void>;
446
+ private updateCursor;
447
+ /**
448
+ * Factory: create Page and seed registry with the shallow tree from Page.getFrameTree.
449
+ * Assumes Page domain is already enabled on the session passed in.
450
+ */
451
+ static create(conn: CdpConnection, session: CDPSessionLike, targetId: string): Promise<Page>;
452
+ /**
453
+ * Parent/child session emitted a `frameAttached`.
454
+ * Topology update + ownership stamped to **emitting session**.
455
+ */
456
+ onFrameAttached(frameId: string, parentId: string | null, session: CDPSessionLike): void;
457
+ /**
458
+ * Parent/child session emitted a `frameDetached`.
459
+ */
460
+ onFrameDetached(frameId: string, reason?: "remove" | "swap" | string): void;
461
+ /**
462
+ * Parent/child session emitted a `frameNavigated`.
463
+ * Topology + ownership update. Handles root swaps.
464
+ */
465
+ onFrameNavigated(frame: Protocol.Page.Frame, session: CDPSessionLike): void;
466
+ /**
467
+ * An OOPIF child session whose **main** frame id equals the parent iframe’s frameId
468
+ * has been attached; adopt the session into this Page and seed ownership for its subtree.
469
+ */
470
+ adoptOopifSession(childSession: CDPSessionLike, childMainFrameId: string): void;
471
+ /** Detach an adopted child session and prune its subtree */
472
+ detachOopifSession(sessionId: string): void;
473
+ /** Return the owning CDP session for a frameId (falls back to main session) */
474
+ getSessionForFrame(frameId: string): CDPSessionLike;
475
+ /** Always returns a Frame bound to the owning session */
476
+ frameForId(frameId: string): Frame;
477
+ /** Expose a session by id (used by snapshot to resolve session id -> session) */
478
+ getSessionById(id: string): CDPSessionLike | undefined;
479
+ targetId(): string;
480
+ mainFrameId(): string;
481
+ mainFrame(): Frame;
482
+ /**
483
+ * Close this top-level page (tab). Best-effort via Target.closeTarget.
484
+ */
485
+ close(): Promise<void>;
486
+ getFullFrameTree(): Protocol.Page.FrameTree;
487
+ asProtocolFrameTree(rootMainFrameId: string): Protocol.Page.FrameTree;
488
+ private ensureOrdinal;
489
+ /** Public getter for snapshot code / handlers. */
490
+ getOrdinal(frameId: string): number;
491
+ listAllFrameIds(): string[];
492
+ /**
493
+ * Navigate the page; optionally wait for a lifecycle state.
494
+ * Waits on the **current** main frame and follows root swaps during navigation.
495
+ */
496
+ goto(url: string, options?: {
497
+ waitUntil?: LoadState;
498
+ timeoutMs?: number;
499
+ }): Promise<void>;
500
+ /**
501
+ * Reload the page; optionally wait for a lifecycle state.
502
+ */
503
+ reload(options?: {
504
+ waitUntil?: LoadState;
505
+ timeoutMs?: number;
506
+ ignoreCache?: boolean;
507
+ }): Promise<void>;
508
+ /**
509
+ * Navigate back in history if possible; optionally wait for a lifecycle state.
510
+ */
511
+ goBack(options?: {
512
+ waitUntil?: LoadState;
513
+ timeoutMs?: number;
514
+ }): Promise<void>;
515
+ /**
516
+ * Navigate forward in history if possible; optionally wait for a lifecycle state.
517
+ */
518
+ goForward(options?: {
519
+ waitUntil?: LoadState;
520
+ timeoutMs?: number;
521
+ }): Promise<void>;
522
+ /**
523
+ * Return the current page URL (from navigation history).
524
+ */
525
+ url(): Promise<string>;
526
+ /**
527
+ * Return the current page title.
528
+ * Prefers reading from the active document via Runtime.evaluate to reflect dynamic changes.
529
+ * Falls back to navigation history title if evaluation is unavailable.
530
+ */
531
+ title(): Promise<string>;
532
+ /**
533
+ * Capture a screenshot (delegated to the current main frame).
534
+ */
535
+ screenshot(options?: {
536
+ fullPage?: boolean;
537
+ }): Promise<string>;
538
+ /**
539
+ * Create a locator bound to the current main frame.
540
+ */
541
+ locator(selector: string): ReturnType<Frame["locator"]>;
542
+ /**
543
+ * Deep locator that supports cross-iframe traversal.
544
+ * - Recognizes '>>' hop notation to enter iframe contexts.
545
+ * - Supports deep XPath that includes iframe steps (e.g., '/html/body/iframe[2]//div').
546
+ * Returns a DeepLocatorDelegate: a Locator-like wrapper that resolves to the appropriate frame/element on each call.
547
+ */
548
+ deepLocator(selector: string): DeepLocatorDelegate;
549
+ /**
550
+ * Frame locator similar to Playwright: targets iframe elements and scopes
551
+ * subsequent locators to that frame. Supports chaining.
552
+ */
553
+ frameLocator(selector: string): FrameLocator;
554
+ /**
555
+ * List all frames belonging to this page as Frame objects bound to their owning sessions.
556
+ * The list is ordered by a stable ordinal assigned during the page lifetime.
557
+ */
558
+ frames(): Frame[];
559
+ /**
560
+ * Wait until the page reaches a lifecycle state on the current main frame.
561
+ * Modeled on Playwright's `page.waitForLoadState`, but takes the timeout as a plain number (`timeoutMs`) rather than an options object.
562
+ */
563
+ waitForLoadState(state: LoadState, timeoutMs?: number): Promise<void>;
564
+ /**
565
+ * Evaluate a function or expression in the current main frame's isolated world.
566
+ * - If a string is provided, it is treated as a JS expression.
567
+ * - If a function is provided, it is stringified and invoked with the optional argument.
568
+ * - The return value should be JSON-serializable. Non-serializable objects will
569
+ * best-effort serialize via JSON.stringify inside the page context.
570
+ */
571
+ evaluate<R = unknown, Arg = unknown>(pageFunctionOrExpression: string | ((arg: Arg) => R | Promise<R>), arg?: Arg): Promise<R>;
572
+ /**
573
+ * Force the page viewport to an exact CSS size and device scale factor.
574
+ * Ensures screenshots match width x height pixels when deviceScaleFactor = 1.
575
+ */
576
+ setViewportSize(width: number, height: number, options?: {
577
+ deviceScaleFactor?: number;
578
+ }): Promise<void>;
579
+ /**
580
+ * Click at the given viewport coordinates (CSS pixels).
581
+ * Dispatches mouseMoved → mousePressed → mouseReleased via CDP Input domain
582
+ * on the top-level page target's session. Coordinates are relative to the
583
+ * viewport origin (top-left). Does not scroll.
584
+ */
585
+ click(x: number, y: number, options: {
586
+ button?: "left" | "right" | "middle";
587
+ clickCount?: number;
588
+ returnXpath: true;
589
+ }): Promise<string>;
590
+ click(x: number, y: number, options?: {
591
+ button?: "left" | "right" | "middle";
592
+ clickCount?: number;
593
+ returnXpath?: false;
594
+ }): Promise<void>;
595
+ click(x: number, y: number, options: {
596
+ button?: "left" | "right" | "middle";
597
+ clickCount?: number;
598
+ returnXpath: boolean;
599
+ }): Promise<void | string>;
600
+ scroll(x: number, y: number, deltaX: number, deltaY: number, options: {
601
+ returnXpath: true;
602
+ }): Promise<string>;
603
+ scroll(x: number, y: number, deltaX: number, deltaY: number, options?: {
604
+ returnXpath?: false;
605
+ }): Promise<void>;
606
+ scroll(x: number, y: number, deltaX: number, deltaY: number, options: {
607
+ returnXpath: boolean;
608
+ }): Promise<void | string>;
609
+ /**
610
+ * Drag from (fromX, fromY) to (toX, toY) using mouse events.
611
+ * Sends mouseMoved → mousePressed → mouseMoved (steps) → mouseReleased.
612
+ */
613
+ dragAndDrop(fromX: number, fromY: number, toX: number, toY: number, options: {
614
+ button?: "left" | "right" | "middle";
615
+ steps?: number;
616
+ delay?: number;
617
+ returnXpath: true;
618
+ }): Promise<[string, string]>;
619
+ dragAndDrop(fromX: number, fromY: number, toX: number, toY: number, options?: {
620
+ button?: "left" | "right" | "middle";
621
+ steps?: number;
622
+ delay?: number;
623
+ returnXpath?: false;
624
+ }): Promise<void>;
625
+ dragAndDrop(fromX: number, fromY: number, toX: number, toY: number, options: {
626
+ button?: "left" | "right" | "middle";
627
+ steps?: number;
628
+ delay?: number;
629
+ returnXpath: boolean;
630
+ }): Promise<void | [string, string]>;
631
+ /**
632
+ * Type a string by dispatching keyDown/keyUp events per character.
633
+ * Focus must already be on the desired element. Uses CDP Input.dispatchKeyEvent
634
+ * and never falls back to Input.insertText. Optional delay applies between
635
+ * successive characters.
636
+ */
637
+ type(text: string, options?: {
638
+ delay?: number;
639
+ withMistakes?: boolean;
640
+ }): Promise<void>;
641
+ /**
642
+ * Press a single key (keyDown then keyUp). For printable characters,
643
+ * uses the text path on keyDown; for named keys, sets key/code/VK.
644
+ */
645
+ keyPress(key: string, options?: {
646
+ delay?: number;
647
+ }): Promise<void>;
648
+ /**
649
+ * Create an isolated world for the **current** main frame and return its context id.
650
+ */
651
+ private createIsolatedWorldForCurrentMain;
652
+ /**
653
+ * Wait until the **current** main frame reaches a lifecycle state.
654
+ * - Fast path via `document.readyState`.
655
+ * - Event path listens at the session level and compares incoming `frameId`
656
+ * to `mainFrameId()` **at event time** to follow root swaps.
657
+ */
658
+ private waitForMainLoadState;
659
+ }
660
+
661
+ declare const AvailableModelSchema: z.ZodEnum<["gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o4-mini", "o3", "o3-mini", "o1", "o1-mini", "gpt-4o", "gpt-4o-mini", "gpt-4o-2024-08-06", "gpt-4.5-preview", "o1-preview", "claude-3-5-sonnet-latest", "claude-3-5-sonnet-20241022", "claude-3-5-sonnet-20240620", "claude-3-7-sonnet-latest", "claude-3-7-sonnet-20250219", "cerebras-llama-3.3-70b", "cerebras-llama-3.1-8b", "groq-llama-3.3-70b-versatile", "groq-llama-3.3-70b-specdec", "gemini-1.5-flash", "gemini-1.5-pro", "gemini-1.5-flash-8b", "gemini-2.0-flash-lite", "gemini-2.0-flash", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25"]>; // Zod enum listing the model names this package enumerates explicitly.
662
+ type AvailableModel = z.infer<typeof AvailableModelSchema> | string; // The `| string` member widens this to any string; the enum members are not enforced at the type level.
663
+ type ModelProvider = "openai" | "anthropic" | "cerebras" | "groq" | "google" | "aisdk"; // Closed set of provider identifiers.
664
+ type ClientOptions = ClientOptions$1 | ClientOptions$2; // Union of two client option types declared elsewhere in this bundle (the `$n` suffixes are bundler renames).
665
+ type ModelConfiguration = AvailableModel | (ClientOptions & { // Either a bare model name, or client options carrying a required `modelName`.
666
+ modelName: AvailableModel;
667
+ });
668
+ interface AnthropicJsonSchemaObject { // Every member is optional; `properties`/`required` may appear at the top level and/or under `definitions.MySchema`.
669
+ definitions?: {
670
+ MySchema?: { // Fixed key name `MySchema` (presumably the name used when the schema is generated; confirm against the producer).
671
+ properties?: Record<string, unknown>;
672
+ required?: string[];
673
+ };
674
+ };
675
+ properties?: Record<string, unknown>;
676
+ required?: string[];
677
+ }
678
+
679
+ interface LLMTool { // Tool descriptor passed via `ChatCompletionOptions.tools`; all four members are required.
680
+ type: "function"; // Only the literal "function" is allowed.
681
+ name: string;
682
+ description: string;
683
+ parameters: Record<string, unknown>; // Untyped parameter description (presumably a JSON-schema-like object; confirm).
684
+ }
685
+
686
+ type LogLevel = 0 | 1 | 2; // Numeric verbosity levels; their meanings are listed in the LOG_LEVEL_NAMES comment below.
687
+ /**
688
+ * Mapping between numeric log levels and their names
689
+ *
690
+ * 0 - error/warn - Critical issues or important warnings
691
+ * 1 - info - Standard information messages
692
+ * 2 - debug - Detailed information for debugging
693
+ */
694
+ declare const LOG_LEVEL_NAMES: Record<LogLevel, string>; // One string entry per LogLevel value.
695
+ type LogLine = { // Structured log record; `message` is the only required member.
696
+ id?: string;
697
+ category?: string;
698
+ message: string;
699
+ level?: LogLevel;
700
+ timestamp?: string;
701
+ auxiliary?: { // Arbitrary named extras; each value is carried as a string plus a tag naming its original type.
702
+ [key: string]: {
703
+ value: string;
704
+ type: "object" | "string" | "html" | "integer" | "float" | "boolean";
705
+ };
706
+ };
707
+ };
708
+ type Logger = (logLine: LogLine) => void; // Callback that receives one LogLine and returns nothing.
709
+
710
+ interface ChatMessage { // One chat turn: a role plus either plain text or a list of content parts.
711
+ role: "system" | "user" | "assistant";
712
+ content: ChatMessageContent;
713
+ }
714
+ type ChatMessageContent = string | (ChatMessageImageContent | ChatMessageTextContent)[]; // Plain string, or an array mixing image and text parts.
715
+ interface ChatMessageImageContent { // Only `type` is required; `image_url` and `source` are two alternative optional image carriers.
716
+ type: string;
717
+ image_url?: {
718
+ url: string;
719
+ };
720
+ text?: string;
721
+ source?: {
722
+ type: string;
723
+ media_type: string;
724
+ data: string;
725
+ };
726
+ }
727
+ interface ChatMessageTextContent { // Text part; both members are required.
728
+ type: string;
729
+ text: string;
730
+ }
731
+ declare const AnnotatedScreenshotText = "This is a screenshot of the current page state with the elements annotated on it. Each element id is annotated with a number to the top left of it. Duplicate annotations at the same location are under each other vertically."; // Exported string constant; its literal value is part of the declared type.
732
+ interface ChatCompletionOptions { // Request options for a chat completion; `messages` is the only required member.
733
+ messages: ChatMessage[];
734
+ temperature?: number;
735
+ top_p?: number;
736
+ frequency_penalty?: number;
737
+ presence_penalty?: number;
738
+ image?: { // Optional image attachment as a raw Buffer with an optional description.
739
+ buffer: Buffer;
740
+ description?: string;
741
+ };
742
+ response_model?: { // Optional named Zod schema (presumably used to request structured output; confirm in the client implementations).
743
+ name: string;
744
+ schema: ZodType;
745
+ };
746
+ tools?: LLMTool[];
747
+ tool_choice?: "auto" | "none" | "required";
748
+ maxTokens?: number; // Note: camelCase, unlike the snake_case sampling options above.
749
+ requestId?: string;
750
+ }
751
+ type LLMResponse = { // Default response shape for `LLMClient.createChatCompletion`; every member is required.
752
+ id: string;
753
+ object: string;
754
+ created: number;
755
+ model: string;
756
+ choices: { // Array of candidate completions.
757
+ index: number;
758
+ message: {
759
+ role: string;
760
+ content: string | null; // May be null (the type allows a message with no text content).
761
+ tool_calls: { // Required array; each call names a function and passes its arguments as a single string.
762
+ id: string;
763
+ type: string;
764
+ function: {
765
+ name: string;
766
+ arguments: string;
767
+ };
768
+ }[];
769
+ };
770
+ finish_reason: string;
771
+ }[];
772
+ usage: { // Token accounting for the request.
773
+ prompt_tokens: number;
774
+ completion_tokens: number;
775
+ total_tokens: number;
776
+ };
777
+ };
778
+ interface CreateChatCompletionOptions { // Argument bundle for `LLMClient.createChatCompletion`.
779
+ options: ChatCompletionOptions;
780
+ logger: (message: LogLine) => void; // Required logging callback (same signature as the `Logger` type).
781
+ retries?: number;
782
+ }
783
+ declare abstract class LLMClient { // Abstract base: subclasses must implement `createChatCompletion`.
784
+ type: "openai" | "anthropic" | "cerebras" | "groq" | (string & {}); // Accepts any string; `(string & {})` is the usual idiom for keeping the literal members as editor suggestions.
785
+ modelName: AvailableModel | (string & {});
786
+ hasVision: boolean;
787
+ clientOptions: ClientOptions;
788
+ userProvidedInstructions?: string;
789
+ constructor(modelName: AvailableModel, userProvidedInstructions?: string);
790
+ abstract createChatCompletion<T = LLMResponse & { // T defaults to LLMResponse intersected with an optional `usage`; callers may supply their own T.
791
+ usage?: LLMResponse["usage"];
792
+ }>(options: CreateChatCompletionOptions): Promise<T>;
793
+ generateObject: typeof generateObject; // The members below are typed as the identically named functions imported elsewhere in this bundle.
794
+ generateText: typeof generateText;
795
+ streamText: typeof streamText;
796
+ streamObject: typeof streamObject;
797
+ generateImage: typeof experimental_generateImage;
798
+ embed: typeof embed;
799
+ embedMany: typeof embedMany;
800
+ transcribe: typeof experimental_transcribe;
801
+ generateSpeech: typeof experimental_generateSpeech;
802
+ getLanguageModel?(): LanguageModel; // Optional method; a subclass is not required to provide it.
803
+ }
804
+
805
+ type V3Env = "LOCAL" | "BROWSERBASE"; // The two supported runtime environments.
806
+ /** Local launch options for V3 (chrome-launcher + CDP).
807
+ * Matches v2 shape where feasible; unsupported fields are accepted but ignored.
808
+ */
809
+ interface LocalBrowserLaunchOptions { // Every member is optional.
810
+ args?: string[];
811
+ executablePath?: string;
812
+ userDataDir?: string;
813
+ preserveUserDataDir?: boolean;
814
+ headless?: boolean;
815
+ devtools?: boolean;
816
+ chromiumSandbox?: boolean;
817
+ ignoreDefaultArgs?: boolean | string[]; // Either a boolean or a list of strings.
818
+ proxy?: { // When a proxy is given, `server` is required; the other members are optional.
819
+ server: string;
820
+ bypass?: string;
821
+ username?: string;
822
+ password?: string;
823
+ };
824
+ locale?: string;
825
+ viewport?: { // Both dimensions are required when a viewport is given.
826
+ width: number;
827
+ height: number;
828
+ };
829
+ deviceScaleFactor?: number;
830
+ hasTouch?: boolean;
831
+ ignoreHTTPSErrors?: boolean;
832
+ cdpUrl?: string;
833
+ connectTimeoutMs?: number;
834
+ downloadsPath?: string;
835
+ acceptDownloads?: boolean;
836
+ }
837
+ /** Constructor options for V3 */
838
+ interface V3Options { // `env` is the only required member.
839
+ env: V3Env;
840
+ apiKey?: string;
841
+ projectId?: string;
842
+ /**
843
+ * Optional: fine-tune Browserbase session creation or resume an existing session.
844
+ */
845
+ browserbaseSessionCreateParams?: Omit<Browserbase.Sessions.SessionCreateParams, "projectId"> & { // SDK create params with `projectId` relaxed from its SDK declaration to optional.
846
+ projectId?: string;
847
+ };
848
+ browserbaseSessionID?: string;
849
+ localBrowserLaunchOptions?: LocalBrowserLaunchOptions;
850
+ model?: ModelConfiguration;
851
+ llmClient?: LLMClient;
852
+ systemPrompt?: string;
853
+ logInferenceToFile?: boolean;
854
+ experimental?: boolean;
855
+ verbose?: 0 | 1 | 2; // Same literal set as the `LogLevel` type.
856
+ selfHeal?: boolean;
857
+ /** Disable pino logging backend (useful for tests or minimal environments). */
858
+ disablePino?: boolean;
859
+ /** Optional external logger hook for integrating with host apps. */
860
+ logger?: (line: LogLine) => void;
861
+ /** Show a visual cursor overlay that follows our mouse events. */
862
+ includeCursor?: boolean;
863
+ /** Directory used to persist cached actions for act(). */
864
+ cacheDir?: string;
865
+ domSettleTimeout?: number; // Unit is not stated here (the V3 class stores a `domSettleTimeoutMs`; presumably milliseconds, confirm).
866
+ }
867
+ type PlaywrightPage = playwright_core.Page; // Aliases for the Page types of three external browser-automation packages.
868
+ type PatchrightPage = patchright_core.Page;
869
+ type PuppeteerPage = puppeteer_core.Page;
870
+ interface ActOptions$1 { // Options accepted by `V3.act`; distinct from the exported `ActOptions` (this one uses `timeout` and `page`).
871
+ model?: ModelConfiguration;
872
+ variables?: Record<string, string>;
873
+ timeout?: number;
874
+ page?: PlaywrightPage | PuppeteerPage | PatchrightPage | Page; // Accepts an external page object or this package's own `Page`.
875
+ }
876
+ interface ExtractOptions$1 { // Options accepted by `V3.extract`; all members optional.
877
+ model?: ModelConfiguration;
878
+ timeout?: number;
879
+ selector?: string;
880
+ page?: PlaywrightPage | PuppeteerPage | PatchrightPage | Page;
881
+ }
882
+ declare const defaultExtractSchema: z.ZodObject<{ // Single-field object schema; used as the result type of `V3.extract(instruction)`.
883
+ extraction: z.ZodString;
884
+ }, "strip", z.ZodTypeAny, {
885
+ extraction?: string; // The emitted output/input types mark the field optional even though the shape is a plain ZodString.
886
+ }, {
887
+ extraction?: string;
888
+ }>;
889
+ declare const pageTextSchema: z.ZodObject<{ // Single-field object schema; used as the result type of `V3.extract()` with no instruction.
890
+ pageText: z.ZodString;
891
+ }, "strip", z.ZodTypeAny, {
892
+ pageText?: string;
893
+ }, {
894
+ pageText?: string;
895
+ }>;
896
+ interface ObserveOptions$1 { // Options accepted by `V3.observe`; same member set as `ExtractOptions$1`.
897
+ model?: ModelConfiguration;
898
+ timeout?: number;
899
+ selector?: string;
900
+ page?: PlaywrightPage | PuppeteerPage | PatchrightPage | Page;
901
+ }
902
+ type LoadState = "load" | "domcontentloaded" | "networkidle"; // Page lifecycle states; used by the `waitUntil` members of the agent replay steps.
903
+ interface V3Metrics { // Prompt tokens, completion tokens and inference time for each of act/extract/observe/agent, plus totals; all required numbers.
904
+ actPromptTokens: number;
905
+ actCompletionTokens: number;
906
+ actInferenceTimeMs: number;
907
+ extractPromptTokens: number;
908
+ extractCompletionTokens: number;
909
+ extractInferenceTimeMs: number;
910
+ observePromptTokens: number;
911
+ observeCompletionTokens: number;
912
+ observeInferenceTimeMs: number;
913
+ agentPromptTokens: number;
914
+ agentCompletionTokens: number;
915
+ agentInferenceTimeMs: number;
916
+ totalPromptTokens: number; // Presumably the sums across the four categories above (see `V3.updateTotalMetrics`; confirm).
917
+ totalCompletionTokens: number;
918
+ totalInferenceTimeMs: number;
919
+ }
920
+ declare enum V3FunctionName { // String enum (each value equals its key); the `functionName` argument of `V3.updateMetrics`.
921
+ ACT = "ACT",
922
+ EXTRACT = "EXTRACT",
923
+ OBSERVE = "OBSERVE",
924
+ AGENT = "AGENT"
925
+ }
926
+
927
+ /**
928
+ * V3Context
929
+ *
930
+ * Owns the root CDP connection and wires Target/Page events into Page.
931
+ * Maintains one Page per top-level target, adopts OOPIF child sessions into the owner Page,
932
+ * and tracks target→page and (root) frame→target mappings for lookups.
933
+ *
934
+ * IMPORTANT: FrameId → session ownership is managed inside Page (via its FrameRegistry).
935
+ * Context never “guesses” owners; it simply forwards events (with the emitting session)
936
+ * so Page can record the correct owner at event time.
937
+ */
938
+ declare class V3Context { // Declaration only: private members are listed without types, as emitted in this .d.ts.
939
+ readonly conn: CdpConnection; // The only public field.
940
+ private readonly includeCursor;
941
+ private readonly env;
942
+ private constructor(); // Not constructible from outside; instances come from the static `create()` below.
943
+ private readonly _piercerInstalled;
944
+ private _lastPopupSignalAt;
945
+ private sessionKey;
946
+ private readonly _sessionInit;
947
+ private pagesByTarget;
948
+ private mainFrameToTarget;
949
+ private sessionOwnerPage;
950
+ private frameOwnerPage;
951
+ private pendingOopifByMainFrame;
952
+ private createdAtByTarget;
953
+ private typeByTarget;
954
+ private _pageOrder;
955
+ /**
956
+ * Create a Context for a given CDP websocket URL and bootstrap target wiring.
957
+ */
958
+ static create(wsUrl: string, opts?: {
959
+ includeCursor?: boolean;
960
+ env?: "LOCAL" | "BROWSERBASE";
961
+ }): Promise<V3Context>;
962
+ /**
963
+ * Wait until at least one top-level Page has been created and registered.
964
+ * We poll internal maps that bootstrap/onAttachedToTarget populate.
965
+ */
966
+ private waitForFirstTopLevelPage;
967
+ private ensurePiercer;
968
+ /** Mark a page target as the most-recent one (active). */
969
+ private _pushActive;
970
+ /** Remove a page target from the recency list (used on close). */
971
+ private _removeFromOrder;
972
+ /** Return the current active Page (most-recent page that still exists). */
973
+ activePage(): Page | undefined; // Synchronous; may be undefined. See `awaitActivePage` for the Promise-returning variant.
974
+ /**
975
+ * Return top-level `Page`s (oldest → newest). OOPIF targets are not included.
976
+ */
977
+ pages(): Page[];
978
+ /**
979
+ * Resolve an owning `Page` by the **top-level main frame id**.
980
+ * Note: child (OOPIF) roots are intentionally not present in this mapping.
981
+ */
982
+ resolvePageByMainFrameId(frameId: string): Page | undefined;
983
+ /**
984
+ * Serialize the full frame tree for a given top-level main frame id.
985
+ */
986
+ getFullFrameTreeByMainFrameId(rootMainFrameId: string): Promise<Protocol.Page.FrameTree>;
987
+ /**
988
+ * Create a new top-level page (tab) with the given URL and return its Page object.
989
+ * Waits until the target is attached and registered.
990
+ */
991
+ newPage(url?: string): Promise<Page>; // `url` is optional; the default used when omitted is not visible in this declaration.
992
+ /**
993
+ * Close CDP and clear all mappings. Best-effort cleanup.
994
+ */
995
+ close(): Promise<void>;
996
+ /**
997
+ * Bootstrap target lifecycle:
998
+ * - Attach to existing targets.
999
+ * - Attach on `Target.targetCreated` (fallback for OOPIFs).
1000
+ * - Handle auto-attach events.
1001
+ * - Clean up on detach/destroy.
1002
+ */
1003
+ private bootstrap;
1004
+ /**
1005
+ * Handle a newly attached target (top-level or potential OOPIF):
1006
+ * - Enable Page domain and lifecycle events.
1007
+ * - If top-level → create Page, wire listeners, resume.
1008
+ * - Else → probe child root frame id via `Page.getFrameTree` and adopt immediately
1009
+ * if the parent is known; otherwise stage until parent `frameAttached`.
1010
+ * - Resume the target only after listeners are wired.
1011
+ */
1012
+ private onAttachedToTarget;
1013
+ /**
1014
+ * Detach handler:
1015
+ * - Remove child session ownership and prune its subtree.
1016
+ * - If a top-level target, cleanup its `Page` and mappings.
1017
+ * - Drop any staged child for this session.
1018
+ */
1019
+ private onDetachedFromTarget;
1020
+ /**
1021
+ * Cleanup a top-level Page by target id, removing its root and staged children.
1022
+ */
1023
+ private cleanupByTarget;
1024
+ /**
1025
+ * Wire Page-domain frame events for a session into the owning Page & mappings.
1026
+ * We forward the *emitting session* with every event so Page can stamp ownership precisely.
1027
+ */
1028
+ private installFrameEventBridges;
1029
+ /**
1030
+ * Register that a session belongs to a Page (used by event routing).
1031
+ */
1032
+ private wireSessionToOwnerPage;
1033
+ /**
1034
+ * Utility: reverse-lookup the top-level target id that owns a given Page.
1035
+ */
1036
+ private findTargetIdByPage;
1037
+ private _notePopupSignal;
1038
+ /**
1039
+ * Await the current active page, waiting briefly if a popup/open was just triggered.
1040
+ * Normal path returns immediately; popup path waits up to timeoutMs for the new page.
1041
+ */
1042
+ awaitActivePage(timeoutMs?: number): Promise<Page>; // Return type has no `undefined`, unlike `activePage()`.
1043
+ }
1044
+
1045
+ interface AgentAction { // One recorded agent action; `type` is the only required member.
1046
+ type: string;
1047
+ reasoning?: string;
1048
+ taskCompleted?: boolean;
1049
+ action?: string;
1050
+ timeMs?: number;
1051
+ pageText?: string;
1052
+ pageUrl?: string;
1053
+ instruction?: string;
1054
+ [key: string]: unknown; // Index signature: any additional keys are permitted with unknown values.
1055
+ }
1056
+ interface AgentResult { // Value resolved by an agent's `execute()`.
1057
+ success: boolean;
1058
+ message: string;
1059
+ actions: AgentAction[];
1060
+ completed: boolean; // Separate flag from `success`; how the two differ is not visible in this declaration.
1061
+ metadata?: Record<string, unknown>;
1062
+ usage?: { // Optional accounting; all three members are required when `usage` is present.
1063
+ input_tokens: number;
1064
+ output_tokens: number;
1065
+ inference_time_ms: number;
1066
+ };
1067
+ }
1068
+ interface AgentOptions { // Every member is optional.
1069
+ maxSteps?: number;
1070
+ autoScreenshot?: boolean;
1071
+ waitBetweenActions?: number; // Unit is not stated in this declaration.
1072
+ context?: string;
1073
+ }
1074
+ interface AgentExecuteOptions extends AgentOptions { // AgentOptions plus a required `instruction`.
1075
+ instruction: string;
1076
+ }
1077
+ type AgentProviderType = "openai" | "anthropic"; // Same union as `AgentType`, declared separately.
1078
+ interface AgentClientOptions { // `apiKey` is the only required member.
1079
+ apiKey: string;
1080
+ organization?: string;
1081
+ baseURL?: string;
1082
+ defaultMaxSteps?: number;
1083
+ [key: string]: unknown; // Index signature: extra provider-specific keys are permitted.
1084
+ }
1085
+ type AgentType = "openai" | "anthropic"; // Same union as `AgentProviderType`, declared separately.
1086
+ interface AgentExecutionOptions { // Same outer shape as CreateChatCompletionOptions, but wrapping AgentExecuteOptions.
1087
+ options: AgentExecuteOptions;
1088
+ logger: (message: LogLine) => void;
1089
+ retries?: number;
1090
+ }
1091
+ interface AgentHandlerOptions { // `modelName` and `agentType` are required.
1092
+ modelName: string;
1093
+ clientOptions?: Record<string, unknown>;
1094
+ userProvidedInstructions?: string;
1095
+ agentType: AgentType;
1096
+ experimental?: boolean;
1097
+ }
1098
+ interface ActionExecutionResult { // Success flag with an optional error string and optional untyped payload.
1099
+ success: boolean;
1100
+ error?: string;
1101
+ data?: unknown;
1102
+ }
1103
+ interface ToolUseItem extends ResponseItem { // Narrows `ResponseItem.type` to the literal "tool_use".
1104
+ type: "tool_use";
1105
+ id: string;
1106
+ name: string;
1107
+ input: Record<string, unknown>;
1108
+ }
1109
+ interface AnthropicMessage { // Content is either plain text or an array of content blocks.
1110
+ role: string;
1111
+ content: string | Array<AnthropicContentBlock>;
1112
+ }
1113
+ interface AnthropicContentBlock { // Open-ended block: a `type` tag plus arbitrary additional keys.
1114
+ type: string;
1115
+ [key: string]: unknown;
1116
+ }
1117
+ interface AnthropicTextBlock extends AnthropicContentBlock { // Narrows `type` to "text" and requires `text`.
1118
+ type: "text";
1119
+ text: string;
1120
+ }
1121
+ interface AnthropicToolResult { // Does not extend AnthropicContentBlock; `tool_use_id` names the tool-use this result answers.
1122
+ type: "tool_result";
1123
+ tool_use_id: string;
1124
+ content: string | Array<AnthropicContentBlock>;
1125
+ }
1126
+ interface ResponseItem { // Base shape: `type` and `id` are required, any other keys are permitted.
1127
+ type: string;
1128
+ id: string;
1129
+ [key: string]: unknown;
1130
+ }
1131
+ interface ComputerCallItem extends ResponseItem { // Narrows `type` to "computer_call".
1132
+ type: "computer_call";
1133
+ call_id: string;
1134
+ action: { // Open-ended action: a `type` tag plus arbitrary additional keys.
1135
+ type: string;
1136
+ [key: string]: unknown;
1137
+ };
1138
+ pending_safety_checks?: Array<{ // Same element shape as `acknowledged_safety_checks` in ResponseInputItem.
1139
+ id: string;
1140
+ code: string;
1141
+ message: string;
1142
+ }>;
1143
+ }
1144
+ interface FunctionCallItem extends ResponseItem { // Narrows `type` to "function_call"; `arguments` is a single string.
1145
+ type: "function_call";
1146
+ call_id: string;
1147
+ name: string;
1148
+ arguments: string;
1149
+ }
1150
+ type ResponseInputItem = { // Union of three shapes: a role/content message, a computer-call output, or a function-call output.
1151
+ role: string;
1152
+ content: string;
1153
+ } | {
1154
+ type: "computer_call_output";
1155
+ call_id: string;
1156
+ output: { // Either this image object or a plain string (see the `| string` below).
1157
+ type: "input_image";
1158
+ image_url: string;
1159
+ current_url?: string;
1160
+ error?: string;
1161
+ [key: string]: unknown;
1162
+ } | string;
1163
+ acknowledged_safety_checks?: Array<{
1164
+ id: string;
1165
+ code: string;
1166
+ message: string;
1167
+ }>;
1168
+ } | {
1169
+ type: "function_call_output";
1170
+ call_id: string;
1171
+ output: string;
1172
+ };
1173
+ interface AgentInstance { // Structurally the same shape that `V3.agent()` is declared to return.
1174
+ execute: (instructionOrOptions: string | AgentExecuteOptions) => Promise<AgentResult>; // Accepts a bare instruction string or a full options object.
1175
+ }
1176
+
1177
+ interface ActOptions { // Exported `ActOptions`; note `V3.act` is declared with the different `ActOptions$1` type.
1178
+ action: string; // Only required member.
1179
+ model?: ModelConfiguration;
1180
+ variables?: Record<string, string>;
1181
+ domSettleTimeoutMs?: number;
1182
+ timeoutMs?: number;
1183
+ iframes?: boolean;
1184
+ frameId?: string;
1185
+ }
1186
+ interface ActResult { // Value resolved by `V3.act`; all members required.
1187
+ success: boolean;
1188
+ message: string;
1189
+ actionDescription: string;
1190
+ actions: Action[];
1191
+ }
1192
+ interface ExtractOptions<T extends z.AnyZodObject> { // Exported `ExtractOptions`; every member is optional. `V3.extract` is declared with `ExtractOptions$1` instead.
1193
+ instruction?: string;
1194
+ schema?: T;
1195
+ model?: ModelConfiguration;
1196
+ domSettleTimeoutMs?: number;
1197
+ /**
1198
+ * @deprecated The `useTextExtract` parameter has no effect in this version of Stagehand and will be removed in later versions.
1199
+ */
1200
+ useTextExtract?: boolean;
1201
+ selector?: string;
1202
+ iframes?: boolean;
1203
+ frameId?: string;
1204
+ }
1205
+ type ExtractResult<T extends z.AnyZodObject> = z.infer<T>; // The result type is whatever the supplied Zod object schema infers to.
1206
+ interface ObserveOptions { // Exported `ObserveOptions`; every member is optional. `V3.observe` is declared with `ObserveOptions$1` instead.
1207
+ instruction?: string;
1208
+ model?: ModelConfiguration;
1209
+ domSettleTimeoutMs?: number;
1210
+ returnAction?: boolean;
1211
+ selector?: string;
1212
+ /**
1213
+ * @deprecated The `onlyVisible` parameter has no effect in this version of Stagehand and will be removed in later versions.
1214
+ */
1215
+ onlyVisible?: boolean;
1216
+ drawOverlay?: boolean;
1217
+ iframes?: boolean;
1218
+ frameId?: string;
1219
+ }
1220
+ interface Action { // Element of `V3.observe` results and accepted directly by `V3.act(action)`; `selector` and `description` are required.
1221
+ selector: string;
1222
+ description: string;
1223
+ backendNodeId?: number;
1224
+ method?: string;
1225
+ arguments?: string[]; // Arguments are carried as strings.
1226
+ }
1227
+ /**
1228
+ * Configuration for agent functionality
1229
+ */
1230
+ interface AgentConfig { // Argument of `V3.agent()`; every member is optional.
1231
+ /**
1232
+ * The provider to use for agent functionality
1233
+ */
1234
+ provider?: AgentProviderType;
1235
+ /**
1236
+ * The model to use for agent functionality
1237
+ */
1238
+ model?: string;
1239
+ /**
1240
+ * The model to use for tool execution (observe/act calls within agent tools).
1241
+ * If not specified, inherits from the main model configuration.
1242
+ * Format: "provider/model" (e.g., "openai/gpt-4o-mini", "google/gemini-2.0-flash-exp")
1243
+ */
1244
+ executionModel?: string;
1245
+ /**
1246
+ * Custom instructions to provide to the agent
1247
+ */
1248
+ instructions?: string;
1249
+ /**
1250
+ * Additional options to pass to the agent client
1251
+ */
1252
+ options?: Record<string, unknown>;
1253
+ /**
1254
+ * MCP integrations - array of Client objects or strings (presumably MCP server URLs; confirm against the implementation)
1255
+ */
1256
+ integrations?: (Client | string)[];
1257
+ /**
1258
+ * Tools passed to the agent client
1259
+ */
1260
+ tools?: ToolSet;
1261
+ }
1262
+ interface HistoryEntry { // Element of the array resolved by `V3.history`; all members required.
1263
+ method: "act" | "extract" | "observe" | "navigate";
1264
+ parameters: unknown;
1265
+ result: unknown;
1266
+ timestamp: string; // String-typed; the exact format is not visible in this declaration.
1267
+ }
1268
+ /**
1269
+ * Represents a path through a Zod schema from the root object down to a
1270
+ * particular field. The `segments` array describes the chain of keys/indices.
1271
+ *
1272
+ * - **String** segments indicate object property names.
1273
+ * - **Number** segments indicate array indices.
1274
+ *
1275
+ * For example, `["users", 0, "homepage"]` might describe reaching
1276
+ * the `homepage` field in `schema.users[0].homepage`.
1277
+ */
1278
+ interface ZodPathSegments { // Wrapper around a single required `segments` array.
1279
+ /**
1280
+ * The ordered list of keys/indices leading from the schema root
1281
+ * to the targeted field.
1282
+ */
1283
+ segments: Array<string | number>;
1284
+ }
1285
+
1286
+ type AgentReplayStep = AgentReplayActStep | AgentReplayFillFormStep | AgentReplayGotoStep | AgentReplayScrollStep | AgentReplayWaitStep | AgentReplayNavBackStep | { // Six known step shapes plus an open-ended fallback with any string `type`.
1287
+ type: string;
1288
+ [key: string]: unknown;
1289
+ };
1290
+ interface AgentReplayActStep { // `type` and `instruction` are required.
1291
+ type: "act";
1292
+ instruction: string;
1293
+ actions?: Action[];
1294
+ actionDescription?: string;
1295
+ message?: string;
1296
+ timeout?: number;
1297
+ }
1298
+ interface AgentReplayFillFormStep { // Only `type` is required.
1299
+ type: "fillForm";
1300
+ fields?: Array<{
1301
+ action: string;
1302
+ value: string;
1303
+ }>;
1304
+ observeResults?: Action[];
1305
+ actions?: Action[];
1306
+ }
1307
+ interface AgentReplayGotoStep { // `url` is required; `waitUntil` optionally names a LoadState.
1308
+ type: "goto";
1309
+ url: string;
1310
+ waitUntil?: LoadState;
1311
+ }
1312
+ interface AgentReplayScrollStep { // Deltas and anchor are all optional.
1313
+ type: "scroll";
1314
+ deltaX?: number;
1315
+ deltaY?: number;
1316
+ anchor?: { // Both coordinates are required when an anchor is given.
1317
+ x: number;
1318
+ y: number;
1319
+ };
1320
+ }
1321
+ interface AgentReplayWaitStep { // `timeMs` is required.
1322
+ type: "wait";
1323
+ timeMs: number;
1324
+ }
1325
+ interface AgentReplayNavBackStep { // Note the all-lowercase literal "navback".
1326
+ type: "navback";
1327
+ waitUntil?: LoadState;
1328
+ }
1329
+
1330
+ /**
1331
+ * V3
1332
+ *
1333
+ * Purpose:
1334
+ * A high-level orchestrator for Stagehand V3. Abstracts away whether the browser
1335
+ * runs **locally via Chrome** or remotely on **Browserbase**, and exposes simple
1336
+ * entrypoints (`act`, `extract`, `observe`) that delegate to the corresponding
1337
+ * handler classes.
1338
+ *
1339
+ * Responsibilities:
1340
+ * - Bootstraps Chrome or Browserbase, ensures a working CDP WebSocket, and builds a `V3Context`.
1341
+ * - Manages lifecycle: init, context access, cleanup.
1342
+ * - Bridges external page objects (Playwright/Puppeteer) into internal frameIds for handlers.
1343
+ * - Provides a stable API surface for downstream code regardless of runtime environment.
1344
+ */
1345
+ declare class V3 { // Exported under the name `Stagehand` (see the export list). Private members are listed without types, as emitted in this .d.ts.
1346
+ private readonly opts;
1347
+ private state;
1348
+ private actHandler;
1349
+ private extractHandler;
1350
+ private observeHandler;
1351
+ private ctx;
1352
+ llmClient: LLMClient;
1353
+ private modelName;
1354
+ private modelClientOptions;
1355
+ private llmProvider;
1356
+ private readonly domSettleTimeoutMs?;
1357
+ private _isClosing;
1358
+ private _onCdpClosed;
1359
+ readonly experimental: boolean;
1360
+ readonly logInferenceToFile: boolean;
1361
+ private externalLogger?;
1362
+ verbose: 0 | 1 | 2; // Same literal set as the `LogLevel` type.
1363
+ private _history;
1364
+ private readonly instanceId;
1365
+ private static _processGuardsInstalled;
1366
+ private static _instances;
1367
+ private cacheDir?;
1368
+ private _agentReplayRecording;
1369
+ v3Metrics: V3Metrics; // Public and mutable; the `metrics` getter below exposes the same type through a Promise.
1370
+ /**
1371
+ * Async property for metrics so callers can `await v3.metrics`.
1372
+ * Returning a Promise future-proofs async aggregation/storage.
1373
+ */
1374
+ get metrics(): Promise<V3Metrics>;
1375
+ private cloneForCache;
1376
+ private beginAgentReplayRecording;
1377
+ private endAgentReplayRecording;
1378
+ private discardAgentReplayRecording;
1379
+ private isAgentReplayRecording;
1380
+ isAgentReplayActive(): boolean;
1381
+ recordAgentReplayStep(step: AgentReplayStep): void;
1382
+ /**
1383
+ * Async property for history so callers can `await v3.history`.
1384
+ * Returns a frozen copy to avoid external mutation.
1385
+ */
1386
+ get history(): Promise<ReadonlyArray<HistoryEntry>>;
1387
+ addToHistory(method: HistoryEntry["method"], parameters: unknown, result?: unknown): void; // `result` is optional here although `HistoryEntry.result` is a required member.
1388
+ updateMetrics(functionName: V3FunctionName, promptTokens: number, completionTokens: number, inferenceTimeMs: number): void;
1389
+ private updateTotalMetrics;
1390
+ constructor(opts: V3Options); // Construction is synchronous; `init()` below is the Promise-returning entrypoint.
1391
+ private _immediateShutdown;
1392
+ private static _installProcessGuards;
1393
+ /**
1394
+ * Entrypoint: initializes handlers, launches Chrome or Browserbase,
1395
+ * and sets up a CDP context.
1396
+ */
1397
+ init(): Promise<void>;
1398
+ /** Apply post-connect local browser options that require CDP. */
1399
+ private _applyPostConnectLocalOptions;
1400
+ /**
1401
+ * Run an "act" instruction through the ActHandler.
1402
+ *
1403
+ * New API:
1404
+ * - act(instruction: string, options?: ActOptions)
1405
+ * - act(action: Action, options?: ActOptions)
1406
+ */
1407
+ act(instruction: string, options?: ActOptions$1): Promise<ActResult>; // The options type is the internal `ActOptions$1`, not the exported `ActOptions`.
1408
+ act(action: Action, options?: ActOptions$1): Promise<ActResult>;
1409
+ /**
1410
+ * Run an "extract" instruction through the ExtractHandler.
1411
+ *
1412
+ * Accepted forms:
1413
+ * - extract() → pageText
1414
+ * - extract(options) → pageText
1415
+ * - extract(instruction) → defaultExtractSchema
1416
+ * - extract(instruction, schema) → schema-inferred
1417
+ * - extract(instruction, schema, options)
1418
+ */
1419
+ extract(): Promise<z.infer<typeof pageTextSchema>>;
1420
+ extract(options: ExtractOptions$1): Promise<z.infer<typeof pageTextSchema>>;
1421
+ extract(instruction: string, options?: ExtractOptions$1): Promise<z.infer<typeof defaultExtractSchema>>;
1422
+ extract<T extends ZodTypeAny>(instruction: string, schema: T, options?: ExtractOptions$1): Promise<z.infer<T>>; // T is constrained to ZodTypeAny here, wider than the z.AnyZodObject bound on the exported ExtractOptions<T>.
1423
+ /**
1424
+ * Run an "observe" instruction through the ObserveHandler.
1425
+ */
1426
+ observe(): Promise<Action[]>;
1427
+ observe(options: ObserveOptions$1): Promise<Action[]>;
1428
+ observe(instruction: string, options?: ObserveOptions$1): Promise<Action[]>;
1429
+ /** Return the browser-level CDP WebSocket endpoint. */
1430
+ connectURL(): string;
1431
+ /** Expose the current CDP-backed context. */
1432
+ get context(): V3Context;
1433
+ /** Best-effort cleanup of context and launched resources. */
1434
+ close(opts?: {
1435
+ force?: boolean;
1436
+ }): Promise<void>;
1437
+ /** Guard: ensure Browserbase credentials exist in options. */
1438
+ private requireBrowserbaseCreds;
1439
+ get logger(): (logLine: LogLine) => void; // Same signature as the `Logger` type.
1440
+ /**
1441
+ * Normalize a Playwright/Puppeteer page object into its top frame id,
1442
+ * so handlers can resolve it to a `Page` within our V3Context.
1443
+ */
1444
+ private resolveTopFrameId;
1445
+ private isPlaywrightPage;
1446
+ private isPatchrightPage;
1447
+ private isPuppeteerPage;
1448
+ private normalizeToV3Page;
1449
+ private buildActCacheKey;
1450
+ private safeGetPageUrl;
1451
+ private readActCacheEntry;
1452
+ private writeActCacheEntry;
1453
+ private sanitizeAgentExecuteOptions;
1454
+ private buildAgentCacheSignature;
1455
+ private buildAgentCacheKey;
1456
+ private readAgentCacheEntry;
1457
+ private writeAgentCacheEntry;
1458
+ private replayAgentCacheEntry;
1459
+ private executeAgentReplayStep;
1460
+ private replayAgentActStep;
1461
+ private replayAgentFillFormStep;
1462
+ private replayAgentGotoStep;
1463
+ private replayAgentScrollStep;
1464
+ private replayAgentWaitStep;
1465
+ private replayAgentNavBackStep;
1466
+ private replayCachedActions;
1467
+ private runWithActTimeout;
1468
+ /**
1469
+ * Create a v3 agent instance (AISDK tool-based) with execute().
1470
+ * Mirrors the v2 Stagehand.agent() tool mode (no CUA provider here).
1471
+ */
1472
+ agent(options?: AgentConfig): { // The inline return type is structurally identical to the `AgentInstance` interface.
1473
+ execute: (instructionOrOptions: string | AgentExecuteOptions) => Promise<AgentResult>;
1474
+ };
1475
+ }
1476
+
1477
+ declare class StagehandAPIError extends Error { // Root of the API error family; extends Error directly, not StagehandError.
1478
+ constructor(message: string);
1479
+ }
1480
+ declare class StagehandAPIUnauthorizedError extends StagehandAPIError {
1481
+ constructor(message?: string); // Message is optional for this subclass only.
1482
+ }
1483
+ declare class StagehandHttpError extends StagehandAPIError {
1484
+ constructor(message: string);
1485
+ }
1486
+ declare class StagehandServerError extends StagehandAPIError {
1487
+ constructor(message: string);
1488
+ }
1489
+ declare class StagehandResponseBodyError extends StagehandAPIError {
1490
+ constructor(); // Takes no arguments.
1491
+ }
1492
+ declare class StagehandResponseParseError extends StagehandAPIError {
1493
+ constructor(message: string);
1494
+ }
1495
+
1496
+ declare class StagehandError extends Error { // Root of the general error family; every class below extends it except ZodSchemaValidationError.
1497
+ constructor(message: string);
1498
+ }
1499
+ declare class StagehandDefaultError extends StagehandError {
1500
+ constructor(error?: unknown); // Accepts an optional value of any type.
1501
+ }
1502
+ declare class StagehandEnvironmentError extends StagehandError {
1503
+ constructor(currentEnvironment: string, requiredEnvironment: string, feature: string);
1504
+ }
1505
+ declare class MissingEnvironmentVariableError extends StagehandError {
1506
+ constructor(missingEnvironmentVariable: string, feature: string);
1507
+ }
1508
+ declare class UnsupportedModelError extends StagehandError {
1509
+ constructor(supportedModels: string[], feature?: string);
1510
+ }
1511
+ declare class UnsupportedModelProviderError extends StagehandError {
1512
+ constructor(supportedProviders: string[], feature?: string);
1513
+ }
1514
+ declare class UnsupportedAISDKModelProviderError extends StagehandError {
1515
+ constructor(provider: string, supportedProviders: string[]);
1516
+ }
1517
+ declare class InvalidAISDKModelFormatError extends StagehandError {
1518
+ constructor(modelName: string);
1519
+ }
1520
+ declare class StagehandNotInitializedError extends StagehandError {
1521
+ constructor(prop: string);
1522
+ }
1523
+ declare class BrowserbaseSessionNotFoundError extends StagehandError {
1524
+ constructor();
1525
+ }
1526
+ declare class CaptchaTimeoutError extends StagehandError {
1527
+ constructor();
1528
+ }
1529
+ declare class MissingLLMConfigurationError extends StagehandError {
1530
+ constructor();
1531
+ }
1532
+ declare class HandlerNotInitializedError extends StagehandError {
1533
+ constructor(handlerType: string);
1534
+ }
1535
+ declare class StagehandInvalidArgumentError extends StagehandError {
1536
+ constructor(message: string);
1537
+ }
1538
+ declare class StagehandElementNotFoundError extends StagehandError {
1539
+ constructor(xpaths: string[]); // Takes a list of xpaths rather than a message.
1540
+ }
1541
+ declare class AgentScreenshotProviderError extends StagehandError {
1542
+ constructor(message: string);
1543
+ }
1544
+ declare class StagehandMissingArgumentError extends StagehandError {
1545
+ constructor(message: string);
1546
+ }
1547
+ declare class CreateChatCompletionResponseError extends StagehandError {
1548
+ constructor(message: string);
1549
+ }
1550
+ declare class StagehandEvalError extends StagehandError {
1551
+ constructor(message: string);
1552
+ }
1553
+ declare class StagehandDomProcessError extends StagehandError {
1554
+ constructor(message: string);
1555
+ }
1556
+ declare class StagehandClickError extends StagehandError {
1557
+ constructor(message: string, selector: string);
1558
+ }
1559
+ declare class LLMResponseError extends StagehandError {
1560
+ constructor(primitive: string, message: string);
1561
+ }
1562
+ declare class StagehandIframeError extends StagehandError {
1563
+ constructor(frameUrl: string, message: string);
1564
+ }
1565
+ declare class ContentFrameNotFoundError extends StagehandError {
1566
+ constructor(selector: string);
1567
+ }
1568
+ declare class XPathResolutionError extends StagehandError {
1569
+ constructor(xpath: string);
1570
+ }
1571
+ declare class ExperimentalApiConflictError extends StagehandError {
1572
+ constructor();
1573
+ }
1574
+ declare class ExperimentalNotConfiguredError extends StagehandError {
1575
+ constructor(featureName: string);
1576
+ }
1577
+ declare class ZodSchemaValidationError extends Error { // Extends Error directly, so an `instanceof StagehandError` check will not match it.
1578
+ readonly received: unknown;
1579
+ readonly issues: ReturnType<ZodError["format"]>; // The formatted-issues type produced by `ZodError.format()`.
1580
+ constructor(received: unknown, issues: ReturnType<ZodError["format"]>);
1581
+ }
1582
+ declare class StagehandInitError extends StagehandError {
1583
+ constructor(message: string);
1584
+ }
1585
+ declare class MCPConnectionError extends StagehandError { // Exposes both constructor arguments as readonly fields.
1586
+ readonly serverUrl: string;
1587
+ readonly originalError: unknown;
1588
+ constructor(serverUrl: string, originalError: unknown);
1589
+ }
1590
+ declare class StagehandShadowRootMissingError extends StagehandError {
1591
+ constructor(detail?: string);
1592
+ }
1593
+ declare class StagehandShadowSegmentEmptyError extends StagehandError {
1594
+ constructor();
1595
+ }
1596
+ declare class StagehandShadowSegmentNotFoundError extends StagehandError {
1597
+ constructor(segment: string, hint?: string);
1598
+ }
1599
+
1600
+ interface ConnectToMCPServerOptions {
1601
+ serverUrl: string | URL;
1602
+ clientOptions?: ClientOptions$3;
1603
+ }
1604
+ interface StdioServerConfig {
1605
+ command: string;
1606
+ args?: string[];
1607
+ env?: Record<string, string>;
1608
+ }
1609
+ declare const connectToMCPServer: (serverConfig: string | URL | StdioServerConfig | ConnectToMCPServerOptions) => Promise<Client$1>;
1610
+
1611
+ export { type ActOptions, type ActResult, type Action, type ActionExecutionResult, type AgentAction, type AgentClientOptions, type AgentConfig, type AgentExecuteOptions, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentOptions, type AgentProviderType, type AgentResult, AgentScreenshotProviderError, type AgentType, AnnotatedScreenshotText, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AvailableModel, AvailableModelSchema, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ComputerCallItem, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, type FunctionCallItem, HandlerNotInitializedError, type HistoryEntry, InvalidAISDKModelFormatError, LLMClient, type LLMResponse, LLMResponseError, LOG_LEVEL_NAMES, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelConfiguration, type ModelProvider, type ObserveOptions, type ResponseInputItem, type ResponseItem, V3 as Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, StagehandEnvironmentError, StagehandError, StagehandEvalError, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, StagehandResponseParseError, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, type ToolUseItem, 
UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, V3, XPathResolutionError, type ZodPathSegments, ZodSchemaValidationError, connectToMCPServer };