@browserbasehq/orca 3.0.3-zod-1 → 3.0.6-alpha-1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/index.d.ts +1572 -1259
- package/dist/index.js +45152 -21325
- package/package.json +30 -20
- package/LICENSE +0 -21
package/dist/index.d.ts
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import { ZodTypeAny, z, ZodObject, ZodRawShape, ZodError } from 'zod';
|
|
2
2
|
import * as z3 from 'zod/v3';
|
|
3
3
|
import { ClientOptions as ClientOptions$2 } from '@anthropic-ai/sdk';
|
|
4
|
+
import { GoogleVertexProviderSettings as GoogleVertexProviderSettings$1 } from '@ai-sdk/google-vertex';
|
|
4
5
|
import { LanguageModelV2 } from '@ai-sdk/provider';
|
|
5
6
|
import { ClientOptions as ClientOptions$1 } from 'openai';
|
|
6
|
-
import { generateObject, generateText, streamText, streamObject, experimental_generateImage, embed, embedMany, experimental_transcribe, experimental_generateSpeech, ToolSet } from 'ai';
|
|
7
7
|
import { Client, ClientOptions as ClientOptions$3 } from '@modelcontextprotocol/sdk/client/index.js';
|
|
8
|
+
import { ToolSet, ModelMessage, PrepareStepFunction, GenerateTextOnStepFinishCallback, StreamTextOnStepFinishCallback, StreamTextOnErrorCallback, StreamTextOnChunkCallback, StreamTextOnFinishCallback, StepResult, StreamTextResult, wrapLanguageModel, generateObject, generateText, streamText, streamObject, experimental_generateImage, embed, embedMany, experimental_transcribe, experimental_generateSpeech } from 'ai';
|
|
9
|
+
export { ModelMessage } from 'ai';
|
|
8
10
|
import { Page as Page$1 } from 'playwright-core';
|
|
9
11
|
export { Page as PlaywrightPage } from 'playwright-core';
|
|
10
12
|
import { Page as Page$2 } from 'puppeteer-core';
|
|
@@ -28,33 +30,6 @@ declare const isZod3Schema: (schema: StagehandZodSchema) => schema is z3.ZodType
|
|
|
28
30
|
type JsonSchemaDocument = Record<string, unknown>;
|
|
29
31
|
declare function toJsonSchema(schema: StagehandZodSchema): JsonSchemaDocument;
|
|
30
32
|
|
|
31
|
-
type AnthropicJsonSchemaObject = {
|
|
32
|
-
definitions?: {
|
|
33
|
-
MySchema?: {
|
|
34
|
-
properties?: Record<string, unknown>;
|
|
35
|
-
required?: string[];
|
|
36
|
-
};
|
|
37
|
-
};
|
|
38
|
-
properties?: Record<string, unknown>;
|
|
39
|
-
required?: string[];
|
|
40
|
-
} & Record<string, unknown>;
|
|
41
|
-
interface LLMTool {
|
|
42
|
-
type: "function";
|
|
43
|
-
name: string;
|
|
44
|
-
description: string;
|
|
45
|
-
parameters: Record<string, unknown>;
|
|
46
|
-
}
|
|
47
|
-
type AISDKProvider = (modelName: string) => LanguageModelV2;
|
|
48
|
-
type AISDKCustomProvider = (options: {
|
|
49
|
-
apiKey: string;
|
|
50
|
-
}) => AISDKProvider;
|
|
51
|
-
type AvailableModel = "gpt-4.1" | "gpt-4.1-mini" | "gpt-4.1-nano" | "o4-mini" | "o3" | "o3-mini" | "o1" | "o1-mini" | "gpt-4o" | "gpt-4o-mini" | "gpt-4o-2024-08-06" | "gpt-4.5-preview" | "o1-preview" | "claude-3-5-sonnet-latest" | "claude-3-5-sonnet-20241022" | "claude-3-5-sonnet-20240620" | "claude-3-7-sonnet-latest" | "claude-3-7-sonnet-20250219" | "cerebras-llama-3.3-70b" | "cerebras-llama-3.1-8b" | "groq-llama-3.3-70b-versatile" | "groq-llama-3.3-70b-specdec" | "gemini-1.5-flash" | "gemini-1.5-pro" | "gemini-1.5-flash-8b" | "gemini-2.0-flash-lite" | "gemini-2.0-flash" | "gemini-2.5-flash-preview-04-17" | "gemini-2.5-pro-preview-03-25" | string;
|
|
52
|
-
type ModelProvider = "openai" | "anthropic" | "cerebras" | "groq" | "google" | "aisdk";
|
|
53
|
-
type ClientOptions = ClientOptions$1 | ClientOptions$2;
|
|
54
|
-
type ModelConfiguration = AvailableModel | (ClientOptions & {
|
|
55
|
-
modelName: AvailableModel;
|
|
56
|
-
});
|
|
57
|
-
|
|
58
33
|
type LogLevel = 0 | 1 | 2;
|
|
59
34
|
/**
|
|
60
35
|
* Mapping between numeric log levels and their names
|
|
@@ -79,122 +54,6 @@ type LogLine = {
|
|
|
79
54
|
};
|
|
80
55
|
type Logger = (logLine: LogLine) => void;
|
|
81
56
|
|
|
82
|
-
interface ChatMessage {
|
|
83
|
-
role: "system" | "user" | "assistant";
|
|
84
|
-
content: ChatMessageContent;
|
|
85
|
-
}
|
|
86
|
-
type ChatMessageContent = string | (ChatMessageImageContent | ChatMessageTextContent)[];
|
|
87
|
-
interface ChatMessageImageContent {
|
|
88
|
-
type: string;
|
|
89
|
-
image_url?: {
|
|
90
|
-
url: string;
|
|
91
|
-
};
|
|
92
|
-
text?: string;
|
|
93
|
-
source?: {
|
|
94
|
-
type: string;
|
|
95
|
-
media_type: string;
|
|
96
|
-
data: string;
|
|
97
|
-
};
|
|
98
|
-
}
|
|
99
|
-
interface ChatMessageTextContent {
|
|
100
|
-
type: string;
|
|
101
|
-
text: string;
|
|
102
|
-
}
|
|
103
|
-
declare const AnnotatedScreenshotText = "This is a screenshot of the current page state with the elements annotated on it. Each element id is annotated with a number to the top left of it. Duplicate annotations at the same location are under each other vertically.";
|
|
104
|
-
interface ChatCompletionOptions {
|
|
105
|
-
messages: ChatMessage[];
|
|
106
|
-
temperature?: number;
|
|
107
|
-
top_p?: number;
|
|
108
|
-
frequency_penalty?: number;
|
|
109
|
-
presence_penalty?: number;
|
|
110
|
-
image?: {
|
|
111
|
-
buffer: Buffer;
|
|
112
|
-
description?: string;
|
|
113
|
-
};
|
|
114
|
-
response_model?: {
|
|
115
|
-
name: string;
|
|
116
|
-
schema: StagehandZodSchema;
|
|
117
|
-
};
|
|
118
|
-
tools?: LLMTool[];
|
|
119
|
-
tool_choice?: "auto" | "none" | "required";
|
|
120
|
-
maxOutputTokens?: number;
|
|
121
|
-
requestId?: string;
|
|
122
|
-
}
|
|
123
|
-
type LLMResponse = {
|
|
124
|
-
id: string;
|
|
125
|
-
object: string;
|
|
126
|
-
created: number;
|
|
127
|
-
model: string;
|
|
128
|
-
choices: {
|
|
129
|
-
index: number;
|
|
130
|
-
message: {
|
|
131
|
-
role: string;
|
|
132
|
-
content: string | null;
|
|
133
|
-
tool_calls: {
|
|
134
|
-
id: string;
|
|
135
|
-
type: string;
|
|
136
|
-
function: {
|
|
137
|
-
name: string;
|
|
138
|
-
arguments: string;
|
|
139
|
-
};
|
|
140
|
-
}[];
|
|
141
|
-
};
|
|
142
|
-
finish_reason: string;
|
|
143
|
-
}[];
|
|
144
|
-
usage: {
|
|
145
|
-
prompt_tokens: number;
|
|
146
|
-
completion_tokens: number;
|
|
147
|
-
total_tokens: number;
|
|
148
|
-
};
|
|
149
|
-
};
|
|
150
|
-
interface CreateChatCompletionOptions {
|
|
151
|
-
options: ChatCompletionOptions;
|
|
152
|
-
logger: (message: LogLine) => void;
|
|
153
|
-
retries?: number;
|
|
154
|
-
}
|
|
155
|
-
/** Simple usage shape if your LLM returns usage tokens. */
|
|
156
|
-
interface LLMUsage {
|
|
157
|
-
prompt_tokens: number;
|
|
158
|
-
completion_tokens: number;
|
|
159
|
-
total_tokens: number;
|
|
160
|
-
reasoning_tokens?: number;
|
|
161
|
-
cached_input_tokens?: number;
|
|
162
|
-
}
|
|
163
|
-
/**
|
|
164
|
-
* For calls that use a schema: the LLMClient may return { data: T; usage?: LLMUsage }
|
|
165
|
-
*/
|
|
166
|
-
interface LLMParsedResponse<T> {
|
|
167
|
-
data: T;
|
|
168
|
-
usage?: LLMUsage;
|
|
169
|
-
}
|
|
170
|
-
declare abstract class LLMClient {
|
|
171
|
-
type: "openai" | "anthropic" | "cerebras" | "groq" | (string & {});
|
|
172
|
-
modelName: AvailableModel | (string & {});
|
|
173
|
-
hasVision: boolean;
|
|
174
|
-
clientOptions: ClientOptions;
|
|
175
|
-
userProvidedInstructions?: string;
|
|
176
|
-
constructor(modelName: AvailableModel, userProvidedInstructions?: string);
|
|
177
|
-
abstract createChatCompletion<T>(options: CreateChatCompletionOptions & {
|
|
178
|
-
options: {
|
|
179
|
-
response_model: {
|
|
180
|
-
name: string;
|
|
181
|
-
schema: StagehandZodSchema;
|
|
182
|
-
};
|
|
183
|
-
};
|
|
184
|
-
}): Promise<LLMParsedResponse<T>>;
|
|
185
|
-
abstract createChatCompletion<T = LLMResponse>(options: CreateChatCompletionOptions): Promise<T>;
|
|
186
|
-
generateObject: typeof generateObject;
|
|
187
|
-
generateText: typeof generateText;
|
|
188
|
-
streamText: typeof streamText;
|
|
189
|
-
streamObject: typeof streamObject;
|
|
190
|
-
generateImage: typeof experimental_generateImage;
|
|
191
|
-
embed: typeof embed;
|
|
192
|
-
embedMany: typeof embedMany;
|
|
193
|
-
transcribe: typeof experimental_transcribe;
|
|
194
|
-
generateSpeech: typeof experimental_generateSpeech;
|
|
195
|
-
getLanguageModel?(): LanguageModelV2;
|
|
196
|
-
}
|
|
197
|
-
|
|
198
57
|
/**
|
|
199
58
|
* CDP transport & session multiplexer
|
|
200
59
|
*
|
|
@@ -217,8 +76,22 @@ declare class CdpConnection implements CDPSessionLike {
|
|
|
217
76
|
private inflight;
|
|
218
77
|
private eventHandlers;
|
|
219
78
|
private sessions;
|
|
79
|
+
/** Maps sessionId -> targetId (1:1 mapping) */
|
|
80
|
+
private sessionToTarget;
|
|
220
81
|
readonly id: string | null;
|
|
221
82
|
private transportCloseHandlers;
|
|
83
|
+
/** Optional CDP logger - set this to receive all outgoing CDP method calls */
|
|
84
|
+
cdpLogger?: (info: {
|
|
85
|
+
method: string;
|
|
86
|
+
params?: object;
|
|
87
|
+
targetId?: string | null;
|
|
88
|
+
}) => void;
|
|
89
|
+
/** Optional CDP event logger - set this to receive all incoming CDP events */
|
|
90
|
+
cdpEventLogger?: (info: {
|
|
91
|
+
method: string;
|
|
92
|
+
params?: unknown;
|
|
93
|
+
targetId?: string | null;
|
|
94
|
+
}) => void;
|
|
222
95
|
onTransportClosed(handler: (why: string) => void): void;
|
|
223
96
|
offTransportClosed(handler: (why: string) => void): void;
|
|
224
97
|
private emitTransportClosed;
|
|
@@ -284,7 +157,7 @@ declare class Frame implements FrameManager {
|
|
|
284
157
|
/** Accessibility.getFullAXTree (+ recurse into child frames if requested) */
|
|
285
158
|
getAccessibilityTree(withFrames?: boolean): Promise<Protocol.Accessibility.AXNode[]>;
|
|
286
159
|
/**
|
|
287
|
-
* Evaluate a function or expression in this frame's
|
|
160
|
+
* Evaluate a function or expression in this frame's main world.
|
|
288
161
|
* - If a string is provided, treated as a JS expression.
|
|
289
162
|
* - If a function is provided, it is stringified and invoked with the optional argument.
|
|
290
163
|
*/
|
|
@@ -305,14 +178,14 @@ declare class Frame implements FrameManager {
|
|
|
305
178
|
/** Child frames via Page.getFrameTree */
|
|
306
179
|
childFrames(): Promise<Frame[]>;
|
|
307
180
|
/** Wait for a lifecycle state (load/domcontentloaded/networkidle) */
|
|
308
|
-
waitForLoadState(state?: "load" | "domcontentloaded" | "networkidle"): Promise<void>;
|
|
181
|
+
waitForLoadState(state?: "load" | "domcontentloaded" | "networkidle", timeoutMs?: number): Promise<void>;
|
|
309
182
|
/** Simple placeholder for your own locator abstraction */
|
|
310
183
|
locator(selector: string, options?: {
|
|
311
184
|
deep?: boolean;
|
|
312
185
|
depth?: number;
|
|
313
186
|
}): Locator;
|
|
314
|
-
/**
|
|
315
|
-
private
|
|
187
|
+
/** Resolve the main-world execution context id for this frame. */
|
|
188
|
+
private getMainWorldExecutionContextId;
|
|
316
189
|
}
|
|
317
190
|
|
|
318
191
|
interface SetInputFilePayload {
|
|
@@ -643,1242 +516,1661 @@ declare class ConsoleMessage {
|
|
|
643
516
|
toString(): string;
|
|
644
517
|
}
|
|
645
518
|
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
519
|
+
declare class StagehandAPIError extends Error {
|
|
520
|
+
constructor(message: string);
|
|
521
|
+
}
|
|
522
|
+
declare class StagehandAPIUnauthorizedError extends StagehandAPIError {
|
|
523
|
+
constructor(message?: string);
|
|
524
|
+
}
|
|
525
|
+
declare class StagehandHttpError extends StagehandAPIError {
|
|
526
|
+
constructor(message: string);
|
|
527
|
+
}
|
|
528
|
+
declare class StagehandServerError extends StagehandAPIError {
|
|
529
|
+
constructor(message: string);
|
|
530
|
+
}
|
|
531
|
+
declare class StagehandResponseBodyError extends StagehandAPIError {
|
|
532
|
+
constructor();
|
|
533
|
+
}
|
|
534
|
+
declare class StagehandResponseParseError extends StagehandAPIError {
|
|
535
|
+
constructor(message: string);
|
|
536
|
+
}
|
|
660
537
|
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
* Thin wrapper around CDP response metadata that mirrors the ergonomics of
|
|
667
|
-
* Playwright's `Response` class. The class intentionally keeps the same method
|
|
668
|
-
* names so upstream integrations can transition with minimal code changes.
|
|
669
|
-
*/
|
|
670
|
-
declare class Response$1 {
|
|
671
|
-
private readonly page;
|
|
672
|
-
private readonly session;
|
|
673
|
-
private readonly requestId;
|
|
674
|
-
private readonly frameId?;
|
|
675
|
-
private readonly loaderId?;
|
|
676
|
-
private readonly response;
|
|
677
|
-
private readonly fromServiceWorkerFlag;
|
|
678
|
-
private readonly serverAddress?;
|
|
679
|
-
private headersObject;
|
|
680
|
-
private headersArrayCache;
|
|
681
|
-
private allHeadersCache;
|
|
682
|
-
private readonly headerValuesMap;
|
|
683
|
-
private finishedDeferred;
|
|
684
|
-
private finishedSettled;
|
|
685
|
-
private extraInfoHeaders;
|
|
686
|
-
private extraInfoHeadersText;
|
|
687
|
-
/**
|
|
688
|
-
* Build a response wrapper from the CDP notification associated with a
|
|
689
|
-
* navigation. The constructor captures the owning page/session so follow-up
|
|
690
|
-
* methods (body/text/json) can query CDP on-demand. The `response` payload is
|
|
691
|
-
* the raw `Protocol.Network.Response` object emitted by Chrome.
|
|
692
|
-
*/
|
|
693
|
-
constructor(params: {
|
|
694
|
-
page: Page;
|
|
695
|
-
session: CDPSessionLike;
|
|
696
|
-
requestId: string;
|
|
697
|
-
frameId?: string;
|
|
698
|
-
loaderId?: string;
|
|
699
|
-
response: Protocol.Network.Response;
|
|
700
|
-
fromServiceWorker: boolean;
|
|
701
|
-
});
|
|
702
|
-
/** URL associated with the navigation request. */
|
|
703
|
-
url(): string;
|
|
704
|
-
/** HTTP status code reported by Chrome. */
|
|
705
|
-
status(): number;
|
|
706
|
-
/** Human-readable status text that accompanied the response. */
|
|
707
|
-
statusText(): string;
|
|
708
|
-
/** Convenience predicate that checks for 2xx statuses. */
|
|
709
|
-
ok(): boolean;
|
|
710
|
-
/** Returns the Stagehand frame object that initiated the navigation. */
|
|
711
|
-
frame(): Frame | null;
|
|
712
|
-
/** Indicates whether the response was serviced by a Service Worker. */
|
|
713
|
-
fromServiceWorker(): boolean;
|
|
714
|
-
/**
|
|
715
|
-
* Returns TLS security metadata when provided by the browser. In practice
|
|
716
|
-
* this includes certificate issuer, protocol, and validity interval.
|
|
717
|
-
*/
|
|
718
|
-
securityDetails(): Promise<Protocol.Network.SecurityDetails | null>;
|
|
719
|
-
/** Returns the resolved server address for the navigation when available. */
|
|
720
|
-
serverAddr(): Promise<ServerAddr | null>;
|
|
721
|
-
/**
|
|
722
|
-
* Returns the response headers normalised to lowercase keys. Matches the
|
|
723
|
-
* behaviour of Playwright's `headers()` by eliding duplicate header entries.
|
|
724
|
-
*/
|
|
725
|
-
headers(): Record<string, string>;
|
|
726
|
-
/**
|
|
727
|
-
* Returns all headers including those only surfaced through
|
|
728
|
-
* `responseReceivedExtraInfo` such as `set-cookie`. Values are reported as the
|
|
729
|
-
* browser sends them (no further splitting or concatenation).
|
|
730
|
-
*/
|
|
731
|
-
allHeaders(): Promise<Record<string, string>>;
|
|
732
|
-
/** Returns a concatenated header string for the supplied header name. */
|
|
733
|
-
headerValue(name: string): Promise<string | null>;
|
|
734
|
-
/** Returns all values for a header (case-insensitive lookup). */
|
|
735
|
-
headerValues(name: string): Promise<string[]>;
|
|
736
|
-
/**
|
|
737
|
-
* Returns header entries preserving their original wire casing and ordering.
|
|
738
|
-
* Falls back to the CDP object when the raw header text is unavailable.
|
|
739
|
-
*/
|
|
740
|
-
headersArray(): Promise<Array<{
|
|
741
|
-
name: string;
|
|
742
|
-
value: string;
|
|
743
|
-
}>>;
|
|
744
|
-
/**
|
|
745
|
-
* Requests the raw response body from Chrome DevTools Protocol. The method is
|
|
746
|
-
* intentionally lazy because not every caller needs the payload, and CDP only
|
|
747
|
-
* allows retrieving it once the response completes.
|
|
748
|
-
*/
|
|
749
|
-
body(): Promise<Buffer>;
|
|
750
|
-
/** Decodes the response body as UTF-8 text. */
|
|
751
|
-
text(): Promise<string>;
|
|
752
|
-
/** Parses the response body as JSON and throws if parsing fails. */
|
|
753
|
-
json<T = unknown>(): Promise<T>;
|
|
754
|
-
/**
|
|
755
|
-
* Resolves once the underlying network request completes or fails. Mirrors
|
|
756
|
-
* Playwright's behaviour by resolving to `null` on success and to an `Error`
|
|
757
|
-
* instance when Chrome reports `Network.loadingFailed`.
|
|
758
|
-
*/
|
|
759
|
-
finished(): Promise<null | Error>;
|
|
760
|
-
/**
|
|
761
|
-
* Internal helper invoked by the navigation tracker when CDP reports extra
|
|
762
|
-
* header information. This keeps the cached header views in sync with the
|
|
763
|
-
* richer metadata.
|
|
764
|
-
*/
|
|
765
|
-
applyExtraInfo(event: Protocol.Network.ResponseReceivedExtraInfoEvent): void;
|
|
766
|
-
/**
|
|
767
|
-
* Internal helper for creating a Response object from a Serializable
|
|
768
|
-
* goto response from the Stagehand API
|
|
769
|
-
*/
|
|
770
|
-
static fromSerializable(serialized: SerializableResponse, context: {
|
|
771
|
-
page: Page;
|
|
772
|
-
session: CDPSessionLike;
|
|
773
|
-
}): Response$1;
|
|
774
|
-
/** Marks the response as finished and resolves the `finished()` promise. */
|
|
775
|
-
markFinished(error: Error | null): void;
|
|
538
|
+
interface ActOptions {
|
|
539
|
+
model?: ModelConfiguration;
|
|
540
|
+
variables?: Record<string, string>;
|
|
541
|
+
timeout?: number;
|
|
542
|
+
page?: Page$1 | Page$2 | Page$3 | Page;
|
|
776
543
|
}
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
declare class StagehandAPIClient {
|
|
783
|
-
private apiKey;
|
|
784
|
-
private projectId;
|
|
785
|
-
private sessionId?;
|
|
786
|
-
private modelApiKey;
|
|
787
|
-
private logger;
|
|
788
|
-
private fetchWithCookies;
|
|
789
|
-
constructor({ apiKey, projectId, logger }: StagehandAPIConstructorParams);
|
|
790
|
-
init({ modelName, modelApiKey, domSettleTimeoutMs, verbose, systemPrompt, selfHeal, browserbaseSessionCreateParams, browserbaseSessionID, }: StartSessionParams): Promise<StartSessionResult>;
|
|
791
|
-
act({ input, options, frameId }: APIActParameters): Promise<ActResult>;
|
|
792
|
-
extract<T extends StagehandZodSchema>({ instruction, schema: zodSchema, options, frameId, }: APIExtractParameters): Promise<ExtractResult<T>>;
|
|
793
|
-
observe({ instruction, options, frameId, }: APIObserveParameters): Promise<Action[]>;
|
|
794
|
-
goto(url: string, options?: {
|
|
795
|
-
waitUntil?: "load" | "domcontentloaded" | "networkidle";
|
|
796
|
-
}, frameId?: string): Promise<SerializableResponse | null>;
|
|
797
|
-
agentExecute(agentConfig: AgentConfig, executeOptions: AgentExecuteOptions | string, frameId?: string): Promise<AgentResult>;
|
|
798
|
-
end(): Promise<Response>;
|
|
799
|
-
getReplayMetrics(): Promise<StagehandMetrics>;
|
|
800
|
-
private execute;
|
|
801
|
-
private request;
|
|
544
|
+
interface ActResult {
|
|
545
|
+
success: boolean;
|
|
546
|
+
message: string;
|
|
547
|
+
actionDescription: string;
|
|
548
|
+
actions: Action[];
|
|
802
549
|
}
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
y: number;
|
|
810
|
-
width: number;
|
|
811
|
-
height: number;
|
|
550
|
+
type ExtractResult<T extends StagehandZodSchema> = InferStagehandSchema<T>;
|
|
551
|
+
interface Action {
|
|
552
|
+
selector: string;
|
|
553
|
+
description: string;
|
|
554
|
+
method?: string;
|
|
555
|
+
arguments?: string[];
|
|
812
556
|
}
|
|
813
|
-
interface
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
path?: string;
|
|
822
|
-
quality?: number;
|
|
823
|
-
scale?: ScreenshotScaleOption;
|
|
824
|
-
style?: string;
|
|
557
|
+
interface HistoryEntry {
|
|
558
|
+
method: "act" | "extract" | "observe" | "navigate" | "agent";
|
|
559
|
+
parameters: unknown;
|
|
560
|
+
result: unknown;
|
|
561
|
+
timestamp: string;
|
|
562
|
+
}
|
|
563
|
+
interface ExtractOptions {
|
|
564
|
+
model?: ModelConfiguration;
|
|
825
565
|
timeout?: number;
|
|
826
|
-
|
|
566
|
+
selector?: string;
|
|
567
|
+
page?: Page$1 | Page$2 | Page$3 | Page;
|
|
568
|
+
}
|
|
569
|
+
declare const defaultExtractSchema: z.ZodObject<{
|
|
570
|
+
extraction: z.ZodString;
|
|
571
|
+
}, z.core.$strip>;
|
|
572
|
+
declare const pageTextSchema: z.ZodObject<{
|
|
573
|
+
pageText: z.ZodString;
|
|
574
|
+
}, z.core.$strip>;
|
|
575
|
+
interface ObserveOptions {
|
|
576
|
+
model?: ModelConfiguration;
|
|
577
|
+
timeout?: number;
|
|
578
|
+
selector?: string;
|
|
579
|
+
page?: Page$1 | Page$2 | Page$3 | Page;
|
|
580
|
+
}
|
|
581
|
+
declare enum V3FunctionName {
|
|
582
|
+
ACT = "ACT",
|
|
583
|
+
EXTRACT = "EXTRACT",
|
|
584
|
+
OBSERVE = "OBSERVE",
|
|
585
|
+
AGENT = "AGENT"
|
|
827
586
|
}
|
|
828
587
|
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
588
|
+
interface StagehandMetrics {
|
|
589
|
+
actPromptTokens: number;
|
|
590
|
+
actCompletionTokens: number;
|
|
591
|
+
actReasoningTokens: number;
|
|
592
|
+
actCachedInputTokens: number;
|
|
593
|
+
actInferenceTimeMs: number;
|
|
594
|
+
extractPromptTokens: number;
|
|
595
|
+
extractCompletionTokens: number;
|
|
596
|
+
extractReasoningTokens: number;
|
|
597
|
+
extractCachedInputTokens: number;
|
|
598
|
+
extractInferenceTimeMs: number;
|
|
599
|
+
observePromptTokens: number;
|
|
600
|
+
observeCompletionTokens: number;
|
|
601
|
+
observeReasoningTokens: number;
|
|
602
|
+
observeCachedInputTokens: number;
|
|
603
|
+
observeInferenceTimeMs: number;
|
|
604
|
+
agentPromptTokens: number;
|
|
605
|
+
agentCompletionTokens: number;
|
|
606
|
+
agentReasoningTokens: number;
|
|
607
|
+
agentCachedInputTokens: number;
|
|
608
|
+
agentInferenceTimeMs: number;
|
|
609
|
+
totalPromptTokens: number;
|
|
610
|
+
totalCompletionTokens: number;
|
|
611
|
+
totalReasoningTokens: number;
|
|
612
|
+
totalCachedInputTokens: number;
|
|
613
|
+
totalInferenceTimeMs: number;
|
|
614
|
+
}
|
|
615
|
+
|
|
616
|
+
type V3Env = "LOCAL" | "BROWSERBASE";
|
|
617
|
+
/** Local launch options for V3 (chrome-launcher + CDP).
|
|
618
|
+
* Matches v2 shape where feasible; unsupported fields are accepted but ignored.
|
|
619
|
+
*/
|
|
620
|
+
interface LocalBrowserLaunchOptions {
|
|
621
|
+
args?: string[];
|
|
622
|
+
executablePath?: string;
|
|
623
|
+
userDataDir?: string;
|
|
624
|
+
preserveUserDataDir?: boolean;
|
|
625
|
+
headless?: boolean;
|
|
626
|
+
devtools?: boolean;
|
|
627
|
+
chromiumSandbox?: boolean;
|
|
628
|
+
ignoreDefaultArgs?: boolean | string[];
|
|
629
|
+
proxy?: {
|
|
630
|
+
server: string;
|
|
631
|
+
bypass?: string;
|
|
632
|
+
username?: string;
|
|
633
|
+
password?: string;
|
|
634
|
+
};
|
|
635
|
+
locale?: string;
|
|
636
|
+
viewport?: {
|
|
637
|
+
width: number;
|
|
638
|
+
height: number;
|
|
639
|
+
};
|
|
640
|
+
deviceScaleFactor?: number;
|
|
641
|
+
hasTouch?: boolean;
|
|
642
|
+
ignoreHTTPSErrors?: boolean;
|
|
643
|
+
cdpUrl?: string;
|
|
644
|
+
connectTimeoutMs?: number;
|
|
645
|
+
downloadsPath?: string;
|
|
646
|
+
acceptDownloads?: boolean;
|
|
647
|
+
}
|
|
648
|
+
/** Constructor options for V3 */
|
|
649
|
+
interface V3Options {
|
|
650
|
+
env: V3Env;
|
|
651
|
+
apiKey?: string;
|
|
652
|
+
projectId?: string;
|
|
881
653
|
/**
|
|
882
|
-
*
|
|
883
|
-
* has been attached; adopt the session into this Page and seed ownership for its subtree.
|
|
654
|
+
* Optional: fine-tune Browserbase session creation or resume an existing session.
|
|
884
655
|
*/
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
656
|
+
browserbaseSessionCreateParams?: Omit<Browserbase.Sessions.SessionCreateParams, "projectId"> & {
|
|
657
|
+
projectId?: string;
|
|
658
|
+
};
|
|
659
|
+
browserbaseSessionID?: string;
|
|
660
|
+
localBrowserLaunchOptions?: LocalBrowserLaunchOptions;
|
|
661
|
+
model?: ModelConfiguration;
|
|
662
|
+
llmClient?: LLMClient;
|
|
663
|
+
systemPrompt?: string;
|
|
664
|
+
logInferenceToFile?: boolean;
|
|
665
|
+
experimental?: boolean;
|
|
666
|
+
verbose?: 0 | 1 | 2;
|
|
667
|
+
selfHeal?: boolean;
|
|
668
|
+
/** Disable pino logging backend (useful for tests or minimal environments). */
|
|
669
|
+
disablePino?: boolean;
|
|
670
|
+
/** Optional external logger hook for integrating with host apps. */
|
|
671
|
+
logger?: (line: LogLine) => void;
|
|
672
|
+
/** Directory used to persist cached actions for act(). */
|
|
673
|
+
cacheDir?: string;
|
|
674
|
+
domSettleTimeout?: number;
|
|
675
|
+
disableAPI?: boolean;
|
|
676
|
+
}
|
|
677
|
+
|
|
678
|
+
declare class StagehandError extends Error {
|
|
679
|
+
constructor(message: string);
|
|
680
|
+
}
|
|
681
|
+
declare class StagehandDefaultError extends StagehandError {
|
|
682
|
+
constructor(error?: unknown);
|
|
683
|
+
}
|
|
684
|
+
declare class StagehandEnvironmentError extends StagehandError {
|
|
685
|
+
constructor(currentEnvironment: string, requiredEnvironment: string, feature: string);
|
|
686
|
+
}
|
|
687
|
+
declare class MissingEnvironmentVariableError extends StagehandError {
|
|
688
|
+
constructor(missingEnvironmentVariable: string, feature: string);
|
|
689
|
+
}
|
|
690
|
+
declare class UnsupportedModelError extends StagehandError {
|
|
691
|
+
constructor(supportedModels: string[], feature?: string);
|
|
692
|
+
}
|
|
693
|
+
declare class UnsupportedModelProviderError extends StagehandError {
|
|
694
|
+
constructor(supportedProviders: string[], feature?: string);
|
|
695
|
+
}
|
|
696
|
+
declare class UnsupportedAISDKModelProviderError extends StagehandError {
|
|
697
|
+
constructor(provider: string, supportedProviders: string[]);
|
|
698
|
+
}
|
|
699
|
+
declare class InvalidAISDKModelFormatError extends StagehandError {
|
|
700
|
+
constructor(modelName: string);
|
|
701
|
+
}
|
|
702
|
+
declare class StagehandNotInitializedError extends StagehandError {
|
|
703
|
+
constructor(prop: string);
|
|
704
|
+
}
|
|
705
|
+
declare class BrowserbaseSessionNotFoundError extends StagehandError {
|
|
706
|
+
constructor();
|
|
707
|
+
}
|
|
708
|
+
declare class CaptchaTimeoutError extends StagehandError {
|
|
709
|
+
constructor();
|
|
710
|
+
}
|
|
711
|
+
declare class MissingLLMConfigurationError extends StagehandError {
|
|
712
|
+
constructor();
|
|
713
|
+
}
|
|
714
|
+
declare class HandlerNotInitializedError extends StagehandError {
|
|
715
|
+
constructor(handlerType: string);
|
|
716
|
+
}
|
|
717
|
+
declare class StagehandInvalidArgumentError extends StagehandError {
|
|
718
|
+
constructor(message: string);
|
|
719
|
+
}
|
|
720
|
+
declare class StagehandElementNotFoundError extends StagehandError {
|
|
721
|
+
constructor(xpaths: string[]);
|
|
722
|
+
}
|
|
723
|
+
declare class AgentScreenshotProviderError extends StagehandError {
|
|
724
|
+
constructor(message: string);
|
|
725
|
+
}
|
|
726
|
+
declare class StagehandMissingArgumentError extends StagehandError {
|
|
727
|
+
constructor(message: string);
|
|
728
|
+
}
|
|
729
|
+
declare class CreateChatCompletionResponseError extends StagehandError {
|
|
730
|
+
constructor(message: string);
|
|
731
|
+
}
|
|
732
|
+
declare class StagehandEvalError extends StagehandError {
|
|
733
|
+
constructor(message: string);
|
|
734
|
+
}
|
|
735
|
+
declare class StagehandDomProcessError extends StagehandError {
|
|
736
|
+
constructor(message: string);
|
|
737
|
+
}
|
|
738
|
+
declare class StagehandClickError extends StagehandError {
|
|
739
|
+
constructor(message: string, selector: string);
|
|
740
|
+
}
|
|
741
|
+
declare class LLMResponseError extends StagehandError {
|
|
742
|
+
constructor(primitive: string, message: string);
|
|
743
|
+
}
|
|
744
|
+
declare class StagehandIframeError extends StagehandError {
|
|
745
|
+
constructor(frameUrl: string, message: string);
|
|
746
|
+
}
|
|
747
|
+
declare class ContentFrameNotFoundError extends StagehandError {
|
|
748
|
+
constructor(selector: string);
|
|
749
|
+
}
|
|
750
|
+
declare class XPathResolutionError extends StagehandError {
|
|
751
|
+
constructor(xpath: string);
|
|
752
|
+
}
|
|
753
|
+
declare class ExperimentalApiConflictError extends StagehandError {
|
|
754
|
+
constructor();
|
|
755
|
+
}
|
|
756
|
+
declare class ExperimentalNotConfiguredError extends StagehandError {
|
|
757
|
+
constructor(featureName: string);
|
|
758
|
+
}
|
|
759
|
+
declare class CuaModelRequiredError extends StagehandError {
|
|
760
|
+
constructor(availableModels: readonly string[]);
|
|
761
|
+
}
|
|
762
|
+
declare class ZodSchemaValidationError extends Error {
|
|
763
|
+
readonly received: unknown;
|
|
764
|
+
readonly issues: ReturnType<ZodError["format"]>;
|
|
765
|
+
constructor(received: unknown, issues: ReturnType<ZodError["format"]>);
|
|
766
|
+
}
|
|
767
|
+
declare class StagehandInitError extends StagehandError {
|
|
768
|
+
constructor(message: string);
|
|
769
|
+
}
|
|
770
|
+
declare class MCPConnectionError extends StagehandError {
|
|
771
|
+
readonly serverUrl: string;
|
|
772
|
+
readonly originalError: unknown;
|
|
773
|
+
constructor(serverUrl: string, originalError: unknown);
|
|
774
|
+
}
|
|
775
|
+
declare class StagehandShadowRootMissingError extends StagehandError {
|
|
776
|
+
constructor(detail?: string);
|
|
777
|
+
}
|
|
778
|
+
declare class StagehandShadowSegmentEmptyError extends StagehandError {
|
|
779
|
+
constructor();
|
|
780
|
+
}
|
|
781
|
+
declare class StagehandShadowSegmentNotFoundError extends StagehandError {
|
|
782
|
+
constructor(segment: string, hint?: string);
|
|
783
|
+
}
|
|
784
|
+
declare class ElementNotVisibleError extends StagehandError {
|
|
785
|
+
constructor(selector: string);
|
|
786
|
+
}
|
|
787
|
+
declare class ResponseBodyError extends StagehandError {
|
|
788
|
+
constructor(message: string);
|
|
789
|
+
}
|
|
790
|
+
declare class ResponseParseError extends StagehandError {
|
|
791
|
+
constructor(message: string);
|
|
792
|
+
}
|
|
793
|
+
declare class TimeoutError extends StagehandError {
|
|
794
|
+
constructor(operation: string, timeoutMs: number);
|
|
795
|
+
}
|
|
796
|
+
declare class ActTimeoutError extends TimeoutError {
|
|
797
|
+
constructor(timeoutMs: number);
|
|
798
|
+
}
|
|
799
|
+
declare class ExtractTimeoutError extends TimeoutError {
|
|
800
|
+
constructor(timeoutMs: number);
|
|
801
|
+
}
|
|
802
|
+
declare class ObserveTimeoutError extends TimeoutError {
|
|
803
|
+
constructor(timeoutMs: number);
|
|
804
|
+
}
|
|
805
|
+
declare class PageNotFoundError extends StagehandError {
|
|
806
|
+
constructor(identifier: string);
|
|
807
|
+
}
|
|
808
|
+
declare class ConnectionTimeoutError extends StagehandError {
|
|
809
|
+
constructor(message: string);
|
|
810
|
+
}
|
|
811
|
+
declare class StreamingCallbacksInNonStreamingModeError extends StagehandError {
|
|
812
|
+
readonly invalidCallbacks: string[];
|
|
813
|
+
constructor(invalidCallbacks: string[]);
|
|
814
|
+
}
|
|
815
|
+
declare class AgentAbortError extends StagehandError {
|
|
816
|
+
readonly reason: string;
|
|
817
|
+
constructor(reason?: string);
|
|
818
|
+
}
|
|
819
|
+
|
|
820
|
+
declare class AISdkClient extends LLMClient {
|
|
821
|
+
type: "aisdk";
|
|
822
|
+
private model;
|
|
823
|
+
constructor({ model }: {
|
|
824
|
+
model: LanguageModelV2;
|
|
825
|
+
});
|
|
826
|
+
createChatCompletion<T = ChatCompletion>({ options, }: CreateChatCompletionOptions): Promise<T>;
|
|
827
|
+
}
|
|
828
|
+
|
|
829
|
+
interface StagehandAPIConstructorParams {
|
|
830
|
+
apiKey: string;
|
|
831
|
+
projectId: string;
|
|
832
|
+
logger: (message: LogLine) => void;
|
|
833
|
+
}
|
|
834
|
+
interface StartSessionParams {
|
|
835
|
+
modelName: string;
|
|
836
|
+
modelApiKey: string;
|
|
837
|
+
domSettleTimeoutMs: number;
|
|
838
|
+
verbose: number;
|
|
839
|
+
systemPrompt?: string;
|
|
840
|
+
browserbaseSessionCreateParams?: Omit<Browserbase.Sessions.SessionCreateParams, "projectId"> & {
|
|
841
|
+
projectId?: string;
|
|
842
|
+
};
|
|
843
|
+
selfHeal?: boolean;
|
|
844
|
+
browserbaseSessionID?: string;
|
|
845
|
+
}
|
|
846
|
+
interface StartSessionResult {
|
|
847
|
+
sessionId: string;
|
|
848
|
+
available?: boolean;
|
|
849
|
+
}
|
|
850
|
+
interface APIActParameters {
|
|
851
|
+
input: string | Action;
|
|
852
|
+
options?: ActOptions;
|
|
853
|
+
frameId?: string;
|
|
854
|
+
}
|
|
855
|
+
interface APIExtractParameters {
|
|
856
|
+
instruction?: string;
|
|
857
|
+
schema?: StagehandZodSchema;
|
|
858
|
+
options?: ExtractOptions;
|
|
859
|
+
frameId?: string;
|
|
860
|
+
}
|
|
861
|
+
interface APIObserveParameters {
|
|
862
|
+
instruction?: string;
|
|
863
|
+
options?: ObserveOptions;
|
|
864
|
+
frameId?: string;
|
|
865
|
+
}
|
|
866
|
+
interface SerializableResponse {
|
|
867
|
+
requestId: string;
|
|
868
|
+
frameId?: string;
|
|
869
|
+
loaderId?: string;
|
|
870
|
+
response: Protocol.Network.Response;
|
|
871
|
+
fromServiceWorkerFlag?: boolean;
|
|
872
|
+
finishedSettled?: boolean;
|
|
873
|
+
extraInfoHeaders?: Protocol.Network.Headers | null;
|
|
874
|
+
extraInfoHeadersText?: string;
|
|
875
|
+
}
|
|
876
|
+
|
|
877
|
+
/**
|
|
878
|
+
* Represents a path through a Zod schema from the root object down to a
|
|
879
|
+
* particular field. The `segments` array describes the chain of keys/indices.
|
|
880
|
+
*
|
|
881
|
+
* - **String** segments indicate object property names.
|
|
882
|
+
* - **Number** segments indicate array indices.
|
|
883
|
+
*
|
|
884
|
+
* For example, `["users", 0, "homepage"]` might describe reaching
|
|
885
|
+
* the `homepage` field in `schema.users[0].homepage`.
|
|
886
|
+
*/
|
|
887
|
+
interface ZodPathSegments {
|
|
888
|
+
/**
|
|
889
|
+
* The ordered list of keys/indices leading from the schema root
|
|
890
|
+
* to the targeted field.
|
|
891
|
+
*/
|
|
892
|
+
segments: Array<string | number>;
|
|
893
|
+
}
|
|
894
|
+
type InitScriptSource<Arg> = string | {
|
|
895
|
+
path?: string;
|
|
896
|
+
content?: string;
|
|
897
|
+
} | ((arg: Arg) => unknown);
|
|
898
|
+
|
|
899
|
+
type EvaluateOptions = {
|
|
900
|
+
/** The question to ask about the task state */
|
|
901
|
+
question: string;
|
|
902
|
+
/** The answer to the question */
|
|
903
|
+
answer?: string;
|
|
904
|
+
/** Whether to take a screenshot of the task state, or array of screenshots to evaluate */
|
|
905
|
+
screenshot?: boolean | Buffer[];
|
|
906
|
+
/** Custom system prompt for the evaluator */
|
|
907
|
+
systemPrompt?: string;
|
|
908
|
+
/** Delay in milliseconds before taking the screenshot @default 250 */
|
|
909
|
+
screenshotDelayMs?: number;
|
|
910
|
+
/** The agent's reasoning/thought process for completing the task */
|
|
911
|
+
agentReasoning?: string;
|
|
912
|
+
};
|
|
913
|
+
type BatchAskOptions = {
|
|
914
|
+
/** Array of questions with optional answers */
|
|
915
|
+
questions: Array<{
|
|
916
|
+
question: string;
|
|
917
|
+
answer?: string;
|
|
918
|
+
}>;
|
|
919
|
+
/** Whether to take a screenshot of the task state */
|
|
920
|
+
screenshot?: boolean;
|
|
921
|
+
/** Custom system prompt for the evaluator */
|
|
922
|
+
systemPrompt?: string;
|
|
923
|
+
/** Delay in milliseconds before taking the screenshot @default 1000 */
|
|
924
|
+
screenshotDelayMs?: number;
|
|
925
|
+
};
|
|
926
|
+
/**
|
|
927
|
+
* Result of an evaluation
|
|
928
|
+
*/
|
|
929
|
+
interface EvaluationResult {
|
|
900
930
|
/**
|
|
901
|
-
*
|
|
902
|
-
* Allows external consumers to execute arbitrary Chrome DevTools Protocol commands.
|
|
903
|
-
*
|
|
904
|
-
* @param method - The CDP method name (e.g., "Page.enable", "Runtime.evaluate")
|
|
905
|
-
* @param params - Optional parameters for the CDP command
|
|
906
|
-
* @returns Promise resolving to the typed CDP response
|
|
907
|
-
*
|
|
908
|
-
* @example
|
|
909
|
-
* // Enable the Runtime domain
|
|
910
|
-
* await page.sendCDP("Runtime.enable");
|
|
911
|
-
*
|
|
912
|
-
* @example
|
|
913
|
-
* // Evaluate JavaScript with typed response
|
|
914
|
-
* const result = await page.sendCDP<Protocol.Runtime.EvaluateResponse>(
|
|
915
|
-
* "Runtime.evaluate",
|
|
916
|
-
* { expression: "1 + 1" }
|
|
917
|
-
* );
|
|
931
|
+
* The evaluation result ('YES', 'NO', or 'INVALID' if parsing failed or value was unexpected)
|
|
918
932
|
*/
|
|
919
|
-
|
|
920
|
-
/** Seed the cached URL before navigation events converge. */
|
|
921
|
-
seedCurrentUrl(url: string | undefined | null): void;
|
|
922
|
-
mainFrameId(): string;
|
|
923
|
-
mainFrame(): Frame;
|
|
933
|
+
evaluation: "YES" | "NO" | "INVALID";
|
|
924
934
|
/**
|
|
925
|
-
*
|
|
935
|
+
* The reasoning behind the evaluation
|
|
926
936
|
*/
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
private
|
|
935
|
-
private
|
|
937
|
+
reasoning: string;
|
|
938
|
+
}
|
|
939
|
+
|
|
940
|
+
declare class StagehandAPIClient {
|
|
941
|
+
private apiKey;
|
|
942
|
+
private projectId;
|
|
943
|
+
private sessionId?;
|
|
944
|
+
private modelApiKey;
|
|
945
|
+
private logger;
|
|
946
|
+
private fetchWithCookies;
|
|
947
|
+
constructor({ apiKey, projectId, logger }: StagehandAPIConstructorParams);
|
|
948
|
+
init({ modelName, modelApiKey, domSettleTimeoutMs, verbose, systemPrompt, selfHeal, browserbaseSessionCreateParams, browserbaseSessionID, }: StartSessionParams): Promise<StartSessionResult>;
|
|
949
|
+
act({ input, options, frameId }: APIActParameters): Promise<ActResult>;
|
|
950
|
+
extract<T extends StagehandZodSchema>({ instruction, schema: zodSchema, options, frameId, }: APIExtractParameters): Promise<ExtractResult<T>>;
|
|
951
|
+
observe({ instruction, options, frameId, }: APIObserveParameters): Promise<Action[]>;
|
|
952
|
+
goto(url: string, options?: {
|
|
953
|
+
waitUntil?: "load" | "domcontentloaded" | "networkidle";
|
|
954
|
+
}, frameId?: string): Promise<SerializableResponse | null>;
|
|
955
|
+
agentExecute(agentConfig: AgentConfig, executeOptions: AgentExecuteOptions | string, frameId?: string): Promise<AgentResult>;
|
|
956
|
+
end(): Promise<Response>;
|
|
957
|
+
getReplayMetrics(): Promise<StagehandMetrics>;
|
|
958
|
+
private execute;
|
|
959
|
+
private request;
|
|
960
|
+
}
|
|
961
|
+
|
|
962
|
+
/**
|
|
963
|
+
* V3Context
|
|
964
|
+
*
|
|
965
|
+
* Owns the root CDP connection and wires Target/Page events into Page.
|
|
966
|
+
* Maintains one Page per top-level target, adopts OOPIF child sessions into the owner Page,
|
|
967
|
+
* and tracks target→page and (root) frame→target mappings for lookups.
|
|
968
|
+
*
|
|
969
|
+
* IMPORTANT: FrameId → session ownership is managed inside Page (via its FrameRegistry).
|
|
970
|
+
* Context never “guesses” owners; it simply forwards events (with the emitting session)
|
|
971
|
+
* so Page can record the correct owner at event time.
|
|
972
|
+
*/
|
|
973
|
+
declare class V3Context {
|
|
974
|
+
readonly conn: CdpConnection;
|
|
975
|
+
private readonly env;
|
|
976
|
+
private readonly apiClient;
|
|
977
|
+
private readonly localBrowserLaunchOptions;
|
|
978
|
+
private constructor();
|
|
979
|
+
private readonly _piercerInstalled;
|
|
980
|
+
private _lastPopupSignalAt;
|
|
936
981
|
private sessionKey;
|
|
937
|
-
private
|
|
938
|
-
private
|
|
939
|
-
private
|
|
940
|
-
private
|
|
982
|
+
private readonly _sessionInit;
|
|
983
|
+
private pagesByTarget;
|
|
984
|
+
private mainFrameToTarget;
|
|
985
|
+
private sessionOwnerPage;
|
|
986
|
+
private frameOwnerPage;
|
|
987
|
+
private pendingOopifByMainFrame;
|
|
988
|
+
private createdAtByTarget;
|
|
989
|
+
private typeByTarget;
|
|
990
|
+
private _pageOrder;
|
|
991
|
+
private pendingCreatedTargetUrl;
|
|
992
|
+
private readonly initScripts;
|
|
941
993
|
/**
|
|
942
|
-
*
|
|
943
|
-
* Waits on the **current** main frame and follows root swaps during navigation.
|
|
994
|
+
* Create a Context for a given CDP websocket URL and bootstrap target wiring.
|
|
944
995
|
*/
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
996
|
+
static create(wsUrl: string, opts?: {
|
|
997
|
+
env?: "LOCAL" | "BROWSERBASE";
|
|
998
|
+
apiClient?: StagehandAPIClient | null;
|
|
999
|
+
localBrowserLaunchOptions?: LocalBrowserLaunchOptions | null;
|
|
1000
|
+
}): Promise<V3Context>;
|
|
949
1001
|
/**
|
|
950
|
-
*
|
|
1002
|
+
* Wait until at least one top-level Page has been created and registered.
|
|
1003
|
+
* We poll internal maps that bootstrap/onAttachedToTarget populate.
|
|
951
1004
|
*/
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
1005
|
+
private waitForFirstTopLevelPage;
|
|
1006
|
+
private waitForInitialTopLevelTargets;
|
|
1007
|
+
private ensurePiercer;
|
|
1008
|
+
/** Mark a page target as the most-recent one (active). */
|
|
1009
|
+
private _pushActive;
|
|
1010
|
+
/** Remove a page target from the recency list (used on close). */
|
|
1011
|
+
private _removeFromOrder;
|
|
1012
|
+
/** Return the current active Page (most-recent page that still exists). */
|
|
1013
|
+
activePage(): Page | undefined;
|
|
1014
|
+
/** Explicitly mark a known Page as the most-recent active page (and focus it). */
|
|
1015
|
+
setActivePage(page: Page): void;
|
|
1016
|
+
addInitScript<Arg>(script: InitScriptSource<Arg>, arg?: Arg): Promise<void>;
|
|
957
1017
|
/**
|
|
958
|
-
*
|
|
1018
|
+
* Return top-level `Page`s (oldest → newest). OOPIF targets are not included.
|
|
959
1019
|
*/
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
timeoutMs?: number;
|
|
963
|
-
}): Promise<Response$1 | null>;
|
|
1020
|
+
pages(): Page[];
|
|
1021
|
+
private applyInitScriptsToPage;
|
|
964
1022
|
/**
|
|
965
|
-
*
|
|
1023
|
+
* Resolve an owning `Page` by the **top-level main frame id**.
|
|
1024
|
+
* Note: child (OOPIF) roots are intentionally not present in this mapping.
|
|
966
1025
|
*/
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
1026
|
+
resolvePageByMainFrameId(frameId: string): Page | undefined;
|
|
1027
|
+
/**
|
|
1028
|
+
* Serialize the full frame tree for a given top-level main frame id.
|
|
1029
|
+
*/
|
|
1030
|
+
getFullFrameTreeByMainFrameId(rootMainFrameId: string): Promise<Protocol.Page.FrameTree>;
|
|
1031
|
+
/**
|
|
1032
|
+
* Create a new top-level page (tab) with the given URL and return its Page object.
|
|
1033
|
+
* Waits until the target is attached and registered.
|
|
1034
|
+
*/
|
|
1035
|
+
newPage(url?: string): Promise<Page>;
|
|
1036
|
+
/**
|
|
1037
|
+
* Close CDP and clear all mappings. Best-effort cleanup.
|
|
1038
|
+
*/
|
|
1039
|
+
close(): Promise<void>;
|
|
1040
|
+
/**
|
|
1041
|
+
* Bootstrap target lifecycle:
|
|
1042
|
+
* - Attach to existing targets.
|
|
1043
|
+
* - Attach on `Target.targetCreated` (fallback for OOPIFs).
|
|
1044
|
+
* - Handle auto-attach events.
|
|
1045
|
+
* - Clean up on detach/destroy.
|
|
1046
|
+
*/
|
|
1047
|
+
private bootstrap;
|
|
1048
|
+
/**
|
|
1049
|
+
* Handle a newly attached target (top-level or potential OOPIF):
|
|
1050
|
+
* - Enable Page domain and lifecycle events.
|
|
1051
|
+
* - If top-level → create Page, wire listeners, resume.
|
|
1052
|
+
* - Else → probe child root frame id via `Page.getFrameTree` and adopt immediately
|
|
1053
|
+
* if the parent is known; otherwise stage until parent `frameAttached`.
|
|
1054
|
+
* - Resume the target only after listeners are wired.
|
|
1055
|
+
*/
|
|
1056
|
+
private onAttachedToTarget;
|
|
1057
|
+
/**
|
|
1058
|
+
* Detach handler:
|
|
1059
|
+
* - Remove child session ownership and prune its subtree.
|
|
1060
|
+
* - If a top-level target, cleanup its `Page` and mappings.
|
|
1061
|
+
* - Drop any staged child for this session.
|
|
1062
|
+
*/
|
|
1063
|
+
private onDetachedFromTarget;
|
|
1064
|
+
/**
|
|
1065
|
+
* Cleanup a top-level Page by target id, removing its root and staged children.
|
|
1066
|
+
*/
|
|
1067
|
+
private cleanupByTarget;
|
|
1068
|
+
/**
|
|
1069
|
+
* Wire Page-domain frame events for a session into the owning Page & mappings.
|
|
1070
|
+
* We forward the *emitting session* with every event so Page can stamp ownership precisely.
|
|
1071
|
+
*/
|
|
1072
|
+
private installFrameEventBridges;
|
|
1073
|
+
/**
|
|
1074
|
+
* Register that a session belongs to a Page (used by event routing).
|
|
1075
|
+
*/
|
|
1076
|
+
private wireSessionToOwnerPage;
|
|
1077
|
+
/**
|
|
1078
|
+
* Utility: reverse-lookup the top-level target id that owns a given Page.
|
|
1079
|
+
*/
|
|
1080
|
+
private findTargetIdByPage;
|
|
1081
|
+
private _notePopupSignal;
|
|
1082
|
+
/**
|
|
1083
|
+
* Await the current active page, waiting briefly if a popup/open was just triggered.
|
|
1084
|
+
* Normal path returns immediately; popup path waits up to timeoutMs for the new page.
|
|
1085
|
+
*/
|
|
1086
|
+
awaitActivePage(timeoutMs?: number): Promise<Page>;
|
|
1087
|
+
}
|
|
1088
|
+
|
|
1089
|
+
type AgentReplayStep = AgentReplayActStep | AgentReplayFillFormStep | AgentReplayGotoStep | AgentReplayScrollStep | AgentReplayWaitStep | AgentReplayNavBackStep | {
|
|
1090
|
+
type: string;
|
|
1091
|
+
[key: string]: unknown;
|
|
1092
|
+
};
|
|
1093
|
+
interface AgentReplayActStep {
|
|
1094
|
+
type: "act";
|
|
1095
|
+
instruction: string;
|
|
1096
|
+
actions?: Action[];
|
|
1097
|
+
actionDescription?: string;
|
|
1098
|
+
message?: string;
|
|
1099
|
+
timeout?: number;
|
|
1100
|
+
}
|
|
1101
|
+
interface AgentReplayFillFormStep {
|
|
1102
|
+
type: "fillForm";
|
|
1103
|
+
fields?: Array<{
|
|
1104
|
+
action: string;
|
|
1105
|
+
value: string;
|
|
1106
|
+
}>;
|
|
1107
|
+
observeResults?: Action[];
|
|
1108
|
+
actions?: Action[];
|
|
1109
|
+
}
|
|
1110
|
+
interface AgentReplayGotoStep {
|
|
1111
|
+
type: "goto";
|
|
1112
|
+
url: string;
|
|
1113
|
+
waitUntil?: LoadState;
|
|
1114
|
+
}
|
|
1115
|
+
interface AgentReplayScrollStep {
|
|
1116
|
+
type: "scroll";
|
|
1117
|
+
deltaX?: number;
|
|
1118
|
+
deltaY?: number;
|
|
1119
|
+
anchor?: {
|
|
1120
|
+
x: number;
|
|
1121
|
+
y: number;
|
|
1122
|
+
};
|
|
1123
|
+
}
|
|
1124
|
+
interface AgentReplayWaitStep {
|
|
1125
|
+
type: "wait";
|
|
1126
|
+
timeMs: number;
|
|
1127
|
+
}
|
|
1128
|
+
interface AgentReplayNavBackStep {
|
|
1129
|
+
type: "navback";
|
|
1130
|
+
waitUntil?: LoadState;
|
|
1131
|
+
}
|
|
1132
|
+
|
|
1133
|
+
/**
|
|
1134
|
+
* Response
|
|
1135
|
+
* -----------------
|
|
1136
|
+
*
|
|
1137
|
+
* This module implements a Playwright-inspired response wrapper that exposes
|
|
1138
|
+
* navigation metadata and helpers for retrieving HTTP response bodies. The
|
|
1139
|
+
* abstraction is consumed by navigation routines (e.g. `Page.goto`) so callers
|
|
1140
|
+
* can synchronously inspect status codes, lazily fetch body text, or await the
|
|
1141
|
+
* network layer finishing the request. The implementation is built directly on
|
|
1142
|
+
* Chrome DevTools Protocol primitives – it holds the originating `requestId`
|
|
1143
|
+
* so it can request payloads via `Network.getResponseBody`, and it listens for
|
|
1144
|
+
* `responseReceivedExtraInfo`, `loadingFinished`, and `loadingFailed` events to
|
|
1145
|
+
* hydrate the richer header view and resolve callers waiting on completion.
|
|
1146
|
+
*/
|
|
1147
|
+
|
|
1148
|
+
type ServerAddr = {
|
|
1149
|
+
ipAddress: string;
|
|
1150
|
+
port: number;
|
|
1151
|
+
};
|
|
1152
|
+
/**
|
|
1153
|
+
* Thin wrapper around CDP response metadata that mirrors the ergonomics of
|
|
1154
|
+
* Playwright's `Response` class. The class intentionally keeps the same method
|
|
1155
|
+
* names so upstream integrations can transition with minimal code changes.
|
|
1156
|
+
*/
|
|
1157
|
+
declare class Response$1 {
|
|
1158
|
+
private readonly page;
|
|
1159
|
+
private readonly session;
|
|
1160
|
+
private readonly requestId;
|
|
1161
|
+
private readonly frameId?;
|
|
1162
|
+
private readonly loaderId?;
|
|
1163
|
+
private readonly response;
|
|
1164
|
+
private readonly fromServiceWorkerFlag;
|
|
1165
|
+
private readonly serverAddress?;
|
|
1166
|
+
private headersObject;
|
|
1167
|
+
private headersArrayCache;
|
|
1168
|
+
private allHeadersCache;
|
|
1169
|
+
private readonly headerValuesMap;
|
|
1170
|
+
private finishedDeferred;
|
|
1171
|
+
private finishedSettled;
|
|
1172
|
+
private extraInfoHeaders;
|
|
1173
|
+
private extraInfoHeadersText;
|
|
971
1174
|
/**
|
|
972
|
-
*
|
|
1175
|
+
* Build a response wrapper from the CDP notification associated with a
|
|
1176
|
+
* navigation. The constructor captures the owning page/session so follow-up
|
|
1177
|
+
* methods (body/text/json) can query CDP on-demand. The `response` payload is
|
|
1178
|
+
* the raw `Protocol.Network.Response` object emitted by Chrome.
|
|
973
1179
|
*/
|
|
1180
|
+
constructor(params: {
|
|
1181
|
+
page: Page;
|
|
1182
|
+
session: CDPSessionLike;
|
|
1183
|
+
requestId: string;
|
|
1184
|
+
frameId?: string;
|
|
1185
|
+
loaderId?: string;
|
|
1186
|
+
response: Protocol.Network.Response;
|
|
1187
|
+
fromServiceWorker: boolean;
|
|
1188
|
+
});
|
|
1189
|
+
/** URL associated with the navigation request. */
|
|
974
1190
|
url(): string;
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
/**
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
/**
|
|
984
|
-
|
|
985
|
-
*
|
|
986
|
-
* @param options Optional screenshot configuration.
|
|
987
|
-
* @param options.animations Control CSS/Web animations during capture. Use
|
|
988
|
-
* "disabled" to fast-forward finite animations and pause infinite ones.
|
|
989
|
-
* @param options.caret Either hide the text caret (default) or leave it
|
|
990
|
-
* visible via "initial".
|
|
991
|
-
* @param options.clip Restrict capture to a specific rectangle (in CSS
|
|
992
|
-
* pixels). Cannot be combined with `fullPage`.
|
|
993
|
-
* @param options.fullPage Capture the full scrollable page instead of the
|
|
994
|
-
* current viewport.
|
|
995
|
-
* @param options.mask Array of locators that should be covered with an
|
|
996
|
-
* overlay while the screenshot is taken.
|
|
997
|
-
* @param options.maskColor CSS color used for the mask overlay (default
|
|
998
|
-
* `#FF00FF`).
|
|
999
|
-
* @param options.omitBackground Make the default page background transparent
|
|
1000
|
-
* (PNG only).
|
|
1001
|
-
* @param options.path File path to write the screenshot to. The file extension
|
|
1002
|
-
* determines the image type when `type` is not explicitly provided.
|
|
1003
|
-
* @param options.quality JPEG quality (0–100). Only applies when
|
|
1004
|
-
* `type === "jpeg"`.
|
|
1005
|
-
* @param options.scale Render scale: use "css" for one pixel per CSS pixel,
|
|
1006
|
-
* otherwise the default "device" leverages the current device pixel ratio.
|
|
1007
|
-
* @param options.style Additional CSS text injected into every frame before
|
|
1008
|
-
* capture (removed afterwards).
|
|
1009
|
-
* @param options.timeout Maximum capture duration in milliseconds before a
|
|
1010
|
-
* timeout error is thrown.
|
|
1011
|
-
* @param options.type Image format (`"png"` by default).
|
|
1012
|
-
*/
|
|
1013
|
-
screenshot(options?: ScreenshotOptions): Promise<Buffer>;
|
|
1191
|
+
/** HTTP status code reported by Chrome. */
|
|
1192
|
+
status(): number;
|
|
1193
|
+
/** Human-readable status text that accompanied the response. */
|
|
1194
|
+
statusText(): string;
|
|
1195
|
+
/** Convenience predicate that checks for 2xx statuses. */
|
|
1196
|
+
ok(): boolean;
|
|
1197
|
+
/** Returns the Stagehand frame object that initiated the navigation. */
|
|
1198
|
+
frame(): Frame | null;
|
|
1199
|
+
/** Indicates whether the response was serviced by a Service Worker. */
|
|
1200
|
+
fromServiceWorker(): boolean;
|
|
1014
1201
|
/**
|
|
1015
|
-
*
|
|
1202
|
+
* Returns TLS security metadata when provided by the browser. In practice
|
|
1203
|
+
* this includes certificate issuer, protocol, and validity interval.
|
|
1016
1204
|
*/
|
|
1017
|
-
|
|
1205
|
+
securityDetails(): Promise<Protocol.Network.SecurityDetails | null>;
|
|
1206
|
+
/** Returns the resolved server address for the navigation when available. */
|
|
1207
|
+
serverAddr(): Promise<ServerAddr | null>;
|
|
1018
1208
|
/**
|
|
1019
|
-
*
|
|
1020
|
-
*
|
|
1021
|
-
* - Supports deep XPath that includes iframe steps (e.g., '/html/body/iframe[2]//div').
|
|
1022
|
-
* Returns a Locator scoped to the appropriate frame.
|
|
1209
|
+
* Returns the response headers normalised to lowercase keys. Matches the
|
|
1210
|
+
* behaviour of Playwright's `headers()` by eliding duplicate header entries.
|
|
1023
1211
|
*/
|
|
1024
|
-
|
|
1212
|
+
headers(): Record<string, string>;
|
|
1025
1213
|
/**
|
|
1026
|
-
*
|
|
1027
|
-
*
|
|
1214
|
+
* Returns all headers including those only surfaced through
|
|
1215
|
+
* `responseReceivedExtraInfo` such as `set-cookie`. Values are reported as the
|
|
1216
|
+
* browser sends them (no further splitting or concatenation).
|
|
1028
1217
|
*/
|
|
1029
|
-
|
|
1218
|
+
allHeaders(): Promise<Record<string, string>>;
|
|
1219
|
+
/** Returns a concatenated header string for the supplied header name. */
|
|
1220
|
+
headerValue(name: string): Promise<string | null>;
|
|
1221
|
+
/** Returns all values for a header (case-insensitive lookup). */
|
|
1222
|
+
headerValues(name: string): Promise<string[]>;
|
|
1030
1223
|
/**
|
|
1031
|
-
*
|
|
1032
|
-
*
|
|
1224
|
+
* Returns header entries preserving their original wire casing and ordering.
|
|
1225
|
+
* Falls back to the CDP object when the raw header text is unavailable.
|
|
1033
1226
|
*/
|
|
1034
|
-
|
|
1227
|
+
headersArray(): Promise<Array<{
|
|
1228
|
+
name: string;
|
|
1229
|
+
value: string;
|
|
1230
|
+
}>>;
|
|
1035
1231
|
/**
|
|
1036
|
-
*
|
|
1037
|
-
*
|
|
1232
|
+
* Requests the raw response body from Chrome DevTools Protocol. The method is
|
|
1233
|
+
* intentionally lazy because not every caller needs the payload, and CDP only
|
|
1234
|
+
* allows retrieving it once the response completes.
|
|
1038
1235
|
*/
|
|
1039
|
-
|
|
1236
|
+
body(): Promise<Buffer>;
|
|
1237
|
+
/** Decodes the response body as UTF-8 text. */
|
|
1238
|
+
text(): Promise<string>;
|
|
1239
|
+
/** Parses the response body as JSON and throws if parsing fails. */
|
|
1240
|
+
json<T = unknown>(): Promise<T>;
|
|
1040
1241
|
/**
|
|
1041
|
-
*
|
|
1042
|
-
*
|
|
1043
|
-
*
|
|
1044
|
-
* - The return value should be JSON-serializable. Non-serializable objects will
|
|
1045
|
-
* best-effort serialize via JSON.stringify inside the page context.
|
|
1242
|
+
* Resolves once the underlying network request completes or fails. Mirrors
|
|
1243
|
+
* Playwright's behaviour by resolving to `null` on success and to an `Error`
|
|
1244
|
+
* instance when Chrome reports `Network.loadingFailed`.
|
|
1046
1245
|
*/
|
|
1047
|
-
|
|
1246
|
+
finished(): Promise<null | Error>;
|
|
1048
1247
|
/**
|
|
1049
|
-
*
|
|
1050
|
-
*
|
|
1248
|
+
* Internal helper invoked by the navigation tracker when CDP reports extra
|
|
1249
|
+
* header information. This keeps the cached header views in sync with the
|
|
1250
|
+
* richer metadata.
|
|
1051
1251
|
*/
|
|
1052
|
-
|
|
1053
|
-
deviceScaleFactor?: number;
|
|
1054
|
-
}): Promise<void>;
|
|
1252
|
+
applyExtraInfo(event: Protocol.Network.ResponseReceivedExtraInfoEvent): void;
|
|
1055
1253
|
/**
|
|
1056
|
-
*
|
|
1057
|
-
*
|
|
1058
|
-
* on the top-level page target's session. Coordinates are relative to the
|
|
1059
|
-
* viewport origin (top-left). Does not scroll.
|
|
1254
|
+
* Internal helper for creating a Response object from a Serializable
|
|
1255
|
+
* goto response from the Stagehand API
|
|
1060
1256
|
*/
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1257
|
+
static fromSerializable(serialized: SerializableResponse, context: {
|
|
1258
|
+
page: Page;
|
|
1259
|
+
session: CDPSessionLike;
|
|
1260
|
+
}): Response$1;
|
|
1261
|
+
/** Marks the response as finished and resolves the `finished()` promise. */
|
|
1262
|
+
markFinished(error: Error | null): void;
|
|
1263
|
+
}
|
|
1264
|
+
|
|
1265
|
+
type AnyPage = Page$1 | Page$2 | Page$3 | Page;
|
|
1266
|
+
|
|
1267
|
+
type LoadState = "load" | "domcontentloaded" | "networkidle";
|
|
1268
|
+
|
|
1269
|
+
type ScreenshotAnimationsOption = "disabled" | "allow";
|
|
1270
|
+
type ScreenshotCaretOption = "hide" | "initial";
|
|
1271
|
+
type ScreenshotScaleOption = "css" | "device";
|
|
1272
|
+
interface ScreenshotClip {
|
|
1273
|
+
x: number;
|
|
1274
|
+
y: number;
|
|
1275
|
+
width: number;
|
|
1276
|
+
height: number;
|
|
1277
|
+
}
|
|
1278
|
+
interface ScreenshotOptions {
|
|
1279
|
+
animations?: ScreenshotAnimationsOption;
|
|
1280
|
+
caret?: ScreenshotCaretOption;
|
|
1281
|
+
clip?: ScreenshotClip;
|
|
1282
|
+
fullPage?: boolean;
|
|
1283
|
+
mask?: Locator[];
|
|
1284
|
+
maskColor?: string;
|
|
1285
|
+
omitBackground?: boolean;
|
|
1286
|
+
path?: string;
|
|
1287
|
+
quality?: number;
|
|
1288
|
+
scale?: ScreenshotScaleOption;
|
|
1289
|
+
style?: string;
|
|
1290
|
+
timeout?: number;
|
|
1291
|
+
type?: "png" | "jpeg";
|
|
1292
|
+
}
|
|
1293
|
+
|
|
1294
|
+
declare class Page {
|
|
1295
|
+
private readonly conn;
|
|
1296
|
+
private readonly mainSession;
|
|
1297
|
+
private readonly _targetId;
|
|
1298
|
+
/** Every CDP child session this page owns (top-level + adopted OOPIF sessions). */
|
|
1299
|
+
private readonly sessions;
|
|
1300
|
+
/** Unified truth for frame topology + ownership. */
|
|
1301
|
+
private readonly registry;
|
|
1302
|
+
/** A convenience wrapper bound to the current main frame id (top-level session). */
|
|
1303
|
+
private mainFrameWrapper;
|
|
1304
|
+
/** Compact ordinal per frameId (used by snapshot encoding). */
|
|
1305
|
+
private frameOrdinals;
|
|
1306
|
+
private nextOrdinal;
|
|
1307
|
+
/** cache Frames per frameId so everyone uses the same one */
|
|
1308
|
+
private readonly frameCache;
|
|
1309
|
+
private readonly browserIsRemote;
|
|
1310
|
+
/** Stable id for Frames created by this Page (use top-level TargetId). */
|
|
1311
|
+
private readonly pageId;
|
|
1312
|
+
/** Cached current URL for synchronous page.url() */
|
|
1313
|
+
private _currentUrl;
|
|
1314
|
+
private navigationCommandSeq;
|
|
1315
|
+
private latestNavigationCommandId;
|
|
1316
|
+
private readonly networkManager;
|
|
1317
|
+
/** Optional API client for routing page operations to the API */
|
|
1318
|
+
private readonly apiClient;
|
|
1319
|
+
private readonly consoleListeners;
|
|
1320
|
+
private readonly consoleHandlers;
|
|
1321
|
+
/** Document-start scripts installed across every session this page owns. */
|
|
1322
|
+
private readonly initScripts;
|
|
1323
|
+
private constructor();
|
|
1324
|
+
private installInitScriptOnSession;
|
|
1325
|
+
private applyInitScriptsToSession;
|
|
1326
|
+
registerInitScript(source: string): Promise<void>;
|
|
1327
|
+
private cursorEnabled;
|
|
1328
|
+
private ensureCursorScript;
|
|
1329
|
+
enableCursorOverlay(): Promise<void>;
|
|
1330
|
+
private updateCursor;
|
|
1331
|
+
addInitScript<Arg>(script: InitScriptSource<Arg>, arg?: Arg): Promise<void>;
|
|
1085
1332
|
/**
|
|
1086
|
-
*
|
|
1087
|
-
*
|
|
1333
|
+
* Factory: create Page and seed registry with the shallow tree from Page.getFrameTree.
|
|
1334
|
+
* Assumes Page domain is already enabled on the session passed in.
|
|
1088
1335
|
*/
|
|
1089
|
-
|
|
1090
|
-
button?: "left" | "right" | "middle";
|
|
1091
|
-
steps?: number;
|
|
1092
|
-
delay?: number;
|
|
1093
|
-
returnXpath: true;
|
|
1094
|
-
}): Promise<[string, string]>;
|
|
1095
|
-
dragAndDrop(fromX: number, fromY: number, toX: number, toY: number, options?: {
|
|
1096
|
-
button?: "left" | "right" | "middle";
|
|
1097
|
-
steps?: number;
|
|
1098
|
-
delay?: number;
|
|
1099
|
-
returnXpath?: false;
|
|
1100
|
-
}): Promise<void>;
|
|
1101
|
-
dragAndDrop(fromX: number, fromY: number, toX: number, toY: number, options: {
|
|
1102
|
-
button?: "left" | "right" | "middle";
|
|
1103
|
-
steps?: number;
|
|
1104
|
-
delay?: number;
|
|
1105
|
-
returnXpath: boolean;
|
|
1106
|
-
}): Promise<void | [string, string]>;
|
|
1336
|
+
static create(conn: CdpConnection, session: CDPSessionLike, targetId: string, apiClient?: StagehandAPIClient | null, localBrowserLaunchOptions?: LocalBrowserLaunchOptions | null, browserIsRemote?: boolean): Promise<Page>;
|
|
1107
1337
|
/**
|
|
1108
|
-
*
|
|
1109
|
-
*
|
|
1110
|
-
* and never falls back to Input.insertText. Optional delay applies between
|
|
1111
|
-
* successive characters.
|
|
1338
|
+
* Parent/child session emitted a `frameAttached`.
|
|
1339
|
+
* Topology update + ownership stamped to **emitting session**.
|
|
1112
1340
|
*/
|
|
1113
|
-
|
|
1114
|
-
delay?: number;
|
|
1115
|
-
withMistakes?: boolean;
|
|
1116
|
-
}): Promise<void>;
|
|
1341
|
+
onFrameAttached(frameId: string, parentId: string | null, session: CDPSessionLike): void;
|
|
1117
1342
|
/**
|
|
1118
|
-
*
|
|
1119
|
-
* For printable characters, uses the text path on keyDown; for named keys, sets key/code/VK.
|
|
1120
|
-
* Supports key combinations with modifiers like "Cmd+A", "Ctrl+C", "Shift+Tab", etc.
|
|
1343
|
+
* Parent/child session emitted a `frameDetached`.
|
|
1121
1344
|
*/
|
|
1122
|
-
|
|
1123
|
-
delay?: number;
|
|
1124
|
-
}): Promise<void>;
|
|
1125
|
-
private _pressedModifiers;
|
|
1126
|
-
/** Press a key down without releasing it */
|
|
1127
|
-
private keyDown;
|
|
1128
|
-
/** Release a pressed key */
|
|
1129
|
-
private keyUp;
|
|
1130
|
-
/** Normalize modifier key names to match CDP expectations */
|
|
1131
|
-
private normalizeModifierKey;
|
|
1345
|
+
onFrameDetached(frameId: string, reason?: "remove" | "swap" | string): void;
|
|
1132
1346
|
/**
|
|
1133
|
-
*
|
|
1347
|
+
* Parent/child session emitted a `frameNavigated`.
|
|
1348
|
+
* Topology + ownership update. Handles root swaps.
|
|
1134
1349
|
*/
|
|
1135
|
-
|
|
1350
|
+
onFrameNavigated(frame: Protocol.Page.Frame, session: CDPSessionLike): void;
|
|
1351
|
+
onNavigatedWithinDocument(frameId: string, url: string, session: CDPSessionLike): void;
|
|
1136
1352
|
/**
|
|
1137
|
-
*
|
|
1138
|
-
*
|
|
1353
|
+
* An OOPIF child session whose **main** frame id equals the parent iframe’s frameId
|
|
1354
|
+
* has been attached; adopt the session into this Page and seed ownership for its subtree.
|
|
1139
1355
|
*/
|
|
1140
|
-
|
|
1141
|
-
|
|
1356
|
+
adoptOopifSession(childSession: CDPSessionLike, childMainFrameId: string): void;
|
|
1357
|
+
/** Detach an adopted child session and prune its subtree */
|
|
1358
|
+
detachOopifSession(sessionId: string): void;
|
|
1359
|
+
/** Return the owning CDP session for a frameId (falls back to main session) */
|
|
1360
|
+
getSessionForFrame(frameId: string): CDPSessionLike;
|
|
1361
|
+
/** Always returns a Frame bound to the owning session */
|
|
1362
|
+
frameForId(frameId: string): Frame;
|
|
1363
|
+
/** Expose a session by id (used by snapshot to resolve session id -> session) */
|
|
1364
|
+
getSessionById(id: string): CDPSessionLike | undefined;
|
|
1365
|
+
registerSessionForNetwork(session: CDPSessionLike): void;
|
|
1366
|
+
unregisterSessionForNetwork(sessionId: string | undefined): void;
|
|
1367
|
+
on(event: "console", listener: ConsoleListener): Page;
|
|
1368
|
+
once(event: "console", listener: ConsoleListener): Page;
|
|
1369
|
+
off(event: "console", listener: ConsoleListener): Page;
|
|
1370
|
+
targetId(): string;
|
|
1142
1371
|
/**
|
|
1143
|
-
*
|
|
1144
|
-
*
|
|
1372
|
+
* Send a CDP command through the main session.
|
|
1373
|
+
* Allows external consumers to execute arbitrary Chrome DevTools Protocol commands.
|
|
1374
|
+
*
|
|
1375
|
+
* @param method - The CDP method name (e.g., "Page.enable", "Runtime.evaluate")
|
|
1376
|
+
* @param params - Optional parameters for the CDP command
|
|
1377
|
+
* @returns Promise resolving to the typed CDP response
|
|
1378
|
+
*
|
|
1379
|
+
* @example
|
|
1380
|
+
* // Enable the Runtime domain
|
|
1381
|
+
* await page.sendCDP("Runtime.enable");
|
|
1382
|
+
*
|
|
1383
|
+
* @example
|
|
1384
|
+
* // Evaluate JavaScript with typed response
|
|
1385
|
+
* const result = await page.sendCDP<Protocol.Runtime.EvaluateResponse>(
|
|
1386
|
+
* "Runtime.evaluate",
|
|
1387
|
+
* { expression: "1 + 1" }
|
|
1388
|
+
* );
|
|
1145
1389
|
*/
|
|
1146
|
-
|
|
1390
|
+
sendCDP<T = unknown>(method: string, params?: object): Promise<T>;
|
|
1391
|
+
/** Seed the cached URL before navigation events converge. */
|
|
1392
|
+
seedCurrentUrl(url: string | undefined | null): void;
|
|
1393
|
+
mainFrameId(): string;
|
|
1394
|
+
mainFrame(): Frame;
|
|
1147
1395
|
/**
|
|
1148
|
-
*
|
|
1396
|
+
* Close this top-level page (tab). Best-effort via Target.closeTarget.
|
|
1149
1397
|
*/
|
|
1150
|
-
|
|
1398
|
+
close(): Promise<void>;
|
|
1399
|
+
getFullFrameTree(): Protocol.Page.FrameTree;
|
|
1400
|
+
asProtocolFrameTree(rootMainFrameId: string): Protocol.Page.FrameTree;
|
|
1401
|
+
private ensureOrdinal;
|
|
1402
|
+
/** Public getter for snapshot code / handlers. */
|
|
1403
|
+
getOrdinal(frameId: string): number;
|
|
1404
|
+
listAllFrameIds(): string[];
|
|
1405
|
+
private ensureConsoleTaps;
|
|
1406
|
+
private installConsoleTap;
|
|
1407
|
+
private sessionKey;
|
|
1408
|
+
private resolveSessionByKey;
|
|
1409
|
+
private teardownConsoleTap;
|
|
1410
|
+
private removeAllConsoleTaps;
|
|
1411
|
+
private emitConsole;
|
|
1151
1412
|
/**
|
|
1152
|
-
*
|
|
1153
|
-
*
|
|
1154
|
-
* - Event path listens at the session level and compares incoming `frameId`
|
|
1155
|
-
* to `mainFrameId()` **at event time** to follow root swaps.
|
|
1413
|
+
* Navigate the page; optionally wait for a lifecycle state.
|
|
1414
|
+
* Waits on the **current** main frame and follows root swaps during navigation.
|
|
1156
1415
|
*/
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
[key: string]: unknown;
|
|
1170
|
-
}
|
|
1171
|
-
interface AgentResult {
|
|
1172
|
-
success: boolean;
|
|
1173
|
-
message: string;
|
|
1174
|
-
actions: AgentAction[];
|
|
1175
|
-
completed: boolean;
|
|
1176
|
-
metadata?: Record<string, unknown>;
|
|
1177
|
-
usage?: {
|
|
1178
|
-
input_tokens: number;
|
|
1179
|
-
output_tokens: number;
|
|
1180
|
-
reasoning_tokens?: number;
|
|
1181
|
-
cached_input_tokens?: number;
|
|
1182
|
-
inference_time_ms: number;
|
|
1183
|
-
};
|
|
1184
|
-
}
|
|
1185
|
-
interface AgentExecuteOptions {
|
|
1186
|
-
instruction: string;
|
|
1187
|
-
maxSteps?: number;
|
|
1188
|
-
page?: Page$1 | Page$2 | Page$3 | Page;
|
|
1189
|
-
highlightCursor?: boolean;
|
|
1190
|
-
}
|
|
1191
|
-
type AgentType = "openai" | "anthropic" | "google";
|
|
1192
|
-
declare const AVAILABLE_CUA_MODELS: readonly ["openai/computer-use-preview", "openai/computer-use-preview-2025-03-11", "anthropic/claude-3-7-sonnet-latest", "anthropic/claude-haiku-4-5-20251001", "anthropic/claude-sonnet-4-20250514", "anthropic/claude-sonnet-4-5-20250929", "google/gemini-2.5-computer-use-preview-10-2025"];
|
|
1193
|
-
type AvailableCuaModel = (typeof AVAILABLE_CUA_MODELS)[number];
|
|
1194
|
-
interface AgentExecutionOptions<TOptions extends AgentExecuteOptions = AgentExecuteOptions> {
|
|
1195
|
-
options: TOptions;
|
|
1196
|
-
logger: (message: LogLine) => void;
|
|
1197
|
-
retries?: number;
|
|
1198
|
-
}
|
|
1199
|
-
interface AgentHandlerOptions {
|
|
1200
|
-
modelName: string;
|
|
1201
|
-
clientOptions?: Record<string, unknown>;
|
|
1202
|
-
userProvidedInstructions?: string;
|
|
1203
|
-
experimental?: boolean;
|
|
1204
|
-
}
|
|
1205
|
-
interface ActionExecutionResult {
|
|
1206
|
-
success: boolean;
|
|
1207
|
-
error?: string;
|
|
1208
|
-
data?: unknown;
|
|
1209
|
-
}
|
|
1210
|
-
interface ToolUseItem extends ResponseItem {
|
|
1211
|
-
type: "tool_use";
|
|
1212
|
-
id: string;
|
|
1213
|
-
name: string;
|
|
1214
|
-
input: Record<string, unknown>;
|
|
1215
|
-
}
|
|
1216
|
-
interface AnthropicMessage {
|
|
1217
|
-
role: string;
|
|
1218
|
-
content: string | Array<AnthropicContentBlock>;
|
|
1219
|
-
}
|
|
1220
|
-
interface AnthropicContentBlock {
|
|
1221
|
-
type: string;
|
|
1222
|
-
[key: string]: unknown;
|
|
1223
|
-
}
|
|
1224
|
-
interface AnthropicTextBlock extends AnthropicContentBlock {
|
|
1225
|
-
type: "text";
|
|
1226
|
-
text: string;
|
|
1227
|
-
}
|
|
1228
|
-
interface AnthropicToolResult {
|
|
1229
|
-
type: "tool_result";
|
|
1230
|
-
tool_use_id: string;
|
|
1231
|
-
content: string | Array<AnthropicContentBlock>;
|
|
1232
|
-
}
|
|
1233
|
-
interface ResponseItem {
|
|
1234
|
-
type: string;
|
|
1235
|
-
id: string;
|
|
1236
|
-
[key: string]: unknown;
|
|
1237
|
-
}
|
|
1238
|
-
interface ComputerCallItem extends ResponseItem {
|
|
1239
|
-
type: "computer_call";
|
|
1240
|
-
call_id: string;
|
|
1241
|
-
action: {
|
|
1242
|
-
type: string;
|
|
1243
|
-
[key: string]: unknown;
|
|
1244
|
-
};
|
|
1245
|
-
pending_safety_checks?: Array<{
|
|
1246
|
-
id: string;
|
|
1247
|
-
code: string;
|
|
1248
|
-
message: string;
|
|
1249
|
-
}>;
|
|
1250
|
-
}
|
|
1251
|
-
interface FunctionCallItem extends ResponseItem {
|
|
1252
|
-
type: "function_call";
|
|
1253
|
-
call_id: string;
|
|
1254
|
-
name: string;
|
|
1255
|
-
arguments: string;
|
|
1256
|
-
}
|
|
1257
|
-
type ResponseInputItem = {
|
|
1258
|
-
role: string;
|
|
1259
|
-
content: string;
|
|
1260
|
-
} | {
|
|
1261
|
-
type: "computer_call_output";
|
|
1262
|
-
call_id: string;
|
|
1263
|
-
output: {
|
|
1264
|
-
type: "input_image";
|
|
1265
|
-
image_url: string;
|
|
1266
|
-
current_url?: string;
|
|
1267
|
-
error?: string;
|
|
1268
|
-
[key: string]: unknown;
|
|
1269
|
-
} | string;
|
|
1270
|
-
acknowledged_safety_checks?: Array<{
|
|
1271
|
-
id: string;
|
|
1272
|
-
code: string;
|
|
1273
|
-
message: string;
|
|
1274
|
-
}>;
|
|
1275
|
-
} | {
|
|
1276
|
-
type: "function_call_output";
|
|
1277
|
-
call_id: string;
|
|
1278
|
-
output: string;
|
|
1279
|
-
};
|
|
1280
|
-
interface AgentInstance {
|
|
1281
|
-
execute: (instructionOrOptions: string | AgentExecuteOptions) => Promise<AgentResult>;
|
|
1282
|
-
}
|
|
1283
|
-
type AgentProviderType = AgentType;
|
|
1284
|
-
type AgentModelConfig<TModelName extends string = string> = {
|
|
1285
|
-
modelName: TModelName;
|
|
1286
|
-
} & Record<string, unknown>;
|
|
1287
|
-
type AgentConfig = {
|
|
1416
|
+
goto(url: string, options?: {
|
|
1417
|
+
waitUntil?: LoadState;
|
|
1418
|
+
timeoutMs?: number;
|
|
1419
|
+
}): Promise<Response$1 | null>;
|
|
1420
|
+
/**
|
|
1421
|
+
* Reload the page; optionally wait for a lifecycle state.
|
|
1422
|
+
*/
|
|
1423
|
+
reload(options?: {
|
|
1424
|
+
waitUntil?: LoadState;
|
|
1425
|
+
timeoutMs?: number;
|
|
1426
|
+
ignoreCache?: boolean;
|
|
1427
|
+
}): Promise<Response$1 | null>;
|
|
1288
1428
|
/**
|
|
1289
|
-
*
|
|
1429
|
+
* Navigate back in history if possible; optionally wait for a lifecycle state.
|
|
1290
1430
|
*/
|
|
1291
|
-
|
|
1431
|
+
goBack(options?: {
|
|
1432
|
+
waitUntil?: LoadState;
|
|
1433
|
+
timeoutMs?: number;
|
|
1434
|
+
}): Promise<Response$1 | null>;
|
|
1292
1435
|
/**
|
|
1293
|
-
*
|
|
1436
|
+
* Navigate forward in history if possible; optionally wait for a lifecycle state.
|
|
1294
1437
|
*/
|
|
1295
|
-
|
|
1438
|
+
goForward(options?: {
|
|
1439
|
+
waitUntil?: LoadState;
|
|
1440
|
+
timeoutMs?: number;
|
|
1441
|
+
}): Promise<Response$1 | null>;
|
|
1296
1442
|
/**
|
|
1297
|
-
*
|
|
1443
|
+
* Return the current page URL (synchronous, cached from navigation events).
|
|
1298
1444
|
*/
|
|
1299
|
-
|
|
1445
|
+
url(): string;
|
|
1446
|
+
private beginNavigationCommand;
|
|
1447
|
+
isCurrentNavigationCommand(id: number): boolean;
|
|
1300
1448
|
/**
|
|
1301
|
-
*
|
|
1449
|
+
* Return the current page title.
|
|
1450
|
+
* Prefers reading from the active document via Runtime.evaluate to reflect dynamic changes.
|
|
1451
|
+
* Falls back to navigation history title if evaluation is unavailable.
|
|
1302
1452
|
*/
|
|
1303
|
-
|
|
1453
|
+
title(): Promise<string>;
|
|
1304
1454
|
/**
|
|
1305
|
-
*
|
|
1455
|
+
* Capture a screenshot with Playwright-style options.
|
|
1456
|
+
*
|
|
1457
|
+
* @param options Optional screenshot configuration.
|
|
1458
|
+
* @param options.animations Control CSS/Web animations during capture. Use
|
|
1459
|
+
* "disabled" to fast-forward finite animations and pause infinite ones.
|
|
1460
|
+
* @param options.caret Either hide the text caret (default) or leave it
|
|
1461
|
+
* visible via "initial".
|
|
1462
|
+
* @param options.clip Restrict capture to a specific rectangle (in CSS
|
|
1463
|
+
* pixels). Cannot be combined with `fullPage`.
|
|
1464
|
+
* @param options.fullPage Capture the full scrollable page instead of the
|
|
1465
|
+
* current viewport.
|
|
1466
|
+
* @param options.mask Array of locators that should be covered with an
|
|
1467
|
+
* overlay while the screenshot is taken.
|
|
1468
|
+
* @param options.maskColor CSS color used for the mask overlay (default
|
|
1469
|
+
* `#FF00FF`).
|
|
1470
|
+
* @param options.omitBackground Make the default page background transparent
|
|
1471
|
+
* (PNG only).
|
|
1472
|
+
* @param options.path File path to write the screenshot to. The file extension
|
|
1473
|
+
* determines the image type when `type` is not explicitly provided.
|
|
1474
|
+
* @param options.quality JPEG quality (0–100). Only applies when
|
|
1475
|
+
* `type === "jpeg"`.
|
|
1476
|
+
* @param options.scale Render scale: use "css" for one pixel per CSS pixel,
|
|
1477
|
+
* otherwise the default "device" leverages the current device pixel ratio.
|
|
1478
|
+
* @param options.style Additional CSS text injected into every frame before
|
|
1479
|
+
* capture (removed afterwards).
|
|
1480
|
+
* @param options.timeout Maximum capture duration in milliseconds before a
|
|
1481
|
+
* timeout error is thrown.
|
|
1482
|
+
* @param options.type Image format (`"png"` by default).
|
|
1306
1483
|
*/
|
|
1307
|
-
|
|
1484
|
+
screenshot(options?: ScreenshotOptions): Promise<Buffer>;
|
|
1308
1485
|
/**
|
|
1309
|
-
*
|
|
1310
|
-
* If not specified, inherits from the main model configuration.
|
|
1311
|
-
* Format: "provider/model" (e.g., "openai/gpt-4o-mini", "google/gemini-2.0-flash-exp")
|
|
1486
|
+
* Create a locator bound to the current main frame.
|
|
1312
1487
|
*/
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
}
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
}
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
OBSERVE = "OBSERVE",
|
|
1382
|
-
AGENT = "AGENT"
|
|
1383
|
-
}
|
|
1384
|
-
|
|
1385
|
-
interface StagehandMetrics {
|
|
1386
|
-
actPromptTokens: number;
|
|
1387
|
-
actCompletionTokens: number;
|
|
1388
|
-
actReasoningTokens: number;
|
|
1389
|
-
actCachedInputTokens: number;
|
|
1390
|
-
actInferenceTimeMs: number;
|
|
1391
|
-
extractPromptTokens: number;
|
|
1392
|
-
extractCompletionTokens: number;
|
|
1393
|
-
extractReasoningTokens: number;
|
|
1394
|
-
extractCachedInputTokens: number;
|
|
1395
|
-
extractInferenceTimeMs: number;
|
|
1396
|
-
observePromptTokens: number;
|
|
1397
|
-
observeCompletionTokens: number;
|
|
1398
|
-
observeReasoningTokens: number;
|
|
1399
|
-
observeCachedInputTokens: number;
|
|
1400
|
-
observeInferenceTimeMs: number;
|
|
1401
|
-
agentPromptTokens: number;
|
|
1402
|
-
agentCompletionTokens: number;
|
|
1403
|
-
agentReasoningTokens: number;
|
|
1404
|
-
agentCachedInputTokens: number;
|
|
1405
|
-
agentInferenceTimeMs: number;
|
|
1406
|
-
totalPromptTokens: number;
|
|
1407
|
-
totalCompletionTokens: number;
|
|
1408
|
-
totalReasoningTokens: number;
|
|
1409
|
-
totalCachedInputTokens: number;
|
|
1410
|
-
totalInferenceTimeMs: number;
|
|
1411
|
-
}
|
|
1412
|
-
|
|
1413
|
-
type V3Env = "LOCAL" | "BROWSERBASE";
|
|
1414
|
-
/** Local launch options for V3 (chrome-launcher + CDP).
|
|
1415
|
-
* Matches v2 shape where feasible; unsupported fields are accepted but ignored.
|
|
1416
|
-
*/
|
|
1417
|
-
interface LocalBrowserLaunchOptions {
|
|
1418
|
-
args?: string[];
|
|
1419
|
-
executablePath?: string;
|
|
1420
|
-
userDataDir?: string;
|
|
1421
|
-
preserveUserDataDir?: boolean;
|
|
1422
|
-
headless?: boolean;
|
|
1423
|
-
devtools?: boolean;
|
|
1424
|
-
chromiumSandbox?: boolean;
|
|
1425
|
-
ignoreDefaultArgs?: boolean | string[];
|
|
1426
|
-
proxy?: {
|
|
1427
|
-
server: string;
|
|
1428
|
-
bypass?: string;
|
|
1429
|
-
username?: string;
|
|
1430
|
-
password?: string;
|
|
1431
|
-
};
|
|
1432
|
-
locale?: string;
|
|
1433
|
-
viewport?: {
|
|
1434
|
-
width: number;
|
|
1435
|
-
height: number;
|
|
1436
|
-
};
|
|
1437
|
-
deviceScaleFactor?: number;
|
|
1438
|
-
hasTouch?: boolean;
|
|
1439
|
-
ignoreHTTPSErrors?: boolean;
|
|
1440
|
-
cdpUrl?: string;
|
|
1441
|
-
connectTimeoutMs?: number;
|
|
1442
|
-
downloadsPath?: string;
|
|
1443
|
-
acceptDownloads?: boolean;
|
|
1444
|
-
}
|
|
1445
|
-
/** Constructor options for V3 */
|
|
1446
|
-
interface V3Options {
|
|
1447
|
-
env: V3Env;
|
|
1448
|
-
apiKey?: string;
|
|
1449
|
-
projectId?: string;
|
|
1488
|
+
locator(selector: string): ReturnType<Frame["locator"]>;
|
|
1489
|
+
/**
|
|
1490
|
+
* Deep locator that supports cross-iframe traversal.
|
|
1491
|
+
* - Recognizes '>>' hop notation to enter iframe contexts.
|
|
1492
|
+
* - Supports deep XPath that includes iframe steps (e.g., '/html/body/iframe[2]//div').
|
|
1493
|
+
* Returns a Locator scoped to the appropriate frame.
|
|
1494
|
+
*/
|
|
1495
|
+
deepLocator(selector: string): DeepLocatorDelegate;
|
|
1496
|
+
/**
|
|
1497
|
+
* Frame locator similar to Playwright: targets iframe elements and scopes
|
|
1498
|
+
* subsequent locators to that frame. Supports chaining.
|
|
1499
|
+
*/
|
|
1500
|
+
frameLocator(selector: string): FrameLocator;
|
|
1501
|
+
/**
|
|
1502
|
+
* List all frames belonging to this page as Frame objects bound to their owning sessions.
|
|
1503
|
+
* The list is ordered by a stable ordinal assigned during the page lifetime.
|
|
1504
|
+
*/
|
|
1505
|
+
frames(): Frame[];
|
|
1506
|
+
/**
|
|
1507
|
+
* Wait until the page reaches a lifecycle state on the current main frame.
|
|
1508
|
+
* Mirrors Playwright's API signatures.
|
|
1509
|
+
*/
|
|
1510
|
+
waitForLoadState(state: LoadState, timeoutMs?: number): Promise<void>;
|
|
1511
|
+
/**
|
|
1512
|
+
* Evaluate a function or expression in the current main frame's main world.
|
|
1513
|
+
* - If a string is provided, it is treated as a JS expression.
|
|
1514
|
+
* - If a function is provided, it is stringified and invoked with the optional argument.
|
|
1515
|
+
* - The return value should be JSON-serializable. Non-serializable objects will
|
|
1516
|
+
* best-effort serialize via JSON.stringify inside the page context.
|
|
1517
|
+
*/
|
|
1518
|
+
evaluate<R = unknown, Arg = unknown>(pageFunctionOrExpression: string | ((arg: Arg) => R | Promise<R>), arg?: Arg): Promise<R>;
|
|
1519
|
+
/**
|
|
1520
|
+
* Force the page viewport to an exact CSS size and device scale factor.
|
|
1521
|
+
* Ensures screenshots match width x height pixels when deviceScaleFactor = 1.
|
|
1522
|
+
*/
|
|
1523
|
+
setViewportSize(width: number, height: number, options?: {
|
|
1524
|
+
deviceScaleFactor?: number;
|
|
1525
|
+
}): Promise<void>;
|
|
1526
|
+
/**
|
|
1527
|
+
* Click at absolute page coordinates (CSS pixels).
|
|
1528
|
+
* Dispatches mouseMoved → mousePressed → mouseReleased via CDP Input domain
|
|
1529
|
+
* on the top-level page target's session. Coordinates are relative to the
|
|
1530
|
+
* viewport origin (top-left). Does not scroll.
|
|
1531
|
+
*/
|
|
1532
|
+
click(x: number, y: number, options: {
|
|
1533
|
+
button?: "left" | "right" | "middle";
|
|
1534
|
+
clickCount?: number;
|
|
1535
|
+
returnXpath: true;
|
|
1536
|
+
}): Promise<string>;
|
|
1537
|
+
click(x: number, y: number, options?: {
|
|
1538
|
+
button?: "left" | "right" | "middle";
|
|
1539
|
+
clickCount?: number;
|
|
1540
|
+
returnXpath?: false;
|
|
1541
|
+
}): Promise<void>;
|
|
1542
|
+
click(x: number, y: number, options: {
|
|
1543
|
+
button?: "left" | "right" | "middle";
|
|
1544
|
+
clickCount?: number;
|
|
1545
|
+
returnXpath: boolean;
|
|
1546
|
+
}): Promise<void | string>;
|
|
1547
|
+
scroll(x: number, y: number, deltaX: number, deltaY: number, options: {
|
|
1548
|
+
returnXpath: true;
|
|
1549
|
+
}): Promise<string>;
|
|
1550
|
+
scroll(x: number, y: number, deltaX: number, deltaY: number, options?: {
|
|
1551
|
+
returnXpath?: false;
|
|
1552
|
+
}): Promise<void>;
|
|
1553
|
+
scroll(x: number, y: number, deltaX: number, deltaY: number, options: {
|
|
1554
|
+
returnXpath: boolean;
|
|
1555
|
+
}): Promise<void | string>;
|
|
1450
1556
|
/**
|
|
1451
|
-
*
|
|
1557
|
+
* Drag from (fromX, fromY) to (toX, toY) using mouse events.
|
|
1558
|
+
* Sends mouseMoved → mousePressed → mouseMoved (steps) → mouseReleased.
|
|
1452
1559
|
*/
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
|
|
1464
|
-
|
|
1465
|
-
|
|
1466
|
-
|
|
1467
|
-
|
|
1468
|
-
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
}
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
declare class AgentScreenshotProviderError extends StagehandError {
|
|
1521
|
-
constructor(message: string);
|
|
1522
|
-
}
|
|
1523
|
-
declare class StagehandMissingArgumentError extends StagehandError {
|
|
1524
|
-
constructor(message: string);
|
|
1525
|
-
}
|
|
1526
|
-
declare class CreateChatCompletionResponseError extends StagehandError {
|
|
1527
|
-
constructor(message: string);
|
|
1528
|
-
}
|
|
1529
|
-
declare class StagehandEvalError extends StagehandError {
|
|
1530
|
-
constructor(message: string);
|
|
1531
|
-
}
|
|
1532
|
-
declare class StagehandDomProcessError extends StagehandError {
|
|
1533
|
-
constructor(message: string);
|
|
1534
|
-
}
|
|
1535
|
-
declare class StagehandClickError extends StagehandError {
|
|
1536
|
-
constructor(message: string, selector: string);
|
|
1537
|
-
}
|
|
1538
|
-
declare class LLMResponseError extends StagehandError {
|
|
1539
|
-
constructor(primitive: string, message: string);
|
|
1540
|
-
}
|
|
1541
|
-
declare class StagehandIframeError extends StagehandError {
|
|
1542
|
-
constructor(frameUrl: string, message: string);
|
|
1543
|
-
}
|
|
1544
|
-
declare class ContentFrameNotFoundError extends StagehandError {
|
|
1545
|
-
constructor(selector: string);
|
|
1546
|
-
}
|
|
1547
|
-
declare class XPathResolutionError extends StagehandError {
|
|
1548
|
-
constructor(xpath: string);
|
|
1549
|
-
}
|
|
1550
|
-
declare class ExperimentalApiConflictError extends StagehandError {
|
|
1551
|
-
constructor();
|
|
1552
|
-
}
|
|
1553
|
-
declare class ExperimentalNotConfiguredError extends StagehandError {
|
|
1554
|
-
constructor(featureName: string);
|
|
1555
|
-
}
|
|
1556
|
-
declare class CuaModelRequiredError extends StagehandError {
|
|
1557
|
-
constructor(availableModels: readonly string[]);
|
|
1558
|
-
}
|
|
1559
|
-
declare class ZodSchemaValidationError extends Error {
|
|
1560
|
-
readonly received: unknown;
|
|
1561
|
-
readonly issues: ReturnType<ZodError["format"]>;
|
|
1562
|
-
constructor(received: unknown, issues: ReturnType<ZodError["format"]>);
|
|
1563
|
-
}
|
|
1564
|
-
declare class StagehandInitError extends StagehandError {
|
|
1565
|
-
constructor(message: string);
|
|
1566
|
-
}
|
|
1567
|
-
declare class MCPConnectionError extends StagehandError {
|
|
1568
|
-
readonly serverUrl: string;
|
|
1569
|
-
readonly originalError: unknown;
|
|
1570
|
-
constructor(serverUrl: string, originalError: unknown);
|
|
1571
|
-
}
|
|
1572
|
-
declare class StagehandShadowRootMissingError extends StagehandError {
|
|
1573
|
-
constructor(detail?: string);
|
|
1574
|
-
}
|
|
1575
|
-
declare class StagehandShadowSegmentEmptyError extends StagehandError {
|
|
1576
|
-
constructor();
|
|
1577
|
-
}
|
|
1578
|
-
declare class StagehandShadowSegmentNotFoundError extends StagehandError {
|
|
1579
|
-
constructor(segment: string, hint?: string);
|
|
1580
|
-
}
|
|
1581
|
-
declare class ElementNotVisibleError extends StagehandError {
|
|
1582
|
-
constructor(selector: string);
|
|
1583
|
-
}
|
|
1584
|
-
declare class ResponseBodyError extends StagehandError {
|
|
1585
|
-
constructor(message: string);
|
|
1586
|
-
}
|
|
1587
|
-
declare class ResponseParseError extends StagehandError {
|
|
1588
|
-
constructor(message: string);
|
|
1589
|
-
}
|
|
1590
|
-
declare class TimeoutError extends StagehandError {
|
|
1591
|
-
constructor(operation: string, timeoutMs: number);
|
|
1592
|
-
}
|
|
1593
|
-
declare class PageNotFoundError extends StagehandError {
|
|
1594
|
-
constructor(identifier: string);
|
|
1595
|
-
}
|
|
1596
|
-
declare class ConnectionTimeoutError extends StagehandError {
|
|
1597
|
-
constructor(message: string);
|
|
1598
|
-
}
|
|
1599
|
-
|
|
1600
|
-
declare class AISdkClient extends LLMClient {
|
|
1601
|
-
type: "aisdk";
|
|
1602
|
-
private model;
|
|
1603
|
-
constructor({ model }: {
|
|
1604
|
-
model: LanguageModelV2;
|
|
1605
|
-
});
|
|
1606
|
-
createChatCompletion<T = ChatCompletion>({ options, }: CreateChatCompletionOptions): Promise<T>;
|
|
1607
|
-
}
|
|
1608
|
-
|
|
1609
|
-
interface StagehandAPIConstructorParams {
|
|
1610
|
-
apiKey: string;
|
|
1611
|
-
projectId: string;
|
|
1612
|
-
logger: (message: LogLine) => void;
|
|
1613
|
-
}
|
|
1614
|
-
interface StartSessionParams {
|
|
1615
|
-
modelName: string;
|
|
1616
|
-
modelApiKey: string;
|
|
1617
|
-
domSettleTimeoutMs: number;
|
|
1618
|
-
verbose: number;
|
|
1619
|
-
systemPrompt?: string;
|
|
1620
|
-
browserbaseSessionCreateParams?: Omit<Browserbase.Sessions.SessionCreateParams, "projectId"> & {
|
|
1621
|
-
projectId?: string;
|
|
1622
|
-
};
|
|
1623
|
-
selfHeal?: boolean;
|
|
1624
|
-
browserbaseSessionID?: string;
|
|
1625
|
-
}
|
|
1626
|
-
interface StartSessionResult {
|
|
1627
|
-
sessionId: string;
|
|
1628
|
-
available?: boolean;
|
|
1629
|
-
}
|
|
1630
|
-
interface APIActParameters {
|
|
1631
|
-
input: string | Action;
|
|
1632
|
-
options?: ActOptions;
|
|
1633
|
-
frameId?: string;
|
|
1560
|
+
dragAndDrop(fromX: number, fromY: number, toX: number, toY: number, options: {
|
|
1561
|
+
button?: "left" | "right" | "middle";
|
|
1562
|
+
steps?: number;
|
|
1563
|
+
delay?: number;
|
|
1564
|
+
returnXpath: true;
|
|
1565
|
+
}): Promise<[string, string]>;
|
|
1566
|
+
dragAndDrop(fromX: number, fromY: number, toX: number, toY: number, options?: {
|
|
1567
|
+
button?: "left" | "right" | "middle";
|
|
1568
|
+
steps?: number;
|
|
1569
|
+
delay?: number;
|
|
1570
|
+
returnXpath?: false;
|
|
1571
|
+
}): Promise<void>;
|
|
1572
|
+
dragAndDrop(fromX: number, fromY: number, toX: number, toY: number, options: {
|
|
1573
|
+
button?: "left" | "right" | "middle";
|
|
1574
|
+
steps?: number;
|
|
1575
|
+
delay?: number;
|
|
1576
|
+
returnXpath: boolean;
|
|
1577
|
+
}): Promise<void | [string, string]>;
|
|
1578
|
+
/**
|
|
1579
|
+
* Type a string by dispatching keyDown/keyUp events per character.
|
|
1580
|
+
* Focus must already be on the desired element. Uses CDP Input.dispatchKeyEvent
|
|
1581
|
+
* and never falls back to Input.insertText. Optional delay applies between
|
|
1582
|
+
* successive characters.
|
|
1583
|
+
*/
|
|
1584
|
+
type(text: string, options?: {
|
|
1585
|
+
delay?: number;
|
|
1586
|
+
withMistakes?: boolean;
|
|
1587
|
+
}): Promise<void>;
|
|
1588
|
+
/**
|
|
1589
|
+
* Press a single key or key combination (keyDown then keyUp).
|
|
1590
|
+
* For printable characters, uses the text path on keyDown; for named keys, sets key/code/VK.
|
|
1591
|
+
* Supports key combinations with modifiers like "Cmd+A", "Ctrl+C", "Shift+Tab", etc.
|
|
1592
|
+
*/
|
|
1593
|
+
keyPress(key: string, options?: {
|
|
1594
|
+
delay?: number;
|
|
1595
|
+
}): Promise<void>;
|
|
1596
|
+
private _pressedModifiers;
|
|
1597
|
+
/** Press a key down without releasing it */
|
|
1598
|
+
private keyDown;
|
|
1599
|
+
/** Release a pressed key */
|
|
1600
|
+
private keyUp;
|
|
1601
|
+
/** Normalize modifier key names to match CDP expectations */
|
|
1602
|
+
private normalizeModifierKey;
|
|
1603
|
+
/**
|
|
1604
|
+
* Get the map of named keys with their properties
|
|
1605
|
+
*/
|
|
1606
|
+
private getNamedKeys;
|
|
1607
|
+
/**
|
|
1608
|
+
* Minimal description for printable keys (letters/digits/space) to provide code and VK.
|
|
1609
|
+
* Used when non-Shift modifiers are pressed to avoid sending text while keeping accelerator info.
|
|
1610
|
+
*/
|
|
1611
|
+
private describePrintableKey;
|
|
1612
|
+
private isMacOS;
|
|
1613
|
+
/**
|
|
1614
|
+
* Return Chromium mac editing commands (without trailing ':') for a given code like 'KeyA'
|
|
1615
|
+
* Only used on macOS to trigger system editing shortcuts (e.g., selectAll, copy, paste...).
|
|
1616
|
+
*/
|
|
1617
|
+
private macCommandsFor;
|
|
1618
|
+
/** Resolve the main-world execution context for the current main frame. */
|
|
1619
|
+
private mainWorldExecutionContextId;
|
|
1620
|
+
/**
|
|
1621
|
+
* Wait until the **current** main frame reaches a lifecycle state.
|
|
1622
|
+
* - Fast path via `document.readyState`.
|
|
1623
|
+
* - Event path listens at the session level and compares incoming `frameId`
|
|
1624
|
+
* to `mainFrameId()` **at event time** to follow root swaps.
|
|
1625
|
+
*/
|
|
1626
|
+
waitForMainLoadState(state: LoadState, timeoutMs?: number): Promise<void>;
|
|
1634
1627
|
}
|
|
1635
|
-
|
|
1636
|
-
|
|
1637
|
-
|
|
1638
|
-
|
|
1639
|
-
|
|
1628
|
+
|
|
1629
|
+
interface AgentContext {
|
|
1630
|
+
options: AgentExecuteOptionsBase;
|
|
1631
|
+
maxSteps: number;
|
|
1632
|
+
systemPrompt: string;
|
|
1633
|
+
allTools: ToolSet;
|
|
1634
|
+
messages: ModelMessage[];
|
|
1635
|
+
wrappedModel: ReturnType<typeof wrapLanguageModel>;
|
|
1636
|
+
initialPageUrl: string;
|
|
1637
|
+
}
|
|
1638
|
+
interface AgentState {
|
|
1639
|
+
collectedReasoning: string[];
|
|
1640
|
+
actions: AgentAction[];
|
|
1641
|
+
finalMessage: string;
|
|
1642
|
+
completed: boolean;
|
|
1643
|
+
currentPageUrl: string;
|
|
1640
1644
|
}
|
|
1641
|
-
interface
|
|
1645
|
+
interface AgentAction {
|
|
1646
|
+
type: string;
|
|
1647
|
+
reasoning?: string;
|
|
1648
|
+
taskCompleted?: boolean;
|
|
1649
|
+
action?: string;
|
|
1650
|
+
timeMs?: number;
|
|
1651
|
+
pageText?: string;
|
|
1652
|
+
pageUrl?: string;
|
|
1642
1653
|
instruction?: string;
|
|
1643
|
-
|
|
1644
|
-
frameId?: string;
|
|
1654
|
+
[key: string]: unknown;
|
|
1645
1655
|
}
|
|
1646
|
-
interface
|
|
1647
|
-
|
|
1648
|
-
|
|
1649
|
-
|
|
1650
|
-
|
|
1651
|
-
|
|
1652
|
-
|
|
1653
|
-
|
|
1654
|
-
|
|
1656
|
+
interface AgentResult {
|
|
1657
|
+
success: boolean;
|
|
1658
|
+
message: string;
|
|
1659
|
+
actions: AgentAction[];
|
|
1660
|
+
completed: boolean;
|
|
1661
|
+
metadata?: Record<string, unknown>;
|
|
1662
|
+
usage?: {
|
|
1663
|
+
input_tokens: number;
|
|
1664
|
+
output_tokens: number;
|
|
1665
|
+
reasoning_tokens?: number;
|
|
1666
|
+
cached_input_tokens?: number;
|
|
1667
|
+
inference_time_ms: number;
|
|
1668
|
+
};
|
|
1669
|
+
/**
|
|
1670
|
+
* The conversation messages from this execution.
|
|
1671
|
+
* Pass these to a subsequent execute() call via the `messages` option to continue the conversation.
|
|
1672
|
+
* @experimental
|
|
1673
|
+
*/
|
|
1674
|
+
messages?: ModelMessage[];
|
|
1655
1675
|
}
|
|
1656
|
-
|
|
1676
|
+
type AgentStreamResult = StreamTextResult<ToolSet, never> & {
|
|
1677
|
+
result: Promise<AgentResult>;
|
|
1678
|
+
};
|
|
1657
1679
|
/**
|
|
1658
|
-
*
|
|
1659
|
-
* particular field. The `segments` array describes the chain of keys/indices.
|
|
1660
|
-
*
|
|
1661
|
-
* - **String** segments indicate object property names.
|
|
1662
|
-
* - **Number** segments indicate array indices.
|
|
1663
|
-
*
|
|
1664
|
-
* For example, `["users", 0, "homepage"]` might describe reaching
|
|
1665
|
-
* the `homepage` field in `schema.users[0].homepage`.
|
|
1680
|
+
* Base callbacks shared between execute (non-streaming) and streaming modes.
|
|
1666
1681
|
*/
|
|
1667
|
-
interface
|
|
1682
|
+
interface AgentCallbacks {
|
|
1668
1683
|
/**
|
|
1669
|
-
*
|
|
1670
|
-
*
|
|
1684
|
+
* Optional function called before each step to modify settings.
|
|
1685
|
+
* You can change the model, tool choices, active tools, system prompt,
|
|
1686
|
+
* and input messages for each step.
|
|
1671
1687
|
*/
|
|
1672
|
-
|
|
1688
|
+
prepareStep?: PrepareStepFunction<ToolSet>;
|
|
1689
|
+
/**
|
|
1690
|
+
* Callback called when each step (LLM call) is finished.
|
|
1691
|
+
* This is called for intermediate steps as well as the final step.
|
|
1692
|
+
*/
|
|
1693
|
+
onStepFinish?: GenerateTextOnStepFinishCallback<ToolSet> | StreamTextOnStepFinishCallback<ToolSet>;
|
|
1673
1694
|
}
|
|
1674
|
-
|
|
1675
|
-
type EvaluateOptions = {
|
|
1676
|
-
/** The question to ask about the task state */
|
|
1677
|
-
question: string;
|
|
1678
|
-
/** The answer to the question */
|
|
1679
|
-
answer?: string;
|
|
1680
|
-
/** Whether to take a screenshot of the task state, or array of screenshots to evaluate */
|
|
1681
|
-
screenshot?: boolean | Buffer[];
|
|
1682
|
-
/** Custom system prompt for the evaluator */
|
|
1683
|
-
systemPrompt?: string;
|
|
1684
|
-
/** Delay in milliseconds before taking the screenshot @default 250 */
|
|
1685
|
-
screenshotDelayMs?: number;
|
|
1686
|
-
/** The agent's reasoning/thought process for completing the task */
|
|
1687
|
-
agentReasoning?: string;
|
|
1688
|
-
};
|
|
1689
|
-
type BatchAskOptions = {
|
|
1690
|
-
/** Array of questions with optional answers */
|
|
1691
|
-
questions: Array<{
|
|
1692
|
-
question: string;
|
|
1693
|
-
answer?: string;
|
|
1694
|
-
}>;
|
|
1695
|
-
/** Whether to take a screenshot of the task state */
|
|
1696
|
-
screenshot?: boolean;
|
|
1697
|
-
/** Custom system prompt for the evaluator */
|
|
1698
|
-
systemPrompt?: string;
|
|
1699
|
-
/** Delay in milliseconds before taking the screenshot @default 1000 */
|
|
1700
|
-
screenshotDelayMs?: number;
|
|
1701
|
-
};
|
|
1702
1695
|
/**
|
|
1703
|
-
*
|
|
1696
|
+
* Error message type for streaming-only callbacks used in non-streaming mode.
|
|
1697
|
+
* This provides a clear error message when users try to use streaming callbacks without stream: true.
|
|
1704
1698
|
*/
|
|
1705
|
-
|
|
1699
|
+
type StreamingCallbackNotAvailable = "This callback requires 'stream: true' in AgentConfig. Set stream: true to use streaming callbacks like onChunk, onFinish, onError, and onAbort.";
|
|
1700
|
+
/**
|
|
1701
|
+
* Callbacks specific to the non-streaming execute method.
|
|
1702
|
+
*/
|
|
1703
|
+
interface AgentExecuteCallbacks extends AgentCallbacks {
|
|
1706
1704
|
/**
|
|
1707
|
-
*
|
|
1705
|
+
* Callback called when each step (LLM call) is finished.
|
|
1708
1706
|
*/
|
|
1709
|
-
|
|
1707
|
+
onStepFinish?: GenerateTextOnStepFinishCallback<ToolSet>;
|
|
1710
1708
|
/**
|
|
1711
|
-
*
|
|
1709
|
+
* NOT AVAILABLE in non-streaming mode.
|
|
1710
|
+
* This callback requires `stream: true` in AgentConfig.
|
|
1711
|
+
*
|
|
1712
|
+
* @example
|
|
1713
|
+
* ```typescript
|
|
1714
|
+
* // Enable streaming to use onChunk:
|
|
1715
|
+
* const agent = stagehand.agent({ stream: true });
|
|
1716
|
+
* await agent.execute({
|
|
1717
|
+
* instruction: "...",
|
|
1718
|
+
* callbacks: { onChunk: async (chunk) => console.log(chunk) }
|
|
1719
|
+
* });
|
|
1720
|
+
* ```
|
|
1721
|
+
*/
|
|
1722
|
+
onChunk?: StreamingCallbackNotAvailable;
|
|
1723
|
+
/**
|
|
1724
|
+
* NOT AVAILABLE in non-streaming mode.
|
|
1725
|
+
* This callback requires `stream: true` in AgentConfig.
|
|
1726
|
+
*
|
|
1727
|
+
* @example
|
|
1728
|
+
* ```typescript
|
|
1729
|
+
* // Enable streaming to use onFinish:
|
|
1730
|
+
* const agent = stagehand.agent({ stream: true });
|
|
1731
|
+
* await agent.execute({
|
|
1732
|
+
* instruction: "...",
|
|
1733
|
+
* callbacks: { onFinish: (event) => console.log("Done!", event) }
|
|
1734
|
+
* });
|
|
1735
|
+
* ```
|
|
1736
|
+
*/
|
|
1737
|
+
onFinish?: StreamingCallbackNotAvailable;
|
|
1738
|
+
/**
|
|
1739
|
+
* NOT AVAILABLE in non-streaming mode.
|
|
1740
|
+
* This callback requires `stream: true` in AgentConfig.
|
|
1741
|
+
*
|
|
1742
|
+
* @example
|
|
1743
|
+
* ```typescript
|
|
1744
|
+
* // Enable streaming to use onError:
|
|
1745
|
+
* const agent = stagehand.agent({ stream: true });
|
|
1746
|
+
* await agent.execute({
|
|
1747
|
+
* instruction: "...",
|
|
1748
|
+
* callbacks: { onError: ({ error }) => console.error(error) }
|
|
1749
|
+
* });
|
|
1750
|
+
* ```
|
|
1751
|
+
*/
|
|
1752
|
+
onError?: StreamingCallbackNotAvailable;
|
|
1753
|
+
/**
|
|
1754
|
+
* NOT AVAILABLE in non-streaming mode.
|
|
1755
|
+
* This callback requires `stream: true` in AgentConfig.
|
|
1756
|
+
*
|
|
1757
|
+
* @example
|
|
1758
|
+
* ```typescript
|
|
1759
|
+
* // Enable streaming to use onAbort:
|
|
1760
|
+
* const agent = stagehand.agent({ stream: true });
|
|
1761
|
+
* await agent.execute({
|
|
1762
|
+
* instruction: "...",
|
|
1763
|
+
* callbacks: { onAbort: (event) => console.log("Aborted", event.steps) }
|
|
1764
|
+
* });
|
|
1765
|
+
* ```
|
|
1712
1766
|
*/
|
|
1713
|
-
|
|
1767
|
+
onAbort?: StreamingCallbackNotAvailable;
|
|
1714
1768
|
}
|
|
1715
|
-
|
|
1716
1769
|
/**
|
|
1717
|
-
*
|
|
1718
|
-
*
|
|
1719
|
-
* Owns the root CDP connection and wires Target/Page events into Page.
|
|
1720
|
-
* Maintains one Page per top-level target, adopts OOPIF child sessions into the owner Page,
|
|
1721
|
-
* and tracks target→page and (root) frame→target mappings for lookups.
|
|
1722
|
-
*
|
|
1723
|
-
* IMPORTANT: FrameId → session ownership is managed inside Page (via its FrameRegistry).
|
|
1724
|
-
* Context never “guesses” owners; it simply forwards events (with the emitting session)
|
|
1725
|
-
* so Page can record the correct owner at event time.
|
|
1770
|
+
* Callbacks specific to the streaming mode.
|
|
1726
1771
|
*/
|
|
1727
|
-
|
|
1728
|
-
readonly conn: CdpConnection;
|
|
1729
|
-
private readonly env;
|
|
1730
|
-
private readonly apiClient;
|
|
1731
|
-
private readonly localBrowserLaunchOptions;
|
|
1732
|
-
private constructor();
|
|
1733
|
-
private readonly _piercerInstalled;
|
|
1734
|
-
private _lastPopupSignalAt;
|
|
1735
|
-
private sessionKey;
|
|
1736
|
-
private readonly _sessionInit;
|
|
1737
|
-
private pagesByTarget;
|
|
1738
|
-
private mainFrameToTarget;
|
|
1739
|
-
private sessionOwnerPage;
|
|
1740
|
-
private frameOwnerPage;
|
|
1741
|
-
private pendingOopifByMainFrame;
|
|
1742
|
-
private createdAtByTarget;
|
|
1743
|
-
private typeByTarget;
|
|
1744
|
-
private _pageOrder;
|
|
1745
|
-
private pendingCreatedTargetUrl;
|
|
1772
|
+
interface AgentStreamCallbacks extends AgentCallbacks {
|
|
1746
1773
|
/**
|
|
1747
|
-
*
|
|
1774
|
+
* Callback called when each step (LLM call) is finished during streaming.
|
|
1748
1775
|
*/
|
|
1749
|
-
|
|
1750
|
-
env?: "LOCAL" | "BROWSERBASE";
|
|
1751
|
-
apiClient?: StagehandAPIClient | null;
|
|
1752
|
-
localBrowserLaunchOptions?: LocalBrowserLaunchOptions | null;
|
|
1753
|
-
}): Promise<V3Context>;
|
|
1776
|
+
onStepFinish?: StreamTextOnStepFinishCallback<ToolSet>;
|
|
1754
1777
|
/**
|
|
1755
|
-
*
|
|
1756
|
-
*
|
|
1778
|
+
* Callback called when an error occurs during streaming.
|
|
1779
|
+
* Use this to log errors or handle error states.
|
|
1757
1780
|
*/
|
|
1758
|
-
|
|
1759
|
-
private waitForInitialTopLevelTargets;
|
|
1760
|
-
private ensurePiercer;
|
|
1761
|
-
/** Mark a page target as the most-recent one (active). */
|
|
1762
|
-
private _pushActive;
|
|
1763
|
-
/** Remove a page target from the recency list (used on close). */
|
|
1764
|
-
private _removeFromOrder;
|
|
1765
|
-
/** Return the current active Page (most-recent page that still exists). */
|
|
1766
|
-
activePage(): Page | undefined;
|
|
1767
|
-
/** Explicitly mark a known Page as the most-recent active page (and focus it). */
|
|
1768
|
-
setActivePage(page: Page): void;
|
|
1781
|
+
onError?: StreamTextOnErrorCallback;
|
|
1769
1782
|
/**
|
|
1770
|
-
*
|
|
1783
|
+
* Callback called for each chunk of the stream.
|
|
1784
|
+
* Stream processing will pause until the callback promise resolves.
|
|
1771
1785
|
*/
|
|
1772
|
-
|
|
1786
|
+
onChunk?: StreamTextOnChunkCallback<ToolSet>;
|
|
1773
1787
|
/**
|
|
1774
|
-
*
|
|
1775
|
-
* Note: child (OOPIF) roots are intentionally not present in this mapping.
|
|
1788
|
+
* Callback called when the stream finishes.
|
|
1776
1789
|
*/
|
|
1777
|
-
|
|
1790
|
+
onFinish?: StreamTextOnFinishCallback<ToolSet>;
|
|
1778
1791
|
/**
|
|
1779
|
-
*
|
|
1792
|
+
* Callback called when the stream is aborted.
|
|
1780
1793
|
*/
|
|
1781
|
-
|
|
1794
|
+
onAbort?: (event: {
|
|
1795
|
+
steps: Array<StepResult<ToolSet>>;
|
|
1796
|
+
}) => PromiseLike<void> | void;
|
|
1797
|
+
}
|
|
1798
|
+
/**
|
|
1799
|
+
* Base options for agent execution (without callbacks).
|
|
1800
|
+
*/
|
|
1801
|
+
interface AgentExecuteOptionsBase {
|
|
1802
|
+
instruction: string;
|
|
1803
|
+
maxSteps?: number;
|
|
1804
|
+
page?: Page$1 | Page$2 | Page$3 | Page;
|
|
1805
|
+
highlightCursor?: boolean;
|
|
1782
1806
|
/**
|
|
1783
|
-
*
|
|
1784
|
-
*
|
|
1807
|
+
* Previous conversation messages to continue from.
|
|
1808
|
+
* Pass the `messages` from a previous AgentResult to continue that conversation.
|
|
1809
|
+
* @experimental
|
|
1785
1810
|
*/
|
|
1786
|
-
|
|
1811
|
+
messages?: ModelMessage[];
|
|
1787
1812
|
/**
|
|
1788
|
-
*
|
|
1813
|
+
* An AbortSignal that can be used to cancel the agent execution.
|
|
1814
|
+
* When aborted, the agent will stop and return a partial result.
|
|
1815
|
+
* @experimental
|
|
1816
|
+
*
|
|
1817
|
+
* @example
|
|
1818
|
+
* ```typescript
|
|
1819
|
+
* const controller = new AbortController();
|
|
1820
|
+
* setTimeout(() => controller.abort(), 30000); // 30 second timeout
|
|
1821
|
+
*
|
|
1822
|
+
* const result = await agent.execute({
|
|
1823
|
+
* instruction: "...",
|
|
1824
|
+
* signal: controller.signal
|
|
1825
|
+
* });
|
|
1826
|
+
* ```
|
|
1789
1827
|
*/
|
|
1790
|
-
|
|
1828
|
+
signal?: AbortSignal;
|
|
1829
|
+
}
|
|
1830
|
+
/**
|
|
1831
|
+
* Options for non-streaming agent execution.
|
|
1832
|
+
* Only accepts AgentExecuteCallbacks (no streaming-specific callbacks like onChunk, onFinish).
|
|
1833
|
+
*/
|
|
1834
|
+
interface AgentExecuteOptions extends AgentExecuteOptionsBase {
|
|
1791
1835
|
/**
|
|
1792
|
-
*
|
|
1793
|
-
*
|
|
1794
|
-
* - Attach on `Target.targetCreated` (fallback for OOPIFs).
|
|
1795
|
-
* - Handle auto-attach events.
|
|
1796
|
-
* - Clean up on detach/destroy.
|
|
1836
|
+
* Callbacks for non-streaming agent execution.
|
|
1837
|
+
* For streaming callbacks (onChunk, onFinish, onError, onAbort), use stream: true in AgentConfig.
|
|
1797
1838
|
*/
|
|
1798
|
-
|
|
1839
|
+
callbacks?: AgentExecuteCallbacks;
|
|
1840
|
+
}
|
|
1841
|
+
/**
|
|
1842
|
+
* Options for streaming agent execution.
|
|
1843
|
+
* Accepts AgentStreamCallbacks including onChunk, onFinish, onError, and onAbort.
|
|
1844
|
+
*/
|
|
1845
|
+
interface AgentStreamExecuteOptions extends AgentExecuteOptionsBase {
|
|
1846
|
+
/**
|
|
1847
|
+
* Callbacks for streaming agent execution.
|
|
1848
|
+
* Includes streaming-specific callbacks: onChunk, onFinish, onError, onAbort.
|
|
1849
|
+
*/
|
|
1850
|
+
callbacks?: AgentStreamCallbacks;
|
|
1851
|
+
}
|
|
1852
|
+
type AgentType = "openai" | "anthropic" | "google" | "microsoft";
|
|
1853
|
+
declare const AVAILABLE_CUA_MODELS: readonly ["openai/computer-use-preview", "openai/computer-use-preview-2025-03-11", "anthropic/claude-3-7-sonnet-latest", "anthropic/claude-opus-4-5-20251101", "anthropic/claude-haiku-4-5-20251001", "anthropic/claude-sonnet-4-20250514", "anthropic/claude-sonnet-4-5-20250929", "google/gemini-2.5-computer-use-preview-10-2025", "microsoft/fara-7b"];
|
|
1854
|
+
type AvailableCuaModel = (typeof AVAILABLE_CUA_MODELS)[number];
|
|
1855
|
+
interface AgentExecutionOptions<TOptions extends AgentExecuteOptions = AgentExecuteOptions> {
|
|
1856
|
+
options: TOptions;
|
|
1857
|
+
logger: (message: LogLine) => void;
|
|
1858
|
+
retries?: number;
|
|
1859
|
+
}
|
|
1860
|
+
interface AgentHandlerOptions {
|
|
1861
|
+
modelName: string;
|
|
1862
|
+
clientOptions?: ClientOptions;
|
|
1863
|
+
userProvidedInstructions?: string;
|
|
1864
|
+
experimental?: boolean;
|
|
1865
|
+
}
|
|
1866
|
+
interface ActionExecutionResult {
|
|
1867
|
+
success: boolean;
|
|
1868
|
+
error?: string;
|
|
1869
|
+
data?: unknown;
|
|
1870
|
+
}
|
|
1871
|
+
interface ToolUseItem extends ResponseItem {
|
|
1872
|
+
type: "tool_use";
|
|
1873
|
+
id: string;
|
|
1874
|
+
name: string;
|
|
1875
|
+
input: Record<string, unknown>;
|
|
1876
|
+
}
|
|
1877
|
+
interface AnthropicMessage {
|
|
1878
|
+
role: string;
|
|
1879
|
+
content: string | Array<AnthropicContentBlock>;
|
|
1880
|
+
}
|
|
1881
|
+
interface AnthropicContentBlock {
|
|
1882
|
+
type: string;
|
|
1883
|
+
[key: string]: unknown;
|
|
1884
|
+
}
|
|
1885
|
+
interface AnthropicTextBlock extends AnthropicContentBlock {
|
|
1886
|
+
type: "text";
|
|
1887
|
+
text: string;
|
|
1888
|
+
}
|
|
1889
|
+
interface AnthropicToolResult {
|
|
1890
|
+
type: "tool_result";
|
|
1891
|
+
tool_use_id: string;
|
|
1892
|
+
content: string | Array<AnthropicContentBlock>;
|
|
1893
|
+
}
|
|
1894
|
+
interface ResponseItem {
|
|
1895
|
+
type: string;
|
|
1896
|
+
id: string;
|
|
1897
|
+
[key: string]: unknown;
|
|
1898
|
+
}
|
|
1899
|
+
interface ComputerCallItem extends ResponseItem {
|
|
1900
|
+
type: "computer_call";
|
|
1901
|
+
call_id: string;
|
|
1902
|
+
action: {
|
|
1903
|
+
type: string;
|
|
1904
|
+
[key: string]: unknown;
|
|
1905
|
+
};
|
|
1906
|
+
pending_safety_checks?: Array<{
|
|
1907
|
+
id: string;
|
|
1908
|
+
code: string;
|
|
1909
|
+
message: string;
|
|
1910
|
+
}>;
|
|
1911
|
+
}
|
|
1912
|
+
interface FunctionCallItem extends ResponseItem {
|
|
1913
|
+
type: "function_call";
|
|
1914
|
+
call_id: string;
|
|
1915
|
+
name: string;
|
|
1916
|
+
arguments: string;
|
|
1917
|
+
}
|
|
1918
|
+
type ResponseInputItem = {
|
|
1919
|
+
role: string;
|
|
1920
|
+
content: string;
|
|
1921
|
+
} | {
|
|
1922
|
+
type: "computer_call_output";
|
|
1923
|
+
call_id: string;
|
|
1924
|
+
output: {
|
|
1925
|
+
type: "input_image";
|
|
1926
|
+
image_url: string;
|
|
1927
|
+
current_url?: string;
|
|
1928
|
+
error?: string;
|
|
1929
|
+
[key: string]: unknown;
|
|
1930
|
+
} | string;
|
|
1931
|
+
acknowledged_safety_checks?: Array<{
|
|
1932
|
+
id: string;
|
|
1933
|
+
code: string;
|
|
1934
|
+
message: string;
|
|
1935
|
+
}>;
|
|
1936
|
+
} | {
|
|
1937
|
+
type: "function_call_output";
|
|
1938
|
+
call_id: string;
|
|
1939
|
+
output: string;
|
|
1940
|
+
};
|
|
1941
|
+
interface AgentInstance {
|
|
1942
|
+
execute: (instructionOrOptions: string | AgentExecuteOptions) => Promise<AgentResult>;
|
|
1943
|
+
}
|
|
1944
|
+
type AgentProviderType = AgentType;
|
|
1945
|
+
type AgentModelConfig<TModelName extends string = string> = {
|
|
1946
|
+
modelName: TModelName;
|
|
1947
|
+
} & Record<string, unknown>;
|
|
1948
|
+
type AgentConfig = {
|
|
1799
1949
|
/**
|
|
1800
|
-
*
|
|
1801
|
-
* - Enable Page domain and lifecycle events.
|
|
1802
|
-
* - If top-level → create Page, wire listeners, resume.
|
|
1803
|
-
* - Else → probe child root frame id via `Page.getFrameTree` and adopt immediately
|
|
1804
|
-
* if the parent is known; otherwise stage until parent `frameAttached`.
|
|
1805
|
-
* - Resume the target only after listeners are wired.
|
|
1950
|
+
* Custom system prompt to provide to the agent. Overrides the default system prompt.
|
|
1806
1951
|
*/
|
|
1807
|
-
|
|
1952
|
+
systemPrompt?: string;
|
|
1808
1953
|
/**
|
|
1809
|
-
*
|
|
1810
|
-
* - Remove child session ownership and prune its subtree.
|
|
1811
|
-
* - If a top-level target, cleanup its `Page` and mappings.
|
|
1812
|
-
* - Drop any staged child for this session.
|
|
1954
|
+
* MCP integrations - Array of Client objects
|
|
1813
1955
|
*/
|
|
1814
|
-
|
|
1956
|
+
integrations?: (Client | string)[];
|
|
1815
1957
|
/**
|
|
1816
|
-
*
|
|
1958
|
+
* Tools passed to the agent client
|
|
1817
1959
|
*/
|
|
1818
|
-
|
|
1960
|
+
tools?: ToolSet;
|
|
1819
1961
|
/**
|
|
1820
|
-
*
|
|
1821
|
-
* We forward the *emitting session* with every event so Page can stamp ownership precisely.
|
|
1962
|
+
* Indicates CUA is disabled for this configuration
|
|
1822
1963
|
*/
|
|
1823
|
-
|
|
1964
|
+
cua?: boolean;
|
|
1824
1965
|
/**
|
|
1825
|
-
*
|
|
1966
|
+
* The model to use for agent functionality
|
|
1826
1967
|
*/
|
|
1827
|
-
|
|
1968
|
+
model?: string | AgentModelConfig<string>;
|
|
1828
1969
|
/**
|
|
1829
|
-
*
|
|
1970
|
+
* The model to use for tool execution (observe/act calls within agent tools).
|
|
1971
|
+
* If not specified, inherits from the main model configuration.
|
|
1972
|
+
* Format: "provider/model" (e.g., "openai/gpt-4o-mini", "google/gemini-2.0-flash-exp")
|
|
1830
1973
|
*/
|
|
1831
|
-
|
|
1832
|
-
private _notePopupSignal;
|
|
1974
|
+
executionModel?: string | AgentModelConfig<string>;
|
|
1833
1975
|
/**
|
|
1834
|
-
*
|
|
1835
|
-
*
|
|
1976
|
+
* Enable streaming mode for the agent.
|
|
1977
|
+
* When true, execute() returns AgentStreamResult with textStream for incremental output.
|
|
1978
|
+
* When false (default), execute() returns AgentResult after completion.
|
|
1836
1979
|
*/
|
|
1837
|
-
|
|
1980
|
+
stream?: boolean;
|
|
1981
|
+
};
|
|
1982
|
+
/**
|
|
1983
|
+
* Agent instance returned when stream: true is set in AgentConfig.
|
|
1984
|
+
* execute() returns a streaming result that can be consumed incrementally.
|
|
1985
|
+
* Accepts AgentStreamExecuteOptions with streaming-specific callbacks.
|
|
1986
|
+
*/
|
|
1987
|
+
interface StreamingAgentInstance {
|
|
1988
|
+
execute: (instructionOrOptions: string | AgentStreamExecuteOptions) => Promise<AgentStreamResult>;
|
|
1989
|
+
}
|
|
1990
|
+
/**
|
|
1991
|
+
* Agent instance returned when stream is false or not set in AgentConfig.
|
|
1992
|
+
* execute() returns a result after the agent completes.
|
|
1993
|
+
* Accepts AgentExecuteOptions with non-streaming callbacks only.
|
|
1994
|
+
*/
|
|
1995
|
+
interface NonStreamingAgentInstance {
|
|
1996
|
+
execute: (instructionOrOptions: string | AgentExecuteOptions) => Promise<AgentResult>;
|
|
1838
1997
|
}
|
|
1839
1998
|
|
|
1840
|
-
type
|
|
1841
|
-
|
|
1842
|
-
|
|
1999
|
+
type OpenAIClientOptions = Pick<ClientOptions$1, "baseURL" | "apiKey">;
|
|
2000
|
+
type AnthropicClientOptions = Pick<ClientOptions$2, "baseURL" | "apiKey">;
|
|
2001
|
+
interface GoogleServiceAccountCredentials {
|
|
2002
|
+
type?: string;
|
|
2003
|
+
project_id?: string;
|
|
2004
|
+
private_key_id?: string;
|
|
2005
|
+
private_key?: string;
|
|
2006
|
+
client_email?: string;
|
|
2007
|
+
client_id?: string;
|
|
2008
|
+
auth_uri?: string;
|
|
2009
|
+
token_uri?: string;
|
|
2010
|
+
auth_provider_x509_cert_url?: string;
|
|
2011
|
+
client_x509_cert_url?: string;
|
|
2012
|
+
universe_domain?: string;
|
|
2013
|
+
}
|
|
2014
|
+
type GoogleVertexProviderSettings = Pick<GoogleVertexProviderSettings$1, "project" | "location"> & {
|
|
2015
|
+
googleAuthOptions?: {
|
|
2016
|
+
credentials?: GoogleServiceAccountCredentials;
|
|
2017
|
+
};
|
|
1843
2018
|
};
|
|
1844
|
-
|
|
1845
|
-
|
|
1846
|
-
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
|
|
1850
|
-
|
|
2019
|
+
type AnthropicJsonSchemaObject = {
|
|
2020
|
+
definitions?: {
|
|
2021
|
+
MySchema?: {
|
|
2022
|
+
properties?: Record<string, unknown>;
|
|
2023
|
+
required?: string[];
|
|
2024
|
+
};
|
|
2025
|
+
};
|
|
2026
|
+
properties?: Record<string, unknown>;
|
|
2027
|
+
required?: string[];
|
|
2028
|
+
} & Record<string, unknown>;
|
|
2029
|
+
interface LLMTool {
|
|
2030
|
+
type: "function";
|
|
2031
|
+
name: string;
|
|
2032
|
+
description: string;
|
|
2033
|
+
parameters: Record<string, unknown>;
|
|
1851
2034
|
}
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
|
|
1859
|
-
|
|
2035
|
+
type AISDKProvider = (modelName: string) => LanguageModelV2;
|
|
2036
|
+
type AISDKCustomProvider = (options: ClientOptions) => AISDKProvider;
|
|
2037
|
+
type AvailableModel = "gpt-4.1" | "gpt-4.1-mini" | "gpt-4.1-nano" | "o4-mini" | "o3" | "o3-mini" | "o1" | "o1-mini" | "gpt-4o" | "gpt-4o-mini" | "gpt-4o-2024-08-06" | "gpt-4.5-preview" | "o1-preview" | "claude-3-5-sonnet-latest" | "claude-3-5-sonnet-20241022" | "claude-3-5-sonnet-20240620" | "claude-3-7-sonnet-latest" | "claude-3-7-sonnet-20250219" | "cerebras-llama-3.3-70b" | "cerebras-llama-3.1-8b" | "groq-llama-3.3-70b-versatile" | "groq-llama-3.3-70b-specdec" | "gemini-1.5-flash" | "gemini-1.5-pro" | "gemini-1.5-flash-8b" | "gemini-2.0-flash-lite" | "gemini-2.0-flash" | "gemini-2.5-flash-preview-04-17" | "gemini-2.5-pro-preview-03-25" | string;
|
|
2038
|
+
type ModelProvider = "openai" | "anthropic" | "cerebras" | "groq" | "google" | "aisdk";
|
|
2039
|
+
type ClientOptions = (OpenAIClientOptions | AnthropicClientOptions | GoogleVertexProviderSettings) & {
|
|
2040
|
+
apiKey?: string;
|
|
2041
|
+
provider?: AgentProviderType;
|
|
2042
|
+
baseURL?: string;
|
|
2043
|
+
/** OpenAI organization ID */
|
|
2044
|
+
organization?: string;
|
|
2045
|
+
/** Delay between agent actions in ms */
|
|
2046
|
+
waitBetweenActions?: number;
|
|
2047
|
+
/** Anthropic thinking budget for extended thinking */
|
|
2048
|
+
thinkingBudget?: number;
|
|
2049
|
+
/** Environment type for CUA agents (browser, mac, windows, ubuntu) */
|
|
2050
|
+
environment?: string;
|
|
2051
|
+
/** Max images for Microsoft FARA agent */
|
|
2052
|
+
maxImages?: number;
|
|
2053
|
+
/** Temperature for model inference */
|
|
2054
|
+
temperature?: number;
|
|
2055
|
+
};
|
|
2056
|
+
type ModelConfiguration = AvailableModel | (ClientOptions & {
|
|
2057
|
+
modelName: AvailableModel;
|
|
2058
|
+
});
|
|
2059
|
+
|
|
2060
|
+
interface ChatMessage {
|
|
2061
|
+
role: "system" | "user" | "assistant";
|
|
2062
|
+
content: ChatMessageContent;
|
|
1860
2063
|
}
|
|
1861
|
-
|
|
1862
|
-
|
|
1863
|
-
|
|
1864
|
-
|
|
2064
|
+
type ChatMessageContent = string | (ChatMessageImageContent | ChatMessageTextContent)[];
|
|
2065
|
+
interface ChatMessageImageContent {
|
|
2066
|
+
type: string;
|
|
2067
|
+
image_url?: {
|
|
2068
|
+
url: string;
|
|
2069
|
+
};
|
|
2070
|
+
text?: string;
|
|
2071
|
+
source?: {
|
|
2072
|
+
type: string;
|
|
2073
|
+
media_type: string;
|
|
2074
|
+
data: string;
|
|
2075
|
+
};
|
|
1865
2076
|
}
|
|
1866
|
-
interface
|
|
1867
|
-
type:
|
|
1868
|
-
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
|
|
2077
|
+
interface ChatMessageTextContent {
|
|
2078
|
+
type: string;
|
|
2079
|
+
text: string;
|
|
2080
|
+
}
|
|
2081
|
+
declare const AnnotatedScreenshotText = "This is a screenshot of the current page state with the elements annotated on it. Each element id is annotated with a number to the top left of it. Duplicate annotations at the same location are under each other vertically.";
|
|
2082
|
+
interface ChatCompletionOptions {
|
|
2083
|
+
messages: ChatMessage[];
|
|
2084
|
+
temperature?: number;
|
|
2085
|
+
top_p?: number;
|
|
2086
|
+
frequency_penalty?: number;
|
|
2087
|
+
presence_penalty?: number;
|
|
2088
|
+
image?: {
|
|
2089
|
+
buffer: Buffer;
|
|
2090
|
+
description?: string;
|
|
2091
|
+
};
|
|
2092
|
+
response_model?: {
|
|
2093
|
+
name: string;
|
|
2094
|
+
schema: StagehandZodSchema;
|
|
1873
2095
|
};
|
|
2096
|
+
tools?: LLMTool[];
|
|
2097
|
+
tool_choice?: "auto" | "none" | "required";
|
|
2098
|
+
maxOutputTokens?: number;
|
|
2099
|
+
requestId?: string;
|
|
1874
2100
|
}
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
|
|
2101
|
+
type LLMResponse = {
|
|
2102
|
+
id: string;
|
|
2103
|
+
object: string;
|
|
2104
|
+
created: number;
|
|
2105
|
+
model: string;
|
|
2106
|
+
choices: {
|
|
2107
|
+
index: number;
|
|
2108
|
+
message: {
|
|
2109
|
+
role: string;
|
|
2110
|
+
content: string | null;
|
|
2111
|
+
tool_calls: {
|
|
2112
|
+
id: string;
|
|
2113
|
+
type: string;
|
|
2114
|
+
function: {
|
|
2115
|
+
name: string;
|
|
2116
|
+
arguments: string;
|
|
2117
|
+
};
|
|
2118
|
+
}[];
|
|
2119
|
+
};
|
|
2120
|
+
finish_reason: string;
|
|
2121
|
+
}[];
|
|
2122
|
+
usage: {
|
|
2123
|
+
prompt_tokens: number;
|
|
2124
|
+
completion_tokens: number;
|
|
2125
|
+
total_tokens: number;
|
|
2126
|
+
};
|
|
2127
|
+
};
|
|
2128
|
+
interface CreateChatCompletionOptions {
|
|
2129
|
+
options: ChatCompletionOptions;
|
|
2130
|
+
logger: (message: LogLine) => void;
|
|
2131
|
+
retries?: number;
|
|
1878
2132
|
}
|
|
1879
|
-
|
|
1880
|
-
|
|
1881
|
-
|
|
2133
|
+
/** Simple usage shape if your LLM returns usage tokens. */
|
|
2134
|
+
interface LLMUsage {
|
|
2135
|
+
prompt_tokens: number;
|
|
2136
|
+
completion_tokens: number;
|
|
2137
|
+
total_tokens: number;
|
|
2138
|
+
reasoning_tokens?: number;
|
|
2139
|
+
cached_input_tokens?: number;
|
|
2140
|
+
}
|
|
2141
|
+
/**
|
|
2142
|
+
* For calls that use a schema: the LLMClient may return { data: T; usage?: LLMUsage }
|
|
2143
|
+
*/
|
|
2144
|
+
interface LLMParsedResponse<T> {
|
|
2145
|
+
data: T;
|
|
2146
|
+
usage?: LLMUsage;
|
|
2147
|
+
}
|
|
2148
|
+
declare abstract class LLMClient {
|
|
2149
|
+
type: "openai" | "anthropic" | "cerebras" | "groq" | (string & {});
|
|
2150
|
+
modelName: AvailableModel | (string & {});
|
|
2151
|
+
hasVision: boolean;
|
|
2152
|
+
clientOptions: ClientOptions;
|
|
2153
|
+
userProvidedInstructions?: string;
|
|
2154
|
+
constructor(modelName: AvailableModel, userProvidedInstructions?: string);
|
|
2155
|
+
abstract createChatCompletion<T>(options: CreateChatCompletionOptions & {
|
|
2156
|
+
options: {
|
|
2157
|
+
response_model: {
|
|
2158
|
+
name: string;
|
|
2159
|
+
schema: StagehandZodSchema;
|
|
2160
|
+
};
|
|
2161
|
+
};
|
|
2162
|
+
}): Promise<LLMParsedResponse<T>>;
|
|
2163
|
+
abstract createChatCompletion<T = LLMResponse>(options: CreateChatCompletionOptions): Promise<T>;
|
|
2164
|
+
generateObject: typeof generateObject;
|
|
2165
|
+
generateText: typeof generateText;
|
|
2166
|
+
streamText: typeof streamText;
|
|
2167
|
+
streamObject: typeof streamObject;
|
|
2168
|
+
generateImage: typeof experimental_generateImage;
|
|
2169
|
+
embed: typeof embed;
|
|
2170
|
+
embedMany: typeof embedMany;
|
|
2171
|
+
transcribe: typeof experimental_transcribe;
|
|
2172
|
+
generateSpeech: typeof experimental_generateSpeech;
|
|
2173
|
+
getLanguageModel?(): LanguageModelV2;
|
|
1882
2174
|
}
|
|
1883
2175
|
|
|
1884
2176
|
/**
|
|
@@ -1911,7 +2203,11 @@ declare class V3 {
|
|
|
1911
2203
|
private readonly domSettleTimeoutMs?;
|
|
1912
2204
|
private _isClosing;
|
|
1913
2205
|
browserbaseSessionId?: string;
|
|
2206
|
+
private browserbaseSessionUrl?;
|
|
2207
|
+
private browserbaseDebugUrl?;
|
|
1914
2208
|
get browserbaseSessionID(): string | undefined;
|
|
2209
|
+
get browserbaseSessionURL(): string | undefined;
|
|
2210
|
+
get browserbaseDebugURL(): string | undefined;
|
|
1915
2211
|
private _onCdpClosed;
|
|
1916
2212
|
readonly experimental: boolean;
|
|
1917
2213
|
readonly logInferenceToFile: boolean;
|
|
@@ -1959,6 +2255,7 @@ declare class V3 {
|
|
|
1959
2255
|
/** Apply post-connect local browser options that require CDP. */
|
|
1960
2256
|
private _applyPostConnectLocalOptions;
|
|
1961
2257
|
private _ensureBrowserbaseDownloadsEnabled;
|
|
2258
|
+
private resetBrowserbaseSessionMetadata;
|
|
1962
2259
|
/**
|
|
1963
2260
|
* Run an "act" instruction through the ActHandler.
|
|
1964
2261
|
*
|
|
@@ -2010,11 +2307,27 @@ declare class V3 {
|
|
|
2010
2307
|
/** Resolve an external page reference or fall back to the active V3 page. */
|
|
2011
2308
|
private resolvePage;
|
|
2012
2309
|
private normalizeToV3Page;
|
|
2310
|
+
private _logBrowserbaseSessionStatus;
|
|
2311
|
+
/**
|
|
2312
|
+
* Prepares shared context for agent execution (both execute and stream).
|
|
2313
|
+
* Extracts duplicated setup logic into a single helper.
|
|
2314
|
+
*/
|
|
2315
|
+
private prepareAgentExecution;
|
|
2013
2316
|
/**
|
|
2014
2317
|
* Create a v3 agent instance (AISDK tool-based) with execute().
|
|
2015
2318
|
* Mirrors the v2 Stagehand.agent() tool mode (no CUA provider here).
|
|
2319
|
+
*
|
|
2320
|
+
* @overload When stream: true, returns a streaming agent where execute() returns AgentStreamResult
|
|
2321
|
+
* @overload When stream is false/undefined, returns a non-streaming agent where execute() returns AgentResult
|
|
2016
2322
|
*/
|
|
2017
|
-
agent(options
|
|
2323
|
+
agent(options: AgentConfig & {
|
|
2324
|
+
stream: true;
|
|
2325
|
+
}): {
|
|
2326
|
+
execute: (instructionOrOptions: string | AgentStreamExecuteOptions) => Promise<AgentStreamResult>;
|
|
2327
|
+
};
|
|
2328
|
+
agent(options?: AgentConfig & {
|
|
2329
|
+
stream?: false;
|
|
2330
|
+
}): {
|
|
2018
2331
|
execute: (instructionOrOptions: string | AgentExecuteOptions) => Promise<AgentResult>;
|
|
2019
2332
|
};
|
|
2020
2333
|
}
|
|
@@ -2026,7 +2339,7 @@ declare class V3 {
|
|
|
2026
2339
|
declare abstract class AgentClient {
|
|
2027
2340
|
type: AgentType;
|
|
2028
2341
|
modelName: string;
|
|
2029
|
-
clientOptions:
|
|
2342
|
+
clientOptions: ClientOptions;
|
|
2030
2343
|
userProvidedInstructions?: string;
|
|
2031
2344
|
constructor(type: AgentType, modelName: string, userProvidedInstructions?: string);
|
|
2032
2345
|
abstract execute(options: AgentExecutionOptions): Promise<AgentResult>;
|
|
@@ -2049,7 +2362,7 @@ declare class AgentProvider {
|
|
|
2049
2362
|
* Create a new agent provider
|
|
2050
2363
|
*/
|
|
2051
2364
|
constructor(logger: (message: LogLine) => void);
|
|
2052
|
-
getClient(modelName: string, clientOptions?:
|
|
2365
|
+
getClient(modelName: string, clientOptions?: ClientOptions, userProvidedInstructions?: string, tools?: ToolSet$1): AgentClient;
|
|
2053
2366
|
static getAgentProvider(modelName: string): AgentProviderType;
|
|
2054
2367
|
}
|
|
2055
2368
|
|
|
@@ -2144,4 +2457,4 @@ declare class V3Evaluator {
|
|
|
2144
2457
|
private _evaluateWithMultipleScreenshots;
|
|
2145
2458
|
}
|
|
2146
2459
|
|
|
2147
|
-
export { type AISDKCustomProvider, type AISDKProvider, AISdkClient, AVAILABLE_CUA_MODELS, type ActOptions, type ActResult, type Action, type ActionExecutionResult, type AgentAction, type AgentConfig, type AgentExecuteOptions, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentModelConfig, AgentProvider, type AgentProviderType, type AgentResult, AgentScreenshotProviderError, type AgentType, AnnotatedScreenshotText, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AnyPage, type AvailableCuaModel, type AvailableModel, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ComputerCallItem, ConnectionTimeoutError, type ConsoleListener, ConsoleMessage, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, CuaModelRequiredError, ElementNotVisibleError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, type FunctionCallItem, HandlerNotInitializedError, type HistoryEntry, type InferStagehandSchema, InvalidAISDKModelFormatError, type JsonSchema, type JsonSchemaDocument, type JsonSchemaProperty, LLMClient, type LLMParsedResponse, type LLMResponse, LLMResponseError, type LLMTool, type LLMUsage, LOG_LEVEL_NAMES, type LoadState, type LocalBrowserLaunchOptions, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelConfiguration, type ModelProvider, type ObserveOptions, Page, PageNotFoundError, Response$1 as Response, ResponseBodyError, type ResponseInputItem, type ResponseItem, ResponseParseError, V3 as Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, StagehandEnvironmentError, StagehandError, StagehandEvalError, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, type StagehandMetrics, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, StagehandResponseParseError, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, type StagehandZodObject, type StagehandZodSchema, TimeoutError, type ToolUseItem, UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, V3, type V3Env, V3Evaluator, V3FunctionName, type V3Options, XPathResolutionError, ZodSchemaValidationError, connectToMCPServer, defaultExtractSchema, getZodType, injectUrls, isRunningInBun, isZod3Schema, isZod4Schema, jsonSchemaToZod, loadApiKeyFromEnv, modelToAgentProviderMap, pageTextSchema, providerEnvVarMap, toGeminiSchema, toJsonSchema, transformSchema, trimTrailingTextNode, validateZodSchema };
|
|
2460
|
+
export { type AISDKCustomProvider, type AISDKProvider, AISdkClient, AVAILABLE_CUA_MODELS, type ActOptions, type ActResult, ActTimeoutError, type Action, type ActionExecutionResult, AgentAbortError, type AgentAction, type AgentCallbacks, type AgentConfig, type AgentContext, type AgentExecuteCallbacks, type AgentExecuteOptions, type AgentExecuteOptionsBase, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentModelConfig, AgentProvider, type AgentProviderType, type AgentResult, AgentScreenshotProviderError, type AgentState, type AgentStreamCallbacks, type AgentStreamExecuteOptions, type AgentStreamResult, type AgentType, AnnotatedScreenshotText, type AnthropicClientOptions, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AnyPage, type AvailableCuaModel, type AvailableModel, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ComputerCallItem, ConnectionTimeoutError, type ConsoleListener, ConsoleMessage, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, CuaModelRequiredError, ElementNotVisibleError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, ExtractTimeoutError, type FunctionCallItem, type GoogleServiceAccountCredentials, type GoogleVertexProviderSettings, HandlerNotInitializedError, type HistoryEntry, type InferStagehandSchema, InvalidAISDKModelFormatError, type JsonSchema, type JsonSchemaDocument, type JsonSchemaProperty, LLMClient, type LLMParsedResponse, type LLMResponse, LLMResponseError, type LLMTool, type LLMUsage, LOG_LEVEL_NAMES, type LoadState, type LocalBrowserLaunchOptions, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelConfiguration, type ModelProvider, type NonStreamingAgentInstance, type ObserveOptions, ObserveTimeoutError, type OpenAIClientOptions, Page, PageNotFoundError, Response$1 as Response, ResponseBodyError, type ResponseInputItem, type ResponseItem, ResponseParseError, V3 as Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, StagehandEnvironmentError, StagehandError, StagehandEvalError, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, type StagehandMetrics, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, StagehandResponseParseError, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, type StagehandZodObject, type StagehandZodSchema, type StreamingAgentInstance, StreamingCallbacksInNonStreamingModeError, TimeoutError, type ToolUseItem, UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, V3, type V3Env, V3Evaluator, V3FunctionName, type V3Options, XPathResolutionError, ZodSchemaValidationError, connectToMCPServer, defaultExtractSchema, getZodType, injectUrls, isRunningInBun, isZod3Schema, isZod4Schema, jsonSchemaToZod, loadApiKeyFromEnv, modelToAgentProviderMap, pageTextSchema, providerEnvVarMap, toGeminiSchema, toJsonSchema, transformSchema, trimTrailingTextNode, validateZodSchema };
|