@browserbasehq/orca 3.0.0-preview.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/LICENSE +21 -0
  2. package/README.md +165 -0
  3. package/dist/index.d.ts +1611 -0
  4. package/dist/index.js +28681 -0
  5. package/dist/lib/api.d.ts +23 -0
  6. package/dist/lib/dom/build/scriptContent.d.ts +1 -0
  7. package/dist/lib/inference.d.ts +71 -0
  8. package/dist/lib/inferenceLogUtils.d.ts +12 -0
  9. package/dist/lib/logger.d.ts +54 -0
  10. package/dist/lib/prompt.d.ts +12 -0
  11. package/dist/lib/utils.d.ts +65 -0
  12. package/dist/lib/v3/agent/AgentClient.d.ts +18 -0
  13. package/dist/lib/v3/agent/AgentProvider.d.ts +18 -0
  14. package/dist/lib/v3/agent/AnthropicCUAClient.d.ts +55 -0
  15. package/dist/lib/v3/agent/OpenAICUAClient.d.ts +64 -0
  16. package/dist/lib/v3/agent/StagehandAgent.d.ts +15 -0
  17. package/dist/lib/v3/agent/tools/index.d.ts +229 -0
  18. package/dist/lib/v3/agent/tools/v3-act.d.ts +29 -0
  19. package/dist/lib/v3/agent/tools/v3-ariaTree.d.ts +11 -0
  20. package/dist/lib/v3/agent/tools/v3-close.d.ts +24 -0
  21. package/dist/lib/v3/agent/tools/v3-extract.d.ts +38 -0
  22. package/dist/lib/v3/agent/tools/v3-fillform.d.ts +37 -0
  23. package/dist/lib/v3/agent/tools/v3-goto.d.ts +29 -0
  24. package/dist/lib/v3/agent/tools/v3-navback.d.ts +17 -0
  25. package/dist/lib/v3/agent/tools/v3-screenshot.d.ts +13 -0
  26. package/dist/lib/v3/agent/tools/v3-scroll.d.ts +23 -0
  27. package/dist/lib/v3/agent/tools/v3-wait.d.ts +19 -0
  28. package/dist/lib/v3/agent/utils/cuaKeyMapping.d.ts +10 -0
  29. package/dist/lib/v3/agent/utils/imageCompression.d.ts +18 -0
  30. package/dist/lib/v3/agent/utils/messageProcessing.d.ts +13 -0
  31. package/dist/lib/v3/dom/build/scriptV3Content.d.ts +1 -0
  32. package/dist/lib/v3/dom/genDomScripts.d.ts +1 -0
  33. package/dist/lib/v3/dom/index.d.ts +1 -0
  34. package/dist/lib/v3/dom/piercer.entry.d.ts +1 -0
  35. package/dist/lib/v3/dom/piercer.runtime.d.ts +25 -0
  36. package/dist/lib/v3/handlers/actHandler.d.ts +18 -0
  37. package/dist/lib/v3/handlers/extractHandler.d.ts +29 -0
  38. package/dist/lib/v3/handlers/handlerUtils/actHandlerUtils.d.ts +18 -0
  39. package/dist/lib/v3/handlers/observeHandler.d.ts +15 -0
  40. package/dist/lib/v3/handlers/v3AgentHandler.d.ts +17 -0
  41. package/dist/lib/v3/handlers/v3CuaAgentHandler.d.ts +26 -0
  42. package/dist/lib/v3/index.d.ts +10 -0
  43. package/dist/lib/v3/launch/browserbase.d.ts +8 -0
  44. package/dist/lib/v3/launch/local.d.ts +13 -0
  45. package/dist/lib/v3/llm/AnthropicClient.d.ts +16 -0
  46. package/dist/lib/v3/llm/CerebrasClient.d.ts +17 -0
  47. package/dist/lib/v3/llm/GoogleClient.d.ts +19 -0
  48. package/dist/lib/v3/llm/GroqClient.d.ts +17 -0
  49. package/dist/lib/v3/llm/LLMClient.d.ts +99 -0
  50. package/dist/lib/v3/llm/LLMProvider.d.ts +10 -0
  51. package/dist/lib/v3/llm/OpenAIClient.d.ts +15 -0
  52. package/dist/lib/v3/llm/aisdk.d.ts +15 -0
  53. package/dist/lib/v3/logger.d.ts +48 -0
  54. package/dist/lib/v3/mcp/connection.d.ts +11 -0
  55. package/dist/lib/v3/mcp/utils.d.ts +3 -0
  56. package/dist/lib/v3/tests/default-page-tracking.spec.d.ts +1 -0
  57. package/dist/lib/v3/tests/perform-understudy-method.spec.d.ts +1 -0
  58. package/dist/lib/v3/tests/shadow-iframe.spec.d.ts +1 -0
  59. package/dist/lib/v3/tests/timeouts.spec.d.ts +1 -0
  60. package/dist/lib/v3/tests/v3.config.d.ts +4 -0
  61. package/dist/lib/v3/tests/v3.playwright.config.d.ts +2 -0
  62. package/dist/lib/v3/tests/xpath-for-location-deep.spec.d.ts +1 -0
  63. package/dist/lib/v3/types/act.d.ts +10 -0
  64. package/dist/lib/v3/types/agent.d.ts +132 -0
  65. package/dist/lib/v3/types/api.d.ts +40 -0
  66. package/dist/lib/v3/types/cache.d.ts +71 -0
  67. package/dist/lib/v3/types/context.d.ts +2 -0
  68. package/dist/lib/v3/types/evals.d.ts +71 -0
  69. package/dist/lib/v3/types/evaluator.d.ts +40 -0
  70. package/dist/lib/v3/types/llm.d.ts +11 -0
  71. package/dist/lib/v3/types/log.d.ts +23 -0
  72. package/dist/lib/v3/types/model.d.ts +20 -0
  73. package/dist/lib/v3/types/playwright.d.ts +6 -0
  74. package/dist/lib/v3/types/stagehand.d.ts +113 -0
  75. package/dist/lib/v3/types/stagehandApiErrors.d.ts +18 -0
  76. package/dist/lib/v3/types/stagehandErrors.d.ts +104 -0
  77. package/dist/lib/v3/types.d.ts +176 -0
  78. package/dist/lib/v3/understudy/a11y/snapshot.d.ts +71 -0
  79. package/dist/lib/v3/understudy/cdp.d.ts +58 -0
  80. package/dist/lib/v3/understudy/context.d.ts +120 -0
  81. package/dist/lib/v3/understudy/deepLocator.d.ts +69 -0
  82. package/dist/lib/v3/understudy/executionContextRegistry.d.ts +15 -0
  83. package/dist/lib/v3/understudy/frame.d.ts +63 -0
  84. package/dist/lib/v3/understudy/frameLocator.d.ts +46 -0
  85. package/dist/lib/v3/understudy/frameRegistry.d.ts +100 -0
  86. package/dist/lib/v3/understudy/locator.d.ts +196 -0
  87. package/dist/lib/v3/understudy/page.d.ts +241 -0
  88. package/dist/lib/v3/understudy/piercer.d.ts +4 -0
  89. package/dist/lib/v3/v3.d.ts +156 -0
  90. package/dist/lib/version.d.ts +5 -0
  91. package/package.json +130 -0
@@ -0,0 +1,99 @@
1
+ import { LLMTool } from "../types/llm";
2
+ import { embed, embedMany, experimental_generateImage, experimental_generateSpeech, experimental_transcribe, generateObject, generateText, LanguageModel, streamObject, streamText } from "ai";
3
+ import { ZodType } from "zod/v3";
4
+ import { LogLine } from "../types/log";
5
+ import { AvailableModel, ClientOptions } from "../types/model";
6
/** A single chat message: a role tag ("system", "user" or "assistant") plus its content payload. */
+ export interface ChatMessage {
7
+ role: "system" | "user" | "assistant";
8
+ content: ChatMessageContent;
9
+ }
10
/** Message content: either a plain string or an array that may mix image and text parts. */
+ export type ChatMessageContent = string | (ChatMessageImageContent | ChatMessageTextContent)[];
11
/**
 * Image content part of a chat message. Only `type` is required; `image_url`,
 * `text` and `source` are all optional.
 * NOTE(review): `image_url` and `source` look like two provider-specific image
 * encodings (URL vs. typed data payload) — confirm against the client implementations.
 */
+ export interface ChatMessageImageContent {
12
+ type: string;
13
+ image_url?: {
14
+ url: string;
15
+ };
16
+ text?: string;
17
+ source?: {
18
+ type: string;
19
+ media_type: string;
20
+ data: string;
21
+ };
22
+ }
23
/** Text content part of a chat message: a `type` tag and the required `text` string. */
+ export interface ChatMessageTextContent {
24
+ type: string;
25
+ text: string;
26
+ }
27
/** Constant caption text describing an annotated screenshot (numbered element ids drawn on the page image). */
+ export declare const AnnotatedScreenshotText = "This is a screenshot of the current page state with the elements annotated on it. Each element id is annotated with a number to the top left of it. Duplicate annotations at the same location are under each other vertically.";
28
/**
 * Options for one chat-completion request. Only `messages` is required.
 * Optional members cover sampling settings, an attached image buffer, a
 * Zod-described structured response model, tool definitions and tool choice,
 * a token cap, and a request id.
 */
+ export interface ChatCompletionOptions {
29
+ messages: ChatMessage[];
30
+ temperature?: number;
31
+ top_p?: number;
32
+ frequency_penalty?: number;
33
+ presence_penalty?: number;
// Optional image attachment: raw bytes plus an optional description.
34
+ image?: {
35
+ buffer: Buffer;
36
+ description?: string;
37
+ };
// Optional structured-output request: a name and the Zod schema for the result.
38
+ response_model?: {
39
+ name: string;
40
+ schema: ZodType;
41
+ };
42
+ tools?: LLMTool[];
43
+ tool_choice?: "auto" | "none" | "required";
44
+ maxTokens?: number;
45
+ requestId?: string;
46
+ }
47
/**
 * Shape of a chat-completion response: identifying fields, a list of choices
 * (each with a message whose `content` may be null and which carries a
 * `tool_calls` array), and token usage counts.
 * NOTE(review): field names resemble the OpenAI chat-completion format — confirm.
 */
+ export type LLMResponse = {
48
+ id: string;
49
+ object: string;
50
+ created: number;
51
+ model: string;
52
+ choices: {
53
+ index: number;
54
+ message: {
55
+ role: string;
56
+ content: string | null;
57
+ tool_calls: {
58
+ id: string;
59
+ type: string;
60
+ function: {
61
+ name: string;
// `arguments` is typed as a string, not a parsed object.
62
+ arguments: string;
63
+ };
64
+ }[];
65
+ };
66
+ finish_reason: string;
67
+ }[];
68
+ usage: {
69
+ prompt_tokens: number;
70
+ completion_tokens: number;
71
+ total_tokens: number;
72
+ };
73
+ };
74
/** Argument bundle for `createChatCompletion`: the request options, a logger callback, and an optional retry count. */
+ export interface CreateChatCompletionOptions {
75
+ options: ChatCompletionOptions;
76
+ logger: (message: LogLine) => void;
77
+ retries?: number;
78
+ }
79
/**
 * Abstract base class for LLM clients.
 *
 * Subclasses must implement `createChatCompletion`. The remaining function-valued
 * members are typed after the identically purposed exports of the `ai` package
 * (`generateObject`, `generateText`, `streamText`, ...). `getLanguageModel` is optional.
 */
+ export declare abstract class LLMClient {
// `(string & {})` keeps the listed literals visible while still accepting any string.
80
+ type: "openai" | "anthropic" | "cerebras" | "groq" | (string & {});
81
+ modelName: AvailableModel | (string & {});
82
+ hasVision: boolean;
83
+ clientOptions: ClientOptions;
84
+ userProvidedInstructions?: string;
85
+ constructor(modelName: AvailableModel, userProvidedInstructions?: string);
// Default result type is LLMResponse with an (optional) `usage` member; callers may supply their own T.
86
+ abstract createChatCompletion<T = LLMResponse & {
87
+ usage?: LLMResponse["usage"];
88
+ }>(options: CreateChatCompletionOptions): Promise<T>;
89
+ generateObject: typeof generateObject;
90
+ generateText: typeof generateText;
91
+ streamText: typeof streamText;
92
+ streamObject: typeof streamObject;
93
+ generateImage: typeof experimental_generateImage;
94
+ embed: typeof embed;
95
+ embedMany: typeof embedMany;
96
+ transcribe: typeof experimental_transcribe;
97
+ generateSpeech: typeof experimental_generateSpeech;
98
+ getLanguageModel?(): LanguageModel;
99
+ }
@@ -0,0 +1,10 @@
1
+ import { LogLine } from "../types/log";
2
+ import { AvailableModel, ClientOptions, ModelProvider } from "../types/model";
3
+ import { LLMClient } from "./LLMClient";
4
/**
 * Declared to return an AI SDK `LanguageModelV1` for the given sub-provider and
 * sub-model name, with an optional API key and base URL.
 *
 * The return type is resolved from the package root `"ai"` rather than the
 * internal `"ai/dist"` path, so it matches the other imports from `"ai"` in
 * these declarations and does not depend on a non-public entry point.
 */
+ export declare function getAISDKLanguageModel(subProvider: string, subModelName: string, apiKey?: string, baseURL?: string): import("ai").LanguageModelV1;
5
/**
 * Provider class constructed with a logger callback. `getClient` returns an
 * `LLMClient` for a model name (with optional client options); the static
 * `getModelProvider` returns the `ModelProvider` for a model name.
 */
+ export declare class LLMProvider {
6
+ private logger;
7
+ constructor(logger: (message: LogLine) => void);
8
+ getClient(modelName: AvailableModel, clientOptions?: ClientOptions): LLMClient;
9
+ static getModelProvider(modelName: AvailableModel): ModelProvider;
10
+ }
@@ -0,0 +1,15 @@
1
+ import { ClientOptions } from "openai";
2
+ import { LogLine } from "../types/log";
3
+ import { AvailableModel } from "../types/model";
4
+ import { CreateChatCompletionOptions, LLMClient, LLMResponse } from "./LLMClient";
5
/**
 * `LLMClient` subclass whose `type` is fixed to "openai" and whose
 * `clientOptions` use the `openai` package's `ClientOptions`.
 */
+ export declare class OpenAIClient extends LLMClient {
6
+ type: "openai";
7
+ private client;
8
+ clientOptions: ClientOptions;
// The parameter type requires `logger`, but the declared destructuring pattern only binds `modelName` and `clientOptions`.
9
+ constructor({ modelName, clientOptions, }: {
10
+ logger: (message: LogLine) => void;
11
+ modelName: AvailableModel;
12
+ clientOptions?: ClientOptions;
13
+ });
14
+ createChatCompletion<T = LLMResponse>({ options: optionsInitial, logger, retries, }: CreateChatCompletionOptions): Promise<T>;
15
+ }
@@ -0,0 +1,15 @@
1
+ import { LanguageModel } from "ai";
2
+ import { ChatCompletion } from "openai/resources";
3
+ import { LogLine } from "../types/log";
4
+ import { CreateChatCompletionOptions, LLMClient } from "./LLMClient";
5
/**
 * `LLMClient` subclass with `type` "aisdk", constructed from an AI SDK
 * `LanguageModel` and an optional logger. `getLanguageModel` is declared
 * (non-optional here) and `createChatCompletion` defaults its result type to
 * the `openai` package's `ChatCompletion`.
 */
+ export declare class AISdkClient extends LLMClient {
6
+ type: "aisdk";
7
+ private model;
8
+ private logger?;
9
+ constructor({ model, logger, }: {
10
+ model: LanguageModel;
11
+ logger?: (message: LogLine) => void;
12
+ });
13
+ getLanguageModel(): LanguageModel;
14
+ createChatCompletion<T = ChatCompletion>({ options, }: CreateChatCompletionOptions): Promise<T>;
15
+ }
@@ -0,0 +1,48 @@
1
+ import type { LogLine } from "./types/log";
2
+ /**
3
+ * Stagehand V3 Logging
4
+ *
5
+ * Design goals:
6
+ * - Provide a single global logging sink (Pino or console) for general output.
7
+ * - Support concurrent V3 instances by routing logs to an instance-bound external logger
8
+ * (e.g., Braintrust EvalLogger) without cross-talk.
9
+ * - Keep the public API simple: per-instance binding happens via V3, not here.
10
+ *
11
+ * How it works:
12
+ * - initV3Logger(): initializes the global logger backend (Pino if enabled, otherwise a
13
+ * lightweight console logger). No external logger is bound globally.
14
+ * - bindInstanceLogger()/unbindInstanceLogger(): registers an external logger callback per
15
+ * instance id for use by v3Logger.
16
+ * - withInstanceLogContext(): establishes a context so v3Logger can route logs to the
17
+ * correct instance's external logger during that call tree.
18
+ * - v3Logger(): preferred entrypoint for emitting structured logs from V3 internals and handlers.
19
+ * It routes to the instance logger when available, or falls back to the global backend.
20
+ */
21
/** Module-private verbosity level; one of the literals 0, 1 or 2. */
+ type Verbosity = 0 | 1 | 2;
22
/**
 * Module-private logger contract: a structured `log(line)` sink, a verbosity
 * setter, and `error` / `info` / `debug` helpers taking a message plus optional data.
 */
+ type MinimalLogger = {
23
+ log: (line: LogLine) => void;
24
+ setVerbosity: (v: Verbosity) => void;
25
+ error: (msg: string, data?: Record<string, unknown>) => void;
26
+ info: (msg: string, data?: Record<string, unknown>) => void;
27
+ debug: (msg: string, data?: Record<string, unknown>) => void;
28
+ };
29
/** Registers an external logger callback for the given instance id, for use by `v3Logger`. */
+ export declare function bindInstanceLogger(instanceId: string, logger: (line: LogLine) => void): void;
30
/** Removes the external logger registration for the given instance id (counterpart of `bindInstanceLogger`). */
+ export declare function unbindInstanceLogger(instanceId: string): void;
31
/**
 * Establishes a logging context for `instanceId` so that `v3Logger` can route
 * logs to that instance's external logger during the call tree of `fn`.
 * Declared to return a value of `fn`'s return type `T`.
 */
+ export declare function withInstanceLogContext<T>(instanceId: string, fn: () => T): T;
32
+ /**
33
+ * Initialize the global V3 logger backend.
34
+ * - When disablePino is false (default), uses the Stagehand Pino logger for rich console output.
35
+ * - When disablePino is true, uses a lightweight console logger that respects verbosity.
36
+ *
37
+ * Note: This function never binds an external logger globally. Use bindInstanceLogger()
38
+ * with withInstanceLogContext() for per-instance routing.
39
+ */
40
+ export declare function initV3Logger(opts?: {
41
+ verbose?: Verbosity; // one of 0 | 1 | 2
42
+ disablePino?: boolean; // true selects the lightweight console logger instead of Pino
43
+ pretty?: boolean;
44
+ }): Promise<void>;
45
/**
 * Returns a `MinimalLogger`.
 * NOTE(review): presumably the global backend set up by `initV3Logger` — confirm in the implementation.
 */
+ export declare function getV3Logger(): MinimalLogger;
46
/**
 * Preferred entry point for emitting a structured log line from V3 internals
 * and handlers: routes to the instance logger when one is available, otherwise
 * falls back to the global backend.
 */
+ export declare function v3Logger(line: LogLine): void;
47
/**
 * Sets the verbosity level (0, 1 or 2).
 * NOTE(review): presumably applies to the global logger backend — confirm.
 */
+ export declare function setV3Verbosity(v: Verbosity): void;
48
+ export {};
@@ -0,0 +1,11 @@
1
+ import { Client, ClientOptions } from "@modelcontextprotocol/sdk/client/index.js";
2
/** Options form of an MCP server target: the server URL (string or `URL`) plus optional MCP client options. */
+ export interface ConnectToMCPServerOptions {
3
+ serverUrl: string | URL;
4
+ clientOptions?: ClientOptions;
5
+ }
6
/**
 * Describes a command to run, with optional arguments and environment variables.
 * NOTE(review): by name this configures a stdio-based MCP server process — confirm.
 */
+ export interface StdioServerConfig {
7
+ command: string;
8
+ args?: string[];
9
+ env?: Record<string, string>;
10
+ }
11
/**
 * Async function accepting a server target in any of four forms (URL string,
 * `URL`, `StdioServerConfig`, or `ConnectToMCPServerOptions`) and resolving to
 * an MCP `Client`.
 */
+ export declare const connectToMCPServer: (serverConfig: string | URL | StdioServerConfig | ConnectToMCPServerOptions) => Promise<Client>;
@@ -0,0 +1,3 @@
1
+ import { Client } from "@modelcontextprotocol/sdk/dist/esm/client";
2
+ import { ToolSet } from "ai/dist";
3
/**
 * Async function taking a list of MCP clients (each a `Client` instance or a
 * string) plus a user-supplied `ToolSet`, and resolving to a `ToolSet`.
 * NOTE(review): string entries are presumably server URLs to connect to — confirm.
 */
+ export declare const resolveTools: (clients: (Client | string)[], userTools: ToolSet) => Promise<ToolSet>;
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,4 @@
1
+ import type { V3Options } from "@/lib/v3/types";
2
/** Exported `V3Options` value used as the test configuration. */
+ export declare const v3TestConfig: V3Options;
3
/**
 * Returns a `V3Options` object, accepting optional partial overrides.
 * NOTE(review): presumably merges `overrides` onto `v3TestConfig` — confirm.
 */
+ export declare function getV3TestConfig(overrides?: Partial<V3Options>): V3Options;
4
+ export default getV3TestConfig;
@@ -0,0 +1,2 @@
1
/** Default export of this module: a Playwright test configuration object. */
+ declare const _default: import("@playwright/test").PlaywrightTestConfig<{}, {}>;
2
+ export default _default;
@@ -0,0 +1,10 @@
1
/**
 * String enum of supported action names. Note that the member name and value
 * differ for `SCROLL`, whose value is "scrollTo".
 */
+ export declare enum SupportedPlaywrightAction {
2
+ CLICK = "click",
3
+ FILL = "fill",
4
+ TYPE = "type",
5
+ PRESS = "press",
6
+ SCROLL = "scrollTo",
7
+ NEXT_CHUNK = "nextChunk",
8
+ PREV_CHUNK = "prevChunk",
9
+ SELECT_OPTION_FROM_DROPDOWN = "selectOptionFromDropdown"
10
+ }
@@ -0,0 +1,132 @@
1
+ import { LogLine } from "./log";
2
/**
 * One action record produced by an agent. Only `type` is required; the named
 * optional fields are conveniences and the index signature allows arbitrary
 * additional keys.
 */
+ export interface AgentAction {
3
+ type: string;
4
+ reasoning?: string;
5
+ taskCompleted?: boolean;
6
+ action?: string;
7
+ timeMs?: number;
8
+ pageText?: string;
9
+ pageUrl?: string;
10
+ instruction?: string;
11
+ [key: string]: unknown;
12
+ }
13
/**
 * Outcome of an agent run: success and completion flags, a message, the list
 * of actions, and optional metadata and usage figures (token counts and
 * inference time in milliseconds).
 */
+ export interface AgentResult {
14
+ success: boolean;
15
+ message: string;
16
+ actions: AgentAction[];
17
+ completed: boolean;
18
+ metadata?: Record<string, unknown>;
19
+ usage?: {
20
+ input_tokens: number;
21
+ output_tokens: number;
22
+ inference_time_ms: number;
23
+ };
24
+ }
25
/**
 * Optional tuning knobs for an agent run; every member is optional.
 * NOTE(review): the unit of `waitBetweenActions` is not stated here — confirm (likely milliseconds).
 */
+ export interface AgentOptions {
26
+ maxSteps?: number;
27
+ autoScreenshot?: boolean;
28
+ waitBetweenActions?: number;
29
+ context?: string;
30
+ }
31
/** `AgentOptions` plus the required `instruction` string for a single execution. */
+ export interface AgentExecuteOptions extends AgentOptions {
32
+ instruction: string;
33
+ }
34
/** Agent provider identifier: "openai" or "anthropic". */
+ export type AgentProviderType = "openai" | "anthropic";
35
/**
 * Options for constructing an agent client. `apiKey` is required; the other
 * named members are optional and the index signature allows extra keys.
 */
+ export interface AgentClientOptions {
36
+ apiKey: string;
37
+ organization?: string;
38
+ baseURL?: string;
39
+ defaultMaxSteps?: number;
40
+ [key: string]: unknown;
41
+ }
42
/** Agent type identifier: "openai" or "anthropic" (same members as `AgentProviderType`). */
+ export type AgentType = "openai" | "anthropic";
43
/** Argument bundle for an agent execution: the execute options, a logger callback, and an optional retry count. */
+ export interface AgentExecutionOptions {
44
+ options: AgentExecuteOptions;
45
+ logger: (message: LogLine) => void;
46
+ retries?: number;
47
+ }
48
/**
 * Configuration for an agent handler: required model name and agent type,
 * plus optional client options, user-provided instructions, and an
 * `experimental` flag.
 */
+ export interface AgentHandlerOptions {
49
+ modelName: string;
50
+ clientOptions?: Record<string, unknown>;
51
+ userProvidedInstructions?: string;
52
+ agentType: AgentType;
53
+ experimental?: boolean;
54
+ }
55
/** Result of executing one action: a success flag with an optional error string and optional untyped data. */
+ export interface ActionExecutionResult {
56
+ success: boolean;
57
+ error?: string;
58
+ data?: unknown;
59
+ }
60
/** `ResponseItem` narrowed to `type: "tool_use"`, carrying the tool name and its input object. */
+ export interface ToolUseItem extends ResponseItem {
61
+ type: "tool_use";
62
+ id: string;
63
+ name: string;
64
+ input: Record<string, unknown>;
65
+ }
66
/** Message with a role and content that is either a string or an array of content blocks. */
+ export interface AnthropicMessage {
67
+ role: string;
68
+ content: string | Array<AnthropicContentBlock>;
69
+ }
70
/** Generic content block: a `type` tag plus arbitrary additional keys. */
+ export interface AnthropicContentBlock {
71
+ type: string;
72
+ [key: string]: unknown;
73
+ }
74
/** Content block narrowed to `type: "text"` with a required `text` string. */
+ export interface AnthropicTextBlock extends AnthropicContentBlock {
75
+ type: "text";
76
+ text: string;
77
+ }
78
/**
 * Tool result entry (`type: "tool_result"`) that references a tool use by
 * `tool_use_id` and carries string or content-block content. Unlike
 * `AnthropicTextBlock`, it does not extend `AnthropicContentBlock`.
 */
+ export interface AnthropicToolResult {
79
+ type: "tool_result";
80
+ tool_use_id: string;
81
+ content: string | Array<AnthropicContentBlock>;
82
+ }
83
/** Base shape for response items: a `type` tag, an `id`, and arbitrary additional keys. */
+ export interface ResponseItem {
84
+ type: string;
85
+ id: string;
86
+ [key: string]: unknown;
87
+ }
88
/**
 * `ResponseItem` narrowed to `type: "computer_call"`: a call id, an action
 * object (typed tag plus arbitrary keys), and an optional list of pending
 * safety checks.
 */
+ export interface ComputerCallItem extends ResponseItem {
89
+ type: "computer_call";
90
+ call_id: string;
91
+ action: {
92
+ type: string;
93
+ [key: string]: unknown;
94
+ };
95
+ pending_safety_checks?: Array<{
96
+ id: string;
97
+ code: string;
98
+ message: string;
99
+ }>;
100
+ }
101
/** `ResponseItem` narrowed to `type: "function_call"`: a call id, the function name, and its arguments as a string. */
+ export interface FunctionCallItem extends ResponseItem {
102
+ type: "function_call";
103
+ call_id: string;
104
+ name: string;
105
+ arguments: string;
106
+ }
107
/**
 * Union of three input item shapes:
 *  - a plain `{ role, content }` message (no `type` member);
 *  - a "computer_call_output" whose `output` is either an image object or a
 *    string, with optional acknowledged safety checks;
 *  - a "function_call_output" whose `output` is a string.
 */
+ export type ResponseInputItem = {
108
+ role: string;
109
+ content: string;
110
+ } | {
111
+ type: "computer_call_output";
112
+ call_id: string;
113
+ output: {
114
+ type: "input_image";
115
+ image_url: string;
116
+ current_url?: string;
117
+ error?: string;
118
+ [key: string]: unknown;
119
+ } | string;
120
+ acknowledged_safety_checks?: Array<{
121
+ id: string;
122
+ code: string;
123
+ message: string;
124
+ }>;
125
+ } | {
126
+ type: "function_call_output";
127
+ call_id: string;
128
+ output: string;
129
+ };
130
/** Agent handle exposing `execute`, which accepts either an instruction string or full execute options and resolves to an `AgentResult`. */
+ export interface AgentInstance {
131
+ execute: (instructionOrOptions: string | AgentExecuteOptions) => Promise<AgentResult>;
132
+ }
@@ -0,0 +1,40 @@
1
+ import Browserbase from "@browserbasehq/sdk";
2
+ import { LogLine } from "./log";
3
/** Constructor parameters for the API client: API key, project id, and a logger callback (all required). */
+ export interface StagehandAPIConstructorParams {
4
+ apiKey: string;
5
+ projectId: string;
6
+ logger: (message: LogLine) => void;
7
+ }
8
/**
 * Parameters for executing a remote action: one of six method names plus
 * optional untyped `args` and `params`.
 * NOTE(review): the distinction between `args` and `params` is not visible here — confirm with the API client.
 */
+ export interface ExecuteActionParams {
9
+ method: "act" | "extract" | "observe" | "navigate" | "end" | "agentExecute";
10
+ args?: unknown;
11
+ params?: unknown;
12
+ }
13
/**
 * Parameters for starting a session. The first five members are required; the
 * rest are optional. `browserbaseSessionCreateParams` reuses the Browserbase
 * SDK's session-create params with `projectId` made optional.
 */
+ export interface StartSessionParams {
14
+ modelName: string;
15
+ modelApiKey: string;
16
+ domSettleTimeoutMs: number;
17
+ verbose: number;
18
+ debugDom: boolean;
19
+ systemPrompt?: string;
// `projectId` is removed via Omit and re-added as optional.
20
+ browserbaseSessionCreateParams?: Omit<Browserbase.Sessions.SessionCreateParams, "projectId"> & {
21
+ projectId?: string;
22
+ };
23
+ selfHeal?: boolean;
24
+ waitForCaptchaSolves?: boolean;
25
+ actionTimeoutMs?: number;
26
+ browserbaseSessionID?: string;
27
+ }
28
/** Result of starting a session: the session id and an optional `available` flag. */
+ export interface StartSessionResult {
29
+ sessionId: string;
30
+ available?: boolean;
31
+ }
32
/** Successful API response: `success` is the literal `true` and `data` holds the payload of type `T`. */
+ export interface SuccessResponse<T> {
33
+ success: true;
34
+ data: T;
35
+ }
36
/** Failed API response: `success` is the literal `false` and `message` describes the failure. */
+ export interface ErrorResponse {
37
+ success: false;
38
+ message: string;
39
+ }
40
/** Discriminated union of success and error responses; narrow on the `success` literal. */
+ export type ApiResponse<T> = SuccessResponse<T> | ErrorResponse;
@@ -0,0 +1,71 @@
1
+ import type { AgentResult } from "@/lib/v3/types/agent";
2
+ import type { Action } from "@/lib/v3/types/stagehand";
3
+ import type { LoadState } from "@/lib/v3/types";
4
/**
 * Cached record of an `act` call: format version (literal 1), the instruction,
 * URL, variables, and the resulting actions, with optional description and message.
 */
+ export interface CachedActEntry {
5
+ version: 1;
6
+ instruction: string;
7
+ url: string;
8
+ variables: Record<string, string>;
9
+ actions: Action[];
10
+ actionDescription?: string;
11
+ message?: string;
12
+ }
13
/**
 * Union of the known replay step shapes plus an open-ended fallback
 * (`type: string` with arbitrary keys) for any other step.
 */
+ export type AgentReplayStep = AgentReplayActStep | AgentReplayFillFormStep | AgentReplayGotoStep | AgentReplayScrollStep | AgentReplayWaitStep | AgentReplayNavBackStep | {
14
+ type: string;
15
+ [key: string]: unknown;
16
+ };
17
/** Replay step of type "act": the instruction plus optional recorded actions, description, message, and timeout. */
+ export interface AgentReplayActStep {
18
+ type: "act";
19
+ instruction: string;
20
+ actions?: Action[];
21
+ actionDescription?: string;
22
+ message?: string;
23
+ timeout?: number;
24
+ }
25
/** Replay step of type "fillForm": optional field list (action/value pairs), observe results, and actions. */
+ export interface AgentReplayFillFormStep {
26
+ type: "fillForm";
27
+ fields?: Array<{
28
+ action: string;
29
+ value: string;
30
+ }>;
31
+ observeResults?: Action[];
32
+ actions?: Action[];
33
+ }
34
/** Replay step of type "goto": the target URL and an optional load state to wait for. */
+ export interface AgentReplayGotoStep {
35
+ type: "goto";
36
+ url: string;
37
+ waitUntil?: LoadState;
38
+ }
39
/** Replay step of type "scroll": optional horizontal/vertical deltas and an optional anchor point. */
+ export interface AgentReplayScrollStep {
40
+ type: "scroll";
41
+ deltaX?: number;
42
+ deltaY?: number;
43
+ anchor?: {
44
+ x: number;
45
+ y: number;
46
+ };
47
+ }
48
/** Replay step of type "wait" with a required duration `timeMs`. */
+ export interface AgentReplayWaitStep {
49
+ type: "wait";
50
+ timeMs: number;
51
+ }
52
/** Replay step of type "navback" with an optional load state to wait for. */
+ export interface AgentReplayNavBackStep {
53
+ type: "navback";
54
+ waitUntil?: LoadState;
55
+ }
56
/**
 * Subset of agent execute options stored with a cache entry; all members optional.
 * NOTE(review): the member list matches `AgentOptions` in types/agent — confirm whether the duplication is intentional.
 */
+ export interface SanitizedAgentExecuteOptions {
57
+ maxSteps?: number;
58
+ autoScreenshot?: boolean;
59
+ waitBetweenActions?: number;
60
+ context?: string;
61
+ }
62
/**
 * Cached record of an agent run: format version (literal 1), instruction,
 * start URL, sanitized options, a configuration signature, the replay steps,
 * the final result, and a timestamp string.
 */
+ export interface CachedAgentEntry {
63
+ version: 1;
64
+ instruction: string;
65
+ startUrl: string;
66
+ options: SanitizedAgentExecuteOptions;
67
+ configSignature: string;
68
+ steps: AgentReplayStep[];
69
+ result: AgentResult;
70
+ timestamp: string;
71
+ }
@@ -0,0 +1,2 @@
1
/** Template-literal string type of the form `<number>-<number>`. */
+ export type EncodedId = `${number}-${number}`;
2
/**
 * Exported regular expression.
 * NOTE(review): presumably matches `EncodedId` strings — the pattern itself is not visible in the declaration; confirm.
 */
+ export declare const ID_PATTERN: RegExp;
@@ -0,0 +1,71 @@
1
+ import { z } from "zod/v3";
2
+ import type { AvailableModel } from "../types/model";
3
+ import type { LogLine } from "../types/log";
4
+ import type { AgentInstance } from "../types/agent";
5
+ import type { EvalCase } from "braintrust";
6
+ import type { V3 } from "@/lib/v3/v3";
7
+ import { EvalLogger } from "@/evals/logger";
8
/**
 * Bundle handed to an eval task after initialisation: optional `v3` instance
 * and `v3Agent`, plus the required eval logger, debug and session URLs, model
 * name, and agent.
 */
+ export type StagehandInitResult = {
9
+ v3?: V3;
10
+ v3Agent?: AgentInstance;
11
+ logger: EvalLogger;
12
+ debugUrl: string;
13
+ sessionUrl: string;
14
+ modelName: AvailableModel;
15
+ agent: AgentInstance;
16
+ };
17
/**
 * Signature of an eval task: receives the init result extended with the
 * eval `input`, and resolves to a success flag, collected logs, debug and
 * session URLs, and an optional error.
 */
+ export type EvalFunction = (taskInput: StagehandInitResult & {
18
+ input: EvalInput;
19
+ }) => Promise<{
20
+ _success: boolean;
21
+ logs: LogLine[];
22
+ debugUrl: string;
23
+ sessionUrl: string;
24
+ error?: unknown;
25
+ }>;
26
/** Zod enum schema listing the accepted eval category names. */
+ export declare const EvalCategorySchema: z.ZodEnum<["observe", "act", "combination", "extract", "experimental", "targeted_extract", "regression", "regression_llm_providers", "llm_clients", "agent", "external_agent_benchmarks"]>;
27
/** Union of category name literals inferred from `EvalCategorySchema`. */
+ export type EvalCategory = z.infer<typeof EvalCategorySchema>;
28
/** Input for one eval: its name, the model to run, and optional free-form params. */
+ export interface EvalInput {
29
+ name: string;
30
+ modelName: AvailableModel;
31
+ params?: Record<string, unknown>;
32
+ }
33
/**
 * Braintrust `EvalCase` specialised to `EvalInput` input, `unknown` expected
 * value, and metadata of `{ model, test, categories? }`. The body redeclares
 * `input`, `name`, `tags`, `metadata` and `expected` as required members.
 */
+ export interface Testcase extends EvalCase<EvalInput, unknown, {
34
+ model: AvailableModel;
35
+ test: string;
36
+ categories?: string[];
37
+ }> {
38
+ input: EvalInput;
39
+ name: string;
40
+ tags: string[];
// Same shape as the metadata type argument passed to EvalCase above.
41
+ metadata: {
42
+ model: AvailableModel;
43
+ test: string;
44
+ categories?: string[];
45
+ };
46
+ expected: unknown;
47
+ }
48
/** Summary row for one eval: its input, an output carrying the `_success` flag, the name, and a numeric score. */
+ export interface SummaryResult {
49
+ input: EvalInput;
50
+ output: {
51
+ _success: boolean;
52
+ };
53
+ name: string;
54
+ score: number;
55
+ }
56
/** Generic argument bundle of input, output and expected value, with optional `{ model, test }` metadata. */
+ export interface EvalArgs<TInput, TOutput, TExpected> {
57
+ input: TInput;
58
+ output: TOutput;
59
+ expected: TExpected;
60
+ metadata?: {
61
+ model: AvailableModel;
62
+ test: string;
63
+ };
64
+ }
65
/** A named numeric score. */
+ export interface EvalResult {
66
+ name: string;
67
+ score: number;
68
+ }
69
/** `LogLine` extended with an optional `parsedAuxiliary` member (string or object). */
+ export type LogLineEval = LogLine & {
70
+ parsedAuxiliary?: string | object;
71
+ };
@@ -0,0 +1,40 @@
1
/** Options for a single evaluator question; only `question` is required. */
+ export type EvaluateOptions = {
2
+ /** The question to ask about the task state */
3
+ question: string;
4
+ /** The answer to the question */
5
+ answer?: string;
6
+ /** Whether to take a screenshot of the task state, or array of screenshots to evaluate */
7
+ screenshot?: boolean | Buffer[];
8
+ /** Custom system prompt for the evaluator */
9
+ systemPrompt?: string;
10
+ /** Delay in milliseconds before taking the screenshot @default 250 */
11
+ screenshotDelayMs?: number;
12
+ /** The agent's reasoning/thought process for completing the task */
13
+ agentReasoning?: string;
14
+ };
15
/**
 * Options for asking the evaluator several questions at once; only `questions` is required.
 * NOTE(review): the documented `screenshotDelayMs` default here (1000) differs from
 * the one documented on `EvaluateOptions` (250) — confirm this is intentional.
 */
+ export type BatchAskOptions = {
16
+ /** Array of questions with optional answers */
17
+ questions: Array<{
18
+ question: string;
19
+ answer?: string;
20
+ }>;
21
+ /** Whether to take a screenshot of the task state */
22
+ screenshot?: boolean;
23
+ /** Custom system prompt for the evaluator */
24
+ systemPrompt?: string;
25
+ /** Delay in milliseconds before taking the screenshot @default 1000 */
26
+ screenshotDelayMs?: number;
27
+ };
28
+ /**
29
+ * Result of an evaluation
30
+ */
31
+ export interface EvaluationResult {
32
+ /**
33
+ * The evaluation result ('YES', 'NO', or 'INVALID' if parsing failed or value was unexpected)
34
+ */
35
+ evaluation: "YES" | "NO" | "INVALID"; // closed set of three string literals
36
+ /**
37
+ * The reasoning behind the evaluation
38
+ */
39
+ reasoning: string;
40
+ }
@@ -0,0 +1,11 @@
1
+ import { LanguageModel } from "ai";
2
/** Tool definition passed to an LLM: always `type: "function"`, with a name, description, and a parameters object. */
+ export interface LLMTool {
3
+ type: "function";
4
+ name: string;
5
+ description: string;
6
+ parameters: Record<string, unknown>;
7
+ }
8
/** Function that maps a model name to an AI SDK `LanguageModel`. */
+ export type AISDKProvider = (modelName: string) => LanguageModel;
9
/** Factory that takes an options object with an `apiKey` and returns an `AISDKProvider`. */
+ export type AISDKCustomProvider = (options: {
10
+ apiKey: string;
11
+ }) => AISDKProvider;
@@ -0,0 +1,23 @@
1
/** Numeric log level: 0, 1 or 2. */
+ export type LogLevel = 0 | 1 | 2;
2
+ /**
3
+ * Mapping between numeric log levels and their names
4
+ *
5
+ * 0 - error/warn - Critical issues or important warnings
6
+ * 1 - info - Standard information messages
7
+ * 2 - debug - Detailed information for debugging
8
+ */
9
+ export declare const LOG_LEVEL_NAMES: Record<LogLevel, string>; // one name per LogLevel value (0, 1, 2)
10
/**
 * One structured log entry. Only `message` is required. `auxiliary` is an
 * optional map of named values, each stored as a string alongside a tag
 * naming its original type.
 */
+ export type LogLine = {
11
+ id?: string;
12
+ category?: string;
13
+ message: string;
14
+ level?: LogLevel;
15
+ timestamp?: string;
16
+ auxiliary?: {
17
+ [key: string]: {
// Always a string, regardless of the declared `type` tag below.
18
+ value: string;
19
+ type: "object" | "string" | "html" | "integer" | "float" | "boolean";
20
+ };
21
+ };
22
+ };
23
/** Callback type that consumes a single `LogLine`. */
+ export type Logger = (logLine: LogLine) => void;
@@ -0,0 +1,20 @@
1
+ import type { ClientOptions as AnthropicClientOptions } from "@anthropic-ai/sdk";
2
+ import type { ClientOptions as OpenAIClientOptions } from "openai";
3
+ import { z } from "zod/v3";
4
/** Zod enum schema listing the model names known to this package version. */
+ export declare const AvailableModelSchema: z.ZodEnum<["gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o4-mini", "o3", "o3-mini", "o1", "o1-mini", "gpt-4o", "gpt-4o-mini", "gpt-4o-2024-08-06", "gpt-4.5-preview", "o1-preview", "claude-3-5-sonnet-latest", "claude-3-5-sonnet-20241022", "claude-3-5-sonnet-20240620", "claude-3-7-sonnet-latest", "claude-3-7-sonnet-20250219", "cerebras-llama-3.3-70b", "cerebras-llama-3.1-8b", "groq-llama-3.3-70b-versatile", "groq-llama-3.3-70b-specdec", "gemini-1.5-flash", "gemini-1.5-pro", "gemini-1.5-flash-8b", "gemini-2.0-flash-lite", "gemini-2.0-flash", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25"]>;
5
/**
 * Model name: any literal from `AvailableModelSchema`, or any other string.
 * NOTE(review): because the union includes plain `string`, the literal members
 * are absorbed and the type is effectively `string`; the `LLMClient`
 * declaration uses `(string & {})` for similar fields — confirm whether that
 * form is wanted here before changing it, since mapped types keyed on this
 * alias would be affected.
 */
+ export type AvailableModel = z.infer<typeof AvailableModelSchema> | string;
6
/** Identifier of the provider backing a model; one of six string literals. */
+ export type ModelProvider = "openai" | "anthropic" | "cerebras" | "groq" | "google" | "aisdk";
7
/** Client options accepted by this package: either the OpenAI SDK's or the Anthropic SDK's `ClientOptions`. */
+ export type ClientOptions = OpenAIClientOptions | AnthropicClientOptions;
8
/** Model configuration: either just a model name, or client options combined with a required `modelName`. */
+ export type ModelConfiguration = AvailableModel | (ClientOptions & {
9
+ modelName: AvailableModel;
10
+ });
11
/**
 * Loose JSON-schema-like object. `properties` and `required` may appear at the
 * top level or nested under `definitions.MySchema`; every member is optional.
 * NOTE(review): the fixed `MySchema` key presumably matches the name used when
 * the schema is generated — confirm against the Anthropic client.
 */
+ export interface AnthropicJsonSchemaObject {
12
+ definitions?: {
13
+ MySchema?: {
14
+ properties?: Record<string, unknown>;
15
+ required?: string[];
16
+ };
17
+ };
18
+ properties?: Record<string, unknown>;
19
+ required?: string[];
20
+ }
@@ -0,0 +1,6 @@
1
/**
 * Options for a navigation call; every member is optional.
 * NOTE(review): the unit of `timeout` is not stated here — confirm (likely milliseconds).
 */
+ export interface GotoOptions {
2
+ timeout?: number;
3
+ waitUntil?: "load" | "domcontentloaded" | "networkidle" | "commit";
4
+ referer?: string;
5
+ frameId?: string;
6
+ }