@browserbasehq/orca 3.0.9-alpha-3 → 3.0.9-alpha-4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/dist/index.d.ts +59 -17
  2. package/dist/index.js +11651 -4706
  3. package/package.json +6 -4
package/dist/index.d.ts CHANGED
@@ -191,6 +191,7 @@ declare class Frame implements FrameManager {
191
191
  private getMainWorldExecutionContextId;
192
192
  }
193
193
 
194
+ type MouseButton = "left" | "right" | "middle";
194
195
  interface SetInputFilePayload {
195
196
  name: string;
196
197
  mimeType?: string;
@@ -199,7 +200,6 @@ interface SetInputFilePayload {
199
200
  }
200
201
  type SetInputFilesArgument = string | string[] | SetInputFilePayload | SetInputFilePayload[];
201
202
 
202
- type MouseButton = "left" | "right" | "middle";
203
203
  /**
204
204
  * Locator
205
205
  *
@@ -367,7 +367,7 @@ declare class Locator {
367
367
  */
368
368
  innerText(): Promise<string>;
369
369
  /**
370
- * For API parity, returns the same locator (querySelector already returns the first match).
370
+ * Return a locator narrowed to the first match.
371
371
  */
372
372
  first(): Locator;
373
373
  /** Return a locator narrowed to the element at the given zero-based index. */
@@ -380,6 +380,14 @@ declare class Locator {
380
380
  nodeId: Protocol.DOM.NodeId | null;
381
381
  objectId: Protocol.Runtime.RemoteObjectId;
382
382
  }>;
383
+ /**
384
+ * Resolve all matching nodes for this locator.
385
+ * If the locator is narrowed via nth(), only that index is returned.
386
+ */
387
+ resolveNodesForMask(): Promise<Array<{
388
+ nodeId: Protocol.DOM.NodeId | null;
389
+ objectId: Protocol.Runtime.RemoteObjectId;
390
+ }>>;
383
391
  /** Compute a center point from a BoxModel content quad */
384
392
  private centerFromBoxContent;
385
393
  }
@@ -476,7 +484,8 @@ declare class FrameLocator {
476
484
  declare class LocatorDelegate {
477
485
  private readonly fl;
478
486
  private readonly sel;
479
- constructor(fl: FrameLocator, sel: string);
487
+ private readonly nthIndex;
488
+ constructor(fl: FrameLocator, sel: string, nthIndex?: number);
480
489
  private real;
481
490
  click(options?: {
482
491
  button?: "left" | "right" | "middle";
@@ -497,6 +506,7 @@ declare class LocatorDelegate {
497
506
  innerText(): Promise<string>;
498
507
  count(): Promise<number>;
499
508
  first(): LocatorDelegate;
509
+ nth(index: number): LocatorDelegate;
500
510
  }
501
511
 
502
512
  type RemoteObject = Protocol.Runtime.RemoteObject;
@@ -1283,6 +1293,22 @@ declare const AgentConfigSchema: z$1.ZodObject<{
1283
1293
  }, z$1.core.$strip>, z$1.ZodString]>>;
1284
1294
  systemPrompt: z$1.ZodOptional<z$1.ZodString>;
1285
1295
  cua: z$1.ZodOptional<z$1.ZodBoolean>;
1296
+ mode: z$1.ZodOptional<z$1.ZodEnum<{
1297
+ cua: "cua";
1298
+ dom: "dom";
1299
+ hybrid: "hybrid";
1300
+ }>>;
1301
+ executionModel: z$1.ZodOptional<z$1.ZodUnion<readonly [z$1.ZodObject<{
1302
+ provider: z$1.ZodOptional<z$1.ZodEnum<{
1303
+ openai: "openai";
1304
+ anthropic: "anthropic";
1305
+ google: "google";
1306
+ microsoft: "microsoft";
1307
+ }>>;
1308
+ modelName: z$1.ZodString;
1309
+ apiKey: z$1.ZodOptional<z$1.ZodString>;
1310
+ baseURL: z$1.ZodOptional<z$1.ZodString>;
1311
+ }, z$1.core.$strip>, z$1.ZodString]>>;
1286
1312
  }, z$1.core.$strip>;
1287
1313
  /** Action taken by the agent during execution */
1288
1314
  declare const AgentActionSchema: z$1.ZodObject<{
@@ -1357,6 +1383,22 @@ declare const AgentExecuteRequestSchema: z$1.ZodObject<{
1357
1383
  }, z$1.core.$strip>, z$1.ZodString]>>;
1358
1384
  systemPrompt: z$1.ZodOptional<z$1.ZodString>;
1359
1385
  cua: z$1.ZodOptional<z$1.ZodBoolean>;
1386
+ mode: z$1.ZodOptional<z$1.ZodEnum<{
1387
+ cua: "cua";
1388
+ dom: "dom";
1389
+ hybrid: "hybrid";
1390
+ }>>;
1391
+ executionModel: z$1.ZodOptional<z$1.ZodUnion<readonly [z$1.ZodObject<{
1392
+ provider: z$1.ZodOptional<z$1.ZodEnum<{
1393
+ openai: "openai";
1394
+ anthropic: "anthropic";
1395
+ google: "google";
1396
+ microsoft: "microsoft";
1397
+ }>>;
1398
+ modelName: z$1.ZodString;
1399
+ apiKey: z$1.ZodOptional<z$1.ZodString>;
1400
+ baseURL: z$1.ZodOptional<z$1.ZodString>;
1401
+ }, z$1.core.$strip>, z$1.ZodString]>>;
1360
1402
  }, z$1.core.$strip>;
1361
1403
  executeOptions: z$1.ZodObject<{
1362
1404
  instruction: z$1.ZodString;
@@ -2081,6 +2123,9 @@ declare class StagehandEvalError extends StagehandError {
2081
2123
  declare class StagehandDomProcessError extends StagehandError {
2082
2124
  constructor(message: string);
2083
2125
  }
2126
+ declare class StagehandLocatorError extends StagehandError {
2127
+ constructor(action: string, selector: string, message: string);
2128
+ }
2084
2129
  declare class StagehandClickError extends StagehandError {
2085
2130
  constructor(message: string, selector: string);
2086
2131
  }
@@ -2297,7 +2342,7 @@ declare class V3Context {
2297
2342
  private constructor();
2298
2343
  private readonly _piercerInstalled;
2299
2344
  private _lastPopupSignalAt;
2300
- private sessionKey;
2345
+ private readonly _targetSessionListeners;
2301
2346
  private readonly _sessionInit;
2302
2347
  private pagesByTarget;
2303
2348
  private mainFrameToTarget;
@@ -2309,6 +2354,7 @@ declare class V3Context {
2309
2354
  private _pageOrder;
2310
2355
  private pendingCreatedTargetUrl;
2311
2356
  private readonly initScripts;
2357
+ private installTargetSessionListeners;
2312
2358
  /**
2313
2359
  * Create a Context for a given CDP websocket URL and bootstrap target wiring.
2314
2360
  */
@@ -2359,7 +2405,6 @@ declare class V3Context {
2359
2405
  /**
2360
2406
  * Bootstrap target lifecycle:
2361
2407
  * - Attach to existing targets.
2362
- * - Attach on `Target.targetCreated` (fallback for OOPIFs).
2363
2408
  * - Handle auto-attach events.
2364
2409
  * - Clean up on detach/destroy.
2365
2410
  */
@@ -2680,6 +2725,7 @@ declare class Page {
2680
2725
  private installInitScriptOnSession;
2681
2726
  private applyInitScriptsToSession;
2682
2727
  registerInitScript(source: string): Promise<void>;
2728
+ seedInitScript(source: string): void;
2683
2729
  private cursorEnabled;
2684
2730
  private ensureCursorScript;
2685
2731
  enableCursorOverlay(): Promise<void>;
@@ -2877,6 +2923,7 @@ declare class Page {
2877
2923
  * Supports iframe hop notation with '>>' (e.g., 'iframe#checkout >> .submit-btn').
2878
2924
  *
2879
2925
  * @param selector CSS selector to wait for (supports '>>' for iframe hops)
2926
+ * @param options
2880
2927
  * @param options.state Element state to wait for: 'attached' | 'detached' | 'visible' | 'hidden' (default: 'visible')
2881
2928
  * @param options.timeout Maximum time to wait in milliseconds (default: 30000)
2882
2929
  * @param options.pierceShadow Whether to search inside shadow DOM (default: true)
@@ -3305,7 +3352,7 @@ interface AgentStreamExecuteOptions extends AgentExecuteOptionsBase {
3305
3352
  callbacks?: AgentStreamCallbacks;
3306
3353
  }
3307
3354
  type AgentType = "openai" | "anthropic" | "google" | "microsoft";
3308
- declare const AVAILABLE_CUA_MODELS: readonly ["openai/computer-use-preview", "openai/computer-use-preview-2025-03-11", "anthropic/claude-3-7-sonnet-latest", "anthropic/claude-opus-4-5-20251101", "anthropic/claude-haiku-4-5-20251001", "anthropic/claude-sonnet-4-20250514", "anthropic/claude-sonnet-4-5-20250929", "google/gemini-2.5-computer-use-preview-10-2025", "microsoft/fara-7b"];
3355
+ declare const AVAILABLE_CUA_MODELS: readonly ["openai/computer-use-preview", "openai/computer-use-preview-2025-03-11", "anthropic/claude-3-7-sonnet-latest", "anthropic/claude-opus-4-5-20251101", "anthropic/claude-opus-4-6", "anthropic/claude-haiku-4-5-20251001", "anthropic/claude-sonnet-4-20250514", "anthropic/claude-sonnet-4-5-20250929", "google/gemini-2.5-computer-use-preview-10-2025", "google/gemini-3-flash-preview", "google/gemini-3-pro-preview", "microsoft/fara-7b"];
3309
3356
  type AvailableCuaModel = (typeof AVAILABLE_CUA_MODELS)[number];
3310
3357
  interface AgentExecutionOptions<TOptions extends AgentExecuteOptions = AgentExecuteOptions> {
3311
3358
  options: TOptions;
@@ -3558,10 +3605,12 @@ interface FillFormVisionToolResult {
3558
3605
  error?: string;
3559
3606
  screenshotBase64?: string;
3560
3607
  }
3561
- interface ScrollVisionToolResult {
3608
+ interface ScrollToolResult {
3562
3609
  success: boolean;
3563
3610
  message: string;
3564
3611
  scrolledPixels: number;
3612
+ }
3613
+ interface ScrollVisionToolResult extends ScrollToolResult {
3565
3614
  screenshotBase64?: string;
3566
3615
  }
3567
3616
  interface WaitToolResult {
@@ -3978,13 +4027,10 @@ declare const actTool: (v3: V3, executionModel?: string) => ai.Tool<{
3978
4027
  }, {
3979
4028
  success: boolean;
3980
4029
  action: string;
3981
- playwrightArguments: Action;
3982
- error?: undefined;
4030
+ playwrightArguments?: Action;
3983
4031
  } | {
3984
4032
  success: boolean;
3985
4033
  error: any;
3986
- action?: undefined;
3987
- playwrightArguments?: undefined;
3988
4034
  }>;
3989
4035
 
3990
4036
  declare const screenshotTool: (v3: V3) => ai.Tool<Record<string, never>, {
@@ -4026,11 +4072,7 @@ declare const fillFormTool: (v3: V3, executionModel?: string) => ai.Tool<{
4026
4072
  declare const scrollTool: (v3: V3) => ai.Tool<{
4027
4073
  direction: "up" | "down";
4028
4074
  percentage?: number;
4029
- }, {
4030
- success: boolean;
4031
- message: string;
4032
- scrolledPixels: number;
4033
- }>;
4075
+ }, ScrollToolResult>;
4034
4076
  /**
4035
4077
  * Scroll tool for hybrid mode (grounding models).
4036
4078
  * Supports optional coordinates for scrolling within nested scrollable elements.
@@ -4318,4 +4360,4 @@ interface ServerAgentCacheHandle {
4318
4360
  }
4319
4361
  declare function __internalCreateInMemoryAgentCacheHandle(stagehand: V3): ServerAgentCacheHandle;
4320
4362
 
4321
- export { type AISDKCustomProvider, type AISDKProvider, AISdkClient, AVAILABLE_CUA_MODELS, type ActOptions, type ActResult, ActTimeoutError, type Action, type ActionExecutionResult, AgentAbortError, type AgentAction, type AgentCallbacks, type AgentConfig, type AgentContext, type AgentExecuteCallbacks, type AgentExecuteOptions, type AgentExecuteOptionsBase, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentModelConfig, AgentProvider, type AgentProviderType, type AgentResult, AgentScreenshotProviderError, type AgentState, type AgentStreamCallbacks, type AgentStreamExecuteOptions, type AgentStreamResult, type AgentToolCall, type AgentToolMode, type AgentToolResult, type AgentToolTypesMap, type AgentTools, type AgentType, type AgentUITools, AnnotatedScreenshotText, type AnthropicClientOptions, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AnyPage, api as Api, type AvailableCuaModel, type AvailableModel, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClickToolResult, type ClientOptions, type ComputerCallItem, ConnectionTimeoutError, type ConsoleListener, ConsoleMessage, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, CuaModelRequiredError, CustomOpenAIClient, type DragAndDropToolResult, ElementNotVisibleError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, ExtractTimeoutError, type FillFormField, type FillFormVisionToolResult, type FunctionCallItem, type GoogleServiceAccountCredentials, type GoogleVertexProviderSettings, HandlerNotInitializedError, type HistoryEntry, type InferStagehandSchema, InvalidAISDKModelFormatError, type JsonSchema, type JsonSchemaDocument, type JsonSchemaProperty, LLMClient, 
type LLMParsedResponse, type LLMResponse, LLMResponseError, type LLMTool, type LLMUsage, LOG_LEVEL_NAMES, type LoadState, type LocalBrowserLaunchOptions, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelConfiguration, type ModelOutputContentItem, type ModelProvider, type NonStreamingAgentInstance, type ObserveOptions, ObserveTimeoutError, type OpenAIClientOptions, Page, PageNotFoundError, type PageSnapshotOptions, Response$1 as Response, ResponseBodyError, type ResponseInputItem, type ResponseItem, ResponseParseError, type SafetyCheck, type SafetyConfirmationHandler, type SafetyConfirmationResponse, type ScrollVisionToolResult, type ServerAgentCacheHandle, type SnapshotResult, V3 as Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandClosedError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, StagehandEnvironmentError, StagehandError, StagehandEvalError, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, type StagehandMetrics, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, StagehandResponseParseError, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, StagehandSnapshotError, type StagehandZodObject, type StagehandZodSchema, type StreamingAgentInstance, StreamingCallbacksInNonStreamingModeError, TimeoutError, type ToolUseItem, type TypeToolResult, UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, V3, type V3Env, V3Evaluator, V3FunctionName, type V3Options, type WaitToolResult, XPathResolutionError, ZodSchemaValidationError, __internalCreateInMemoryAgentCacheHandle, connectToMCPServer, defaultExtractSchema, getZodType, injectUrls, isRunningInBun, isZod3Schema, isZod4Schema, jsonSchemaToZod, loadApiKeyFromEnv, 
localBrowserLaunchOptionsSchema, modelToAgentProviderMap, pageTextSchema, providerEnvVarMap, toGeminiSchema, toJsonSchema, transformSchema, trimTrailingTextNode, validateZodSchema };
4363
+ export { type AISDKCustomProvider, type AISDKProvider, AISdkClient, AVAILABLE_CUA_MODELS, type ActOptions, type ActResult, ActTimeoutError, type Action, type ActionExecutionResult, AgentAbortError, type AgentAction, type AgentCallbacks, type AgentConfig, type AgentContext, type AgentExecuteCallbacks, type AgentExecuteOptions, type AgentExecuteOptionsBase, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentModelConfig, AgentProvider, type AgentProviderType, type AgentResult, AgentScreenshotProviderError, type AgentState, type AgentStreamCallbacks, type AgentStreamExecuteOptions, type AgentStreamResult, type AgentToolCall, type AgentToolMode, type AgentToolResult, type AgentToolTypesMap, type AgentTools, type AgentType, type AgentUITools, AnnotatedScreenshotText, type AnthropicClientOptions, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AnyPage, api as Api, type AvailableCuaModel, type AvailableModel, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClickToolResult, type ClientOptions, type ComputerCallItem, ConnectionTimeoutError, type ConsoleListener, ConsoleMessage, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, CuaModelRequiredError, CustomOpenAIClient, type DragAndDropToolResult, ElementNotVisibleError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, ExtractTimeoutError, type FillFormField, type FillFormVisionToolResult, type FunctionCallItem, type GoogleServiceAccountCredentials, type GoogleVertexProviderSettings, HandlerNotInitializedError, type HistoryEntry, type InferStagehandSchema, InvalidAISDKModelFormatError, type JsonSchema, type JsonSchemaDocument, type JsonSchemaProperty, LLMClient, 
type LLMParsedResponse, type LLMResponse, LLMResponseError, type LLMTool, type LLMUsage, LOG_LEVEL_NAMES, type LoadState, type LocalBrowserLaunchOptions, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelConfiguration, type ModelOutputContentItem, type ModelProvider, type NonStreamingAgentInstance, type ObserveOptions, ObserveTimeoutError, type OpenAIClientOptions, Page, PageNotFoundError, type PageSnapshotOptions, Response$1 as Response, ResponseBodyError, type ResponseInputItem, type ResponseItem, ResponseParseError, type SafetyCheck, type SafetyConfirmationHandler, type SafetyConfirmationResponse, type ScrollToolResult, type ScrollVisionToolResult, type ServerAgentCacheHandle, type SnapshotResult, V3 as Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandClosedError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, StagehandEnvironmentError, StagehandError, StagehandEvalError, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, StagehandLocatorError, type StagehandMetrics, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, StagehandResponseParseError, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, StagehandSnapshotError, type StagehandZodObject, type StagehandZodSchema, type StreamingAgentInstance, StreamingCallbacksInNonStreamingModeError, TimeoutError, type ToolUseItem, type TypeToolResult, UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, V3, type V3Env, V3Evaluator, V3FunctionName, type V3Options, type WaitToolResult, XPathResolutionError, ZodSchemaValidationError, __internalCreateInMemoryAgentCacheHandle, connectToMCPServer, defaultExtractSchema, getZodType, injectUrls, isRunningInBun, isZod3Schema, 
isZod4Schema, jsonSchemaToZod, loadApiKeyFromEnv, localBrowserLaunchOptionsSchema, modelToAgentProviderMap, pageTextSchema, providerEnvVarMap, toGeminiSchema, toJsonSchema, transformSchema, trimTrailingTextNode, validateZodSchema };