@browserbasehq/orca 3.0.2-test-cua-base-url → 3.0.3-patch

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. package/dist/index.d.ts +80 -19
  2. package/dist/index.js +1053 -619
  3. package/package.json +18 -17
  4. package/LICENSE +0 -21
package/dist/index.d.ts CHANGED
@@ -199,11 +199,7 @@ declare class CdpConnection implements CDPSessionLike {
199
199
  close(): Promise<void>;
200
200
  getSession(sessionId: string): CdpSession | undefined;
201
201
  attachToTarget(targetId: string): Promise<CdpSession>;
202
- getTargets(): Promise<Array<{
203
- targetId: string;
204
- type: string;
205
- url: string;
206
- }>>;
202
+ getTargets(): Promise<Protocol.Target.TargetInfo[]>;
207
203
  private onMessage;
208
204
  _sendViaSession<R = unknown>(sessionId: string, method: string, params?: object): Promise<R>;
209
205
  _onSessionEvent(sessionId: string, event: string, handler: EventHandler): void;
@@ -238,9 +234,12 @@ declare class Frame implements FrameManager {
238
234
  session: CDPSessionLike;
239
235
  frameId: string;
240
236
  pageId: string;
237
+ private readonly remoteBrowser;
241
238
  /** Owning CDP session id (useful for logs); null for root connection (should not happen for targets) */
242
239
  readonly sessionId: string | null;
243
- constructor(session: CDPSessionLike, frameId: string, pageId: string);
240
+ constructor(session: CDPSessionLike, frameId: string, pageId: string, remoteBrowser: boolean);
241
+ /** True when the controlled browser runs on a different machine. */
242
+ isBrowserRemote(): boolean;
244
243
  /** DOM.getNodeForLocation → DOM.describeNode */
245
244
  getNodeAtLocation(x: number, y: number): Promise<Protocol.DOM.Node>;
246
245
  /** CSS selector → DOM.querySelector → DOM.getBoxModel */
@@ -284,6 +283,14 @@ declare class Frame implements FrameManager {
284
283
  private getExecutionContextId;
285
284
  }
286
285
 
286
+ interface SetInputFilePayload {
287
+ name: string;
288
+ mimeType?: string;
289
+ buffer: ArrayBuffer | Uint8Array | Buffer$1 | string;
290
+ lastModified?: number;
291
+ }
292
+ type SetInputFilesArgument = string | string[] | SetInputFilePayload | SetInputFilePayload[];
293
+
287
294
  type MouseButton = "left" | "right" | "middle";
288
295
  /**
289
296
  * Locator
@@ -326,15 +333,16 @@ declare class Locator {
326
333
  * - Best‑effort dispatches change/input via CDP (Chrome does by default).
327
334
  * - Passing an empty array clears the selection.
328
335
  */
329
- setInputFiles(files: string | string[] | {
330
- name: string;
331
- mimeType: string;
332
- buffer: ArrayBuffer | Uint8Array | Buffer$1 | string;
333
- } | Array<{
334
- name: string;
335
- mimeType: string;
336
- buffer: ArrayBuffer | Uint8Array | Buffer$1 | string;
337
- }>): Promise<void>;
336
+ setInputFiles(files: SetInputFilesArgument): Promise<void>;
337
+ /**
338
+ * Remote browser fallback: build File objects inside the page and attach them via JS.
339
+ *
340
+ * When Stagehand is driving a browser that cannot see the local filesystem (Browserbase,
341
+ * remote CDP, etc.), CDP's DOM.setFileInputFiles would fail because Chrome can't reach
342
+ * our temp files. Instead we base64-encode the payloads, send them into the page, and
343
+ * let a DOM helper create File objects + dispatch change/input events.
344
+ */
345
+ private assignFilesViaPayloadInjection;
338
346
  /**
339
347
  * Return the DOM backendNodeId for this locator's target element.
340
348
  * Useful for identity comparisons without needing element handles.
@@ -723,6 +731,14 @@ declare class Response$1 {
723
731
  * richer metadata.
724
732
  */
725
733
  applyExtraInfo(event: Protocol.Network.ResponseReceivedExtraInfoEvent): void;
734
+ /**
735
+ * Internal helper for creating a Response object from a Serializable
736
+ * goto response from the Stagehand API
737
+ */
738
+ static fromSerializable(serialized: SerializableResponse, context: {
739
+ page: Page;
740
+ session: CDPSessionLike;
741
+ }): Response$1;
726
742
  /** Marks the response as finished and resolves the `finished()` promise. */
727
743
  markFinished(error: Error | null): void;
728
744
  }
@@ -745,7 +761,7 @@ declare class StagehandAPIClient {
745
761
  observe({ instruction, options, frameId, }: APIObserveParameters): Promise<Action[]>;
746
762
  goto(url: string, options?: {
747
763
  waitUntil?: "load" | "domcontentloaded" | "networkidle";
748
- }, frameId?: string): Promise<void>;
764
+ }, frameId?: string): Promise<SerializableResponse | null>;
749
765
  agentExecute(agentConfig: AgentConfig, executeOptions: AgentExecuteOptions | string, frameId?: string): Promise<AgentResult>;
750
766
  end(): Promise<Response>;
751
767
  getReplayMetrics(): Promise<StagehandMetrics>;
@@ -793,6 +809,7 @@ declare class Page {
793
809
  private nextOrdinal;
794
810
  /** cache Frames per frameId so everyone uses the same one */
795
811
  private readonly frameCache;
812
+ private readonly browserIsRemote;
796
813
  /** Stable id for Frames created by this Page (use top-level TargetId). */
797
814
  private readonly pageId;
798
815
  /** Cached current URL for synchronous page.url() */
@@ -813,7 +830,7 @@ declare class Page {
813
830
  * Factory: create Page and seed registry with the shallow tree from Page.getFrameTree.
814
831
  * Assumes Page domain is already enabled on the session passed in.
815
832
  */
816
- static create(conn: CdpConnection, session: CDPSessionLike, targetId: string, apiClient?: StagehandAPIClient | null, localBrowserLaunchOptions?: LocalBrowserLaunchOptions | null): Promise<Page>;
833
+ static create(conn: CdpConnection, session: CDPSessionLike, targetId: string, apiClient?: StagehandAPIClient | null, localBrowserLaunchOptions?: LocalBrowserLaunchOptions | null, browserIsRemote?: boolean): Promise<Page>;
817
834
  /**
818
835
  * Parent/child session emitted a `frameAttached`.
819
836
  * Topology update + ownership stamped to **emitting session**.
@@ -1128,6 +1145,8 @@ interface AgentResult {
1128
1145
  usage?: {
1129
1146
  input_tokens: number;
1130
1147
  output_tokens: number;
1148
+ reasoning_tokens?: number;
1149
+ cached_input_tokens?: number;
1131
1150
  inference_time_ms: number;
1132
1151
  };
1133
1152
  }
@@ -1342,18 +1361,28 @@ declare enum V3FunctionName {
1342
1361
  interface StagehandMetrics {
1343
1362
  actPromptTokens: number;
1344
1363
  actCompletionTokens: number;
1364
+ actReasoningTokens: number;
1365
+ actCachedInputTokens: number;
1345
1366
  actInferenceTimeMs: number;
1346
1367
  extractPromptTokens: number;
1347
1368
  extractCompletionTokens: number;
1369
+ extractReasoningTokens: number;
1370
+ extractCachedInputTokens: number;
1348
1371
  extractInferenceTimeMs: number;
1349
1372
  observePromptTokens: number;
1350
1373
  observeCompletionTokens: number;
1374
+ observeReasoningTokens: number;
1375
+ observeCachedInputTokens: number;
1351
1376
  observeInferenceTimeMs: number;
1352
1377
  agentPromptTokens: number;
1353
1378
  agentCompletionTokens: number;
1379
+ agentReasoningTokens: number;
1380
+ agentCachedInputTokens: number;
1354
1381
  agentInferenceTimeMs: number;
1355
1382
  totalPromptTokens: number;
1356
1383
  totalCompletionTokens: number;
1384
+ totalReasoningTokens: number;
1385
+ totalCachedInputTokens: number;
1357
1386
  totalInferenceTimeMs: number;
1358
1387
  }
1359
1388
 
@@ -1500,6 +1529,9 @@ declare class ExperimentalApiConflictError extends StagehandError {
1500
1529
  declare class ExperimentalNotConfiguredError extends StagehandError {
1501
1530
  constructor(featureName: string);
1502
1531
  }
1532
+ declare class CuaModelRequiredError extends StagehandError {
1533
+ constructor(availableModels: readonly string[]);
1534
+ }
1503
1535
  declare class ZodSchemaValidationError extends Error {
1504
1536
  readonly received: unknown;
1505
1537
  readonly issues: ReturnType<ZodError["format"]>;
@@ -1522,6 +1554,24 @@ declare class StagehandShadowSegmentEmptyError extends StagehandError {
1522
1554
  declare class StagehandShadowSegmentNotFoundError extends StagehandError {
1523
1555
  constructor(segment: string, hint?: string);
1524
1556
  }
1557
+ declare class ElementNotVisibleError extends StagehandError {
1558
+ constructor(selector: string);
1559
+ }
1560
+ declare class ResponseBodyError extends StagehandError {
1561
+ constructor(message: string);
1562
+ }
1563
+ declare class ResponseParseError extends StagehandError {
1564
+ constructor(message: string);
1565
+ }
1566
+ declare class TimeoutError extends StagehandError {
1567
+ constructor(operation: string, timeoutMs: number);
1568
+ }
1569
+ declare class PageNotFoundError extends StagehandError {
1570
+ constructor(identifier: string);
1571
+ }
1572
+ declare class ConnectionTimeoutError extends StagehandError {
1573
+ constructor(message: string);
1574
+ }
1525
1575
 
1526
1576
  declare class AISdkClient extends LLMClient {
1527
1577
  type: "aisdk";
@@ -1569,6 +1619,16 @@ interface APIObserveParameters {
1569
1619
  options?: ObserveOptions;
1570
1620
  frameId?: string;
1571
1621
  }
1622
+ interface SerializableResponse {
1623
+ requestId: string;
1624
+ frameId?: string;
1625
+ loaderId?: string;
1626
+ response: Protocol.Network.Response;
1627
+ fromServiceWorkerFlag?: boolean;
1628
+ finishedSettled?: boolean;
1629
+ extraInfoHeaders?: Protocol.Network.Headers | null;
1630
+ extraInfoHeadersText?: string;
1631
+ }
1572
1632
 
1573
1633
  /**
1574
1634
  * Represents a path through a Zod schema from the root object down to a
@@ -1672,6 +1732,7 @@ declare class V3Context {
1672
1732
  * We poll internal maps that bootstrap/onAttachedToTarget populate.
1673
1733
  */
1674
1734
  private waitForFirstTopLevelPage;
1735
+ private waitForInitialTopLevelTargets;
1675
1736
  private ensurePiercer;
1676
1737
  /** Mark a page target as the most-recent one (active). */
1677
1738
  private _pushActive;
@@ -1862,7 +1923,7 @@ declare class V3 {
1862
1923
  */
1863
1924
  get history(): Promise<ReadonlyArray<HistoryEntry>>;
1864
1925
  addToHistory(method: HistoryEntry["method"], parameters: unknown, result?: unknown): void;
1865
- updateMetrics(functionName: V3FunctionName, promptTokens: number, completionTokens: number, inferenceTimeMs: number): void;
1926
+ updateMetrics(functionName: V3FunctionName, promptTokens: number, completionTokens: number, reasoningTokens: number, cachedInputTokens: number, inferenceTimeMs: number): void;
1866
1927
  private updateTotalMetrics;
1867
1928
  private _immediateShutdown;
1868
1929
  private static _installProcessGuards;
@@ -2058,4 +2119,4 @@ declare class V3Evaluator {
2058
2119
  private _evaluateWithMultipleScreenshots;
2059
2120
  }
2060
2121
 
2061
- export { type AISDKCustomProvider, type AISDKProvider, AISdkClient, AVAILABLE_CUA_MODELS, type ActOptions, type ActResult, type Action, type ActionExecutionResult, type AgentAction, type AgentConfig, type AgentExecuteOptions, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentModelConfig, AgentProvider, type AgentProviderType, type AgentResult, AgentScreenshotProviderError, type AgentType, AnnotatedScreenshotText, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AnyPage, type AvailableCuaModel, type AvailableModel, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ComputerCallItem, type ConsoleListener, ConsoleMessage, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, type FunctionCallItem, HandlerNotInitializedError, type HistoryEntry, InvalidAISDKModelFormatError, type JsonSchema, type JsonSchemaProperty, LLMClient, type LLMResponse, LLMResponseError, type LLMTool, LOG_LEVEL_NAMES, type LoadState, type LocalBrowserLaunchOptions, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelConfiguration, type ModelProvider, type ObserveOptions, Page, Response$1 as Response, type ResponseInputItem, type ResponseItem, V3 as Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, StagehandEnvironmentError, StagehandError, StagehandEvalError, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, type StagehandMetrics, 
StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, StagehandResponseParseError, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, type ToolUseItem, UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, V3, type V3Env, V3Evaluator, V3FunctionName, type V3Options, XPathResolutionError, ZodSchemaValidationError, connectToMCPServer, defaultExtractSchema, getZodType, injectUrls, isRunningInBun, jsonSchemaToZod, loadApiKeyFromEnv, modelToAgentProviderMap, pageTextSchema, providerEnvVarMap, toGeminiSchema, transformSchema, trimTrailingTextNode, validateZodSchema };
2122
+ export { type AISDKCustomProvider, type AISDKProvider, AISdkClient, AVAILABLE_CUA_MODELS, type ActOptions, type ActResult, type Action, type ActionExecutionResult, type AgentAction, type AgentConfig, type AgentExecuteOptions, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentModelConfig, AgentProvider, type AgentProviderType, type AgentResult, AgentScreenshotProviderError, type AgentType, AnnotatedScreenshotText, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AnyPage, type AvailableCuaModel, type AvailableModel, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ComputerCallItem, ConnectionTimeoutError, type ConsoleListener, ConsoleMessage, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, CuaModelRequiredError, ElementNotVisibleError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, type FunctionCallItem, HandlerNotInitializedError, type HistoryEntry, InvalidAISDKModelFormatError, type JsonSchema, type JsonSchemaProperty, LLMClient, type LLMResponse, LLMResponseError, type LLMTool, LOG_LEVEL_NAMES, type LoadState, type LocalBrowserLaunchOptions, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelConfiguration, type ModelProvider, type ObserveOptions, Page, PageNotFoundError, Response$1 as Response, ResponseBodyError, type ResponseInputItem, type ResponseItem, ResponseParseError, V3 as Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, StagehandEnvironmentError, StagehandError, 
StagehandEvalError, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, type StagehandMetrics, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, StagehandResponseParseError, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, TimeoutError, type ToolUseItem, UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, V3, type V3Env, V3Evaluator, V3FunctionName, type V3Options, XPathResolutionError, ZodSchemaValidationError, connectToMCPServer, defaultExtractSchema, getZodType, injectUrls, isRunningInBun, jsonSchemaToZod, loadApiKeyFromEnv, modelToAgentProviderMap, pageTextSchema, providerEnvVarMap, toGeminiSchema, transformSchema, trimTrailingTextNode, validateZodSchema };