@browserbasehq/orca 3.0.2 → 3.0.3-zod

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/dist/index.d.ts +93 -43
  2. package/dist/index.js +1132 -569
  3. package/package.json +8 -6
package/dist/index.d.ts CHANGED
@@ -1,4 +1,5 @@
1
- import z, { ZodType, z as z$1, ZodError, ZodTypeAny } from 'zod/v3';
1
+ import { ZodTypeAny, z, ZodObject, ZodRawShape, ZodError } from 'zod';
2
+ import * as z3 from 'zod/v3';
2
3
  import { ClientOptions as ClientOptions$2 } from '@anthropic-ai/sdk';
3
4
  import { LanguageModelV2 } from '@ai-sdk/provider';
4
5
  import { ClientOptions as ClientOptions$1 } from 'openai';
@@ -17,6 +18,16 @@ import { ChatCompletion } from 'openai/resources';
17
18
  import { ToolSet as ToolSet$1 } from 'ai/dist';
18
19
  import { Schema } from '@google/genai';
19
20
 
21
+ type StagehandZodSchema = ZodTypeAny | z3.ZodTypeAny;
22
+ type StagehandZodObject = ZodObject<ZodRawShape> | z3.ZodObject<z3.ZodRawShape>;
23
+ type InferStagehandSchema<T extends StagehandZodSchema> = T extends z3.ZodTypeAny ? z3.infer<T> : T extends ZodTypeAny ? z.infer<T> : never;
24
+ declare const isZod4Schema: (schema: StagehandZodSchema) => schema is ZodTypeAny & {
25
+ _zod: unknown;
26
+ };
27
+ declare const isZod3Schema: (schema: StagehandZodSchema) => schema is z3.ZodTypeAny;
28
+ type JsonSchemaDocument = Record<string, unknown>;
29
+ declare function toJsonSchema(schema: StagehandZodSchema): JsonSchemaDocument;
30
+
20
31
  type AnthropicJsonSchemaObject = {
21
32
  definitions?: {
22
33
  MySchema?: {
@@ -102,7 +113,7 @@ interface ChatCompletionOptions {
102
113
  };
103
114
  response_model?: {
104
115
  name: string;
105
- schema: ZodType;
116
+ schema: StagehandZodSchema;
106
117
  };
107
118
  tools?: LLMTool[];
108
119
  tool_choice?: "auto" | "none" | "required";
@@ -141,6 +152,21 @@ interface CreateChatCompletionOptions {
141
152
  logger: (message: LogLine) => void;
142
153
  retries?: number;
143
154
  }
155
+ /** Simple usage shape if your LLM returns usage tokens. */
156
+ interface LLMUsage {
157
+ prompt_tokens: number;
158
+ completion_tokens: number;
159
+ total_tokens: number;
160
+ reasoning_tokens?: number;
161
+ cached_input_tokens?: number;
162
+ }
163
+ /**
164
+ * For calls that use a schema: the LLMClient may return { data: T; usage?: LLMUsage }
165
+ */
166
+ interface LLMParsedResponse<T> {
167
+ data: T;
168
+ usage?: LLMUsage;
169
+ }
144
170
  declare abstract class LLMClient {
145
171
  type: "openai" | "anthropic" | "cerebras" | "groq" | (string & {});
146
172
  modelName: AvailableModel | (string & {});
@@ -148,9 +174,15 @@ declare abstract class LLMClient {
148
174
  clientOptions: ClientOptions;
149
175
  userProvidedInstructions?: string;
150
176
  constructor(modelName: AvailableModel, userProvidedInstructions?: string);
151
- abstract createChatCompletion<T = LLMResponse & {
152
- usage?: LLMResponse["usage"];
153
- }>(options: CreateChatCompletionOptions): Promise<T>;
177
+ abstract createChatCompletion<T>(options: CreateChatCompletionOptions & {
178
+ options: {
179
+ response_model: {
180
+ name: string;
181
+ schema: StagehandZodSchema;
182
+ };
183
+ };
184
+ }): Promise<LLMParsedResponse<T>>;
185
+ abstract createChatCompletion<T = LLMResponse>(options: CreateChatCompletionOptions): Promise<T>;
154
186
  generateObject: typeof generateObject;
155
187
  generateText: typeof generateText;
156
188
  streamText: typeof streamText;
@@ -234,9 +266,12 @@ declare class Frame implements FrameManager {
234
266
  session: CDPSessionLike;
235
267
  frameId: string;
236
268
  pageId: string;
269
+ private readonly remoteBrowser;
237
270
  /** Owning CDP session id (useful for logs); null for root connection (should not happen for targets) */
238
271
  readonly sessionId: string | null;
239
- constructor(session: CDPSessionLike, frameId: string, pageId: string);
272
+ constructor(session: CDPSessionLike, frameId: string, pageId: string, remoteBrowser: boolean);
273
+ /** True when the controlled browser runs on a different machine. */
274
+ isBrowserRemote(): boolean;
240
275
  /** DOM.getNodeForLocation → DOM.describeNode */
241
276
  getNodeAtLocation(x: number, y: number): Promise<Protocol.DOM.Node>;
242
277
  /** CSS selector → DOM.querySelector → DOM.getBoxModel */
@@ -280,6 +315,14 @@ declare class Frame implements FrameManager {
280
315
  private getExecutionContextId;
281
316
  }
282
317
 
318
+ interface SetInputFilePayload {
319
+ name: string;
320
+ mimeType?: string;
321
+ buffer: ArrayBuffer | Uint8Array | Buffer$1 | string;
322
+ lastModified?: number;
323
+ }
324
+ type SetInputFilesArgument = string | string[] | SetInputFilePayload | SetInputFilePayload[];
325
+
283
326
  type MouseButton = "left" | "right" | "middle";
284
327
  /**
285
328
  * Locator
@@ -322,15 +365,16 @@ declare class Locator {
322
365
  * - Best‑effort dispatches change/input via CDP (Chrome does by default).
323
366
  * - Passing an empty array clears the selection.
324
367
  */
325
- setInputFiles(files: string | string[] | {
326
- name: string;
327
- mimeType: string;
328
- buffer: ArrayBuffer | Uint8Array | Buffer$1 | string;
329
- } | Array<{
330
- name: string;
331
- mimeType: string;
332
- buffer: ArrayBuffer | Uint8Array | Buffer$1 | string;
333
- }>): Promise<void>;
368
+ setInputFiles(files: SetInputFilesArgument): Promise<void>;
369
+ /**
370
+ * Remote browser fallback: build File objects inside the page and attach them via JS.
371
+ *
372
+ * When Stagehand is driving a browser that cannot see the local filesystem (Browserbase,
373
+ * remote CDP, etc.), CDP's DOM.setFileInputFiles would fail because Chrome can't reach
374
+ * our temp files. Instead we base64-encode the payloads, send them into the page, and
375
+ * let a DOM helper create File objects + dispatch change/input events.
376
+ */
377
+ private assignFilesViaPayloadInjection;
334
378
  /**
335
379
  * Return the DOM backendNodeId for this locator's target element.
336
380
  * Useful for identity comparisons without needing element handles.
@@ -745,7 +789,7 @@ declare class StagehandAPIClient {
745
789
  constructor({ apiKey, projectId, logger }: StagehandAPIConstructorParams);
746
790
  init({ modelName, modelApiKey, domSettleTimeoutMs, verbose, systemPrompt, selfHeal, browserbaseSessionCreateParams, browserbaseSessionID, }: StartSessionParams): Promise<StartSessionResult>;
747
791
  act({ input, options, frameId }: APIActParameters): Promise<ActResult>;
748
- extract<T extends z.AnyZodObject>({ instruction, schema: zodSchema, options, frameId, }: APIExtractParameters): Promise<ExtractResult<T>>;
792
+ extract<T extends StagehandZodSchema>({ instruction, schema: zodSchema, options, frameId, }: APIExtractParameters): Promise<ExtractResult<T>>;
749
793
  observe({ instruction, options, frameId, }: APIObserveParameters): Promise<Action[]>;
750
794
  goto(url: string, options?: {
751
795
  waitUntil?: "load" | "domcontentloaded" | "networkidle";
@@ -797,6 +841,7 @@ declare class Page {
797
841
  private nextOrdinal;
798
842
  /** cache Frames per frameId so everyone uses the same one */
799
843
  private readonly frameCache;
844
+ private readonly browserIsRemote;
800
845
  /** Stable id for Frames created by this Page (use top-level TargetId). */
801
846
  private readonly pageId;
802
847
  /** Cached current URL for synchronous page.url() */
@@ -817,7 +862,7 @@ declare class Page {
817
862
  * Factory: create Page and seed registry with the shallow tree from Page.getFrameTree.
818
863
  * Assumes Page domain is already enabled on the session passed in.
819
864
  */
820
- static create(conn: CdpConnection, session: CDPSessionLike, targetId: string, apiClient?: StagehandAPIClient | null, localBrowserLaunchOptions?: LocalBrowserLaunchOptions | null): Promise<Page>;
865
+ static create(conn: CdpConnection, session: CDPSessionLike, targetId: string, apiClient?: StagehandAPIClient | null, localBrowserLaunchOptions?: LocalBrowserLaunchOptions | null, browserIsRemote?: boolean): Promise<Page>;
821
866
  /**
822
867
  * Parent/child session emitted a `frameAttached`.
823
868
  * Topology update + ownership stamped to **emitting session**.
@@ -1132,6 +1177,8 @@ interface AgentResult {
1132
1177
  usage?: {
1133
1178
  input_tokens: number;
1134
1179
  output_tokens: number;
1180
+ reasoning_tokens?: number;
1181
+ cached_input_tokens?: number;
1135
1182
  inference_time_ms: number;
1136
1183
  };
1137
1184
  }
@@ -1297,7 +1344,7 @@ interface ActResult {
1297
1344
  actionDescription: string;
1298
1345
  actions: Action[];
1299
1346
  }
1300
- type ExtractResult<T extends z$1.AnyZodObject> = z$1.infer<T>;
1347
+ type ExtractResult<T extends StagehandZodSchema> = InferStagehandSchema<T>;
1301
1348
  interface Action {
1302
1349
  selector: string;
1303
1350
  description: string;
@@ -1316,20 +1363,12 @@ interface ExtractOptions {
1316
1363
  selector?: string;
1317
1364
  page?: Page$1 | Page$2 | Page$3 | Page;
1318
1365
  }
1319
- declare const defaultExtractSchema: z$1.ZodObject<{
1320
- extraction: z$1.ZodString;
1321
- }, "strip", z$1.ZodTypeAny, {
1322
- extraction?: string;
1323
- }, {
1324
- extraction?: string;
1325
- }>;
1326
- declare const pageTextSchema: z$1.ZodObject<{
1327
- pageText: z$1.ZodString;
1328
- }, "strip", z$1.ZodTypeAny, {
1329
- pageText?: string;
1330
- }, {
1331
- pageText?: string;
1332
- }>;
1366
+ declare const defaultExtractSchema: z.ZodObject<{
1367
+ extraction: z.ZodString;
1368
+ }, z.core.$strip>;
1369
+ declare const pageTextSchema: z.ZodObject<{
1370
+ pageText: z.ZodString;
1371
+ }, z.core.$strip>;
1333
1372
  interface ObserveOptions {
1334
1373
  model?: ModelConfiguration;
1335
1374
  timeout?: number;
@@ -1346,18 +1385,28 @@ declare enum V3FunctionName {
1346
1385
  interface StagehandMetrics {
1347
1386
  actPromptTokens: number;
1348
1387
  actCompletionTokens: number;
1388
+ actReasoningTokens: number;
1389
+ actCachedInputTokens: number;
1349
1390
  actInferenceTimeMs: number;
1350
1391
  extractPromptTokens: number;
1351
1392
  extractCompletionTokens: number;
1393
+ extractReasoningTokens: number;
1394
+ extractCachedInputTokens: number;
1352
1395
  extractInferenceTimeMs: number;
1353
1396
  observePromptTokens: number;
1354
1397
  observeCompletionTokens: number;
1398
+ observeReasoningTokens: number;
1399
+ observeCachedInputTokens: number;
1355
1400
  observeInferenceTimeMs: number;
1356
1401
  agentPromptTokens: number;
1357
1402
  agentCompletionTokens: number;
1403
+ agentReasoningTokens: number;
1404
+ agentCachedInputTokens: number;
1358
1405
  agentInferenceTimeMs: number;
1359
1406
  totalPromptTokens: number;
1360
1407
  totalCompletionTokens: number;
1408
+ totalReasoningTokens: number;
1409
+ totalCachedInputTokens: number;
1361
1410
  totalInferenceTimeMs: number;
1362
1411
  }
1363
1412
 
@@ -1585,7 +1634,7 @@ interface APIActParameters {
1585
1634
  }
1586
1635
  interface APIExtractParameters {
1587
1636
  instruction?: string;
1588
- schema?: ZodTypeAny;
1637
+ schema?: StagehandZodSchema;
1589
1638
  options?: ExtractOptions;
1590
1639
  frameId?: string;
1591
1640
  }
@@ -1898,7 +1947,7 @@ declare class V3 {
1898
1947
  */
1899
1948
  get history(): Promise<ReadonlyArray<HistoryEntry>>;
1900
1949
  addToHistory(method: HistoryEntry["method"], parameters: unknown, result?: unknown): void;
1901
- updateMetrics(functionName: V3FunctionName, promptTokens: number, completionTokens: number, inferenceTimeMs: number): void;
1950
+ updateMetrics(functionName: V3FunctionName, promptTokens: number, completionTokens: number, reasoningTokens: number, cachedInputTokens: number, inferenceTimeMs: number): void;
1902
1951
  private updateTotalMetrics;
1903
1952
  private _immediateShutdown;
1904
1953
  private static _installProcessGuards;
@@ -1929,10 +1978,10 @@ declare class V3 {
1929
1978
  * - extract(instruction, schema) → schema-inferred
1930
1979
  * - extract(instruction, schema, options)
1931
1980
  */
1932
- extract(): Promise<z$1.infer<typeof pageTextSchema>>;
1933
- extract(options: ExtractOptions): Promise<z$1.infer<typeof pageTextSchema>>;
1934
- extract(instruction: string, options?: ExtractOptions): Promise<z$1.infer<typeof defaultExtractSchema>>;
1935
- extract<T extends ZodTypeAny>(instruction: string, schema: T, options?: ExtractOptions): Promise<z$1.infer<T>>;
1981
+ extract(): Promise<z.infer<typeof pageTextSchema>>;
1982
+ extract(options: ExtractOptions): Promise<z.infer<typeof pageTextSchema>>;
1983
+ extract(instruction: string, options?: ExtractOptions): Promise<z.infer<typeof defaultExtractSchema>>;
1984
+ extract<T extends StagehandZodSchema>(instruction: string, schema: T, options?: ExtractOptions): Promise<InferStagehandSchema<T>>;
1936
1985
  /**
1937
1986
  * Run an "observe" instruction through the ObserveHandler.
1938
1987
  */
@@ -2004,14 +2053,14 @@ declare class AgentProvider {
2004
2053
  static getAgentProvider(modelName: string): AgentProviderType;
2005
2054
  }
2006
2055
 
2007
- declare function validateZodSchema(schema: z$1.ZodTypeAny, data: unknown): boolean;
2056
+ declare function validateZodSchema(schema: StagehandZodSchema, data: unknown): boolean;
2008
2057
  /**
2009
2058
  * Detects if the code is running in the Bun runtime environment.
2010
2059
  * @returns {boolean} True if running in Bun, false otherwise.
2011
2060
  */
2012
2061
  declare function isRunningInBun(): boolean;
2013
- declare function toGeminiSchema(zodSchema: z$1.ZodTypeAny): Schema;
2014
- declare function getZodType(schema: z$1.ZodTypeAny): string;
2062
+ declare function toGeminiSchema(zodSchema: StagehandZodSchema): Schema;
2063
+ declare function getZodType(schema: StagehandZodSchema): string;
2015
2064
  /**
2016
2065
  * Recursively traverses a given Zod schema, scanning for any fields of type `z.string().url()`.
2017
2066
  * For each such field, it replaces the `z.string().url()` with `z.number()`.
@@ -2025,7 +2074,7 @@ declare function getZodType(schema: z$1.ZodTypeAny): string;
2025
2074
  * 1. The updated Zod schema, with any `.url()` fields replaced by `z.number()`.
2026
2075
  * 2. An array of {@link ZodPathSegments} objects representing each replaced field, including the path segments.
2027
2076
  */
2028
- declare function transformSchema(schema: z$1.ZodTypeAny, currentPath: Array<string | number>): [z$1.ZodTypeAny, ZodPathSegments[]];
2077
+ declare function transformSchema(schema: StagehandZodSchema, currentPath: Array<string | number>): [StagehandZodSchema, ZodPathSegments[]];
2029
2078
  /**
2030
2079
  * Once we get the final extracted object that has numeric IDs in place of URLs,
2031
2080
  * use `injectUrls` to walk the object and replace numeric IDs
@@ -2054,6 +2103,7 @@ interface JsonSchemaProperty {
2054
2103
  minimum?: number;
2055
2104
  maximum?: number;
2056
2105
  description?: string;
2106
+ format?: string;
2057
2107
  }
2058
2108
  interface JsonSchema extends JsonSchemaProperty {
2059
2109
  type: string;
@@ -2094,4 +2144,4 @@ declare class V3Evaluator {
2094
2144
  private _evaluateWithMultipleScreenshots;
2095
2145
  }
2096
2146
 
2097
- export { type AISDKCustomProvider, type AISDKProvider, AISdkClient, AVAILABLE_CUA_MODELS, type ActOptions, type ActResult, type Action, type ActionExecutionResult, type AgentAction, type AgentConfig, type AgentExecuteOptions, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentModelConfig, AgentProvider, type AgentProviderType, type AgentResult, AgentScreenshotProviderError, type AgentType, AnnotatedScreenshotText, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AnyPage, type AvailableCuaModel, type AvailableModel, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ComputerCallItem, ConnectionTimeoutError, type ConsoleListener, ConsoleMessage, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, CuaModelRequiredError, ElementNotVisibleError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, type FunctionCallItem, HandlerNotInitializedError, type HistoryEntry, InvalidAISDKModelFormatError, type JsonSchema, type JsonSchemaProperty, LLMClient, type LLMResponse, LLMResponseError, type LLMTool, LOG_LEVEL_NAMES, type LoadState, type LocalBrowserLaunchOptions, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelConfiguration, type ModelProvider, type ObserveOptions, Page, PageNotFoundError, Response$1 as Response, ResponseBodyError, type ResponseInputItem, type ResponseItem, ResponseParseError, V3 as Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, StagehandEnvironmentError, StagehandError, 
StagehandEvalError, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, type StagehandMetrics, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, StagehandResponseParseError, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, TimeoutError, type ToolUseItem, UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, V3, type V3Env, V3Evaluator, V3FunctionName, type V3Options, XPathResolutionError, ZodSchemaValidationError, connectToMCPServer, defaultExtractSchema, getZodType, injectUrls, isRunningInBun, jsonSchemaToZod, loadApiKeyFromEnv, modelToAgentProviderMap, pageTextSchema, providerEnvVarMap, toGeminiSchema, transformSchema, trimTrailingTextNode, validateZodSchema };
2147
+ export { type AISDKCustomProvider, type AISDKProvider, AISdkClient, AVAILABLE_CUA_MODELS, type ActOptions, type ActResult, type Action, type ActionExecutionResult, type AgentAction, type AgentConfig, type AgentExecuteOptions, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentModelConfig, AgentProvider, type AgentProviderType, type AgentResult, AgentScreenshotProviderError, type AgentType, AnnotatedScreenshotText, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AnyPage, type AvailableCuaModel, type AvailableModel, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ComputerCallItem, ConnectionTimeoutError, type ConsoleListener, ConsoleMessage, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, CuaModelRequiredError, ElementNotVisibleError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, type FunctionCallItem, HandlerNotInitializedError, type HistoryEntry, type InferStagehandSchema, InvalidAISDKModelFormatError, type JsonSchema, type JsonSchemaDocument, type JsonSchemaProperty, LLMClient, type LLMParsedResponse, type LLMResponse, LLMResponseError, type LLMTool, type LLMUsage, LOG_LEVEL_NAMES, type LoadState, type LocalBrowserLaunchOptions, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelConfiguration, type ModelProvider, type ObserveOptions, Page, PageNotFoundError, Response$1 as Response, ResponseBodyError, type ResponseInputItem, type ResponseItem, ResponseParseError, V3 as Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandDefaultError, StagehandDomProcessError, 
StagehandElementNotFoundError, StagehandEnvironmentError, StagehandError, StagehandEvalError, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, type StagehandMetrics, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, StagehandResponseParseError, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, type StagehandZodObject, type StagehandZodSchema, TimeoutError, type ToolUseItem, UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, V3, type V3Env, V3Evaluator, V3FunctionName, type V3Options, XPathResolutionError, ZodSchemaValidationError, connectToMCPServer, defaultExtractSchema, getZodType, injectUrls, isRunningInBun, isZod3Schema, isZod4Schema, jsonSchemaToZod, loadApiKeyFromEnv, modelToAgentProviderMap, pageTextSchema, providerEnvVarMap, toGeminiSchema, toJsonSchema, transformSchema, trimTrailingTextNode, validateZodSchema };