@browserbasehq/orca 3.0.2-patch → 3.0.2-zod34

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (4)
  1. package/LICENSE +21 -0
  2. package/dist/index.d.ts +85 -43
  3. package/dist/index.js +1022 -493
  4. package/package.json +21 -21
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Browserbase Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/dist/index.d.ts CHANGED
@@ -1,4 +1,5 @@
1
- import z, { ZodType, z as z$1, ZodError, ZodTypeAny } from 'zod/v3';
1
+ import { ZodTypeAny, z, ZodError } from 'zod';
2
+ import * as z3 from 'zod/v3';
2
3
  import { ClientOptions as ClientOptions$2 } from '@anthropic-ai/sdk';
3
4
  import { LanguageModelV2 } from '@ai-sdk/provider';
4
5
  import { ClientOptions as ClientOptions$1 } from 'openai';
@@ -17,6 +18,9 @@ import { ChatCompletion } from 'openai/resources';
17
18
  import { ToolSet as ToolSet$1 } from 'ai/dist';
18
19
  import { Schema } from '@google/genai';
19
20
 
21
+ type StagehandZodSchema = ZodTypeAny | z3.ZodTypeAny;
22
+ type InferStagehandSchema<T extends StagehandZodSchema> = T extends z3.ZodTypeAny ? z3.infer<T> : T extends ZodTypeAny ? z.infer<T> : never;
23
+
20
24
  type AnthropicJsonSchemaObject = {
21
25
  definitions?: {
22
26
  MySchema?: {
@@ -102,7 +106,7 @@ interface ChatCompletionOptions {
102
106
  };
103
107
  response_model?: {
104
108
  name: string;
105
- schema: ZodType;
109
+ schema: StagehandZodSchema;
106
110
  };
107
111
  tools?: LLMTool[];
108
112
  tool_choice?: "auto" | "none" | "required";
@@ -141,6 +145,21 @@ interface CreateChatCompletionOptions {
141
145
  logger: (message: LogLine) => void;
142
146
  retries?: number;
143
147
  }
148
+ /** Simple usage shape if your LLM returns usage tokens. */
149
+ interface LLMUsage {
150
+ prompt_tokens: number;
151
+ completion_tokens: number;
152
+ total_tokens: number;
153
+ reasoning_tokens?: number;
154
+ cached_input_tokens?: number;
155
+ }
156
+ /**
157
+ * For calls that use a schema: the LLMClient may return { data: T; usage?: LLMUsage }
158
+ */
159
+ interface LLMParsedResponse<T> {
160
+ data: T;
161
+ usage?: LLMUsage;
162
+ }
144
163
  declare abstract class LLMClient {
145
164
  type: "openai" | "anthropic" | "cerebras" | "groq" | (string & {});
146
165
  modelName: AvailableModel | (string & {});
@@ -148,9 +167,15 @@ declare abstract class LLMClient {
148
167
  clientOptions: ClientOptions;
149
168
  userProvidedInstructions?: string;
150
169
  constructor(modelName: AvailableModel, userProvidedInstructions?: string);
151
- abstract createChatCompletion<T = LLMResponse & {
152
- usage?: LLMResponse["usage"];
153
- }>(options: CreateChatCompletionOptions): Promise<T>;
170
+ abstract createChatCompletion<T>(options: CreateChatCompletionOptions & {
171
+ options: {
172
+ response_model: {
173
+ name: string;
174
+ schema: StagehandZodSchema;
175
+ };
176
+ };
177
+ }): Promise<LLMParsedResponse<T>>;
178
+ abstract createChatCompletion<T = LLMResponse>(options: CreateChatCompletionOptions): Promise<T>;
154
179
  generateObject: typeof generateObject;
155
180
  generateText: typeof generateText;
156
181
  streamText: typeof streamText;
@@ -234,9 +259,12 @@ declare class Frame implements FrameManager {
234
259
  session: CDPSessionLike;
235
260
  frameId: string;
236
261
  pageId: string;
262
+ private readonly remoteBrowser;
237
263
  /** Owning CDP session id (useful for logs); null for root connection (should not happen for targets) */
238
264
  readonly sessionId: string | null;
239
- constructor(session: CDPSessionLike, frameId: string, pageId: string);
265
+ constructor(session: CDPSessionLike, frameId: string, pageId: string, remoteBrowser: boolean);
266
+ /** True when the controlled browser runs on a different machine. */
267
+ isBrowserRemote(): boolean;
240
268
  /** DOM.getNodeForLocation → DOM.describeNode */
241
269
  getNodeAtLocation(x: number, y: number): Promise<Protocol.DOM.Node>;
242
270
  /** CSS selector → DOM.querySelector → DOM.getBoxModel */
@@ -280,6 +308,14 @@ declare class Frame implements FrameManager {
280
308
  private getExecutionContextId;
281
309
  }
282
310
 
311
+ interface SetInputFilePayload {
312
+ name: string;
313
+ mimeType?: string;
314
+ buffer: ArrayBuffer | Uint8Array | Buffer$1 | string;
315
+ lastModified?: number;
316
+ }
317
+ type SetInputFilesArgument = string | string[] | SetInputFilePayload | SetInputFilePayload[];
318
+
283
319
  type MouseButton = "left" | "right" | "middle";
284
320
  /**
285
321
  * Locator
@@ -322,15 +358,16 @@ declare class Locator {
322
358
  * - Best‑effort dispatches change/input via CDP (Chrome does by default).
323
359
  * - Passing an empty array clears the selection.
324
360
  */
325
- setInputFiles(files: string | string[] | {
326
- name: string;
327
- mimeType: string;
328
- buffer: ArrayBuffer | Uint8Array | Buffer$1 | string;
329
- } | Array<{
330
- name: string;
331
- mimeType: string;
332
- buffer: ArrayBuffer | Uint8Array | Buffer$1 | string;
333
- }>): Promise<void>;
361
+ setInputFiles(files: SetInputFilesArgument): Promise<void>;
362
+ /**
363
+ * Remote browser fallback: build File objects inside the page and attach them via JS.
364
+ *
365
+ * When Stagehand is driving a browser that cannot see the local filesystem (Browserbase,
366
+ * remote CDP, etc.), CDP's DOM.setFileInputFiles would fail because Chrome can't reach
367
+ * our temp files. Instead we base64-encode the payloads, send them into the page, and
368
+ * let a DOM helper create File objects + dispatch change/input events.
369
+ */
370
+ private assignFilesViaPayloadInjection;
334
371
  /**
335
372
  * Return the DOM backendNodeId for this locator's target element.
336
373
  * Useful for identity comparisons without needing element handles.
@@ -745,7 +782,7 @@ declare class StagehandAPIClient {
745
782
  constructor({ apiKey, projectId, logger }: StagehandAPIConstructorParams);
746
783
  init({ modelName, modelApiKey, domSettleTimeoutMs, verbose, systemPrompt, selfHeal, browserbaseSessionCreateParams, browserbaseSessionID, }: StartSessionParams): Promise<StartSessionResult>;
747
784
  act({ input, options, frameId }: APIActParameters): Promise<ActResult>;
748
- extract<T extends z.AnyZodObject>({ instruction, schema: zodSchema, options, frameId, }: APIExtractParameters): Promise<ExtractResult<T>>;
785
+ extract<T extends StagehandZodSchema>({ instruction, schema: zodSchema, options, frameId, }: APIExtractParameters): Promise<ExtractResult<T>>;
749
786
  observe({ instruction, options, frameId, }: APIObserveParameters): Promise<Action[]>;
750
787
  goto(url: string, options?: {
751
788
  waitUntil?: "load" | "domcontentloaded" | "networkidle";
@@ -797,6 +834,7 @@ declare class Page {
797
834
  private nextOrdinal;
798
835
  /** cache Frames per frameId so everyone uses the same one */
799
836
  private readonly frameCache;
837
+ private readonly browserIsRemote;
800
838
  /** Stable id for Frames created by this Page (use top-level TargetId). */
801
839
  private readonly pageId;
802
840
  /** Cached current URL for synchronous page.url() */
@@ -817,7 +855,7 @@ declare class Page {
817
855
  * Factory: create Page and seed registry with the shallow tree from Page.getFrameTree.
818
856
  * Assumes Page domain is already enabled on the session passed in.
819
857
  */
820
- static create(conn: CdpConnection, session: CDPSessionLike, targetId: string, apiClient?: StagehandAPIClient | null, localBrowserLaunchOptions?: LocalBrowserLaunchOptions | null): Promise<Page>;
858
+ static create(conn: CdpConnection, session: CDPSessionLike, targetId: string, apiClient?: StagehandAPIClient | null, localBrowserLaunchOptions?: LocalBrowserLaunchOptions | null, browserIsRemote?: boolean): Promise<Page>;
821
859
  /**
822
860
  * Parent/child session emitted a `frameAttached`.
823
861
  * Topology update + ownership stamped to **emitting session**.
@@ -1132,6 +1170,8 @@ interface AgentResult {
1132
1170
  usage?: {
1133
1171
  input_tokens: number;
1134
1172
  output_tokens: number;
1173
+ reasoning_tokens?: number;
1174
+ cached_input_tokens?: number;
1135
1175
  inference_time_ms: number;
1136
1176
  };
1137
1177
  }
@@ -1297,7 +1337,7 @@ interface ActResult {
1297
1337
  actionDescription: string;
1298
1338
  actions: Action[];
1299
1339
  }
1300
- type ExtractResult<T extends z$1.AnyZodObject> = z$1.infer<T>;
1340
+ type ExtractResult<T extends StagehandZodSchema> = InferStagehandSchema<T>;
1301
1341
  interface Action {
1302
1342
  selector: string;
1303
1343
  description: string;
@@ -1316,20 +1356,12 @@ interface ExtractOptions {
1316
1356
  selector?: string;
1317
1357
  page?: Page$1 | Page$2 | Page$3 | Page;
1318
1358
  }
1319
- declare const defaultExtractSchema: z$1.ZodObject<{
1320
- extraction: z$1.ZodString;
1321
- }, "strip", z$1.ZodTypeAny, {
1322
- extraction?: string;
1323
- }, {
1324
- extraction?: string;
1325
- }>;
1326
- declare const pageTextSchema: z$1.ZodObject<{
1327
- pageText: z$1.ZodString;
1328
- }, "strip", z$1.ZodTypeAny, {
1329
- pageText?: string;
1330
- }, {
1331
- pageText?: string;
1332
- }>;
1359
+ declare const defaultExtractSchema: z.ZodObject<{
1360
+ extraction: z.ZodString;
1361
+ }, z.core.$strip>;
1362
+ declare const pageTextSchema: z.ZodObject<{
1363
+ pageText: z.ZodString;
1364
+ }, z.core.$strip>;
1333
1365
  interface ObserveOptions {
1334
1366
  model?: ModelConfiguration;
1335
1367
  timeout?: number;
@@ -1346,18 +1378,28 @@ declare enum V3FunctionName {
1346
1378
  interface StagehandMetrics {
1347
1379
  actPromptTokens: number;
1348
1380
  actCompletionTokens: number;
1381
+ actReasoningTokens: number;
1382
+ actCachedInputTokens: number;
1349
1383
  actInferenceTimeMs: number;
1350
1384
  extractPromptTokens: number;
1351
1385
  extractCompletionTokens: number;
1386
+ extractReasoningTokens: number;
1387
+ extractCachedInputTokens: number;
1352
1388
  extractInferenceTimeMs: number;
1353
1389
  observePromptTokens: number;
1354
1390
  observeCompletionTokens: number;
1391
+ observeReasoningTokens: number;
1392
+ observeCachedInputTokens: number;
1355
1393
  observeInferenceTimeMs: number;
1356
1394
  agentPromptTokens: number;
1357
1395
  agentCompletionTokens: number;
1396
+ agentReasoningTokens: number;
1397
+ agentCachedInputTokens: number;
1358
1398
  agentInferenceTimeMs: number;
1359
1399
  totalPromptTokens: number;
1360
1400
  totalCompletionTokens: number;
1401
+ totalReasoningTokens: number;
1402
+ totalCachedInputTokens: number;
1361
1403
  totalInferenceTimeMs: number;
1362
1404
  }
1363
1405
 
@@ -1585,7 +1627,7 @@ interface APIActParameters {
1585
1627
  }
1586
1628
  interface APIExtractParameters {
1587
1629
  instruction?: string;
1588
- schema?: ZodTypeAny;
1630
+ schema?: StagehandZodSchema;
1589
1631
  options?: ExtractOptions;
1590
1632
  frameId?: string;
1591
1633
  }
@@ -1898,7 +1940,7 @@ declare class V3 {
1898
1940
  */
1899
1941
  get history(): Promise<ReadonlyArray<HistoryEntry>>;
1900
1942
  addToHistory(method: HistoryEntry["method"], parameters: unknown, result?: unknown): void;
1901
- updateMetrics(functionName: V3FunctionName, promptTokens: number, completionTokens: number, inferenceTimeMs: number): void;
1943
+ updateMetrics(functionName: V3FunctionName, promptTokens: number, completionTokens: number, reasoningTokens: number, cachedInputTokens: number, inferenceTimeMs: number): void;
1902
1944
  private updateTotalMetrics;
1903
1945
  private _immediateShutdown;
1904
1946
  private static _installProcessGuards;
@@ -1929,10 +1971,10 @@ declare class V3 {
1929
1971
  * - extract(instruction, schema) → schema-inferred
1930
1972
  * - extract(instruction, schema, options)
1931
1973
  */
1932
- extract(): Promise<z$1.infer<typeof pageTextSchema>>;
1933
- extract(options: ExtractOptions): Promise<z$1.infer<typeof pageTextSchema>>;
1934
- extract(instruction: string, options?: ExtractOptions): Promise<z$1.infer<typeof defaultExtractSchema>>;
1935
- extract<T extends ZodTypeAny>(instruction: string, schema: T, options?: ExtractOptions): Promise<z$1.infer<T>>;
1974
+ extract(): Promise<z.infer<typeof pageTextSchema>>;
1975
+ extract(options: ExtractOptions): Promise<z.infer<typeof pageTextSchema>>;
1976
+ extract(instruction: string, options?: ExtractOptions): Promise<z.infer<typeof defaultExtractSchema>>;
1977
+ extract<T extends StagehandZodSchema>(instruction: string, schema: T, options?: ExtractOptions): Promise<InferStagehandSchema<T>>;
1936
1978
  /**
1937
1979
  * Run an "observe" instruction through the ObserveHandler.
1938
1980
  */
@@ -2004,14 +2046,14 @@ declare class AgentProvider {
2004
2046
  static getAgentProvider(modelName: string): AgentProviderType;
2005
2047
  }
2006
2048
 
2007
- declare function validateZodSchema(schema: z$1.ZodTypeAny, data: unknown): boolean;
2049
+ declare function validateZodSchema(schema: StagehandZodSchema, data: unknown): boolean;
2008
2050
  /**
2009
2051
  * Detects if the code is running in the Bun runtime environment.
2010
2052
  * @returns {boolean} True if running in Bun, false otherwise.
2011
2053
  */
2012
2054
  declare function isRunningInBun(): boolean;
2013
- declare function toGeminiSchema(zodSchema: z$1.ZodTypeAny): Schema;
2014
- declare function getZodType(schema: z$1.ZodTypeAny): string;
2055
+ declare function toGeminiSchema(zodSchema: StagehandZodSchema): Schema;
2056
+ declare function getZodType(schema: StagehandZodSchema): string;
2015
2057
  /**
2016
2058
  * Recursively traverses a given Zod schema, scanning for any fields of type `z.string().url()`.
2017
2059
  * For each such field, it replaces the `z.string().url()` with `z.number()`.
@@ -2025,7 +2067,7 @@ declare function getZodType(schema: z$1.ZodTypeAny): string;
2025
2067
  * 1. The updated Zod schema, with any `.url()` fields replaced by `z.number()`.
2026
2068
  * 2. An array of {@link ZodPathSegments} objects representing each replaced field, including the path segments.
2027
2069
  */
2028
- declare function transformSchema(schema: z$1.ZodTypeAny, currentPath: Array<string | number>): [z$1.ZodTypeAny, ZodPathSegments[]];
2070
+ declare function transformSchema(schema: StagehandZodSchema, currentPath: Array<string | number>): [StagehandZodSchema, ZodPathSegments[]];
2029
2071
  /**
2030
2072
  * Once we get the final extracted object that has numeric IDs in place of URLs,
2031
2073
  * use `injectUrls` to walk the object and replace numeric IDs
@@ -2094,4 +2136,4 @@ declare class V3Evaluator {
2094
2136
  private _evaluateWithMultipleScreenshots;
2095
2137
  }
2096
2138
 
2097
- export { type AISDKCustomProvider, type AISDKProvider, AISdkClient, AVAILABLE_CUA_MODELS, type ActOptions, type ActResult, type Action, type ActionExecutionResult, type AgentAction, type AgentConfig, type AgentExecuteOptions, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentModelConfig, AgentProvider, type AgentProviderType, type AgentResult, AgentScreenshotProviderError, type AgentType, AnnotatedScreenshotText, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AnyPage, type AvailableCuaModel, type AvailableModel, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ComputerCallItem, ConnectionTimeoutError, type ConsoleListener, ConsoleMessage, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, CuaModelRequiredError, ElementNotVisibleError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, type FunctionCallItem, HandlerNotInitializedError, type HistoryEntry, InvalidAISDKModelFormatError, type JsonSchema, type JsonSchemaProperty, LLMClient, type LLMResponse, LLMResponseError, type LLMTool, LOG_LEVEL_NAMES, type LoadState, type LocalBrowserLaunchOptions, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelConfiguration, type ModelProvider, type ObserveOptions, Page, PageNotFoundError, Response$1 as Response, ResponseBodyError, type ResponseInputItem, type ResponseItem, ResponseParseError, V3 as Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, StagehandEnvironmentError, StagehandError, 
StagehandEvalError, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, type StagehandMetrics, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, StagehandResponseParseError, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, TimeoutError, type ToolUseItem, UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, V3, type V3Env, V3Evaluator, V3FunctionName, type V3Options, XPathResolutionError, ZodSchemaValidationError, connectToMCPServer, defaultExtractSchema, getZodType, injectUrls, isRunningInBun, jsonSchemaToZod, loadApiKeyFromEnv, modelToAgentProviderMap, pageTextSchema, providerEnvVarMap, toGeminiSchema, transformSchema, trimTrailingTextNode, validateZodSchema };
2139
+ export { type AISDKCustomProvider, type AISDKProvider, AISdkClient, AVAILABLE_CUA_MODELS, type ActOptions, type ActResult, type Action, type ActionExecutionResult, type AgentAction, type AgentConfig, type AgentExecuteOptions, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentModelConfig, AgentProvider, type AgentProviderType, type AgentResult, AgentScreenshotProviderError, type AgentType, AnnotatedScreenshotText, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AnyPage, type AvailableCuaModel, type AvailableModel, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ComputerCallItem, ConnectionTimeoutError, type ConsoleListener, ConsoleMessage, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, CuaModelRequiredError, ElementNotVisibleError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, type FunctionCallItem, HandlerNotInitializedError, type HistoryEntry, InvalidAISDKModelFormatError, type JsonSchema, type JsonSchemaProperty, LLMClient, type LLMParsedResponse, type LLMResponse, LLMResponseError, type LLMTool, type LLMUsage, LOG_LEVEL_NAMES, type LoadState, type LocalBrowserLaunchOptions, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelConfiguration, type ModelProvider, type ObserveOptions, Page, PageNotFoundError, Response$1 as Response, ResponseBodyError, type ResponseInputItem, type ResponseItem, ResponseParseError, V3 as Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, 
StagehandEnvironmentError, StagehandError, StagehandEvalError, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, type StagehandMetrics, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, StagehandResponseParseError, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, TimeoutError, type ToolUseItem, UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, V3, type V3Env, V3Evaluator, V3FunctionName, type V3Options, XPathResolutionError, ZodSchemaValidationError, connectToMCPServer, defaultExtractSchema, getZodType, injectUrls, isRunningInBun, jsonSchemaToZod, loadApiKeyFromEnv, modelToAgentProviderMap, pageTextSchema, providerEnvVarMap, toGeminiSchema, transformSchema, trimTrailingTextNode, validateZodSchema };