@browserbasehq/orca 3.0.0-preview.5 → 3.0.0-preview.7

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (3):
  1. package/dist/index.d.ts +157 -84
  2. package/dist/index.js +2062 -1389
  3. package/package.json +5 -1
package/dist/index.d.ts CHANGED
@@ -1,17 +1,17 @@
1
- import { ZodType, z, ZodError, ZodTypeAny } from 'zod/v3';
1
+ import z, { ZodType, z as z$1, ZodError, ZodTypeAny } from 'zod/v3';
2
2
  import { ClientOptions as ClientOptions$2 } from '@anthropic-ai/sdk';
3
3
  import { LanguageModelV2 } from '@ai-sdk/provider';
4
4
  import { ClientOptions as ClientOptions$1 } from 'openai';
5
5
  import { generateObject, generateText, streamText, streamObject, experimental_generateImage, embed, embedMany, experimental_transcribe, experimental_generateSpeech, ToolSet } from 'ai';
6
- import { Protocol } from 'devtools-protocol';
7
- import { Buffer as Buffer$1 } from 'buffer';
6
+ import { Client, ClientOptions as ClientOptions$3 } from '@modelcontextprotocol/sdk/client/index.js';
8
7
  import { Page as Page$1 } from 'playwright-core';
9
8
  export { Page as PlaywrightPage } from 'playwright-core';
10
- import { Page as Page$3 } from 'patchright-core';
11
- export { Page as PatchrightPage } from 'patchright-core';
12
9
  import { Page as Page$2 } from 'puppeteer-core';
13
10
  export { Page as PuppeteerPage } from 'puppeteer-core';
14
- import { Client, ClientOptions as ClientOptions$3 } from '@modelcontextprotocol/sdk/client/index.js';
11
+ import { Page as Page$3 } from 'patchright-core';
12
+ export { Page as PatchrightPage } from 'patchright-core';
13
+ import { Protocol } from 'devtools-protocol';
14
+ import { Buffer as Buffer$1 } from 'buffer';
15
15
  import Browserbase from '@browserbasehq/sdk';
16
16
  import { ToolSet as ToolSet$1 } from 'ai/dist';
17
17
  import { Schema } from '@google/genai';
@@ -573,6 +573,27 @@ declare class LocatorDelegate {
573
573
  type AnyPage = Page$1 | Page$2 | Page$3 | Page;
574
574
  type LoadState = "load" | "domcontentloaded" | "networkidle";
575
575
 
576
+ declare class StagehandAPIClient {
577
+ private apiKey;
578
+ private projectId;
579
+ private sessionId?;
580
+ private modelApiKey;
581
+ private logger;
582
+ private fetchWithCookies;
583
+ constructor({ apiKey, projectId, logger }: StagehandAPIConstructorParams);
584
+ init({ modelName, modelApiKey, domSettleTimeoutMs, verbose, systemPrompt, selfHeal, browserbaseSessionCreateParams, browserbaseSessionID, }: StartSessionParams): Promise<StartSessionResult>;
585
+ act({ input, options, frameId }: APIActParameters): Promise<ActResult>;
586
+ extract<T extends z.AnyZodObject>({ instruction, schema: zodSchema, options, frameId, }: APIExtractParameters): Promise<ExtractResult<T>>;
587
+ observe({ instruction, options, frameId, }: APIObserveParameters): Promise<Action[]>;
588
+ goto(url: string, options?: {
589
+ waitUntil?: "load" | "domcontentloaded" | "networkidle";
590
+ }, frameId?: string): Promise<void>;
591
+ agentExecute(agentConfig: AgentConfig, executeOptions: AgentExecuteOptions | string, frameId?: string): Promise<AgentResult>;
592
+ end(): Promise<Response>;
593
+ private execute;
594
+ private request;
595
+ }
596
+
576
597
  declare class Page {
577
598
  private readonly conn;
578
599
  private readonly mainSession;
@@ -592,7 +613,11 @@ declare class Page {
592
613
  private readonly pageId;
593
614
  /** Cached current URL for synchronous page.url() */
594
615
  private _currentUrl;
616
+ private navigationCommandSeq;
617
+ private latestNavigationCommandId;
595
618
  private readonly networkManager;
619
+ /** Optional API client for routing page operations to the API */
620
+ private readonly apiClient;
596
621
  private constructor();
597
622
  private cursorEnabled;
598
623
  private ensureCursorScript;
@@ -602,7 +627,7 @@ declare class Page {
602
627
  * Factory: create Page and seed registry with the shallow tree from Page.getFrameTree.
603
628
  * Assumes Page domain is already enabled on the session passed in.
604
629
  */
605
- static create(conn: CdpConnection, session: CDPSessionLike, targetId: string): Promise<Page>;
630
+ static create(conn: CdpConnection, session: CDPSessionLike, targetId: string, apiClient?: StagehandAPIClient | null, localBrowserLaunchOptions?: LocalBrowserLaunchOptions | null): Promise<Page>;
606
631
  /**
607
632
  * Parent/child session emitted a `frameAttached`.
608
633
  * Topology update + ownership stamped to **emitting session**.
@@ -682,6 +707,8 @@ declare class Page {
682
707
  * Return the current page URL (synchronous, cached from navigation events).
683
708
  */
684
709
  url(): string;
710
+ private beginNavigationCommand;
711
+ isCurrentNavigationCommand(id: number): boolean;
685
712
  /**
686
713
  * Return the current page title.
687
714
  * Prefers reading from the active document via Runtime.evaluate to reflect dynamic changes.
@@ -840,65 +867,6 @@ declare class Page {
840
867
  waitForMainLoadState(state: LoadState, timeoutMs?: number): Promise<void>;
841
868
  }
842
869
 
843
- /**
844
- * Represents a path through a Zod schema from the root object down to a
845
- * particular field. The `segments` array describes the chain of keys/indices.
846
- *
847
- * - **String** segments indicate object property names.
848
- * - **Number** segments indicate array indices.
849
- *
850
- * For example, `["users", 0, "homepage"]` might describe reaching
851
- * the `homepage` field in `schema.users[0].homepage`.
852
- */
853
- interface ZodPathSegments {
854
- /**
855
- * The ordered list of keys/indices leading from the schema root
856
- * to the targeted field.
857
- */
858
- segments: Array<string | number>;
859
- }
860
-
861
- type EvaluateOptions = {
862
- /** The question to ask about the task state */
863
- question: string;
864
- /** The answer to the question */
865
- answer?: string;
866
- /** Whether to take a screenshot of the task state, or array of screenshots to evaluate */
867
- screenshot?: boolean | Buffer[];
868
- /** Custom system prompt for the evaluator */
869
- systemPrompt?: string;
870
- /** Delay in milliseconds before taking the screenshot @default 250 */
871
- screenshotDelayMs?: number;
872
- /** The agent's reasoning/thought process for completing the task */
873
- agentReasoning?: string;
874
- };
875
- type BatchAskOptions = {
876
- /** Array of questions with optional answers */
877
- questions: Array<{
878
- question: string;
879
- answer?: string;
880
- }>;
881
- /** Whether to take a screenshot of the task state */
882
- screenshot?: boolean;
883
- /** Custom system prompt for the evaluator */
884
- systemPrompt?: string;
885
- /** Delay in milliseconds before taking the screenshot @default 1000 */
886
- screenshotDelayMs?: number;
887
- };
888
- /**
889
- * Result of an evaluation
890
- */
891
- interface EvaluationResult {
892
- /**
893
- * The evaluation result ('YES', 'NO', or 'INVALID' if parsing failed or value was unexpected)
894
- */
895
- evaluation: "YES" | "NO" | "INVALID";
896
- /**
897
- * The reasoning behind the evaluation
898
- */
899
- reasoning: string;
900
- }
901
-
902
870
  interface AgentAction {
903
871
  type: string;
904
872
  reasoning?: string;
@@ -1084,7 +1052,7 @@ interface ActResult {
1084
1052
  actionDescription: string;
1085
1053
  actions: Action[];
1086
1054
  }
1087
- type ExtractResult<T extends z.AnyZodObject> = z.infer<T>;
1055
+ type ExtractResult<T extends z$1.AnyZodObject> = z$1.infer<T>;
1088
1056
  interface Action {
1089
1057
  selector: string;
1090
1058
  description: string;
@@ -1092,7 +1060,7 @@ interface Action {
1092
1060
  arguments?: string[];
1093
1061
  }
1094
1062
  interface HistoryEntry {
1095
- method: "act" | "extract" | "observe" | "navigate";
1063
+ method: "act" | "extract" | "observe" | "navigate" | "agent";
1096
1064
  parameters: unknown;
1097
1065
  result: unknown;
1098
1066
  timestamp: string;
@@ -1103,16 +1071,16 @@ interface ExtractOptions {
1103
1071
  selector?: string;
1104
1072
  page?: Page$1 | Page$2 | Page$3 | Page;
1105
1073
  }
1106
- declare const defaultExtractSchema: z.ZodObject<{
1107
- extraction: z.ZodString;
1108
- }, "strip", z.ZodTypeAny, {
1074
+ declare const defaultExtractSchema: z$1.ZodObject<{
1075
+ extraction: z$1.ZodString;
1076
+ }, "strip", z$1.ZodTypeAny, {
1109
1077
  extraction?: string;
1110
1078
  }, {
1111
1079
  extraction?: string;
1112
1080
  }>;
1113
- declare const pageTextSchema: z.ZodObject<{
1114
- pageText: z.ZodString;
1115
- }, "strip", z.ZodTypeAny, {
1081
+ declare const pageTextSchema: z$1.ZodObject<{
1082
+ pageText: z$1.ZodString;
1083
+ }, "strip", z$1.ZodTypeAny, {
1116
1084
  pageText?: string;
1117
1085
  }, {
1118
1086
  pageText?: string;
@@ -1207,6 +1175,7 @@ interface V3Options {
1207
1175
  /** Directory used to persist cached actions for act(). */
1208
1176
  cacheDir?: string;
1209
1177
  domSettleTimeout?: number;
1178
+ disableAPI?: boolean;
1210
1179
  }
1211
1180
 
1212
1181
  declare class StagehandError extends Error {
@@ -1313,6 +1282,103 @@ declare class StagehandShadowSegmentNotFoundError extends StagehandError {
1313
1282
  constructor(segment: string, hint?: string);
1314
1283
  }
1315
1284
 
1285
+ interface StagehandAPIConstructorParams {
1286
+ apiKey: string;
1287
+ projectId: string;
1288
+ logger: (message: LogLine) => void;
1289
+ }
1290
+ interface StartSessionParams {
1291
+ modelName: string;
1292
+ modelApiKey: string;
1293
+ domSettleTimeoutMs: number;
1294
+ verbose: number;
1295
+ systemPrompt?: string;
1296
+ browserbaseSessionCreateParams?: Omit<Browserbase.Sessions.SessionCreateParams, "projectId"> & {
1297
+ projectId?: string;
1298
+ };
1299
+ selfHeal?: boolean;
1300
+ browserbaseSessionID?: string;
1301
+ }
1302
+ interface StartSessionResult {
1303
+ sessionId: string;
1304
+ available?: boolean;
1305
+ }
1306
+ interface APIActParameters {
1307
+ input: string | Action;
1308
+ options?: ActOptions;
1309
+ frameId?: string;
1310
+ }
1311
+ interface APIExtractParameters {
1312
+ instruction?: string;
1313
+ schema?: ZodTypeAny;
1314
+ options?: ExtractOptions;
1315
+ frameId?: string;
1316
+ }
1317
+ interface APIObserveParameters {
1318
+ instruction?: string;
1319
+ options?: ObserveOptions;
1320
+ frameId?: string;
1321
+ }
1322
+
1323
+ /**
1324
+ * Represents a path through a Zod schema from the root object down to a
1325
+ * particular field. The `segments` array describes the chain of keys/indices.
1326
+ *
1327
+ * - **String** segments indicate object property names.
1328
+ * - **Number** segments indicate array indices.
1329
+ *
1330
+ * For example, `["users", 0, "homepage"]` might describe reaching
1331
+ * the `homepage` field in `schema.users[0].homepage`.
1332
+ */
1333
+ interface ZodPathSegments {
1334
+ /**
1335
+ * The ordered list of keys/indices leading from the schema root
1336
+ * to the targeted field.
1337
+ */
1338
+ segments: Array<string | number>;
1339
+ }
1340
+
1341
+ type EvaluateOptions = {
1342
+ /** The question to ask about the task state */
1343
+ question: string;
1344
+ /** The answer to the question */
1345
+ answer?: string;
1346
+ /** Whether to take a screenshot of the task state, or array of screenshots to evaluate */
1347
+ screenshot?: boolean | Buffer[];
1348
+ /** Custom system prompt for the evaluator */
1349
+ systemPrompt?: string;
1350
+ /** Delay in milliseconds before taking the screenshot @default 250 */
1351
+ screenshotDelayMs?: number;
1352
+ /** The agent's reasoning/thought process for completing the task */
1353
+ agentReasoning?: string;
1354
+ };
1355
+ type BatchAskOptions = {
1356
+ /** Array of questions with optional answers */
1357
+ questions: Array<{
1358
+ question: string;
1359
+ answer?: string;
1360
+ }>;
1361
+ /** Whether to take a screenshot of the task state */
1362
+ screenshot?: boolean;
1363
+ /** Custom system prompt for the evaluator */
1364
+ systemPrompt?: string;
1365
+ /** Delay in milliseconds before taking the screenshot @default 1000 */
1366
+ screenshotDelayMs?: number;
1367
+ };
1368
+ /**
1369
+ * Result of an evaluation
1370
+ */
1371
+ interface EvaluationResult {
1372
+ /**
1373
+ * The evaluation result ('YES', 'NO', or 'INVALID' if parsing failed or value was unexpected)
1374
+ */
1375
+ evaluation: "YES" | "NO" | "INVALID";
1376
+ /**
1377
+ * The reasoning behind the evaluation
1378
+ */
1379
+ reasoning: string;
1380
+ }
1381
+
1316
1382
  /**
1317
1383
  * V3Context
1318
1384
  *
@@ -1327,6 +1393,8 @@ declare class StagehandShadowSegmentNotFoundError extends StagehandError {
1327
1393
  declare class V3Context {
1328
1394
  readonly conn: CdpConnection;
1329
1395
  private readonly env;
1396
+ private readonly apiClient;
1397
+ private readonly localBrowserLaunchOptions;
1330
1398
  private constructor();
1331
1399
  private readonly _piercerInstalled;
1332
1400
  private _lastPopupSignalAt;
@@ -1346,6 +1414,8 @@ declare class V3Context {
1346
1414
  */
1347
1415
  static create(wsUrl: string, opts?: {
1348
1416
  env?: "LOCAL" | "BROWSERBASE";
1417
+ apiClient?: StagehandAPIClient | null;
1418
+ localBrowserLaunchOptions?: LocalBrowserLaunchOptions | null;
1349
1419
  }): Promise<V3Context>;
1350
1420
  /**
1351
1421
  * Wait until at least one top-level Page has been created and registered.
@@ -1509,8 +1579,10 @@ declare class V3 {
1509
1579
  private _onCdpClosed;
1510
1580
  readonly experimental: boolean;
1511
1581
  readonly logInferenceToFile: boolean;
1582
+ readonly disableAPI: boolean;
1512
1583
  private externalLogger?;
1513
1584
  verbose: 0 | 1 | 2;
1585
+ private stagehandLogger;
1514
1586
  private _history;
1515
1587
  private readonly instanceId;
1516
1588
  private static _processGuardsInstalled;
@@ -1518,7 +1590,9 @@ declare class V3 {
1518
1590
  private cacheStorage;
1519
1591
  private actCache;
1520
1592
  private agentCache;
1593
+ private apiClient;
1521
1594
  v3Metrics: V3Metrics;
1595
+ constructor(opts: V3Options);
1522
1596
  /**
1523
1597
  * Async property for metrics so callers can `await v3.metrics`.
1524
1598
  * Returning a Promise future-proofs async aggregation/storage.
@@ -1539,7 +1613,6 @@ declare class V3 {
1539
1613
  addToHistory(method: HistoryEntry["method"], parameters: unknown, result?: unknown): void;
1540
1614
  updateMetrics(functionName: V3FunctionName, promptTokens: number, completionTokens: number, inferenceTimeMs: number): void;
1541
1615
  private updateTotalMetrics;
1542
- constructor(opts: V3Options);
1543
1616
  private _immediateShutdown;
1544
1617
  private static _installProcessGuards;
1545
1618
  /**
@@ -1569,10 +1642,10 @@ declare class V3 {
1569
1642
  * - extract(instruction, schema) → schema-inferred
1570
1643
  * - extract(instruction, schema, options)
1571
1644
  */
1572
- extract(): Promise<z.infer<typeof pageTextSchema>>;
1573
- extract(options: ExtractOptions): Promise<z.infer<typeof pageTextSchema>>;
1574
- extract(instruction: string, options?: ExtractOptions): Promise<z.infer<typeof defaultExtractSchema>>;
1575
- extract<T extends ZodTypeAny>(instruction: string, schema: T, options?: ExtractOptions): Promise<z.infer<T>>;
1645
+ extract(): Promise<z$1.infer<typeof pageTextSchema>>;
1646
+ extract(options: ExtractOptions): Promise<z$1.infer<typeof pageTextSchema>>;
1647
+ extract(instruction: string, options?: ExtractOptions): Promise<z$1.infer<typeof defaultExtractSchema>>;
1648
+ extract<T extends ZodTypeAny>(instruction: string, schema: T, options?: ExtractOptions): Promise<z$1.infer<T>>;
1576
1649
  /**
1577
1650
  * Run an "observe" instruction through the ObserveHandler.
1578
1651
  */
@@ -1644,14 +1717,14 @@ declare class AgentProvider {
1644
1717
  static getAgentProvider(modelName: string): AgentProviderType;
1645
1718
  }
1646
1719
 
1647
- declare function validateZodSchema(schema: z.ZodTypeAny, data: unknown): boolean;
1720
+ declare function validateZodSchema(schema: z$1.ZodTypeAny, data: unknown): boolean;
1648
1721
  /**
1649
1722
  * Detects if the code is running in the Bun runtime environment.
1650
1723
  * @returns {boolean} True if running in Bun, false otherwise.
1651
1724
  */
1652
1725
  declare function isRunningInBun(): boolean;
1653
- declare function toGeminiSchema(zodSchema: z.ZodTypeAny): Schema;
1654
- declare function getZodType(schema: z.ZodTypeAny): string;
1726
+ declare function toGeminiSchema(zodSchema: z$1.ZodTypeAny): Schema;
1727
+ declare function getZodType(schema: z$1.ZodTypeAny): string;
1655
1728
  /**
1656
1729
  * Recursively traverses a given Zod schema, scanning for any fields of type `z.string().url()`.
1657
1730
  * For each such field, it replaces the `z.string().url()` with `z.number()`.
@@ -1665,7 +1738,7 @@ declare function getZodType(schema: z.ZodTypeAny): string;
1665
1738
  * 1. The updated Zod schema, with any `.url()` fields replaced by `z.number()`.
1666
1739
  * 2. An array of {@link ZodPathSegments} objects representing each replaced field, including the path segments.
1667
1740
  */
1668
- declare function transformSchema(schema: z.ZodTypeAny, currentPath: Array<string | number>): [z.ZodTypeAny, ZodPathSegments[]];
1741
+ declare function transformSchema(schema: z$1.ZodTypeAny, currentPath: Array<string | number>): [z$1.ZodTypeAny, ZodPathSegments[]];
1669
1742
  /**
1670
1743
  * Once we get the final extracted object that has numeric IDs in place of URLs,
1671
1744
  * use `injectUrls` to walk the object and replace numeric IDs
@@ -1676,7 +1749,7 @@ declare function injectUrls(obj: unknown, path: Array<string | number>, idToUrlM
1676
1749
  /**
1677
1750
  * Mapping from LLM provider names to their corresponding environment variable names for API keys.
1678
1751
  */
1679
- declare const providerEnvVarMap: Partial<Record<ModelProvider | string, string>>;
1752
+ declare const providerEnvVarMap: Partial<Record<ModelProvider | string, string | Array<string>>>;
1680
1753
  /**
1681
1754
  * Loads an API key for a provider, checking environment variables.
1682
1755
  * @param provider The name of the provider (e.g., 'openai', 'anthropic')