@browserbasehq/stagehand 2.5.3 → 2.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -52,7 +52,7 @@ interface LLMTool {
52
52
  parameters: Record<string, unknown>;
53
53
  }
54
54
 
55
- declare const AvailableModelSchema: z.ZodEnum<["gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o4-mini", "o3", "o3-mini", "o1", "o1-mini", "gpt-4o", "gpt-4o-mini", "gpt-4o-2024-08-06", "gpt-4.5-preview", "o1-preview", "claude-3-5-sonnet-latest", "claude-3-5-sonnet-20241022", "claude-3-5-sonnet-20240620", "claude-3-7-sonnet-latest", "claude-3-7-sonnet-20250219", "cerebras-llama-3.3-70b", "cerebras-llama-3.1-8b", "groq-llama-3.3-70b-versatile", "groq-llama-3.3-70b-specdec", "gemini-1.5-flash", "gemini-1.5-pro", "gemini-1.5-flash-8b", "gemini-2.0-flash-lite", "gemini-2.0-flash", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25"]>;
55
+ declare const AvailableModelSchema: z.ZodEnum<["gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o4-mini", "o3", "o3-mini", "o1", "o1-mini", "gpt-4o", "gpt-4o-mini", "gpt-4o-2024-08-06", "gpt-4.5-preview", "o1-preview", "claude-haiku-4-5", "claude-3-7-sonnet-latest", "claude-3-7-sonnet-20250219", "cerebras-llama-3.3-70b", "cerebras-llama-3.1-8b", "groq-llama-3.3-70b-versatile", "groq-llama-3.3-70b-specdec", "gemini-1.5-flash", "gemini-1.5-pro", "gemini-1.5-flash-8b", "gemini-2.0-flash-lite", "gemini-2.0-flash", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25"]>;
56
56
  type AvailableModel = z.infer<typeof AvailableModelSchema> | string;
57
57
  type ModelProvider = "openai" | "anthropic" | "cerebras" | "groq" | "google" | "aisdk";
58
58
  type ClientOptions = ClientOptions$1 | ClientOptions$2;
@@ -318,7 +318,7 @@ interface ObserveOptions {
318
318
  iframes?: boolean;
319
319
  frameId?: string;
320
320
  }
321
- interface ObserveResult {
321
+ interface ObserveResult$1 {
322
322
  selector: string;
323
323
  description: string;
324
324
  backendNodeId?: number;
@@ -498,9 +498,9 @@ interface ActToolResult {
498
498
  action?: string;
499
499
  error?: string;
500
500
  isIframe?: boolean;
501
- playwrightArguments?: ObserveResult | null;
501
+ playwrightArguments?: ObserveResult$1 | null;
502
502
  }
503
- interface AgentAction {
503
+ interface AgentAction$1 {
504
504
  type: string;
505
505
  reasoning?: string;
506
506
  taskCompleted?: boolean;
@@ -510,13 +510,13 @@ interface AgentAction {
510
510
  timestamp?: number;
511
511
  pageUrl?: string;
512
512
  instruction?: string;
513
- playwrightArguments?: ObserveResult | null;
513
+ playwrightArguments?: ObserveResult$1 | null;
514
514
  [key: string]: unknown;
515
515
  }
516
- interface AgentResult {
516
+ interface AgentResult$1 {
517
517
  success: boolean;
518
518
  message: string;
519
- actions: AgentAction[];
519
+ actions: AgentAction$1[];
520
520
  completed: boolean;
521
521
  metadata?: Record<string, unknown>;
522
522
  usage?: {
@@ -525,14 +525,14 @@ interface AgentResult {
525
525
  inference_time_ms: number;
526
526
  };
527
527
  }
528
- interface AgentOptions {
528
+ interface AgentOptions$1 {
529
529
  maxSteps?: number;
530
530
  autoScreenshot?: boolean;
531
531
  waitBetweenActions?: number;
532
532
  context?: string;
533
533
  highlightCursor?: boolean;
534
534
  }
535
- interface AgentExecuteOptions extends AgentOptions {
535
+ interface AgentExecuteOptions$1 extends AgentOptions$1 {
536
536
  instruction: string;
537
537
  }
538
538
  type AgentProviderType = "openai" | "anthropic" | "google";
@@ -545,7 +545,7 @@ interface AgentClientOptions {
545
545
  }
546
546
  type AgentType = "openai" | "anthropic" | "google";
547
547
  interface AgentExecutionOptions {
548
- options: AgentExecuteOptions;
548
+ options: AgentExecuteOptions$1;
549
549
  logger: (message: LogLine) => void;
550
550
  retries?: number;
551
551
  }
@@ -632,7 +632,7 @@ type ResponseInputItem = {
632
632
  output: string;
633
633
  };
634
634
  interface AgentInstance {
635
- execute: (instructionOrOptions: string | AgentExecuteOptions) => Promise<AgentResult>;
635
+ execute: (instructionOrOptions: string | AgentExecuteOptions$1) => Promise<AgentResult$1>;
636
636
  setScreenshotCollector?: (collector: unknown) => void;
637
637
  }
638
638
 
@@ -657,13 +657,13 @@ interface StagehandScreenshotOptions extends PageScreenshotOptions {
657
657
  interface Page extends Omit<Page$1, "on" | "screenshot"> {
658
658
  act(action: string): Promise<ActResult>;
659
659
  act(options: ActOptions): Promise<ActResult>;
660
- act(observation: ObserveResult): Promise<ActResult>;
660
+ act(observation: ObserveResult$1): Promise<ActResult>;
661
661
  extract(instruction: string): Promise<ExtractResult<typeof defaultExtractSchema>>;
662
662
  extract<T extends z.AnyZodObject>(options: ExtractOptions<T>): Promise<ExtractResult<T>>;
663
663
  extract(): Promise<ExtractResult<typeof pageTextSchema>>;
664
- observe(): Promise<ObserveResult[]>;
665
- observe(instruction: string): Promise<ObserveResult[]>;
666
- observe(options?: ObserveOptions): Promise<ObserveResult[]>;
664
+ observe(): Promise<ObserveResult$1[]>;
665
+ observe(instruction: string): Promise<ObserveResult$1[]>;
666
+ observe(options?: ObserveOptions): Promise<ObserveResult$1[]>;
667
667
  screenshot(options?: StagehandScreenshotOptions): Promise<Buffer>;
668
668
  on: {
669
669
  (event: "popup", listener: (page: Page) => unknown): Page;
@@ -672,6 +672,31 @@ interface Page extends Omit<Page$1, "on" | "screenshot"> {
672
672
  type BrowserContext = BrowserContext$1;
673
673
  type Browser = Browser$1;
674
674
 
675
+ type AccessibilityNode = {
676
+ role: string;
677
+ name?: string;
678
+ description?: string;
679
+ value?: string;
680
+ children?: AccessibilityNode[];
681
+ childIds?: string[];
682
+ parentId?: string;
683
+ nodeId?: string;
684
+ backendDOMNodeId?: number;
685
+ properties?: {
686
+ name: string;
687
+ value: {
688
+ type: string;
689
+ value?: string;
690
+ };
691
+ }[];
692
+ };
693
+ interface TreeResult {
694
+ tree: AccessibilityNode[];
695
+ simplified: string;
696
+ iframes?: AccessibilityNode[];
697
+ idToUrl: Record<EncodedId, string>;
698
+ xpathMap: Record<EncodedId, string>;
699
+ }
675
700
  interface EnhancedContext extends Omit<BrowserContext$1, "newPage" | "pages"> {
676
701
  newPage(): Promise<Page>;
677
702
  pages(): Page[];
@@ -703,14 +728,14 @@ interface StartSessionResult {
703
728
  available?: boolean;
704
729
  }
705
730
 
706
- interface ObserveResult$1 {
731
+ interface ObserveResult {
707
732
  selector: string;
708
733
  description: string;
709
734
  backendNodeId?: number;
710
735
  method?: string;
711
736
  arguments?: string[];
712
737
  }
713
- interface AgentAction$1 {
738
+ interface AgentAction {
714
739
  type: string;
715
740
  reasoning?: string;
716
741
  taskCompleted?: boolean;
@@ -720,13 +745,13 @@ interface AgentAction$1 {
720
745
  timestamp?: number;
721
746
  pageUrl?: string;
722
747
  instruction?: string;
723
- playwrightArguments?: ObserveResult$1 | null;
748
+ playwrightArguments?: ObserveResult | null;
724
749
  [key: string]: unknown;
725
750
  }
726
- interface AgentResult$1 {
751
+ interface AgentResult {
727
752
  success: boolean;
728
753
  message: string;
729
- actions: AgentAction$1[];
754
+ actions: AgentAction[];
730
755
  completed: boolean;
731
756
  metadata?: Record<string, unknown>;
732
757
  usage?: {
@@ -735,14 +760,14 @@ interface AgentResult$1 {
735
760
  inference_time_ms: number;
736
761
  };
737
762
  }
738
- interface AgentOptions$1 {
763
+ interface AgentOptions {
739
764
  maxSteps?: number;
740
765
  autoScreenshot?: boolean;
741
766
  waitBetweenActions?: number;
742
767
  context?: string;
743
768
  highlightCursor?: boolean;
744
769
  }
745
- interface AgentExecuteOptions$1 extends AgentOptions$1 {
770
+ interface AgentExecuteOptions extends AgentOptions {
746
771
  instruction: string;
747
772
  }
748
773
 
@@ -755,11 +780,11 @@ declare class StagehandAPI {
755
780
  private fetchWithCookies;
756
781
  constructor({ apiKey, projectId, logger }: StagehandAPIConstructorParams);
757
782
  init({ modelName, modelApiKey, domSettleTimeoutMs, verbose, debugDom, systemPrompt, selfHeal, waitForCaptchaSolves, actionTimeoutMs, browserbaseSessionCreateParams, browserbaseSessionID, }: StartSessionParams): Promise<StartSessionResult>;
758
- act(options: ActOptions | ObserveResult): Promise<ActResult>;
783
+ act(options: ActOptions | ObserveResult$1): Promise<ActResult>;
759
784
  extract<T extends z.AnyZodObject>(options: ExtractOptions<T>): Promise<ExtractResult<T>>;
760
- observe(options?: ObserveOptions): Promise<ObserveResult[]>;
785
+ observe(options?: ObserveOptions): Promise<ObserveResult$1[]>;
761
786
  goto(url: string, options?: GotoOptions): Promise<void>;
762
- agentExecute(agentConfig: AgentConfig, executeOptions: AgentExecuteOptions$1): Promise<AgentResult$1>;
787
+ agentExecute(agentConfig: AgentConfig, executeOptions: AgentExecuteOptions): Promise<AgentResult>;
763
788
  end(): Promise<Response>;
764
789
  private execute;
765
790
  private request;
@@ -771,6 +796,7 @@ declare class StagehandContext {
771
796
  private pageMap;
772
797
  private activeStagehandPage;
773
798
  private readonly frameIdMap;
799
+ private static readonly contextsWithInitScript;
774
800
  private constructor();
775
801
  private createStagehandPage;
776
802
  static init(context: BrowserContext$1, stagehand: Stagehand): Promise<StagehandContext>;
@@ -810,7 +836,6 @@ declare class StagehandPage {
810
836
  ordinalForFrameId(fid: string | undefined): number;
811
837
  encodeWithFrameId(fid: string | undefined, backendId: number): EncodedId;
812
838
  resetFrameOrdinals(): void;
813
- private ensureStagehandScript;
814
839
  /** Register the custom selector engine that pierces open/closed shadow roots. */
815
840
  private ensureStagehandSelectorEngine;
816
841
  /**
@@ -858,9 +883,9 @@ declare class StagehandPage {
858
883
  * `this.stagehand.domSettleTimeoutMs`.
859
884
  */
860
885
  _waitForSettledDom(timeoutMs?: number): Promise<void>;
861
- act(actionOrOptions: string | ActOptions | ObserveResult): Promise<ActResult>;
886
+ act(actionOrOptions: string | ActOptions | ObserveResult$1): Promise<ActResult>;
862
887
  extract<T extends z.AnyZodObject = typeof defaultExtractSchema>(instructionOrOptions?: string | ExtractOptions<T>): Promise<ExtractResult<T>>;
863
- observe(instructionOrOptions?: string | ObserveOptions): Promise<ObserveResult[]>;
888
+ observe(instructionOrOptions?: string | ObserveOptions): Promise<ObserveResult$1[]>;
864
889
  /**
865
890
  * Get or create a CDP session for the given target.
866
891
  * @param target The Page or (OOPIF) Frame you want to talk to.
@@ -926,6 +951,7 @@ declare class StagehandError extends Error {
926
951
  constructor(message: string);
927
952
  }
928
953
  declare class StagehandDefaultError extends StagehandError {
954
+ causedBy?: Error | StagehandError;
929
955
  constructor(error?: unknown);
930
956
  }
931
957
  declare class StagehandEnvironmentError extends StagehandError {
@@ -1130,6 +1156,21 @@ declare class AISdkClient extends LLMClient {
1130
1156
  createChatCompletion<T = ChatCompletion>({ options, }: CreateChatCompletionOptions): Promise<T>;
1131
1157
  }
1132
1158
 
1159
+ /**
1160
+ * Retrieve and build a cleaned accessibility tree for a document or specific iframe.
1161
+ * Prunes, formats, and optionally filters by XPath, including scrollable role decoration.
1162
+ *
1163
+ * @deprecated This helper is an escape hatch intended for troubleshooting. Prefer
1164
+ * extract() for supported usage and reach for this only
1165
+ * when absolutely necessary.
1166
+ * @param stagehandPage - The StagehandPage instance for Playwright and CDP interaction.
1167
+ * @param logger - Logging function for diagnostics and performance metrics.
1168
+ * @param selector - Optional XPath to filter the AX tree to a specific subtree.
1169
+ * @param targetFrame - Optional Playwright.Frame to scope the AX tree retrieval.
1170
+ * @returns A Promise resolving to a TreeResult with the hierarchical AX tree and related metadata.
1171
+ */
1172
+ declare function getAccessibilityTree(experimental: boolean, stagehandPage: StagehandPage, logger: (log: LogLine) => void, selector?: string, targetFrame?: Frame): Promise<TreeResult>;
1173
+
1133
1174
  declare class Stagehand {
1134
1175
  private stagehandPage;
1135
1176
  private stagehandContext;
@@ -1195,9 +1236,9 @@ declare class Stagehand {
1195
1236
  * @returns An agent instance with execute() method
1196
1237
  */
1197
1238
  agent(options?: AgentConfig): {
1198
- execute: (instructionOrOptions: string | AgentExecuteOptions) => Promise<AgentResult>;
1239
+ execute: (instructionOrOptions: string | AgentExecuteOptions$1) => Promise<AgentResult$1>;
1199
1240
  setScreenshotCollector?: (collector: unknown) => void;
1200
1241
  };
1201
1242
  }
1202
1243
 
1203
- export { AISdkClient, type ActOptions, type ActResult, type ActToolResult, type ActionExecutionResult, type AgentAction, type AgentClientOptions, type AgentConfig, type AgentExecuteOptions, type AgentExecuteParams, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentOptions, type AgentProviderType, type AgentResult, AgentScreenshotProviderError, type AgentType, AnnotatedScreenshotText, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AvailableModel, AvailableModelSchema, type Browser, type BrowserContext, type BrowserResult, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ComputerCallItem, type ConstructorParams, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, type FunctionCallItem, type GotoOptions, HandlerNotInitializedError, type HistoryEntry, type InitResult, InvalidAISDKModelFormatError, LLMClient, type LLMResponse, LLMResponseError, LOG_LEVEL_NAMES, type LocalBrowserLaunchOptions, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelProvider, type ObserveOptions, type ObserveResult, type Page, PlaywrightCommandException, PlaywrightCommandMethodNotSupportedException, type ResponseInputItem, type ResponseItem, Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, StagehandEnvironmentError, StagehandError, StagehandEvalError, StagehandFunctionName, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, type StagehandMetrics, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, StagehandResponseParseError, type StagehandScreenshotOptions, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, type ToolUseItem, UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, XPathResolutionError, type ZodPathSegments, ZodSchemaValidationError, connectToMCPServer, defaultExtractSchema, pageTextSchema };
1244
+ export { AISdkClient, type ActOptions, type ActResult, type ActToolResult, type ActionExecutionResult, type AgentAction$1 as AgentAction, type AgentClientOptions, type AgentConfig, type AgentExecuteOptions$1 as AgentExecuteOptions, type AgentExecuteParams, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentOptions$1 as AgentOptions, type AgentProviderType, type AgentResult$1 as AgentResult, AgentScreenshotProviderError, type AgentType, AnnotatedScreenshotText, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AvailableModel, AvailableModelSchema, type Browser, type BrowserContext, type BrowserResult, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ComputerCallItem, type ConstructorParams, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, type FunctionCallItem, type GotoOptions, HandlerNotInitializedError, type HistoryEntry, type InitResult, InvalidAISDKModelFormatError, LLMClient, type LLMResponse, LLMResponseError, LOG_LEVEL_NAMES, type LocalBrowserLaunchOptions, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelProvider, type ObserveOptions, type ObserveResult$1 as ObserveResult, type Page, PlaywrightCommandException, PlaywrightCommandMethodNotSupportedException, type ResponseInputItem, type ResponseItem, Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, StagehandEnvironmentError, StagehandError, StagehandEvalError, StagehandFunctionName, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, type StagehandMetrics, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, StagehandResponseParseError, type StagehandScreenshotOptions, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, type ToolUseItem, type TreeResult, UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, XPathResolutionError, type ZodPathSegments, ZodSchemaValidationError, connectToMCPServer, defaultExtractSchema, getAccessibilityTree, pageTextSchema };