@browserbasehq/stagehand 2.5.3 → 2.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -318,7 +318,7 @@ interface ObserveOptions {
318
318
  iframes?: boolean;
319
319
  frameId?: string;
320
320
  }
321
- interface ObserveResult {
321
+ interface ObserveResult$1 {
322
322
  selector: string;
323
323
  description: string;
324
324
  backendNodeId?: number;
@@ -498,9 +498,9 @@ interface ActToolResult {
498
498
  action?: string;
499
499
  error?: string;
500
500
  isIframe?: boolean;
501
- playwrightArguments?: ObserveResult | null;
501
+ playwrightArguments?: ObserveResult$1 | null;
502
502
  }
503
- interface AgentAction {
503
+ interface AgentAction$1 {
504
504
  type: string;
505
505
  reasoning?: string;
506
506
  taskCompleted?: boolean;
@@ -510,13 +510,13 @@ interface AgentAction {
510
510
  timestamp?: number;
511
511
  pageUrl?: string;
512
512
  instruction?: string;
513
- playwrightArguments?: ObserveResult | null;
513
+ playwrightArguments?: ObserveResult$1 | null;
514
514
  [key: string]: unknown;
515
515
  }
516
- interface AgentResult {
516
+ interface AgentResult$1 {
517
517
  success: boolean;
518
518
  message: string;
519
- actions: AgentAction[];
519
+ actions: AgentAction$1[];
520
520
  completed: boolean;
521
521
  metadata?: Record<string, unknown>;
522
522
  usage?: {
@@ -525,14 +525,14 @@ interface AgentResult {
525
525
  inference_time_ms: number;
526
526
  };
527
527
  }
528
- interface AgentOptions {
528
+ interface AgentOptions$1 {
529
529
  maxSteps?: number;
530
530
  autoScreenshot?: boolean;
531
531
  waitBetweenActions?: number;
532
532
  context?: string;
533
533
  highlightCursor?: boolean;
534
534
  }
535
- interface AgentExecuteOptions extends AgentOptions {
535
+ interface AgentExecuteOptions$1 extends AgentOptions$1 {
536
536
  instruction: string;
537
537
  }
538
538
  type AgentProviderType = "openai" | "anthropic" | "google";
@@ -545,7 +545,7 @@ interface AgentClientOptions {
545
545
  }
546
546
  type AgentType = "openai" | "anthropic" | "google";
547
547
  interface AgentExecutionOptions {
548
- options: AgentExecuteOptions;
548
+ options: AgentExecuteOptions$1;
549
549
  logger: (message: LogLine) => void;
550
550
  retries?: number;
551
551
  }
@@ -632,7 +632,7 @@ type ResponseInputItem = {
632
632
  output: string;
633
633
  };
634
634
  interface AgentInstance {
635
- execute: (instructionOrOptions: string | AgentExecuteOptions) => Promise<AgentResult>;
635
+ execute: (instructionOrOptions: string | AgentExecuteOptions$1) => Promise<AgentResult$1>;
636
636
  setScreenshotCollector?: (collector: unknown) => void;
637
637
  }
638
638
 
@@ -657,13 +657,13 @@ interface StagehandScreenshotOptions extends PageScreenshotOptions {
657
657
  interface Page extends Omit<Page$1, "on" | "screenshot"> {
658
658
  act(action: string): Promise<ActResult>;
659
659
  act(options: ActOptions): Promise<ActResult>;
660
- act(observation: ObserveResult): Promise<ActResult>;
660
+ act(observation: ObserveResult$1): Promise<ActResult>;
661
661
  extract(instruction: string): Promise<ExtractResult<typeof defaultExtractSchema>>;
662
662
  extract<T extends z.AnyZodObject>(options: ExtractOptions<T>): Promise<ExtractResult<T>>;
663
663
  extract(): Promise<ExtractResult<typeof pageTextSchema>>;
664
- observe(): Promise<ObserveResult[]>;
665
- observe(instruction: string): Promise<ObserveResult[]>;
666
- observe(options?: ObserveOptions): Promise<ObserveResult[]>;
664
+ observe(): Promise<ObserveResult$1[]>;
665
+ observe(instruction: string): Promise<ObserveResult$1[]>;
666
+ observe(options?: ObserveOptions): Promise<ObserveResult$1[]>;
667
667
  screenshot(options?: StagehandScreenshotOptions): Promise<Buffer>;
668
668
  on: {
669
669
  (event: "popup", listener: (page: Page) => unknown): Page;
@@ -672,6 +672,31 @@ interface Page extends Omit<Page$1, "on" | "screenshot"> {
672
672
  type BrowserContext = BrowserContext$1;
673
673
  type Browser = Browser$1;
674
674
 
675
+ type AccessibilityNode = {
676
+ role: string;
677
+ name?: string;
678
+ description?: string;
679
+ value?: string;
680
+ children?: AccessibilityNode[];
681
+ childIds?: string[];
682
+ parentId?: string;
683
+ nodeId?: string;
684
+ backendDOMNodeId?: number;
685
+ properties?: {
686
+ name: string;
687
+ value: {
688
+ type: string;
689
+ value?: string;
690
+ };
691
+ }[];
692
+ };
693
+ interface TreeResult {
694
+ tree: AccessibilityNode[];
695
+ simplified: string;
696
+ iframes?: AccessibilityNode[];
697
+ idToUrl: Record<EncodedId, string>;
698
+ xpathMap: Record<EncodedId, string>;
699
+ }
675
700
  interface EnhancedContext extends Omit<BrowserContext$1, "newPage" | "pages"> {
676
701
  newPage(): Promise<Page>;
677
702
  pages(): Page[];
@@ -703,14 +728,14 @@ interface StartSessionResult {
703
728
  available?: boolean;
704
729
  }
705
730
 
706
- interface ObserveResult$1 {
731
+ interface ObserveResult {
707
732
  selector: string;
708
733
  description: string;
709
734
  backendNodeId?: number;
710
735
  method?: string;
711
736
  arguments?: string[];
712
737
  }
713
- interface AgentAction$1 {
738
+ interface AgentAction {
714
739
  type: string;
715
740
  reasoning?: string;
716
741
  taskCompleted?: boolean;
@@ -720,13 +745,13 @@ interface AgentAction$1 {
720
745
  timestamp?: number;
721
746
  pageUrl?: string;
722
747
  instruction?: string;
723
- playwrightArguments?: ObserveResult$1 | null;
748
+ playwrightArguments?: ObserveResult | null;
724
749
  [key: string]: unknown;
725
750
  }
726
- interface AgentResult$1 {
751
+ interface AgentResult {
727
752
  success: boolean;
728
753
  message: string;
729
- actions: AgentAction$1[];
754
+ actions: AgentAction[];
730
755
  completed: boolean;
731
756
  metadata?: Record<string, unknown>;
732
757
  usage?: {
@@ -735,14 +760,14 @@ interface AgentResult$1 {
735
760
  inference_time_ms: number;
736
761
  };
737
762
  }
738
- interface AgentOptions$1 {
763
+ interface AgentOptions {
739
764
  maxSteps?: number;
740
765
  autoScreenshot?: boolean;
741
766
  waitBetweenActions?: number;
742
767
  context?: string;
743
768
  highlightCursor?: boolean;
744
769
  }
745
- interface AgentExecuteOptions$1 extends AgentOptions$1 {
770
+ interface AgentExecuteOptions extends AgentOptions {
746
771
  instruction: string;
747
772
  }
748
773
 
@@ -755,11 +780,11 @@ declare class StagehandAPI {
755
780
  private fetchWithCookies;
756
781
  constructor({ apiKey, projectId, logger }: StagehandAPIConstructorParams);
757
782
  init({ modelName, modelApiKey, domSettleTimeoutMs, verbose, debugDom, systemPrompt, selfHeal, waitForCaptchaSolves, actionTimeoutMs, browserbaseSessionCreateParams, browserbaseSessionID, }: StartSessionParams): Promise<StartSessionResult>;
758
- act(options: ActOptions | ObserveResult): Promise<ActResult>;
783
+ act(options: ActOptions | ObserveResult$1): Promise<ActResult>;
759
784
  extract<T extends z.AnyZodObject>(options: ExtractOptions<T>): Promise<ExtractResult<T>>;
760
- observe(options?: ObserveOptions): Promise<ObserveResult[]>;
785
+ observe(options?: ObserveOptions): Promise<ObserveResult$1[]>;
761
786
  goto(url: string, options?: GotoOptions): Promise<void>;
762
- agentExecute(agentConfig: AgentConfig, executeOptions: AgentExecuteOptions$1): Promise<AgentResult$1>;
787
+ agentExecute(agentConfig: AgentConfig, executeOptions: AgentExecuteOptions): Promise<AgentResult>;
763
788
  end(): Promise<Response>;
764
789
  private execute;
765
790
  private request;
@@ -858,9 +883,9 @@ declare class StagehandPage {
858
883
  * `this.stagehand.domSettleTimeoutMs`.
859
884
  */
860
885
  _waitForSettledDom(timeoutMs?: number): Promise<void>;
861
- act(actionOrOptions: string | ActOptions | ObserveResult): Promise<ActResult>;
886
+ act(actionOrOptions: string | ActOptions | ObserveResult$1): Promise<ActResult>;
862
887
  extract<T extends z.AnyZodObject = typeof defaultExtractSchema>(instructionOrOptions?: string | ExtractOptions<T>): Promise<ExtractResult<T>>;
863
- observe(instructionOrOptions?: string | ObserveOptions): Promise<ObserveResult[]>;
888
+ observe(instructionOrOptions?: string | ObserveOptions): Promise<ObserveResult$1[]>;
864
889
  /**
865
890
  * Get or create a CDP session for the given target.
866
891
  * @param target The Page or (OOPIF) Frame you want to talk to.
@@ -926,6 +951,7 @@ declare class StagehandError extends Error {
926
951
  constructor(message: string);
927
952
  }
928
953
  declare class StagehandDefaultError extends StagehandError {
954
+ causedBy?: Error | StagehandError;
929
955
  constructor(error?: unknown);
930
956
  }
931
957
  declare class StagehandEnvironmentError extends StagehandError {
@@ -1130,6 +1156,21 @@ declare class AISdkClient extends LLMClient {
1130
1156
  createChatCompletion<T = ChatCompletion>({ options, }: CreateChatCompletionOptions): Promise<T>;
1131
1157
  }
1132
1158
 
1159
+ /**
1160
+ * Retrieve and build a cleaned accessibility tree for a document or specific iframe.
1161
+ * Prunes, formats, and optionally filters by XPath, including scrollable role decoration.
1162
+ *
1163
+ * @deprecated This helper is an escape hatch intended for troubleshooting. Prefer
1164
+ * extract() for supported usage and reach for this only
1165
+ * when absolutely necessary.
1166
+ * @param stagehandPage - The StagehandPage instance for Playwright and CDP interaction.
1167
+ * @param logger - Logging function for diagnostics and performance metrics.
1168
+ * @param selector - Optional XPath to filter the AX tree to a specific subtree.
1169
+ * @param targetFrame - Optional Playwright.Frame to scope the AX tree retrieval.
1170
+ * @returns A Promise resolving to a TreeResult with the hierarchical AX tree and related metadata.
1171
+ */
1172
+ declare function getAccessibilityTree(experimental: boolean, stagehandPage: StagehandPage, logger: (log: LogLine) => void, selector?: string, targetFrame?: Frame): Promise<TreeResult>;
1173
+
1133
1174
  declare class Stagehand {
1134
1175
  private stagehandPage;
1135
1176
  private stagehandContext;
@@ -1195,9 +1236,9 @@ declare class Stagehand {
1195
1236
  * @returns An agent instance with execute() method
1196
1237
  */
1197
1238
  agent(options?: AgentConfig): {
1198
- execute: (instructionOrOptions: string | AgentExecuteOptions) => Promise<AgentResult>;
1239
+ execute: (instructionOrOptions: string | AgentExecuteOptions$1) => Promise<AgentResult$1>;
1199
1240
  setScreenshotCollector?: (collector: unknown) => void;
1200
1241
  };
1201
1242
  }
1202
1243
 
1203
- export { AISdkClient, type ActOptions, type ActResult, type ActToolResult, type ActionExecutionResult, type AgentAction, type AgentClientOptions, type AgentConfig, type AgentExecuteOptions, type AgentExecuteParams, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentOptions, type AgentProviderType, type AgentResult, AgentScreenshotProviderError, type AgentType, AnnotatedScreenshotText, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AvailableModel, AvailableModelSchema, type Browser, type BrowserContext, type BrowserResult, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ComputerCallItem, type ConstructorParams, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, type FunctionCallItem, type GotoOptions, HandlerNotInitializedError, type HistoryEntry, type InitResult, InvalidAISDKModelFormatError, LLMClient, type LLMResponse, LLMResponseError, LOG_LEVEL_NAMES, type LocalBrowserLaunchOptions, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelProvider, type ObserveOptions, type ObserveResult, type Page, PlaywrightCommandException, PlaywrightCommandMethodNotSupportedException, type ResponseInputItem, type ResponseItem, Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, StagehandEnvironmentError, StagehandError, StagehandEvalError, StagehandFunctionName, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, type StagehandMetrics, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, StagehandResponseParseError, type StagehandScreenshotOptions, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, type ToolUseItem, UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, XPathResolutionError, type ZodPathSegments, ZodSchemaValidationError, connectToMCPServer, defaultExtractSchema, pageTextSchema };
1244
+ export { AISdkClient, type ActOptions, type ActResult, type ActToolResult, type ActionExecutionResult, type AgentAction$1 as AgentAction, type AgentClientOptions, type AgentConfig, type AgentExecuteOptions$1 as AgentExecuteOptions, type AgentExecuteParams, type AgentExecutionOptions, type AgentHandlerOptions, type AgentInstance, type AgentOptions$1 as AgentOptions, type AgentProviderType, type AgentResult$1 as AgentResult, AgentScreenshotProviderError, type AgentType, AnnotatedScreenshotText, type AnthropicContentBlock, type AnthropicJsonSchemaObject, type AnthropicMessage, type AnthropicTextBlock, type AnthropicToolResult, type AvailableModel, AvailableModelSchema, type Browser, type BrowserContext, type BrowserResult, BrowserbaseSessionNotFoundError, CaptchaTimeoutError, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ComputerCallItem, type ConstructorParams, ContentFrameNotFoundError, type CreateChatCompletionOptions, CreateChatCompletionResponseError, ExperimentalApiConflictError, ExperimentalNotConfiguredError, type ExtractOptions, type ExtractResult, type FunctionCallItem, type GotoOptions, HandlerNotInitializedError, type HistoryEntry, type InitResult, InvalidAISDKModelFormatError, LLMClient, type LLMResponse, LLMResponseError, LOG_LEVEL_NAMES, type LocalBrowserLaunchOptions, type LogLevel, type LogLine, type Logger, MCPConnectionError, MissingEnvironmentVariableError, MissingLLMConfigurationError, type ModelProvider, type ObserveOptions, type ObserveResult$1 as ObserveResult, type Page, PlaywrightCommandException, PlaywrightCommandMethodNotSupportedException, type ResponseInputItem, type ResponseItem, Stagehand, StagehandAPIError, StagehandAPIUnauthorizedError, StagehandClickError, StagehandDefaultError, StagehandDomProcessError, StagehandElementNotFoundError, StagehandEnvironmentError, StagehandError, StagehandEvalError, StagehandFunctionName, StagehandHttpError, StagehandIframeError, StagehandInitError, StagehandInvalidArgumentError, type StagehandMetrics, StagehandMissingArgumentError, StagehandNotInitializedError, StagehandResponseBodyError, StagehandResponseParseError, type StagehandScreenshotOptions, StagehandServerError, StagehandShadowRootMissingError, StagehandShadowSegmentEmptyError, StagehandShadowSegmentNotFoundError, type ToolUseItem, type TreeResult, UnsupportedAISDKModelProviderError, UnsupportedModelError, UnsupportedModelProviderError, XPathResolutionError, type ZodPathSegments, ZodSchemaValidationError, connectToMCPServer, defaultExtractSchema, getAccessibilityTree, pageTextSchema };
package/dist/index.js CHANGED
@@ -473,6 +473,7 @@ __export(index_exports, {
473
473
  ZodSchemaValidationError: () => ZodSchemaValidationError,
474
474
  connectToMCPServer: () => connectToMCPServer,
475
475
  defaultExtractSchema: () => defaultExtractSchema,
476
+ getAccessibilityTree: () => getAccessibilityTree,
476
477
  pageTextSchema: () => pageTextSchema
477
478
  });
478
479
  module.exports = __toCommonJS(index_exports);
@@ -493,7 +494,7 @@ var StagehandFunctionName = /* @__PURE__ */ ((StagehandFunctionName2) => {
493
494
  })(StagehandFunctionName || {});
494
495
 
495
496
  // lib/version.ts
496
- var STAGEHAND_VERSION = "2.5.3";
497
+ var STAGEHAND_VERSION = "2.5.4";
497
498
 
498
499
  // types/stagehandErrors.ts
499
500
  var StagehandError = class extends Error {
@@ -514,6 +515,7 @@ If you need help, please open a Github issue or reach out to us on Slack: https:
514
515
  Full error:
515
516
  ${error.message}`
516
517
  );
518
+ this.causedBy = error;
517
519
  }
518
520
  }
519
521
  };
@@ -2689,8 +2691,10 @@ function buildBackendIdMaps(experimental, sp, targetFrame) {
2689
2691
  if (n.contentDocument && locate(n.contentDocument)) return true;
2690
2692
  return false;
2691
2693
  } else {
2692
- if (n.backendNodeId === backendNodeId)
2693
- return iframeNode = n, true;
2694
+ if (n.backendNodeId === backendNodeId) {
2695
+ iframeNode = n;
2696
+ return true;
2697
+ }
2694
2698
  return ((_d2 = (_c2 = n.children) == null ? void 0 : _c2.some(locate)) != null ? _d2 : false) || (n.contentDocument ? locate(n.contentDocument) : false);
2695
2699
  }
2696
2700
  };
@@ -2702,17 +2706,25 @@ function buildBackendIdMaps(experimental, sp, targetFrame) {
2702
2706
  }
2703
2707
  const tagNameMap = {};
2704
2708
  const xpathMap = {};
2709
+ const scrollableBackendIds = /* @__PURE__ */ new Set();
2705
2710
  const stack = [{ node: startNode, path: "", fid: rootFid }];
2706
2711
  const seen = /* @__PURE__ */ new Set();
2707
2712
  const joinStep = (base, step) => base.endsWith("//") ? `${base}${step}` : `${base}/${step}`;
2708
2713
  while (stack.length) {
2709
2714
  const { node, path: path4, fid } = stack.pop();
2710
2715
  if (!node.backendNodeId) continue;
2716
+ const backendId = node.backendNodeId;
2711
2717
  const enc = sp.encodeWithFrameId(fid, node.backendNodeId);
2712
2718
  if (seen.has(enc)) continue;
2713
2719
  seen.add(enc);
2714
2720
  tagNameMap[enc] = lc(String(node.nodeName));
2715
2721
  xpathMap[enc] = path4;
2722
+ if (node.isScrollable === true) {
2723
+ scrollableBackendIds.add(backendId);
2724
+ }
2725
+ if (lc(String(node.nodeName)) === "html") {
2726
+ scrollableBackendIds.add(backendId);
2727
+ }
2716
2728
  if (lc(node.nodeName) === "iframe" && node.contentDocument) {
2717
2729
  const childFid = (_b = node.contentDocument.frameId) != null ? _b : fid;
2718
2730
  stack.push({ node: node.contentDocument, path: "", fid: childFid });
@@ -2753,7 +2765,7 @@ function buildBackendIdMaps(experimental, sp, targetFrame) {
2753
2765
  }
2754
2766
  }
2755
2767
  }
2756
- return { tagNameMap, xpathMap };
2768
+ return { tagNameMap, xpathMap, scrollableBackendIds };
2757
2769
  } finally {
2758
2770
  yield sp.disableCDP(
2759
2771
  "DOM",
@@ -2885,11 +2897,7 @@ function getCDPFrameId(sp, frame) {
2885
2897
  }
2886
2898
  function getAccessibilityTree(experimental, stagehandPage, logger, selector, targetFrame) {
2887
2899
  return __async(this, null, function* () {
2888
- const { tagNameMap, xpathMap } = yield buildBackendIdMaps(
2889
- experimental,
2890
- stagehandPage,
2891
- targetFrame
2892
- );
2900
+ const { tagNameMap, xpathMap, scrollableBackendIds } = yield buildBackendIdMaps(experimental, stagehandPage, targetFrame);
2893
2901
  yield stagehandPage.enableCDP("Accessibility", targetFrame);
2894
2902
  try {
2895
2903
  let params = {};
@@ -2916,10 +2924,6 @@ function getAccessibilityTree(experimental, stagehandPage, logger, selector, tar
2916
2924
  }
2917
2925
  }
2918
2926
  const { nodes: fullNodes } = yield stagehandPage.sendCDP("Accessibility.getFullAXTree", params, sessionFrame);
2919
- const scrollableIds = yield findScrollableElementIds(
2920
- stagehandPage,
2921
- targetFrame
2922
- );
2923
2927
  let nodes = fullNodes;
2924
2928
  if (selector) {
2925
2929
  nodes = yield filterAXTreeByXPath(
@@ -2931,7 +2935,7 @@ function getAccessibilityTree(experimental, stagehandPage, logger, selector, tar
2931
2935
  }
2932
2936
  const start = Date.now();
2933
2937
  const tree = yield buildHierarchicalTree(
2934
- decorateRoles(nodes, scrollableIds),
2938
+ decorateRoles(nodes, scrollableBackendIds),
2935
2939
  tagNameMap,
2936
2940
  logger,
2937
2941
  xpathMap
@@ -2980,16 +2984,16 @@ function filterAXTreeByXPath(page, full, xpath, targetFrame) {
2980
2984
  }
2981
2985
  function decorateRoles(nodes, scrollables) {
2982
2986
  return nodes.map((n) => {
2983
- var _a15, _b, _c, _d, _e;
2987
+ var _a15, _b, _c, _d, _e, _f;
2984
2988
  let role = (_b = (_a15 = n.role) == null ? void 0 : _a15.value) != null ? _b : "";
2985
- if (scrollables.has(n.backendDOMNodeId)) {
2989
+ if (((_c = n.role) == null ? void 0 : _c.value) !== "RootWebArea" && scrollables.has(n.backendDOMNodeId)) {
2986
2990
  role = role && role !== "generic" && role !== "none" ? `scrollable, ${role}` : "scrollable";
2987
2991
  }
2988
2992
  return {
2989
2993
  role,
2990
- name: (_c = n.name) == null ? void 0 : _c.value,
2991
- description: (_d = n.description) == null ? void 0 : _d.value,
2992
- value: (_e = n.value) == null ? void 0 : _e.value,
2994
+ name: (_d = n.name) == null ? void 0 : _d.value,
2995
+ description: (_e = n.description) == null ? void 0 : _e.value,
2996
+ value: (_f = n.value) == null ? void 0 : _f.value,
2993
2997
  nodeId: n.nodeId,
2994
2998
  backendDOMNodeId: n.backendDOMNodeId,
2995
2999
  parentId: n.parentId,
@@ -3219,27 +3223,6 @@ function getAccessibilityTreeWithFrames(experimental, stagehandPage, logger, roo
3219
3223
  return { combinedTree, combinedXpathMap, combinedUrlMap };
3220
3224
  });
3221
3225
  }
3222
- function findScrollableElementIds(stagehandPage, targetFrame) {
3223
- return __async(this, null, function* () {
3224
- const xpaths = targetFrame ? yield targetFrame.evaluate(() => window.getScrollableElementXpaths()) : yield stagehandPage.page.evaluate(
3225
- () => window.getScrollableElementXpaths()
3226
- );
3227
- const backendIds = /* @__PURE__ */ new Set();
3228
- for (const xpath of xpaths) {
3229
- if (!xpath) continue;
3230
- const objectId = yield resolveObjectIdForXPath(
3231
- stagehandPage,
3232
- xpath,
3233
- targetFrame
3234
- );
3235
- if (objectId) {
3236
- const { node } = yield stagehandPage.sendCDP("DOM.describeNode", { objectId }, targetFrame);
3237
- if (node == null ? void 0 : node.backendNodeId) backendIds.add(node.backendNodeId);
3238
- }
3239
- }
3240
- return backendIds;
3241
- });
3242
- }
3243
3226
  function resolveObjectIdForXPath(page, xpath, targetFrame) {
3244
3227
  return __async(this, null, function* () {
3245
3228
  const contextId = yield getFrameExecutionContextId(page, targetFrame);
@@ -24932,5 +24915,6 @@ var Stagehand3 = class {
24932
24915
  ZodSchemaValidationError,
24933
24916
  connectToMCPServer,
24934
24917
  defaultExtractSchema,
24918
+ getAccessibilityTree,
24935
24919
  pageTextSchema
24936
24920
  });
@@ -55,6 +55,9 @@ export declare function getCDPFrameId(sp: StagehandPage, frame?: Frame): Promise
55
55
  * Retrieve and build a cleaned accessibility tree for a document or specific iframe.
56
56
  * Prunes, formats, and optionally filters by XPath, including scrollable role decoration.
57
57
  *
58
+ * @deprecated This helper is an escape hatch intended for troubleshooting. Prefer
59
+ * extract() for supported usage and reach for this only
60
+ * when absolutely necessary.
58
61
  * @param stagehandPage - The StagehandPage instance for Playwright and CDP interaction.
59
62
  * @param logger - Logging function for diagnostics and performance metrics.
60
63
  * @param selector - Optional XPath to filter the AX tree to a specific subtree.
@@ -97,25 +100,6 @@ export declare function injectSubtrees(tree: string, idToTree: Map<EncodedId, st
97
100
  * @returns A Promise resolving to CombinedA11yResult with combined tree text, xpath map, and URL map.
98
101
  */
99
102
  export declare function getAccessibilityTreeWithFrames(experimental: boolean, stagehandPage: StagehandPage, logger: (l: LogLine) => void, rootXPath?: string): Promise<CombinedA11yResult>;
100
- /**
101
- * `findScrollableElementIds` is a function that identifies elements in
102
- * the browser that are deemed "scrollable". At a high level, it does the
103
- * following:
104
- * - Calls the browser-side `window.getScrollableElementXpaths()` function,
105
- * which returns a list of XPaths for scrollable containers.
106
- * - Iterates over the returned list of XPaths, locating each element in the DOM
107
- * using `stagehandPage.sendCDP(...)`
108
- * - During each iteration, we call `Runtime.evaluate` to run `document.evaluate(...)`
109
- * with each XPath, obtaining a `RemoteObject` reference if it exists.
110
- * - Then, for each valid object reference, we call `DOM.describeNode` to retrieve
111
- * the element’s `backendNodeId`.
112
- * - Collects all resulting `backendNodeId`s in a Set and returns them.
113
- *
114
- * @param stagehandPage - A StagehandPage instance with built-in CDP helpers.
115
- * @returns A Promise that resolves to a Set of unique `backendNodeId`s corresponding
116
- * to scrollable elements in the DOM.
117
- */
118
- export declare function findScrollableElementIds(stagehandPage: StagehandPage, targetFrame?: Frame): Promise<Set<number>>;
119
103
  /**
120
104
  * Resolve an XPath to a Chrome-DevTools-Protocol (CDP) remote-object ID.
121
105
  *
@@ -91,4 +91,7 @@ export * from "../types/stagehandApiErrors";
91
91
  export * from "../types/stagehandErrors";
92
92
  export * from "./llm/LLMClient";
93
93
  export * from "./llm/aisdk";
94
+ export type { TreeResult } from "../types/context";
94
95
  export { connectToMCPServer };
96
+ /** @deprecated Direct AX access is not part of the stable API. Prefer extract(). */
97
+ export { getAccessibilityTree } from "./a11y/utils";
@@ -2,4 +2,4 @@
2
2
  * AUTO-GENERATED — DO NOT EDIT BY HAND
3
3
  * Run `pnpm run gen-version` to refresh.
4
4
  */
5
- export declare const STAGEHAND_VERSION: "2.5.3";
5
+ export declare const STAGEHAND_VERSION: "2.5.4";
@@ -58,11 +58,13 @@ export type DOMNode = {
58
58
  contentDocument?: DOMNode;
59
59
  nodeType: number;
60
60
  frameId?: string;
61
+ isScrollable?: boolean;
61
62
  };
62
63
  export type BackendIdMaps = {
63
64
  tagNameMap: Record<number, string>;
64
65
  xpathMap: Record<number, string>;
65
66
  iframeXPath?: string;
67
+ scrollableBackendIds: Set<number>;
66
68
  };
67
69
  export interface EnhancedContext extends Omit<PlaywrightContext, "newPage" | "pages"> {
68
70
  newPage(): Promise<Page>;
@@ -3,6 +3,7 @@ export declare class StagehandError extends Error {
3
3
  constructor(message: string);
4
4
  }
5
5
  export declare class StagehandDefaultError extends StagehandError {
6
+ causedBy?: Error | StagehandError;
6
7
  constructor(error?: unknown);
7
8
  }
8
9
  export declare class StagehandEnvironmentError extends StagehandError {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@browserbasehq/stagehand",
3
- "version": "2.5.3",
3
+ "version": "2.5.4",
4
4
  "description": "An AI web browsing framework focused on simplicity and extensibility.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.js",
@@ -8,30 +8,6 @@
8
8
  "bin": {
9
9
  "evals": "./dist/evals/cli.js"
10
10
  },
11
- "scripts": {
12
- "example": "pnpm --filter @browserbasehq/stagehand-examples run start",
13
- "format": "prettier --write .",
14
- "prettier": "prettier --check .",
15
- "prettier:fix": "prettier --write .",
16
- "eslint": "eslint .",
17
- "cache:clear": "rm -rf .cache",
18
- "docs": "pnpm --filter @browserbasehq/stagehand-docs run dev",
19
- "evals": "tsx scripts/run-evals.ts",
20
- "evals:legacy": "tsx scripts/run-evals.ts",
21
- "e2e": "pnpm run build && cd evals/deterministic && playwright test --config=e2e.playwright.config.ts",
22
- "e2e:bb": "pnpm run build && cd evals/deterministic && playwright test --config=bb.playwright.config.ts",
23
- "e2e:local": "pnpm run build && cd evals/deterministic && playwright test --config=local.playwright.config.ts",
24
- "build-dom-scripts": "tsx lib/dom/genDomScripts.ts",
25
- "build-types": "tsc --emitDeclarationOnly --outDir dist",
26
- "build-js": "tsup lib/index.ts --dts",
27
- "build:cli": "tsup evals/cli.ts --outDir dist/evals --format cjs && cp evals/evals.config.json dist/evals/ && chmod +x dist/evals/cli.js && npm link",
28
- "build": "pnpm run lint && pnpm run gen-version && pnpm run build-dom-scripts && pnpm run build-js && pnpm run build-types",
29
- "gen-version": "tsx scripts/gen-version.ts",
30
- "prepare": "pnpm run build",
31
- "lint": "pnpm run prettier:fix && pnpm run eslint",
32
- "release": "pnpm run build && changeset publish",
33
- "release-canary": "pnpm run build && changeset version --snapshot && changeset publish --tag alpha"
34
- },
35
11
  "files": [
36
12
  "dist/**"
37
13
  ],
@@ -118,5 +94,27 @@
118
94
  "overrides": {
119
95
  "whatwg-url": "^14.0.0"
120
96
  },
121
- "packageManager": "pnpm@9.15.0+sha512.76e2379760a4328ec4415815bcd6628dee727af3779aaa4c914e3944156c4299921a89f976381ee107d41f12cfa4b66681ca9c718f0668fa0831ed4c6d8ba56c"
122
- }
97
+ "scripts": {
98
+ "example": "pnpm --filter @browserbasehq/stagehand-examples run start",
99
+ "format": "prettier --write .",
100
+ "prettier": "prettier --check .",
101
+ "prettier:fix": "prettier --write .",
102
+ "eslint": "eslint .",
103
+ "cache:clear": "rm -rf .cache",
104
+ "docs": "pnpm --filter @browserbasehq/stagehand-docs run dev",
105
+ "evals": "tsx scripts/run-evals.ts",
106
+ "evals:legacy": "tsx scripts/run-evals.ts",
107
+ "e2e": "pnpm run build && cd evals/deterministic && playwright test --config=e2e.playwright.config.ts",
108
+ "e2e:bb": "pnpm run build && cd evals/deterministic && playwright test --config=bb.playwright.config.ts",
109
+ "e2e:local": "pnpm run build && cd evals/deterministic && playwright test --config=local.playwright.config.ts",
110
+ "build-dom-scripts": "tsx lib/dom/genDomScripts.ts",
111
+ "build-types": "tsc --emitDeclarationOnly --outDir dist",
112
+ "build-js": "tsup lib/index.ts --dts",
113
+ "build:cli": "tsup evals/cli.ts --outDir dist/evals --format cjs && cp evals/evals.config.json dist/evals/ && chmod +x dist/evals/cli.js && npm link",
114
+ "build": "pnpm run lint && pnpm run gen-version && pnpm run build-dom-scripts && pnpm run build-js && pnpm run build-types",
115
+ "gen-version": "tsx scripts/gen-version.ts",
116
+ "lint": "pnpm run prettier:fix && pnpm run eslint",
117
+ "release": "pnpm run build && changeset publish",
118
+ "release-canary": "pnpm run build && changeset version --snapshot && changeset publish --tag alpha"
119
+ }
120
+ }