@browserbasehq/stagehand 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,147 @@
1
/**
 * Entry point of the in-page debug overlay.
 *
 * Resets `window.chunkNumber` to 0, runs `window.processElements` for that
 * chunk, highlights the first selector of every returned entry and then adds
 * the chunk navigation buttons.
 */
export async function debugDom() {
  window.chunkNumber = 0;

  const processed = await window.processElements(window.chunkNumber);
  const firstSelectors = multiSelectorMapToSelectorMap(processed.selectorMap);

  drawChunk(firstSelectors);
  setupChunkNav();
}
12
+
13
/**
 * Collapses a map of selector lists into a map that keeps only the first
 * selector of each list.
 *
 * @param multiSelectorMap Selector lists keyed by element index.
 * @returns An object with the same keys, each mapped to `selectors[0]`.
 */
function multiSelectorMapToSelectorMap(
  multiSelectorMap: Record<number, string[]>,
) {
  const pairs: Array<[number, string]> = [];
  for (const [index, candidates] of Object.entries(multiSelectorMap)) {
    pairs.push([Number(index), candidates[0]]);
  }
  return Object.fromEntries(pairs);
}
23
+
24
/**
 * Paints a translucent grey overlay over the node that each XPath in
 * `selectorMap` resolves to.
 *
 * Does nothing unless `window.showChunks` is truthy. Overlays left by a
 * previous call are removed before new ones are drawn.
 *
 * @param selectorMap XPath expressions keyed by element index.
 */
function drawChunk(selectorMap: Record<number, string>) {
  if (!window.showChunks) return;
  cleanupMarkers();

  for (const xpath of Object.values(selectorMap)) {
    const target = document.evaluate(
      xpath,
      document,
      null,
      XPathResult.FIRST_ORDERED_NODE_TYPE,
      null,
    ).singleNodeValue as Element;

    // XPaths that no longer resolve to a node are skipped.
    if (!target) {
      continue;
    }

    // Only element nodes expose getBoundingClientRect; any other node is
    // measured through a range spanning its contents.
    let bounds: DOMRect;
    if (target.nodeType === Node.ELEMENT_NODE) {
      bounds = target.getBoundingClientRect();
    } else {
      const range = document.createRange();
      range.selectNodeContents(target);
      bounds = range.getBoundingClientRect();
    }

    const overlay = document.createElement("div");
    overlay.className = "stagehand-marker";
    Object.assign(overlay.style, {
      position: "absolute",
      // Viewport coordinates plus the scroll offset give page coordinates.
      left: `${bounds.left + window.scrollX}px`,
      top: `${bounds.top + window.scrollY}px`,
      padding: "2px",
      width: `${bounds.width}px`,
      height: `${bounds.height}px`,
      backgroundColor: "grey",
      opacity: "0.3",
      zIndex: "1000000000", // keep the overlay above the element
      border: "1px solid",
      pointerEvents: "none", // the overlay must not capture mouse events
    });
    document.body.appendChild(overlay);
  }
}
64
+
65
/**
 * Removes everything the debug overlay added to the page: the element
 * markers first, then the chunk navigation buttons.
 */
async function cleanupDebug() {
  cleanupMarkers();
  cleanupNav();
}
69
+
70
/** Removes every element carrying the `stagehand-marker` class from the document. */
function cleanupMarkers() {
  for (const marker of Array.from(
    document.querySelectorAll(".stagehand-marker"),
  )) {
    marker.remove();
  }
}
76
+
77
/** Removes every element carrying the `stagehand-nav` class from the document. */
function cleanupNav() {
  for (const navElement of Array.from(
    document.querySelectorAll(".stagehand-nav"),
  )) {
    navElement.remove();
  }
}
83
+
84
/**
 * Adds the fixed "Previous" / "Next" buttons used to step through the page
 * one viewport-sized chunk at a time.
 *
 * The number of chunks is `ceil(scrollHeight / innerHeight)` and chunks are
 * numbered from 0. "Previous" is shown for every chunk but the first, and
 * "Next" for every chunk but the last.
 */
function setupChunkNav() {
  const viewportHeight = window.innerHeight;
  const documentHeight = document.documentElement.scrollHeight;
  const totalChunks = Math.ceil(documentHeight / viewportHeight);

  if (window.chunkNumber > 0) {
    const prevChunkButton = createChunkNavButton("Previous");
    prevChunkButton.style.marginLeft = "50px";
    prevChunkButton.style.left = "50%";
    prevChunkButton.style.transform = "translateX(-50%)";
    prevChunkButton.onclick = () => goToChunk(window.chunkNumber - 1);
    document.body.appendChild(prevChunkButton);
  }

  // The last valid chunk index is totalChunks - 1, so offering "Next" there
  // would only re-process the bottom of the page under a chunk number that
  // does not exist.
  if (window.chunkNumber < totalChunks - 1) {
    const nextChunkButton = createChunkNavButton("Next");
    nextChunkButton.style.marginRight = "50px";
    nextChunkButton.style.right = "50%";
    nextChunkButton.style.transform = "translateX(50%)";
    nextChunkButton.onclick = () => goToChunk(window.chunkNumber + 1);
    document.body.appendChild(nextChunkButton);
  }
}

/** Creates a `stagehand-nav` button with the styling shared by both navigation buttons. */
function createChunkNavButton(label: string): HTMLButtonElement {
  const button = document.createElement("button");
  button.className = "stagehand-nav";
  button.textContent = label;
  button.style.position = "fixed";
  button.style.bottom = "10px";
  button.style.zIndex = "1000000000";
  return button;
}

/** Clears the current overlay, scrolls to `chunkNumber`, then redraws markers and navigation for it. */
async function goToChunk(chunkNumber: number): Promise<void> {
  cleanupMarkers();
  cleanupNav();
  window.chunkNumber = chunkNumber;
  window.scrollTo(0, window.chunkNumber * window.innerHeight);
  await window.waitForDomSettle();

  const { selectorMap: multiSelectorMap } = await window.processElements(
    window.chunkNumber,
  );

  drawChunk(multiSelectorMapToSelectorMap(multiSelectorMap));
  setupChunkNav();
}
145
+
146
// Register the debug overlay entry points as globals on `window`.
window.cleanupDebug = cleanupDebug;
window.debugDom = debugDom;
@@ -0,0 +1,29 @@
1
+ /**
2
+ * We have a collection of typescript functions that we need to run in the browser.
3
+ * First, we build them into a single js file
4
+ * Second, due to framework differences we need to get our script content as a string to avoid pathing issues due to file routing in frameworks like Next.js
5
+ * Playwright allows us to pass in script content directly as a string instead of reading a file from a path
6
+ * https://github.com/browserbase/stagehand/issues/180
7
+ *
8
+ * We can't rely on the normal build process for stagehand, because we need our script content as a string so that the import *just works*
9
+ */
10
+ import fs from "fs";
11
+ import path from "path";
12
+ import esbuild from "esbuild";
13
+
14
// Every path is resolved against this file's directory.
const buildDir = path.join(__dirname, "./build");
const bundlePath = path.join(__dirname, "./build/index.js");
const generatedModulePath = path.join(__dirname, "./build/scriptContent.ts");

// 1. Make sure the output directory exists.
fs.mkdirSync(buildDir, { recursive: true });

// 2. Bundle index.ts (and everything it imports) into build/index.js.
esbuild.buildSync({
  entryPoints: [path.join(__dirname, "index.ts")],
  bundle: true,
  outdir: path.join(__dirname, "build"),
});

// 3. Re-emit the bundle as a TypeScript module exporting it as one string
//    constant; JSON.stringify produces the quoted, escaped literal.
const scriptContent = fs.readFileSync(bundlePath, "utf8");
const output = `export const scriptContent = ${JSON.stringify(scriptContent)};`;

fs.writeFileSync(generatedModulePath, output);
@@ -0,0 +1,25 @@
1
export {};

/** Result shape shared by all DOM-processing functions declared below. */
type ProcessedElements = {
  /** Serialised candidate elements, one `index:content` line each. */
  outputString: string;
  /** Selector lists keyed by element index. */
  selectorMap: Record<number, string[]>;
};

declare global {
  interface Window {
    /** Index of the chunk the debug overlay is currently showing. */
    chunkNumber: number;
    /** When truthy, the debug overlay draws element markers. */
    showChunks?: boolean;

    processDom: (
      chunksSeen: Array<number>,
    ) => Promise<ProcessedElements & { chunk: number; chunks: number[] }>;
    processAllOfDom: () => Promise<ProcessedElements>;
    processElements: (chunk: number) => Promise<ProcessedElements>;

    debugDom: () => Promise<void>;
    cleanupDebug: () => void;

    scrollToHeight: (height: number) => Promise<void>;
    waitForDomSettle: () => Promise<void>;
  }
}
@@ -0,0 +1,3 @@
1
+ export * from "./process";
2
+ export * from "./utils";
3
+ export * from "./debug";
@@ -0,0 +1,441 @@
1
+ import { generateXPathsForElement as generateXPaths } from "./xpathUtils";
2
+
3
/** Type guard: true when `node.nodeType` is `Node.ELEMENT_NODE`. */
export function isElementNode(node: Node): node is Element {
  const { nodeType } = node;
  return nodeType === Node.ELEMENT_NODE;
}
6
+
7
/**
 * Type guard for text nodes that carry visible characters: the node must be a
 * `Node.TEXT_NODE` whose trimmed `textContent` is non-empty.
 */
export function isTextNode(node: Node): node is Text {
  if (node.nodeType !== Node.TEXT_NODE) {
    return false;
  }
  return Boolean(node.textContent?.trim());
}
10
+
11
/**
 * Picks the next unseen chunk via `pickChunk`, processes it with
 * `processElements` and logs the extracted elements.
 *
 * @param chunksSeen Indices of the chunks that were already processed.
 * @returns The serialised elements and selector map of the chosen chunk, the
 *   chunk index itself and the list of all chunk indices of the page.
 */
export async function processDom(chunksSeen: Array<number>) {
  const picked = await pickChunk(chunksSeen);
  const processed = await processElements(picked.chunk);

  console.log(
    `Stagehand (Browser Process): Extracted dom elements:\n${processed.outputString}`,
  );

  return {
    outputString: processed.outputString,
    selectorMap: processed.selectorMap,
    chunk: picked.chunk,
    chunks: picked.chunksArray,
  };
}
26
+
27
/**
 * Processes every viewport-sized chunk of the page from top to bottom and
 * merges the results.
 *
 * Each chunk is processed with an index offset equal to the number of
 * elements collected so far, so indices stay unique across chunks. The page
 * is scrolled back to the top once all chunks are done.
 *
 * @returns The concatenated output of all chunks and one selector map
 *   covering all of them.
 */
export async function processAllOfDom() {
  console.log("Stagehand (Browser Process): Processing all of DOM");

  const viewportHeight = window.innerHeight;
  const documentHeight = document.documentElement.scrollHeight;
  const totalChunks = Math.ceil(documentHeight / viewportHeight);

  let index = 0;
  let allOutputString = "";
  // Typed accumulator filled in place: avoids copying the whole map again for
  // every chunk and gives the returned map a precise type.
  const allSelectorMap: Record<number, string[]> = {};

  for (let chunk = 0; chunk < totalChunks; chunk++) {
    const result = await processElements(chunk, true, index);
    allOutputString += result.outputString;
    Object.assign(allSelectorMap, result.selectorMap);
    index += Object.keys(result.selectorMap).length;
  }

  await scrollToHeight(0);

  console.log(
    `Stagehand (Browser Process): All dom elements: ${allOutputString}`,
  );

  return {
    outputString: allOutputString,
    selectorMap: allSelectorMap,
  };
}
59
+
60
/**
 * Smooth-scrolls the window to `height` and resolves once scrolling has gone
 * quiet.
 *
 * "Quiet" means no `scroll` event has fired for 100 ms; every scroll event
 * restarts that countdown. The countdown is also started immediately, so the
 * promise resolves even if no scroll event fires at all.
 *
 * @param height Vertical scroll offset to scroll to.
 */
export async function scrollToHeight(height: number) {
  window.scrollTo({ top: height, left: 0, behavior: "smooth" });

  await new Promise<void>((resolve) => {
    let quietTimer: number | undefined;

    function finish() {
      window.removeEventListener("scroll", restartQuietPeriod);
      resolve();
    }

    function restartQuietPeriod() {
      clearTimeout(quietTimer);
      quietTimer = window.setTimeout(finish, 100);
    }

    window.addEventListener("scroll", restartQuietPeriod, { passive: true });
    restartQuietPeriod();
  });
}
78
+
79
/**
 * XPath lists already generated for a DOM node, reused across
 * `processElements` calls.
 *
 * A WeakMap is used so that an entry does not keep its node alive once the
 * node has been removed from the page and is otherwise unreferenced.
 */
const xpathCache: WeakMap<Node, string[]> = new WeakMap();
80
+
81
/**
 * Collects the candidate nodes for one viewport-sized chunk of the page and
 * serialises them.
 *
 * Steps:
 * 1. Optionally scroll to `innerHeight * chunk`, clamped to the last
 *    reachable scroll offset.
 * 2. Walk all descendants of `document.body`. Keep elements that are
 *    interactive or leaf elements and are both active and visible, plus
 *    visible non-blank text nodes.
 * 3. Generate (or reuse from `xpathCache`) the XPath list of every candidate.
 * 4. Emit one `index:content` line per candidate and record its XPaths under
 *    the same index.
 *
 * @param chunk Zero-based chunk index.
 * @param scrollToChunk Whether to scroll to the chunk before collecting.
 * @param indexOffset Value added to every candidate index in the output.
 * @returns The serialised candidates and the index → XPath-list map.
 */
export async function processElements(
  chunk: number,
  scrollToChunk: boolean = true,
  indexOffset: number = 0,
): Promise<{
  outputString: string;
  selectorMap: Record<number, string[]>;
}> {
  console.time("processElements:total");
  const viewportHeight = window.innerHeight;
  const chunkHeight = viewportHeight * chunk;

  // Calculate the maximum scrollable offset
  const maxScrollTop =
    document.documentElement.scrollHeight - window.innerHeight;

  // Adjust the offsetTop to not exceed the maximum scrollable offset
  const offsetTop = Math.min(chunkHeight, maxScrollTop);

  if (scrollToChunk) {
    console.time("processElements:scroll");
    await scrollToHeight(offsetTop);
    console.timeEnd("processElements:scroll");
  }

  const candidateElements: Array<ChildNode> = [];
  const DOMQueue: Array<ChildNode> = [...document.body.childNodes];

  console.log("Stagehand (Browser Process): Generating candidate elements");
  console.time("processElements:findCandidates");

  while (DOMQueue.length > 0) {
    const element = DOMQueue.pop();
    if (!element) {
      continue;
    }

    if (isElementNode(element)) {
      // Always traverse child nodes. They are pushed in reverse so that
      // popping from the end visits them in document order.
      for (let i = element.childNodes.length - 1; i >= 0; i--) {
        DOMQueue.push(element.childNodes[i] as ChildNode);
      }

      // Interactive and leaf elements share the same active/visible
      // requirement, so the visibility probing runs at most once per element.
      if (
        (isInteractiveElement(element) || isLeafElement(element)) &&
        isActive(element) &&
        isVisible(element)
      ) {
        candidateElements.push(element);
      }
    } else if (isTextNode(element) && isTextVisible(element)) {
      candidateElements.push(element);
    }
  }

  console.timeEnd("processElements:findCandidates");

  const selectorMap: Record<number, string[]> = {};
  let outputString = "";

  console.log(
    `Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`,
  );

  console.time("processElements:processCandidates");
  console.time("processElements:generateXPaths");
  const xpathLists = await Promise.all(
    candidateElements.map(async (element) => {
      // One lookup serves as both the existence check and the read.
      const cached = xpathCache.get(element);
      if (cached !== undefined) {
        return cached;
      }

      const xpaths = await generateXPaths(element);
      xpathCache.set(element, xpaths);
      return xpaths;
    }),
  );
  console.timeEnd("processElements:generateXPaths");

  candidateElements.forEach((element, index) => {
    const xpaths = xpathLists[index];
    let elementOutput = "";

    if (isTextNode(element)) {
      const textContent = element.textContent?.trim();
      if (textContent) {
        elementOutput += `${index + indexOffset}:${textContent}\n`;
      }
    } else if (isElementNode(element)) {
      const tagName = element.tagName.toLowerCase();
      const attributes = collectEssentialAttributes(element);

      const openingTag = `<${tagName}${attributes ? " " + attributes : ""}>`;
      const closingTag = `</${tagName}>`;
      const textContent = element.textContent?.trim() || "";

      elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}\n`;
    }

    outputString += elementOutput;
    selectorMap[index + indexOffset] = xpaths;
  });
  console.timeEnd("processElements:processCandidates");

  console.timeEnd("processElements:total");
  return {
    outputString,
    selectorMap,
  };
}
204
+
205
/**
 * Formats the attributes of an element that are included in its serialised
 * form.
 *
 * A fixed list of attribute names is emitted first, in list order, skipping
 * attributes that are absent or empty. Every `data-*` attribute follows, in
 * the order the element reports them.
 *
 * @param element The DOM element.
 * @returns `name="value"` pairs joined by single spaces; `""` if there are none.
 */
function collectEssentialAttributes(element: Element): string {
  const essentialAttributes = [
    "id",
    "class",
    "href",
    "src",
    "aria-label",
    "aria-name",
    "aria-role",
    "aria-description",
    "aria-expanded",
    "aria-haspopup",
    "type",
    "value",
  ];

  const formatted: string[] = [];

  for (const name of essentialAttributes) {
    const value = element.getAttribute(name);
    if (value) {
      formatted.push(`${name}="${value}"`);
    }
  }

  // Collect data- attributes
  for (const { name, value } of Array.from(element.attributes)) {
    if (name.startsWith("data-")) {
      formatted.push(`${name}="${value}"`);
    }
  }

  return formatted.join(" ");
}
242
+
243
// Register the DOM-processing functions as globals on `window`.
window.processElements = processElements;
window.processAllOfDom = processAllOfDom;
window.processDom = processDom;
window.scrollToHeight = scrollToHeight;
247
+
248
/** Tag names that are not accepted as leaf elements when they have no child nodes. */
const leafElementDenyList = [
  "SVG",
  "IFRAME",
  "SCRIPT",
  "STYLE",
  "LINK",
];

/** Tag names (as reported by `Element.tagName`) treated as interactive. */
const interactiveElementTypes = [
  "A", "BUTTON", "DETAILS", "EMBED",
  "INPUT", "LABEL", "MENU", "MENUITEM",
  "OBJECT", "SELECT", "TEXTAREA", "SUMMARY",
];

/** Values of the `role` attribute treated as interactive. */
const interactiveRoles = [
  "button", "menu", "menuitem", "link",
  "checkbox", "radio", "slider", "tab",
  "tabpanel", "textbox", "combobox", "grid",
  "listbox", "option", "progressbar", "scrollbar",
  "searchbox", "switch", "tree", "treeitem",
  "spinbutton", "tooltip",
];

/** Values of the `aria-role` attribute treated as interactive. */
const interactiveAriaRoles = [
  "menu",
  "menuitem",
  "button",
];
290
+
291
/*
 * Decides whether an element is visible and therefore worth showing to an LLM.
 * An element passes when:
 * - its bounding box has a non-zero width and height,
 * - the top edge of that box lies between 0 and the viewport height,
 * - it (or one of its descendants) is the topmost element at one of the
 *   sample points probed by isTopElement, and
 * - checkVisibility() accepts it, with opacity and the CSS `visibility`
 *   property taken into account.
 * Children of a hidden element are expected to be filtered out by these same
 * checks, so hidden ancestors are not looked up explicitly here.
 */
const isVisible = (element: Element) => {
  const rect = element.getBoundingClientRect();

  const hasNoArea = rect.width === 0 || rect.height === 0;
  const startsOutsideViewport = rect.top < 0 || rect.top > window.innerHeight;
  if (hasNoArea || startsOutsideViewport) {
    return false;
  }

  if (!isTopElement(element, rect)) {
    return false;
  }

  return element.checkVisibility({
    checkOpacity: true,
    checkVisibilityCSS: true,
  });
};
320
+
321
/*
 * Text-node counterpart of the element visibility check. The node is measured
 * through a range over its contents; the size and viewport tests use that
 * rectangle, while the topmost-element and checkVisibility() tests are made
 * on the parent element. A node without a parent element is never visible.
 */
const isTextVisible = (element: ChildNode) => {
  const range = document.createRange();
  range.selectNodeContents(element);
  const rect = range.getBoundingClientRect();

  const hasNoArea = rect.width === 0 || rect.height === 0;
  const startsOutsideViewport = rect.top < 0 || rect.top > window.innerHeight;
  if (hasNoArea || startsOutsideViewport) {
    return false;
  }

  const parent = element.parentElement;
  if (!parent || !isTopElement(parent, rect)) {
    return false;
  }

  return parent.checkVisibility({
    checkOpacity: true,
    checkVisibilityCSS: true,
  });
};
349
+
350
/**
 * Checks whether `elem` is hit at any of five sample points inside `rect`:
 * the four quarter points and the centre.
 *
 * For each point the element returned by `document.elementFromPoint` and its
 * ancestors (up to, but excluding, `document.body`) are compared with `elem`.
 *
 * @param elem Node to look for.
 * @param rect Rectangle, in viewport coordinates, to sample.
 * @returns `true` as soon as one sample point hits `elem` or a descendant of it.
 */
function isTopElement(elem: ChildNode, rect: DOMRect) {
  // Sample positions expressed as fractions of the rectangle's width/height.
  const fractions: Array<[number, number]> = [
    [0.25, 0.25],
    [0.75, 0.25],
    [0.25, 0.75],
    [0.75, 0.75],
    [0.5, 0.5],
  ];

  for (const [fx, fy] of fractions) {
    const x = rect.left + rect.width * fx;
    const y = rect.top + rect.height * fy;

    let current: Element | null = document.elementFromPoint(x, y);
    while (current && current !== document.body) {
      if (current.isSameNode(elem)) {
        return true;
      }
      current = current.parentElement;
    }
  }

  return false;
}
371
+
372
/*
 * An element counts as active unless it carries a `disabled` attribute, a
 * `hidden` attribute, or `aria-disabled="true"`.
 */
const isActive = (element: Element) => {
  const inactive =
    element.hasAttribute("disabled") ||
    element.hasAttribute("hidden") ||
    element.getAttribute("aria-disabled") === "true";

  return !inactive;
};
383
/*
 * An element is interactive when its tag name is listed in
 * interactiveElementTypes, its `role` attribute is listed in
 * interactiveRoles, or its `aria-role` attribute is listed in
 * interactiveAriaRoles.
 *
 * Always returns a real boolean; a missing or empty attribute simply does not
 * match.
 */
const isInteractiveElement = (element: Element): boolean => {
  const role = element.getAttribute("role");
  const ariaRole = element.getAttribute("aria-role");

  return (
    interactiveElementTypes.includes(element.tagName) ||
    (role !== null && interactiveRoles.includes(role)) ||
    (ariaRole !== null && interactiveAriaRoles.includes(ariaRole))
  );
};
394
+
395
/*
 * Decides whether an element is treated as a leaf worth listing by itself.
 * Elements with empty text content never qualify. Otherwise an element
 * qualifies when it has no child nodes and its tag is not on the deny list,
 * or when its only child is a non-blank text node.
 */
const isLeafElement = (element: Element) => {
  if (element.textContent === "") {
    return false;
  }

  const children = element.childNodes;

  // NOTE(review): an element without child nodes normally has an empty
  // textContent and is already rejected above, so this branch (and with it
  // the deny list) looks unreachable — confirm the intended order of checks.
  if (children.length === 0) {
    return !leafElementDenyList.includes(element.tagName);
  }

  // Simple elements that contain only text are kept so that their tag and
  // attributes give extra context for that text.
  return children.length === 1 && isTextNode(children[0]);
};
411
+
412
/**
 * Chooses the unseen chunk whose top edge is closest to the current scroll
 * position. On a tie the lower chunk index wins.
 *
 * The page is split into `ceil(scrollHeight / innerHeight)` chunks numbered
 * from 0.
 *
 * @param chunksSeen Indices of chunks that must not be picked again.
 * @returns The chosen chunk and the list of all chunk indices.
 * @throws Error when every chunk has already been seen.
 */
async function pickChunk(chunksSeen: Array<number>) {
  const viewportHeight = window.innerHeight;
  const documentHeight = document.documentElement.scrollHeight;
  const chunks = Math.ceil(documentHeight / viewportHeight);

  const chunksArray = Array.from({ length: chunks }, (_, i) => i);
  const chunksRemaining = chunksArray.filter(
    (candidate) => !chunksSeen.includes(candidate),
  );

  if (chunksRemaining.length === 0) {
    throw new Error(`No chunks remaining to check: ${chunksRemaining}`);
  }

  const currentScrollPosition = window.scrollY;
  const distanceTo = (index: number) =>
    Math.abs(currentScrollPosition - viewportHeight * index);

  let chunk = chunksRemaining[0];
  for (const candidate of chunksRemaining) {
    // Strict comparison keeps the earlier chunk when two are equally close.
    if (distanceTo(candidate) < distanceTo(chunk)) {
      chunk = candidate;
    }
  }

  return {
    chunk,
    chunksArray,
  };
}
@@ -0,0 +1,17 @@
1
/**
 * Resolves once the DOM below `document.body` has gone 2 seconds without a
 * child-list mutation.
 *
 * Every observed mutation restarts the 2-second countdown. When the countdown
 * finally elapses the observer is disconnected, so a finished wait leaves no
 * observer or timer behind.
 */
export async function waitForDomSettle() {
  return new Promise<void>((resolve) => {
    let timeout: ReturnType<typeof setTimeout>;

    const observer = new MutationObserver(() => {
      clearTimeout(timeout);
      timeout = createTimeout();
    });

    const createTimeout = () => {
      return setTimeout(() => {
        // Stop observing first: without this, each call would leave behind an
        // observer that keeps re-arming timers for as long as the page lives.
        observer.disconnect();
        resolve();
      }, 2000);
    };

    timeout = createTimeout();
    observer.observe(window.document.body, { childList: true, subtree: true });
  });
}
16
+
17
// Register the DOM-settle helper as a global on `window`.
window.waitForDomSettle = waitForDomSettle;