@browserbasehq/stagehand 1.1.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,482 @@
1
+ (() => {
2
+ // lib/dom/process.ts
3
+ function isElementNode(node) {
4
+ return node.nodeType === Node.ELEMENT_NODE;
5
+ }
6
+ function isTextNode(node) {
7
+ return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());
8
+ }
9
/**
 * Picks the next unseen chunk of the page and extracts its candidate elements.
 *
 * @param chunksSeen - Chunk indices that have already been processed.
 * @returns The rendered element listing, the index-to-XPaths map, the chunk
 *          that was processed, and the list of all chunk indices.
 */
async function processDom(chunksSeen) {
  const picked = await pickChunk(chunksSeen);
  // Both optional arguments are passed as undefined so processElements applies its defaults.
  const processed = await processElements(picked.chunk, undefined, undefined);
  console.log(
    `Stagehand (Browser Process): Extracted dom elements:\n${processed.outputString}`
  );
  return {
    outputString: processed.outputString,
    selectorMap: processed.selectorMap,
    chunk: picked.chunk,
    chunks: picked.chunksArray
  };
}
27
+ async function processAllOfDom() {
28
+ console.log("Stagehand (Browser Process): Processing all of DOM");
29
+ const viewportHeight = window.innerHeight;
30
+ const documentHeight = document.documentElement.scrollHeight;
31
+ const totalChunks = Math.ceil(documentHeight / viewportHeight);
32
+ let index = 0;
33
+ const results = [];
34
+ for (let chunk = 0; chunk < totalChunks; chunk++) {
35
+ const result = await processElements(chunk, true, index);
36
+ results.push(result);
37
+ index += Object.keys(result.selectorMap).length;
38
+ }
39
+ await scrollToHeight(0);
40
+ const allOutputString = results.map((result) => result.outputString).join("");
41
+ const allSelectorMap = results.reduce(
42
+ (acc, result) => ({ ...acc, ...result.selectorMap }),
43
+ {}
44
+ );
45
+ console.log(
46
+ `Stagehand (Browser Process): All dom elements: ${allOutputString}`
47
+ );
48
+ return {
49
+ outputString: allOutputString,
50
+ selectorMap: allSelectorMap
51
+ };
52
+ }
53
/**
 * Smooth-scrolls the window to a vertical offset and waits for scrolling to settle.
 *
 * Settling is detected by debouncing `scroll` events: the promise resolves once
 * 100 ms pass without a new event. The debounce is armed immediately, so the
 * call also resolves (after 100 ms) when no scroll event fires at all.
 *
 * @param height - Target `top` offset passed to `window.scrollTo`.
 */
async function scrollToHeight(height) {
  window.scrollTo({ top: height, left: 0, behavior: "smooth" });
  await new Promise((done) => {
    let quietTimer;
    function finish() {
      window.removeEventListener("scroll", onScroll);
      done(undefined);
    }
    function onScroll() {
      // Every scroll event restarts the quiet-period countdown.
      clearTimeout(quietTimer);
      quietTimer = window.setTimeout(() => finish(), 100);
    }
    window.addEventListener("scroll", onScroll, { passive: true });
    onScroll();
  });
}
68
// Cache of generated XPath lists, keyed by DOM node. A WeakMap is used so that
// entries disappear together with their nodes instead of keeping every node
// that was ever processed reachable for the lifetime of the page.
var xpathCache = /* @__PURE__ */ new WeakMap();

/**
 * Collects the candidate nodes under `document.body` and renders them as an
 * indexed listing together with a map from index to XPath selectors.
 *
 * A node becomes a candidate when it is an interactive or leaf element that is
 * active and visible, or a text node that is visible.
 *
 * @param chunk - Zero-based chunk index; the scroll target is
 *                `window.innerHeight * chunk`, capped at the maximum scroll offset.
 * @param scrollToChunk - When truthy, scroll to the chunk before scanning.
 * @param indexOffset - Value added to every element index in the output.
 * @returns `outputString` (one `index:content` line per candidate) and
 *          `selectorMap` (index -> list of XPaths).
 */
async function processElements(chunk, scrollToChunk = true, indexOffset = 0) {
  console.time("processElements:total");
  const viewportHeight = window.innerHeight;
  const chunkHeight = viewportHeight * chunk;
  const maxScrollTop = document.documentElement.scrollHeight - window.innerHeight;
  const offsetTop = Math.min(chunkHeight, maxScrollTop);
  if (scrollToChunk) {
    console.time("processElements:scroll");
    await scrollToHeight(offsetTop);
    console.timeEnd("processElements:scroll");
  }
  const candidateElements = [];
  // Used as a stack: nodes are taken from the end, and children are pushed in
  // reverse so that they are visited in document order.
  const DOMQueue = [...document.body.childNodes];
  console.log("Stagehand (Browser Process): Generating candidate elements");
  console.time("processElements:findCandidates");
  while (DOMQueue.length > 0) {
    const element = DOMQueue.pop();
    let shouldAddElement = false;
    if (element && isElementNode(element)) {
      for (let i = element.childNodes.length - 1; i >= 0; i--) {
        DOMQueue.push(element.childNodes[i]);
      }
      // Run the active/visible checks once even when the element qualifies
      // both as interactive and as a leaf.
      if ((isInteractiveElement(element) || isLeafElement(element)) && isActive(element) && isVisible(element)) {
        shouldAddElement = true;
      }
    }
    if (element && isTextNode(element) && isTextVisible(element)) {
      shouldAddElement = true;
    }
    if (shouldAddElement) {
      candidateElements.push(element);
    }
  }
  console.timeEnd("processElements:findCandidates");
  const selectorMap = {};
  let outputString = "";
  console.log(
    `Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`
  );
  console.time("processElements:processCandidates");
  console.time("processElements:generateXPaths");
  const xpathLists = await Promise.all(
    candidateElements.map(async (element) => {
      if (xpathCache.has(element)) {
        return xpathCache.get(element);
      }
      const xpaths = await generateXPathsForElement(element);
      xpathCache.set(element, xpaths);
      return xpaths;
    })
  );
  console.timeEnd("processElements:generateXPaths");
  candidateElements.forEach((element, index) => {
    const xpaths = xpathLists[index];
    let elementOutput = "";
    if (isTextNode(element)) {
      const textContent = element.textContent?.trim();
      if (textContent) {
        elementOutput += `${index + indexOffset}:${textContent}\n`;
      }
    } else if (isElementNode(element)) {
      const tagName = element.tagName.toLowerCase();
      const attributes = collectEssentialAttributes(element);
      const openingTag = `<${tagName}${attributes ? " " + attributes : ""}>`;
      const closingTag = `</${tagName}>`;
      const textContent = element.textContent?.trim() || "";
      elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}\n`;
    }
    outputString += elementOutput;
    selectorMap[index + indexOffset] = xpaths;
  });
  console.timeEnd("processElements:processCandidates");
  console.timeEnd("processElements:total");
  return {
    outputString,
    selectorMap
  };
}
158
+ function collectEssentialAttributes(element) {
159
+ const essentialAttributes = [
160
+ "id",
161
+ "class",
162
+ "href",
163
+ "src",
164
+ "aria-label",
165
+ "aria-name",
166
+ "aria-role",
167
+ "aria-description",
168
+ "aria-expanded",
169
+ "aria-haspopup"
170
+ ];
171
+ const attrs = essentialAttributes.map((attr) => {
172
+ const value = element.getAttribute(attr);
173
+ return value ? `${attr}="${value}"` : "";
174
+ }).filter((attr) => attr !== "");
175
+ Array.from(element.attributes).forEach((attr) => {
176
+ if (attr.name.startsWith("data-")) {
177
+ attrs.push(`${attr.name}="${attr.value}"`);
178
+ }
179
+ });
180
+ return attrs.join(" ");
181
+ }
182
// Publish the DOM-processing entry points as properties of `window`.
Object.assign(window, {
  processDom,
  processAllOfDom,
  processElements,
  scrollToHeight
});
186
// Tag names consulted by isLeafElement for elements that have no child nodes.
var leafElementDenyList = ["SVG", "IFRAME", "SCRIPT", "STYLE", "LINK"];

// Tag names that make an element interactive on their own.
var interactiveElementTypes = [
  "A", "BUTTON", "DETAILS", "EMBED",
  "INPUT", "LABEL", "MENU", "MENUITEM",
  "OBJECT", "SELECT", "TEXTAREA", "SUMMARY"
];

// Values of the `role` attribute that make an element interactive.
var interactiveRoles = [
  "button", "menu", "menuitem", "link",
  "checkbox", "radio", "slider", "tab",
  "tabpanel", "textbox", "combobox", "grid",
  "listbox", "option", "progressbar", "scrollbar",
  "searchbox", "switch", "tree", "treeitem",
  "spinbutton", "tooltip"
];

// Values of the `aria-role` attribute that make an element interactive.
var interactiveAriaRoles = ["menu", "menuitem", "button"];
226
/**
 * Decides whether an element counts as visible in the current viewport.
 *
 * The element must have a non-zero bounding box whose top edge lies between 0
 * and `window.innerHeight`, must be the top-most element at one of the points
 * sampled by isTopElement, and must pass `checkVisibility` with the opacity
 * and visibility-CSS checks enabled.
 *
 * @param element - Element to test.
 * @returns `false` on the first failed check, otherwise the `checkVisibility` result.
 */
var isVisible = (element) => {
  const box = element.getBoundingClientRect();
  if (box.width === 0 || box.height === 0) {
    return false;
  }
  if (box.top < 0 || box.top > window.innerHeight) {
    return false;
  }
  return isTopElement(element, box) ? element.checkVisibility({
    checkOpacity: true,
    checkVisibilityCSS: true
  }) : false;
};
240
/**
 * Decides whether a text node counts as visible in the current viewport.
 *
 * The bounding box comes from a Range over the node's contents; the top-most
 * and `checkVisibility` checks are applied to the node's parent element.
 *
 * @param element - Text node to test.
 * @returns `false` when the box is empty or starts outside the viewport, when
 *          there is no parent element, or when the parent is not top-most;
 *          otherwise the parent's `checkVisibility` result.
 */
var isTextVisible = (element) => {
  const textRange = document.createRange();
  textRange.selectNodeContents(element);
  const box = textRange.getBoundingClientRect();
  if (box.width === 0 || box.height === 0) {
    return false;
  }
  if (box.top < 0 || box.top > window.innerHeight) {
    return false;
  }
  const owner = element.parentElement;
  if (!owner || !isTopElement(owner, box)) {
    return false;
  }
  return owner.checkVisibility({
    checkOpacity: true,
    checkVisibilityCSS: true
  });
};
260
+ function isTopElement(elem, rect) {
261
+ const points = [
262
+ { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.25 },
263
+ { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.25 },
264
+ { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.75 },
265
+ { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.75 },
266
+ { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }
267
+ ];
268
+ return points.some((point) => {
269
+ const topEl = document.elementFromPoint(point.x, point.y);
270
+ let current = topEl;
271
+ while (current && current !== document.body) {
272
+ if (current.isSameNode(elem)) {
273
+ return true;
274
+ }
275
+ current = current.parentElement;
276
+ }
277
+ return false;
278
+ });
279
+ }
280
+ var isActive = (element) => {
281
+ if (element.hasAttribute("disabled") || element.hasAttribute("hidden") || element.getAttribute("aria-disabled") === "true") {
282
+ return false;
283
+ }
284
+ return true;
285
+ };
286
/**
 * Tests whether an element is interactive by tag name, `role`, or `aria-role`.
 *
 * The three checks are tried in that order and the first truthy outcome is
 * returned. When none matches, the outcome of the last check is returned
 * as-is, so the result is truthy/falsy rather than strictly boolean.
 *
 * @param element - Element exposing `tagName` and `getAttribute`.
 */
var isInteractiveElement = (element) => {
  const tag = element.tagName;
  const role = element.getAttribute("role");
  const ariaRole = element.getAttribute("aria-role");
  const byTag = tag && interactiveElementTypes.includes(tag);
  if (byTag) {
    return byTag;
  }
  const byRole = role && interactiveRoles.includes(role);
  if (byRole) {
    return byRole;
  }
  return ariaRole && interactiveAriaRoles.includes(ariaRole);
};
292
/**
 * Tests whether an element is a leaf worth listing.
 *
 * Elements with empty `textContent` are never leaves. Otherwise an element
 * with no child nodes is a leaf unless its tag is on the deny list, and an
 * element with exactly one child is a leaf when that child is a text node
 * accepted by isTextNode.
 *
 * NOTE(review): an element without child nodes normally has empty
 * `textContent`, so the deny-list branch looks unreachable - confirm intent.
 *
 * @param element - Element to classify.
 * @returns `true` for leaf elements, `false` otherwise.
 */
var isLeafElement = (element) => {
  if (element.textContent === "") {
    return false;
  }
  const { childNodes } = element;
  switch (childNodes.length) {
    case 0:
      return !leafElementDenyList.includes(element.tagName);
    case 1:
      if (isTextNode(childNodes[0])) {
        return true;
      }
      return false;
    default:
      return false;
  }
};
304
+ async function pickChunk(chunksSeen) {
305
+ const viewportHeight = window.innerHeight;
306
+ const documentHeight = document.documentElement.scrollHeight;
307
+ const chunks = Math.ceil(documentHeight / viewportHeight);
308
+ const chunksArray = Array.from({ length: chunks }, (_, i) => i);
309
+ const chunksRemaining = chunksArray.filter((chunk2) => {
310
+ return !chunksSeen.includes(chunk2);
311
+ });
312
+ const currentScrollPosition = window.scrollY;
313
+ const closestChunk = chunksRemaining.reduce((closest, current) => {
314
+ const currentChunkTop = viewportHeight * current;
315
+ const closestChunkTop = viewportHeight * closest;
316
+ return Math.abs(currentScrollPosition - currentChunkTop) < Math.abs(currentScrollPosition - closestChunkTop) ? current : closest;
317
+ }, chunksRemaining[0]);
318
+ const chunk = closestChunk;
319
+ if (chunk === void 0) {
320
+ throw new Error(`No chunks remaining to check: ${chunksRemaining}`);
321
+ }
322
+ return {
323
+ chunk,
324
+ chunksArray
325
+ };
326
+ }
327
+
328
+ // lib/dom/xpathUtils.ts
329
/**
 * Returns the parent to continue with when walking up from a node.
 *
 * @param node - Node to start from.
 * @returns `parentElement` for element nodes, `parentNode` for any other node.
 */
function getParentElement(node) {
  if (isElementNode(node)) {
    return node.parentElement;
  }
  return node.parentNode;
}
332
/**
 * Lists every combination of `size` items from `attributes`, keeping the
 * items' original relative order inside each combination.
 *
 * Combinations come out in lexicographic order of their source indices.
 * A `size` of 0 yields a single empty combination; a `size` larger than the
 * input length yields none.
 *
 * @param attributes - Source items.
 * @param size - Number of items per combination.
 * @returns Array of combinations, each a fresh array.
 */
function getCombinations(attributes, size) {
  // Builds all completions that start at `start` once `depth` items are already chosen.
  const build = (start, depth) => {
    if (depth === size) {
      return [[]];
    }
    const found = [];
    for (let i = start; i < attributes.length; i++) {
      for (const tail of build(i + 1, depth + 1)) {
        found.push([attributes[i], ...tail]);
      }
    }
    return found;
  };
  return build(0, 0);
}
348
+ function isXPathFirstResultElement(xpath, target) {
349
+ try {
350
+ const result = document.evaluate(
351
+ xpath,
352
+ document.documentElement,
353
+ null,
354
+ XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
355
+ null
356
+ );
357
+ return result.snapshotItem(0) === target;
358
+ } catch (error) {
359
+ console.warn(`Invalid XPath expression: ${xpath}`, error);
360
+ return false;
361
+ }
362
+ }
363
/**
 * Turns a string into an XPath string literal.
 *
 * - No apostrophe: wrapped in single quotes.
 * - Apostrophes but no double quotes: wrapped in double quotes.
 * - Both: a `concat(...)` call whose arguments alternate between
 *   single-quoted text and double-quoted runs of apostrophes.
 *
 * @param value - Raw string value.
 * @returns An XPath expression that evaluates to `value`.
 */
function escapeXPathString(value) {
  if (!value.includes("'")) {
    return `'${value}'`;
  }
  if (!value.includes('"')) {
    return `"${value}"`;
  }
  const pieces = [];
  // The capture group keeps each run of apostrophes as its own segment.
  for (const segment of value.split(/('+)/)) {
    const isApostropheRun = segment.startsWith("'") && segment.endsWith("'");
    pieces.push(isApostropheRun ? `"${segment}"` : `'${segment}'`);
  }
  return "concat(" + pieces.join(",") + ")";
}
382
/**
 * Produces the XPath selectors for a node, most specific first.
 *
 * The three generators are started together and awaited as a group.
 *
 * @param element - Node to describe; a falsy value yields an empty list.
 * @returns `[idBased?, standard, complex]` - the id-based XPath is included
 *          only when one was produced.
 */
async function generateXPathsForElement(element) {
  if (!element) {
    return [];
  }
  const pending = [
    generateComplexXPath(element),
    generateStandardXPath(element),
    generatedIdBasedXPath(element)
  ];
  const resolved = await Promise.all(pending);
  const complexXPath = resolved[0];
  const standardXPath = resolved[1];
  const idBasedXPath = resolved[2];
  const ordered = [];
  if (idBasedXPath) {
    ordered.push(idBasedXPath);
  }
  ordered.push(standardXPath, complexXPath);
  return ordered;
}
391
/**
 * Builds an attribute-based XPath for a node.
 *
 * Walking up from the node, each element is first tried with combinations of
 * its identifying attributes (smallest combinations first). As soon as a
 * combination makes that element the first match in the document, the path is
 * anchored there and the walk stops. Elements without such a combination
 * contribute a plain tag step, with a positional index when the parent has
 * several children of the same tag. Text nodes contribute no step of their own.
 *
 * @param element - Node to describe.
 * @returns An XPath starting with `//`; just `"//"` when no step was produced.
 */
async function generateComplexXPath(element) {
  // Attributes tried, in priority order, when looking for an anchoring selector.
  // Built once per call rather than once per ancestor.
  const attributePriority = [
    "data-qa",
    "data-component",
    "data-role",
    "role",
    "aria-role",
    "type",
    "name",
    "aria-label",
    "placeholder",
    "title",
    "alt"
  ];
  const parts = [];
  let currentElement = element;
  while (currentElement && (isTextNode(currentElement) || isElementNode(currentElement))) {
    if (isElementNode(currentElement)) {
      const el = currentElement;
      let selector = el.tagName.toLowerCase();
      const attributes = attributePriority.map((attr) => {
        const value = el.getAttribute(attr);
        return value ? { attr, value } : null;
      }).filter((attr) => attr !== null);
      // Path step (without the leading "//") that identifies `el`, if any.
      let uniqueStep = "";
      for (let i = 1; i <= attributes.length && !uniqueStep; i++) {
        for (const combo of getCombinations(attributes, i)) {
          const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(" and ");
          const step = `${selector}[${conditions}]`;
          if (isXPathFirstResultElement(`//${step}`, el)) {
            uniqueStep = step;
            break;
          }
        }
      }
      if (uniqueStep) {
        // Anchored: ancestors above this element are not needed.
        parts.unshift(uniqueStep);
        break;
      }
      const parent = getParentElement(el);
      if (parent) {
        const siblings = Array.from(parent.children).filter(
          (sibling) => sibling.tagName === el.tagName
        );
        const index = siblings.indexOf(el) + 1;
        selector += siblings.length > 1 ? `[${index}]` : "";
      }
      parts.unshift(selector);
    }
    currentElement = getParentElement(currentElement);
  }
  return "//" + parts.join("/");
}
451
+ async function generateStandardXPath(element) {
452
+ const parts = [];
453
+ while (element && (isTextNode(element) || isElementNode(element))) {
454
+ let index = 0;
455
+ let hasSameTypeSiblings = false;
456
+ const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];
457
+ for (let i = 0; i < siblings.length; i++) {
458
+ const sibling = siblings[i];
459
+ if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {
460
+ index = index + 1;
461
+ hasSameTypeSiblings = true;
462
+ if (sibling.isSameNode(element)) {
463
+ break;
464
+ }
465
+ }
466
+ }
467
+ if (element.nodeName !== "#text") {
468
+ const tagName = element.nodeName.toLowerCase();
469
+ const pathIndex = hasSameTypeSiblings ? `[${index}]` : "";
470
+ parts.unshift(`${tagName}${pathIndex}`);
471
+ }
472
+ element = element.parentElement;
473
+ }
474
+ return parts.length ? `//${parts.join("//")}` : "";
475
+ }
476
+ async function generatedIdBasedXPath(element) {
477
+ if (isElementNode(element) && element.id) {
478
+ return `//*[@id='${element.id}']`;
479
+ }
480
+ return null;
481
+ }
482
+ })();
package/dist/index.d.ts CHANGED
@@ -54,7 +54,6 @@ declare class LLMProvider {
54
54
  declare class Stagehand {
55
55
  private llmProvider;
56
56
  private observations;
57
- private actions;
58
57
  page: Page;
59
58
  context: BrowserContext;
60
59
  private env;
@@ -69,6 +68,8 @@ declare class Stagehand {
69
68
  private domSettleTimeoutMs;
70
69
  private browserBaseSessionCreateParams?;
71
70
  private enableCaching;
71
+ private variables;
72
+ private actHandler;
72
73
  private browserbaseResumeSessionID?;
73
74
  constructor({ env, apiKey, projectId, verbose, debugDom, llmProvider, headless, logger, browserBaseSessionCreateParams, domSettleTimeoutMs, enableCaching, browserbaseResumeSessionID, }?: {
74
75
  env: "LOCAL" | "BROWSERBASE";
@@ -88,8 +89,9 @@ declare class Stagehand {
88
89
  enableCaching?: boolean;
89
90
  browserbaseResumeSessionID?: string;
90
91
  });
91
- init({ modelName, }?: {
92
+ init({ modelName, domSettleTimeoutMs, }?: {
92
93
  modelName?: AvailableModel;
94
+ domSettleTimeoutMs?: number;
93
95
  }): Promise<{
94
96
  debugUrl: string;
95
97
  sessionUrl: string;
@@ -109,30 +111,31 @@ declare class Stagehand {
109
111
  private _waitForSettledDom;
110
112
  private startDomDebug;
111
113
  private cleanupDomDebug;
112
- private _generateId;
113
114
  private _recordObservation;
114
- private _recordAction;
115
115
  private _extract;
116
116
  private _observe;
117
- private _act;
118
- act({ action, modelName, useVision, }: {
117
+ act({ action, modelName, useVision, variables, domSettleTimeoutMs, }: {
119
118
  action: string;
120
119
  modelName?: AvailableModel;
121
120
  useVision?: "fallback" | boolean;
121
+ variables?: Record<string, string>;
122
+ domSettleTimeoutMs?: number;
122
123
  }): Promise<{
123
124
  success: boolean;
124
125
  message: string;
125
126
  action: string;
126
127
  }>;
127
- extract<T extends z.AnyZodObject>({ instruction, schema, modelName, }: {
128
+ extract<T extends z.AnyZodObject>({ instruction, schema, modelName, domSettleTimeoutMs, }: {
128
129
  instruction: string;
129
130
  schema: T;
130
131
  modelName?: AvailableModel;
132
+ domSettleTimeoutMs?: number;
131
133
  }): Promise<z.infer<T>>;
132
134
  observe(options?: {
133
135
  instruction?: string;
134
136
  modelName?: AvailableModel;
135
137
  useVision?: boolean;
138
+ domSettleTimeoutMs?: number;
136
139
  }): Promise<{
137
140
  selector: string;
138
141
  description: string;