@browserbasehq/stagehand 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dom/build/debug.js +18 -5
- package/dist/dom/build/index.js +203 -54
- package/dist/dom/build/process.js +181 -45
- package/dist/dom/build/xpathUtils.js +478 -0
- package/dist/index.d.ts +6 -6
- package/dist/index.js +1424 -699
- package/package.json +1 -1
package/dist/dom/build/debug.js
CHANGED
|
@@ -2,12 +2,19 @@
|
|
|
2
2
|
// lib/dom/debug.ts
|
|
3
3
|
async function debugDom() {
|
|
4
4
|
window.chunkNumber = 0;
|
|
5
|
-
const { selectorMap, outputString } = await window.processElements(
|
|
6
|
-
|
|
7
|
-
);
|
|
5
|
+
const { selectorMap: multiSelectorMap, outputString } = await window.processElements(window.chunkNumber);
|
|
6
|
+
const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);
|
|
8
7
|
drawChunk(selectorMap);
|
|
9
8
|
setupChunkNav();
|
|
10
9
|
}
|
|
10
|
+
function multiSelectorMapToSelectorMap(multiSelectorMap) {
|
|
11
|
+
return Object.fromEntries(
|
|
12
|
+
Object.entries(multiSelectorMap).map(([key, selectors]) => [
|
|
13
|
+
Number(key),
|
|
14
|
+
selectors[0]
|
|
15
|
+
])
|
|
16
|
+
);
|
|
17
|
+
}
|
|
11
18
|
function drawChunk(selectorMap) {
|
|
12
19
|
cleanupMarkers();
|
|
13
20
|
Object.entries(selectorMap).forEach(([_index, selector]) => {
|
|
@@ -81,7 +88,10 @@
|
|
|
81
88
|
window.chunkNumber -= 1;
|
|
82
89
|
window.scrollTo(0, window.chunkNumber * window.innerHeight);
|
|
83
90
|
await window.waitForDomSettle();
|
|
84
|
-
const { selectorMap } = await processElements(
|
|
91
|
+
const { selectorMap: multiSelectorMap } = await window.processElements(
|
|
92
|
+
window.chunkNumber
|
|
93
|
+
);
|
|
94
|
+
const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);
|
|
85
95
|
drawChunk(selectorMap);
|
|
86
96
|
setupChunkNav();
|
|
87
97
|
};
|
|
@@ -103,7 +113,10 @@
|
|
|
103
113
|
window.chunkNumber += 1;
|
|
104
114
|
window.scrollTo(0, window.chunkNumber * window.innerHeight);
|
|
105
115
|
await window.waitForDomSettle();
|
|
106
|
-
const { selectorMap } = await processElements(
|
|
116
|
+
const { selectorMap: multiSelectorMap } = await window.processElements(
|
|
117
|
+
window.chunkNumber
|
|
118
|
+
);
|
|
119
|
+
const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);
|
|
107
120
|
drawChunk(selectorMap);
|
|
108
121
|
setupChunkNav();
|
|
109
122
|
};
|
package/dist/dom/build/index.js
CHANGED
|
@@ -1,8 +1,169 @@
|
|
|
1
1
|
(() => {
|
|
2
|
+
// lib/dom/xpathUtils.ts
|
|
3
|
+
function getParentElement(node) {
|
|
4
|
+
return isElementNode(node) ? node.parentElement : node.parentNode;
|
|
5
|
+
}
|
|
6
|
+
function getCombinations(attributes, size) {
|
|
7
|
+
const results = [];
|
|
8
|
+
function helper(start, combo) {
|
|
9
|
+
if (combo.length === size) {
|
|
10
|
+
results.push([...combo]);
|
|
11
|
+
return;
|
|
12
|
+
}
|
|
13
|
+
for (let i = start; i < attributes.length; i++) {
|
|
14
|
+
combo.push(attributes[i]);
|
|
15
|
+
helper(i + 1, combo);
|
|
16
|
+
combo.pop();
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
helper(0, []);
|
|
20
|
+
return results;
|
|
21
|
+
}
|
|
22
|
+
function isXPathFirstResultElement(xpath, target) {
|
|
23
|
+
try {
|
|
24
|
+
const result = document.evaluate(
|
|
25
|
+
xpath,
|
|
26
|
+
document.documentElement,
|
|
27
|
+
null,
|
|
28
|
+
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
|
|
29
|
+
null
|
|
30
|
+
);
|
|
31
|
+
return result.snapshotItem(0) === target;
|
|
32
|
+
} catch (error) {
|
|
33
|
+
console.warn(`Invalid XPath expression: ${xpath}`, error);
|
|
34
|
+
return false;
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
function escapeXPathString(value) {
|
|
38
|
+
if (value.includes("'")) {
|
|
39
|
+
if (value.includes('"')) {
|
|
40
|
+
return "concat(" + value.split(/('+)/).map((part) => {
|
|
41
|
+
if (part === "'") {
|
|
42
|
+
return `"'"`;
|
|
43
|
+
} else if (part.startsWith("'") && part.endsWith("'")) {
|
|
44
|
+
return `"${part}"`;
|
|
45
|
+
} else {
|
|
46
|
+
return `'${part}'`;
|
|
47
|
+
}
|
|
48
|
+
}).join(",") + ")";
|
|
49
|
+
} else {
|
|
50
|
+
return `"${value}"`;
|
|
51
|
+
}
|
|
52
|
+
} else {
|
|
53
|
+
return `'${value}'`;
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
async function generateXPathsForElement(element) {
|
|
57
|
+
if (!element) return [];
|
|
58
|
+
const [complexXPath, standardXPath, idBasedXPath] = await Promise.all([
|
|
59
|
+
generateComplexXPath(element),
|
|
60
|
+
generateStandardXPath(element),
|
|
61
|
+
generatedIdBasedXPath(element)
|
|
62
|
+
]);
|
|
63
|
+
return [standardXPath, ...idBasedXPath ? [idBasedXPath] : [], complexXPath];
|
|
64
|
+
}
|
|
65
|
+
async function generateComplexXPath(element) {
|
|
66
|
+
const parts = [];
|
|
67
|
+
let currentElement = element;
|
|
68
|
+
while (currentElement && (isTextNode(currentElement) || isElementNode(currentElement))) {
|
|
69
|
+
if (isElementNode(currentElement)) {
|
|
70
|
+
const el = currentElement;
|
|
71
|
+
let selector = el.tagName.toLowerCase();
|
|
72
|
+
const attributePriority = [
|
|
73
|
+
"data-qa",
|
|
74
|
+
"data-component",
|
|
75
|
+
"data-role",
|
|
76
|
+
"role",
|
|
77
|
+
"aria-role",
|
|
78
|
+
"type",
|
|
79
|
+
"name",
|
|
80
|
+
"aria-label",
|
|
81
|
+
"placeholder",
|
|
82
|
+
"title",
|
|
83
|
+
"alt"
|
|
84
|
+
];
|
|
85
|
+
const attributes = attributePriority.map((attr) => {
|
|
86
|
+
let value = el.getAttribute(attr);
|
|
87
|
+
if (attr === "href-full" && value) {
|
|
88
|
+
value = el.getAttribute("href");
|
|
89
|
+
}
|
|
90
|
+
return value ? { attr: attr === "href-full" ? "href" : attr, value } : null;
|
|
91
|
+
}).filter((attr) => attr !== null);
|
|
92
|
+
let uniqueSelector = "";
|
|
93
|
+
for (let i = 1; i <= attributes.length; i++) {
|
|
94
|
+
const combinations = getCombinations(attributes, i);
|
|
95
|
+
for (const combo of combinations) {
|
|
96
|
+
const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(" and ");
|
|
97
|
+
const xpath2 = `//${selector}[${conditions}]`;
|
|
98
|
+
if (isXPathFirstResultElement(xpath2, el)) {
|
|
99
|
+
uniqueSelector = xpath2;
|
|
100
|
+
break;
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
if (uniqueSelector) break;
|
|
104
|
+
}
|
|
105
|
+
if (uniqueSelector) {
|
|
106
|
+
parts.unshift(uniqueSelector.replace("//", ""));
|
|
107
|
+
break;
|
|
108
|
+
} else {
|
|
109
|
+
const parent = getParentElement(el);
|
|
110
|
+
if (parent) {
|
|
111
|
+
const siblings = Array.from(parent.children).filter(
|
|
112
|
+
(sibling) => sibling.tagName === el.tagName
|
|
113
|
+
);
|
|
114
|
+
const index = siblings.indexOf(el) + 1;
|
|
115
|
+
selector += siblings.length > 1 ? `[${index}]` : "";
|
|
116
|
+
}
|
|
117
|
+
parts.unshift(selector);
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
currentElement = getParentElement(currentElement);
|
|
121
|
+
}
|
|
122
|
+
const xpath = "//" + parts.join("/");
|
|
123
|
+
return xpath;
|
|
124
|
+
}
|
|
125
|
+
async function generateStandardXPath(element) {
|
|
126
|
+
const parts = [];
|
|
127
|
+
while (element && (isTextNode(element) || isElementNode(element))) {
|
|
128
|
+
let index = 0;
|
|
129
|
+
let hasSameTypeSiblings = false;
|
|
130
|
+
const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];
|
|
131
|
+
for (let i = 0; i < siblings.length; i++) {
|
|
132
|
+
const sibling = siblings[i];
|
|
133
|
+
if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {
|
|
134
|
+
index = index + 1;
|
|
135
|
+
hasSameTypeSiblings = true;
|
|
136
|
+
if (sibling.isSameNode(element)) {
|
|
137
|
+
break;
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
if (element.nodeName !== "#text") {
|
|
142
|
+
const tagName = element.nodeName.toLowerCase();
|
|
143
|
+
const pathIndex = hasSameTypeSiblings ? `[${index}]` : "";
|
|
144
|
+
parts.unshift(`${tagName}${pathIndex}`);
|
|
145
|
+
}
|
|
146
|
+
element = element.parentElement;
|
|
147
|
+
}
|
|
148
|
+
return parts.length ? `/${parts.join("/")}` : "";
|
|
149
|
+
}
|
|
150
|
+
async function generatedIdBasedXPath(element) {
|
|
151
|
+
if (isElementNode(element) && element.id) {
|
|
152
|
+
return `//*[@id='${element.id}']`;
|
|
153
|
+
}
|
|
154
|
+
return null;
|
|
155
|
+
}
|
|
156
|
+
|
|
2
157
|
// lib/dom/process.ts
|
|
158
|
+
function isElementNode(node) {
|
|
159
|
+
return node.nodeType === Node.ELEMENT_NODE;
|
|
160
|
+
}
|
|
161
|
+
function isTextNode(node) {
|
|
162
|
+
return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());
|
|
163
|
+
}
|
|
3
164
|
async function processDom(chunksSeen) {
|
|
4
165
|
const { chunk, chunksArray } = await pickChunk(chunksSeen);
|
|
5
|
-
const { outputString, selectorMap } = await
|
|
166
|
+
const { outputString, selectorMap } = await processElements(chunk);
|
|
6
167
|
console.log(
|
|
7
168
|
`Stagehand (Browser Process): Extracted dom elements:
|
|
8
169
|
${outputString}`
|
|
@@ -22,7 +183,7 @@ ${outputString}`
|
|
|
22
183
|
let index = 0;
|
|
23
184
|
const results = [];
|
|
24
185
|
for (let chunk = 0; chunk < totalChunks; chunk++) {
|
|
25
|
-
const result = await
|
|
186
|
+
const result = await processElements(chunk, true, index);
|
|
26
187
|
results.push(result);
|
|
27
188
|
index += Object.keys(result.selectorMap).length;
|
|
28
189
|
}
|
|
@@ -49,13 +210,14 @@ ${outputString}`
|
|
|
49
210
|
scrollEndTimer = window.setTimeout(() => {
|
|
50
211
|
window.removeEventListener("scroll", handleScrollEnd);
|
|
51
212
|
resolve();
|
|
52
|
-
},
|
|
213
|
+
}, 100);
|
|
53
214
|
};
|
|
54
215
|
window.addEventListener("scroll", handleScrollEnd, { passive: true });
|
|
55
216
|
handleScrollEnd();
|
|
56
217
|
});
|
|
57
218
|
}
|
|
58
|
-
|
|
219
|
+
var xpathCache = /* @__PURE__ */ new Map();
|
|
220
|
+
async function processElements(chunk, scrollToChunk = true, indexOffset = 0) {
|
|
59
221
|
console.time("processElements:total");
|
|
60
222
|
const viewportHeight = window.innerHeight;
|
|
61
223
|
const chunkHeight = viewportHeight * chunk;
|
|
@@ -68,7 +230,6 @@ ${outputString}`
|
|
|
68
230
|
}
|
|
69
231
|
const candidateElements = [];
|
|
70
232
|
const DOMQueue = [...document.body.childNodes];
|
|
71
|
-
const xpathCache = /* @__PURE__ */ new Map();
|
|
72
233
|
console.log("Stagehand (Browser Process): Generating candidate elements");
|
|
73
234
|
console.time("processElements:findCandidates");
|
|
74
235
|
while (DOMQueue.length > 0) {
|
|
@@ -105,16 +266,25 @@ ${outputString}`
|
|
|
105
266
|
`Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`
|
|
106
267
|
);
|
|
107
268
|
console.time("processElements:processCandidates");
|
|
269
|
+
console.time("processElements:generateXPaths");
|
|
270
|
+
const xpathLists = await Promise.all(
|
|
271
|
+
candidateElements.map(async (element) => {
|
|
272
|
+
if (xpathCache.has(element)) {
|
|
273
|
+
return xpathCache.get(element);
|
|
274
|
+
}
|
|
275
|
+
const xpaths = await generateXPathsForElement(element);
|
|
276
|
+
xpathCache.set(element, xpaths);
|
|
277
|
+
return xpaths;
|
|
278
|
+
})
|
|
279
|
+
);
|
|
280
|
+
console.timeEnd("processElements:generateXPaths");
|
|
108
281
|
candidateElements.forEach((element, index) => {
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
xpath = generateXPath(element);
|
|
112
|
-
xpathCache.set(element, xpath);
|
|
113
|
-
}
|
|
282
|
+
const xpaths = xpathLists[index];
|
|
283
|
+
let elementOutput = "";
|
|
114
284
|
if (isTextNode(element)) {
|
|
115
285
|
const textContent = element.textContent?.trim();
|
|
116
286
|
if (textContent) {
|
|
117
|
-
|
|
287
|
+
elementOutput += `${index + indexOffset}:${textContent}
|
|
118
288
|
`;
|
|
119
289
|
}
|
|
120
290
|
} else if (isElementNode(element)) {
|
|
@@ -123,10 +293,11 @@ ${outputString}`
|
|
|
123
293
|
const openingTag = `<${tagName}${attributes ? " " + attributes : ""}>`;
|
|
124
294
|
const closingTag = `</${tagName}>`;
|
|
125
295
|
const textContent = element.textContent?.trim() || "";
|
|
126
|
-
|
|
296
|
+
elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}
|
|
127
297
|
`;
|
|
128
298
|
}
|
|
129
|
-
|
|
299
|
+
outputString += elementOutput;
|
|
300
|
+
selectorMap[index + indexOffset] = xpaths;
|
|
130
301
|
});
|
|
131
302
|
console.timeEnd("processElements:processCandidates");
|
|
132
303
|
console.timeEnd("processElements:total");
|
|
@@ -161,36 +332,8 @@ ${outputString}`
|
|
|
161
332
|
}
|
|
162
333
|
window.processDom = processDom;
|
|
163
334
|
window.processAllOfDom = processAllOfDom;
|
|
164
|
-
window.processElements =
|
|
335
|
+
window.processElements = processElements;
|
|
165
336
|
window.scrollToHeight = scrollToHeight;
|
|
166
|
-
function generateXPath(element) {
|
|
167
|
-
if (isElementNode(element) && element.id) {
|
|
168
|
-
return `//*[@id='${element.id}']`;
|
|
169
|
-
}
|
|
170
|
-
const parts = [];
|
|
171
|
-
while (element && (isTextNode(element) || isElementNode(element))) {
|
|
172
|
-
let index = 0;
|
|
173
|
-
let hasSameTypeSiblings = false;
|
|
174
|
-
const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];
|
|
175
|
-
for (let i = 0; i < siblings.length; i++) {
|
|
176
|
-
const sibling = siblings[i];
|
|
177
|
-
if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {
|
|
178
|
-
index = index + 1;
|
|
179
|
-
hasSameTypeSiblings = true;
|
|
180
|
-
if (sibling.isSameNode(element)) {
|
|
181
|
-
break;
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
if (element.nodeName !== "#text") {
|
|
186
|
-
const tagName = element.nodeName.toLowerCase();
|
|
187
|
-
const pathIndex = hasSameTypeSiblings ? `[${index}]` : "";
|
|
188
|
-
parts.unshift(`${tagName}${pathIndex}`);
|
|
189
|
-
}
|
|
190
|
-
element = element.parentElement;
|
|
191
|
-
}
|
|
192
|
-
return parts.length ? `/${parts.join("/")}` : "";
|
|
193
|
-
}
|
|
194
337
|
var leafElementDenyList = ["SVG", "IFRAME", "SCRIPT", "STYLE", "LINK"];
|
|
195
338
|
var interactiveElementTypes = [
|
|
196
339
|
"A",
|
|
@@ -231,13 +374,6 @@ ${outputString}`
|
|
|
231
374
|
"tooltip"
|
|
232
375
|
];
|
|
233
376
|
var interactiveAriaRoles = ["menu", "menuitem", "button"];
|
|
234
|
-
function isElementNode(node) {
|
|
235
|
-
return node.nodeType === Node.ELEMENT_NODE;
|
|
236
|
-
}
|
|
237
|
-
function isTextNode(node) {
|
|
238
|
-
const trimmedText = node.textContent?.trim().replace(/\s/g, "");
|
|
239
|
-
return node.nodeType === Node.TEXT_NODE && trimmedText !== "";
|
|
240
|
-
}
|
|
241
377
|
var isVisible = (element) => {
|
|
242
378
|
const rect = element.getBoundingClientRect();
|
|
243
379
|
if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {
|
|
@@ -361,12 +497,19 @@ ${outputString}`
|
|
|
361
497
|
// lib/dom/debug.ts
|
|
362
498
|
async function debugDom() {
|
|
363
499
|
window.chunkNumber = 0;
|
|
364
|
-
const { selectorMap, outputString } = await window.processElements(
|
|
365
|
-
|
|
366
|
-
);
|
|
500
|
+
const { selectorMap: multiSelectorMap, outputString } = await window.processElements(window.chunkNumber);
|
|
501
|
+
const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);
|
|
367
502
|
drawChunk(selectorMap);
|
|
368
503
|
setupChunkNav();
|
|
369
504
|
}
|
|
505
|
+
function multiSelectorMapToSelectorMap(multiSelectorMap) {
|
|
506
|
+
return Object.fromEntries(
|
|
507
|
+
Object.entries(multiSelectorMap).map(([key, selectors]) => [
|
|
508
|
+
Number(key),
|
|
509
|
+
selectors[0]
|
|
510
|
+
])
|
|
511
|
+
);
|
|
512
|
+
}
|
|
370
513
|
function drawChunk(selectorMap) {
|
|
371
514
|
cleanupMarkers();
|
|
372
515
|
Object.entries(selectorMap).forEach(([_index, selector]) => {
|
|
@@ -440,7 +583,10 @@ ${outputString}`
|
|
|
440
583
|
window.chunkNumber -= 1;
|
|
441
584
|
window.scrollTo(0, window.chunkNumber * window.innerHeight);
|
|
442
585
|
await window.waitForDomSettle();
|
|
443
|
-
const { selectorMap } = await processElements(
|
|
586
|
+
const { selectorMap: multiSelectorMap } = await window.processElements(
|
|
587
|
+
window.chunkNumber
|
|
588
|
+
);
|
|
589
|
+
const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);
|
|
444
590
|
drawChunk(selectorMap);
|
|
445
591
|
setupChunkNav();
|
|
446
592
|
};
|
|
@@ -462,7 +608,10 @@ ${outputString}`
|
|
|
462
608
|
window.chunkNumber += 1;
|
|
463
609
|
window.scrollTo(0, window.chunkNumber * window.innerHeight);
|
|
464
610
|
await window.waitForDomSettle();
|
|
465
|
-
const { selectorMap } = await processElements(
|
|
611
|
+
const { selectorMap: multiSelectorMap } = await window.processElements(
|
|
612
|
+
window.chunkNumber
|
|
613
|
+
);
|
|
614
|
+
const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);
|
|
466
615
|
drawChunk(selectorMap);
|
|
467
616
|
setupChunkNav();
|
|
468
617
|
};
|
package/dist/dom/build/process.js
CHANGED
|
@@ -1,5 +1,166 @@
|
|
|
1
1
|
(() => {
|
|
2
|
+
// lib/dom/xpathUtils.ts
|
|
3
|
+
function getParentElement(node) {
|
|
4
|
+
return isElementNode(node) ? node.parentElement : node.parentNode;
|
|
5
|
+
}
|
|
6
|
+
function getCombinations(attributes, size) {
|
|
7
|
+
const results = [];
|
|
8
|
+
function helper(start, combo) {
|
|
9
|
+
if (combo.length === size) {
|
|
10
|
+
results.push([...combo]);
|
|
11
|
+
return;
|
|
12
|
+
}
|
|
13
|
+
for (let i = start; i < attributes.length; i++) {
|
|
14
|
+
combo.push(attributes[i]);
|
|
15
|
+
helper(i + 1, combo);
|
|
16
|
+
combo.pop();
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
helper(0, []);
|
|
20
|
+
return results;
|
|
21
|
+
}
|
|
22
|
+
function isXPathFirstResultElement(xpath, target) {
|
|
23
|
+
try {
|
|
24
|
+
const result = document.evaluate(
|
|
25
|
+
xpath,
|
|
26
|
+
document.documentElement,
|
|
27
|
+
null,
|
|
28
|
+
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
|
|
29
|
+
null
|
|
30
|
+
);
|
|
31
|
+
return result.snapshotItem(0) === target;
|
|
32
|
+
} catch (error) {
|
|
33
|
+
console.warn(`Invalid XPath expression: ${xpath}`, error);
|
|
34
|
+
return false;
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
function escapeXPathString(value) {
|
|
38
|
+
if (value.includes("'")) {
|
|
39
|
+
if (value.includes('"')) {
|
|
40
|
+
return "concat(" + value.split(/('+)/).map((part) => {
|
|
41
|
+
if (part === "'") {
|
|
42
|
+
return `"'"`;
|
|
43
|
+
} else if (part.startsWith("'") && part.endsWith("'")) {
|
|
44
|
+
return `"${part}"`;
|
|
45
|
+
} else {
|
|
46
|
+
return `'${part}'`;
|
|
47
|
+
}
|
|
48
|
+
}).join(",") + ")";
|
|
49
|
+
} else {
|
|
50
|
+
return `"${value}"`;
|
|
51
|
+
}
|
|
52
|
+
} else {
|
|
53
|
+
return `'${value}'`;
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
async function generateXPathsForElement(element) {
|
|
57
|
+
if (!element) return [];
|
|
58
|
+
const [complexXPath, standardXPath, idBasedXPath] = await Promise.all([
|
|
59
|
+
generateComplexXPath(element),
|
|
60
|
+
generateStandardXPath(element),
|
|
61
|
+
generatedIdBasedXPath(element)
|
|
62
|
+
]);
|
|
63
|
+
return [standardXPath, ...idBasedXPath ? [idBasedXPath] : [], complexXPath];
|
|
64
|
+
}
|
|
65
|
+
async function generateComplexXPath(element) {
|
|
66
|
+
const parts = [];
|
|
67
|
+
let currentElement = element;
|
|
68
|
+
while (currentElement && (isTextNode(currentElement) || isElementNode(currentElement))) {
|
|
69
|
+
if (isElementNode(currentElement)) {
|
|
70
|
+
const el = currentElement;
|
|
71
|
+
let selector = el.tagName.toLowerCase();
|
|
72
|
+
const attributePriority = [
|
|
73
|
+
"data-qa",
|
|
74
|
+
"data-component",
|
|
75
|
+
"data-role",
|
|
76
|
+
"role",
|
|
77
|
+
"aria-role",
|
|
78
|
+
"type",
|
|
79
|
+
"name",
|
|
80
|
+
"aria-label",
|
|
81
|
+
"placeholder",
|
|
82
|
+
"title",
|
|
83
|
+
"alt"
|
|
84
|
+
];
|
|
85
|
+
const attributes = attributePriority.map((attr) => {
|
|
86
|
+
let value = el.getAttribute(attr);
|
|
87
|
+
if (attr === "href-full" && value) {
|
|
88
|
+
value = el.getAttribute("href");
|
|
89
|
+
}
|
|
90
|
+
return value ? { attr: attr === "href-full" ? "href" : attr, value } : null;
|
|
91
|
+
}).filter((attr) => attr !== null);
|
|
92
|
+
let uniqueSelector = "";
|
|
93
|
+
for (let i = 1; i <= attributes.length; i++) {
|
|
94
|
+
const combinations = getCombinations(attributes, i);
|
|
95
|
+
for (const combo of combinations) {
|
|
96
|
+
const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(" and ");
|
|
97
|
+
const xpath2 = `//${selector}[${conditions}]`;
|
|
98
|
+
if (isXPathFirstResultElement(xpath2, el)) {
|
|
99
|
+
uniqueSelector = xpath2;
|
|
100
|
+
break;
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
if (uniqueSelector) break;
|
|
104
|
+
}
|
|
105
|
+
if (uniqueSelector) {
|
|
106
|
+
parts.unshift(uniqueSelector.replace("//", ""));
|
|
107
|
+
break;
|
|
108
|
+
} else {
|
|
109
|
+
const parent = getParentElement(el);
|
|
110
|
+
if (parent) {
|
|
111
|
+
const siblings = Array.from(parent.children).filter(
|
|
112
|
+
(sibling) => sibling.tagName === el.tagName
|
|
113
|
+
);
|
|
114
|
+
const index = siblings.indexOf(el) + 1;
|
|
115
|
+
selector += siblings.length > 1 ? `[${index}]` : "";
|
|
116
|
+
}
|
|
117
|
+
parts.unshift(selector);
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
currentElement = getParentElement(currentElement);
|
|
121
|
+
}
|
|
122
|
+
const xpath = "//" + parts.join("/");
|
|
123
|
+
return xpath;
|
|
124
|
+
}
|
|
125
|
+
async function generateStandardXPath(element) {
|
|
126
|
+
const parts = [];
|
|
127
|
+
while (element && (isTextNode(element) || isElementNode(element))) {
|
|
128
|
+
let index = 0;
|
|
129
|
+
let hasSameTypeSiblings = false;
|
|
130
|
+
const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];
|
|
131
|
+
for (let i = 0; i < siblings.length; i++) {
|
|
132
|
+
const sibling = siblings[i];
|
|
133
|
+
if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {
|
|
134
|
+
index = index + 1;
|
|
135
|
+
hasSameTypeSiblings = true;
|
|
136
|
+
if (sibling.isSameNode(element)) {
|
|
137
|
+
break;
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
if (element.nodeName !== "#text") {
|
|
142
|
+
const tagName = element.nodeName.toLowerCase();
|
|
143
|
+
const pathIndex = hasSameTypeSiblings ? `[${index}]` : "";
|
|
144
|
+
parts.unshift(`${tagName}${pathIndex}`);
|
|
145
|
+
}
|
|
146
|
+
element = element.parentElement;
|
|
147
|
+
}
|
|
148
|
+
return parts.length ? `/${parts.join("/")}` : "";
|
|
149
|
+
}
|
|
150
|
+
async function generatedIdBasedXPath(element) {
|
|
151
|
+
if (isElementNode(element) && element.id) {
|
|
152
|
+
return `//*[@id='${element.id}']`;
|
|
153
|
+
}
|
|
154
|
+
return null;
|
|
155
|
+
}
|
|
156
|
+
|
|
2
157
|
// lib/dom/process.ts
|
|
158
|
+
function isElementNode(node) {
|
|
159
|
+
return node.nodeType === Node.ELEMENT_NODE;
|
|
160
|
+
}
|
|
161
|
+
function isTextNode(node) {
|
|
162
|
+
return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());
|
|
163
|
+
}
|
|
3
164
|
async function processDom(chunksSeen) {
|
|
4
165
|
const { chunk, chunksArray } = await pickChunk(chunksSeen);
|
|
5
166
|
const { outputString, selectorMap } = await processElements(chunk);
|
|
@@ -49,12 +210,13 @@ ${outputString}`
|
|
|
49
210
|
scrollEndTimer = window.setTimeout(() => {
|
|
50
211
|
window.removeEventListener("scroll", handleScrollEnd);
|
|
51
212
|
resolve();
|
|
52
|
-
},
|
|
213
|
+
}, 100);
|
|
53
214
|
};
|
|
54
215
|
window.addEventListener("scroll", handleScrollEnd, { passive: true });
|
|
55
216
|
handleScrollEnd();
|
|
56
217
|
});
|
|
57
218
|
}
|
|
219
|
+
var xpathCache = /* @__PURE__ */ new Map();
|
|
58
220
|
async function processElements(chunk, scrollToChunk = true, indexOffset = 0) {
|
|
59
221
|
console.time("processElements:total");
|
|
60
222
|
const viewportHeight = window.innerHeight;
|
|
@@ -68,7 +230,6 @@ ${outputString}`
|
|
|
68
230
|
}
|
|
69
231
|
const candidateElements = [];
|
|
70
232
|
const DOMQueue = [...document.body.childNodes];
|
|
71
|
-
const xpathCache = /* @__PURE__ */ new Map();
|
|
72
233
|
console.log("Stagehand (Browser Process): Generating candidate elements");
|
|
73
234
|
console.time("processElements:findCandidates");
|
|
74
235
|
while (DOMQueue.length > 0) {
|
|
@@ -105,16 +266,25 @@ ${outputString}`
|
|
|
105
266
|
`Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`
|
|
106
267
|
);
|
|
107
268
|
console.time("processElements:processCandidates");
|
|
269
|
+
console.time("processElements:generateXPaths");
|
|
270
|
+
const xpathLists = await Promise.all(
|
|
271
|
+
candidateElements.map(async (element) => {
|
|
272
|
+
if (xpathCache.has(element)) {
|
|
273
|
+
return xpathCache.get(element);
|
|
274
|
+
}
|
|
275
|
+
const xpaths = await generateXPathsForElement(element);
|
|
276
|
+
xpathCache.set(element, xpaths);
|
|
277
|
+
return xpaths;
|
|
278
|
+
})
|
|
279
|
+
);
|
|
280
|
+
console.timeEnd("processElements:generateXPaths");
|
|
108
281
|
candidateElements.forEach((element, index) => {
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
xpath = generateXPath(element);
|
|
112
|
-
xpathCache.set(element, xpath);
|
|
113
|
-
}
|
|
282
|
+
const xpaths = xpathLists[index];
|
|
283
|
+
let elementOutput = "";
|
|
114
284
|
if (isTextNode(element)) {
|
|
115
285
|
const textContent = element.textContent?.trim();
|
|
116
286
|
if (textContent) {
|
|
117
|
-
|
|
287
|
+
elementOutput += `${index + indexOffset}:${textContent}
|
|
118
288
|
`;
|
|
119
289
|
}
|
|
120
290
|
} else if (isElementNode(element)) {
|
|
@@ -123,10 +293,11 @@ ${outputString}`
|
|
|
123
293
|
const openingTag = `<${tagName}${attributes ? " " + attributes : ""}>`;
|
|
124
294
|
const closingTag = `</${tagName}>`;
|
|
125
295
|
const textContent = element.textContent?.trim() || "";
|
|
126
|
-
|
|
296
|
+
elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}
|
|
127
297
|
`;
|
|
128
298
|
}
|
|
129
|
-
|
|
299
|
+
outputString += elementOutput;
|
|
300
|
+
selectorMap[index + indexOffset] = xpaths;
|
|
130
301
|
});
|
|
131
302
|
console.timeEnd("processElements:processCandidates");
|
|
132
303
|
console.timeEnd("processElements:total");
|
|
@@ -163,34 +334,6 @@ ${outputString}`
|
|
|
163
334
|
window.processAllOfDom = processAllOfDom;
|
|
164
335
|
window.processElements = processElements;
|
|
165
336
|
window.scrollToHeight = scrollToHeight;
|
|
166
|
-
function generateXPath(element) {
|
|
167
|
-
if (isElementNode(element) && element.id) {
|
|
168
|
-
return `//*[@id='${element.id}']`;
|
|
169
|
-
}
|
|
170
|
-
const parts = [];
|
|
171
|
-
while (element && (isTextNode(element) || isElementNode(element))) {
|
|
172
|
-
let index = 0;
|
|
173
|
-
let hasSameTypeSiblings = false;
|
|
174
|
-
const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];
|
|
175
|
-
for (let i = 0; i < siblings.length; i++) {
|
|
176
|
-
const sibling = siblings[i];
|
|
177
|
-
if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {
|
|
178
|
-
index = index + 1;
|
|
179
|
-
hasSameTypeSiblings = true;
|
|
180
|
-
if (sibling.isSameNode(element)) {
|
|
181
|
-
break;
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
if (element.nodeName !== "#text") {
|
|
186
|
-
const tagName = element.nodeName.toLowerCase();
|
|
187
|
-
const pathIndex = hasSameTypeSiblings ? `[${index}]` : "";
|
|
188
|
-
parts.unshift(`${tagName}${pathIndex}`);
|
|
189
|
-
}
|
|
190
|
-
element = element.parentElement;
|
|
191
|
-
}
|
|
192
|
-
return parts.length ? `/${parts.join("/")}` : "";
|
|
193
|
-
}
|
|
194
337
|
var leafElementDenyList = ["SVG", "IFRAME", "SCRIPT", "STYLE", "LINK"];
|
|
195
338
|
var interactiveElementTypes = [
|
|
196
339
|
"A",
|
|
@@ -231,13 +374,6 @@ ${outputString}`
|
|
|
231
374
|
"tooltip"
|
|
232
375
|
];
|
|
233
376
|
var interactiveAriaRoles = ["menu", "menuitem", "button"];
|
|
234
|
-
function isElementNode(node) {
|
|
235
|
-
return node.nodeType === Node.ELEMENT_NODE;
|
|
236
|
-
}
|
|
237
|
-
function isTextNode(node) {
|
|
238
|
-
const trimmedText = node.textContent?.trim().replace(/\s/g, "");
|
|
239
|
-
return node.nodeType === Node.TEXT_NODE && trimmedText !== "";
|
|
240
|
-
}
|
|
241
377
|
var isVisible = (element) => {
|
|
242
378
|
const rect = element.getBoundingClientRect();
|
|
243
379
|
if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {
|