@browserbasehq/stagehand 1.1.1 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dom/build/index.js +186 -46
- package/dist/dom/build/process.js +186 -46
- package/dist/dom/build/xpathUtils.js +482 -0
- package/dist/index.d.ts +6 -6
- package/dist/index.js +1398 -705
- package/package.json +1 -1
package/dist/dom/build/index.js
CHANGED
|
@@ -1,8 +1,173 @@
|
|
|
1
1
|
(() => {
|
|
2
|
+
// lib/dom/xpathUtils.ts
|
|
3
|
+
/**
 * Returns the parent of `node`: `parentElement` when `isElementNode(node)`
 * is true, `parentNode` for every other node.
 *
 * @param {Node} node - Node whose parent is wanted.
 * @returns {Node|null} The parent as reported by the DOM property used.
 */
function getParentElement(node) {
  if (isElementNode(node)) {
    return node.parentElement;
  }
  return node.parentNode;
}
|
|
6
|
+
/**
 * Enumerates every combination of exactly `size` entries from `attributes`,
 * keeping the entries in their original relative order.
 *
 * @param {Array} attributes - Items to choose from.
 * @param {number} size - Number of items in each combination.
 * @returns {Array<Array>} Combinations ordered by ascending source index;
 *   `[[]]` when `size` is 0 and `[]` when no combination of that size exists.
 */
function getCombinations(attributes, size) {
  const collected = [];
  const extend = (fromIndex, picked) => {
    if (picked.length === size) {
      collected.push(picked);
      return;
    }
    for (let idx = fromIndex; idx < attributes.length; idx += 1) {
      // A fresh array per branch replaces the push/pop backtracking.
      extend(idx + 1, [...picked, attributes[idx]]);
    }
  };
  extend(0, []);
  return collected;
}
|
|
22
|
+
/**
 * Reports whether the first node matched by `xpath`, evaluated against
 * `document.documentElement`, is exactly `target`.
 *
 * @param {string} xpath - XPath expression to evaluate.
 * @param {Node} target - Node expected to be the first match.
 * @returns {boolean} `true` when the first snapshot item is `target`;
 *   `false` otherwise, including when evaluation throws (a warning is logged).
 */
function isXPathFirstResultElement(xpath, target) {
  try {
    const root = document.documentElement;
    const snapshot = document.evaluate(xpath, root, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
    const firstMatch = snapshot.snapshotItem(0);
    return firstMatch === target;
  } catch (error) {
    console.warn(`Invalid XPath expression: ${xpath}`, error);
    return false;
  }
}
|
|
37
|
+
/**
 * Renders `value` as an XPath string literal.
 *
 * - No single quote: wrapped in single quotes.
 * - Single quotes but no double quotes: wrapped in double quotes.
 * - Both: a `concat(...)` call where runs of single quotes are double-quoted
 *   and every other piece is single-quoted.
 *
 * @param {string} value - Raw text to embed in an XPath expression.
 * @returns {string} XPath literal or `concat(...)` expression.
 */
function escapeXPathString(value) {
  if (!value.includes("'")) {
    return `'${value}'`;
  }
  if (!value.includes('"')) {
    return `"${value}"`;
  }
  // The capture group keeps the quote runs in the split output.
  const pieces = value.split(/('+)/).map((piece) => {
    const isQuoteRun = piece.startsWith("'") && piece.endsWith("'");
    return isQuoteRun ? `"${piece}"` : `'${piece}'`;
  });
  return `concat(${pieces.join(",")})`;
}
|
|
56
|
+
/**
 * Produces the candidate XPaths for `element`, ordered id-based (only when
 * one exists), then standard, then complex.
 *
 * @param {Node} element - Node to describe; a falsy value yields `[]`.
 * @returns {Promise<string[]>} XPath candidates in the order above.
 */
async function generateXPathsForElement(element) {
  if (!element) return [];
  const pending = [
    generateComplexXPath(element),
    generateStandardXPath(element),
    generatedIdBasedXPath(element),
  ];
  const [complexXPath, standardXPath, idBasedXPath] = await Promise.all(pending);
  const ordered = [standardXPath, complexXPath];
  if (idBasedXPath) {
    ordered.unshift(idBasedXPath);
  }
  return ordered;
}
|
|
65
|
+
/**
 * Builds an attribute-anchored XPath for `element`.
 *
 * Walks from `element` toward the root. At each element it tries predicates
 * built from combinations of the prioritized attributes, smallest
 * combinations first. As soon as `//tag[predicates]` has that element as its
 * first match, the walk stops and that step becomes the path's anchor.
 * Elements with no such predicate contribute `tag`, plus a 1-based index
 * when the parent has more than one child with the same tag. Text nodes
 * contribute no step.
 *
 * @param {Node} element - Starting node.
 * @returns {Promise<string>} XPath of the form `//step/step/...`.
 */
async function generateComplexXPath(element) {
  // Attributes are tried in this order when building predicates.
  const attributePriority = [
    "data-qa",
    "data-component",
    "data-role",
    "role",
    "aria-role",
    "type",
    "name",
    "aria-label",
    "placeholder",
    "title",
    "alt",
  ];
  const parts = [];
  let currentElement = element;
  while (currentElement && (isTextNode(currentElement) || isElementNode(currentElement))) {
    if (isElementNode(currentElement)) {
      const el = currentElement;
      const tag = el.tagName.toLowerCase();

      // Only attributes with a non-empty value can take part in a predicate.
      const attributes = [];
      for (const attr of attributePriority) {
        const value = el.getAttribute(attr);
        if (value) {
          attributes.push({ attr, value });
        }
      }

      let uniqueStep = "";
      for (let size = 1; size <= attributes.length && !uniqueStep; size++) {
        for (const combo of getCombinations(attributes, size)) {
          const conditions = combo
            .map((a) => `@${a.attr}=${escapeXPathString(a.value)}`)
            .join(" and ");
          const step = `${tag}[${conditions}]`;
          if (isXPathFirstResultElement(`//${step}`, el)) {
            uniqueStep = step;
            break;
          }
        }
      }

      if (uniqueStep) {
        // Anchor found: ancestors above this element are not needed.
        parts.unshift(uniqueStep);
        break;
      }

      let selector = tag;
      const parent = getParentElement(el);
      if (parent) {
        const sameTagSiblings = Array.from(parent.children).filter(
          (sibling) => sibling.tagName === el.tagName
        );
        if (sameTagSiblings.length > 1) {
          selector += `[${sameTagSiblings.indexOf(el) + 1}]`;
        }
      }
      parts.unshift(selector);
    }
    currentElement = getParentElement(currentElement);
  }
  return "//" + parts.join("/");
}
|
|
125
|
+
/**
 * Builds a positional XPath for `element` by walking up `parentElement`
 * links and recording, for each non-text node, its lowercased node name and
 * its 1-based position among siblings with the same node type and name.
 *
 * Steps are joined with the child axis (`/`). Joining positional steps with
 * `//` would let each step match at any depth, so the first match could be
 * an unrelated node. The leading `//` is kept so the result has the same
 * shape as the complex XPath.
 *
 * @param {Node} element - Starting node (element or non-blank text node).
 * @returns {Promise<string>} XPath such as `//html/body[1]/div[2]`, or `""`
 *   when no step could be produced.
 */
async function generateStandardXPath(element) {
  const steps = [];
  let node = element;
  while (node && (isTextNode(node) || isElementNode(node))) {
    let position = 0;
    let hasSameTypeSiblings = false;
    const siblings = node.parentElement ? Array.from(node.parentElement.childNodes) : [];
    for (const sibling of siblings) {
      if (sibling.nodeType === node.nodeType && sibling.nodeName === node.nodeName) {
        position += 1;
        hasSameTypeSiblings = true;
        // Stop counting once the node itself is reached.
        if (sibling.isSameNode(node)) {
          break;
        }
      }
    }
    if (node.nodeName !== "#text") {
      const indexSuffix = hasSameTypeSiblings ? `[${position}]` : "";
      steps.unshift(`${node.nodeName.toLowerCase()}${indexSuffix}`);
    }
    node = node.parentElement;
  }
  return steps.length ? `//${steps.join("/")}` : "";
}
|
|
150
|
+
/**
 * Returns an XPath that selects by the element's `id`, or `null` when the
 * node is not an element or has no id.
 *
 * The id goes through `escapeXPathString` so that an id containing quote
 * characters still yields a valid expression. Ids without a single quote
 * produce the same `//*[@id='...']` form as before.
 *
 * @param {Node} element - Node to inspect.
 * @returns {Promise<string|null>} Id-based XPath or `null`.
 */
async function generatedIdBasedXPath(element) {
  if (isElementNode(element) && element.id) {
    return `//*[@id=${escapeXPathString(element.id)}]`;
  }
  return null;
}
|
|
156
|
+
|
|
2
157
|
// lib/dom/process.ts
|
|
158
|
+
/**
 * Tells whether `node` is an element, judged by its `nodeType`.
 *
 * @param {Node} node - Node to classify.
 * @returns {boolean} `true` when `node.nodeType` equals `Node.ELEMENT_NODE`.
 */
function isElementNode(node) {
  const kind = node.nodeType;
  return kind === Node.ELEMENT_NODE;
}
|
|
161
|
+
/**
 * Tells whether `node` is a text node whose trimmed text is non-empty.
 *
 * @param {Node} node - Node to classify.
 * @returns {boolean} `true` for text nodes with non-whitespace content.
 */
function isTextNode(node) {
  if (node.nodeType !== Node.TEXT_NODE) {
    return false;
  }
  const trimmed = node.textContent?.trim();
  return Boolean(trimmed);
}
|
|
3
164
|
async function processDom(chunksSeen) {
|
|
4
165
|
const { chunk, chunksArray } = await pickChunk(chunksSeen);
|
|
5
|
-
const { outputString, selectorMap } = await processElements2(
|
|
166
|
+
const { outputString, selectorMap } = await processElements2(
|
|
167
|
+
chunk,
|
|
168
|
+
void 0,
|
|
169
|
+
void 0
|
|
170
|
+
);
|
|
6
171
|
console.log(
|
|
7
172
|
`Stagehand (Browser Process): Extracted dom elements:
|
|
8
173
|
${outputString}`
|
|
@@ -49,12 +214,13 @@ ${outputString}`
|
|
|
49
214
|
scrollEndTimer = window.setTimeout(() => {
|
|
50
215
|
window.removeEventListener("scroll", handleScrollEnd);
|
|
51
216
|
resolve();
|
|
52
|
-
},
|
|
217
|
+
}, 100);
|
|
53
218
|
};
|
|
54
219
|
window.addEventListener("scroll", handleScrollEnd, { passive: true });
|
|
55
220
|
handleScrollEnd();
|
|
56
221
|
});
|
|
57
222
|
}
|
|
223
|
+
// Cache from a candidate DOM node to the XPath list generated for it. It is
// declared outside the element-processing function, so entries outlive a
// single call. NOTE(review): a Map holds strong references to cached nodes —
// confirm whether entries for removed nodes ever need clearing.
var xpathCache = /* @__PURE__ */ new Map();
|
|
58
224
|
async function processElements2(chunk, scrollToChunk = true, indexOffset = 0) {
|
|
59
225
|
console.time("processElements:total");
|
|
60
226
|
const viewportHeight = window.innerHeight;
|
|
@@ -68,7 +234,6 @@ ${outputString}`
|
|
|
68
234
|
}
|
|
69
235
|
const candidateElements = [];
|
|
70
236
|
const DOMQueue = [...document.body.childNodes];
|
|
71
|
-
const xpathCache = /* @__PURE__ */ new Map();
|
|
72
237
|
console.log("Stagehand (Browser Process): Generating candidate elements");
|
|
73
238
|
console.time("processElements:findCandidates");
|
|
74
239
|
while (DOMQueue.length > 0) {
|
|
@@ -105,16 +270,25 @@ ${outputString}`
|
|
|
105
270
|
`Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`
|
|
106
271
|
);
|
|
107
272
|
console.time("processElements:processCandidates");
|
|
273
|
+
console.time("processElements:generateXPaths");
|
|
274
|
+
const xpathLists = await Promise.all(
|
|
275
|
+
candidateElements.map(async (element) => {
|
|
276
|
+
if (xpathCache.has(element)) {
|
|
277
|
+
return xpathCache.get(element);
|
|
278
|
+
}
|
|
279
|
+
const xpaths = await generateXPathsForElement(element);
|
|
280
|
+
xpathCache.set(element, xpaths);
|
|
281
|
+
return xpaths;
|
|
282
|
+
})
|
|
283
|
+
);
|
|
284
|
+
console.timeEnd("processElements:generateXPaths");
|
|
108
285
|
candidateElements.forEach((element, index) => {
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
xpath = generateXPath(element);
|
|
112
|
-
xpathCache.set(element, xpath);
|
|
113
|
-
}
|
|
286
|
+
const xpaths = xpathLists[index];
|
|
287
|
+
let elementOutput = "";
|
|
114
288
|
if (isTextNode(element)) {
|
|
115
289
|
const textContent = element.textContent?.trim();
|
|
116
290
|
if (textContent) {
|
|
117
|
-
|
|
291
|
+
elementOutput += `${index + indexOffset}:${textContent}
|
|
118
292
|
`;
|
|
119
293
|
}
|
|
120
294
|
} else if (isElementNode(element)) {
|
|
@@ -123,10 +297,11 @@ ${outputString}`
|
|
|
123
297
|
const openingTag = `<${tagName}${attributes ? " " + attributes : ""}>`;
|
|
124
298
|
const closingTag = `</${tagName}>`;
|
|
125
299
|
const textContent = element.textContent?.trim() || "";
|
|
126
|
-
|
|
300
|
+
elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}
|
|
127
301
|
`;
|
|
128
302
|
}
|
|
129
|
-
|
|
303
|
+
outputString += elementOutput;
|
|
304
|
+
selectorMap[index + indexOffset] = xpaths;
|
|
130
305
|
});
|
|
131
306
|
console.timeEnd("processElements:processCandidates");
|
|
132
307
|
console.timeEnd("processElements:total");
|
|
@@ -163,34 +338,6 @@ ${outputString}`
|
|
|
163
338
|
window.processAllOfDom = processAllOfDom;
|
|
164
339
|
window.processElements = processElements2;
|
|
165
340
|
window.scrollToHeight = scrollToHeight;
|
|
166
|
-
function generateXPath(element) {
|
|
167
|
-
if (isElementNode(element) && element.id) {
|
|
168
|
-
return `//*[@id='${element.id}']`;
|
|
169
|
-
}
|
|
170
|
-
const parts = [];
|
|
171
|
-
while (element && (isTextNode(element) || isElementNode(element))) {
|
|
172
|
-
let index = 0;
|
|
173
|
-
let hasSameTypeSiblings = false;
|
|
174
|
-
const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];
|
|
175
|
-
for (let i = 0; i < siblings.length; i++) {
|
|
176
|
-
const sibling = siblings[i];
|
|
177
|
-
if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {
|
|
178
|
-
index = index + 1;
|
|
179
|
-
hasSameTypeSiblings = true;
|
|
180
|
-
if (sibling.isSameNode(element)) {
|
|
181
|
-
break;
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
if (element.nodeName !== "#text") {
|
|
186
|
-
const tagName = element.nodeName.toLowerCase();
|
|
187
|
-
const pathIndex = hasSameTypeSiblings ? `[${index}]` : "";
|
|
188
|
-
parts.unshift(`${tagName}${pathIndex}`);
|
|
189
|
-
}
|
|
190
|
-
element = element.parentElement;
|
|
191
|
-
}
|
|
192
|
-
return parts.length ? `/${parts.join("/")}` : "";
|
|
193
|
-
}
|
|
194
341
|
var leafElementDenyList = ["SVG", "IFRAME", "SCRIPT", "STYLE", "LINK"];
|
|
195
342
|
var interactiveElementTypes = [
|
|
196
343
|
"A",
|
|
@@ -231,13 +378,6 @@ ${outputString}`
|
|
|
231
378
|
"tooltip"
|
|
232
379
|
];
|
|
233
380
|
var interactiveAriaRoles = ["menu", "menuitem", "button"];
|
|
234
|
-
function isElementNode(node) {
|
|
235
|
-
return node.nodeType === Node.ELEMENT_NODE;
|
|
236
|
-
}
|
|
237
|
-
function isTextNode(node) {
|
|
238
|
-
const trimmedText = node.textContent?.trim().replace(/\s/g, "");
|
|
239
|
-
return node.nodeType === Node.TEXT_NODE && trimmedText !== "";
|
|
240
|
-
}
|
|
241
381
|
var isVisible = (element) => {
|
|
242
382
|
const rect = element.getBoundingClientRect();
|
|
243
383
|
if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {
|
|
@@ -1,8 +1,173 @@
|
|
|
1
1
|
(() => {
|
|
2
|
+
// lib/dom/xpathUtils.ts
|
|
3
|
+
/**
 * Returns the parent of `node`: `parentElement` when `isElementNode(node)`
 * is true, `parentNode` for every other node.
 *
 * @param {Node} node - Node whose parent is wanted.
 * @returns {Node|null} The parent as reported by the DOM property used.
 */
function getParentElement(node) {
  if (isElementNode(node)) {
    return node.parentElement;
  }
  return node.parentNode;
}
|
|
6
|
+
/**
 * Enumerates every combination of exactly `size` entries from `attributes`,
 * keeping the entries in their original relative order.
 *
 * @param {Array} attributes - Items to choose from.
 * @param {number} size - Number of items in each combination.
 * @returns {Array<Array>} Combinations ordered by ascending source index;
 *   `[[]]` when `size` is 0 and `[]` when no combination of that size exists.
 */
function getCombinations(attributes, size) {
  const collected = [];
  const extend = (fromIndex, picked) => {
    if (picked.length === size) {
      collected.push(picked);
      return;
    }
    for (let idx = fromIndex; idx < attributes.length; idx += 1) {
      // A fresh array per branch replaces the push/pop backtracking.
      extend(idx + 1, [...picked, attributes[idx]]);
    }
  };
  extend(0, []);
  return collected;
}
|
|
22
|
+
/**
 * Reports whether the first node matched by `xpath`, evaluated against
 * `document.documentElement`, is exactly `target`.
 *
 * @param {string} xpath - XPath expression to evaluate.
 * @param {Node} target - Node expected to be the first match.
 * @returns {boolean} `true` when the first snapshot item is `target`;
 *   `false` otherwise, including when evaluation throws (a warning is logged).
 */
function isXPathFirstResultElement(xpath, target) {
  try {
    const root = document.documentElement;
    const snapshot = document.evaluate(xpath, root, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
    const firstMatch = snapshot.snapshotItem(0);
    return firstMatch === target;
  } catch (error) {
    console.warn(`Invalid XPath expression: ${xpath}`, error);
    return false;
  }
}
|
|
37
|
+
/**
 * Renders `value` as an XPath string literal.
 *
 * - No single quote: wrapped in single quotes.
 * - Single quotes but no double quotes: wrapped in double quotes.
 * - Both: a `concat(...)` call where runs of single quotes are double-quoted
 *   and every other piece is single-quoted.
 *
 * @param {string} value - Raw text to embed in an XPath expression.
 * @returns {string} XPath literal or `concat(...)` expression.
 */
function escapeXPathString(value) {
  if (!value.includes("'")) {
    return `'${value}'`;
  }
  if (!value.includes('"')) {
    return `"${value}"`;
  }
  // The capture group keeps the quote runs in the split output.
  const pieces = value.split(/('+)/).map((piece) => {
    const isQuoteRun = piece.startsWith("'") && piece.endsWith("'");
    return isQuoteRun ? `"${piece}"` : `'${piece}'`;
  });
  return `concat(${pieces.join(",")})`;
}
|
|
56
|
+
/**
 * Produces the candidate XPaths for `element`, ordered id-based (only when
 * one exists), then standard, then complex.
 *
 * @param {Node} element - Node to describe; a falsy value yields `[]`.
 * @returns {Promise<string[]>} XPath candidates in the order above.
 */
async function generateXPathsForElement(element) {
  if (!element) return [];
  const pending = [
    generateComplexXPath(element),
    generateStandardXPath(element),
    generatedIdBasedXPath(element),
  ];
  const [complexXPath, standardXPath, idBasedXPath] = await Promise.all(pending);
  const ordered = [standardXPath, complexXPath];
  if (idBasedXPath) {
    ordered.unshift(idBasedXPath);
  }
  return ordered;
}
|
|
65
|
+
/**
 * Builds an attribute-anchored XPath for `element`.
 *
 * Walks from `element` toward the root. At each element it tries predicates
 * built from combinations of the prioritized attributes, smallest
 * combinations first. As soon as `//tag[predicates]` has that element as its
 * first match, the walk stops and that step becomes the path's anchor.
 * Elements with no such predicate contribute `tag`, plus a 1-based index
 * when the parent has more than one child with the same tag. Text nodes
 * contribute no step.
 *
 * @param {Node} element - Starting node.
 * @returns {Promise<string>} XPath of the form `//step/step/...`.
 */
async function generateComplexXPath(element) {
  // Attributes are tried in this order when building predicates.
  const attributePriority = [
    "data-qa",
    "data-component",
    "data-role",
    "role",
    "aria-role",
    "type",
    "name",
    "aria-label",
    "placeholder",
    "title",
    "alt",
  ];
  const parts = [];
  let currentElement = element;
  while (currentElement && (isTextNode(currentElement) || isElementNode(currentElement))) {
    if (isElementNode(currentElement)) {
      const el = currentElement;
      const tag = el.tagName.toLowerCase();

      // Only attributes with a non-empty value can take part in a predicate.
      const attributes = [];
      for (const attr of attributePriority) {
        const value = el.getAttribute(attr);
        if (value) {
          attributes.push({ attr, value });
        }
      }

      let uniqueStep = "";
      for (let size = 1; size <= attributes.length && !uniqueStep; size++) {
        for (const combo of getCombinations(attributes, size)) {
          const conditions = combo
            .map((a) => `@${a.attr}=${escapeXPathString(a.value)}`)
            .join(" and ");
          const step = `${tag}[${conditions}]`;
          if (isXPathFirstResultElement(`//${step}`, el)) {
            uniqueStep = step;
            break;
          }
        }
      }

      if (uniqueStep) {
        // Anchor found: ancestors above this element are not needed.
        parts.unshift(uniqueStep);
        break;
      }

      let selector = tag;
      const parent = getParentElement(el);
      if (parent) {
        const sameTagSiblings = Array.from(parent.children).filter(
          (sibling) => sibling.tagName === el.tagName
        );
        if (sameTagSiblings.length > 1) {
          selector += `[${sameTagSiblings.indexOf(el) + 1}]`;
        }
      }
      parts.unshift(selector);
    }
    currentElement = getParentElement(currentElement);
  }
  return "//" + parts.join("/");
}
|
|
125
|
+
/**
 * Builds a positional XPath for `element` by walking up `parentElement`
 * links and recording, for each non-text node, its lowercased node name and
 * its 1-based position among siblings with the same node type and name.
 *
 * Steps are joined with the child axis (`/`). Joining positional steps with
 * `//` would let each step match at any depth, so the first match could be
 * an unrelated node. The leading `//` is kept so the result has the same
 * shape as the complex XPath.
 *
 * @param {Node} element - Starting node (element or non-blank text node).
 * @returns {Promise<string>} XPath such as `//html/body[1]/div[2]`, or `""`
 *   when no step could be produced.
 */
async function generateStandardXPath(element) {
  const steps = [];
  let node = element;
  while (node && (isTextNode(node) || isElementNode(node))) {
    let position = 0;
    let hasSameTypeSiblings = false;
    const siblings = node.parentElement ? Array.from(node.parentElement.childNodes) : [];
    for (const sibling of siblings) {
      if (sibling.nodeType === node.nodeType && sibling.nodeName === node.nodeName) {
        position += 1;
        hasSameTypeSiblings = true;
        // Stop counting once the node itself is reached.
        if (sibling.isSameNode(node)) {
          break;
        }
      }
    }
    if (node.nodeName !== "#text") {
      const indexSuffix = hasSameTypeSiblings ? `[${position}]` : "";
      steps.unshift(`${node.nodeName.toLowerCase()}${indexSuffix}`);
    }
    node = node.parentElement;
  }
  return steps.length ? `//${steps.join("/")}` : "";
}
|
|
150
|
+
/**
 * Returns an XPath that selects by the element's `id`, or `null` when the
 * node is not an element or has no id.
 *
 * The id goes through `escapeXPathString` so that an id containing quote
 * characters still yields a valid expression. Ids without a single quote
 * produce the same `//*[@id='...']` form as before.
 *
 * @param {Node} element - Node to inspect.
 * @returns {Promise<string|null>} Id-based XPath or `null`.
 */
async function generatedIdBasedXPath(element) {
  if (isElementNode(element) && element.id) {
    return `//*[@id=${escapeXPathString(element.id)}]`;
  }
  return null;
}
|
|
156
|
+
|
|
2
157
|
// lib/dom/process.ts
|
|
158
|
+
/**
 * Tells whether `node` is an element, judged by its `nodeType`.
 *
 * @param {Node} node - Node to classify.
 * @returns {boolean} `true` when `node.nodeType` equals `Node.ELEMENT_NODE`.
 */
function isElementNode(node) {
  const kind = node.nodeType;
  return kind === Node.ELEMENT_NODE;
}
|
|
161
|
+
/**
 * Tells whether `node` is a text node whose trimmed text is non-empty.
 *
 * @param {Node} node - Node to classify.
 * @returns {boolean} `true` for text nodes with non-whitespace content.
 */
function isTextNode(node) {
  if (node.nodeType !== Node.TEXT_NODE) {
    return false;
  }
  const trimmed = node.textContent?.trim();
  return Boolean(trimmed);
}
|
|
3
164
|
async function processDom(chunksSeen) {
|
|
4
165
|
const { chunk, chunksArray } = await pickChunk(chunksSeen);
|
|
5
|
-
const { outputString, selectorMap } = await processElements(
|
|
166
|
+
const { outputString, selectorMap } = await processElements(
|
|
167
|
+
chunk,
|
|
168
|
+
void 0,
|
|
169
|
+
void 0
|
|
170
|
+
);
|
|
6
171
|
console.log(
|
|
7
172
|
`Stagehand (Browser Process): Extracted dom elements:
|
|
8
173
|
${outputString}`
|
|
@@ -49,12 +214,13 @@ ${outputString}`
|
|
|
49
214
|
scrollEndTimer = window.setTimeout(() => {
|
|
50
215
|
window.removeEventListener("scroll", handleScrollEnd);
|
|
51
216
|
resolve();
|
|
52
|
-
},
|
|
217
|
+
}, 100);
|
|
53
218
|
};
|
|
54
219
|
window.addEventListener("scroll", handleScrollEnd, { passive: true });
|
|
55
220
|
handleScrollEnd();
|
|
56
221
|
});
|
|
57
222
|
}
|
|
223
|
+
// Cache from a candidate DOM node to the XPath list generated for it. It is
// declared outside the element-processing function, so entries outlive a
// single call. NOTE(review): a Map holds strong references to cached nodes —
// confirm whether entries for removed nodes ever need clearing.
var xpathCache = /* @__PURE__ */ new Map();
|
|
58
224
|
async function processElements(chunk, scrollToChunk = true, indexOffset = 0) {
|
|
59
225
|
console.time("processElements:total");
|
|
60
226
|
const viewportHeight = window.innerHeight;
|
|
@@ -68,7 +234,6 @@ ${outputString}`
|
|
|
68
234
|
}
|
|
69
235
|
const candidateElements = [];
|
|
70
236
|
const DOMQueue = [...document.body.childNodes];
|
|
71
|
-
const xpathCache = /* @__PURE__ */ new Map();
|
|
72
237
|
console.log("Stagehand (Browser Process): Generating candidate elements");
|
|
73
238
|
console.time("processElements:findCandidates");
|
|
74
239
|
while (DOMQueue.length > 0) {
|
|
@@ -105,16 +270,25 @@ ${outputString}`
|
|
|
105
270
|
`Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`
|
|
106
271
|
);
|
|
107
272
|
console.time("processElements:processCandidates");
|
|
273
|
+
console.time("processElements:generateXPaths");
|
|
274
|
+
const xpathLists = await Promise.all(
|
|
275
|
+
candidateElements.map(async (element) => {
|
|
276
|
+
if (xpathCache.has(element)) {
|
|
277
|
+
return xpathCache.get(element);
|
|
278
|
+
}
|
|
279
|
+
const xpaths = await generateXPathsForElement(element);
|
|
280
|
+
xpathCache.set(element, xpaths);
|
|
281
|
+
return xpaths;
|
|
282
|
+
})
|
|
283
|
+
);
|
|
284
|
+
console.timeEnd("processElements:generateXPaths");
|
|
108
285
|
candidateElements.forEach((element, index) => {
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
xpath = generateXPath(element);
|
|
112
|
-
xpathCache.set(element, xpath);
|
|
113
|
-
}
|
|
286
|
+
const xpaths = xpathLists[index];
|
|
287
|
+
let elementOutput = "";
|
|
114
288
|
if (isTextNode(element)) {
|
|
115
289
|
const textContent = element.textContent?.trim();
|
|
116
290
|
if (textContent) {
|
|
117
|
-
|
|
291
|
+
elementOutput += `${index + indexOffset}:${textContent}
|
|
118
292
|
`;
|
|
119
293
|
}
|
|
120
294
|
} else if (isElementNode(element)) {
|
|
@@ -123,10 +297,11 @@ ${outputString}`
|
|
|
123
297
|
const openingTag = `<${tagName}${attributes ? " " + attributes : ""}>`;
|
|
124
298
|
const closingTag = `</${tagName}>`;
|
|
125
299
|
const textContent = element.textContent?.trim() || "";
|
|
126
|
-
|
|
300
|
+
elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}
|
|
127
301
|
`;
|
|
128
302
|
}
|
|
129
|
-
|
|
303
|
+
outputString += elementOutput;
|
|
304
|
+
selectorMap[index + indexOffset] = xpaths;
|
|
130
305
|
});
|
|
131
306
|
console.timeEnd("processElements:processCandidates");
|
|
132
307
|
console.timeEnd("processElements:total");
|
|
@@ -163,34 +338,6 @@ ${outputString}`
|
|
|
163
338
|
window.processAllOfDom = processAllOfDom;
|
|
164
339
|
window.processElements = processElements;
|
|
165
340
|
window.scrollToHeight = scrollToHeight;
|
|
166
|
-
function generateXPath(element) {
|
|
167
|
-
if (isElementNode(element) && element.id) {
|
|
168
|
-
return `//*[@id='${element.id}']`;
|
|
169
|
-
}
|
|
170
|
-
const parts = [];
|
|
171
|
-
while (element && (isTextNode(element) || isElementNode(element))) {
|
|
172
|
-
let index = 0;
|
|
173
|
-
let hasSameTypeSiblings = false;
|
|
174
|
-
const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];
|
|
175
|
-
for (let i = 0; i < siblings.length; i++) {
|
|
176
|
-
const sibling = siblings[i];
|
|
177
|
-
if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {
|
|
178
|
-
index = index + 1;
|
|
179
|
-
hasSameTypeSiblings = true;
|
|
180
|
-
if (sibling.isSameNode(element)) {
|
|
181
|
-
break;
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
if (element.nodeName !== "#text") {
|
|
186
|
-
const tagName = element.nodeName.toLowerCase();
|
|
187
|
-
const pathIndex = hasSameTypeSiblings ? `[${index}]` : "";
|
|
188
|
-
parts.unshift(`${tagName}${pathIndex}`);
|
|
189
|
-
}
|
|
190
|
-
element = element.parentElement;
|
|
191
|
-
}
|
|
192
|
-
return parts.length ? `/${parts.join("/")}` : "";
|
|
193
|
-
}
|
|
194
341
|
var leafElementDenyList = ["SVG", "IFRAME", "SCRIPT", "STYLE", "LINK"];
|
|
195
342
|
var interactiveElementTypes = [
|
|
196
343
|
"A",
|
|
@@ -231,13 +378,6 @@ ${outputString}`
|
|
|
231
378
|
"tooltip"
|
|
232
379
|
];
|
|
233
380
|
var interactiveAriaRoles = ["menu", "menuitem", "button"];
|
|
234
|
-
function isElementNode(node) {
|
|
235
|
-
return node.nodeType === Node.ELEMENT_NODE;
|
|
236
|
-
}
|
|
237
|
-
function isTextNode(node) {
|
|
238
|
-
const trimmedText = node.textContent?.trim().replace(/\s/g, "");
|
|
239
|
-
return node.nodeType === Node.TEXT_NODE && trimmedText !== "";
|
|
240
|
-
}
|
|
241
381
|
var isVisible = (element) => {
|
|
242
382
|
const rect = element.getBoundingClientRect();
|
|
243
383
|
if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {
|