@midscene/shared 1.5.4-beta-20260310084708.0 → 1.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/extractor/index.mjs +2 -2
- package/dist/es/extractor/locator.mjs +183 -32
- package/dist/es/extractor/util.mjs +26 -1
- package/dist/es/node/fs.mjs +1 -1
- package/dist/lib/extractor/index.js +11 -8
- package/dist/lib/extractor/locator.js +184 -30
- package/dist/lib/extractor/util.js +35 -1
- package/dist/lib/node/fs.js +1 -1
- package/dist/types/extractor/index.d.ts +1 -1
- package/dist/types/extractor/locator.d.ts +4 -1
- package/dist/types/extractor/util.d.ts +7 -0
- package/package.json +1 -1
- package/src/extractor/index.ts +1 -0
- package/src/extractor/locator.ts +332 -68
- package/src/extractor/util.ts +43 -1
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { descriptionOfTree, traverseTree, treeToList, trimAttributes, truncateText } from "./tree.mjs";
|
|
2
2
|
import { extractTextWithPosition, extractTreeNode, extractTreeNodeAsString } from "./web-extractor.mjs";
|
|
3
|
-
import { getElementInfoByXpath, getElementXpath, getNodeInfoByXpath, getXpathsByPoint } from "./locator.mjs";
|
|
3
|
+
import { getElementInfoByXpath, getElementXpath, getNodeInfoByXpath, getXpathsById, getXpathsByPoint } from "./locator.mjs";
|
|
4
4
|
import { generateElementByRect, isNotContainerElement } from "./dom-util.mjs";
|
|
5
|
-
export { descriptionOfTree, generateElementByRect, getElementInfoByXpath, getElementXpath, getNodeInfoByXpath, getXpathsByPoint, isNotContainerElement, traverseTree, treeToList, trimAttributes, truncateText, extractTreeNode as webExtractNodeTree, extractTreeNodeAsString as webExtractNodeTreeAsString, extractTextWithPosition as webExtractTextWithPosition };
|
|
5
|
+
export { descriptionOfTree, generateElementByRect, getElementInfoByXpath, getElementXpath, getNodeInfoByXpath, getXpathsById, getXpathsByPoint, isNotContainerElement, traverseTree, treeToList, trimAttributes, truncateText, extractTreeNode as webExtractNodeTree, extractTreeNodeAsString as webExtractNodeTreeAsString, extractTextWithPosition as webExtractTextWithPosition };
|
|
@@ -1,6 +1,54 @@
|
|
|
1
1
|
import { isSvgElement } from "./dom-util.mjs";
|
|
2
|
-
import { getRect, isElementPartiallyInViewport } from "./util.mjs";
|
|
2
|
+
import { getNodeFromCacheList, getRect, isElementPartiallyInViewport, logger } from "./util.mjs";
|
|
3
3
|
import { collectElementInfo } from "./web-extractor.mjs";
|
|
4
|
+
// Delimiter placed between the XPath of a frame element and the XPath of a node inside that frame's document.
const SUB_XPATH_SEPARATOR = '|>>|';
|
|
5
|
+
/**
 * Reads the `zoom` property of a style object as a number.
 *
 * @param {{ zoom?: string }} style - Style object whose `zoom` value is parsed.
 * @returns {number} The parsed zoom, or 1 when the value is missing, unparsable or zero.
 */
function parseCSSZoom(style) {
  const rawZoom = style.zoom ?? '1';
  const parsedZoom = Number.parseFloat(rawZoom);
  // NaN and 0 are both falsy, so either one falls back to the neutral factor.
  return parsedZoom || 1;
}
|
|
8
|
+
/**
 * Accumulates a left/top offset by walking from a document up through its
 * hosting frame elements until `rootDoc` is reached.
 *
 * @param {Document|null} nodeOwnerDoc - Document to start from.
 * @param {Document|null} rootDoc - Document at which the walk stops.
 * @returns {{ left: number, top: number }} The accumulated offset; `{ left: 0, top: 0 }`
 *   when no frame is traversed.
 */
function calculateIframeOffset(nodeOwnerDoc, rootDoc) {
  const offset = { left: 0, top: 0 };
  let doc = nodeOwnerDoc;
  while (doc && doc !== rootDoc) {
    try {
      const hostFrame = doc.defaultView?.frameElement;
      if (!hostFrame) {
        break;
      }
      const frameRect = hostFrame.getBoundingClientRect();
      const hostWindow = doc.defaultView?.parent;
      let borderLeft = 0;
      let borderTop = 0;
      let zoom = 1;
      try {
        if (hostWindow) {
          const frameStyle = hostWindow.getComputedStyle(hostFrame);
          borderLeft = Number.parseFloat(frameStyle.borderLeftWidth) || 0;
          borderTop = Number.parseFloat(frameStyle.borderTopWidth) || 0;
          zoom = parseCSSZoom(frameStyle);
        }
      } catch {
        // Style lookup is best effort: values assigned so far are kept, the rest stay at their defaults.
      }
      offset.left = offset.left / zoom + frameRect.left + borderLeft;
      offset.top = offset.top / zoom + frameRect.top + borderTop;
      doc = hostFrame.ownerDocument;
    } catch {
      // Any failure while inspecting a frame ends the walk with what was accumulated so far.
      break;
    }
  }
  return offset;
}
|
|
39
|
+
/**
 * Converts a point expressed in the parent window's coordinates into the
 * coordinates of the document hosted by `iframeElement`.
 *
 * @param {{ left: number, top: number }} point - Point in the parent window.
 * @param {Element} iframeElement - Frame element the point falls on.
 * @param {Window} parentWindow - Window used to read the frame's computed style.
 * @returns {{ left: number, top: number }} The point relative to the frame content.
 */
function translatePointToIframeCoordinates(point, iframeElement, parentWindow) {
  const frameRect = iframeElement.getBoundingClientRect();
  const frameStyle = parentWindow.getComputedStyle(iframeElement);
  const borderInset = { left: iframeElement.clientLeft, top: iframeElement.clientTop };
  const paddingInset = {
    left: Number.parseFloat(frameStyle.paddingLeft) || 0,
    top: Number.parseFloat(frameStyle.paddingTop) || 0,
  };
  const zoom = parseCSSZoom(frameStyle);
  // Subtract the frame origin, then its client inset and padding, then undo the frame's zoom.
  const toLocal = (axis) => (point[axis] - frameRect[axis] - borderInset[axis] - paddingInset[axis]) / zoom;
  return {
    left: toLocal('left'),
    top: toLocal('top'),
  };
}
|
|
4
52
|
const getElementXpathIndex = (element)=>{
|
|
5
53
|
let index = 1;
|
|
6
54
|
let prev = element.previousElementSibling;
|
|
@@ -14,8 +62,8 @@ const normalizeXpathText = (text)=>{
|
|
|
14
62
|
if ('string' != typeof text) return '';
|
|
15
63
|
return text.replace(/\s+/g, ' ').trim();
|
|
16
64
|
};
|
|
17
|
-
const buildCurrentElementXpath = (element, isOrderSensitive, isLeafElement)=>{
|
|
18
|
-
const parentPath = element.parentNode ? getElementXpath(element.parentNode, isOrderSensitive) : '';
|
|
65
|
+
const buildCurrentElementXpath = (element, isOrderSensitive, isLeafElement, limitToCurrentDocument = false)=>{
|
|
66
|
+
const parentPath = element.parentNode ? getElementXpath(element.parentNode, isOrderSensitive, false, limitToCurrentDocument) : '';
|
|
19
67
|
const prefix = parentPath ? `${parentPath}/` : '/';
|
|
20
68
|
const tagName = element.nodeName.toLowerCase();
|
|
21
69
|
const textContent = element.textContent?.trim();
|
|
@@ -29,11 +77,11 @@ const buildCurrentElementXpath = (element, isOrderSensitive, isLeafElement)=>{
|
|
|
29
77
|
const index = getElementXpathIndex(element);
|
|
30
78
|
return `${prefix}${tagSelector}[${index}]`;
|
|
31
79
|
};
|
|
32
|
-
const getElementXpath = (element, isOrderSensitive = false, isLeafElement = false)=>{
|
|
80
|
+
const getElementXpath = (element, isOrderSensitive = false, isLeafElement = false, limitToCurrentDocument = false)=>{
|
|
33
81
|
if (element.nodeType === Node.TEXT_NODE) {
|
|
34
82
|
const parentNode = element.parentNode;
|
|
35
83
|
if (parentNode && parentNode.nodeType === Node.ELEMENT_NODE) {
|
|
36
|
-
const parentXPath = getElementXpath(parentNode, isOrderSensitive, true);
|
|
84
|
+
const parentXPath = getElementXpath(parentNode, isOrderSensitive, true, limitToCurrentDocument);
|
|
37
85
|
const textContent = element.textContent?.trim();
|
|
38
86
|
if (textContent) return `${parentXPath}/text()[normalize-space()="${normalizeXpathText(textContent)}"]`;
|
|
39
87
|
return `${parentXPath}/text()`;
|
|
@@ -42,58 +90,161 @@ const getElementXpath = (element, isOrderSensitive = false, isLeafElement = fals
|
|
|
42
90
|
}
|
|
43
91
|
if (element.nodeType !== Node.ELEMENT_NODE) return '';
|
|
44
92
|
const el = element;
|
|
45
|
-
|
|
46
|
-
|
|
93
|
+
try {
|
|
94
|
+
const nodeName = el.nodeName.toLowerCase();
|
|
95
|
+
if (el === el.ownerDocument?.documentElement || 'html' === nodeName) {
|
|
96
|
+
if (!limitToCurrentDocument) {
|
|
97
|
+
const frameElement = el.ownerDocument?.defaultView?.frameElement;
|
|
98
|
+
if (frameElement) {
|
|
99
|
+
const framePath = getElementXpath(frameElement, isOrderSensitive, false, limitToCurrentDocument);
|
|
100
|
+
return `${framePath}${SUB_XPATH_SEPARATOR}/html`;
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
return '/html';
|
|
104
|
+
}
|
|
105
|
+
if (el === el.ownerDocument?.body || 'body' === nodeName) {
|
|
106
|
+
if (!limitToCurrentDocument) {
|
|
107
|
+
const frameElement = el.ownerDocument?.defaultView?.frameElement;
|
|
108
|
+
if (frameElement) {
|
|
109
|
+
const framePath = getElementXpath(frameElement, isOrderSensitive, false, limitToCurrentDocument);
|
|
110
|
+
return `${framePath}${SUB_XPATH_SEPARATOR}/html/body`;
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
return '/html/body';
|
|
114
|
+
}
|
|
115
|
+
} catch (error) {
|
|
116
|
+
logger('[midscene:locator] ownerDocument access failed:', error);
|
|
117
|
+
if ('html' === el.nodeName.toLowerCase()) return '/html';
|
|
118
|
+
if ('body' === el.nodeName.toLowerCase()) return '/html/body';
|
|
119
|
+
}
|
|
47
120
|
if (isSvgElement(el)) {
|
|
48
121
|
const tagName = el.nodeName.toLowerCase();
|
|
49
|
-
if ('svg' === tagName) return buildCurrentElementXpath(el, isOrderSensitive, isLeafElement);
|
|
122
|
+
if ('svg' === tagName) return buildCurrentElementXpath(el, isOrderSensitive, isLeafElement, limitToCurrentDocument);
|
|
50
123
|
let parent = el.parentNode;
|
|
51
124
|
while(parent && parent.nodeType === Node.ELEMENT_NODE){
|
|
52
125
|
const parentEl = parent;
|
|
53
|
-
if (!isSvgElement(parentEl)) return getElementXpath(parentEl, isOrderSensitive, isLeafElement);
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
if ('svg' === parentTag) return getElementXpath(parentEl, isOrderSensitive, isLeafElement);
|
|
57
|
-
}
|
|
126
|
+
if (!isSvgElement(parentEl)) return getElementXpath(parentEl, isOrderSensitive, isLeafElement, limitToCurrentDocument);
|
|
127
|
+
const parentTag = parentEl.nodeName.toLowerCase();
|
|
128
|
+
if ('svg' === parentTag) return getElementXpath(parentEl, isOrderSensitive, isLeafElement, limitToCurrentDocument);
|
|
58
129
|
parent = parent.parentNode;
|
|
59
130
|
}
|
|
60
131
|
const fallbackParent = el.parentNode;
|
|
61
|
-
if (fallbackParent && fallbackParent.nodeType === Node.ELEMENT_NODE) return getElementXpath(fallbackParent, isOrderSensitive, isLeafElement);
|
|
132
|
+
if (fallbackParent && fallbackParent.nodeType === Node.ELEMENT_NODE) return getElementXpath(fallbackParent, isOrderSensitive, isLeafElement, limitToCurrentDocument);
|
|
62
133
|
return '';
|
|
63
134
|
}
|
|
64
|
-
return buildCurrentElementXpath(el, isOrderSensitive, isLeafElement);
|
|
135
|
+
return buildCurrentElementXpath(el, isOrderSensitive, isLeafElement, limitToCurrentDocument);
|
|
65
136
|
};
|
|
66
|
-
function
|
|
67
|
-
const
|
|
68
|
-
if (!
|
|
69
|
-
const fullXPath = getElementXpath(
|
|
137
|
+
/**
 * Looks up a cached node by id and returns its XPath.
 *
 * @param {string} id - Key previously used to store the node in the node cache.
 * @returns {string[]|null} A one-element array holding the XPath, or `null`
 *   when no node is cached under `id`.
 */
function getXpathsById(id) {
  const cachedNode = getNodeFromCacheList(id);
  // The XPath is built as a leaf, order-insensitive, and limited to the node's own document.
  return cachedNode ? [getElementXpath(cachedNode, false, true, true)] : null;
}
|
|
145
|
+
/**
 * Resolves the XPath of the element at a viewport point, descending into
 * `iframe`/`frame` elements whose content document is accessible.
 *
 * Each traversed frame contributes its own XPath followed by
 * `SUB_XPATH_SEPARATOR`; the final segment is the XPath of the innermost
 * element, limited to that element's own document.
 *
 * @param {{ left: number, top: number }} point - Point in the top window's coordinates.
 * @param {boolean} isOrderSensitive - Forwarded to `getElementXpath`.
 * @returns {string[]|null} A one-element array with the composed XPath, or
 *   `null` when no document is available or nothing is found at the point.
 */
function getXpathsByPoint(point, isOrderSensitive) {
  let currentWindow = typeof window !== 'undefined' ? window : undefined;
  let currentDocument = typeof document !== 'undefined' ? document : undefined;
  // Without a document there is nothing to hit-test against.
  if (!currentDocument) {
    return null;
  }

  const MAX_DEPTH = 10;
  let { left, top } = point;
  let xpathPrefix = '';
  // Deepest element found so far, paired with the prefix that was valid for
  // ITS document. The fallback must not use the current prefix: once a frame
  // is entered the prefix already ends with that frame's own XPath.
  let deepestHit = null;

  for (let depth = 0; depth < MAX_DEPTH; depth++) {
    const element = currentDocument.elementFromPoint(left, top);
    if (!element) {
      break;
    }
    deepestHit = { element, prefix: xpathPrefix };

    const tag = element.tagName.toLowerCase();
    if (tag === 'iframe' || tag === 'frame') {
      try {
        const { contentWindow, contentDocument } = element;
        if (contentWindow && contentDocument) {
          const localPoint = translatePointToIframeCoordinates({ left, top }, element, currentWindow);
          const currentIframeXpath = getElementXpath(element, isOrderSensitive, false, true);
          xpathPrefix += currentIframeXpath + SUB_XPATH_SEPARATOR;
          currentWindow = contentWindow;
          currentDocument = contentDocument;
          left = localPoint.left;
          top = localPoint.top;
          continue;
        }
      } catch (error) {
        logger('[midscene:locator] iframe penetration failed (cross-origin?):', error);
      }
    }

    // Regular element, or a frame that could not be entered: this is the target.
    return [`${xpathPrefix}${getElementXpath(element, isOrderSensitive, true, true)}`];
  }

  // Either the point hit nothing inside the innermost frame or the depth limit
  // was reached: fall back to the deepest element that was actually found.
  if (!deepestHit) {
    return null;
  }
  return [`${deepestHit.prefix}${getElementXpath(deepestHit.element, isOrderSensitive, true, true)}`];
}
|
|
74
199
|
/**
 * Resolves a (possibly frame-crossing) XPath to a node.
 *
 * The path is split on `SUB_XPATH_SEPARATOR`; every segment except the last
 * must match exactly one `iframe`/`frame` element, whose content document is
 * then used to evaluate the next segment.
 *
 * @param {string} xpath - XPath, optionally composed of several segments.
 * @returns {Node|null} The matched node, or `null` when no document is
 *   available, a segment does not match exactly one node, an intermediate
 *   match is not a frame element, or a frame's document is inaccessible.
 */
function getNodeInfoByXpath(xpath) {
  const parts = xpath
    .split(SUB_XPATH_SEPARATOR)
    .map((part) => part.trim())
    .filter(Boolean);
  if (parts.length === 0) {
    return null;
  }

  let currentDocument = typeof document !== 'undefined' ? document : undefined;
  // Without a document there is nothing to evaluate against.
  if (!currentDocument) {
    return null;
  }

  let node = null;
  for (const [index, currentXpath] of parts.entries()) {
    const xpathResult = currentDocument.evaluate(
      currentXpath,
      currentDocument,
      null,
      XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
      null,
    );
    if (xpathResult.snapshotLength !== 1) {
      logger(`[midscene:locator] XPath "${currentXpath}" matched ${xpathResult.snapshotLength} elements (expected 1), discarding.`);
      return null;
    }
    node = xpathResult.snapshotItem(0);

    if (index === parts.length - 1) {
      break;
    }

    // Intermediate segments must point at a frame container. `frame` is
    // accepted alongside `iframe` because getXpathsByPoint descends into both.
    if (!node || node.nodeType !== Node.ELEMENT_NODE) {
      return null;
    }
    const tag = node.tagName.toLowerCase();
    if (tag !== 'iframe' && tag !== 'frame') {
      return null;
    }

    try {
      const contentDocument = node.contentDocument;
      if (!contentDocument) {
        logger('[midscene:locator] iframe contentDocument is null (cross-origin?)');
        return null;
      }
      currentDocument = contentDocument;
    } catch (error) {
      logger('[midscene:locator] iframe contentDocument access failed:', error);
      return null;
    }
  }
  return node;
}
|
|
83
227
|
/**
 * Resolves an XPath to a node and collects element info for it.
 *
 * The node's own document and window are used when available, so nodes inside
 * frames are measured in their own context; the frame offset relative to the
 * top document is passed along to `collectElementInfo`.
 *
 * @param {string} xpath - XPath understood by `getNodeInfoByXpath`.
 * @returns {*} Whatever `collectElementInfo` returns, or `null` when the XPath
 *   does not resolve to a node.
 */
function getElementInfoByXpath(xpath) {
  const node = getNodeInfoByXpath(xpath);
  if (!node) {
    return null;
  }

  const hasGlobalDocument = typeof document !== 'undefined';
  const ownerDoc = node.ownerDocument;
  const ownerView = ownerDoc?.defaultView;

  // Prefer the node's own window/document; otherwise fall back to the globals.
  let targetWin;
  let targetDoc;
  if (ownerView) {
    targetWin = ownerView;
    targetDoc = ownerDoc;
  } else {
    targetWin = typeof window !== 'undefined' ? window : undefined;
    targetDoc = hasGlobalDocument ? document : undefined;
  }

  const iframeOffset = calculateIframeOffset(ownerDoc ?? null, hasGlobalDocument ? document : null);

  if (node instanceof targetWin.HTMLElement) {
    const rect = getRect(node, 1, targetWin);
    // Bring the element into view before its info is collected.
    if (!isElementPartiallyInViewport(rect, targetWin, targetDoc, 1)) {
      node.scrollIntoView({
        behavior: 'instant',
        block: 'center',
      });
    }
  }
  return collectElementInfo(node, targetWin, targetDoc, 1, iframeOffset, true);
}
|
|
99
|
-
export { getElementInfoByXpath, getElementXpath, getNodeInfoByXpath, getXpathsByPoint };
|
|
250
|
+
export { getElementInfoByXpath, getElementXpath, getNodeInfoByXpath, getXpathsById, getXpathsByPoint };
|
|
@@ -197,8 +197,33 @@ function getNodeAttributes(node, currentWindow) {
|
|
|
197
197
|
});
|
|
198
198
|
return Object.fromEntries(attributesList);
|
|
199
199
|
}
|
|
200
|
+
// Size limit of the node cache: once it holds this many entries, setNodeToCacheList removes the first-inserted key before adding a new one.
const NODE_CACHE_MAX_SIZE = 2000;
|
|
201
|
+
/**
 * Installs a fresh, empty node cache on `window.midsceneNodeHashCache`.
 * Does nothing when no `window` global exists.
 */
function setNodeHashCacheListOnWindow() {
  if (typeof window === 'undefined') {
    return;
  }
  window.midsceneNodeHashCache = new Map();
}
|
|
204
|
+
/**
 * Returns the node cache stored on `window.midsceneNodeHashCache`.
 *
 * @returns {Map|undefined} The cache, or `undefined` when there is no `window`
 *   global or the cache has not been installed.
 */
function getNodeCacheMap() {
  return typeof window === 'undefined' ? undefined : window.midsceneNodeHashCache;
}
|
|
208
|
+
/**
 * Stores `node` in the node cache under `id`.
 *
 * When `id` is already cached with a different node, the entry is replaced so
 * the id resolves to the most recently seen node rather than an older one that
 * may no longer be in the document. Re-adding the same node is a no-op.
 * When the cache is full, the first-inserted entry is removed to make room.
 *
 * @param {Node} node - Node to remember.
 * @param {string} id - Cache key for the node.
 * @returns {void}
 */
function setNodeToCacheList(node, id) {
  const cache = getNodeCacheMap();
  if (!cache) {
    return;
  }

  if (cache.has(id)) {
    if (cache.get(id) === node) {
      return;
    }
    // Delete first so the refreshed entry moves to the end of the Map's
    // insertion order and is not the next candidate for eviction.
    cache.delete(id);
  } else if (cache.size >= NODE_CACHE_MAX_SIZE) {
    const oldestKey = cache.keys().next().value;
    if (oldestKey !== undefined) {
      cache.delete(oldestKey);
    }
  }
  cache.set(id, node);
}
|
|
218
|
+
/**
 * Fetches the node cached under `id`.
 *
 * @param {string} id - Cache key.
 * @returns {Node|undefined} The cached node, or `undefined` when the cache is
 *   unavailable or holds no entry for `id`.
 */
function getNodeFromCacheList(id) {
  const cache = getNodeCacheMap();
  return cache?.get(id);
}
|
|
200
221
|
/**
 * Generates the hash id for a rect/content pair and, when a node is given,
 * remembers that node in the node cache under the generated id.
 *
 * @param {Node|null|undefined} node - Node to cache; skipped when falsy.
 * @param {*} content - Forwarded to `generateHashId`.
 * @param {*} rect - Forwarded to `generateHashId`.
 * @returns {*} The value returned by `generateHashId(rect, content)`.
 */
function midsceneGenerateHash(node, content, rect) {
  const hashId = generateHashId(rect, content);
  if (!node) {
    return hashId;
  }
  // Install the cache on first use when running with a window.
  const hasWindow = typeof window !== 'undefined';
  if (hasWindow && !getNodeCacheMap()) {
    setNodeHashCacheListOnWindow();
  }
  setNodeToCacheList(node, hashId);
  return hashId;
}
|
|
204
229
|
function generateId(numberId) {
|
|
@@ -217,4 +242,4 @@ function getTopDocument() {
|
|
|
217
242
|
const container = document.body || document;
|
|
218
243
|
return container;
|
|
219
244
|
}
|
|
220
|
-
export { elementRect, generateId, getDebugMode, getNodeAttributes, getPseudoElementContent, getRect, getTopDocument, hasOverflowY, isElementPartiallyInViewport, logger, midsceneGenerateHash, overlappedRect, setDebugMode, setExtractTextWithPositionOnWindow, setGenerateHashOnWindow, setMidsceneVisibleRectOnWindow, validTextNodeContent };
|
|
245
|
+
export { elementRect, generateId, getDebugMode, getNodeAttributes, getNodeFromCacheList, getPseudoElementContent, getRect, getTopDocument, hasOverflowY, isElementPartiallyInViewport, logger, midsceneGenerateHash, overlappedRect, setDebugMode, setExtractTextWithPositionOnWindow, setGenerateHashOnWindow, setMidsceneVisibleRectOnWindow, setNodeHashCacheListOnWindow, setNodeToCacheList, validTextNodeContent };
|