@midscene/shared 1.5.4-beta-20260310084708.0 → 1.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,94 @@
1
1
  import type { ElementInfo } from '.';
2
2
  import type { Point } from '../types';
3
3
  import { isSvgElement } from './dom-util';
4
- import { getRect, isElementPartiallyInViewport } from './util';
4
+ import {
5
+ getNodeFromCacheList,
6
+ getRect,
7
+ isElementPartiallyInViewport,
8
+ logger,
9
+ } from './util';
5
10
  import { collectElementInfo } from './web-extractor';
6
11
 
12
+ /** Separator for compound XPath across iframes (e.g. "iframePath|>>|/html/body/div") */
13
+ const SUB_XPATH_SEPARATOR = '|>>|';
14
+
15
/**
 * Read the non-standard CSS `zoom` property (Chromium-only) from a style
 * declaration.
 *
 * @param style - Style declaration to inspect.
 * @returns The parsed zoom factor, or 1 when the property is missing,
 *   cannot be parsed as a number, or parses to 0.
 */
function parseCSSZoom(style: CSSStyleDeclaration): number {
  const { zoom: rawZoom } = style as CSSStyleDeclaration & { zoom?: string };
  const parsedZoom = Number.parseFloat(rawZoom ?? '1');
  // NaN and 0 are both falsy, so either one falls back to a neutral factor.
  return parsedZoom ? parsedZoom : 1;
}
23
+
24
/**
 * Accumulate the offset of an iframe-nested document relative to `rootDoc`
 * by walking up the chain of hosting frame elements.
 *
 * At every level the hosting frame element's bounding-rect origin plus its
 * left/top border AND padding are added: the nested document is rendered in
 * the frame's content box, so both insets shift its content. The offset
 * accumulated so far is divided by the frame's CSS `zoom` before the frame's
 * own origin is added.
 *
 * NOTE(review): `translatePointToIframeCoordinates` divides by `zoom` when
 * going parent -> child, while this function also divides when going
 * child -> parent. One of the two may need to multiply instead; confirm
 * against Chromium's `zoom` semantics before relying on zoomed frames.
 *
 * @param nodeOwnerDoc - Document owning the node; `null` yields a zero offset.
 * @param rootDoc - Document at which the walk stops.
 * @returns The accumulated `{ left, top }` offset. The walk stops early (and
 *   returns what was accumulated so far) when a document has no frame
 *   element or when frame access throws.
 */
function calculateIframeOffset(
  nodeOwnerDoc: Document | null,
  rootDoc: Document | null,
): { left: number; top: number } {
  let leftOffset = 0;
  let topOffset = 0;
  let iterDoc = nodeOwnerDoc;

  while (iterDoc && iterDoc !== rootDoc) {
    try {
      const frameElement = iterDoc.defaultView?.frameElement;
      if (!frameElement) break;

      const rect = frameElement.getBoundingClientRect();
      const parentWin = iterDoc.defaultView?.parent;

      let insetLeft = 0;
      let insetTop = 0;
      let zoom = 1;
      try {
        if (parentWin) {
          const style = parentWin.getComputedStyle(frameElement);
          // Content box origin = border + padding inside the frame's rect.
          insetLeft =
            (Number.parseFloat(style.borderLeftWidth) || 0) +
            (Number.parseFloat(style.paddingLeft) || 0);
          insetTop =
            (Number.parseFloat(style.borderTopWidth) || 0) +
            (Number.parseFloat(style.paddingTop) || 0);
          zoom = parseCSSZoom(style);
        }
      } catch {
        // Style access may fail (e.g. cross-origin frame); keep the defaults.
      }

      leftOffset = leftOffset / zoom + rect.left + insetLeft;
      topOffset = topOffset / zoom + rect.top + insetTop;
      iterDoc = frameElement.ownerDocument;
    } catch {
      break;
    }
  }

  return { left: leftOffset, top: topOffset };
}
68
+
69
/**
 * Convert a point expressed in the parent window's coordinates into the
 * local coordinates of the document hosted by `iframeElement`.
 *
 * The frame's bounding-rect origin, its client (border) inset and its
 * padding are subtracted from the point, and the result is divided by the
 * frame's CSS `zoom`.
 *
 * @param point - Point in the parent window's coordinate space.
 * @param iframeElement - Frame element hosting the nested document.
 * @param parentWindow - Window used to compute the frame element's style.
 * @returns The point in the nested document's coordinate space.
 */
function translatePointToIframeCoordinates(
  point: { left: number; top: number },
  iframeElement: Element,
  parentWindow: Window,
): { left: number; top: number } {
  const frameRect = iframeElement.getBoundingClientRect();
  const computed = parentWindow.getComputedStyle(iframeElement);
  const scale = parseCSSZoom(computed);

  // Shared formula for both axes; unparsable padding counts as 0.
  const toLocal = (
    value: number,
    rectOrigin: number,
    clientInset: number,
    rawPadding: string,
  ): number => {
    const padding = Number.parseFloat(rawPadding) || 0;
    return (value - rectOrigin - clientInset - padding) / scale;
  };

  return {
    left: toLocal(
      point.left,
      frameRect.left,
      iframeElement.clientLeft,
      computed.paddingLeft,
    ),
    top: toLocal(
      point.top,
      frameRect.top,
      iframeElement.clientTop,
      computed.paddingTop,
    ),
  };
}
91
+
7
92
  const getElementXpathIndex = (element: Element): number => {
8
93
  let index = 1;
9
94
  let prev = element.previousElementSibling;
@@ -30,10 +115,16 @@ const buildCurrentElementXpath = (
30
115
  element: Element,
31
116
  isOrderSensitive: boolean,
32
117
  isLeafElement: boolean,
118
+ limitToCurrentDocument = false,
33
119
  ): string => {
34
120
  // Build parent path - inline the buildParentXpath logic
35
121
  const parentPath = element.parentNode
36
- ? getElementXpath(element.parentNode, isOrderSensitive)
122
+ ? getElementXpath(
123
+ element.parentNode,
124
+ isOrderSensitive,
125
+ false,
126
+ limitToCurrentDocument,
127
+ )
37
128
  : '';
38
129
  const prefix = parentPath ? `${parentPath}/` : '/';
39
130
  const tagName = element.nodeName.toLowerCase();
@@ -67,13 +158,18 @@ export const getElementXpath = (
67
158
  element: Node,
68
159
  isOrderSensitive = false,
69
160
  isLeafElement = false,
161
+ limitToCurrentDocument = false,
70
162
  ): string => {
71
163
  // process text node
72
164
  if (element.nodeType === Node.TEXT_NODE) {
73
165
  const parentNode = element.parentNode;
74
166
  if (parentNode && parentNode.nodeType === Node.ELEMENT_NODE) {
75
- // For text nodes, treat parent as leaf element to enable text matching
76
- const parentXPath = getElementXpath(parentNode, isOrderSensitive, true);
167
+ const parentXPath = getElementXpath(
168
+ parentNode,
169
+ isOrderSensitive,
170
+ true,
171
+ limitToCurrentDocument,
172
+ );
77
173
  const textContent = element.textContent?.trim();
78
174
  if (textContent) {
79
175
  return `${parentXPath}/text()[normalize-space()="${normalizeXpathText(textContent)}"]`;
@@ -83,123 +179,291 @@ export const getElementXpath = (
83
179
  return '';
84
180
  }
85
181
 
86
- // process element node
87
182
  if (element.nodeType !== Node.ELEMENT_NODE) return '';
88
183
 
89
- // process element node - at this point, element should be an Element
90
184
  const el = element as Element;
91
185
 
92
- // special element handling
93
- if (el === document.documentElement) return '/html';
94
- if (el === document.body) return '/html/body';
186
+ // special element handling (iframe-aware: prefix with frame path when not limitToCurrentDocument)
187
+ try {
188
+ const nodeName = el.nodeName.toLowerCase();
189
+ if (el === el.ownerDocument?.documentElement || nodeName === 'html') {
190
+ if (!limitToCurrentDocument) {
191
+ const frameElement = el.ownerDocument?.defaultView?.frameElement;
192
+ if (frameElement) {
193
+ const framePath = getElementXpath(
194
+ frameElement as Element,
195
+ isOrderSensitive,
196
+ false,
197
+ limitToCurrentDocument,
198
+ );
199
+ return `${framePath}${SUB_XPATH_SEPARATOR}/html`;
200
+ }
201
+ }
202
+ return '/html';
203
+ }
204
+ if (el === el.ownerDocument?.body || nodeName === 'body') {
205
+ if (!limitToCurrentDocument) {
206
+ const frameElement = el.ownerDocument?.defaultView?.frameElement;
207
+ if (frameElement) {
208
+ const framePath = getElementXpath(
209
+ frameElement as Element,
210
+ isOrderSensitive,
211
+ false,
212
+ limitToCurrentDocument,
213
+ );
214
+ return `${framePath}${SUB_XPATH_SEPARATOR}/html/body`;
215
+ }
216
+ }
217
+ return '/html/body';
218
+ }
219
+ } catch (error) {
220
+ logger('[midscene:locator] ownerDocument access failed:', error);
221
+ if (el.nodeName.toLowerCase() === 'html') return '/html';
222
+ if (el.nodeName.toLowerCase() === 'body') return '/html/body';
223
+ }
95
224
 
96
- // if the element is any SVG element, handle based on tag type
97
225
  if (isSvgElement(el)) {
98
226
  const tagName = el.nodeName.toLowerCase();
99
-
100
- // For top-level <svg> tag, include it in the path to distinguish between multiple SVG icons
101
- // This is important when there are multiple SVG elements under the same parent (e.g., td[34]/svg[1], td[34]/svg[2])
102
227
  if (tagName === 'svg') {
103
- // Include the <svg> tag with its index in the XPath
104
- return buildCurrentElementXpath(el, isOrderSensitive, isLeafElement);
228
+ return buildCurrentElementXpath(
229
+ el,
230
+ isOrderSensitive,
231
+ isLeafElement,
232
+ limitToCurrentDocument,
233
+ );
105
234
  }
106
-
107
- // For SVG child elements (path, circle, rect, etc.), skip to the nearest <svg> ancestor
108
- // These internal elements are usually decorative and can change frequently
109
235
  let parent = el.parentNode;
110
236
  while (parent && parent.nodeType === Node.ELEMENT_NODE) {
111
237
  const parentEl = parent as Element;
112
- if (isSvgElement(parentEl)) {
113
- const parentTag = parentEl.nodeName.toLowerCase();
114
- if (parentTag === 'svg') {
115
- // Found the <svg> container, return its path
116
- return getElementXpath(parentEl, isOrderSensitive, isLeafElement);
117
- }
118
- } else {
119
- // Found a non-SVG ancestor, return its path
120
- return getElementXpath(parentEl, isOrderSensitive, isLeafElement);
238
+ if (!isSvgElement(parentEl)) {
239
+ return getElementXpath(
240
+ parentEl,
241
+ isOrderSensitive,
242
+ isLeafElement,
243
+ limitToCurrentDocument,
244
+ );
245
+ }
246
+ const parentTag = parentEl.nodeName.toLowerCase();
247
+ if (parentTag === 'svg') {
248
+ return getElementXpath(
249
+ parentEl,
250
+ isOrderSensitive,
251
+ isLeafElement,
252
+ limitToCurrentDocument,
253
+ );
121
254
  }
122
255
  parent = parent.parentNode;
123
256
  }
124
- // fallback if no suitable parent found
125
257
  const fallbackParent = el.parentNode;
126
258
  if (fallbackParent && fallbackParent.nodeType === Node.ELEMENT_NODE) {
127
259
  return getElementXpath(
128
260
  fallbackParent as Element,
129
261
  isOrderSensitive,
130
262
  isLeafElement,
263
+ limitToCurrentDocument,
131
264
  );
132
265
  }
133
266
  return '';
134
267
  }
135
268
 
136
- // decide which format to use
137
- return buildCurrentElementXpath(el, isOrderSensitive, isLeafElement);
269
+ return buildCurrentElementXpath(
270
+ el,
271
+ isOrderSensitive,
272
+ isLeafElement,
273
+ limitToCurrentDocument,
274
+ );
138
275
  };
139
276
 
277
/**
 * Build the XPath of a node that was previously stored in the node cache.
 *
 * The path is generated with `limitToCurrentDocument = true`, so it is local
 * to the node's own document and carries no frame prefix.
 *
 * @param id - Hash ID under which the node was cached.
 * @returns A single-element array holding the XPath, or `null` when no node
 *   is cached under `id`.
 */
export function getXpathsById(id: string): string[] | null {
  const cachedNode = getNodeFromCacheList(id);
  return cachedNode ? [getElementXpath(cachedNode, false, true, true)] : null;
}
285
+
140
286
  export function getXpathsByPoint(
141
287
  point: Point,
142
288
  isOrderSensitive: boolean,
143
289
  ): string[] | null {
144
- const element = document.elementFromPoint(point.left, point.top);
290
+ let currentWindow: Window =
291
+ typeof window !== 'undefined' ? window : (undefined as any);
292
+ let currentDocument: Document =
293
+ typeof document !== 'undefined' ? document : (undefined as any);
294
+ let { left, top } = point;
295
+ let depth = 0;
296
+ const MAX_DEPTH = 10;
297
+ let xpathPrefix = '';
298
+ let lastFoundElement: Element | null = null;
299
+
300
+ while (depth < MAX_DEPTH) {
301
+ depth++;
302
+ const element = currentDocument.elementFromPoint(left, top);
303
+
304
+ if (!element) {
305
+ if (lastFoundElement) {
306
+ const fullXPath = getElementXpath(
307
+ lastFoundElement,
308
+ isOrderSensitive,
309
+ true,
310
+ true,
311
+ );
312
+ return [xpathPrefix + fullXPath];
313
+ }
314
+ return null;
315
+ }
145
316
 
146
- if (!element) {
147
- return null;
317
+ lastFoundElement = element;
318
+
319
+ const tag = element.tagName.toLowerCase();
320
+ if (tag === 'iframe' || tag === 'frame') {
321
+ try {
322
+ const contentWindow = (element as HTMLIFrameElement).contentWindow;
323
+ const contentDocument = (element as HTMLIFrameElement).contentDocument;
324
+
325
+ if (contentWindow && contentDocument) {
326
+ const localPoint = translatePointToIframeCoordinates(
327
+ { left, top },
328
+ element,
329
+ currentWindow,
330
+ );
331
+ const currentIframeXpath = getElementXpath(
332
+ element,
333
+ isOrderSensitive,
334
+ false,
335
+ true,
336
+ );
337
+ xpathPrefix += currentIframeXpath + SUB_XPATH_SEPARATOR;
338
+ currentWindow = contentWindow;
339
+ currentDocument = contentDocument;
340
+ left = localPoint.left;
341
+ top = localPoint.top;
342
+ continue;
343
+ }
344
+ } catch (error) {
345
+ logger(
346
+ '[midscene:locator] iframe penetration failed (cross-origin?):',
347
+ error,
348
+ );
349
+ }
350
+ }
351
+
352
+ const fullXPath = getElementXpath(element, isOrderSensitive, true, true);
353
+ return [xpathPrefix + fullXPath];
148
354
  }
149
355
 
150
- const fullXPath = getElementXpath(element, isOrderSensitive, true);
151
- return [fullXPath];
356
+ if (lastFoundElement) {
357
+ const fullXPath = getElementXpath(
358
+ lastFoundElement,
359
+ isOrderSensitive,
360
+ true,
361
+ true,
362
+ );
363
+ return [xpathPrefix + fullXPath];
364
+ }
365
+ return null;
152
366
  }
153
367
 
154
368
  export function getNodeInfoByXpath(xpath: string): Node | null {
155
- const xpathResult = document.evaluate(
156
- xpath,
157
- document,
158
- null,
159
- XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
160
- null,
161
- );
369
+ const parts = xpath
370
+ .split(SUB_XPATH_SEPARATOR)
371
+ .map((p) => p.trim())
372
+ .filter(Boolean);
373
+ if (parts.length === 0) return null;
374
+
375
+ let currentDocument: Document =
376
+ typeof document !== 'undefined' ? document : (undefined as any);
377
+ let node: Node | null = null;
162
378
 
163
- if (xpathResult.snapshotLength !== 1) {
164
- console.warn(
165
- `[midscene:warning] Received XPath "${xpath}" but it matched ${xpathResult.snapshotLength} elements. Discarding this result.`,
379
+ for (let i = 0; i < parts.length; i++) {
380
+ const currentXpath = parts[i];
381
+ const xpathResult = currentDocument.evaluate(
382
+ currentXpath,
383
+ currentDocument,
384
+ null,
385
+ XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
386
+ null,
166
387
  );
167
- return null;
168
- }
169
388
 
170
- const node = xpathResult.snapshotItem(0);
389
+ if (xpathResult.snapshotLength !== 1) {
390
+ logger(
391
+ `[midscene:locator] XPath "${currentXpath}" matched ${xpathResult.snapshotLength} elements (expected 1), discarding.`,
392
+ );
393
+ return null;
394
+ }
395
+
396
+ node = xpathResult.snapshotItem(0);
397
+
398
+ if (i < parts.length - 1) {
399
+ if (
400
+ node &&
401
+ node.nodeType === Node.ELEMENT_NODE &&
402
+ (node as Element).tagName.toLowerCase() === 'iframe'
403
+ ) {
404
+ try {
405
+ const contentDocument = (node as HTMLIFrameElement).contentDocument;
406
+ if (contentDocument) {
407
+ currentDocument = contentDocument;
408
+ } else {
409
+ logger(
410
+ '[midscene:locator] iframe contentDocument is null (cross-origin?)',
411
+ );
412
+ return null;
413
+ }
414
+ } catch (error) {
415
+ logger(
416
+ '[midscene:locator] iframe contentDocument access failed:',
417
+ error,
418
+ );
419
+ return null;
420
+ }
421
+ } else {
422
+ return null;
423
+ }
424
+ }
425
+ }
171
426
 
172
427
  return node;
173
428
  }
174
429
 
175
430
  export function getElementInfoByXpath(xpath: string): ElementInfo | null {
176
431
  const node = getNodeInfoByXpath(xpath);
432
+ if (!node) return null;
177
433
 
178
- if (!node) {
179
- return null;
434
+ let targetWindow: Window =
435
+ typeof window !== 'undefined' ? window : (undefined as any);
436
+ let targetDocument: Document =
437
+ typeof document !== 'undefined' ? document : (undefined as any);
438
+
439
+ if (node.ownerDocument?.defaultView) {
440
+ targetWindow = node.ownerDocument.defaultView;
441
+ targetDocument = node.ownerDocument;
180
442
  }
181
443
 
182
- // Check if the node is an element that can be scrolled into view
183
- // This includes both HTMLElement and SVGElement
184
- if (node instanceof Element) {
185
- // only when the element is not completely in the viewport, call scrollIntoView
186
- const rect = getRect(node, 1, window);
187
- const isVisible = isElementPartiallyInViewport(rect, window, document, 1);
444
+ const rootDoc = typeof document !== 'undefined' ? document : null;
445
+ const iframeOffset = calculateIframeOffset(
446
+ node.ownerDocument ?? null,
447
+ rootDoc,
448
+ );
188
449
 
450
+ const targetWin = targetWindow as typeof globalThis.window;
451
+ const targetDoc = targetDocument as typeof globalThis.document;
452
+ if (node instanceof (targetWin as any).HTMLElement) {
453
+ const rect = getRect(node, 1, targetWin);
454
+ const isVisible = isElementPartiallyInViewport(
455
+ rect,
456
+ targetWin,
457
+ targetDoc,
458
+ 1,
459
+ );
189
460
  if (!isVisible) {
190
- node.scrollIntoView({ behavior: 'instant', block: 'center' });
461
+ (node as HTMLElement).scrollIntoView({
462
+ behavior: 'instant',
463
+ block: 'center',
464
+ });
191
465
  }
192
466
  }
193
467
 
194
- return collectElementInfo(
195
- node,
196
- window,
197
- document,
198
- 1,
199
- {
200
- left: 0,
201
- top: 0,
202
- },
203
- true,
204
- );
468
+ return collectElementInfo(node, targetWin, targetDoc, 1, iframeOffset, true);
205
469
  }
@@ -396,6 +396,42 @@ export function getNodeAttributes(
396
396
  return Object.fromEntries(attributesList);
397
397
  }
398
398
 
399
+ /** Maximum number of cached node entries to prevent memory leaks */
400
+ const NODE_CACHE_MAX_SIZE = 2000;
401
+
402
/**
 * Install a fresh, empty node-hash cache on `window.midsceneNodeHashCache`,
 * replacing whatever was stored there before. Intended to be called at the
 * start of an extraction cycle so stale DOM references do not pile up.
 * Does nothing when `window` is not defined.
 */
export function setNodeHashCacheListOnWindow() {
  if (typeof window === 'undefined') return;

  const freshCache = new Map<string, globalThis.Node>();
  (window as any).midsceneNodeHashCache = freshCache;
}
411
+
412
/**
 * Fetch the node-hash cache stored on `window.midsceneNodeHashCache`.
 *
 * @returns The cache map, or `undefined` when `window` is not defined or no
 *   cache has been installed yet.
 */
function getNodeCacheMap(): Map<string, globalThis.Node> | undefined {
  return typeof window === 'undefined'
    ? undefined
    : ((window as any).midsceneNodeHashCache as
        | Map<string, globalThis.Node>
        | undefined);
}
418
+
419
/**
 * Store `node` in the node-hash cache under `id`.
 *
 * Nothing happens when no cache is installed or when `id` is already
 * present (the first node stored under an ID wins). Once the cache holds
 * `NODE_CACHE_MAX_SIZE` entries, the first key in iteration order is removed
 * before the new entry is added.
 *
 * @param node - Node to remember.
 * @param id - Hash ID used as the cache key.
 */
export function setNodeToCacheList(node: globalThis.Node, id: string): void {
  const nodeCache = getNodeCacheMap();
  if (!nodeCache || nodeCache.has(id)) return;

  const isFull = nodeCache.size >= NODE_CACHE_MAX_SIZE;
  if (isFull) {
    // Map iteration follows insertion order, so the first key is the oldest.
    const { value: oldestId } = nodeCache.keys().next();
    if (oldestId !== undefined) {
      nodeCache.delete(oldestId);
    }
  }

  nodeCache.set(id, node);
}
430
+
431
/**
 * Look up a node in the node-hash cache.
 *
 * @param id - Hash ID the node was stored under.
 * @returns The cached node, or `undefined` when there is no cache or no
 *   entry for `id`.
 */
export function getNodeFromCacheList(id: string): globalThis.Node | undefined {
  const nodeCache = getNodeCacheMap();
  return nodeCache ? nodeCache.get(id) : undefined;
}
434
+
399
435
  export function midsceneGenerateHash(
400
436
  node: globalThis.Node | null,
401
437
  content: string,
@@ -403,7 +439,13 @@ export function midsceneGenerateHash(
403
439
  ): string {
404
440
  const slicedHash = generateHashId(rect, content);
405
441
 
406
- // Returns the first 10 characters as a short hash
442
+ if (node) {
443
+ if (typeof window !== 'undefined' && !getNodeCacheMap()) {
444
+ setNodeHashCacheListOnWindow();
445
+ }
446
+ setNodeToCacheList(node, slicedHash);
447
+ }
448
+
407
449
  return slicedHash;
408
450
  }
409
451