@rpascene/shared 0.30.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -0
- package/dist/es/baseDB.mjs +109 -0
- package/dist/es/build/copy-static.mjs +29 -0
- package/dist/es/common.mjs +37 -0
- package/dist/es/constants/example-code.mjs +202 -0
- package/dist/es/constants/index.mjs +74 -0
- package/dist/es/env/basic.mjs +6 -0
- package/dist/es/env/constants.mjs +97 -0
- package/dist/es/env/decide-model-config.mjs +172 -0
- package/dist/es/env/global-config-manager.mjs +82 -0
- package/dist/es/env/helper.mjs +45 -0
- package/dist/es/env/index.mjs +5 -0
- package/dist/es/env/init-debug.mjs +18 -0
- package/dist/es/env/model-config-manager.mjs +99 -0
- package/dist/es/env/parse.mjs +69 -0
- package/dist/es/env/types.mjs +265 -0
- package/dist/es/env/utils.mjs +18 -0
- package/dist/es/extractor/constants.mjs +2 -0
- package/dist/es/extractor/cs_postmessage.mjs +61 -0
- package/dist/es/extractor/customLocator.mjs +646 -0
- package/dist/es/extractor/debug.mjs +6 -0
- package/dist/es/extractor/dom-util.mjs +92 -0
- package/dist/es/extractor/index.mjs +7 -0
- package/dist/es/extractor/locator.mjs +95 -0
- package/dist/es/extractor/tree.mjs +81 -0
- package/dist/es/extractor/util.mjs +244 -0
- package/dist/es/extractor/web-extractor.mjs +361 -0
- package/dist/es/img/box-select.mjs +184 -0
- package/dist/es/img/draw-box.mjs +42 -0
- package/dist/es/img/get-jimp.mjs +10 -0
- package/dist/es/img/get-photon.mjs +19 -0
- package/dist/es/img/get-sharp.mjs +11 -0
- package/dist/es/img/index.mjs +5 -0
- package/dist/es/img/info.mjs +32 -0
- package/dist/es/img/transform.mjs +192 -0
- package/dist/es/index.mjs +3 -0
- package/dist/es/logger.mjs +61 -0
- package/dist/es/node/fs.mjs +44 -0
- package/dist/es/node/index.mjs +1 -0
- package/dist/es/polyfills/async-hooks.mjs +2 -0
- package/dist/es/polyfills/index.mjs +1 -0
- package/dist/es/types/index.mjs +3 -0
- package/dist/es/us-keyboard-layout.mjs +1414 -0
- package/dist/es/us-keyboard-layout.mjs.LICENSE.txt +5 -0
- package/dist/es/utils.mjs +66 -0
- package/dist/lib/baseDB.js +149 -0
- package/dist/lib/build/copy-static.js +77 -0
- package/dist/lib/common.js +93 -0
- package/dist/lib/constants/example-code.js +239 -0
- package/dist/lib/constants/index.js +153 -0
- package/dist/lib/env/basic.js +40 -0
- package/dist/lib/env/constants.js +143 -0
- package/dist/lib/env/decide-model-config.js +212 -0
- package/dist/lib/env/global-config-manager.js +116 -0
- package/dist/lib/env/helper.js +85 -0
- package/dist/lib/env/index.js +94 -0
- package/dist/lib/env/init-debug.js +52 -0
- package/dist/lib/env/model-config-manager.js +133 -0
- package/dist/lib/env/parse.js +106 -0
- package/dist/lib/env/types.js +650 -0
- package/dist/lib/env/utils.js +61 -0
- package/dist/lib/extractor/constants.js +42 -0
- package/dist/lib/extractor/cs_postmessage.js +98 -0
- package/dist/lib/extractor/customLocator.js +698 -0
- package/dist/lib/extractor/debug.js +12 -0
- package/dist/lib/extractor/dom-util.js +150 -0
- package/dist/lib/extractor/index.js +153 -0
- package/dist/lib/extractor/locator.js +141 -0
- package/dist/lib/extractor/tree.js +127 -0
- package/dist/lib/extractor/util.js +335 -0
- package/dist/lib/extractor/web-extractor.js +407 -0
- package/dist/lib/img/box-select.js +232 -0
- package/dist/lib/img/draw-box.js +89 -0
- package/dist/lib/img/get-jimp.js +72 -0
- package/dist/lib/img/get-photon.js +76 -0
- package/dist/lib/img/get-sharp.js +63 -0
- package/dist/lib/img/index.js +102 -0
- package/dist/lib/img/info.js +86 -0
- package/dist/lib/img/transform.js +279 -0
- package/dist/lib/index.js +43 -0
- package/dist/lib/logger.js +114 -0
- package/dist/lib/node/fs.js +97 -0
- package/dist/lib/node/index.js +60 -0
- package/dist/lib/polyfills/async-hooks.js +36 -0
- package/dist/lib/polyfills/index.js +60 -0
- package/dist/lib/types/index.js +37 -0
- package/dist/lib/us-keyboard-layout.js +1457 -0
- package/dist/lib/us-keyboard-layout.js.LICENSE.txt +5 -0
- package/dist/lib/utils.js +136 -0
- package/dist/types/baseDB.d.ts +25 -0
- package/dist/types/build/copy-static.d.ts +31 -0
- package/dist/types/common.d.ts +12 -0
- package/dist/types/constants/example-code.d.ts +2 -0
- package/dist/types/constants/index.d.ts +23 -0
- package/dist/types/env/basic.d.ts +6 -0
- package/dist/types/env/constants.d.ts +40 -0
- package/dist/types/env/decide-model-config.d.ts +14 -0
- package/dist/types/env/global-config-manager.d.ts +32 -0
- package/dist/types/env/helper.d.ts +6 -0
- package/dist/types/env/index.d.ts +4 -0
- package/dist/types/env/init-debug.d.ts +1 -0
- package/dist/types/env/model-config-manager.d.ts +24 -0
- package/dist/types/env/parse.d.ts +12 -0
- package/dist/types/env/types.d.ts +295 -0
- package/dist/types/env/utils.d.ts +7 -0
- package/dist/types/extractor/constants.d.ts +1 -0
- package/dist/types/extractor/cs_postmessage.d.ts +2 -0
- package/dist/types/extractor/customLocator.d.ts +69 -0
- package/dist/types/extractor/debug.d.ts +1 -0
- package/dist/types/extractor/dom-util.d.ts +26 -0
- package/dist/types/extractor/index.d.ts +36 -0
- package/dist/types/extractor/locator.d.ts +7 -0
- package/dist/types/extractor/tree.d.ts +9 -0
- package/dist/types/extractor/util.d.ts +43 -0
- package/dist/types/extractor/web-extractor.d.ts +19 -0
- package/dist/types/img/box-select.d.ts +25 -0
- package/dist/types/img/draw-box.d.ts +15 -0
- package/dist/types/img/get-jimp.d.ts +2 -0
- package/dist/types/img/get-photon.d.ts +8 -0
- package/dist/types/img/get-sharp.d.ts +3 -0
- package/dist/types/img/index.d.ts +4 -0
- package/dist/types/img/info.d.ts +29 -0
- package/dist/types/img/transform.d.ts +88 -0
- package/dist/types/index.d.ts +3 -0
- package/dist/types/logger.d.ts +4 -0
- package/dist/types/node/fs.d.ts +15 -0
- package/dist/types/node/index.d.ts +1 -0
- package/dist/types/polyfills/async-hooks.d.ts +6 -0
- package/dist/types/polyfills/index.d.ts +4 -0
- package/dist/types/types/index.d.ts +37 -0
- package/dist/types/us-keyboard-layout.d.ts +32 -0
- package/dist/types/utils.d.ts +22 -0
- package/package.json +102 -0
- package/src/baseDB.ts +158 -0
- package/src/build/copy-static.ts +62 -0
- package/src/common.ts +67 -0
- package/src/constants/example-code.ts +202 -0
- package/src/constants/index.ts +81 -0
- package/src/env/basic.ts +12 -0
- package/src/env/constants.ts +291 -0
- package/src/env/decide-model-config.ts +319 -0
- package/src/env/global-config-manager.ts +174 -0
- package/src/env/helper.ts +80 -0
- package/src/env/index.ts +4 -0
- package/src/env/init-debug.ts +29 -0
- package/src/env/model-config-manager.ts +145 -0
- package/src/env/parse.ts +131 -0
- package/src/env/types.ts +573 -0
- package/src/env/utils.ts +39 -0
- package/src/extractor/constants.ts +5 -0
- package/src/extractor/cs_postmessage.ts +101 -0
- package/src/extractor/customLocator.ts +1138 -0
- package/src/extractor/debug.ts +10 -0
- package/src/extractor/dom-util.ts +141 -0
- package/src/extractor/index.ts +54 -0
- package/src/extractor/locator.ts +179 -0
- package/src/extractor/tree.ts +179 -0
- package/src/extractor/util.ts +468 -0
- package/src/extractor/web-extractor.ts +559 -0
- package/src/img/box-select.ts +346 -0
- package/src/img/draw-box.ts +60 -0
- package/src/img/get-jimp.ts +12 -0
- package/src/img/get-photon.ts +48 -0
- package/src/img/get-sharp.ts +18 -0
- package/src/img/index.ts +24 -0
- package/src/img/info.ts +79 -0
- package/src/img/jimp.d.ts +4 -0
- package/src/img/transform.ts +396 -0
- package/src/index.ts +6 -0
- package/src/logger.ts +93 -0
- package/src/node/fs.ts +84 -0
- package/src/node/index.ts +1 -0
- package/src/polyfills/async-hooks.ts +6 -0
- package/src/polyfills/index.ts +4 -0
- package/src/types/index.ts +53 -0
- package/src/us-keyboard-layout.ts +723 -0
- package/src/utils.ts +127 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { NodeType } from "../constants/index.mjs";
|
|
2
|
+
import { generateHashId } from "../utils.mjs";
|
|
3
|
+
/**
 * Tells whether `node` is a form control: <input>, <textarea>, <select> or <option>.
 *
 * @param {*} node - Candidate DOM node.
 * @param {*} [currentWindow=globalThis] - Window-like object whose `HTMLElement`
 *   constructor is used for the instance check.
 * @returns {boolean} True only for HTMLElement instances with one of the four tags.
 */
function isFormElement(node, currentWindow = globalThis) {
  if (!(node instanceof currentWindow.HTMLElement)) {
    return false;
  }
  const formTags = ['input', 'textarea', 'select', 'option'];
  return formTags.includes(node.tagName.toLowerCase());
}
|
|
6
|
+
/**
 * Tells whether `node` is a <button> element.
 *
 * @param {*} node - Candidate DOM node.
 * @param {*} [currentWindow=globalThis] - Window-like object providing `HTMLElement`.
 * @returns {boolean}
 */
function isButtonElement(node, currentWindow = globalThis) {
  if (!(node instanceof currentWindow.HTMLElement)) {
    return false;
  }
  const tag = node.tagName.toLowerCase();
  return tag === 'button';
}
|
|
9
|
+
/**
 * Tells whether `node` is an anchor (<a>) element.
 *
 * @param {*} node - Candidate DOM node.
 * @param {*} [currentWindow=globalThis] - Window-like object providing `HTMLElement`.
 * @returns {boolean}
 */
function isAElement(node, currentWindow = globalThis) {
  if (!(node instanceof currentWindow.HTMLElement)) {
    return false;
  }
  const tag = node.tagName.toLowerCase();
  return tag === 'a';
}
|
|
12
|
+
/**
 * Tells whether `node` is an instance of the window's `SVGElement` constructor.
 *
 * @param {*} node - Candidate DOM node.
 * @param {*} [currentWindow=globalThis] - Window-like object providing `SVGElement`.
 * @returns {boolean}
 */
function isSvgElement(node, currentWindow = globalThis) {
  const SvgConstructor = currentWindow.SVGElement;
  return node instanceof SvgConstructor;
}
|
|
15
|
+
/**
 * Tells whether `node` should be treated as an image.
 *
 * A node counts as an image when any of these holds:
 *  - it is an Element for which `includeBaseElement` is false and whose computed
 *    `background-image` is not `none`;
 *  - `isIconfont` reports it as an icon-font glyph;
 *  - it is an <img> HTMLElement or an <svg> SVGElement.
 *
 * Every lookup goes through `currentWindow`, so nodes that belong to another
 * window-like context are inspected with that context's constructors and
 * `getComputedStyle` rather than the global `window`.
 *
 * @param {*} node - Candidate DOM node.
 * @param {*} [currentWindow=globalThis] - Window-like object the node belongs to.
 * @returns {boolean}
 */
function isImgElement(node, currentWindow = globalThis) {
  if (!includeBaseElement(node, currentWindow) && node instanceof currentWindow.Element) {
    const computedStyle = currentWindow.getComputedStyle(node);
    const backgroundImage = computedStyle.getPropertyValue('background-image');
    if (backgroundImage !== 'none') {
      return true;
    }
  }
  if (isIconfont(node, currentWindow)) {
    return true;
  }
  const isHtmlImg = node instanceof currentWindow.HTMLElement && node.tagName.toLowerCase() === 'img';
  return isHtmlImg || (node instanceof currentWindow.SVGElement && node.tagName.toLowerCase() === 'svg');
}
|
|
24
|
+
/**
 * Tells whether `node` is rendered with an icon font, i.e. its computed
 * `font-family` contains "iconfont" (case-insensitive).
 *
 * The computed style is read through `currentWindow` so the check works for
 * nodes owned by a window-like context other than the global one.
 *
 * @param {*} node - Candidate DOM node.
 * @param {*} [currentWindow=globalThis] - Window-like object providing `Element`
 *   and `getComputedStyle`.
 * @returns {boolean} False for anything that is not an Element.
 */
function isIconfont(node, currentWindow = globalThis) {
  if (!(node instanceof currentWindow.Element)) {
    return false;
  }
  const fontFamilyValue = currentWindow.getComputedStyle(node).fontFamily || '';
  return fontFamilyValue.toLowerCase().includes('iconfont');
}
|
|
32
|
+
/**
 * Tells whether `node` is a "leaf-like" element rather than a layout container:
 * text, icon font, image, button, anchor or form control.
 *
 * The predicates run in that order and evaluation stops at the first match.
 *
 * @param {*} node - Candidate DOM node.
 * @param {*} [currentWindow=globalThis] - Window-like object forwarded to every predicate.
 * @returns {boolean}
 */
function isNotContainerElement(node, currentWindow = globalThis) {
  const leafPredicates = [
    isTextElement,
    isIconfont,
    isImgElement,
    isButtonElement,
    isAElement,
    isFormElement,
  ];
  return leafPredicates.some((predicate) => predicate(node, currentWindow));
}
|
|
35
|
+
/**
 * Tells whether `node` represents text.
 *
 * Returns true when:
 *  - `node` is an Element whose only child node is a Text node, or
 *  - `node` itself is a `#text` node that `isIconfont` does not claim.
 *
 * `currentWindow` is forwarded to `isIconfont` so that both checks use the same
 * window-like context.
 *
 * @param {*} node - Candidate DOM node.
 * @param {*} [currentWindow=globalThis] - Window-like object providing `Element` and `Text`.
 * @returns {boolean}
 */
function isTextElement(node, currentWindow = globalThis) {
  if (node instanceof currentWindow.Element) {
    const childNodes = node.childNodes;
    const hasSingleChild = childNodes != null && childNodes.length === 1;
    if (hasSingleChild && childNodes[0] instanceof currentWindow.Text) {
      return true;
    }
  }
  const nodeName = node.nodeName;
  // Mirrors the defensive lookup of the original: a missing nodeName (or one
  // without toLowerCase) simply fails the '#text' comparison.
  const loweredName = nodeName == null || nodeName.toLowerCase == null ? undefined : nodeName.toLowerCase();
  return loweredName === '#text' && !isIconfont(node, currentWindow);
}
|
|
43
|
+
/**
 * Tells whether `node` is a plain container: an HTMLElement for which
 * `includeBaseElement` is false and whose computed `background-color` is a
 * non-empty string.
 *
 * The computed style is read through `currentWindow` (not the global `window`)
 * so the same window-like context is used for every check.
 *
 * @param {*} node - Candidate DOM node.
 * @param {*} [currentWindow=globalThis] - Window-like object the node belongs to.
 * @returns {boolean}
 */
function isContainerElement(node, currentWindow = globalThis) {
  if (!(node instanceof currentWindow.HTMLElement)) {
    return false;
  }
  if (includeBaseElement(node, currentWindow)) {
    return false;
  }
  const computedStyle = currentWindow.getComputedStyle(node);
  const backgroundColor = computedStyle.getPropertyValue('background-color');
  // NOTE(review): browsers normally report a value such as "rgba(0, 0, 0, 0)"
  // even for transparent backgrounds, so this is presumably true for most
  // attached elements — confirm whether a transparency check was intended.
  return Boolean(backgroundColor);
}
|
|
51
|
+
/**
 * Tells whether `node` carries "base" content: it has non-empty `innerText`, or
 * it contains at least one descendant matching svg, button, input, textarea,
 * select, option, img or a.
 *
 * @param {*} node - Candidate DOM node.
 * @param {*} [currentWindow=globalThis] - Window-like object providing `HTMLElement`.
 * @returns {boolean} False for anything that is not an HTMLElement.
 */
function includeBaseElement(node, currentWindow = globalThis) {
  if (!(node instanceof currentWindow.HTMLElement)) {
    return false;
  }
  if (node.innerText) {
    return true;
  }
  const baseTags = ['svg', 'button', 'input', 'textarea', 'select', 'option', 'img', 'a'];
  // Tags are probed one at a time, in list order, stopping at the first hit.
  return baseTags.some((tag) => node.querySelectorAll(tag).length > 0);
}
|
|
70
|
+
/**
 * Builds a synthetic element descriptor for a bare screen coordinate.
 *
 * The descriptor's rect is an 8x8 box whose left/top are the coordinate minus 4,
 * clamped at 0; its id is `generateHashId(rect)`; its center is the original
 * coordinate.
 *
 * @param {{x: number, y: number}} position - Coordinate to wrap.
 * @returns {{id: *, attributes: {nodeType: *}, rect: object, content: string, center: number[]}}
 */
function generateElementByPosition(position) {
  const { x, y } = position;
  const boxSize = 8;
  const halfBox = 4;
  const rect = {
    left: Math.max(x - halfBox, 0),
    top: Math.max(y - halfBox, 0),
    width: boxSize,
    height: boxSize,
  };
  return {
    id: generateHashId(rect),
    attributes: { nodeType: NodeType.POSITION },
    rect,
    content: '',
    center: [x, y],
  };
}
|
|
92
|
+
export { generateElementByPosition, isAElement, isButtonElement, isContainerElement, isFormElement, isImgElement, isNotContainerElement, isSvgElement, isTextElement };
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { descriptionOfTree, traverseTree, treeToList, trimAttributes, truncateText } from "./tree.mjs";
|
|
2
|
+
import { extractTextWithPosition, extractTreeNode, extractTreeNodeAsString } from "./web-extractor.mjs";
|
|
3
|
+
import { getNodeFromCacheList, setNodeHashCacheListOnWindow } from "./util.mjs";
|
|
4
|
+
import { getElementInfoByXpath, getElementXpath, getNodeInfoByXpath, getXpathsById, getXpathsByPoint } from "./locator.mjs";
|
|
5
|
+
import { generateElementByPosition, isNotContainerElement } from "./dom-util.mjs";
|
|
6
|
+
export * from "./customLocator.mjs";
|
|
7
|
+
export { descriptionOfTree, generateElementByPosition, getElementInfoByXpath, getElementXpath, getNodeFromCacheList, getNodeInfoByXpath, getXpathsById, getXpathsByPoint, isNotContainerElement, setNodeHashCacheListOnWindow, traverseTree, treeToList, trimAttributes, truncateText, extractTreeNode as webExtractNodeTree, extractTreeNodeAsString as webExtractNodeTreeAsString, extractTextWithPosition as webExtractTextWithPosition };
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import { isSvgElement } from "./dom-util.mjs";
|
|
2
|
+
import { getNodeFromCacheList, getRect, isElementPartiallyInViewport } from "./util.mjs";
|
|
3
|
+
import { collectElementInfo } from "./web-extractor.mjs";
|
|
4
|
+
/**
 * Computes the 1-based XPath position of `element` among its preceding element
 * siblings that share the same node name (case-insensitive).
 *
 * @param {*} element - Element exposing `nodeName` and `previousElementSibling`.
 * @returns {number} 1 when no earlier sibling has the same node name.
 */
const getElementXpathIndex = (element) => {
  let position = 1;
  for (let sibling = element.previousElementSibling; sibling; sibling = sibling.previousElementSibling) {
    const sameName = sibling.nodeName.toLowerCase() === element.nodeName.toLowerCase();
    if (sameName) {
      position += 1;
    }
  }
  return position;
};
|
|
13
|
+
/**
 * Collapses every whitespace run in `text` to a single space and trims the ends.
 *
 * @param {*} text - Value to normalise.
 * @returns {string} The normalised text, or '' when `text` is not a string.
 */
const normalizeXpathText = (text) => {
  const isString = typeof text === 'string';
  return isString ? text.replace(/\s+/g, ' ').trim() : '';
};
|
|
17
|
+
/**
 * Renders `text` as an XPath 1.0 string literal.
 *
 * XPath 1.0 literals have no escape sequence, so the quote style is picked to
 * avoid the quotes in the text; when the text contains both kinds, the literal
 * is assembled with concat(). Text without double quotes keeps the historical
 * `"..."` form.
 *
 * @param {string} text - Raw text to embed in an XPath expression.
 * @returns {string} An XPath expression evaluating to `text`.
 */
const toXpathStringLiteral = (text) => {
  if (!text.includes('"')) {
    return `"${text}"`;
  }
  if (!text.includes("'")) {
    return `'${text}'`;
  }
  const pieces = text.split('"').map((piece) => `"${piece}"`);
  return `concat(${pieces.join(`, '"', `)})`;
};
/**
 * Builds the XPath step for `element` appended to its parent's XPath.
 *
 * In order-insensitive mode a leaf element with non-blank text is addressed by
 * its normalised text; in every other case it is addressed by its positional
 * index among same-named siblings.
 *
 * @param {*} element - Element to describe.
 * @param {boolean} isOrderSensitive - Always use the positional index when true.
 * @param {boolean} isLeafElement - Allow text-based addressing when true.
 * @returns {string}
 */
const buildCurrentElementXpath = (element, isOrderSensitive, isLeafElement) => {
  const parentPath = element.parentNode ? getElementXpath(element.parentNode, isOrderSensitive) : '';
  const prefix = parentPath ? `${parentPath}/` : '/';
  const tagName = element.nodeName.toLowerCase();
  if (!isOrderSensitive && isLeafElement) {
    const textContent = element.textContent == null ? undefined : element.textContent.trim();
    if (textContent) {
      const literal = toXpathStringLiteral(normalizeXpathText(textContent));
      return `${prefix}${tagName}[normalize-space()=${literal}]`;
    }
  }
  return `${prefix}${tagName}[${getElementXpathIndex(element)}]`;
};
/**
 * Computes an XPath for a DOM node.
 *
 *  - Text nodes: the parent element's XPath followed by a `text()` step,
 *    filtered by the normalised text when it is non-blank. Returns '' when the
 *    parent is not an element.
 *  - `document.documentElement` / `document.body`: '/html' and '/html/body'.
 *  - SVG elements: the XPath of the nearest non-SVG ancestor element (the SVG
 *    subtree itself is not addressed). Returns '' for an SVG element without a
 *    parent instead of throwing.
 *  - Other elements: see `buildCurrentElementXpath`.
 *  - Any other node type: ''.
 *
 * @param {*} element - Node to locate.
 * @param {boolean} [isOrderSensitive=false] - Use positional indexes only.
 * @param {boolean} [isLeafElement=false] - Allow text-based addressing of this element.
 * @returns {string}
 */
const getElementXpath = (element, isOrderSensitive = false, isLeafElement = false) => {
  if (element.nodeType === Node.TEXT_NODE) {
    const parentNode = element.parentNode;
    if (!parentNode || parentNode.nodeType !== Node.ELEMENT_NODE) {
      return '';
    }
    const parentXPath = getElementXpath(parentNode, isOrderSensitive, true);
    const textContent = element.textContent == null ? undefined : element.textContent.trim();
    if (textContent) {
      const literal = toXpathStringLiteral(normalizeXpathText(textContent));
      return `${parentXPath}/text()[normalize-space()=${literal}]`;
    }
    return `${parentXPath}/text()`;
  }
  if (element.nodeType !== Node.ELEMENT_NODE) {
    return '';
  }
  const el = element;
  if (el === document.documentElement) {
    return '/html';
  }
  if (el === document.body) {
    return '/html/body';
  }
  if (isSvgElement(el)) {
    let parent = el.parentNode;
    while (parent && parent.nodeType === Node.ELEMENT_NODE) {
      if (!isSvgElement(parent)) {
        return getElementXpath(parent, isOrderSensitive, isLeafElement);
      }
      parent = parent.parentNode;
    }
    // A detached SVG element has no ancestor to fall back to.
    if (!el.parentNode) {
      return '';
    }
    return getElementXpath(el.parentNode, isOrderSensitive, isLeafElement);
  }
  return buildCurrentElementXpath(el, isOrderSensitive, isLeafElement);
};
|
|
57
|
+
/**
 * Looks up a cached node by id and returns its XPath (order-insensitive, leaf
 * mode) wrapped in a one-element array.
 *
 * @param {*} id - Cache id passed to `getNodeFromCacheList`.
 * @returns {string[]|null} Null when no node is cached under `id`.
 */
function getXpathsById(id) {
  const cachedNode = getNodeFromCacheList(id);
  return cachedNode ? [getElementXpath(cachedNode, false, true)] : null;
}
|
|
65
|
+
/**
 * Resolves the element at a viewport point via `document.elementFromPoint` and
 * returns its XPath (leaf mode) wrapped in a one-element array.
 *
 * @param {{left: number, top: number}} point - Viewport coordinates.
 * @param {boolean} isOrderSensitive - Forwarded to `getElementXpath`.
 * @returns {string[]|null} Null when no element is found at the point.
 */
function getXpathsByPoint(point, isOrderSensitive) {
  const hit = document.elementFromPoint(point.left, point.top);
  return hit ? [getElementXpath(hit, isOrderSensitive, true)] : null;
}
|
|
73
|
+
/**
 * Evaluates `xpath` against `document` and returns the matching node only when
 * the expression matches exactly one node.
 *
 * @param {string} xpath - XPath expression.
 * @returns {*} The single matching node, or null for zero or multiple matches.
 */
function getNodeInfoByXpath(xpath) {
  const snapshot = document.evaluate(xpath, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
  return snapshot.snapshotLength === 1 ? snapshot.snapshotItem(0) : null;
}
|
|
79
|
+
/**
 * Resolves `xpath` to a single node and returns the result of
 * `collectElementInfo` for it.
 *
 * When the node is an HTMLElement that `isElementPartiallyInViewport` (ratio 1)
 * reports as not visible, it is first scrolled to the viewport centre.
 *
 * @param {string} xpath - XPath expression expected to match exactly one node.
 * @returns {*} Whatever `collectElementInfo` returns, or null when the XPath does
 *   not resolve to a single node.
 */
function getElementInfoByXpath(xpath) {
  const target = getNodeInfoByXpath(xpath);
  if (!target) {
    return null;
  }
  if (target instanceof HTMLElement) {
    const targetRect = getRect(target, 1, window);
    const alreadyVisible = isElementPartiallyInViewport(targetRect, window, document, 1);
    if (!alreadyVisible) {
      target.scrollIntoView({ behavior: 'instant', block: 'center' });
    }
  }
  const zeroOffset = { left: 0, top: 0 };
  return collectElementInfo(target, window, document, 1, zeroOffset, true);
}
|
|
95
|
+
export { getElementInfoByXpath, getElementXpath, getNodeInfoByXpath, getXpathsById, getXpathsByPoint };
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
 * Converts a value to display text and caps its length.
 *
 *  - `undefined` and `null` become ''. (`null` previously leaked through the
 *    object branch and was rendered as the string "null".)
 *  - Numbers are returned via `toString()` without truncation.
 *  - Objects are serialised with `JSON.stringify` and then treated as strings.
 *  - Strings longer than `maxLength` are cut to `maxLength` characters and
 *    suffixed with "..."; shorter strings are trimmed.
 *  - Anything else becomes ''.
 *
 * @param {*} text - Value to render.
 * @param {number} [maxLength=150] - Maximum number of characters kept before "...".
 * @returns {string}
 */
function truncateText(text, maxLength = 150) {
  if (text == null) {
    return '';
  }
  if (typeof text === 'number') {
    return text.toString();
  }
  const value = typeof text === 'object' ? JSON.stringify(text) : text;
  if (typeof value !== 'string') {
    return '';
  }
  return value.length > maxLength ? `${value.slice(0, maxLength)}...` : value.trim();
}
|
|
9
|
+
/**
 * Copies `attributes` into a new object, dropping the `style`, `htmlTagName`
 * and `nodeType` keys and passing every remaining value through `truncateText`.
 *
 * @param {object} attributes - Attribute map to trim.
 * @param {number} truncateTextLength - Length limit forwarded to `truncateText`.
 * @returns {object} New object; key order follows `Object.keys(attributes)`.
 */
function trimAttributes(attributes, truncateTextLength) {
  const droppedKeys = ['style', 'htmlTagName', 'nodeType'];
  const trimmed = {};
  for (const key of Object.keys(attributes)) {
    if (droppedKeys.includes(key)) {
      continue;
    }
    trimmed[key] = truncateText(attributes[key], truncateTextLength);
  }
  return trimmed;
}
|
|
18
|
+
// A node is rendered as its own tag only when both its width and its height are
// strictly greater than this value.
const nodeSizeThreshold = 4;
/**
 * Renders an element tree as an indented, HTML-like text description.
 *
 * Each tree entry has the shape `{ node, children }`. An entry is rendered as a
 * real tag when it has a `node` with a `rect` larger than `nodeSizeThreshold`
 * in both dimensions, passes the text filter and passes the visibility filter.
 * Other entries are rendered as an anonymous `<>` wrapper around their
 * children (unless text filtering is on, or the first child is already such a
 * wrapper), or are dropped entirely when they have no rendered children.
 *
 * A node without a `rect` is now treated like any other non-renderable node;
 * previously it raised a TypeError on `rect.width`.
 *
 * @param {object} tree - Root entry of the tree.
 * @param {number} truncateTextLength - Limit forwarded to `truncateText`/`trimAttributes`.
 * @param {boolean} [filterNonTextContent=false] - Render only nodes with truthy `content`.
 * @param {boolean} [visibleOnly=true] - Render only nodes with truthy `isVisible`.
 * @returns {string} The description with blank lines removed.
 */
function descriptionOfTree(tree, truncateTextLength, filterNonTextContent = false, visibleOnly = true) {
  const attributesString = (kv) =>
    Object.entries(kv)
      .map(([key, value]) => `${key}="${truncateText(value, truncateTextLength)}"`)
      .join(' ');

  function buildContentTree(node, indent = 0, onlyVisible = true) {
    const indentStr = ' '.repeat(indent);

    // Children are rendered first: the wrapper decision below depends on them.
    let children = '';
    for (const child of node.children || []) {
      const childContent = buildContentTree(child, indent + 1, onlyVisible);
      if (childContent) {
        children += `\n${childContent}`;
      }
    }

    const info = node.node;
    const rect = info ? info.rect : undefined;
    const isRenderable = Boolean(
      info &&
        rect &&
        rect.width > nodeSizeThreshold &&
        rect.height > nodeSizeThreshold &&
        (!filterNonTextContent || info.content) &&
        (!onlyVisible || info.isVisible),
    );

    let before = '';
    let contentWithIndent = '';
    let after = '';

    if (isRenderable) {
      const attributes = info.attributes;
      // NOTE(review): when neither htmlTagName nor a string nodeType is present
      // this still throws, exactly as before — confirm whether a fallback tag
      // name is wanted.
      const nodeTypeString =
        attributes != null && attributes.htmlTagName
          ? attributes.htmlTagName.replace(/[<>]/g, '')
          : info.attributes.nodeType.replace(/\sNode$/, '').toLowerCase();
      const markerId = info.indexId;
      const markerIdString = markerId ? `markerId="${markerId}"` : '';
      const rectAttribute = {
        left: rect.left,
        top: rect.top,
        width: rect.width,
        height: rect.height,
      };
      before = `<${nodeTypeString} id="${info.id}" ${markerIdString} ${attributesString(trimAttributes(info.attributes || {}, truncateTextLength))} ${attributesString(rectAttribute)}>`;
      const content = truncateText(info.content, truncateTextLength);
      contentWithIndent = content ? `\n${indentStr} ${content}` : '';
      after = `</${nodeTypeString}>`;
    } else if (!filterNonTextContent && !children.trim().startsWith('<>')) {
      // Anonymous wrapper; skipped when the first child already is one so that
      // wrappers do not nest directly.
      before = '<>';
      after = '</>';
    }

    if (!isRenderable && !children.trim()) {
      return '';
    }
    const result = `${indentStr}${before}${contentWithIndent}${children}\n${indentStr}${after}`;
    return result.trim() ? result : '';
  }

  const result = buildContentTree(tree, 0, visibleOnly);
  return result.replace(/^\s*\n/gm, '');
}
|
|
64
|
+
/**
 * Flattens a tree of `{ node, children }` entries into a list of its `node`
 * values, in depth-first pre-order. Entries with a falsy `node` contribute
 * nothing themselves but their children are still visited.
 *
 * Entries without a `children` array are treated as leaves, matching how
 * `descriptionOfTree` tolerates missing children.
 *
 * @param {object} tree - Root entry.
 * @returns {Array} Collected nodes.
 */
function treeToList(tree) {
  const result = [];
  function dfs(node) {
    if (node.node) {
      result.push(node.node);
    }
    for (const child of node.children || []) {
      dfs(child);
    }
  }
  dfs(tree);
  return result;
}
|
|
73
|
+
/**
 * Walks a tree of `{ node, children }` entries depth-first (pre-order) and
 * replaces every truthy `node` with the value returned by `onNode(node)`.
 * The tree is modified in place.
 *
 * Entries without a `children` array are treated as leaves, matching how
 * `descriptionOfTree` tolerates missing children.
 *
 * @param {object} tree - Root entry.
 * @param {function(*): *} onNode - Mapper applied to each truthy node.
 * @returns {object} The same `tree` object.
 */
function traverseTree(tree, onNode) {
  function dfs(node) {
    if (node.node) {
      node.node = onNode(node.node);
    }
    for (const child of node.children || []) {
      dfs(child);
    }
  }
  dfs(tree);
  return tree;
}
|
|
81
|
+
export { descriptionOfTree, traverseTree, treeToList, trimAttributes, truncateText };
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
import { generateHashId } from "../utils.mjs";
|
|
2
|
+
import { extractTextWithPosition } from "./web-extractor.mjs";
|
|
3
|
+
const MAX_VALUE_LENGTH = 300;
|
|
4
|
+
// Module-level switch read by `logger`; off until `setDebugMode` turns it on.
let debugMode = false;
/**
 * Stores the debug flag used by `logger`.
 * @param {*} mode - New flag value (stored as given).
 */
function setDebugMode(mode) {
  debugMode = mode;
}
/**
 * Returns the current debug flag.
 * @returns {*} The value last passed to `setDebugMode`, or false initially.
 */
function getDebugMode() {
  return debugMode;
}
/**
 * Forwards its arguments to `console.log`, but only while debug mode is on.
 * @param {...*} _msg - Values to log.
 */
function logger(..._msg) {
  if (debugMode) {
    console.log(..._msg);
  }
}
|
|
15
|
+
/**
 * Tells whether a sufficient share of `rect` lies inside the viewport.
 *
 * The viewport is taken as (0, 0) to `innerWidth` x `innerHeight` of
 * `currentWindow`, falling back to the document element's client size when
 * those are falsy.
 *
 * @param {object} rect - Rect with left/top/right/bottom/width/height.
 * @param {*} currentWindow - Window-like object providing innerWidth/innerHeight.
 * @param {*} currentDocument - Document providing `documentElement` client size.
 * @param {number} [visibleAreaRatio=2/3] - Minimum visible-area / total-area ratio.
 * @returns {boolean} False when the rect does not overlap the viewport at all.
 */
function isElementPartiallyInViewport(rect, currentWindow, currentDocument, visibleAreaRatio = 2 / 3) {
  const viewportWidth = currentWindow.innerWidth || currentDocument.documentElement.clientWidth;
  const viewportHeight = currentWindow.innerHeight || currentDocument.documentElement.clientHeight;
  const viewportRect = {
    left: 0,
    top: 0,
    width: viewportWidth,
    height: viewportHeight,
    right: viewportWidth,
    bottom: viewportHeight,
    x: 0,
    y: 0,
    zoom: 1,
  };
  const visiblePart = overlappedRect(rect, viewportRect);
  if (!visiblePart) {
    return false;
  }
  const visibleArea = visiblePart.width * visiblePart.height;
  const totalArea = rect.height * rect.width;
  return visibleArea / totalArea >= visibleAreaRatio;
}
|
|
35
|
+
/**
 * Reads the computed `content` of an element's `::before` and `::after`
 * pseudo-elements.
 *
 * A value of `none` is reported as ''; otherwise every double quote is removed
 * from the value.
 *
 * @param {*} element - Candidate element.
 * @param {*} currentWindow - Window-like object providing `HTMLElement` and `getComputedStyle`.
 * @returns {{before: string, after: string}} Both '' when `element` is not an HTMLElement.
 */
function getPseudoElementContent(element, currentWindow) {
  if (!(element instanceof currentWindow.HTMLElement)) {
    return { before: '', after: '' };
  }
  const rawContent = (pseudo) => currentWindow.getComputedStyle(element, pseudo).getPropertyValue('content');
  const clean = (raw) => (raw === 'none' ? '' : raw.replace(/"/g, ''));
  const beforeContent = rawContent('::before');
  const afterContent = rawContent('::after');
  return { before: clean(beforeContent), after: clean(afterContent) };
}
|
|
47
|
+
/**
 * Tells whether the element's computed `overflow-y` is `scroll`, `auto` or `hidden`.
 *
 * @param {*} element - Element to inspect.
 * @param {*} currentWindow - Window-like object providing `getComputedStyle`.
 * @returns {boolean}
 */
function hasOverflowY(element, currentWindow) {
  const { overflowY } = currentWindow.getComputedStyle(element);
  return ['scroll', 'auto', 'hidden'].includes(overflowY);
}
|
|
51
|
+
/**
 * Computes the intersection of two rects.
 *
 * @param {object} rect1 - Rect with left/top/right/bottom.
 * @param {object} rect2 - Rect with left/top/right/bottom.
 * @returns {object|null} The intersection (left, top, right, bottom, width,
 *   height, x, y, zoom: 1), or null when the rects share no area; rects that
 *   merely touch along an edge do not count as overlapping.
 */
function overlappedRect(rect1, rect2) {
  const left = Math.max(rect1.left, rect2.left);
  const top = Math.max(rect1.top, rect2.top);
  const right = Math.min(rect1.right, rect2.right);
  const bottom = Math.min(rect1.bottom, rect2.bottom);
  const hasArea = left < right && top < bottom;
  if (!hasArea) {
    return null;
  }
  return {
    left,
    top,
    right,
    bottom,
    width: right - left,
    height: bottom - top,
    x: left,
    y: top,
    zoom: 1,
  };
}
|
|
69
|
+
/**
 * Measures a node's bounding rect and scales every field by a zoom factor.
 *
 * HTMLElements are measured with `getBoundingClientRect()`; any other node is
 * measured through a Range selecting its contents. For HTMLElements that do not
 * expose `currentCSSZoom`, the computed CSS `zoom` (defaulting to 1 when it does
 * not parse to a non-zero number) is multiplied into `baseZoom`.
 *
 * @param {*} el - Node to measure.
 * @param {number} baseZoom - Zoom factor supplied by the caller.
 * @param {*} currentWindow - Window-like object providing `HTMLElement`,
 *   `getComputedStyle` and `document.createRange`.
 * @returns {{width: number, height: number, left: number, top: number, right: number,
 *   bottom: number, x: number, y: number, zoom: number}}
 */
function getRect(el, baseZoom, currentWindow) {
  let measured;
  let elementZoom = 1;
  if (el instanceof currentWindow.HTMLElement) {
    measured = el.getBoundingClientRect();
    if (!('currentCSSZoom' in el)) {
      elementZoom = Number.parseFloat(currentWindow.getComputedStyle(el).zoom) || 1;
    }
  } else {
    const range = currentWindow.document.createRange();
    range.selectNodeContents(el);
    measured = range.getBoundingClientRect();
  }
  const zoom = elementZoom * baseZoom;
  const scaled = (value) => value * zoom;
  return {
    width: scaled(measured.width),
    height: scaled(measured.height),
    left: scaled(measured.left),
    top: scaled(measured.top),
    right: scaled(measured.right),
    bottom: scaled(measured.bottom),
    x: scaled(measured.x),
    y: scaled(measured.y),
    zoom,
  };
}
|
|
93
|
+
/**
 * Tells whether another, unrelated element sits on top of `el` at the centre of `rect`.
 *
 * The element found by `document.elementFromPoint` at the rect's centre does not
 * count as covering when it is `el` itself, a descendant of `el`, an ancestor
 * of `el`, or when its own rect does not overlap `rect`.
 *
 * @param {*} el - Element being checked.
 * @param {object} rect - Rect of `el` (left/top/width/height, plus right/bottom for the overlap test).
 * @param {*} currentWindow - Window-like object providing `document.elementFromPoint`.
 * @returns {boolean}
 */
const isElementCovered = (el, rect, currentWindow) => {
  const x = rect.left + rect.width / 2;
  const y = rect.top + rect.height / 2;
  const topElement = currentWindow.document.elementFromPoint(x, y);
  if (!topElement || topElement === el) {
    return false;
  }
  const containsTop = el == null ? undefined : el.contains(topElement);
  if (containsTop) {
    return false;
  }
  if (topElement.contains(el)) {
    return false;
  }
  const rectOfTopElement = getRect(topElement, 1, currentWindow);
  if (!overlappedRect(rect, rectOfTopElement)) {
    return false;
  }
  logger(el, 'Element is covered by another element', { topElement, el, rect, x, y });
  return true;
};
|
|
113
|
+
/**
 * Computes the rounded rect of a node, or `false` when the node should be ignored.
 *
 * The node is rejected (returns false) when:
 *  - it is falsy, or is neither an HTMLElement, a text node nor an `svg` node;
 *  - it is an HTMLElement with `display: none`, `visibility: hidden`, or
 *    `opacity: 0` (the opacity rule does not apply to INPUT elements);
 *  - its rect has zero width and zero height;
 *  - `baseZoom` is 1 and `isElementCovered` reports it as covered;
 *  - an ancestor with `overflow: hidden` lies entirely apart from it (with a
 *    10px tolerance).
 *
 * The ancestor walk stops at `currentDocument.body` or at a `fixed`/`sticky`
 * ancestor; from an `absolute` ancestor it jumps to that ancestor's nearest
 * non-static ancestor.
 *
 * @param {*} el - Node to measure.
 * @param {*} currentWindow - Window-like object providing `HTMLElement` and `getComputedStyle`.
 * @param {*} currentDocument - Document whose `body` ends the ancestor walk.
 * @param {number} [baseZoom=1] - Zoom factor forwarded to `getRect`.
 * @returns {{left: number, top: number, width: number, height: number, zoom: number,
 *   isVisible: boolean}|false}
 */
function elementRect(el, currentWindow, currentDocument, baseZoom = 1) {
  if (!el) {
    logger(el, 'Element is not in the DOM hierarchy');
    return false;
  }

  const isHtmlElement = el instanceof currentWindow.HTMLElement;
  if (!isHtmlElement) {
    const isTextNode = el.nodeType === Node.TEXT_NODE;
    if (!isTextNode && el.nodeName.toLowerCase() !== 'svg') {
      logger(el, 'Element is not in the DOM hierarchy');
      return false;
    }
  } else {
    const ownStyle = currentWindow.getComputedStyle(el);
    const fullyTransparent = ownStyle.opacity === '0' && el.tagName !== 'INPUT';
    if (ownStyle.display === 'none' || ownStyle.visibility === 'hidden' || fullyTransparent) {
      logger(el, 'Element is hidden');
      return false;
    }
  }

  const rect = getRect(el, baseZoom, currentWindow);
  if (rect.width === 0 && rect.height === 0) {
    logger(el, 'Element has no size');
    return false;
  }
  if (baseZoom === 1 && isElementCovered(el, rect, currentWindow)) {
    return false;
  }
  const isVisible = isElementPartiallyInViewport(rect, currentWindow, currentDocument);

  // Nearest ancestor of `start` whose computed position is not `static`.
  const nearestNonStaticAncestor = (start) => {
    let candidate = start == null ? undefined : start.parentElement;
    for (; candidate; candidate = candidate.parentElement) {
      if (currentWindow.getComputedStyle(candidate).position !== 'static') {
        return candidate;
      }
    }
    return null;
  };

  const tolerance = 10;
  let ancestor = el;
  while (ancestor && ancestor !== currentDocument.body) {
    if (!(ancestor instanceof currentWindow.HTMLElement)) {
      ancestor = ancestor.parentElement;
      continue;
    }
    const ancestorStyle = currentWindow.getComputedStyle(ancestor);
    if (ancestorStyle.overflow === 'hidden') {
      const parentRect = getRect(ancestor, 1, currentWindow);
      const liesOutside =
        rect.right < parentRect.left - tolerance ||
        rect.left > parentRect.right + tolerance ||
        rect.bottom < parentRect.top - tolerance ||
        rect.top > parentRect.bottom + tolerance;
      if (liesOutside) {
        logger(el, 'element is partially or totally hidden by an ancestor', { rect, parentRect });
        return false;
      }
    }
    if (ancestorStyle.position === 'fixed' || ancestorStyle.position === 'sticky') {
      break;
    }
    ancestor = ancestorStyle.position === 'absolute' ? nearestNonStaticAncestor(ancestor) : ancestor.parentElement;
  }

  return {
    left: Math.round(rect.left),
    top: Math.round(rect.top),
    width: Math.round(rect.width),
    height: Math.round(rect.height),
    zoom: rect.zoom,
    isVisible,
  };
}
|
|
175
|
+
/**
 * Returns the trimmed text of an element or text node, or `false` when there is
 * nothing but whitespace.
 *
 * @param {*} node - Node exposing `nodeType`, `nodeName`, `textContent` / `innerText`.
 * @returns {string|false} False for falsy nodes, for nodes that are neither
 *   element nor text, and for empty or whitespace-only content.
 */
function validTextNodeContent(node) {
  if (!node) {
    return false;
  }
  const isElementOrText =
    node.nodeType === Node.ELEMENT_NODE || node.nodeType === Node.TEXT_NODE || node.nodeName === '#text';
  if (!isElementOrText) {
    return false;
  }
  const content = node.textContent || node.innerText;
  const hasVisibleText = content && !/^\s*$/.test(content);
  return hasVisibleText ? content.trim() : false;
}
|
|
182
|
+
/**
 * Collects an HTMLElement's attributes into a plain object.
 *
 *  - `class` is rewritten as a selector-like string: ".a.b" for class="a b".
 *    Class names are split on any whitespace run, so repeated or leading /
 *    trailing whitespace no longer produces empty segments ("..").
 *  - Attributes with an empty value are omitted. (They used to be emitted as an
 *    empty entry, which `Object.fromEntries` turned into an "undefined" key.)
 *  - Values starting with "data:image" are replaced by "image".
 *  - Values longer than MAX_VALUE_LENGTH are cut and suffixed with "...".
 *
 * @param {*} node - Candidate node.
 * @param {*} currentWindow - Window-like object providing `HTMLElement`.
 * @returns {Object<string, string>} Empty object when `node` is not an
 *   HTMLElement or has no attributes.
 */
function getNodeAttributes(node, currentWindow) {
  if (!node || !(node instanceof currentWindow.HTMLElement) || !node.attributes) {
    return {};
  }
  const entries = [];
  for (const attr of Array.from(node.attributes)) {
    if (attr.name === 'class') {
      const classNames = attr.value.split(/\s+/).filter(Boolean);
      entries.push([attr.name, `.${classNames.join('.')}`]);
      continue;
    }
    if (!attr.value) {
      continue;
    }
    let value = attr.value;
    if (value.startsWith('data:image')) {
      value = 'image';
    }
    if (value.length > MAX_VALUE_LENGTH) {
      value = `${value.slice(0, MAX_VALUE_LENGTH)}...`;
    }
    entries.push([attr.name, value]);
  }
  return Object.fromEntries(entries);
}
|
|
200
|
+
/**
 * Produces the hash id for a rect/content pair and, when a node is supplied,
 * registers the node under that id in the window-level cache (creating the
 * cache list first if `window.rpasceneNodeHashCacheList` is falsy).
 *
 * @param {*} node - Node to cache; falsy to skip caching.
 * @param {*} content - Content forwarded to `generateHashId`.
 * @param {*} rect - Rect forwarded to `generateHashId`.
 * @returns {*} The value returned by `generateHashId(rect, content)`.
 */
function rpasceneGenerateHash(node, content, rect) {
  const slicedHash = generateHashId(rect, content);
  if (!node) {
    return slicedHash;
  }
  if (!window.rpasceneNodeHashCacheList) {
    setNodeHashCacheListOnWindow();
  }
  setNodeToCacheList(node, slicedHash);
  return slicedHash;
}
|
|
208
|
+
/**
 * Resets `window.rpasceneNodeHashCacheList` to a fresh empty array.
 * Does nothing when no `window` global exists.
 */
function setNodeHashCacheListOnWindow() {
  if (typeof window === 'undefined') {
    return;
  }
  window.rpasceneNodeHashCacheList = [];
}
|
|
211
|
+
/**
 * Appends `{ node, id }` to `window.rpasceneNodeHashCacheList` unless a node is
 * already cached under `id`.
 *
 * Does nothing when no `window` global exists or when the cache list has not
 * been created.
 *
 * @param {*} node - Node to remember.
 * @param {*} id - Cache id for the node.
 */
function setNodeToCacheList(node, id) {
  if (typeof window === 'undefined') {
    return;
  }
  if (getNodeFromCacheList(id)) {
    return;
  }
  const cacheList = window.rpasceneNodeHashCacheList;
  if (cacheList != null) {
    cacheList.push({ node, id });
  }
}
|
|
221
|
+
/**
 * Looks up the node cached under `id` in `window.rpasceneNodeHashCacheList`.
 *
 * @param {*} id - Cache id to search for (strict equality).
 * @returns {*} The cached node; `undefined` when the list is missing or holds no
 *   entry for `id`; `null` when no `window` global exists.
 */
function getNodeFromCacheList(id) {
  if (typeof window === 'undefined') {
    return null;
  }
  const cacheList = window.rpasceneNodeHashCacheList;
  if (cacheList == null) {
    return undefined;
  }
  const entry = cacheList.find((item) => item.id === id);
  return entry == null ? undefined : entry.node;
}
|
|
228
|
+
/**
 * Renders a numeric id as a string.
 *
 * @param {*} numberId - Id to convert (template-literal string conversion).
 * @returns {string}
 */
function generateId(numberId) {
  const idText = `${numberId}`;
  return idText;
}
|
|
231
|
+
/**
 * Exposes `rpasceneGenerateHash` as `window.rpasceneGenerateHash`.
 * Does nothing when no `window` global exists.
 */
function setGenerateHashOnWindow() {
  if (typeof window === 'undefined') {
    return;
  }
  window.rpasceneGenerateHash = rpasceneGenerateHash;
}
|
|
234
|
+
/**
 * Exposes `elementRect` as `window.rpasceneVisibleRect`.
 * Does nothing when no `window` global exists.
 */
function setRpasceneVisibleRectOnWindow() {
  if (typeof window === 'undefined') {
    return;
  }
  window.rpasceneVisibleRect = elementRect;
}
|
|
237
|
+
/**
 * Exposes `extractTextWithPosition` as `window.extractTextWithPosition`.
 * Does nothing when no `window` global exists.
 */
function setExtractTextWithPositionOnWindow() {
  if (typeof window === 'undefined') {
    return;
  }
  window.extractTextWithPosition = extractTextWithPosition;
}
|
|
240
|
+
/**
 * Returns the root container to extract from: `document.body` when it exists,
 * otherwise `document` itself.
 *
 * @returns {*}
 */
function getTopDocument() {
  return document.body || document;
}
|
|
244
|
+
export { elementRect, generateId, getDebugMode, getNodeAttributes, getNodeFromCacheList, getPseudoElementContent, getRect, getTopDocument, hasOverflowY, isElementPartiallyInViewport, logger, overlappedRect, rpasceneGenerateHash, setDebugMode, setExtractTextWithPositionOnWindow, setGenerateHashOnWindow, setNodeHashCacheListOnWindow, setNodeToCacheList, setRpasceneVisibleRectOnWindow, validTextNodeContent };
|