@ubio/webvision 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/page.mjs ADDED
@@ -0,0 +1,267 @@
1
+ // src/page/utils.ts
2
/**
 * Tells whether an element is hidden, judging by its computed style.
 *
 * The element is reported as hidden when any of these holds:
 *
 * - `display: none`
 * - opacity below 0.1 (skipped when `checkOpacity` is false)
 * - `visibility: hidden` (skipped when `checkVisibility` is false)
 * - a transform that scales it down to nothing (skipped when `checkTransform` is false)
 * - the browser's own `element.checkVisibility()` returns false
 *
 * @param {Element} element - element to inspect
 * @param {{ checkOpacity?: boolean, checkVisibility?: boolean, checkTransform?: boolean }} [options]
 *   - individual checks to switch off; every check defaults to enabled
 * @returns {boolean} true when the element is considered hidden
 */
function isHidden(element, options = {}) {
  const {
    checkOpacity = true,
    checkVisibility = true,
    checkTransform = true,
  } = options;
  const style = getComputedStyle(element);
  if (style.display === "none") {
    return true;
  }
  if (checkOpacity && Number(style.opacity) < 0.1) {
    return true;
  }
  if (checkVisibility && style.visibility === "hidden") {
    return true;
  }
  if (checkTransform && isZeroScaleTransform(style.transform)) {
    return true;
  }
  // Element.checkVisibility() is absent in older browsers; without the guard
  // the call throws instead of simply skipping this extra check.
  if (typeof element.checkVisibility === "function" && !element.checkVisibility()) {
    return true;
  }
  return false;
}

/**
 * Tells whether a CSS `transform` value collapses an element to zero size.
 *
 * Computed styles report transforms in resolved `matrix(...)` / `matrix3d(...)`
 * form, so `scale(0)` shows up as a matrix whose linear part is all zeros.
 * The literal `scale(0)` spelling is still accepted for unresolved values.
 *
 * @param {unknown} transform - value of `style.transform`
 * @returns {boolean} true when the transform scales the element to nothing
 */
function isZeroScaleTransform(transform) {
  if (typeof transform !== "string") {
    return false;
  }
  if (transform.includes("scale(0)")) {
    return true;
  }
  const match = /^matrix(3d)?\(([^)]*)\)$/.exec(transform.trim());
  if (match === null) {
    return false;
  }
  const values = match[2].split(",").map(Number);
  // matrix(a, b, c, d, tx, ty) keeps its linear part in the first four values;
  // matrix3d() is column-major, so the x/y linear part sits at indexes 0, 1, 4, 5.
  const linearPart = match[1] === undefined
    ? values.slice(0, 4)
    : [values[0], values[1], values[4], values[5]];
  return linearPart.length === 4 && linearPart.every((value) => value === 0);
}
21
/**
 * Tells whether the element's bounding box covers more than 100 square pixels.
 *
 * @param {Element} element - element to measure
 * @returns {boolean} true when width × height exceeds 100
 */
function hasVisibleArea(element) {
  const { width, height } = element.getBoundingClientRect();
  return width * height > 100;
}
26
/**
 * Tells whether an element is hidden, also looking at its descendants when
 * the element itself has no meaningful area.
 *
 * An element that passes `isHidden` is hidden outright. An element with a
 * visible area is not hidden. Otherwise the element counts as hidden only
 * when every child element is (deeply) hidden as well.
 *
 * @param {Element} element - element to inspect
 * @param {object} [options] - forwarded unchanged to `isHidden`
 * @returns {boolean} true when neither the element nor its descendants are visible
 */
function deepIsHidden(element, options = {}) {
  if (isHidden(element, options)) {
    return true;
  }
  if (hasVisibleArea(element)) {
    return false;
  }
  // Tiny box: a single visible descendant is enough to keep the element.
  for (const child of element.children) {
    if (!deepIsHidden(child, options)) {
      return false;
    }
  }
  return true;
}
35
/**
 * Normalizes text for display: every run of whitespace and/or Unicode format
 * characters (category Cf, e.g. zero-width spaces) becomes a single space,
 * and the result is trimmed.
 *
 * @param {string} str - raw text
 * @returns {string} normalized text
 */
function normalizeText(str) {
  const collapsed = str.replace(/[\p{Cf}\s]+/gu, " ");
  return collapsed.trim();
}
38
/**
 * Tells whether the element is an `<img>` or has an `<img>` descendant.
 *
 * @param {Element} el - element to inspect
 * @returns {boolean} true when an image is found
 */
function containsImage(el) {
  if (el.matches("img")) {
    return true;
  }
  return Boolean(el.querySelector("img"));
}
41
/**
 * Tells whether every element below `el` is laid out inline.
 *
 * Non-element child nodes are ignored. The check fails as soon as a descendant
 * element has a tag listed in `ignoreTags`, or a computed `display` other than
 * `inline` / `inline-block`.
 *
 * @param {Element} el - root element whose descendants are checked
 * @param {string[]} [ignoreTags] - lower-case tag names that make the check fail
 * @returns {boolean} true when all descendant elements are inline
 */
function isRecursiveInline(el, ignoreTags = []) {
  const inlineDisplays = ["inline", "inline-block"];
  return [...el.childNodes].every((child) => {
    if (!(child instanceof Element)) {
      return true;
    }
    if (ignoreTags.includes(child.tagName.toLowerCase())) {
      return false;
    }
    if (!inlineDisplays.includes(getComputedStyle(child).display)) {
      return false;
    }
    return isRecursiveInline(child, ignoreTags);
  });
}
59
+
60
+ // src/page/snapshot.ts
61
/** Tags whose elements are left out of a snapshot by default. */
const DEFAULT_SKIP_TAGS = ["svg", "script", "noscript", "style", "link", "meta"];

/**
 * Default tag preference list. The position of a tag is its rank: when a
 * wrapper and its only child are collapsed, the tag listed earlier wins.
 */
const DEFAULT_SEMANTIC_TAGS = [
  // interactive and sectioning
  "a", "button", "label", "section",
  "article", "main", "header", "footer", "nav", "aside",
  // headings
  "h1", "h2", "h3", "h4", "h5", "h6",
  // lists
  "ul", "ol", "li", "dl", "dt", "dd",
  // text blocks
  "p", "pre", "code", "blockquote", "figure", "figcaption",
  // tables
  "table", "thead", "tbody", "tr", "td", "th",
  // forms
  "form", "input", "textarea", "select", "option", "fieldset", "legend",
  // inline emphasis
  "strong", "em", "sub", "sup",
];
109
/**
 * A simplified, text-oriented snapshot of a DOM subtree.
 *
 * Each snapshot wraps one DOM node and holds child snapshots for the accepted
 * child nodes. Chains of single-child wrappers are collapsed into one snapshot,
 * and `toIndentedText()` renders the resulting tree one line per snapshot.
 */
class DomSnapshot {
  /**
   * @param {Element | Text} node - DOM node to capture
   * @param {DomSnapshot | null} parent - parent snapshot, or null for the root
   * @param {object} [options] - overrides for the default options assigned below
   */
  constructor(node, parent, options) {
    this.node = node;
    this.parent = parent;
    this.children = [];
    this.options = {
      skipHidden: true,
      skipEmptyText: true,
      skipImages: false,
      skipTags: DEFAULT_SKIP_TAGS,
      tagPreference: DEFAULT_SEMANTIC_TAGS,
      collapseInline: true,
      ...options,
    };
    // Starts with the node's own classes; collapsed wrappers add theirs later.
    this.classList = [...(this.element?.classList ?? [])];
    if (this.element) {
      this.parseTree(this.element);
    }
  }

  /** The wrapped node when it is an Element, otherwise null. */
  get element() {
    return this.node instanceof Element ? this.node : null;
  }

  /** Number of ancestors in the snapshot tree (0 for the root). */
  get depth() {
    return this.parent ? this.parent.depth + 1 : 0;
  }

  /** Normalized text of the node (`innerText` for HTML elements, `textContent` otherwise). */
  get inlineText() {
    const text = this.node instanceof HTMLElement ? this.node.innerText : this.node.textContent;
    return normalizeText(text ?? "");
  }

  /** Indentation prefix: one space per depth level. */
  get indent() {
    return " ".repeat(this.depth);
  }

  /** True when the snapshot has no child snapshots. */
  get isLeaf() {
    return this.children.length === 0;
  }

  /** Lower-case tag name, or an empty string for non-element nodes. */
  get tagName() {
    return this.node instanceof Element ? this.node.tagName.toLowerCase() : "";
  }

  /** `href` of an anchor node, or an empty string. */
  get href() {
    return this.node instanceof HTMLAnchorElement ? this.node.href : "";
  }

  /** `src` of an image node, or an empty string. */
  get src() {
    return this.node instanceof HTMLImageElement ? this.node.src : "";
  }

  /**
   * Computed font size of the node as a number (the "px" suffix is stripped).
   * Text nodes report their parent snapshot's size, or 0 without a parent.
   *
   * @returns {number}
   */
  getFontSize() {
    if (this.node instanceof Text) {
      return this.parent?.getFontSize() ?? 0;
    }
    return Number(getComputedStyle(this.node).fontSize?.replace("px", ""));
  }

  /**
   * Classifies the node's font size relative to a reference size.
   *
   * @param {number} rootFontSize - reference font size
   * @returns {"large" | "small" | "normal"} "large" above 1.2×, "small" below 0.85×
   */
  getTextSize(rootFontSize) {
    const ownFontSize = this.getFontSize();
    if (ownFontSize > 1.2 * rootFontSize) {
      return "large";
    }
    if (ownFontSize < 0.85 * rootFontSize) {
      return "small";
    }
    return "normal";
  }

  /**
   * Rebuilds `children` from the accepted child nodes of `el`.
   *
   * A single accepted child is collapsed into this snapshot. When
   * `collapseInline` is set and everything below `el` is inline, no child
   * snapshots are created and the snapshot stays a leaf.
   */
  parseTree(el) {
    this.children = [];
    const childNodes = this.getAcceptedChildren(el);
    if (childNodes.length === 1) {
      this.collapseWrapper(el, childNodes[0]);
      return;
    }
    if (this.options.collapseInline && isRecursiveInline(el, this.options.tagPreference)) {
      // Purely inline content: do not process more children.
      return;
    }
    for (const childNode of childNodes) {
      this.children.push(new DomSnapshot(childNode, this, this.options));
    }
  }

  /**
   * Collapses an element with only one accepted child into this snapshot.
   *
   * The node retained so far is kept when its tag is listed in `tagPreference`
   * and the child's tag is either unlisted or listed later; in all other cases
   * the child becomes the snapshot's node. This keeps e.g. a link wrapping a
   * plain span (directly or through further wrappers) rendered as the link.
   *
   * @param {Element} el - element whose single accepted child is collapsed
   * @param {Element | Text} child - that single child
   */
  collapseWrapper(el, child) {
    if (child instanceof Text) {
      return;
    }
    this.classList.push(...child.classList);
    const { tagPreference } = this.options;
    // Rank the node retained so far rather than the immediate wrapper `el`, so
    // a preferred ancestor is not lost further down a chain of wrappers.
    const currentRank = tagPreference.indexOf(this.tagName);
    const childRank = tagPreference.indexOf(child.tagName.toLowerCase());
    // An unlisted child has rank -1 and must never beat a listed node.
    const keepCurrent = currentRank !== -1 && (childRank === -1 || currentRank < childRank);
    if (!keepCurrent) {
      this.node = child;
    }
    // Continue parsing inside the child element.
    this.parseTree(child);
  }

  /**
   * Returns the child nodes of `el` that should appear in the snapshot.
   *
   * Only Element and Text nodes are considered; hidden elements, skipped tags
   * and empty text are dropped according to the options. Elements containing
   * an image are kept regardless of the later criteria unless `skipImages` is set.
   */
  getAcceptedChildren(el) {
    const { skipHidden, skipImages, skipTags, skipEmptyText } = this.options;
    return [...el.childNodes].filter((node) => {
      const isElement = node instanceof Element;
      if (!isElement && !(node instanceof Text)) {
        return false;
      }
      if (isElement) {
        // Opacity is deliberately not checked here; the original source carries
        // a TODO noting that the opacity check breaks the PDF viewer.
        if (skipHidden && deepIsHidden(node, { checkOpacity: false })) {
          return false;
        }
        if (!skipImages && containsImage(node)) {
          return true;
        }
        if (skipTags.includes(node.tagName.toLowerCase())) {
          return false;
        }
      }
      if (skipEmptyText && normalizeText(node.textContent ?? "").length === 0) {
        return false;
      }
      return true;
    });
  }

  /** Renders this snapshot and all descendants, one line each, joined by "\n". */
  toIndentedText() {
    const lines = [this.renderLine()];
    for (const child of this.children) {
      lines.push(child.toIndentedText());
    }
    return lines.join("\n");
  }

  /**
   * Renders the snapshot's own line: indent, tag name, `(src)` and `(href)`
   * when present, and the inline text for leaves. Text nodes render as
   * indent plus text only.
   */
  renderLine() {
    const { indent } = this;
    if (this.node instanceof Text) {
      return [indent, this.inlineText].filter(Boolean).join(" ");
    }
    const { tagName, src, href } = this;
    const components = [indent, tagName];
    if (src) {
      components.push(`(${src})`);
    }
    if (href) {
      components.push(`(${href})`);
    }
    if (this.isLeaf) {
      components.push(" " + this.inlineText);
    }
    return components.filter(Boolean).join("");
  }
}
257
+ export {
258
+ DEFAULT_SEMANTIC_TAGS,
259
+ DEFAULT_SKIP_TAGS,
260
+ DomSnapshot,
261
+ containsImage,
262
+ deepIsHidden,
263
+ hasVisibleArea,
264
+ isHidden,
265
+ isRecursiveInline,
266
+ normalizeText
267
+ };
@@ -0,0 +1,2 @@
1
+ export * from './snapshot.js';
2
+ export * from './utils.js';
@@ -0,0 +1,3 @@
1
+ export * from './snapshot.js';
2
+ export * from './utils.js';
3
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/page/index.ts"],"names":[],"mappings":"AAAA,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC"}
@@ -0,0 +1,39 @@
1
+ export declare const DEFAULT_SKIP_TAGS: string[];
2
+ export declare const DEFAULT_SEMANTIC_TAGS: string[];
3
+ export interface DomSnapshotOptions {
4
+ skipHidden: boolean;
5
+ skipImages: boolean;
6
+ skipEmptyText: boolean;
7
+ skipTags: string[];
8
+ tagPreference: string[];
9
+ collapseInline: boolean;
10
+ }
11
+ export declare class DomSnapshot {
12
+ node: Element | Text;
13
+ parent: DomSnapshot | null;
14
+ options: DomSnapshotOptions;
15
+ classList: string[];
16
+ children: DomSnapshot[];
17
+ constructor(node: Element | Text, parent: DomSnapshot | null, options: Partial<DomSnapshotOptions>);
18
+ get element(): Element | null;
19
+ get depth(): number;
20
+ get inlineText(): string;
21
+ get indent(): string;
22
+ get isLeaf(): boolean;
23
+ get tagName(): string;
24
+ get href(): string;
25
+ get src(): string;
26
+ getFontSize(): number;
27
+ getTextSize(rootFontSize: number): "large" | "small" | "normal";
28
+ private parseTree;
29
+ /**
30
+ * Collapses an element with only one visible child into one.
31
+ *
32
+ * Wrapper element is preferred if it's a link or a button,
33
+ * in other cases child element is preferred.
34
+ */
35
+ private collapseWrapper;
36
+ private getAcceptedChildren;
37
+ toIndentedText(): string;
38
+ renderLine(): string;
39
+ }
@@ -0,0 +1,171 @@
1
+ import { containsImage, deepIsHidden, isRecursiveInline, normalizeText } from './utils.js';
2
/** Tags whose elements are left out of a snapshot by default. */
export const DEFAULT_SKIP_TAGS = [
    'svg',
    'script',
    'noscript',
    'style',
    'link',
    'meta',
];

/**
 * Default tag preference list. The position of a tag is its rank: when a
 * wrapper and its only child are collapsed, the tag listed earlier wins.
 */
export const DEFAULT_SEMANTIC_TAGS = [
    // interactive and sectioning
    'a', 'button', 'label', 'section', 'article',
    'main', 'header', 'footer', 'nav', 'aside',
    // headings
    'h1', 'h2', 'h3',
    'h4', 'h5', 'h6',
    // lists
    'ul', 'ol', 'li',
    'dl', 'dt', 'dd',
    // text blocks
    'p', 'pre', 'code',
    'blockquote', 'figure', 'figcaption',
    // tables
    'table', 'thead', 'tbody',
    'tr', 'td', 'th',
    // forms
    'form', 'input', 'textarea', 'select',
    'option', 'fieldset', 'legend',
    // inline emphasis
    'strong', 'em', 'sub', 'sup',
];
13
/**
 * A simplified, text-oriented snapshot of a DOM subtree.
 *
 * Each snapshot wraps one DOM node and holds child snapshots for the accepted
 * child nodes. Chains of single-child wrappers are collapsed into one snapshot,
 * and `toIndentedText()` renders the resulting tree one line per snapshot.
 */
export class DomSnapshot {
    /**
     * @param {Element | Text} node - DOM node to capture
     * @param {DomSnapshot | null} parent - parent snapshot, or null for the root
     * @param {object} [options] - overrides for the default options assigned below
     */
    constructor(node, parent, options) {
        this.node = node;
        this.parent = parent;
        this.children = [];
        this.options = {
            skipHidden: true,
            skipEmptyText: true,
            skipImages: false,
            skipTags: DEFAULT_SKIP_TAGS,
            tagPreference: DEFAULT_SEMANTIC_TAGS,
            collapseInline: true,
            ...options,
        };
        // Starts with the node's own classes; collapsed wrappers add theirs later.
        this.classList = [...(this.element?.classList ?? [])];
        if (this.element) {
            this.parseTree(this.element);
        }
    }

    /** The wrapped node when it is an Element, otherwise null. */
    get element() {
        return this.node instanceof Element ? this.node : null;
    }

    /** Number of ancestors in the snapshot tree (0 for the root). */
    get depth() {
        return this.parent ? this.parent.depth + 1 : 0;
    }

    /** Normalized text of the node (`innerText` for HTML elements, `textContent` otherwise). */
    get inlineText() {
        const text = this.node instanceof HTMLElement ? this.node.innerText : this.node.textContent;
        return normalizeText(text ?? '');
    }

    /** Indentation prefix: one space per depth level. */
    get indent() {
        return ' '.repeat(this.depth);
    }

    /** True when the snapshot has no child snapshots. */
    get isLeaf() {
        return this.children.length === 0;
    }

    /** Lower-case tag name, or an empty string for non-element nodes. */
    get tagName() {
        return this.node instanceof Element ? this.node.tagName.toLowerCase() : '';
    }

    /** `href` of an anchor node, or an empty string. */
    get href() {
        return this.node instanceof HTMLAnchorElement ? this.node.href : '';
    }

    /** `src` of an image node, or an empty string. */
    get src() {
        return this.node instanceof HTMLImageElement ? this.node.src : '';
    }

    /**
     * Computed font size of the node as a number (the "px" suffix is stripped).
     * Text nodes report their parent snapshot's size, or 0 without a parent.
     *
     * @returns {number}
     */
    getFontSize() {
        if (this.node instanceof Text) {
            return this.parent?.getFontSize() ?? 0;
        }
        return Number(getComputedStyle(this.node).fontSize?.replace('px', ''));
    }

    /**
     * Classifies the node's font size relative to a reference size.
     *
     * @param {number} rootFontSize - reference font size
     * @returns {"large" | "small" | "normal"} "large" above 1.2×, "small" below 0.85×
     */
    getTextSize(rootFontSize) {
        const ownFontSize = this.getFontSize();
        if (ownFontSize > 1.2 * rootFontSize) {
            return 'large';
        }
        if (ownFontSize < 0.85 * rootFontSize) {
            return 'small';
        }
        return 'normal';
    }

    /**
     * Rebuilds `children` from the accepted child nodes of `el`.
     *
     * A single accepted child is collapsed into this snapshot. When
     * `collapseInline` is set and everything below `el` is inline, no child
     * snapshots are created and the snapshot stays a leaf.
     */
    parseTree(el) {
        this.children = [];
        const childNodes = this.getAcceptedChildren(el);
        if (childNodes.length === 1) {
            this.collapseWrapper(el, childNodes[0]);
            return;
        }
        if (this.options.collapseInline && isRecursiveInline(el, this.options.tagPreference)) {
            // Purely inline content: do not process more children.
            return;
        }
        for (const childNode of childNodes) {
            this.children.push(new DomSnapshot(childNode, this, this.options));
        }
    }

    /**
     * Collapses an element with only one accepted child into this snapshot.
     *
     * The node retained so far is kept when its tag is listed in `tagPreference`
     * and the child's tag is either unlisted or listed later; in all other cases
     * the child becomes the snapshot's node. This keeps e.g. a link wrapping a
     * plain span (directly or through further wrappers) rendered as the link.
     *
     * @param {Element} el - element whose single accepted child is collapsed
     * @param {Element | Text} child - that single child
     */
    collapseWrapper(el, child) {
        if (child instanceof Text) {
            return;
        }
        this.classList.push(...child.classList);
        const { tagPreference } = this.options;
        // Rank the node retained so far rather than the immediate wrapper `el`, so
        // a preferred ancestor is not lost further down a chain of wrappers.
        const currentRank = tagPreference.indexOf(this.tagName);
        const childRank = tagPreference.indexOf(child.tagName.toLowerCase());
        // An unlisted child has rank -1 and must never beat a listed node.
        const keepCurrent = currentRank !== -1 && (childRank === -1 || currentRank < childRank);
        if (!keepCurrent) {
            this.node = child;
        }
        // Continue parsing inside the child element.
        this.parseTree(child);
    }

    /**
     * Returns the child nodes of `el` that should appear in the snapshot.
     *
     * Only Element and Text nodes are considered; hidden elements, skipped tags
     * and empty text are dropped according to the options. Elements containing
     * an image are kept regardless of the later criteria unless `skipImages` is set.
     */
    getAcceptedChildren(el) {
        const { skipHidden, skipImages, skipTags, skipEmptyText } = this.options;
        return [...el.childNodes].filter((node) => {
            const isElement = node instanceof Element;
            // Ignore non-text and non-element nodes.
            if (!isElement && !(node instanceof Text)) {
                return false;
            }
            if (isElement) {
                // Skip hidden elements (display, visibility, etc).
                // TODO checkOpacity breaks PDF viewer
                if (skipHidden && deepIsHidden(node, { checkOpacity: false })) {
                    return false;
                }
                // Do not skip images even if other criteria are met.
                if (!skipImages && containsImage(node)) {
                    return true;
                }
                if (skipTags.includes(node.tagName.toLowerCase())) {
                    return false;
                }
            }
            if (skipEmptyText && normalizeText(node.textContent ?? '').length === 0) {
                return false;
            }
            return true;
        });
    }

    /** Renders this snapshot and all descendants, one line each, joined by "\n". */
    toIndentedText() {
        const lines = [this.renderLine()];
        for (const child of this.children) {
            lines.push(child.toIndentedText());
        }
        return lines.join('\n');
    }

    /**
     * Renders the snapshot's own line: indent, tag name, `(src)` and `(href)`
     * when present, and the inline text for leaves. Text nodes render as
     * indent plus text only.
     */
    renderLine() {
        const { indent } = this;
        if (this.node instanceof Text) {
            return [indent, this.inlineText].filter(Boolean).join(' ');
        }
        const { tagName, src, href } = this;
        const components = [indent, tagName];
        if (src) {
            components.push(`(${src})`);
        }
        if (href) {
            components.push(`(${href})`);
        }
        if (this.isLeaf) {
            components.push(' ' + this.inlineText);
        }
        return components.filter(Boolean).join('');
    }
}
171
+ //# sourceMappingURL=snapshot.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"snapshot.js","sourceRoot":"","sources":["../../src/page/snapshot.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAE3F,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAAC,KAAK,EAAE,QAAQ,EAAE,UAAU,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;AACxF,MAAM,CAAC,MAAM,qBAAqB,GAAG;IACjC,GAAG,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS;IACjC,SAAS,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,OAAO;IACrD,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;IAClC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;IAClC,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,YAAY,EAAE,QAAQ,EAAE,YAAY;IACxD,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;IAC3C,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ;IACrE,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK;CAC/B,CAAC;AAWF,MAAM,OAAO,WAAW;IAMpB,YACW,IAAoB,EACpB,MAA0B,EACjC,OAAoC;QAF7B,SAAI,GAAJ,IAAI,CAAgB;QACpB,WAAM,GAAN,MAAM,CAAoB;QAJrC,aAAQ,GAAkB,EAAE,CAAC;QAOzB,IAAI,CAAC,OAAO,GAAG;YACX,UAAU,EAAE,IAAI;YAChB,aAAa,EAAE,IAAI;YACnB,UAAU,EAAE,KAAK;YACjB,QAAQ,EAAE,iBAAiB;YAC3B,aAAa,EAAE,qBAAqB;YACpC,cAAc,EAAE,IAAI;YACpB,GAAG,OAAO;SACb,CAAC;QACF,IAAI,CAAC,SAAS,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,IAAI,EAAE,CAAC,CAAC,CAAC;QACtD,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACf,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACjC,CAAC;IACL,CAAC;IAED,IAAI,OAAO;QACP,OAAO,IAAI,CAAC,IAAI,YAAY,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;IAC3D,CAAC;IAED,IAAI,KAAK;QACL,OAAO,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC;IAED,IAAI,UAAU;QACV,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,YAAY,WAAW,CAAC,CAAC;YAC3C,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACrB,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC;QAC1B,OAAO,aAAa,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;IACrC,CAAC;IAED,IAAI,MAAM;QACN,OAAO,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACnC,CAAC;IAED,IAAI,MAAM;QACN,OAAO,IAAI,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,CAAC;IACtC,CAAC;IAED,IAAI,OAAO;QACP,OAAO,IAAI,CAAC,IAAI,YAAY,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC
,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IAC/E,CAAC;IAED,IAAI,IAAI;QACJ,OAAO,IAAI,CAAC,IAAI,YAAY,iBAAiB,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;IACxE,CAAC;IAED,IAAI,GAAG;QACH,OAAO,IAAI,CAAC,IAAI,YAAY,gBAAgB,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IACtE,CAAC;IAED,WAAW;QACP,IAAI,IAAI,CAAC,IAAI,YAAY,IAAI,EAAE,CAAC;YAC5B,OAAO,IAAI,CAAC,MAAM,EAAE,WAAW,EAAE,IAAI,CAAC,CAAC;QAC3C,CAAC;QACD,OAAO,MAAM,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC;IAC3E,CAAC;IAED,WAAW,CAAC,YAAoB;QAC5B,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QACvC,IAAI,WAAW,GAAG,GAAG,GAAG,YAAY,EAAE,CAAC;YACnC,OAAO,OAAO,CAAC;QACnB,CAAC;QACD,IAAI,WAAW,GAAG,IAAI,GAAG,YAAY,EAAE,CAAC;YACpC,OAAO,OAAO,CAAC;QACnB,CAAC;QACD,OAAO,QAAQ,CAAC;IACpB,CAAC;IAEO,SAAS,CAAC,EAAW;QACzB,IAAI,CAAC,QAAQ,GAAG,EAAE,CAAC;QACnB,MAAM,UAAU,GAAG,IAAI,CAAC,mBAAmB,CAAC,EAAE,CAAC,CAAC;QAChD,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAC,eAAe,CAAC,EAAE,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;QACnD,CAAC;QACD,IAAI,IAAI,CAAC,OAAO,CAAC,cAAc,IAAI,iBAAiB,CAAC,EAAE,EAAE,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,EAAE,CAAC;YACnF,+BAA+B;YAC/B,OAAO;QACX,CAAC;QACD,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;YACjC,MAAM,QAAQ,GAAG,IAAI,WAAW,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;YAChE,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACjC,CAAC;IACL,CAAC;IAED;;;;;OAKG;IACK,eAAe,CAAC,EAAW,EAAE,KAAqB;QACtD,IAAI,KAAK,YAAY,IAAI,EAAE,CAAC;YACxB,OAAO;QACX,CAAC;QACD,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC;QACxC,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;QAChF,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;QAClF,MAAM,YAAY,GAAG,UAAU,KAAK,CAAC,CAAC,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC,CAAC;QACnE,IAAI,CAAC,YAAY,EAAE,CAAC;YAChB,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC;QACtB,CAAC;QACD,iCAAiC;QACjC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;IAC1B,CAAC;IAEO,mBAAmB,CAAC,EAAW;QACnC,MAAM,UAA
U,GAAG,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,CAAC;QACtC,OAAO,UAAU,CAAC,MAAM,CAAC,CAAC,IAAU,EAA0B,EAAE;YAC5D,qCAAqC;YACrC,IAAI,CAAC,CAAC,IAAI,YAAY,OAAO,IAAI,IAAI,YAAY,IAAI,CAAC,EAAE,CAAC;gBACrD,OAAO,KAAK,CAAC;YACjB,CAAC;YACD,IAAI,IAAI,YAAY,OAAO,EAAE,CAAC;gBAC1B,2DAA2D;gBAC3D,sCAAsC;gBACtC,IAAI,IAAI,CAAC,OAAO,CAAC,UAAU,IAAI,YAAY,CAAC,IAAI,EAAE,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;oBACzE,OAAO,KAAK,CAAC;gBACjB,CAAC;gBACD,oDAAoD;gBACpD,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,IAAI,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC;oBAClD,OAAO,IAAI,CAAC;gBAChB,CAAC;gBACD,mBAAmB;gBACnB,IAAI,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;oBAC7D,OAAO,KAAK,CAAC;gBACjB,CAAC;YACL,CAAC;YACD,6BAA6B;YAC7B,IAAI,IAAI,CAAC,OAAO,CAAC,aAAa,EAAE,CAAC;gBAC7B,MAAM,WAAW,GAAG,aAAa,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC;gBACvE,IAAI,WAAW,EAAE,CAAC;oBACd,OAAO,KAAK,CAAC;gBACjB,CAAC;YACL,CAAC;YACD,OAAO,IAAI,CAAC;QAChB,CAAC,CAAC,CAAC;IACP,CAAC;IAED,cAAc;QACV,MAAM,MAAM,GAAG;YACX,IAAI,CAAC,UAAU,EAAE;SACpB,CAAC;QACF,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAChC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,cAAc,EAAE,CAAC,CAAC;QACxC,CAAC;QACD,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,CAAC;IAED,UAAU;QACN,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACvC,MAAM,UAAU,GAAyB,CAAC,MAAM,CAAC,CAAC;QAClD,IAAI,IAAI,CAAC,IAAI,YAAY,IAAI,EAAE,CAAC;YAC5B,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC/D,CAAC;QACD,MAAM,EAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,IAAI,CAAC;QACpC,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACzB,IAAI,GAAG,EAAE,CAAC;YACN,UAAU,CAAC,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC;QAChC,CAAC;QACD,IAAI,IAAI,EAAE,CAAC;YACP,UAAU,CAAC,IAAI,CAAC,IAAI,IAAI,GAAG,CAAC,CAAC;QACjC,CAAC;QACD,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YACd,UAAU,CAAC,IAAI,CAAC,GAAG,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC;QAC3C,CAAC;QACD,OAAO,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC/C,CAAC;CAEJ"}
@@ -0,0 +1,19 @@
1
+ export interface VisibilityOptions {
2
+ checkOpacity: boolean;
3
+ checkVisibility: boolean;
4
+ checkTransform: boolean;
5
+ }
6
+ /**
7
+ * An element, together with its descendants, is considered not visible if any of the following holds:
8
+ *
9
+ * - opacity < 0.1
10
+ * - display: none
11
+ * - visibility: hidden
12
+ * - transform: scale(0)
13
+ */
14
+ export declare function isHidden(element: Element, options?: Partial<VisibilityOptions>): boolean;
15
+ export declare function hasVisibleArea(element: Element): boolean;
16
+ export declare function deepIsHidden(element: Element, options?: Partial<VisibilityOptions>): boolean;
17
+ export declare function normalizeText(str: string): string;
18
+ export declare function containsImage(el: Element): boolean;
19
+ export declare function isRecursiveInline(el: Element, ignoreTags?: string[]): boolean;
@@ -0,0 +1,68 @@
1
/**
 * Tells whether an element is hidden, judging by its computed style.
 *
 * The element (together with its descendants) is reported as hidden when any
 * of these holds:
 *
 * - `display: none`
 * - opacity below 0.1 (skipped when `checkOpacity` is false)
 * - `visibility: hidden` (skipped when `checkVisibility` is false)
 * - a transform that scales it down to nothing (skipped when `checkTransform` is false)
 * - the browser's own `element.checkVisibility()` returns false
 *
 * @param {Element} element - element to inspect
 * @param {{ checkOpacity?: boolean, checkVisibility?: boolean, checkTransform?: boolean }} [options]
 *   - individual checks to switch off; every check defaults to enabled
 * @returns {boolean} true when the element is considered hidden
 */
export function isHidden(element, options = {}) {
    const {
        checkOpacity = true,
        checkVisibility = true,
        checkTransform = true,
    } = options;
    const style = getComputedStyle(element);
    if (style.display === 'none') {
        return true;
    }
    if (checkOpacity && Number(style.opacity) < 0.1) {
        return true;
    }
    if (checkVisibility && style.visibility === 'hidden') {
        return true;
    }
    if (checkTransform && isZeroScaleTransform(style.transform)) {
        return true;
    }
    // Element.checkVisibility() is absent in older browsers; without the guard
    // the call throws instead of simply skipping this extra check.
    if (typeof element.checkVisibility === 'function' && !element.checkVisibility()) {
        return true;
    }
    return false;
}

/**
 * Tells whether a CSS `transform` value collapses an element to zero size.
 *
 * Computed styles report transforms in resolved `matrix(...)` / `matrix3d(...)`
 * form, so `scale(0)` shows up as a matrix whose linear part is all zeros.
 * The literal `scale(0)` spelling is still accepted for unresolved values.
 *
 * @param {unknown} transform - value of `style.transform`
 * @returns {boolean} true when the transform scales the element to nothing
 */
function isZeroScaleTransform(transform) {
    if (typeof transform !== 'string') {
        return false;
    }
    if (transform.includes('scale(0)')) {
        return true;
    }
    const match = /^matrix(3d)?\(([^)]*)\)$/.exec(transform.trim());
    if (match === null) {
        return false;
    }
    const values = match[2].split(',').map(Number);
    // matrix(a, b, c, d, tx, ty) keeps its linear part in the first four values;
    // matrix3d() is column-major, so the x/y linear part sits at indexes 0, 1, 4, 5.
    const linearPart = match[1] === undefined
        ? values.slice(0, 4)
        : [values[0], values[1], values[4], values[5]];
    return linearPart.length === 4 && linearPart.every((value) => value === 0);
}
27
/**
 * Tells whether the element's bounding box covers more than 100 square pixels.
 *
 * @param {Element} element - element to measure
 * @returns {boolean} true when width × height exceeds 100
 */
export function hasVisibleArea(element) {
    const { width, height } = element.getBoundingClientRect();
    return width * height > 100;
}
32
/**
 * Tells whether an element is hidden, also looking at its descendants when
 * the element itself has no meaningful area.
 *
 * An element that passes `isHidden` is hidden outright. An element with a
 * visible area is not hidden. Otherwise the element counts as hidden only
 * when every child element is (deeply) hidden as well.
 *
 * @param {Element} element - element to inspect
 * @param {object} [options] - forwarded unchanged to `isHidden`
 * @returns {boolean} true when neither the element nor its descendants are visible
 */
export function deepIsHidden(element, options = {}) {
    if (isHidden(element, options)) {
        return true;
    }
    if (hasVisibleArea(element)) {
        return false;
    }
    // Tiny box: a single visible descendant is enough to keep the element.
    for (const child of element.children) {
        if (!deepIsHidden(child, options)) {
            return false;
        }
    }
    return true;
}
41
/**
 * Normalizes text for display: every run of whitespace and/or Unicode format
 * characters (category Cf, e.g. zero-width spaces) becomes a single space,
 * and the result is trimmed.
 *
 * @param {string} str - raw text
 * @returns {string} normalized text
 */
export function normalizeText(str) {
    const collapsed = str.replace(/[\p{Cf}\s]+/gu, ' ');
    return collapsed.trim();
}
47
/**
 * Tells whether the element is an `<img>` or has an `<img>` descendant.
 *
 * @param {Element} el - element to inspect
 * @returns {boolean} true when an image is found
 */
export function containsImage(el) {
    if (el.matches('img')) {
        return true;
    }
    return Boolean(el.querySelector('img'));
}
50
/**
 * Tells whether every element below `el` is laid out inline.
 *
 * Non-element child nodes are ignored. The check fails as soon as a descendant
 * element has a tag listed in `ignoreTags`, or a computed `display` other than
 * `inline` / `inline-block`.
 *
 * @param {Element} el - root element whose descendants are checked
 * @param {string[]} [ignoreTags] - lower-case tag names that make the check fail
 * @returns {boolean} true when all descendant elements are inline
 */
export function isRecursiveInline(el, ignoreTags = []) {
    const inlineDisplays = ['inline', 'inline-block'];
    return [...el.childNodes].every((child) => {
        if (!(child instanceof Element)) {
            return true;
        }
        if (ignoreTags.includes(child.tagName.toLowerCase())) {
            return false;
        }
        if (!inlineDisplays.includes(getComputedStyle(child).display)) {
            return false;
        }
        return isRecursiveInline(child, ignoreTags);
    });
}
68
+ //# sourceMappingURL=utils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.js","sourceRoot":"","sources":["../../src/page/utils.ts"],"names":[],"mappings":"AAMA;;;;;;;GAOG;AACH,MAAM,UAAU,QAAQ,CAAC,OAAgB,EAAE,UAAsC,EAAE;IAC/E,MAAM,EACF,YAAY,GAAG,IAAI,EACnB,eAAe,GAAG,IAAI,EACtB,cAAc,GAAG,IAAI,GACxB,GAAG,OAAO,CAAC;IACZ,MAAM,KAAK,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;IACxC,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACtC,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC;IAC9B,MAAM,UAAU,GAAG,KAAK,CAAC,UAAU,CAAC;IACpC,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,CAAC;IAClC,IACI,OAAO,KAAK,MAAM;QAClB,CAAC,YAAY,IAAI,OAAO,GAAG,GAAG,CAAC;QAC/B,CAAC,eAAe,IAAI,CAAC,UAAU,KAAK,QAAQ,CAAC,CAAC;QAC9C,CAAC,cAAc,IAAI,SAAS,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,EACpD,CAAC;QACC,OAAO,IAAI,CAAC;IAChB,CAAC;IACD,IAAI,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC;QAC7B,OAAO,IAAI,CAAC;IAChB,CAAC;IACD,OAAO,KAAK,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,OAAgB;IAC3C,MAAM,IAAI,GAAG,OAAO,CAAC,qBAAqB,EAAE,CAAC;IAC7C,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC;IACtC,OAAO,IAAI,GAAG,GAAG,CAAC;AACtB,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,OAAgB,EAAE,UAAsC,EAAE;IACnF,IAAI,QAAQ,CAAC,OAAO,EAAE,OAAO,CAAC,EAAE,CAAC;QAC7B,OAAO,IAAI,CAAC;IAChB,CAAC;IACD,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,EAAE,CAAC;QAC3B,OAAO,CAAC,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,EAAE,CAAC,YAAY,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC,CAAC;IACxE,CAAC;IACD,OAAO,KAAK,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,GAAW;IACrC,OAAO,GAAG;SACL,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;SACxB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,EAAW;IACrC,OAAO,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;AAC1D,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,EAAW,EAAE,aAAuB,EAAE;IACpE,KAAK,MAAM,KAAK,IAAI,EAAE,CAAC,UAAU,EAAE,CAAC;QAChC,IAAI,KAAK,YAAY,OAAO,EAAE,CAAC;YAC3B,IAAI,UAAU,CAAC,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;gBACnD,OAAO,KAAK,CAAC;YACjB,CAAC;YACD,MAAM,OAAO,GAAG,gBAAgB,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC;YAChD,MAAM,MAAM,GAAG,OAAO,KAAK,QAAQ,IAAI,OAAO,KAAK,cAAc,CAAC;YAClE,IAAI,CAAC,MAAM,EAAE
,CAAC;gBACV,OAAO,KAAK,CAAC;YACjB,CAAC;YACD,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,UAAU,CAAC,EAAE,CAAC;gBACxC,OAAO,KAAK,CAAC;YACjB,CAAC;QACL,CAAC;IACL,CAAC;IACD,OAAO,IAAI,CAAC;AAChB,CAAC"}
package/package.json ADDED
@@ -0,0 +1,39 @@
1
+ {
2
+ "name": "@ubio/webvision",
3
+ "version": "1.0.1",
4
+ "main": "out/main/index.js",
5
+ "type": "module",
6
+ "exports": {
7
+ "./page": "./out/page/index.js"
8
+ },
9
+ "files": [
10
+ "out/main",
11
+ "out/page",
12
+ "build/page.mjs"
13
+ ],
14
+ "scripts": {
15
+ "clean": "rm -rf out *.tsbuildinfo",
16
+ "dev": "npm run clean && tsc -b -w",
17
+ "compile": "npm run clean && tsc -b",
18
+ "compile:page": "esbuild src/page/index.ts --bundle --outfile=build/page.mjs --target=es2022 --format=esm",
19
+ "lint": "eslint --cache",
20
+ "test": "NODE_ENV=test mocha",
21
+ "version": "npm run compile",
22
+ "postversion": "npm publish --access=public && git push --tags origin main"
23
+ },
24
+ "pre-commit": [
25
+ "lint",
26
+ "compile",
27
+ "compile:page"
28
+ ],
29
+ "license": "ISC",
30
+ "devDependencies": {
31
+ "@nodescript/eslint-config": "^2.1.0",
32
+ "@types/node": "^22.13.1",
33
+ "esbuild": "^0.25.0",
34
+ "eslint": "^9.19.0",
35
+ "playwright": "^1.50.1",
36
+ "pre-commit": "^1.2.2",
37
+ "typescript": "^5.7.3"
38
+ }
39
+ }