@mseep/vessel-browser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3618 @@
1
+ "use strict";
2
+ const electron = require("electron");
3
+ var Readability = { exports: {} };
4
+ var hasRequiredReadability$1;
5
+ function requireReadability$1() {
6
+ if (hasRequiredReadability$1) return Readability.exports;
7
+ hasRequiredReadability$1 = 1;
8
+ (function(module) {
9
/**
 * Readability constructor.
 *
 * Accepts `(doc, options)`. If the second argument itself looks like a
 * document (it has a `documentElement`), it is used as the document and the
 * third positional argument is used as the options object instead.
 *
 * @param {Document} doc The document to parse; must expose `documentElement`.
 * @param {Object} [options] Optional settings. Keys read here: `debug`,
 *   `maxElemsToParse`, `nbTopCandidates`, `charThreshold`,
 *   `classesToPreserve`, `keepClasses`, `serializer`, `disableJSONLD`,
 *   `allowedVideoRegex`, `linkDensityModifier`.
 * @throws {Error} If no document-like object was supplied.
 */
function Readability2(doc, options) {
  if (options && options.documentElement) {
    // The document was passed in second position; options come third.
    doc = options;
    options = arguments[2];
  } else if (!doc || !doc.documentElement) {
    throw new Error(
      "First argument to Readability constructor should be a document object."
    );
  }
  options = options || {};

  this._doc = doc;
  this._docJSDOMParser = this._doc.firstChild.__JSDOMParser__;
  this._articleTitle = null;
  this._articleByline = null;
  this._articleDir = null;
  this._articleSiteName = null;
  this._attempts = [];
  this._metadata = {};

  // Configurable options, each falling back to the prototype default.
  this._debug = !!options.debug;
  this._maxElemsToParse = options.maxElemsToParse || this.DEFAULT_MAX_ELEMS_TO_PARSE;
  this._nbTopCandidates = options.nbTopCandidates || this.DEFAULT_N_TOP_CANDIDATES;
  // `??` rather than `||`: an explicit threshold of 0 must be honoured
  // instead of being silently replaced by the default.
  this._charThreshold = options.charThreshold ?? this.DEFAULT_CHAR_THRESHOLD;
  this._classesToPreserve = this.CLASSES_TO_PRESERVE.concat(
    options.classesToPreserve || []
  );
  this._keepClasses = !!options.keepClasses;
  this._serializer = options.serializer || function(el) {
    return el.innerHTML;
  };
  this._disableJSONLD = !!options.disableJSONLD;
  this._allowedVideoRegex = options.allowedVideoRegex || this.REGEXPS.videos;
  this._linkDensityModifier = options.linkDensityModifier || 0;

  // Start with all flags set.
  this._flags = this.FLAG_STRIP_UNLIKELYS | this.FLAG_WEIGHT_CLASSES | this.FLAG_CLEAN_CONDITIONALLY;

  if (this._debug) {
    // Renders a node as a short human-readable string for log output.
    let logNode = function(node) {
      if (node.nodeType == node.TEXT_NODE) {
        return `${node.nodeName} ("${node.textContent}")`;
      }
      let attrPairs = Array.from(node.attributes || [], function(attr) {
        return `${attr.name}="${attr.value}"`;
      }).join(" ");
      return `<${node.localName} ${attrPairs}>`;
    };
    this.log = function() {
      if (typeof console !== "undefined") {
        let args = Array.from(arguments, (arg) => {
          if (arg && arg.nodeType == this.ELEMENT_NODE) {
            return logNode(arg);
          }
          return arg;
        });
        args.unshift("Reader: (Readability)");
        console.log(...args);
      } else if (typeof dump !== "undefined") {
        // Fallback for environments that expose a global `dump` but no console.
        var msg = Array.prototype.map.call(arguments, function(x) {
          return x && x.nodeName ? logNode(x) : x;
        }).join(" ");
        dump("Reader: (Readability) " + msg + "\n");
      }
    };
  } else {
    // Logging disabled: keep the call sites cheap with a no-op.
    this.log = function() {
    };
  }
}
74
+ Readability2.prototype = {
75
+ FLAG_STRIP_UNLIKELYS: 1,
76
+ FLAG_WEIGHT_CLASSES: 2,
77
+ FLAG_CLEAN_CONDITIONALLY: 4,
78
+ // https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeType
79
+ ELEMENT_NODE: 1,
80
+ TEXT_NODE: 3,
81
+ // Max number of nodes supported by this parser. Default: 0 (no limit)
82
+ DEFAULT_MAX_ELEMS_TO_PARSE: 0,
83
+ // The number of top candidates to consider when analysing how
84
+ // tight the competition is among candidates.
85
+ DEFAULT_N_TOP_CANDIDATES: 5,
86
+ // Element tags to score by default.
87
+ DEFAULT_TAGS_TO_SCORE: "section,h2,h3,h4,h5,h6,p,td,pre".toUpperCase().split(","),
88
+ // The default number of chars an article must have in order to return a result
89
+ DEFAULT_CHAR_THRESHOLD: 500,
90
+ // All of the regular expressions in use within readability.
91
+ // Defined up here so we don't instantiate them repeatedly in loops.
92
+ REGEXPS: {
93
+ // NOTE: These two regular expressions are duplicated in
94
+ // Readability-readerable.js. Please keep both copies in sync.
95
+ unlikelyCandidates: /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,
96
+ okMaybeItsACandidate: /and|article|body|column|content|main|shadow/i,
97
+ positive: /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,
98
+ negative: /-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|footer|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|widget/i,
99
+ extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,
100
+ byline: /byline|author|dateline|writtenby|p-author/i,
101
+ replaceFonts: /<(\/?)font[^>]*>/gi,
102
+ normalize: /\s{2,}/g,
103
+ videos: /\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i,
104
+ shareElements: /(\b|_)(share|sharedaddy)(\b|_)/i,
105
+ nextLink: /(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i,
106
+ prevLink: /(prev|earl|old|new|<|«)/i,
107
+ tokenize: /\W+/g,
108
+ whitespace: /^\s*$/,
109
+ hasContent: /\S$/,
110
+ hashUrl: /^#.+/,
111
+ srcsetUrl: /(\S+)(\s+[\d.]+[xw])?(\s*(?:,|$))/g,
112
+ b64DataUrl: /^data:\s*([^\s;,]+)\s*;\s*base64\s*,/i,
113
+ // Commas as used in Latin, Sindhi, Chinese and various other scripts.
114
+ // see: https://en.wikipedia.org/wiki/Comma#Comma_variants
115
+ commas: /\u002C|\u060C|\uFE50|\uFE10|\uFE11|\u2E41|\u2E34|\u2E32|\uFF0C/g,
116
+ // See: https://schema.org/Article
117
+ jsonLdArticleTypes: /^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPosting|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/,
118
+ // used to see if a node's content matches words commonly used for ad blocks or loading indicators
119
+ adWords: /^(ad(vertising|vertisement)?|pub(licité)?|werb(ung)?|广告|Реклама|Anuncio)$/iu,
120
+ loadingWords: /^((loading|正在加载|Загрузка|chargement|cargando)(…|\.\.\.)?)$/iu
121
+ },
122
+ UNLIKELY_ROLES: [
123
+ "menu",
124
+ "menubar",
125
+ "complementary",
126
+ "navigation",
127
+ "alert",
128
+ "alertdialog",
129
+ "dialog"
130
+ ],
131
+ DIV_TO_P_ELEMS: /* @__PURE__ */ new Set([
132
+ "BLOCKQUOTE",
133
+ "DL",
134
+ "DIV",
135
+ "IMG",
136
+ "OL",
137
+ "P",
138
+ "PRE",
139
+ "TABLE",
140
+ "UL"
141
+ ]),
142
+ ALTER_TO_DIV_EXCEPTIONS: ["DIV", "ARTICLE", "SECTION", "P", "OL", "UL"],
143
+ PRESENTATIONAL_ATTRIBUTES: [
144
+ "align",
145
+ "background",
146
+ "bgcolor",
147
+ "border",
148
+ "cellpadding",
149
+ "cellspacing",
150
+ "frame",
151
+ "hspace",
152
+ "rules",
153
+ "style",
154
+ "valign",
155
+ "vspace"
156
+ ],
157
+ DEPRECATED_SIZE_ATTRIBUTE_ELEMS: ["TABLE", "TH", "TD", "HR", "PRE"],
158
+ // The commented out elements qualify as phrasing content but tend to be
159
+ // removed by readability when put into paragraphs, so we ignore them here.
160
+ PHRASING_ELEMS: [
161
+ // "CANVAS", "IFRAME", "SVG", "VIDEO",
162
+ "ABBR",
163
+ "AUDIO",
164
+ "B",
165
+ "BDO",
166
+ "BR",
167
+ "BUTTON",
168
+ "CITE",
169
+ "CODE",
170
+ "DATA",
171
+ "DATALIST",
172
+ "DFN",
173
+ "EM",
174
+ "EMBED",
175
+ "I",
176
+ "IMG",
177
+ "INPUT",
178
+ "KBD",
179
+ "LABEL",
180
+ "MARK",
181
+ "MATH",
182
+ "METER",
183
+ "NOSCRIPT",
184
+ "OBJECT",
185
+ "OUTPUT",
186
+ "PROGRESS",
187
+ "Q",
188
+ "RUBY",
189
+ "SAMP",
190
+ "SCRIPT",
191
+ "SELECT",
192
+ "SMALL",
193
+ "SPAN",
194
+ "STRONG",
195
+ "SUB",
196
+ "SUP",
197
+ "TEXTAREA",
198
+ "TIME",
199
+ "VAR",
200
+ "WBR"
201
+ ],
202
+ // These are the classes that readability sets itself.
203
+ CLASSES_TO_PRESERVE: ["page"],
204
+ // These are the list of HTML entities that need to be escaped.
205
+ HTML_ESCAPE_MAP: {
206
+ lt: "<",
207
+ gt: ">",
208
+ amp: "&",
209
+ quot: '"',
210
+ apos: "'"
211
+ },
212
+ /**
213
+ * Run any post-process modifications to article content as necessary.
214
+ *
215
+ * @param Element
216
+ * @return void
217
+ **/
218
+ _postProcessContent(articleContent) {
219
+ this._fixRelativeUris(articleContent);
220
+ this._simplifyNestedElements(articleContent);
221
+ if (!this._keepClasses) {
222
+ this._cleanClasses(articleContent);
223
+ }
224
+ },
225
+ /**
226
+ * Iterates over a NodeList, calls `filterFn` for each node and removes node
227
+ * if function returned `true`.
228
+ *
229
+ * If function is not passed, removes all the nodes in node list.
230
+ *
231
+ * @param NodeList nodeList The nodes to operate on
232
+ * @param Function filterFn the function to use as a filter
233
+ * @return void
234
+ */
235
+ _removeNodes(nodeList, filterFn) {
236
+ if (this._docJSDOMParser && nodeList._isLiveNodeList) {
237
+ throw new Error("Do not pass live node lists to _removeNodes");
238
+ }
239
+ for (var i = nodeList.length - 1; i >= 0; i--) {
240
+ var node = nodeList[i];
241
+ var parentNode = node.parentNode;
242
+ if (parentNode) {
243
+ if (!filterFn || filterFn.call(this, node, i, nodeList)) {
244
+ parentNode.removeChild(node);
245
+ }
246
+ }
247
+ }
248
+ },
249
+ /**
250
+ * Iterates over a NodeList, and calls _setNodeTag for each node.
251
+ *
252
+ * @param NodeList nodeList The nodes to operate on
253
+ * @param String newTagName the new tag name to use
254
+ * @return void
255
+ */
256
+ _replaceNodeTags(nodeList, newTagName) {
257
+ if (this._docJSDOMParser && nodeList._isLiveNodeList) {
258
+ throw new Error("Do not pass live node lists to _replaceNodeTags");
259
+ }
260
+ for (const node of nodeList) {
261
+ this._setNodeTag(node, newTagName);
262
+ }
263
+ },
264
+ /**
265
+ * Iterate over a NodeList, which doesn't natively fully implement the Array
266
+ * interface.
267
+ *
268
+ * For convenience, the current object context is applied to the provided
269
+ * iterate function.
270
+ *
271
+ * @param NodeList nodeList The NodeList.
272
+ * @param Function fn The iterate function.
273
+ * @return void
274
+ */
275
+ _forEachNode(nodeList, fn) {
276
+ Array.prototype.forEach.call(nodeList, fn, this);
277
+ },
278
+ /**
279
+ * Iterate over a NodeList, and return the first node that passes
280
+ * the supplied test function
281
+ *
282
+ * For convenience, the current object context is applied to the provided
283
+ * test function.
284
+ *
285
+ * @param NodeList nodeList The NodeList.
286
+ * @param Function fn The test function.
287
+ * @return void
288
+ */
289
+ _findNode(nodeList, fn) {
290
+ return Array.prototype.find.call(nodeList, fn, this);
291
+ },
292
+ /**
293
+ * Iterate over a NodeList, return true if any of the provided iterate
294
+ * function calls returns true, false otherwise.
295
+ *
296
+ * For convenience, the current object context is applied to the
297
+ * provided iterate function.
298
+ *
299
+ * @param NodeList nodeList The NodeList.
300
+ * @param Function fn The iterate function.
301
+ * @return Boolean
302
+ */
303
+ _someNode(nodeList, fn) {
304
+ return Array.prototype.some.call(nodeList, fn, this);
305
+ },
306
+ /**
307
+ * Iterate over a NodeList, return true if all of the provided iterate
308
+ * function calls return true, false otherwise.
309
+ *
310
+ * For convenience, the current object context is applied to the
311
+ * provided iterate function.
312
+ *
313
+ * @param NodeList nodeList The NodeList.
314
+ * @param Function fn The iterate function.
315
+ * @return Boolean
316
+ */
317
+ _everyNode(nodeList, fn) {
318
+ return Array.prototype.every.call(nodeList, fn, this);
319
+ },
320
+ _getAllNodesWithTag(node, tagNames) {
321
+ if (node.querySelectorAll) {
322
+ return node.querySelectorAll(tagNames.join(","));
323
+ }
324
+ return [].concat.apply(
325
+ [],
326
+ tagNames.map(function(tag) {
327
+ var collection = node.getElementsByTagName(tag);
328
+ return Array.isArray(collection) ? collection : Array.from(collection);
329
+ })
330
+ );
331
+ },
332
+ /**
333
+ * Removes the class="" attribute from every element in the given
334
+ * subtree, except those that match CLASSES_TO_PRESERVE and
335
+ * the classesToPreserve array from the options object.
336
+ *
337
+ * @param Element
338
+ * @return void
339
+ */
340
+ _cleanClasses(node) {
341
+ var classesToPreserve = this._classesToPreserve;
342
+ var className = (node.getAttribute("class") || "").split(/\s+/).filter((cls) => classesToPreserve.includes(cls)).join(" ");
343
+ if (className) {
344
+ node.setAttribute("class", className);
345
+ } else {
346
+ node.removeAttribute("class");
347
+ }
348
+ for (node = node.firstElementChild; node; node = node.nextElementSibling) {
349
+ this._cleanClasses(node);
350
+ }
351
+ },
352
+ /**
353
+ * Tests whether a string is a URL or not.
354
+ *
355
+ * @param {string} str The string to test
356
+ * @return {boolean} true if str is a URL, false if not
357
+ */
358
+ _isUrl(str) {
359
+ try {
360
+ new URL(str);
361
+ return true;
362
+ } catch {
363
+ return false;
364
+ }
365
+ },
366
+ /**
367
+ * Converts each <a> and <img> uri in the given element to an absolute URI,
368
+ * ignoring #ref URIs.
369
+ *
370
+ * @param Element
371
+ * @return void
372
+ */
373
+ _fixRelativeUris(articleContent) {
374
+ var baseURI = this._doc.baseURI;
375
+ var documentURI = this._doc.documentURI;
376
+ function toAbsoluteURI(uri) {
377
+ if (baseURI == documentURI && uri.charAt(0) == "#") {
378
+ return uri;
379
+ }
380
+ try {
381
+ return new URL(uri, baseURI).href;
382
+ } catch (ex) {
383
+ }
384
+ return uri;
385
+ }
386
+ var links = this._getAllNodesWithTag(articleContent, ["a"]);
387
+ this._forEachNode(links, function(link) {
388
+ var href = link.getAttribute("href");
389
+ if (href) {
390
+ if (href.indexOf("javascript:") === 0) {
391
+ if (link.childNodes.length === 1 && link.childNodes[0].nodeType === this.TEXT_NODE) {
392
+ var text = this._doc.createTextNode(link.textContent);
393
+ link.parentNode.replaceChild(text, link);
394
+ } else {
395
+ var container = this._doc.createElement("span");
396
+ while (link.firstChild) {
397
+ container.appendChild(link.firstChild);
398
+ }
399
+ link.parentNode.replaceChild(container, link);
400
+ }
401
+ } else {
402
+ link.setAttribute("href", toAbsoluteURI(href));
403
+ }
404
+ }
405
+ });
406
+ var medias = this._getAllNodesWithTag(articleContent, [
407
+ "img",
408
+ "picture",
409
+ "figure",
410
+ "video",
411
+ "audio",
412
+ "source"
413
+ ]);
414
+ this._forEachNode(medias, function(media) {
415
+ var src = media.getAttribute("src");
416
+ var poster = media.getAttribute("poster");
417
+ var srcset = media.getAttribute("srcset");
418
+ if (src) {
419
+ media.setAttribute("src", toAbsoluteURI(src));
420
+ }
421
+ if (poster) {
422
+ media.setAttribute("poster", toAbsoluteURI(poster));
423
+ }
424
+ if (srcset) {
425
+ var newSrcset = srcset.replace(
426
+ this.REGEXPS.srcsetUrl,
427
+ function(_, p1, p2, p3) {
428
+ return toAbsoluteURI(p1) + (p2 || "") + p3;
429
+ }
430
+ );
431
+ media.setAttribute("srcset", newSrcset);
432
+ }
433
+ });
434
+ },
435
+ _simplifyNestedElements(articleContent) {
436
+ var node = articleContent;
437
+ while (node) {
438
+ if (node.parentNode && ["DIV", "SECTION"].includes(node.tagName) && !(node.id && node.id.startsWith("readability"))) {
439
+ if (this._isElementWithoutContent(node)) {
440
+ node = this._removeAndGetNext(node);
441
+ continue;
442
+ } else if (this._hasSingleTagInsideElement(node, "DIV") || this._hasSingleTagInsideElement(node, "SECTION")) {
443
+ var child = node.children[0];
444
+ for (var i = 0; i < node.attributes.length; i++) {
445
+ child.setAttributeNode(node.attributes[i].cloneNode());
446
+ }
447
+ node.parentNode.replaceChild(child, node);
448
+ node = child;
449
+ continue;
450
+ }
451
+ }
452
+ node = this._getNextNode(node);
453
+ }
454
+ },
455
+ /**
456
+ * Get the article title as an H1.
457
+ *
458
+ * @return string
459
+ **/
460
+ _getArticleTitle() {
461
+ var doc = this._doc;
462
+ var curTitle = "";
463
+ var origTitle = "";
464
+ try {
465
+ curTitle = origTitle = doc.title.trim();
466
+ if (typeof curTitle !== "string") {
467
+ curTitle = origTitle = this._getInnerText(
468
+ doc.getElementsByTagName("title")[0]
469
+ );
470
+ }
471
+ } catch (e) {
472
+ }
473
+ var titleHadHierarchicalSeparators = false;
474
+ function wordCount(str) {
475
+ return str.split(/\s+/).length;
476
+ }
477
+ if (/ [\|\-\\\/>»] /.test(curTitle)) {
478
+ titleHadHierarchicalSeparators = / [\\\/>»] /.test(curTitle);
479
+ let allSeparators = Array.from(origTitle.matchAll(/ [\|\-\\\/>»] /gi));
480
+ curTitle = origTitle.substring(0, allSeparators.pop().index);
481
+ if (wordCount(curTitle) < 3) {
482
+ curTitle = origTitle.replace(/^[^\|\-\\\/>»]*[\|\-\\\/>»]/gi, "");
483
+ }
484
+ } else if (curTitle.includes(": ")) {
485
+ var headings = this._getAllNodesWithTag(doc, ["h1", "h2"]);
486
+ var trimmedTitle = curTitle.trim();
487
+ var match = this._someNode(headings, function(heading) {
488
+ return heading.textContent.trim() === trimmedTitle;
489
+ });
490
+ if (!match) {
491
+ curTitle = origTitle.substring(origTitle.lastIndexOf(":") + 1);
492
+ if (wordCount(curTitle) < 3) {
493
+ curTitle = origTitle.substring(origTitle.indexOf(":") + 1);
494
+ } else if (wordCount(origTitle.substr(0, origTitle.indexOf(":"))) > 5) {
495
+ curTitle = origTitle;
496
+ }
497
+ }
498
+ } else if (curTitle.length > 150 || curTitle.length < 15) {
499
+ var hOnes = doc.getElementsByTagName("h1");
500
+ if (hOnes.length === 1) {
501
+ curTitle = this._getInnerText(hOnes[0]);
502
+ }
503
+ }
504
+ curTitle = curTitle.trim().replace(this.REGEXPS.normalize, " ");
505
+ var curTitleWordCount = wordCount(curTitle);
506
+ if (curTitleWordCount <= 4 && (!titleHadHierarchicalSeparators || curTitleWordCount != wordCount(origTitle.replace(/[\|\-\\\/>»]+/g, "")) - 1)) {
507
+ curTitle = origTitle;
508
+ }
509
+ return curTitle;
510
+ },
511
+ /**
512
+ * Prepare the HTML document for readability to scrape it.
513
+ * This includes things like stripping javascript, CSS, and handling terrible markup.
514
+ *
515
+ * @return void
516
+ **/
517
+ _prepDocument() {
518
+ var doc = this._doc;
519
+ this._removeNodes(this._getAllNodesWithTag(doc, ["style"]));
520
+ if (doc.body) {
521
+ this._replaceBrs(doc.body);
522
+ }
523
+ this._replaceNodeTags(this._getAllNodesWithTag(doc, ["font"]), "SPAN");
524
+ },
525
+ /**
526
+ * Finds the next node, starting from the given node, and ignoring
527
+ * whitespace in between. If the given node is an element, the same node is
528
+ * returned.
529
+ */
530
+ _nextNode(node) {
531
+ var next = node;
532
+ while (next && next.nodeType != this.ELEMENT_NODE && this.REGEXPS.whitespace.test(next.textContent)) {
533
+ next = next.nextSibling;
534
+ }
535
+ return next;
536
+ },
537
+ /**
538
+ * Replaces 2 or more successive <br> elements with a single <p>.
539
+ * Whitespace between <br> elements are ignored. For example:
540
+ * <div>foo<br>bar<br> <br><br>abc</div>
541
+ * will become:
542
+ * <div>foo<br>bar<p>abc</p></div>
543
+ */
544
+ _replaceBrs(elem) {
545
+ this._forEachNode(this._getAllNodesWithTag(elem, ["br"]), function(br) {
546
+ var next = br.nextSibling;
547
+ var replaced = false;
548
+ while ((next = this._nextNode(next)) && next.tagName == "BR") {
549
+ replaced = true;
550
+ var brSibling = next.nextSibling;
551
+ next.remove();
552
+ next = brSibling;
553
+ }
554
+ if (replaced) {
555
+ var p = this._doc.createElement("p");
556
+ br.parentNode.replaceChild(p, br);
557
+ next = p.nextSibling;
558
+ while (next) {
559
+ if (next.tagName == "BR") {
560
+ var nextElem = this._nextNode(next.nextSibling);
561
+ if (nextElem && nextElem.tagName == "BR") {
562
+ break;
563
+ }
564
+ }
565
+ if (!this._isPhrasingContent(next)) {
566
+ break;
567
+ }
568
+ var sibling = next.nextSibling;
569
+ p.appendChild(next);
570
+ next = sibling;
571
+ }
572
+ while (p.lastChild && this._isWhitespace(p.lastChild)) {
573
+ p.lastChild.remove();
574
+ }
575
+ if (p.parentNode.tagName === "P") {
576
+ this._setNodeTag(p.parentNode, "DIV");
577
+ }
578
+ }
579
+ });
580
+ },
581
+ _setNodeTag(node, tag) {
582
+ this.log("_setNodeTag", node, tag);
583
+ if (this._docJSDOMParser) {
584
+ node.localName = tag.toLowerCase();
585
+ node.tagName = tag.toUpperCase();
586
+ return node;
587
+ }
588
+ var replacement = node.ownerDocument.createElement(tag);
589
+ while (node.firstChild) {
590
+ replacement.appendChild(node.firstChild);
591
+ }
592
+ node.parentNode.replaceChild(replacement, node);
593
+ if (node.readability) {
594
+ replacement.readability = node.readability;
595
+ }
596
+ for (var i = 0; i < node.attributes.length; i++) {
597
+ replacement.setAttributeNode(node.attributes[i].cloneNode());
598
+ }
599
+ return replacement;
600
+ },
601
+ /**
602
+ * Prepare the article node for display. Clean out any inline styles,
603
+ * iframes, forms, strip extraneous <p> tags, etc.
604
+ *
605
+ * @param Element
606
+ * @return void
607
+ **/
608
+ _prepArticle(articleContent) {
609
+ this._cleanStyles(articleContent);
610
+ this._markDataTables(articleContent);
611
+ this._fixLazyImages(articleContent);
612
+ this._cleanConditionally(articleContent, "form");
613
+ this._cleanConditionally(articleContent, "fieldset");
614
+ this._clean(articleContent, "object");
615
+ this._clean(articleContent, "embed");
616
+ this._clean(articleContent, "footer");
617
+ this._clean(articleContent, "link");
618
+ this._clean(articleContent, "aside");
619
+ var shareElementThreshold = this.DEFAULT_CHAR_THRESHOLD;
620
+ this._forEachNode(articleContent.children, function(topCandidate) {
621
+ this._cleanMatchedNodes(topCandidate, function(node, matchString) {
622
+ return this.REGEXPS.shareElements.test(matchString) && node.textContent.length < shareElementThreshold;
623
+ });
624
+ });
625
+ this._clean(articleContent, "iframe");
626
+ this._clean(articleContent, "input");
627
+ this._clean(articleContent, "textarea");
628
+ this._clean(articleContent, "select");
629
+ this._clean(articleContent, "button");
630
+ this._cleanHeaders(articleContent);
631
+ this._cleanConditionally(articleContent, "table");
632
+ this._cleanConditionally(articleContent, "ul");
633
+ this._cleanConditionally(articleContent, "div");
634
+ this._replaceNodeTags(
635
+ this._getAllNodesWithTag(articleContent, ["h1"]),
636
+ "h2"
637
+ );
638
+ this._removeNodes(
639
+ this._getAllNodesWithTag(articleContent, ["p"]),
640
+ function(paragraph) {
641
+ var contentElementCount = this._getAllNodesWithTag(paragraph, [
642
+ "img",
643
+ "embed",
644
+ "object",
645
+ "iframe"
646
+ ]).length;
647
+ return contentElementCount === 0 && !this._getInnerText(paragraph, false);
648
+ }
649
+ );
650
+ this._forEachNode(
651
+ this._getAllNodesWithTag(articleContent, ["br"]),
652
+ function(br) {
653
+ var next = this._nextNode(br.nextSibling);
654
+ if (next && next.tagName == "P") {
655
+ br.remove();
656
+ }
657
+ }
658
+ );
659
+ this._forEachNode(
660
+ this._getAllNodesWithTag(articleContent, ["table"]),
661
+ function(table) {
662
+ var tbody = this._hasSingleTagInsideElement(table, "TBODY") ? table.firstElementChild : table;
663
+ if (this._hasSingleTagInsideElement(tbody, "TR")) {
664
+ var row = tbody.firstElementChild;
665
+ if (this._hasSingleTagInsideElement(row, "TD")) {
666
+ var cell = row.firstElementChild;
667
+ cell = this._setNodeTag(
668
+ cell,
669
+ this._everyNode(cell.childNodes, this._isPhrasingContent) ? "P" : "DIV"
670
+ );
671
+ table.parentNode.replaceChild(cell, table);
672
+ }
673
+ }
674
+ }
675
+ );
676
+ },
677
+ /**
678
+ * Initialize a node with the readability object. Also checks the
679
+ * className/id for special names to add to its score.
680
+ *
681
+ * @param Element
682
+ * @return void
683
+ **/
684
+ _initializeNode(node) {
685
+ node.readability = { contentScore: 0 };
686
+ switch (node.tagName) {
687
+ case "DIV":
688
+ node.readability.contentScore += 5;
689
+ break;
690
+ case "PRE":
691
+ case "TD":
692
+ case "BLOCKQUOTE":
693
+ node.readability.contentScore += 3;
694
+ break;
695
+ case "ADDRESS":
696
+ case "OL":
697
+ case "UL":
698
+ case "DL":
699
+ case "DD":
700
+ case "DT":
701
+ case "LI":
702
+ case "FORM":
703
+ node.readability.contentScore -= 3;
704
+ break;
705
+ case "H1":
706
+ case "H2":
707
+ case "H3":
708
+ case "H4":
709
+ case "H5":
710
+ case "H6":
711
+ case "TH":
712
+ node.readability.contentScore -= 5;
713
+ break;
714
+ }
715
+ node.readability.contentScore += this._getClassWeight(node);
716
+ },
717
+ _removeAndGetNext(node) {
718
+ var nextNode = this._getNextNode(node, true);
719
+ node.remove();
720
+ return nextNode;
721
+ },
722
+ /**
723
+ * Traverse the DOM from node to node, starting at the node passed in.
724
+ * Pass true for the second parameter to indicate this node itself
725
+ * (and its kids) are going away, and we want the next node over.
726
+ *
727
+ * Calling this in a loop will traverse the DOM depth-first.
728
+ *
729
+ * @param {Element} node
730
+ * @param {boolean} ignoreSelfAndKids
731
+ * @return {Element}
732
+ */
733
+ _getNextNode(node, ignoreSelfAndKids) {
734
+ if (!ignoreSelfAndKids && node.firstElementChild) {
735
+ return node.firstElementChild;
736
+ }
737
+ if (node.nextElementSibling) {
738
+ return node.nextElementSibling;
739
+ }
740
+ do {
741
+ node = node.parentNode;
742
+ } while (node && !node.nextElementSibling);
743
+ return node && node.nextElementSibling;
744
+ },
745
+ // compares second text to first one
746
+ // 1 = same text, 0 = completely different text
747
+ // works the way that it splits both texts into words and then finds words that are unique in second text
748
+ // the result is given by the lower length of unique parts
749
+ _textSimilarity(textA, textB) {
750
+ var tokensA = textA.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean);
751
+ var tokensB = textB.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean);
752
+ if (!tokensA.length || !tokensB.length) {
753
+ return 0;
754
+ }
755
+ var uniqTokensB = tokensB.filter((token) => !tokensA.includes(token));
756
+ var distanceB = uniqTokensB.join(" ").length / tokensB.join(" ").length;
757
+ return 1 - distanceB;
758
+ },
759
+ /**
760
+ * Checks whether an element node contains a valid byline
761
+ *
762
+ * @param node {Element}
763
+ * @param matchString {string}
764
+ * @return boolean
765
+ */
766
+ _isValidByline(node, matchString) {
767
+ var rel = node.getAttribute("rel");
768
+ var itemprop = node.getAttribute("itemprop");
769
+ var bylineLength = node.textContent.trim().length;
770
+ return (rel === "author" || itemprop && itemprop.includes("author") || this.REGEXPS.byline.test(matchString)) && !!bylineLength && bylineLength < 100;
771
+ },
772
+ _getNodeAncestors(node, maxDepth) {
773
+ maxDepth = maxDepth || 0;
774
+ var i = 0, ancestors = [];
775
+ while (node.parentNode) {
776
+ ancestors.push(node.parentNode);
777
+ if (maxDepth && ++i === maxDepth) {
778
+ break;
779
+ }
780
+ node = node.parentNode;
781
+ }
782
+ return ancestors;
783
+ },
784
+ /***
785
+ * grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is
786
+ * most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
787
+ *
788
+ * @param page a document to run upon. Needs to be a full document, complete with body.
789
+ * @return Element
790
+ **/
791
+ /* eslint-disable-next-line complexity */
792
+ _grabArticle(page) {
793
+ this.log("**** grabArticle ****");
794
+ var doc = this._doc;
795
+ var isPaging = page !== null;
796
+ page = page ? page : this._doc.body;
797
+ if (!page) {
798
+ this.log("No body found in document. Abort.");
799
+ return null;
800
+ }
801
+ var pageCacheHtml = page.innerHTML;
802
+ while (true) {
803
+ this.log("Starting grabArticle loop");
804
+ var stripUnlikelyCandidates = this._flagIsActive(
805
+ this.FLAG_STRIP_UNLIKELYS
806
+ );
807
+ var elementsToScore = [];
808
+ var node = this._doc.documentElement;
809
+ let shouldRemoveTitleHeader = true;
810
+ while (node) {
811
+ if (node.tagName === "HTML") {
812
+ this._articleLang = node.getAttribute("lang");
813
+ }
814
+ var matchString = node.className + " " + node.id;
815
+ if (!this._isProbablyVisible(node)) {
816
+ this.log("Removing hidden node - " + matchString);
817
+ node = this._removeAndGetNext(node);
818
+ continue;
819
+ }
820
+ if (node.getAttribute("aria-modal") == "true" && node.getAttribute("role") == "dialog") {
821
+ node = this._removeAndGetNext(node);
822
+ continue;
823
+ }
824
+ if (!this._articleByline && !this._metadata.byline && this._isValidByline(node, matchString)) {
825
+ var endOfSearchMarkerNode = this._getNextNode(node, true);
826
+ var next = this._getNextNode(node);
827
+ var itemPropNameNode = null;
828
+ while (next && next != endOfSearchMarkerNode) {
829
+ var itemprop = next.getAttribute("itemprop");
830
+ if (itemprop && itemprop.includes("name")) {
831
+ itemPropNameNode = next;
832
+ break;
833
+ } else {
834
+ next = this._getNextNode(next);
835
+ }
836
+ }
837
+ this._articleByline = (itemPropNameNode ?? node).textContent.trim();
838
+ node = this._removeAndGetNext(node);
839
+ continue;
840
+ }
841
+ if (shouldRemoveTitleHeader && this._headerDuplicatesTitle(node)) {
842
+ this.log(
843
+ "Removing header: ",
844
+ node.textContent.trim(),
845
+ this._articleTitle.trim()
846
+ );
847
+ shouldRemoveTitleHeader = false;
848
+ node = this._removeAndGetNext(node);
849
+ continue;
850
+ }
851
+ if (stripUnlikelyCandidates) {
852
+ if (this.REGEXPS.unlikelyCandidates.test(matchString) && !this.REGEXPS.okMaybeItsACandidate.test(matchString) && !this._hasAncestorTag(node, "table") && !this._hasAncestorTag(node, "code") && node.tagName !== "BODY" && node.tagName !== "A") {
853
+ this.log("Removing unlikely candidate - " + matchString);
854
+ node = this._removeAndGetNext(node);
855
+ continue;
856
+ }
857
+ if (this.UNLIKELY_ROLES.includes(node.getAttribute("role"))) {
858
+ this.log(
859
+ "Removing content with role " + node.getAttribute("role") + " - " + matchString
860
+ );
861
+ node = this._removeAndGetNext(node);
862
+ continue;
863
+ }
864
+ }
865
+ if ((node.tagName === "DIV" || node.tagName === "SECTION" || node.tagName === "HEADER" || node.tagName === "H1" || node.tagName === "H2" || node.tagName === "H3" || node.tagName === "H4" || node.tagName === "H5" || node.tagName === "H6") && this._isElementWithoutContent(node)) {
866
+ node = this._removeAndGetNext(node);
867
+ continue;
868
+ }
869
+ if (this.DEFAULT_TAGS_TO_SCORE.includes(node.tagName)) {
870
+ elementsToScore.push(node);
871
+ }
872
+ if (node.tagName === "DIV") {
873
+ var p = null;
874
+ var childNode = node.firstChild;
875
+ while (childNode) {
876
+ var nextSibling = childNode.nextSibling;
877
+ if (this._isPhrasingContent(childNode)) {
878
+ if (p !== null) {
879
+ p.appendChild(childNode);
880
+ } else if (!this._isWhitespace(childNode)) {
881
+ p = doc.createElement("p");
882
+ node.replaceChild(p, childNode);
883
+ p.appendChild(childNode);
884
+ }
885
+ } else if (p !== null) {
886
+ while (p.lastChild && this._isWhitespace(p.lastChild)) {
887
+ p.lastChild.remove();
888
+ }
889
+ p = null;
890
+ }
891
+ childNode = nextSibling;
892
+ }
893
+ if (this._hasSingleTagInsideElement(node, "P") && this._getLinkDensity(node) < 0.25) {
894
+ var newNode = node.children[0];
895
+ node.parentNode.replaceChild(newNode, node);
896
+ node = newNode;
897
+ elementsToScore.push(node);
898
+ } else if (!this._hasChildBlockElement(node)) {
899
+ node = this._setNodeTag(node, "P");
900
+ elementsToScore.push(node);
901
+ }
902
+ }
903
+ node = this._getNextNode(node);
904
+ }
905
+ var candidates = [];
906
+ this._forEachNode(elementsToScore, function(elementToScore) {
907
+ if (!elementToScore.parentNode || typeof elementToScore.parentNode.tagName === "undefined") {
908
+ return;
909
+ }
910
+ var innerText = this._getInnerText(elementToScore);
911
+ if (innerText.length < 25) {
912
+ return;
913
+ }
914
+ var ancestors2 = this._getNodeAncestors(elementToScore, 5);
915
+ if (ancestors2.length === 0) {
916
+ return;
917
+ }
918
+ var contentScore = 0;
919
+ contentScore += 1;
920
+ contentScore += innerText.split(this.REGEXPS.commas).length;
921
+ contentScore += Math.min(Math.floor(innerText.length / 100), 3);
922
+ this._forEachNode(ancestors2, function(ancestor, level) {
923
+ if (!ancestor.tagName || !ancestor.parentNode || typeof ancestor.parentNode.tagName === "undefined") {
924
+ return;
925
+ }
926
+ if (typeof ancestor.readability === "undefined") {
927
+ this._initializeNode(ancestor);
928
+ candidates.push(ancestor);
929
+ }
930
+ if (level === 0) {
931
+ var scoreDivider = 1;
932
+ } else if (level === 1) {
933
+ scoreDivider = 2;
934
+ } else {
935
+ scoreDivider = level * 3;
936
+ }
937
+ ancestor.readability.contentScore += contentScore / scoreDivider;
938
+ });
939
+ });
940
+ var topCandidates = [];
941
+ for (var c = 0, cl = candidates.length; c < cl; c += 1) {
942
+ var candidate = candidates[c];
943
+ var candidateScore = candidate.readability.contentScore * (1 - this._getLinkDensity(candidate));
944
+ candidate.readability.contentScore = candidateScore;
945
+ this.log("Candidate:", candidate, "with score " + candidateScore);
946
+ for (var t = 0; t < this._nbTopCandidates; t++) {
947
+ var aTopCandidate = topCandidates[t];
948
+ if (!aTopCandidate || candidateScore > aTopCandidate.readability.contentScore) {
949
+ topCandidates.splice(t, 0, candidate);
950
+ if (topCandidates.length > this._nbTopCandidates) {
951
+ topCandidates.pop();
952
+ }
953
+ break;
954
+ }
955
+ }
956
+ }
957
+ var topCandidate = topCandidates[0] || null;
958
+ var neededToCreateTopCandidate = false;
959
+ var parentOfTopCandidate;
960
+ if (topCandidate === null || topCandidate.tagName === "BODY") {
961
+ topCandidate = doc.createElement("DIV");
962
+ neededToCreateTopCandidate = true;
963
+ while (page.firstChild) {
964
+ this.log("Moving child out:", page.firstChild);
965
+ topCandidate.appendChild(page.firstChild);
966
+ }
967
+ page.appendChild(topCandidate);
968
+ this._initializeNode(topCandidate);
969
+ } else if (topCandidate) {
970
+ var alternativeCandidateAncestors = [];
971
+ for (var i = 1; i < topCandidates.length; i++) {
972
+ if (topCandidates[i].readability.contentScore / topCandidate.readability.contentScore >= 0.75) {
973
+ alternativeCandidateAncestors.push(
974
+ this._getNodeAncestors(topCandidates[i])
975
+ );
976
+ }
977
+ }
978
+ var MINIMUM_TOPCANDIDATES = 3;
979
+ if (alternativeCandidateAncestors.length >= MINIMUM_TOPCANDIDATES) {
980
+ parentOfTopCandidate = topCandidate.parentNode;
981
+ while (parentOfTopCandidate.tagName !== "BODY") {
982
+ var listsContainingThisAncestor = 0;
983
+ for (var ancestorIndex = 0; ancestorIndex < alternativeCandidateAncestors.length && listsContainingThisAncestor < MINIMUM_TOPCANDIDATES; ancestorIndex++) {
984
+ listsContainingThisAncestor += Number(
985
+ alternativeCandidateAncestors[ancestorIndex].includes(
986
+ parentOfTopCandidate
987
+ )
988
+ );
989
+ }
990
+ if (listsContainingThisAncestor >= MINIMUM_TOPCANDIDATES) {
991
+ topCandidate = parentOfTopCandidate;
992
+ break;
993
+ }
994
+ parentOfTopCandidate = parentOfTopCandidate.parentNode;
995
+ }
996
+ }
997
+ if (!topCandidate.readability) {
998
+ this._initializeNode(topCandidate);
999
+ }
1000
+ parentOfTopCandidate = topCandidate.parentNode;
1001
+ var lastScore = topCandidate.readability.contentScore;
1002
+ var scoreThreshold = lastScore / 3;
1003
+ while (parentOfTopCandidate.tagName !== "BODY") {
1004
+ if (!parentOfTopCandidate.readability) {
1005
+ parentOfTopCandidate = parentOfTopCandidate.parentNode;
1006
+ continue;
1007
+ }
1008
+ var parentScore = parentOfTopCandidate.readability.contentScore;
1009
+ if (parentScore < scoreThreshold) {
1010
+ break;
1011
+ }
1012
+ if (parentScore > lastScore) {
1013
+ topCandidate = parentOfTopCandidate;
1014
+ break;
1015
+ }
1016
+ lastScore = parentOfTopCandidate.readability.contentScore;
1017
+ parentOfTopCandidate = parentOfTopCandidate.parentNode;
1018
+ }
1019
+ parentOfTopCandidate = topCandidate.parentNode;
1020
+ while (parentOfTopCandidate.tagName != "BODY" && parentOfTopCandidate.children.length == 1) {
1021
+ topCandidate = parentOfTopCandidate;
1022
+ parentOfTopCandidate = topCandidate.parentNode;
1023
+ }
1024
+ if (!topCandidate.readability) {
1025
+ this._initializeNode(topCandidate);
1026
+ }
1027
+ }
1028
+ var articleContent = doc.createElement("DIV");
1029
+ if (isPaging) {
1030
+ articleContent.id = "readability-content";
1031
+ }
1032
+ var siblingScoreThreshold = Math.max(
1033
+ 10,
1034
+ topCandidate.readability.contentScore * 0.2
1035
+ );
1036
+ parentOfTopCandidate = topCandidate.parentNode;
1037
+ var siblings = parentOfTopCandidate.children;
1038
+ for (var s = 0, sl = siblings.length; s < sl; s++) {
1039
+ var sibling = siblings[s];
1040
+ var append = false;
1041
+ this.log(
1042
+ "Looking at sibling node:",
1043
+ sibling,
1044
+ sibling.readability ? "with score " + sibling.readability.contentScore : ""
1045
+ );
1046
+ this.log(
1047
+ "Sibling has score",
1048
+ sibling.readability ? sibling.readability.contentScore : "Unknown"
1049
+ );
1050
+ if (sibling === topCandidate) {
1051
+ append = true;
1052
+ } else {
1053
+ var contentBonus = 0;
1054
+ if (sibling.className === topCandidate.className && topCandidate.className !== "") {
1055
+ contentBonus += topCandidate.readability.contentScore * 0.2;
1056
+ }
1057
+ if (sibling.readability && sibling.readability.contentScore + contentBonus >= siblingScoreThreshold) {
1058
+ append = true;
1059
+ } else if (sibling.nodeName === "P") {
1060
+ var linkDensity = this._getLinkDensity(sibling);
1061
+ var nodeContent = this._getInnerText(sibling);
1062
+ var nodeLength = nodeContent.length;
1063
+ if (nodeLength > 80 && linkDensity < 0.25) {
1064
+ append = true;
1065
+ } else if (nodeLength < 80 && nodeLength > 0 && linkDensity === 0 && nodeContent.search(/\.( |$)/) !== -1) {
1066
+ append = true;
1067
+ }
1068
+ }
1069
+ }
1070
+ if (append) {
1071
+ this.log("Appending node:", sibling);
1072
+ if (!this.ALTER_TO_DIV_EXCEPTIONS.includes(sibling.nodeName)) {
1073
+ this.log("Altering sibling:", sibling, "to div.");
1074
+ sibling = this._setNodeTag(sibling, "DIV");
1075
+ }
1076
+ articleContent.appendChild(sibling);
1077
+ siblings = parentOfTopCandidate.children;
1078
+ s -= 1;
1079
+ sl -= 1;
1080
+ }
1081
+ }
1082
+ if (this._debug) {
1083
+ this.log("Article content pre-prep: " + articleContent.innerHTML);
1084
+ }
1085
+ this._prepArticle(articleContent);
1086
+ if (this._debug) {
1087
+ this.log("Article content post-prep: " + articleContent.innerHTML);
1088
+ }
1089
+ if (neededToCreateTopCandidate) {
1090
+ topCandidate.id = "readability-page-1";
1091
+ topCandidate.className = "page";
1092
+ } else {
1093
+ var div = doc.createElement("DIV");
1094
+ div.id = "readability-page-1";
1095
+ div.className = "page";
1096
+ while (articleContent.firstChild) {
1097
+ div.appendChild(articleContent.firstChild);
1098
+ }
1099
+ articleContent.appendChild(div);
1100
+ }
1101
+ if (this._debug) {
1102
+ this.log("Article content after paging: " + articleContent.innerHTML);
1103
+ }
1104
+ var parseSuccessful = true;
1105
+ var textLength = this._getInnerText(articleContent, true).length;
1106
+ if (textLength < this._charThreshold) {
1107
+ parseSuccessful = false;
1108
+ page.innerHTML = pageCacheHtml;
1109
+ this._attempts.push({
1110
+ articleContent,
1111
+ textLength
1112
+ });
1113
+ if (this._flagIsActive(this.FLAG_STRIP_UNLIKELYS)) {
1114
+ this._removeFlag(this.FLAG_STRIP_UNLIKELYS);
1115
+ } else if (this._flagIsActive(this.FLAG_WEIGHT_CLASSES)) {
1116
+ this._removeFlag(this.FLAG_WEIGHT_CLASSES);
1117
+ } else if (this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)) {
1118
+ this._removeFlag(this.FLAG_CLEAN_CONDITIONALLY);
1119
+ } else {
1120
+ this._attempts.sort(function(a, b) {
1121
+ return b.textLength - a.textLength;
1122
+ });
1123
+ if (!this._attempts[0].textLength) {
1124
+ return null;
1125
+ }
1126
+ articleContent = this._attempts[0].articleContent;
1127
+ parseSuccessful = true;
1128
+ }
1129
+ }
1130
+ if (parseSuccessful) {
1131
+ var ancestors = [parentOfTopCandidate, topCandidate].concat(
1132
+ this._getNodeAncestors(parentOfTopCandidate)
1133
+ );
1134
+ this._someNode(ancestors, function(ancestor) {
1135
+ if (!ancestor.tagName) {
1136
+ return false;
1137
+ }
1138
+ var articleDir = ancestor.getAttribute("dir");
1139
+ if (articleDir) {
1140
+ this._articleDir = articleDir;
1141
+ return true;
1142
+ }
1143
+ return false;
1144
+ });
1145
+ return articleContent;
1146
+ }
1147
+ }
1148
+ },
1149
+ /**
1150
+ * Converts some of the common HTML entities in string to their corresponding characters.
1151
+ *
1152
+ * @param str {string} - a string to unescape.
1153
+ * @return string without HTML entity.
1154
+ */
1155
+ _unescapeHtmlEntities(str) {
1156
+ if (!str) {
1157
+ return str;
1158
+ }
1159
+ var htmlEscapeMap = this.HTML_ESCAPE_MAP;
1160
+ return str.replace(/&(quot|amp|apos|lt|gt);/g, function(_, tag) {
1161
+ return htmlEscapeMap[tag];
1162
+ }).replace(/&#(?:x([0-9a-f]+)|([0-9]+));/gi, function(_, hex, numStr) {
1163
+ var num = parseInt(hex || numStr, hex ? 16 : 10);
1164
+ if (num == 0 || num > 1114111 || num >= 55296 && num <= 57343) {
1165
+ num = 65533;
1166
+ }
1167
+ return String.fromCodePoint(num);
1168
+ });
1169
+ },
1170
+ /**
1171
+ * Try to extract metadata from JSON-LD object.
1172
+ * For now, only Schema.org objects of type Article or its subtypes are supported.
1173
+ * @return Object with any metadata that could be extracted (possibly none)
1174
+ */
1175
+ _getJSONLD(doc) { // Scans <script> nodes for schema.org Article JSON-LD; returns the extracted fields, or {} when nothing usable is found.
1176
+ var scripts = this._getAllNodesWithTag(doc, ["script"]);
1177
+ var metadata; // Stays undefined until one script passes every check below.
1178
+ this._forEachNode(scripts, function(jsonLdElement) {
1179
+ if (!metadata && jsonLdElement.getAttribute("type") === "application/ld+json") { // Once metadata is set, the remaining scripts are skipped.
1180
+ try {
1181
+ var content = jsonLdElement.textContent.replace( // Strip a leading "<![CDATA[" / trailing "]]>" wrapper before parsing.
1182
+ /^\s*<!\[CDATA\[|\]\]>\s*$/g,
1183
+ ""
1184
+ );
1185
+ var parsed = JSON.parse(content); // A parse failure throws and is handled by the catch below.
1186
+ if (Array.isArray(parsed)) { // Top-level array: use the first entry whose @type matches the article types.
1187
+ parsed = parsed.find((it) => {
1188
+ return it["@type"] && it["@type"].match(this.REGEXPS.jsonLdArticleTypes);
1189
+ });
1190
+ if (!parsed) {
1191
+ return;
1192
+ }
1193
+ }
1194
+ var schemaDotOrgRegex = /^https?\:\/\/schema\.org\/?$/; // Accepts http or https, with or without a trailing slash.
1195
+ var matches = typeof parsed["@context"] === "string" && parsed["@context"].match(schemaDotOrgRegex) || typeof parsed["@context"] === "object" && typeof parsed["@context"]["@vocab"] == "string" && parsed["@context"]["@vocab"].match(schemaDotOrgRegex);
1196
+ if (!matches) { // @context must be schema.org, either directly or through an object's @vocab.
1197
+ return;
1198
+ }
1199
+ if (!parsed["@type"] && Array.isArray(parsed["@graph"])) { // No own @type: look for an article entry inside @graph instead.
1200
+ parsed = parsed["@graph"].find((it) => {
1201
+ return (it["@type"] || "").match(this.REGEXPS.jsonLdArticleTypes);
1202
+ });
1203
+ }
1204
+ if (!parsed || !parsed["@type"] || !parsed["@type"].match(this.REGEXPS.jsonLdArticleTypes)) { // Only article-typed objects are used.
1205
+ return;
1206
+ }
1207
+ metadata = {}; // From here on, later scripts are ignored even if no field ends up being filled in.
1208
+ if (typeof parsed.name === "string" && typeof parsed.headline === "string" && parsed.name !== parsed.headline) { // name and headline disagree: compare each with the page title.
1209
+ var title = this._getArticleTitle();
1210
+ var nameMatches = this._textSimilarity(parsed.name, title) > 0.75;
1211
+ var headlineMatches = this._textSimilarity(parsed.headline, title) > 0.75;
1212
+ if (headlineMatches && !nameMatches) { // headline wins only when it alone resembles the page title; note this branch does not trim.
1213
+ metadata.title = parsed.headline;
1214
+ } else {
1215
+ metadata.title = parsed.name;
1216
+ }
1217
+ } else if (typeof parsed.name === "string") {
1218
+ metadata.title = parsed.name.trim();
1219
+ } else if (typeof parsed.headline === "string") {
1220
+ metadata.title = parsed.headline.trim();
1221
+ }
1222
+ if (parsed.author) { // author may be a single object or an array of objects.
1223
+ if (typeof parsed.author.name === "string") {
1224
+ metadata.byline = parsed.author.name.trim();
1225
+ } else if (Array.isArray(parsed.author) && parsed.author[0] && typeof parsed.author[0].name === "string") {
1226
+ metadata.byline = parsed.author.filter(function(author) { // Keep only entries with a string name, then join them with ", ".
1227
+ return author && typeof author.name === "string";
1228
+ }).map(function(author) {
1229
+ return author.name.trim();
1230
+ }).join(", ");
1231
+ }
1232
+ }
1233
+ if (typeof parsed.description === "string") {
1234
+ metadata.excerpt = parsed.description.trim();
1235
+ }
1236
+ if (parsed.publisher && typeof parsed.publisher.name === "string") {
1237
+ metadata.siteName = parsed.publisher.name.trim();
1238
+ }
1239
+ if (typeof parsed.datePublished === "string") {
1240
+ metadata.datePublished = parsed.datePublished.trim();
1241
+ }
1242
+ } catch (err) { // Any error raised above is logged via this.log and not rethrown.
1243
+ this.log(err.message);
1244
+ }
1245
+ }
1246
+ });
1247
+ return metadata ? metadata : {}; // Callers always receive an object.
1248
+ },
1249
+ /**
1250
+ * Attempts to get excerpt and byline metadata for the article.
1251
+ *
1252
+ * @param {Object} jsonld — object containing any metadata that
1253
+ * could be extracted from JSON-LD object.
1254
+ *
1255
+ * @return Object with optional "excerpt" and "byline" properties
1256
+ */
1257
+ _getArticleMetadata(jsonld) { // Merges JSON-LD fields with <meta> tag values into title, byline, excerpt, siteName and publishedTime.
1258
+ var metadata = {};
1259
+ var values = {}; // Normalized meta key (e.g. "og:title") -> trimmed content string.
1260
+ var metaElements = this._doc.getElementsByTagName("meta");
1261
+ var propertyPattern = /\s*(article|dc|dcterm|og|twitter)\s*:\s*(author|creator|description|published_time|title|site_name)\s*/gi; // Unanchored; with the g flag match() returns every hit and only the first is used below.
1262
+ var namePattern = /^\s*(?:(dc|dcterm|og|twitter|parsely|weibo:(article|webpage))\s*[-\.:]\s*)?(author|creator|pub-date|description|title|site_name)\s*$/i; // Anchored; the prefix is optional and may be separated by "-", "." or ":".
1263
+ this._forEachNode(metaElements, function(element) {
1264
+ var elementName = element.getAttribute("name");
1265
+ var elementProperty = element.getAttribute("property");
1266
+ var content = element.getAttribute("content");
1267
+ if (!content) { // Meta tags without content contribute nothing.
1268
+ return;
1269
+ }
1270
+ var matches = null;
1271
+ var name = null;
1272
+ if (elementProperty) {
1273
+ matches = elementProperty.match(propertyPattern);
1274
+ if (matches) {
1275
+ name = matches[0].toLowerCase().replace(/\s/g, ""); // Key is the first match, lower-cased with all whitespace removed.
1276
+ values[name] = content.trim();
1277
+ }
1278
+ }
1279
+ if (!matches && elementName && namePattern.test(elementName)) { // The name attribute is consulted only when property gave no match.
1280
+ name = elementName;
1281
+ if (content) {
1282
+ name = name.toLowerCase().replace(/\s/g, "").replace(/\./g, ":"); // "." becomes ":" so that e.g. "dc.title" is stored as "dc:title".
1283
+ values[name] = content.trim();
1284
+ }
1285
+ }
1286
+ });
1287
+ metadata.title = jsonld.title || values["dc:title"] || values["dcterm:title"] || values["og:title"] || values["weibo:article:title"] || values["weibo:webpage:title"] || values.title || values["twitter:title"] || values["parsely-title"]; // First truthy source wins, JSON-LD first.
1288
+ if (!metadata.title) { // Last resort: derive the title via _getArticleTitle().
1289
+ metadata.title = this._getArticleTitle();
1290
+ }
1291
+ const articleAuthor = typeof values["article:author"] === "string" && !this._isUrl(values["article:author"]) ? values["article:author"] : void 0; // article:author is used only when _isUrl() rejects it.
1292
+ metadata.byline = jsonld.byline || values["dc:creator"] || values["dcterm:creator"] || values.author || values["parsely-author"] || articleAuthor;
1293
+ metadata.excerpt = jsonld.excerpt || values["dc:description"] || values["dcterm:description"] || values["og:description"] || values["weibo:article:description"] || values["weibo:webpage:description"] || values.description || values["twitter:description"];
1294
+ metadata.siteName = jsonld.siteName || values["og:site_name"];
1295
+ metadata.publishedTime = jsonld.datePublished || values["article:published_time"] || values["parsely-pub-date"] || null; // Explicit null when no source provides a date.
1296
+ metadata.title = this._unescapeHtmlEntities(metadata.title); // Every field goes through entity unescaping; falsy values are returned unchanged by that helper.
1297
+ metadata.byline = this._unescapeHtmlEntities(metadata.byline);
1298
+ metadata.excerpt = this._unescapeHtmlEntities(metadata.excerpt);
1299
+ metadata.siteName = this._unescapeHtmlEntities(metadata.siteName);
1300
+ metadata.publishedTime = this._unescapeHtmlEntities(metadata.publishedTime);
1301
+ return metadata;
1302
+ },
1303
+ /**
1304
+ * Check if node is an image, or if node contains exactly one image
1305
+ * whether as a direct child or as its descendants.
1306
+ *
1307
+ * @param Element
1308
+ **/
1309
+ _isSingleImage(node) {
1310
+ while (node) {
1311
+ if (node.tagName === "IMG") {
1312
+ return true;
1313
+ }
1314
+ if (node.children.length !== 1 || node.textContent.trim() !== "") {
1315
+ return false;
1316
+ }
1317
+ node = node.children[0];
1318
+ }
1319
+ return false;
1320
+ },
1321
+ /**
1322
+ * Find all <noscript> that are located after <img> nodes, and which contain only one
1323
+ * <img> element. Replace the first image with the image from inside the <noscript> tag,
1324
+ * and remove the <noscript> tag. This improves the quality of the images we use on
1325
+ * some sites (e.g. Medium).
1326
+ *
1327
+ * @param Element
1328
+ **/
1329
+ _unwrapNoscriptImages(doc) { // Two passes: drop <img> elements with no image source, then swap images for their <noscript> counterparts.
1330
+ var imgs = Array.from(doc.getElementsByTagName("img")); // Copied into an array because images are removed while iterating.
1331
+ this._forEachNode(imgs, function(img) {
1332
+ for (var i = 0; i < img.attributes.length; i++) {
1333
+ var attr = img.attributes[i];
1334
+ switch (attr.name) { // Having any of these attributes is enough to keep the image.
1335
+ case "src":
1336
+ case "srcset":
1337
+ case "data-src":
1338
+ case "data-srcset":
1339
+ return;
1340
+ }
1341
+ if (/\.(jpg|jpeg|png|webp)/i.test(attr.value)) { // So is any attribute value that looks like an image file name.
1342
+ return;
1343
+ }
1344
+ }
1345
+ img.remove(); // Reached only when no attribute pointed at an image.
1346
+ });
1347
+ var noscripts = Array.from(doc.getElementsByTagName("noscript"));
1348
+ this._forEachNode(noscripts, function(noscript) {
1349
+ if (!this._isSingleImage(noscript)) { // Only <noscript> elements wrapping a single image are considered.
1350
+ return;
1351
+ }
1352
+ var tmp = doc.createElement("div"); // Scratch container holding a copy of the noscript markup.
1353
+ tmp.innerHTML = noscript.innerHTML;
1354
+ var prevElement = noscript.previousElementSibling;
1355
+ if (prevElement && this._isSingleImage(prevElement)) { // The preceding sibling must itself be, or wrap, a single image.
1356
+ var prevImg = prevElement;
1357
+ if (prevImg.tagName !== "IMG") {
1358
+ prevImg = prevElement.getElementsByTagName("img")[0];
1359
+ }
1360
+ var newImg = tmp.getElementsByTagName("img")[0];
1361
+ for (var i = 0; i < prevImg.attributes.length; i++) { // Carry image-related attributes of the old image over to the new one.
1362
+ var attr = prevImg.attributes[i];
1363
+ if (attr.value === "") {
1364
+ continue;
1365
+ }
1366
+ if (attr.name === "src" || attr.name === "srcset" || /\.(jpg|jpeg|png|webp)/i.test(attr.value)) {
1367
+ if (newImg.getAttribute(attr.name) === attr.value) { // Identical value already present: nothing to copy.
1368
+ continue;
1369
+ }
1370
+ var attrName = attr.name;
1371
+ if (newImg.hasAttribute(attrName)) { // On conflict the old value is kept under a "data-old-" prefixed name.
1372
+ attrName = "data-old-" + attrName;
1373
+ }
1374
+ newImg.setAttribute(attrName, attr.value);
1375
+ }
1376
+ }
1377
+ noscript.parentNode.replaceChild(tmp.firstElementChild, prevElement); // The <noscript> element itself is not removed by this method.
1378
+ }
1379
+ });
1380
+ },
1381
+ /**
1382
+ * Removes script tags from the document.
1383
+ *
1384
+ * @param Element
1385
+ **/
1386
+ _removeScripts(doc) {
1387
+ this._removeNodes(this._getAllNodesWithTag(doc, ["script", "noscript"]));
1388
+ },
1389
+ /**
1390
+ * Check if this node has only whitespace and a single element with given tag
1391
+ * Returns false if the DIV node contains non-empty text nodes
1392
+ * or if it contains no element with given tag or more than 1 element.
1393
+ *
1394
+ * @param Element
1395
+ * @param string tag of child element
1396
+ **/
1397
+ _hasSingleTagInsideElement(element, tag) {
1398
+ if (element.children.length != 1 || element.children[0].tagName !== tag) {
1399
+ return false;
1400
+ }
1401
+ return !this._someNode(element.childNodes, function(node) {
1402
+ return node.nodeType === this.TEXT_NODE && this.REGEXPS.hasContent.test(node.textContent);
1403
+ });
1404
+ },
1405
+ _isElementWithoutContent(node) {
1406
+ return node.nodeType === this.ELEMENT_NODE && !node.textContent.trim().length && (!node.children.length || node.children.length == node.getElementsByTagName("br").length + node.getElementsByTagName("hr").length);
1407
+ },
1408
+ /**
1409
+ * Determine whether element has any children block level elements.
1410
+ *
1411
+ * @param Element
1412
+ */
1413
+ _hasChildBlockElement(element) {
1414
+ return this._someNode(element.childNodes, function(node) {
1415
+ return this.DIV_TO_P_ELEMS.has(node.tagName) || this._hasChildBlockElement(node);
1416
+ });
1417
+ },
1418
+ /***
1419
+ * Determine if a node qualifies as phrasing content.
1420
+ * https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/Content_categories#Phrasing_content
1421
+ **/
1422
+ _isPhrasingContent(node) {
1423
+ return node.nodeType === this.TEXT_NODE || this.PHRASING_ELEMS.includes(node.tagName) || (node.tagName === "A" || node.tagName === "DEL" || node.tagName === "INS") && this._everyNode(node.childNodes, this._isPhrasingContent);
1424
+ },
1425
+ _isWhitespace(node) {
1426
+ return node.nodeType === this.TEXT_NODE && node.textContent.trim().length === 0 || node.nodeType === this.ELEMENT_NODE && node.tagName === "BR";
1427
+ },
1428
+ /**
1429
+ * Get the inner text of a node - cross browser compatibly.
1430
+ * This also strips out any excess whitespace to be found.
1431
+ *
1432
+ * @param Element
1433
+ * @param Boolean normalizeSpaces (default: true)
1434
+ * @return string
1435
+ **/
1436
+ _getInnerText(e, normalizeSpaces) {
1437
+ normalizeSpaces = typeof normalizeSpaces === "undefined" ? true : normalizeSpaces;
1438
+ var textContent = e.textContent.trim();
1439
+ if (normalizeSpaces) {
1440
+ return textContent.replace(this.REGEXPS.normalize, " ");
1441
+ }
1442
+ return textContent;
1443
+ },
1444
+ /**
1445
+ * Get the number of times a string s appears in the node e.
1446
+ *
1447
+ * @param Element
1448
+ * @param string - what to split on. Default is ","
1449
+ * @return number (integer)
1450
+ **/
1451
+ _getCharCount(e, s) {
1452
+ s = s || ",";
1453
+ return this._getInnerText(e).split(s).length - 1;
1454
+ },
1455
+ /**
1456
+ * Remove the style attribute on every e and under.
1457
+ * TODO: Test if getElementsByTagName(*) is faster.
1458
+ *
1459
+ * @param Element
1460
+ * @return void
1461
+ **/
1462
+ _cleanStyles(e) {
1463
+ if (!e || e.tagName.toLowerCase() === "svg") {
1464
+ return;
1465
+ }
1466
+ for (var i = 0; i < this.PRESENTATIONAL_ATTRIBUTES.length; i++) {
1467
+ e.removeAttribute(this.PRESENTATIONAL_ATTRIBUTES[i]);
1468
+ }
1469
+ if (this.DEPRECATED_SIZE_ATTRIBUTE_ELEMS.includes(e.tagName)) {
1470
+ e.removeAttribute("width");
1471
+ e.removeAttribute("height");
1472
+ }
1473
+ var cur = e.firstElementChild;
1474
+ while (cur !== null) {
1475
+ this._cleanStyles(cur);
1476
+ cur = cur.nextElementSibling;
1477
+ }
1478
+ },
1479
+ /**
1480
+ * Get the density of links as a fraction of the content
1481
+ * This is the amount of text that is inside a link divided by the total text in the node.
1482
+ *
1483
+ * @param Element
1484
+ * @return number (float)
1485
+ **/
1486
+ _getLinkDensity(element) {
1487
+ var textLength = this._getInnerText(element).length;
1488
+ if (textLength === 0) {
1489
+ return 0;
1490
+ }
1491
+ var linkLength = 0;
1492
+ this._forEachNode(element.getElementsByTagName("a"), function(linkNode) {
1493
+ var href = linkNode.getAttribute("href");
1494
+ var coefficient = href && this.REGEXPS.hashUrl.test(href) ? 0.3 : 1;
1495
+ linkLength += this._getInnerText(linkNode).length * coefficient;
1496
+ });
1497
+ return linkLength / textLength;
1498
+ },
1499
+ /**
1500
+ * Get an element's class/id weight. Uses regular expressions to tell if this
1501
+ * element looks good or bad.
1502
+ *
1503
+ * @param Element
1504
+ * @return number (Integer)
1505
+ **/
1506
+ _getClassWeight(e) {
1507
+ if (!this._flagIsActive(this.FLAG_WEIGHT_CLASSES)) {
1508
+ return 0;
1509
+ }
1510
+ var weight = 0;
1511
+ if (typeof e.className === "string" && e.className !== "") {
1512
+ if (this.REGEXPS.negative.test(e.className)) {
1513
+ weight -= 25;
1514
+ }
1515
+ if (this.REGEXPS.positive.test(e.className)) {
1516
+ weight += 25;
1517
+ }
1518
+ }
1519
+ if (typeof e.id === "string" && e.id !== "") {
1520
+ if (this.REGEXPS.negative.test(e.id)) {
1521
+ weight -= 25;
1522
+ }
1523
+ if (this.REGEXPS.positive.test(e.id)) {
1524
+ weight += 25;
1525
+ }
1526
+ }
1527
+ return weight;
1528
+ },
1529
+ /**
1530
+ * Clean a node of all elements of type "tag".
1531
+ * (Unless it's a youtube/vimeo video. People love movies.)
1532
+ *
1533
+ * @param Element
1534
+ * @param string tag to clean
1535
+ * @return void
1536
+ **/
1537
+ _clean(e, tag) {
1538
+ var isEmbed = ["object", "embed", "iframe"].includes(tag);
1539
+ this._removeNodes(this._getAllNodesWithTag(e, [tag]), function(element) {
1540
+ if (isEmbed) {
1541
+ for (var i = 0; i < element.attributes.length; i++) {
1542
+ if (this._allowedVideoRegex.test(element.attributes[i].value)) {
1543
+ return false;
1544
+ }
1545
+ }
1546
+ if (element.tagName === "object" && this._allowedVideoRegex.test(element.innerHTML)) {
1547
+ return false;
1548
+ }
1549
+ }
1550
+ return true;
1551
+ });
1552
+ },
1553
+ /**
1554
+ * Check if a given node has one of its ancestor tag name matching the
1555
+ * provided one.
1556
+ * @param HTMLElement node
1557
+ * @param String tagName
1558
+ * @param Number maxDepth
1559
+ * @param Function filterFn a filter to invoke to determine whether this node 'counts'
1560
+ * @return Boolean
1561
+ */
1562
+ _hasAncestorTag(node, tagName, maxDepth, filterFn) {
1563
+ maxDepth = maxDepth || 3;
1564
+ tagName = tagName.toUpperCase();
1565
+ var depth = 0;
1566
+ while (node.parentNode) {
1567
+ if (maxDepth > 0 && depth > maxDepth) {
1568
+ return false;
1569
+ }
1570
+ if (node.parentNode.tagName === tagName && (!filterFn || filterFn(node.parentNode))) {
1571
+ return true;
1572
+ }
1573
+ node = node.parentNode;
1574
+ depth++;
1575
+ }
1576
+ return false;
1577
+ },
1578
+ /**
1579
+ * Return an object indicating how many rows and columns this table has.
1580
+ */
1581
+ _getRowAndColumnCount(table) {
1582
+ var rows = 0;
1583
+ var columns = 0;
1584
+ var trs = table.getElementsByTagName("tr");
1585
+ for (var i = 0; i < trs.length; i++) {
1586
+ var rowspan = trs[i].getAttribute("rowspan") || 0;
1587
+ if (rowspan) {
1588
+ rowspan = parseInt(rowspan, 10);
1589
+ }
1590
+ rows += rowspan || 1;
1591
+ var columnsInThisRow = 0;
1592
+ var cells = trs[i].getElementsByTagName("td");
1593
+ for (var j = 0; j < cells.length; j++) {
1594
+ var colspan = cells[j].getAttribute("colspan") || 0;
1595
+ if (colspan) {
1596
+ colspan = parseInt(colspan, 10);
1597
+ }
1598
+ columnsInThisRow += colspan || 1;
1599
+ }
1600
+ columns = Math.max(columns, columnsInThisRow);
1601
+ }
1602
+ return { rows, columns };
1603
+ },
1604
+ /**
1605
+ * Look for 'data' (as opposed to 'layout') tables, for which we use
1606
+ * similar checks as
1607
+ * https://searchfox.org/mozilla-central/rev/f82d5c549f046cb64ce5602bfd894b7ae807c8f8/accessible/generic/TableAccessible.cpp#19
1608
+ */
1609
+ _markDataTables(root) {
1610
+ var tables = root.getElementsByTagName("table");
1611
+ for (var i = 0; i < tables.length; i++) {
1612
+ var table = tables[i];
1613
+ var role = table.getAttribute("role");
1614
+ if (role == "presentation") {
1615
+ table._readabilityDataTable = false;
1616
+ continue;
1617
+ }
1618
+ var datatable = table.getAttribute("datatable");
1619
+ if (datatable == "0") {
1620
+ table._readabilityDataTable = false;
1621
+ continue;
1622
+ }
1623
+ var summary = table.getAttribute("summary");
1624
+ if (summary) {
1625
+ table._readabilityDataTable = true;
1626
+ continue;
1627
+ }
1628
+ var caption = table.getElementsByTagName("caption")[0];
1629
+ if (caption && caption.childNodes.length) {
1630
+ table._readabilityDataTable = true;
1631
+ continue;
1632
+ }
1633
+ var dataTableDescendants = ["col", "colgroup", "tfoot", "thead", "th"];
1634
+ var descendantExists = function(tag) {
1635
+ return !!table.getElementsByTagName(tag)[0];
1636
+ };
1637
+ if (dataTableDescendants.some(descendantExists)) {
1638
+ this.log("Data table because found data-y descendant");
1639
+ table._readabilityDataTable = true;
1640
+ continue;
1641
+ }
1642
+ if (table.getElementsByTagName("table")[0]) {
1643
+ table._readabilityDataTable = false;
1644
+ continue;
1645
+ }
1646
+ var sizeInfo = this._getRowAndColumnCount(table);
1647
+ if (sizeInfo.columns == 1 || sizeInfo.rows == 1) {
1648
+ table._readabilityDataTable = false;
1649
+ continue;
1650
+ }
1651
+ if (sizeInfo.rows >= 10 || sizeInfo.columns > 4) {
1652
+ table._readabilityDataTable = true;
1653
+ continue;
1654
+ }
1655
+ table._readabilityDataTable = sizeInfo.rows * sizeInfo.columns > 10;
1656
+ }
1657
+ },
1658
+ /* convert images and figures that have properties like data-src into images that can be loaded without JS */
1659
+ _fixLazyImages(root) { // For each img/picture/figure under root: drop tiny base64 placeholder srcs and copy image URLs found in other attributes into src/srcset.
1660
+ this._forEachNode(
1661
+ this._getAllNodesWithTag(root, ["img", "picture", "figure"]),
1662
+ function(elem) {
1663
+ if (elem.src && this.REGEXPS.b64DataUrl.test(elem.src)) { // Step 1: src is a base64 data URL.
1664
+ var parts = this.REGEXPS.b64DataUrl.exec(elem.src);
1665
+ if (parts[1] === "image/svg+xml") { // SVG data URLs are left alone; the element is not processed further.
1666
+ return;
1667
+ }
1668
+ var srcCouldBeRemoved = false;
1669
+ for (var i = 0; i < elem.attributes.length; i++) { // Does any attribute other than src reference an image file?
1670
+ var attr = elem.attributes[i];
1671
+ if (attr.name === "src") {
1672
+ continue;
1673
+ }
1674
+ if (/\.(jpg|jpeg|png|webp)/i.test(attr.value)) {
1675
+ srcCouldBeRemoved = true;
1676
+ break;
1677
+ }
1678
+ }
1679
+ if (srcCouldBeRemoved) {
1680
+ var b64starts = parts[0].length; // Length of the part matched by the data-URL pattern.
1681
+ var b64length = elem.src.length - b64starts;
1682
+ if (b64length < 133) { // Short payload: presumably a placeholder (threshold rationale not shown here), so src is dropped.
1683
+ elem.removeAttribute("src");
1684
+ }
1685
+ }
1686
+ }
1687
+ if ((elem.src || elem.srcset && elem.srcset != "null") && !elem.className.toLowerCase().includes("lazy")) { // Step 2: already has a source and is not marked "lazy" -> nothing to fix.
1688
+ return;
1689
+ }
1690
+ for (var j = 0; j < elem.attributes.length; j++) { // Step 3: look for image URLs in the remaining attributes.
1691
+ attr = elem.attributes[j]; // Reuses the function-scoped `var attr` declared in the loop above.
1692
+ if (attr.name === "src" || attr.name === "srcset" || attr.name === "alt") {
1693
+ continue;
1694
+ }
1695
+ var copyTo = null;
1696
+ if (/\.(jpg|jpeg|png|webp)\s+\d/.test(attr.value)) { // Image extension followed by whitespace and a digit: treated as a srcset value.
1697
+ copyTo = "srcset";
1698
+ } else if (/^\s*\S+\.(jpg|jpeg|png|webp)\S*\s*$/.test(attr.value)) { // A single whitespace-free token containing an image extension: treated as a src value.
1699
+ copyTo = "src";
1700
+ }
1701
+ if (copyTo) {
1702
+ if (elem.tagName === "IMG" || elem.tagName === "PICTURE") { // img/picture receive the attribute directly.
1703
+ elem.setAttribute(copyTo, attr.value);
1704
+ } else if (elem.tagName === "FIGURE" && !this._getAllNodesWithTag(elem, ["img", "picture"]).length) { // A figure without any img/picture gets a new <img> child instead.
1705
+ var img = this._doc.createElement("img");
1706
+ img.setAttribute(copyTo, attr.value);
1707
+ elem.appendChild(img);
1708
+ }
1709
+ }
1710
+ }
1711
+ }
1712
+ );
1713
+ },
1714
+ _getTextDensity(e, tags) {
1715
+ var textLength = this._getInnerText(e, true).length;
1716
+ if (textLength === 0) {
1717
+ return 0;
1718
+ }
1719
+ var childrenLength = 0;
1720
+ var children = this._getAllNodesWithTag(e, tags);
1721
+ this._forEachNode(
1722
+ children,
1723
+ (child) => childrenLength += this._getInnerText(child, true).length
1724
+ );
1725
+ return childrenLength / textLength;
1726
+ },
1727
+ /**
1728
+ * Clean an element of all tags of type "tag" if they look fishy.
1729
+ * "Fishy" is an algorithm based on content length, classnames, link density, number of images & embeds, etc.
1730
+ *
1731
+ * @return void
1732
+ **/
1733
+ _cleanConditionally(e, tag) {
1734
+ if (!this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)) {
1735
+ return;
1736
+ }
1737
+ this._removeNodes(this._getAllNodesWithTag(e, [tag]), function(node) {
1738
+ var isDataTable = function(t) {
1739
+ return t._readabilityDataTable;
1740
+ };
1741
+ var isList = tag === "ul" || tag === "ol";
1742
+ if (!isList) {
1743
+ var listLength = 0;
1744
+ var listNodes = this._getAllNodesWithTag(node, ["ul", "ol"]);
1745
+ this._forEachNode(
1746
+ listNodes,
1747
+ (list) => listLength += this._getInnerText(list).length
1748
+ );
1749
+ isList = listLength / this._getInnerText(node).length > 0.9;
1750
+ }
1751
+ if (tag === "table" && isDataTable(node)) {
1752
+ return false;
1753
+ }
1754
+ if (this._hasAncestorTag(node, "table", -1, isDataTable)) {
1755
+ return false;
1756
+ }
1757
+ if (this._hasAncestorTag(node, "code")) {
1758
+ return false;
1759
+ }
1760
+ if ([...node.getElementsByTagName("table")].some(
1761
+ (tbl) => tbl._readabilityDataTable
1762
+ )) {
1763
+ return false;
1764
+ }
1765
+ var weight = this._getClassWeight(node);
1766
+ this.log("Cleaning Conditionally", node);
1767
+ var contentScore = 0;
1768
+ if (weight + contentScore < 0) {
1769
+ return true;
1770
+ }
1771
+ if (this._getCharCount(node, ",") < 10) {
1772
+ var p = node.getElementsByTagName("p").length;
1773
+ var img = node.getElementsByTagName("img").length;
1774
+ var li = node.getElementsByTagName("li").length - 100;
1775
+ var input = node.getElementsByTagName("input").length;
1776
+ var headingDensity = this._getTextDensity(node, [
1777
+ "h1",
1778
+ "h2",
1779
+ "h3",
1780
+ "h4",
1781
+ "h5",
1782
+ "h6"
1783
+ ]);
1784
+ var embedCount = 0;
1785
+ var embeds = this._getAllNodesWithTag(node, [
1786
+ "object",
1787
+ "embed",
1788
+ "iframe"
1789
+ ]);
1790
+ for (var i = 0; i < embeds.length; i++) {
1791
+ for (var j = 0; j < embeds[i].attributes.length; j++) {
1792
+ if (this._allowedVideoRegex.test(embeds[i].attributes[j].value)) {
1793
+ return false;
1794
+ }
1795
+ }
1796
+ if (embeds[i].tagName === "object" && this._allowedVideoRegex.test(embeds[i].innerHTML)) {
1797
+ return false;
1798
+ }
1799
+ embedCount++;
1800
+ }
1801
+ var innerText = this._getInnerText(node);
1802
+ if (this.REGEXPS.adWords.test(innerText) || this.REGEXPS.loadingWords.test(innerText)) {
1803
+ return true;
1804
+ }
1805
+ var contentLength = innerText.length;
1806
+ var linkDensity = this._getLinkDensity(node);
1807
+ var textishTags = ["SPAN", "LI", "TD"].concat(
1808
+ Array.from(this.DIV_TO_P_ELEMS)
1809
+ );
1810
+ var textDensity = this._getTextDensity(node, textishTags);
1811
+ var isFigureChild = this._hasAncestorTag(node, "figure");
1812
+ const shouldRemoveNode = () => {
1813
+ const errs = [];
1814
+ if (!isFigureChild && img > 1 && p / img < 0.5) {
1815
+ errs.push(`Bad p to img ratio (img=${img}, p=${p})`);
1816
+ }
1817
+ if (!isList && li > p) {
1818
+ errs.push(`Too many li's outside of a list. (li=${li} > p=${p})`);
1819
+ }
1820
+ if (input > Math.floor(p / 3)) {
1821
+ errs.push(`Too many inputs per p. (input=${input}, p=${p})`);
1822
+ }
1823
+ if (!isList && !isFigureChild && headingDensity < 0.9 && contentLength < 25 && (img === 0 || img > 2) && linkDensity > 0) {
1824
+ errs.push(
1825
+ `Suspiciously short. (headingDensity=${headingDensity}, img=${img}, linkDensity=${linkDensity})`
1826
+ );
1827
+ }
1828
+ if (!isList && weight < 25 && linkDensity > 0.2 + this._linkDensityModifier) {
1829
+ errs.push(
1830
+ `Low weight and a little linky. (linkDensity=${linkDensity})`
1831
+ );
1832
+ }
1833
+ if (weight >= 25 && linkDensity > 0.5 + this._linkDensityModifier) {
1834
+ errs.push(
1835
+ `High weight and mostly links. (linkDensity=${linkDensity})`
1836
+ );
1837
+ }
1838
+ if (embedCount === 1 && contentLength < 75 || embedCount > 1) {
1839
+ errs.push(
1840
+ `Suspicious embed. (embedCount=${embedCount}, contentLength=${contentLength})`
1841
+ );
1842
+ }
1843
+ if (img === 0 && textDensity === 0) {
1844
+ errs.push(
1845
+ `No useful content. (img=${img}, textDensity=${textDensity})`
1846
+ );
1847
+ }
1848
+ if (errs.length) {
1849
+ this.log("Checks failed", errs);
1850
+ return true;
1851
+ }
1852
+ return false;
1853
+ };
1854
+ var haveToRemove = shouldRemoveNode();
1855
+ if (isList && haveToRemove) {
1856
+ for (var x = 0; x < node.children.length; x++) {
1857
+ let child = node.children[x];
1858
+ if (child.children.length > 1) {
1859
+ return haveToRemove;
1860
+ }
1861
+ }
1862
+ let li_count = node.getElementsByTagName("li").length;
1863
+ if (img == li_count) {
1864
+ return false;
1865
+ }
1866
+ }
1867
+ return haveToRemove;
1868
+ }
1869
+ return false;
1870
+ });
1871
+ },
1872
+ /**
1873
+ * Clean out elements that match the specified conditions
1874
+ *
1875
+ * @param Element
1876
+ * @param Function determines whether a node should be removed
1877
+ * @return void
1878
+ **/
1879
+ _cleanMatchedNodes(e, filter) {
1880
+ var endOfSearchMarkerNode = this._getNextNode(e, true);
1881
+ var next = this._getNextNode(e);
1882
+ while (next && next != endOfSearchMarkerNode) {
1883
+ if (filter.call(this, next, next.className + " " + next.id)) {
1884
+ next = this._removeAndGetNext(next);
1885
+ } else {
1886
+ next = this._getNextNode(next);
1887
+ }
1888
+ }
1889
+ },
1890
+ /**
1891
+ * Clean out spurious headers from an Element.
1892
+ *
1893
+ * @param Element
1894
+ * @return void
1895
+ **/
1896
+ _cleanHeaders(e) {
1897
+ let headingNodes = this._getAllNodesWithTag(e, ["h1", "h2"]);
1898
+ this._removeNodes(headingNodes, function(node) {
1899
+ let shouldRemove = this._getClassWeight(node) < 0;
1900
+ if (shouldRemove) {
1901
+ this.log("Removing header with low class weight:", node);
1902
+ }
1903
+ return shouldRemove;
1904
+ });
1905
+ },
1906
+ /**
1907
+ * Check if this node is an H1 or H2 element whose content is mostly
1908
+ * the same as the article title.
1909
+ *
1910
+ * @param Element the node to check.
1911
+ * @return boolean indicating whether this is a title-like header.
1912
+ */
1913
+ _headerDuplicatesTitle(node) {
1914
+ if (node.tagName != "H1" && node.tagName != "H2") {
1915
+ return false;
1916
+ }
1917
+ var heading = this._getInnerText(node, false);
1918
+ this.log("Evaluating similarity of header:", heading, this._articleTitle);
1919
+ return this._textSimilarity(this._articleTitle, heading) > 0.75;
1920
+ },
1921
+ _flagIsActive(flag) {
1922
+ return (this._flags & flag) > 0;
1923
+ },
1924
+ _removeFlag(flag) {
1925
+ this._flags = this._flags & ~flag;
1926
+ },
1927
+ _isProbablyVisible(node) {
1928
+ return (!node.style || node.style.display != "none") && (!node.style || node.style.visibility != "hidden") && !node.hasAttribute("hidden") && //check for "fallback-image" so that wikimedia math images are displayed
1929
+ (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true" || node.className && node.className.includes && node.className.includes("fallback-image"));
1930
+ },
1931
+ /**
1932
+ * Runs readability.
1933
+ *
1934
+ * Workflow:
1935
+ * 1. Prep the document by removing script tags, css, etc.
1936
+ * 2. Build readability's DOM tree.
1937
+ * 3. Grab the article content from the current dom tree.
1938
+ * 4. Replace the current DOM tree with the new one.
1939
+ * 5. Read peacefully.
1940
+ *
1941
+ * @return void
1942
+ **/
1943
+ parse() {
1944
+ if (this._maxElemsToParse > 0) {
1945
+ var numTags = this._doc.getElementsByTagName("*").length;
1946
+ if (numTags > this._maxElemsToParse) {
1947
+ throw new Error(
1948
+ "Aborting parsing document; " + numTags + " elements found"
1949
+ );
1950
+ }
1951
+ }
1952
+ this._unwrapNoscriptImages(this._doc);
1953
+ var jsonLd = this._disableJSONLD ? {} : this._getJSONLD(this._doc);
1954
+ this._removeScripts(this._doc);
1955
+ this._prepDocument();
1956
+ var metadata = this._getArticleMetadata(jsonLd);
1957
+ this._metadata = metadata;
1958
+ this._articleTitle = metadata.title;
1959
+ var articleContent = this._grabArticle();
1960
+ if (!articleContent) {
1961
+ return null;
1962
+ }
1963
+ this.log("Grabbed: " + articleContent.innerHTML);
1964
+ this._postProcessContent(articleContent);
1965
+ if (!metadata.excerpt) {
1966
+ var paragraphs = articleContent.getElementsByTagName("p");
1967
+ if (paragraphs.length) {
1968
+ metadata.excerpt = paragraphs[0].textContent.trim();
1969
+ }
1970
+ }
1971
+ var textContent = articleContent.textContent;
1972
+ return {
1973
+ title: this._articleTitle,
1974
+ byline: metadata.byline || this._articleByline,
1975
+ dir: this._articleDir,
1976
+ lang: this._articleLang,
1977
+ content: this._serializer(articleContent),
1978
+ textContent,
1979
+ length: textContent.length,
1980
+ excerpt: metadata.excerpt,
1981
+ siteName: metadata.siteName || this._articleSiteName,
1982
+ publishedTime: metadata.publishedTime
1983
+ };
1984
+ }
1985
+ };
1986
+ {
1987
+ module.exports = Readability2;
1988
+ }
1989
+ })(Readability);
1990
+ return Readability.exports;
1991
+ }
1992
+ var ReadabilityReaderable = { exports: {} };
1993
+ var hasRequiredReadabilityReaderable;
1994
+ function requireReadabilityReaderable() {
1995
+ if (hasRequiredReadabilityReaderable) return ReadabilityReaderable.exports;
1996
+ hasRequiredReadabilityReaderable = 1;
1997
+ (function(module) {
1998
+ var REGEXPS = {
1999
+ // NOTE: These two regular expressions are duplicated in
2000
+ // Readability.js. Please keep both copies in sync.
2001
+ unlikelyCandidates: /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,
2002
+ okMaybeItsACandidate: /and|article|body|column|content|main|shadow/i
2003
+ };
2004
+ function isNodeVisible(node) {
2005
+ return (!node.style || node.style.display != "none") && !node.hasAttribute("hidden") && //check for "fallback-image" so that wikimedia math images are displayed
2006
+ (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true" || node.className && node.className.includes && node.className.includes("fallback-image"));
2007
+ }
2008
+ function isProbablyReaderable(doc, options = {}) {
2009
+ if (typeof options == "function") {
2010
+ options = { visibilityChecker: options };
2011
+ }
2012
+ var defaultOptions = {
2013
+ minScore: 20,
2014
+ minContentLength: 140,
2015
+ visibilityChecker: isNodeVisible
2016
+ };
2017
+ options = Object.assign(defaultOptions, options);
2018
+ var nodes = doc.querySelectorAll("p, pre, article");
2019
+ var brNodes = doc.querySelectorAll("div > br");
2020
+ if (brNodes.length) {
2021
+ var set = new Set(nodes);
2022
+ [].forEach.call(brNodes, function(node) {
2023
+ set.add(node.parentNode);
2024
+ });
2025
+ nodes = Array.from(set);
2026
+ }
2027
+ var score = 0;
2028
+ return [].some.call(nodes, function(node) {
2029
+ if (!options.visibilityChecker(node)) {
2030
+ return false;
2031
+ }
2032
+ var matchString = node.className + " " + node.id;
2033
+ if (REGEXPS.unlikelyCandidates.test(matchString) && !REGEXPS.okMaybeItsACandidate.test(matchString)) {
2034
+ return false;
2035
+ }
2036
+ if (node.matches("li p")) {
2037
+ return false;
2038
+ }
2039
+ var textContentLength = node.textContent.trim().length;
2040
+ if (textContentLength < options.minContentLength) {
2041
+ return false;
2042
+ }
2043
+ score += Math.sqrt(textContentLength - options.minContentLength);
2044
+ if (score > options.minScore) {
2045
+ return true;
2046
+ }
2047
+ return false;
2048
+ });
2049
+ }
2050
+ {
2051
+ module.exports = isProbablyReaderable;
2052
+ }
2053
+ })(ReadabilityReaderable);
2054
+ return ReadabilityReaderable.exports;
2055
+ }
2056
// Cached export object and "already initialised" marker for the bundled
// readability entry module.
var readability;
var hasRequiredReadability;

/**
 * Build (once) and return the combined export object
 * `{ Readability, isProbablyReaderable }`.
 */
function requireReadability() {
  if (!hasRequiredReadability) {
    hasRequiredReadability = 1;
    const ReadabilityCtor = requireReadability$1();
    const readerableCheck = requireReadabilityReaderable();
    readability = {
      Readability: ReadabilityCtor,
      isProbablyReaderable: readerableCheck
    };
  }
  return readability;
}

var readabilityExports = requireReadability();
2070
/**
 * Escape a string so it can be embedded in a CSS selector, either as an
 * identifier (`#id`) or inside a quoted attribute value.
 *
 * Uses the native `CSS.escape` when present. Otherwise it applies the same
 * identifier-serialization rules as the CSSOM specification, so both paths
 * produce equivalent selectors. The earlier fallback escaped only `"` and
 * `\`, which broke `#id` selectors for ids containing spaces, dots, colons
 * or a leading digit.
 *
 * @param {string} value raw id or attribute value
 * @returns {string} escaped text, safe for both selector positions
 */
function escapeSelectorValue(value) {
  if (typeof CSS !== "undefined" && typeof CSS.escape === "function") {
    return CSS.escape(value);
  }
  const text = String(value);
  const firstUnit = text.charCodeAt(0);
  // A lone "-" is not a valid identifier and must be escaped.
  if (text.length === 1 && firstUnit === 0x2d) {
    return `\\${text}`;
  }
  let escaped = "";
  for (let index = 0; index < text.length; index++) {
    const unit = text.charCodeAt(index);
    const isDigit = unit >= 0x30 && unit <= 0x39;
    if (unit === 0) {
      // NUL is replaced by U+FFFD.
      escaped += "\uFFFD";
      continue;
    }
    const isControl = (unit >= 0x01 && unit <= 0x1f) || unit === 0x7f;
    const isLeadingDigit = isDigit && (index === 0 || (index === 1 && firstUnit === 0x2d));
    if (isControl || isLeadingDigit) {
      // Hex escape followed by a space terminator.
      escaped += `\\${unit.toString(16)} `;
      continue;
    }
    const isSafe =
      unit >= 0x80 ||
      unit === 0x2d ||
      unit === 0x5f ||
      isDigit ||
      (unit >= 0x41 && unit <= 0x5a) ||
      (unit >= 0x61 && unit <= 0x7a);
    escaped += isSafe ? text.charAt(index) : `\\${text.charAt(index)}`;
  }
  return escaped;
}
2076
/**
 * Return `candidate` only if it matches exactly one element in `document2`.
 *
 * @param {{querySelectorAll: Function}} document2 document to query
 * @param {?string} candidate selector to test
 * @returns {?string} the selector, or null when it is empty, invalid
 *   (the query throws) or does not match exactly one element
 */
function uniqueSelector(document2, candidate) {
  if (!candidate) {
    return null;
  }
  let matchCount;
  try {
    matchCount = document2.querySelectorAll(candidate).length;
  } catch {
    // Invalid selector syntax: treat as "not usable".
    return null;
  }
  return matchCount === 1 ? candidate : null;
}
2084
/**
 * Build `tag[attribute="value"]` for an element and return it when it is
 * unique in the element's owner document.
 *
 * @param {Element} el element to describe
 * @param {string} attribute attribute name to read
 * @returns {?string} the selector, or null when the attribute is missing or
 *   blank, or the selector is not unique
 */
function uniqueAttributeSelector(el, attribute) {
  const rawValue = el.getAttribute(attribute);
  const value = rawValue?.trim();
  if (!value) {
    return null;
  }
  const tagName = el.tagName.toLowerCase();
  const selector = `${tagName}[${attribute}="${escapeSelectorValue(value)}"]`;
  return uniqueSelector(el.ownerDocument, selector);
}
2090
/**
 * Produce a CSS selector for `el`, preferring stable hooks over structure.
 *
 * Order of preference:
 *  1. `#id` when the element has an id.
 *  2. A unique `tag[attr="value"]` for data-testid, name, form or aria-label.
 *  3. A `parent > child` path built upwards from the element, using
 *     `:nth-of-type(n)` where same-tag siblings exist. The climb stops at
 *     the first ancestor with an id, at the top of the tree, or as soon as
 *     the path matches exactly one element.
 *
 * @param {Element} el element to describe
 * @returns {string} selector text (the structural path is returned even
 *   when it is not unique)
 */
function generateStableSelector(el) {
  if (el.id) {
    return `#${escapeSelectorValue(el.id)}`;
  }

  const preferredAttributes = ["data-testid", "name", "form", "aria-label"];
  for (const attribute of preferredAttributes) {
    const attributeSelector = uniqueAttributeSelector(el, attribute);
    if (attributeSelector) {
      return attributeSelector;
    }
  }

  const ownerDoc = el.ownerDocument;
  const path = [];
  const renderPath = () => path.join(" > ");

  let node = el;
  while (node) {
    if (node.id) {
      path.unshift(`#${escapeSelectorValue(node.id)}`);
      break;
    }
    const tag = node.tagName.toLowerCase();
    const parent = node.parentElement;
    if (!parent) {
      path.unshift(tag);
      break;
    }

    // Count same-tag siblings and find this node's 1-based position among them.
    let sameTagCount = 0;
    let position = 0;
    for (const sibling of Array.from(parent.children)) {
      if (sibling.tagName !== node.tagName) {
        continue;
      }
      sameTagCount += 1;
      if (sibling === node) {
        position = sameTagCount;
      }
    }
    path.unshift(sameTagCount > 1 ? `${tag}:nth-of-type(${position})` : tag);

    node = parent;
    if (uniqueSelector(ownerDoc, renderPath())) {
      break;
    }
  }

  const selector = renderPath();
  return uniqueSelector(ownerDoc, selector) || selector;
}
2126
/**
 * Read an environment variable without assuming `process` exists.
 *
 * @param {string} name variable name
 * @returns {string|undefined} the value, or undefined when there is no
 *   global `process`, no `env`, or no such variable
 */
function getEnvFlag(name) {
  const hasProcess = typeof globalThis === "object" && "process" in globalThis;
  if (!hasProcess) {
    return void 0;
  }
  return globalThis.process?.env?.[name];
}
2130
/**
 * Whether VESSEL_DEBUG is switched on.
 *
 * The value is trimmed and lower-cased; "1", "true", "yes" and "on" enable
 * debugging, anything else (including an unset variable) does not.
 *
 * @returns {boolean}
 */
function isDebugEnabled() {
  const normalized = getEnvFlag("VESSEL_DEBUG")?.trim().toLowerCase();
  return ["1", "true", "yes", "on"].includes(normalized);
}
2134
/**
 * Forward a log call to the matching console method with a
 * `[Vessel <scope>]` prefix.
 *
 * Debug messages are dropped unless debugging is enabled. Levels other than
 * debug/info/warn/error are ignored.
 *
 * @param {string} level "debug", "info", "warn" or "error"
 * @param {string} scope label placed in the prefix
 * @param {Array} args values to log after the prefix
 * @returns {void}
 */
function writeLog(level, scope, args) {
  if (level === "debug" && !isDebugEnabled()) {
    return;
  }
  const prefix = `[Vessel ${scope}]`;
  if (level === "debug") {
    console.debug(prefix, ...args);
  } else if (level === "info") {
    console.info(prefix, ...args);
  } else if (level === "warn") {
    console.warn(prefix, ...args);
  } else if (level === "error") {
    console.error(prefix, ...args);
  }
}
2154
/**
 * Create a logger whose methods forward to `writeLog` with a fixed scope.
 *
 * @param {string} scope label used in the log prefix
 * @returns {{debug: Function, info: Function, warn: Function, error: Function}}
 */
function createLogger(scope) {
  const forLevel = (level) => (...args) => writeLog(level, scope, args);
  return {
    debug: forLevel("debug"),
    info: forLevel("info"),
    warn: forLevel("warn"),
    error: forLevel("error")
  };
}
2162
+ const logger = createLogger("ContentScript"); // Logger for this script; writeLog prefixes its output with "[Vessel ContentScript]".
2163
+ const MAX_VISIBLE_TEXT = 500; // Character cap on element text scanned by looksLikeCartConfirmation.
2164
+ const MAX_HREF_LENGTH = 500; // NOTE(review): not referenced in the visible part of the file; presumably a cap on href length — confirm.
2165
+ const MAX_ATTR_TEXT = 200; // NOTE(review): usage not visible here; presumably a cap on attribute text — confirm.
2166
+ const MAX_DESCRIPTION_LENGTH = 160; // NOTE(review): usage not visible here; presumably a cap on description text — confirm.
2167
+ const MAX_LABEL_LENGTH = 100; // NOTE(review): usage not visible here; presumably a cap on label text — confirm.
2168
+ const MAX_SHORT_TEXT = 60; // NOTE(review): usage not visible here; presumably a cap on short snippets — confirm.
2169
+ const MAX_RESULT_COUNT = 10; // NOTE(review): usage not visible here; presumably a cap on listed results — confirm.
2170
+ const MAX_DIFF_HEADINGS = 8; // Number of h1-h3 headings folded into the page-diff signature (getPageDiffSignature).
2171
+ const MAX_OPTIONS_PER_SELECT = 8; // NOTE(review): usage not visible here; presumably options reported per select — confirm.
2172
+ const MAX_OPTIONS_DISPLAY = 25; // NOTE(review): usage not visible here; presumably an overall option display cap — confirm.
2173
/**
 * Guess from its wording whether an option's text marks it as the correct
 * or the wrong choice.
 *
 * Positive phrases are tested first, so text containing both kinds of
 * wording yields true.
 *
 * @param {?string} value option text
 * @returns {boolean|undefined} true for "correct"-style wording, false for
 *   "wrong"-style wording, undefined for blank text or no match
 */
function looksLikeCorrectOption(value) {
  const text = getTrimmedText(value);
  if (!text) {
    return void 0;
  }
  const positiveWording = /\b(correct|right choice|this is correct|correct answer|pick this|select this|choose this|right answer)\b/i;
  const negativeWording = /\b(wrong|incorrect|not this|don't pick|do not pick|bad option|decoy)\b/i;
  if (positiveWording.test(text)) {
    return true;
  }
  return negativeWording.test(text) ? false : void 0;
}
2188
+ let elementIndex = 0; // Last index handed out by assignIndex; incremented before each new assignment.
2189
+ const elementSelectors = {}; // index -> selector string produced by generateSelector.
2190
+ const indexedElements = /* @__PURE__ */ new WeakMap(); // element -> index, so assignIndex returns the same index for a known element.
2191
+ const indexedElementRefs = {}; // index -> element reference.
2192
+ let activeOverlays = []; // NOTE(review): not used in the visible part of the file; presumably the overlays currently detected — confirm.
2193
+ let pageDiffMutationTimer = null; // Pending debounce timer set by the page-diff MutationObserver callback, or null.
2194
+ let pageDiffActivityThrottleTimer = null; // Non-null while "page:diff-activity" messages are being throttled.
2195
+ let lastPageDiffSignature = ""; // Signature last recorded; emitPageDiffDirty only sends when the new one differs.
2196
+ const PAGE_DIFF_ACTIVITY_THROTTLE_MS = 350; // Minimum gap (ms) between "page:diff-activity" messages.
2197
+ const PAGE_DIFF_MUTATION_DEBOUNCE_MS = 1200; // Quiet period (ms) after the last relevant mutation before emitPageDiffDirty runs.
2198
+ const CUSTOM_TEXT_FIELD_SELECTOR = '[contenteditable]:not([contenteditable="false"]), [role="textbox"], [role="searchbox"], [role="combobox"]'; // Matches contenteditable hosts (except "false") and ARIA textbox/searchbox/combobox roles.
2199
+ const ACTION_CONTROL_SELECTOR = [ // Comma-joined selector list for button-like controls.
2200
+ "button",
2201
+ '[role="button"]',
2202
+ '[role="tab"]',
2203
+ '[role="menuitem"]',
2204
+ '[role="option"]',
2205
+ '[role="radio"]',
2206
+ '[role="checkbox"]',
2207
+ '[role="switch"]',
2208
+ 'input[type="submit"]',
2209
+ 'input[type="button"]',
2210
+ 'a[href^="#"][aria-selected]', // In-page "#" anchors count only when they carry one of these attributes.
2211
+ 'a[href^="#"][data-date]',
2212
+ 'a[href^="#"][data-day]',
2213
+ 'a[href^="#"][role="tab"]',
2214
+ 'a[href^="#"][role="button"]'
2215
+ ].join(", ");
2216
/**
 * Collapse every whitespace run to a single space and trim the ends.
 *
 * @param {?string} value text to normalise; falsy values become ""
 * @returns {string}
 */
function normalizeSignatureText(value) {
  const raw = value || "";
  const collapsed = raw.replace(/\s+/g, " ");
  return collapsed.trim();
}
2219
/**
 * Gather normalised text from the text nodes under `root`, stopping once
 * roughly `maxLength` characters have been collected.
 *
 * Text nodes are skipped when they have no parent element or sit inside
 * script, style, noscript, `[hidden]` or `[aria-hidden='true']`.
 *
 * @param {?Node} root subtree to read; a falsy root yields ""
 * @param {number} maxLength maximum length of the returned string
 * @returns {string} space-joined text, cut to `maxLength` characters
 */
function collectBoundedVisibleText(root, maxLength) {
  if (!root) {
    return "";
  }
  const walker = document.createTreeWalker(root, NodeFilter.SHOW_TEXT);
  const chunks = [];
  let collected = 0;
  while (collected < maxLength) {
    const textNode = walker.nextNode();
    if (!textNode) {
      break;
    }
    const host = textNode.parentElement;
    const isExcluded = !host || host.closest("script, style, noscript, [hidden], [aria-hidden='true']");
    if (isExcluded) {
      continue;
    }
    const text = normalizeSignatureText(textNode.textContent);
    if (text) {
      chunks.push(text);
      // +1 accounts for the joining space.
      collected += text.length + 1;
    }
  }
  return chunks.join(" ").slice(0, maxLength);
}
2238
/**
 * Build a newline-separated fingerprint of the current page: URL, title,
 * the first MAX_DIFF_HEADINGS h1-h3 texts joined by " | ", and up to 1200
 * characters of visible text from the main content root.
 *
 * The content root is the first `main`, `article` or `[role='main']`
 * element, falling back to `document.body`.
 *
 * @returns {string}
 */
function getPageDiffSignature() {
  const title = normalizeSignatureText(document.title);

  const headingTexts = [];
  const headingNodes = Array.from(document.querySelectorAll("h1, h2, h3")).slice(0, MAX_DIFF_HEADINGS);
  for (const heading of headingNodes) {
    const text = normalizeSignatureText(heading.textContent);
    if (text) {
      headingTexts.push(text);
    }
  }
  const headings = headingTexts.join(" | ");

  const contentRoot = document.querySelector("main, article, [role='main']") || document.body;
  const visibleText = collectBoundedVisibleText(contentRoot, 1200);

  return [window.location.href, title, headings, visibleText].join("\n");
}
2245
/**
 * Resolve a node to an element: the node itself when it is an Element,
 * otherwise its parent element.
 *
 * @param {?Node} node
 * @returns {?Element} null when there is no node or no parent element
 */
function asElement(node) {
  return node instanceof Element ? node : node?.parentElement || null;
}
2249
/**
 * Whether a node lives inside markup injected by Vessel, i.e. has an
 * ancestor-or-self matching `[data-vessel-highlight]` or
 * `.__vessel-highlight-label`.
 *
 * @param {?Node} node
 * @returns {boolean}
 */
function isVesselOwnedNode(node) {
  const owner = asElement(node);
  const vesselAncestor = owner?.closest?.("[data-vessel-highlight], .__vessel-highlight-label");
  return Boolean(vesselAncestor);
}
2253
/**
 * Whether a mutation record only touches Vessel's own injected nodes and
 * can therefore be ignored for page-diff purposes.
 *
 * Attribute and character-data records are judged by their target;
 * child-list records are ignorable only when every added and removed node
 * is Vessel-owned. Any other record type is never ignored.
 *
 * @param {MutationRecord} mutation
 * @returns {boolean}
 */
function shouldIgnorePageDiffMutation(mutation) {
  switch (mutation.type) {
    case "attributes":
    case "characterData":
      return isVesselOwnedNode(mutation.target);
    case "childList": {
      const touched = Array.from(mutation.addedNodes).concat(Array.from(mutation.removedNodes));
      return touched.every((node) => isVesselOwnedNode(node));
    }
    default:
      return false;
  }
}
2267
/**
 * Recompute the page signature and, when it is non-empty and differs from
 * the last recorded one, store it and send "page:diff-dirty" over IPC.
 *
 * @returns {void}
 */
function emitPageDiffDirty() {
  const currentSignature = getPageDiffSignature();
  const isUnchanged = currentSignature === lastPageDiffSignature;
  if (!currentSignature || isUnchanged) {
    return;
  }
  lastPageDiffSignature = currentSignature;
  electron.ipcRenderer.send("page:diff-dirty");
}
2273
/**
 * Send "page:diff-activity" over IPC, at most once per
 * PAGE_DIFF_ACTIVITY_THROTTLE_MS. Calls made while the throttle timer is
 * pending are dropped.
 *
 * @returns {void}
 */
function notifyPageDiffActivity() {
  if (pageDiffActivityThrottleTimer) {
    return;
  }
  electron.ipcRenderer.send("page:diff-activity");
  const releaseThrottle = () => {
    pageDiffActivityThrottleTimer = null;
  };
  pageDiffActivityThrottleTimer = setTimeout(releaseThrottle, PAGE_DIFF_ACTIVITY_THROTTLE_MS);
}
2280
/**
 * Start watching the document for changes that may alter the page
 * signature.
 *
 * Does nothing when MutationObserver is unavailable, the document has no
 * root element, or the page is a document viewer. Otherwise it:
 *  - records the current signature,
 *  - observes child-list, character-data and selected attribute changes on
 *    the whole document,
 *  - on each batch with at least one non-Vessel mutation, sends a throttled
 *    activity notification and (re)starts a debounce timer that ends in
 *    `emitPageDiffDirty`,
 *  - clears the stored signature on popstate/hashchange,
 *  - disconnects the observer and clears both timers on beforeunload.
 *
 * @returns {void}
 */
function startPageDiffObserver() {
  const observerUnavailable = typeof MutationObserver === "undefined";
  if (observerUnavailable || !document.documentElement || isDocumentViewerPage()) {
    return;
  }
  lastPageDiffSignature = getPageDiffSignature();

  // Restart the debounce window; only the last mutation in a burst fires.
  const scheduleDirtyCheck = () => {
    if (pageDiffMutationTimer) {
      clearTimeout(pageDiffMutationTimer);
    }
    pageDiffMutationTimer = setTimeout(() => {
      pageDiffMutationTimer = null;
      emitPageDiffDirty();
    }, PAGE_DIFF_MUTATION_DEBOUNCE_MS);
  };

  const observer = new MutationObserver((mutations) => {
    const hasPageMutation = mutations.some((mutation) => !shouldIgnorePageDiffMutation(mutation));
    if (!hasPageMutation) {
      return;
    }
    notifyPageDiffActivity();
    scheduleDirtyCheck();
  });

  // An empty signature never equals a freshly computed non-empty one, so the
  // next dirty check after navigation reports a change.
  const resetSignature = () => {
    lastPageDiffSignature = "";
  };
  for (const navigationEvent of ["popstate", "hashchange"]) {
    window.addEventListener(navigationEvent, resetSignature);
  }

  const watchedAttributes = [
    "class",
    "style",
    "hidden",
    "aria-hidden",
    "aria-expanded",
    "aria-selected",
    "aria-checked",
    "aria-label",
    "title",
    "open"
  ];
  observer.observe(document.documentElement, {
    subtree: true,
    childList: true,
    characterData: true,
    attributes: true,
    attributeFilter: watchedAttributes
  });

  window.addEventListener("beforeunload", () => {
    observer.disconnect();
    if (pageDiffActivityThrottleTimer) {
      clearTimeout(pageDiffActivityThrottleTimer);
      pageDiffActivityThrottleTimer = null;
    }
    if (pageDiffMutationTimer) {
      clearTimeout(pageDiffMutationTimer);
      pageDiffMutationTimer = null;
    }
  });
}
2333
/**
 * Detect pages that show a document (PDF, e-book, archive.org reader)
 * rather than ordinary HTML content.
 *
 * Checks, in order: the document content type, the URL path extension
 * (pdf, epub, mobi, cbz, cbr), archive.org details/stream/download paths,
 * and finally the presence of a known reader or PDF embed element.
 *
 * @returns {boolean}
 */
function isDocumentViewerPage() {
  const mimeType = document.contentType?.toLowerCase() || "";
  if (mimeType.includes("application/pdf")) {
    return true;
  }

  try {
    const { pathname, hostname } = new URL(window.location.href);
    const decodedPath = decodeURIComponent(pathname).toLowerCase();
    if (/\.(pdf|epub|mobi|cbz|cbr)$/.test(decodedPath)) {
      return true;
    }
    const bareHost = hostname.toLowerCase().replace(/^www\./, "");
    const isArchiveReader = bareHost === "archive.org" && /^\/(details|stream|download)\//.test(decodedPath);
    if (isArchiveReader) {
      return true;
    }
  } catch {
    // URL parsing or decoding failed; fall through to the DOM check.
  }

  const viewerElement = document.querySelector(
    "#BookReader, ia-bookreader, bookreader, embed[type='application/pdf'], object[type='application/pdf']"
  );
  return Boolean(viewerElement);
}
2350
// Limits for the shadow-root scan below.
const MAX_SHADOW_HOSTS = 150; // stop after collecting this many shadow roots
const MAX_SHADOW_DEPTH = 5; // deepest nesting level that is still walked
const MAX_WALK_ELEMENTS = 1e4; // total elements visited across all levels

/**
 * Collect open shadow roots reachable from `root`, including nested ones.
 *
 * Elements are visited with a TreeWalker; whenever one exposes `shadowRoot`
 * it is recorded and then scanned in turn before the outer walk continues.
 * The scan is bounded by the three limits above.
 *
 * @param {Node} root node to start from
 * @returns {ShadowRoot[]} shadow roots in discovery order
 */
function collectShadowRoots(root) {
  const found = [];
  let visited = 0;
  const hasRoom = () => found.length < MAX_SHADOW_HOSTS;

  function scan(scope, depth) {
    if (depth > MAX_SHADOW_DEPTH || !hasRoom()) {
      return;
    }
    const walker = document.createTreeWalker(scope, NodeFilter.SHOW_ELEMENT);
    for (
      let el = walker.nextNode();
      el && visited < MAX_WALK_ELEMENTS && hasRoom();
      el = walker.nextNode()
    ) {
      visited++;
      if (el.shadowRoot) {
        found.push(el.shadowRoot);
        scan(el.shadowRoot, depth + 1);
      }
    }
  }

  scan(root, 0);
  return found;
}
2373
/**
 * `querySelectorAll` that also searches inside shadow roots found under
 * `root`.
 *
 * @param {string} selector CSS selector
 * @param {Document|Element|ShadowRoot} [root=document] search root
 * @returns {Element[]} matches under `root` first, then matches from each
 *   collected shadow root in discovery order
 */
function deepQuerySelectorAll(selector, root = document) {
  const matches = Array.from(root.querySelectorAll(selector));
  for (const shadowRoot of collectShadowRoots(root)) {
    matches.push(...Array.from(shadowRoot.querySelectorAll(selector)));
  }
  return matches;
}
2381
/**
 * Whether the element's root node is a ShadowRoot.
 *
 * @param {Element} el
 * @returns {boolean}
 */
function isInShadowDom(el) {
  const rootNode = el.getRootNode();
  return rootNode instanceof ShadowRoot;
}
2384
/**
 * Build a selector path that crosses shadow boundaries, joining one stable
 * selector per level with " >>> " (outermost host first).
 *
 * @param {Element} el element inside one or more shadow roots
 * @returns {?string} the joined path, or null when the element is not in a
 *   shadow root (only one segment was produced)
 */
function generateShadowPiercingSelector(el) {
  const segments = [];
  let node = el;
  while (node) {
    const rootNode = node.getRootNode();
    segments.unshift(generateStableSelector(node));
    if (!(rootNode instanceof ShadowRoot)) {
      break;
    }
    // Continue with the element hosting this shadow root.
    node = rootNode.host;
  }
  return segments.length > 1 ? segments.join(" >>> ") : null;
}
2401
/**
 * Resolve a " >>> "-separated selector path to an element, entering the
 * shadow root of each intermediate match.
 *
 * @param {string} selectorPath e.g. "#host >>> button"
 * @returns {?Element} the final match, or null when any segment fails to
 *   match or an intermediate match has no shadow root
 */
function resolveShadowSelector(selectorPath) {
  const steps = selectorPath.split(" >>> ").map((step) => step.trim());
  const lastStep = steps.length - 1;
  let scope = document;
  for (const [position, step] of steps.entries()) {
    const match = scope.querySelector(step);
    if (!match) {
      return null;
    }
    if (position === lastStep) {
      return match;
    }
    if (!match.shadowRoot) {
      return null;
    }
    scope = match.shadowRoot;
  }
  return null;
}
2416
/**
 * Pick the selector for an element: a shadow-piercing path when the element
 * is in a shadow root and such a path can be built, otherwise the regular
 * stable selector.
 *
 * @param {Element} el
 * @returns {string}
 */
function generateSelector(el) {
  const shadowPath = isInShadowDom(el) ? generateShadowPiercingSelector(el) : null;
  return shadowPath || generateStableSelector(el);
}
2423
/**
 * Return the numeric index for an element, allocating the next one on
 * first sight.
 *
 * A new index is recorded in three places: selector by index, element by
 * index, and index by element.
 *
 * @param {Element} el
 * @returns {number} 1-based index, stable for the same element
 */
function assignIndex(el) {
  const knownIndex = indexedElements.get(el);
  if (knownIndex != null) {
    return knownIndex;
  }
  elementIndex += 1;
  const index = elementIndex;
  elementSelectors[index] = generateSelector(el);
  indexedElementRefs[index] = el;
  indexedElements.set(el, index);
  return index;
}
2432
/**
 * Resolve a whitespace-separated list of element ids (as used by
 * aria-labelledby) to the joined, trimmed text of those elements.
 *
 * @param {?string} ids id list
 * @returns {string|undefined} space-joined text, or undefined when the list
 *   is empty or no referenced element has text
 */
function getNodeTextByIds(ids) {
  if (!ids) {
    return void 0;
  }
  const pieces = [];
  for (const id of ids.split(/\s+/)) {
    const piece = document.getElementById(id)?.textContent?.trim() || "";
    if (piece) {
      pieces.push(piece);
    }
  }
  const joined = pieces.join(" ").trim();
  return joined || void 0;
}
2437
/**
 * Trim a string and map "nothing left" to undefined.
 *
 * @param {?string} value
 * @returns {string|undefined} trimmed text, or undefined for null, undefined
 *   or blank input
 */
function getTrimmedText(value) {
  const trimmed = value?.trim();
  return trimmed ? trimmed : void 0;
}
2441
/**
 * Add a value under `key` on `target`, turning the entry into an array once
 * a second value arrives.
 *
 * Only the target's own properties count as existing values. Reading
 * through the prototype chain (as before) made keys such as "toString" or
 * "constructor" look already populated, so the first real value was paired
 * with an inherited function, and the key "__proto__" could replace the
 * object's prototype. First values are therefore stored with
 * `Object.defineProperty`, which always creates an own data property.
 *
 * @param {Object} target object collecting values (mutated in place)
 * @param {?string} key property name; falsy keys are ignored
 * @param {*} value value to add; null and undefined are ignored
 * @returns {void}
 */
function pushPropertyValue(target, key, value) {
  if (!key || value == null) return;
  const hasOwnValue = Object.prototype.hasOwnProperty.call(target, key);
  const existing = hasOwnValue ? target[key] : void 0;
  if (existing === void 0) {
    Object.defineProperty(target, key, {
      value,
      writable: true,
      enumerable: true,
      configurable: true,
    });
    return;
  }
  if (Array.isArray(existing)) {
    existing.push(value);
    return;
  }
  target[key] = [existing, value];
}
2454
/**
 * Extract the value an element contributes as structured data, based on its
 * element type.
 *
 * Lookup order:
 *  - meta -> `content`
 *  - a / area / link -> `href`
 *  - img / audio / video / source / track / iframe / embed -> `src`
 *  - object -> `data`
 *  - data -> `value`; meter -> `value` converted to a string
 *  - time -> `dateTime`, falling back to text content
 *  - input / select / textarea -> `value`
 *  - any other element -> first non-blank of the `content`, `resource`,
 *    `href`, `src`, `datetime`, `data` attributes, then text content
 *
 * `HTMLMeterElement.value` is a number, so it is stringified before
 * trimming; passing it straight to `getTrimmedText` threw a TypeError.
 *
 * @param {Element} el
 * @returns {string|undefined} trimmed value, or undefined when blank
 */
function getStructuredElementValue(el) {
  if (el instanceof HTMLMetaElement) {
    return getTrimmedText(el.content);
  }
  if (el instanceof HTMLAnchorElement || el instanceof HTMLAreaElement || el instanceof HTMLLinkElement) {
    return getTrimmedText(el.href);
  }
  if (el instanceof HTMLImageElement || el instanceof HTMLAudioElement || el instanceof HTMLVideoElement || el instanceof HTMLSourceElement || el instanceof HTMLTrackElement || el instanceof HTMLIFrameElement || el instanceof HTMLEmbedElement) {
    return getTrimmedText(el.src);
  }
  if (el instanceof HTMLObjectElement) {
    return getTrimmedText(el.data);
  }
  if (el instanceof HTMLDataElement) {
    return getTrimmedText(el.value);
  }
  if (el instanceof HTMLMeterElement) {
    // Numeric property: convert before trimming.
    return getTrimmedText(String(el.value));
  }
  if (el instanceof HTMLTimeElement) {
    return getTrimmedText(el.dateTime) || getTrimmedText(el.textContent);
  }
  if (el instanceof HTMLInputElement || el instanceof HTMLSelectElement || el instanceof HTMLTextAreaElement) {
    return getTrimmedText(el.value);
  }
  // Generic elements: attribute fallbacks are read lazily, in priority order.
  const fallbackAttributes = ["content", "resource", "href", "src", "datetime", "data"];
  for (const attribute of fallbackAttributes) {
    const attributeValue = getTrimmedText(el.getAttribute(attribute));
    if (attributeValue) {
      return attributeValue;
    }
  }
  return getTrimmedText(el.textContent);
}
2482
/**
 * Whether an element is rendered and has a non-empty box.
 *
 * Values that are not HTMLElement instances are reported as visible.
 * Otherwise the element is hidden when its computed style has display
 * "none", visibility "hidden" or opacity "0", when it has a `hidden`
 * attribute or aria-hidden="true", or when its bounding box has no area.
 *
 * @param {*} el
 * @returns {boolean}
 */
function isElementVisible(el) {
  if (!(el instanceof HTMLElement)) {
    return true;
  }
  const computed = window.getComputedStyle(el);
  const hiddenByStyle = computed.display === "none" || computed.visibility === "hidden" || computed.opacity === "0";
  if (hiddenByStyle) {
    return false;
  }
  const hiddenByAttribute = el.hasAttribute("hidden") || el.getAttribute("aria-hidden") === "true";
  if (hiddenByAttribute) {
    return false;
  }
  const box = el.getBoundingClientRect();
  return box.width > 0 && box.height > 0;
}
2494
/**
 * Whether a rectangle with non-zero area overlaps the viewport at all.
 *
 * Viewport size comes from `window.innerWidth/innerHeight`, falling back to
 * the root element's client size, then 0.
 *
 * @param {{width:number,height:number,top:number,left:number,bottom:number,right:number}} rect
 * @returns {boolean}
 */
function isInViewportRect(rect) {
  const viewportWidth = window.innerWidth || document.documentElement?.clientWidth || 0;
  const viewportHeight = window.innerHeight || document.documentElement?.clientHeight || 0;
  const hasArea = rect.width > 0 && rect.height > 0;
  if (!hasArea) {
    return false;
  }
  const endsAfterOrigin = rect.bottom > 0 && rect.right > 0;
  if (!endsAfterOrigin) {
    return false;
  }
  return rect.top < viewportHeight && rect.left < viewportWidth;
}
2499
/**
 * Whether a rectangle with non-zero area lies entirely inside the viewport.
 *
 * Viewport size comes from `window.innerWidth/innerHeight`, falling back to
 * the root element's client size, then 0.
 *
 * @param {{width:number,height:number,top:number,left:number,bottom:number,right:number}} rect
 * @returns {boolean}
 */
function isFullyInViewportRect(rect) {
  const viewportWidth = window.innerWidth || document.documentElement?.clientWidth || 0;
  const viewportHeight = window.innerHeight || document.documentElement?.clientHeight || 0;
  const hasArea = rect.width > 0 && rect.height > 0;
  if (!hasArea) {
    return false;
  }
  const startsInside = rect.top >= 0 && rect.left >= 0;
  if (!startsInside) {
    return false;
  }
  return rect.bottom <= viewportHeight && rect.right <= viewportWidth;
}
2504
/**
 * Read a style's z-index as an integer.
 *
 * @param {{zIndex: string}} style computed or inline style
 * @returns {number} the parsed integer, or 0 for values such as "auto" that
 *   do not parse to a finite number
 */
function parseZIndex(style) {
  const parsed = Number.parseInt(style.zIndex, 10);
  if (!Number.isFinite(parsed)) {
    return 0;
  }
  return parsed;
}
2508
/**
 * Whether a rectangle covers the centre point of the viewport (edges
 * inclusive).
 *
 * Viewport size comes from `window.innerWidth/innerHeight`, falling back to
 * the root element's client size, then 0.
 *
 * @param {{top:number,left:number,bottom:number,right:number}} rect
 * @returns {boolean}
 */
function getViewportCenterCoverage(rect) {
  const viewportWidth = window.innerWidth || document.documentElement?.clientWidth || 0;
  const viewportHeight = window.innerHeight || document.documentElement?.clientHeight || 0;
  const centerX = viewportWidth / 2;
  const centerY = viewportHeight / 2;
  const spansCenterX = rect.left <= centerX && rect.right >= centerX;
  if (!spansCenterX) {
    return false;
  }
  return rect.top <= centerY && rect.bottom >= centerY;
}
2515
/**
 * Pick a label for an overlay element: its aria-label, else the text of the
 * elements named by aria-labelledby, else its id.
 *
 * @param {Element} el
 * @returns {string|undefined} undefined when none of the sources has text
 */
function getOverlayLabel(el) {
  const ariaLabel = getTrimmedText(el.getAttribute("aria-label"));
  if (ariaLabel) {
    return ariaLabel;
  }
  const labelledByText = getNodeTextByIds(el.getAttribute("aria-labelledby"));
  if (labelledByText) {
    return labelledByText;
  }
  return getTrimmedText(el.id) || void 0;
}
2518
/**
 * Classify an overlay element.
 *
 * @param {Element} el
 * @returns {"dialog"|"modal"|"overlay"} "dialog" for a <dialog> element or
 *   role dialog/alertdialog, "modal" for aria-modal="true", otherwise
 *   "overlay"
 */
function getOverlayType(el) {
  const tag = el.tagName.toLowerCase();
  const role = el.getAttribute("role");
  const isDialog = tag === "dialog" || role === "dialog" || role === "alertdialog";
  if (isDialog) {
    return "dialog";
  }
  return el.getAttribute("aria-modal") === "true" ? "modal" : "overlay";
}
2529
/**
 * Whether a rectangle comes within 24px of any viewport edge.
 *
 * Viewport size comes from `window.innerWidth/innerHeight`, falling back to
 * the root element's client size, then 0.
 *
 * @param {{top:number,left:number,bottom:number,right:number}} rect
 * @returns {boolean}
 */
function touchesViewportEdge(rect) {
  const viewportWidth = window.innerWidth || document.documentElement?.clientWidth || 0;
  const viewportHeight = window.innerHeight || document.documentElement?.clientHeight || 0;
  const edgePadding = 24;
  const nearLeftOrTop = rect.left <= edgePadding || rect.top <= edgePadding;
  if (nearLeftOrTop) {
    return true;
  }
  return rect.right >= viewportWidth - edgePadding || rect.bottom >= viewportHeight - edgePadding;
}
2535
/**
 * Whether any ancestor below `document.body` is positioned fixed or sticky.
 * The element itself and `document.body` are not checked.
 *
 * @param {Element} el
 * @returns {boolean}
 */
function hasFixedAncestor(el) {
  for (let ancestor = el.parentElement; ancestor && ancestor !== document.body; ancestor = ancestor.parentElement) {
    const { position } = window.getComputedStyle(ancestor);
    if (position === "fixed" || position === "sticky") {
      return true;
    }
  }
  return false;
}
2544
/**
 * Find the z-index that applies to an element: its own when positive,
 * otherwise the first positive z-index among its ancestors below
 * `document.body`.
 *
 * @param {Element} el
 * @param {{zIndex: string}} [style] the element's computed style; looked up
 *   when omitted
 * @returns {number} a positive z-index, or 0 when none is found
 */
function getEffectiveZIndex(el, style = window.getComputedStyle(el)) {
  const ownZ = parseZIndex(style);
  if (ownZ > 0) {
    return ownZ;
  }
  for (let ancestor = el.parentElement; ancestor && ancestor !== document.body; ancestor = ancestor.parentElement) {
    const ancestorZ = parseZIndex(window.getComputedStyle(ancestor));
    if (ancestorZ > 0) {
      return ancestorZ;
    }
  }
  return 0;
}
2555
/**
 * Heuristic for side drawers and sheets.
 *
 * The element must be at least 220x160 with an area ratio of at least 0.08,
 * reach a viewport edge, be pinned (fixed or sticky itself, or absolute
 * inside a fixed/sticky ancestor) and have an effective z-index of 5 or
 * more.
 *
 * @param {Element} el
 * @param {{position: string, zIndex: string}} style the element's computed style
 * @param {{width:number,height:number}} rect the element's bounding box
 * @param {number} areaRatio compared against the 0.08 minimum; presumably
 *   element area / viewport area (computed by the caller — confirm)
 * @returns {boolean}
 */
function looksLikeDrawer(el, style, rect, areaRatio) {
  const tooSmall = rect.width < 220 || rect.height < 160 || areaRatio < 0.08;
  if (tooSmall) {
    return false;
  }
  if (!touchesViewportEdge(rect)) {
    return false;
  }
  const { position } = style;
  const isPinned =
    position === "fixed" ||
    position === "sticky" ||
    (position === "absolute" && hasFixedAncestor(el));
  return isPinned ? getEffectiveZIndex(el, style) >= 5 : false;
}
2566
/**
 * Whether the first MAX_VISIBLE_TEXT characters of an element's text
 * contain an "added to cart/bag/basket" style phrase (case-insensitive).
 *
 * @param {Element} el
 * @returns {boolean}
 */
function looksLikeCartConfirmation(el) {
  const rawText = el.textContent || "";
  const haystack = rawText.slice(0, MAX_VISIBLE_TEXT).toLowerCase();
  const phrases = [
    "added to cart",
    "added to bag",
    "added to basket",
    "added to your cart",
    "added to your bag",
    "added to your basket"
  ];
  for (const phrase of phrases) {
    if (haystack.includes(phrase)) {
      return true;
    }
  }
  return false;
}
2578
/**
 * Find the best human-readable text for a control and report where it came
 * from.
 *
 * Radio and checkbox inputs try their associated label first. After that
 * the sources are tried in order: aria-label, text content, the input's
 * current value (falling back to its `value` attribute), the `value`
 * attribute, and the `title` attribute.
 *
 * @param {Element} el
 * @returns {{text?: string, source?: string}} `{ text, source }` for the
 *   first non-blank source, or `{}` when nothing is found
 */
function getControlTextData(el) {
  const isInput = el instanceof HTMLInputElement;
  if (isInput && (el.type === "radio" || el.type === "checkbox")) {
    const label = getInputLabel(el);
    if (label) {
      return { text: label, source: "label" };
    }
  }

  // Each reader runs only if the earlier sources produced nothing.
  const sources = [
    ["aria-label", () => getTrimmedText(el.getAttribute("aria-label"))],
    ["textContent", () => getTrimmedText(el.textContent)],
    ["value", () => (isInput ? getTrimmedText(el.value) || getTrimmedText(el.getAttribute("value")) : void 0)],
    ["value", () => getTrimmedText(el.getAttribute("value"))],
    ["title", () => getTrimmedText(el.getAttribute("title"))]
  ];
  for (const [source, read] of sources) {
    const text = read();
    if (text) {
      return { text, source };
    }
  }
  return {};
}
2597
/**
 * Classify an overlay control by what activating it is likely to do.
 *
 * Checks run in priority order: radio, dismiss, accept (cookie/consent
 * context required), submit, and finally the generic "action".
 *
 * @param {Element} el - The control element.
 * @param {string} label - Visible label already resolved for the control.
 * @returns {"radio"|"dismiss"|"accept"|"submit"|"action"}
 */
function getOverlayActionKind(el, label) {
  const lower = label.toLowerCase();
  const attrText = [
    el.getAttribute("id"),
    typeof el.className === "string" ? el.className : "",
    el.getAttribute("name"),
    el.getAttribute("title")
  ].filter(Boolean).join(" ").toLowerCase();

  if (el.getAttribute("role") === "radio" || el instanceof HTMLInputElement && el.type === "radio") {
    return "radio";
  }
  if (/close|dismiss|skip|cancel|not now|maybe later|no thanks|reject|decline/.test(
    lower
  ) || /modal-close|overlay-close/.test(attrText)) {
    return "dismiss";
  }
  // "Accept"-style wording only counts when the label or attributes also
  // mention a consent/cookie context.
  if (/accept|agree|allow/.test(lower) && /cookie|consent|privacy|gdpr|onetrust|cookiebot/.test(
    `${lower} ${attrText}`
  )) {
    return "accept";
  }
  // "ok" is matched as a whole word (also accepting "okay"): as a bare
  // substring it would fire on labels such as "Cookie settings" or "Book now".
  if (/submit|continue|next|confirm|done|\bok(?:ay)?\b|start|proceed/.test(lower)) {
    return "submit";
  }
  return "action";
}
2623
/**
 * Rank an overlay action so the most useful controls sort first.
 *
 * @param {{kind: string}} action - Action descriptor.
 * @returns {number} 40 dismiss, 35 accept, 30 submit, 20 radio, 10 otherwise.
 */
function getOverlayActionPriority(action) {
  const priorityByKind = new Map([
    ["dismiss", 40],
    ["accept", 35],
    ["submit", 30],
    ["radio", 20]
  ]);
  return priorityByKind.get(action.kind) ?? 10;
}
2637
/**
 * Gather the visible radio-style options inside an overlay.
 *
 * Options without a resolvable label are skipped, and options are
 * de-duplicated by selector (or by label text when no selector exists).
 *
 * @param {Element} root - Overlay root element.
 * @returns {Array<object>} At most MAX_OPTIONS_PER_SELECT option descriptors.
 */
function collectOverlayRadioOptions(root) {
  const usedKeys = /* @__PURE__ */ new Set();
  const collected = [];
  for (const node of root.querySelectorAll('[role="radio"], input[type="radio"]')) {
    if (!(node instanceof HTMLElement) || !isElementVisible(node)) continue;
    const labelData = getControlTextData(node);
    if (!labelData.text) continue;
    const selector = generateSelector(node);
    const dedupeKey = selector || labelData.text;
    if (usedKeys.has(dedupeKey)) continue;
    usedKeys.add(dedupeKey);
    // ARIA state wins; native inputs fall back to their `checked` property.
    const ariaChecked = node.getAttribute("aria-checked") === "true";
    const nativeChecked = node instanceof HTMLInputElement ? node.checked : false;
    collected.push({
      label: labelData.text.slice(0, MAX_LABEL_LENGTH),
      selector,
      checked: ariaChecked || nativeChecked,
      labelSource: labelData.source,
      looksCorrect: looksLikeCorrectOption(labelData.text)
    });
  }
  return collected.slice(0, MAX_OPTIONS_PER_SELECT);
}
2659
/**
 * Gather the visible actionable controls (buttons, links, radios) inside an
 * overlay, highest-priority first.
 *
 * Controls with no readable label are skipped, except consent-banner
 * controls identified by id/class, which receive a synthetic label.
 *
 * @param {Element} root - Overlay root element.
 * @returns {Array<{label: string, selector: string, kind: string, disabled: boolean}>}
 *   At most MAX_RESULT_COUNT actions.
 */
function collectOverlayActions(root) {
  const usedSelectors = /* @__PURE__ */ new Set();
  const found = [];
  const candidates = root.querySelectorAll(
    'button, [role="button"], a[href], input[type="button"], input[type="submit"], [role="radio"], input[type="radio"]'
  );
  for (const node of candidates) {
    if (!(node instanceof HTMLElement) || !isElementVisible(node)) continue;
    const selector = generateSelector(node);
    if (!selector || usedSelectors.has(selector)) continue;

    let labelData = getControlTextData(node);
    if (!labelData.text) {
      // Icon-only consent buttons: derive a label from id/class hints.
      const hint = [
        node.id,
        typeof node.className === "string" ? node.className : ""
      ].filter(Boolean).join(" ").toLowerCase();
      if (/onetrust|consent|cookie|banner|gdpr|trustarc|cookiebot/.test(hint)) {
        let fallbackText = "Consent button";
        if (hint.includes("accept")) {
          fallbackText = "Accept cookies";
        } else if (hint.includes("reject")) {
          fallbackText = "Reject cookies";
        } else if (hint.includes("close")) {
          fallbackText = "Close";
        }
        labelData = { text: fallbackText, source: "fallback" };
      }
    }
    if (!labelData.text) continue;

    usedSelectors.add(selector);
    found.push({
      label: labelData.text.slice(0, MAX_LABEL_LENGTH),
      selector,
      kind: getOverlayActionKind(node, labelData.text),
      disabled: isElementDisabled(node)
    });
  }
  found.sort((left, right) => getOverlayActionPriority(right) - getOverlayActionPriority(left));
  return found.slice(0, MAX_RESULT_COUNT);
}
2694
/**
 * Pick a short message describing an overlay.
 *
 * Preference order: first heading inside the overlay, text referenced by
 * aria-describedby, then the overlay's own text. Each candidate is truncated
 * to MAX_DESCRIPTION_LENGTH characters.
 *
 * @param {Element} el - Overlay element.
 * @returns {string|undefined}
 */
function getOverlayMessage(el) {
  const heading = el.querySelector("h1, h2, h3, h4, h5, h6");
  const headingText = getTrimmedText(heading?.textContent)?.slice(0, MAX_DESCRIPTION_LENGTH);
  if (headingText) {
    return headingText;
  }
  const describedText = getNodeTextByIds(el.getAttribute("aria-describedby"))?.slice(0, MAX_DESCRIPTION_LENGTH);
  if (describedText) {
    return describedText;
  }
  return getTrimmedText(el.textContent)?.slice(0, MAX_DESCRIPTION_LENGTH);
}
2698
/**
 * Assign a semantic kind to a detected overlay.
 *
 * Consent keywords in the node's id, class, role, aria-label or text take
 * precedence, followed by the caller-supplied flags, then alert-like
 * keywords; anything else is a plain "overlay".
 *
 * @param {{node: Element, cartConfirm: boolean, radioOptions: Array, drawerLike: boolean}} args
 * @returns {string} One of "cookie_consent", "cart_confirmation",
 *   "selection_modal", "drawer", "alert", "overlay".
 */
function classifyOverlayKind(args) {
  const { node } = args;
  const searchText = [
    node.id,
    typeof node.className === "string" ? node.className : "",
    node.getAttribute("role"),
    node.getAttribute("aria-label"),
    node.textContent
  ].filter(Boolean).join(" ").toLowerCase();

  if (/cookie|consent|privacy|gdpr|onetrust|cookiebot|trustarc/.test(searchText)) {
    return "cookie_consent";
  }
  if (args.cartConfirm) {
    return "cart_confirmation";
  }
  if (args.radioOptions.length > 0) {
    return "selection_modal";
  }
  if (args.drawerLike) {
    return "drawer";
  }
  return /alert|warning|notice|success|error/.test(searchText) ? "alert" : "overlay";
}
2715
// Upper bound on how many elements the overlay scans will visit.
const MAX_OVERLAY_CANDIDATES = 2000;

/**
 * Invoke `visitor` for HTMLElement descendants of document.body, in the order
 * querySelectorAll("*") yields them, stopping once `maxCandidates` elements
 * have been visited. Does nothing when the document has no body.
 *
 * @param {number} maxCandidates - Maximum number of elements to visit.
 * @param {(node: HTMLElement) => void} visitor - Called once per element.
 * @returns {void}
 */
function forEachOverlayCandidate(maxCandidates, visitor) {
  if (!document.body) {
    return;
  }
  let visitedCount = 0;
  for (const node of document.body.querySelectorAll("*")) {
    if (node instanceof HTMLElement) {
      if (visitedCount >= maxCandidates) {
        return;
      }
      visitedCount += 1;
      visitor(node);
    }
  }
}
2726
/**
 * Scan the page for visible overlays (dialogs, modals, drawers, cart
 * confirmations and large pinned surfaces) and describe each one.
 *
 * Only the first MAX_OVERLAY_CANDIDATES elements of the body are examined.
 * Results are sorted with interaction-blocking overlays first, then by
 * descending z-index.
 *
 * @returns {Array<object>} Overlay descriptors; empty when there is no body.
 */
function detectOverlays() {
  if (!document.body) return [];
  const viewportWidth = window.innerWidth || document.documentElement?.clientWidth || 0;
  const viewportHeight = window.innerHeight || document.documentElement?.clientHeight || 0;
  // Clamp to 1 so the area ratio below never divides by zero.
  const viewportArea = Math.max(1, viewportWidth * viewportHeight);
  const detected = [];
  const recorded = /* @__PURE__ */ new Set();

  forEachOverlayCandidate(MAX_OVERLAY_CANDIDATES, (node) => {
    if (recorded.has(node) || !isElementVisible(node)) return;
    const style = window.getComputedStyle(node);
    if (style.pointerEvents === "none") return;
    const rect = node.getBoundingClientRect();
    if (!isInViewportRect(rect)) return;

    const overlayType = getOverlayType(node);
    const dialogLike = overlayType === "dialog" || overlayType === "modal";
    const areaRatio = rect.width * rect.height / viewportArea;
    const drawerLike = looksLikeDrawer(node, style, rect, areaRatio);
    const cartConfirm = !dialogLike && !drawerLike && (style.position === "fixed" || style.position === "sticky" || style.position === "absolute") && rect.width >= 160 && rect.height >= 100 && looksLikeCartConfirmation(node);
    // Last resort: a large, high pinned surface covering the viewport centre.
    const blockingSurface = dialogLike || drawerLike || cartConfirm || (style.position === "fixed" || style.position === "sticky") && parseZIndex(style) >= 10 && areaRatio >= 0.3 && getViewportCenterCoverage(rect);
    if (!blockingSurface && !dialogLike) {
      return;
    }

    const actions = collectOverlayActions(node);
    const radioOptions = collectOverlayRadioOptions(node);
    const selectorForKind = (kind) => actions.find((action) => action.kind === kind)?.selector;
    recorded.add(node);
    detected.push({
      element: node,
      type: overlayType ?? "overlay",
      kind: classifyOverlayKind({
        node,
        drawerLike,
        cartConfirm,
        radioOptions
      }),
      role: getTrimmedText(node.getAttribute("role")) || void 0,
      label: getOverlayLabel(node),
      selector: generateSelector(node),
      text: getTrimmedText(node.textContent)?.slice(0, MAX_DESCRIPTION_LENGTH),
      message: getOverlayMessage(node),
      blocksInteraction: blockingSurface,
      dismissSelector: selectorForKind("dismiss"),
      acceptSelector: selectorForKind("accept"),
      submitSelector: selectorForKind("submit"),
      actions,
      radioOptions,
      zIndex: parseZIndex(style)
    });
  });

  return detected.sort((first, second) => {
    const blockingDelta = (second.blocksInteraction ? 1 : 0) - (first.blocksInteraction ? 1 : 0);
    return blockingDelta !== 0 ? blockingDelta : second.zIndex - first.zIndex;
  });
}
2782
/**
 * Heuristic: does this element look like an overlay that may appear later
 * (dialog semantics, or consent/newsletter wording)?
 *
 * @param {HTMLElement} el - Candidate element.
 * @returns {boolean} True for dialog-like elements, or when the attributes
 *   plus the first MAX_ATTR_TEXT characters of text hit a known keyword.
 */
function isLikelyDormantOverlay(el) {
  const tagName = el.tagName.toLowerCase();
  const role = getTrimmedText(el.getAttribute("role")) || "";
  const hasDialogSemantics = tagName === "dialog" || role === "dialog" || role === "alertdialog" || el.getAttribute("aria-modal") === "true";
  if (hasDialogSemantics) {
    return true;
  }

  const attributeText = [
    el.id,
    el.className,
    el.getAttribute("data-testid"),
    el.getAttribute("data-test"),
    el.getAttribute("aria-label"),
    el.getAttribute("title"),
    el.getAttribute("data-module-name")
  ].filter(Boolean).join(" ").toLowerCase();
  const bodyText = getTrimmedText(el.textContent)?.toLowerCase() || "";
  const keywordPattern = /cookie|consent|privacy|gdpr|ccpa|onetrust|ot-sdk|trustarc|didomi|sp_message|qc-cmp|cmp|newsletter|subscribe/;
  return keywordPattern.test(`${attributeText} ${bodyText.slice(0, MAX_ATTR_TEXT)}`);
}
2802
/**
 * List currently hidden elements that look like overlays waiting to appear.
 *
 * Only the first MAX_OVERLAY_CANDIDATES elements of the body are examined;
 * results are de-duplicated by selector.
 *
 * @returns {Array<object>} At most MAX_RESULT_COUNT descriptors; empty when
 *   there is no body.
 */
function detectDormantOverlays() {
  if (!document.body) return [];
  const usedSelectors = /* @__PURE__ */ new Set();
  const dormant = [];
  forEachOverlayCandidate(MAX_OVERLAY_CANDIDATES, (node) => {
    // Visible elements are handled by the active overlay scan instead.
    const hiddenCandidate = !isElementVisible(node) && isLikelyDormantOverlay(node);
    if (!hiddenCandidate) return;
    const selector = generateSelector(node);
    if (!selector || usedSelectors.has(selector)) return;
    usedSelectors.add(selector);
    dormant.push({
      type: getOverlayType(node) ?? "overlay",
      role: getTrimmedText(node.getAttribute("role")) || void 0,
      label: getOverlayLabel(node),
      selector,
      text: getTrimmedText(node.textContent)?.slice(0, MAX_DESCRIPTION_LENGTH)
    });
  });
  return dormant.slice(0, MAX_RESULT_COUNT);
}
2822
/**
 * Pick a hit-test point for a rectangle: its centre, clamped into the
 * viewport.
 *
 * @param {DOMRect} rect - Rectangle in viewport coordinates.
 * @returns {{x: number, y: number}|null} Null when isInViewportRect rejects
 *   the rectangle.
 */
function samplePointForRect(rect) {
  if (!isInViewportRect(rect)) {
    return null;
  }
  const viewportWidth = window.innerWidth || document.documentElement?.clientWidth || 0;
  const viewportHeight = window.innerHeight || document.documentElement?.clientHeight || 0;
  // Clamp into [0, size - 1]; the upper bound never drops below 0.
  const clampTo = (value, size) => Math.min(Math.max(0, size - 1), Math.max(0, value));
  const centerX = rect.left + rect.width / 2;
  const centerY = rect.top + rect.height / 2;
  return {
    x: clampTo(centerX, viewportWidth),
    y: clampTo(centerY, viewportHeight)
  };
}
2833
/**
 * Describe how visible and reachable an element currently is.
 *
 * Non-HTMLElement inputs are reported as fully visible and unobstructed.
 * For elements in the viewport, the element's clamped centre point is
 * hit-tested: when an unrelated element is on top there, the element is
 * marked obscured, and blockedByOverlay is set when that top element belongs
 * to a blocking active overlay that does not contain `el`.
 *
 * @param {Element} el - Element to inspect.
 * @returns {{visible: boolean, inViewport: boolean, fullyInViewport: boolean,
 *   obscured: boolean, blockedByOverlay: boolean}}
 */
function getVisibilityState(el) {
  if (!(el instanceof HTMLElement)) {
    return {
      visible: true,
      inViewport: true,
      fullyInViewport: true,
      obscured: false,
      blockedByOverlay: false
    };
  }
  const bounds = el.getBoundingClientRect();
  const visible = isElementVisible(el);
  const inViewport = visible && isInViewportRect(bounds);
  const fullyInViewport = visible && isFullyInViewportRect(bounds);
  const state = {
    visible,
    inViewport,
    fullyInViewport,
    obscured: false,
    blockedByOverlay: false
  };

  const probe = inViewport ? samplePointForRect(bounds) : null;
  if (!probe) {
    return state;
  }
  const hit = document.elementFromPoint(probe.x, probe.y);
  // Ancestors and descendants of `el` at the probe point do not obscure it.
  const coveredByUnrelated = hit && hit !== el && !el.contains(hit) && !(hit instanceof HTMLElement && hit.contains(el));
  if (coveredByUnrelated) {
    state.obscured = true;
    state.blockedByOverlay = activeOverlays.some(
      (overlay) => overlay.blocksInteraction && overlay.element.contains(hit) && !overlay.element.contains(el)
    );
  }
  return state;
}
2869
/**
 * Capture the viewport size and current scroll offsets.
 *
 * Scroll offsets are the largest numeric value reported by any of the
 * window / visualViewport / scrolling-element sources, never below 0.
 *
 * @returns {{width: number, height: number, scrollX: number, scrollY: number}}
 */
function getViewportSnapshot() {
  const scroller = document.scrollingElement || document.documentElement || document.body;
  // Ignore sources that are missing; take the largest numeric reading.
  const largestOffset = (readings) => Math.max(0, ...readings.filter((reading) => typeof reading === "number"));

  const scrollX = largestOffset([
    window.scrollX,
    window.pageXOffset,
    window.visualViewport?.pageLeft,
    scroller?.scrollLeft,
    document.documentElement?.scrollLeft,
    document.body?.scrollLeft
  ]);
  const scrollY = largestOffset([
    window.scrollY,
    window.pageYOffset,
    window.visualViewport?.pageTop,
    scroller?.scrollTop,
    document.documentElement?.scrollTop,
    document.body?.scrollTop
  ]);

  return {
    width: window.innerWidth || document.documentElement?.clientWidth || 0,
    height: window.innerHeight || document.documentElement?.clientHeight || 0,
    scrollX,
    scrollY
  };
}
2894
/**
 * Report whether an element is disabled, either through the native
 * `disabled` attribute or through aria-disabled="true".
 *
 * @param {Element} el - Element to inspect.
 * @returns {boolean}
 */
function isElementDisabled(el) {
  const ariaDisabled = () => el.getAttribute("aria-disabled") === "true";
  return el.hasAttribute("disabled") || ariaDisabled();
}
2897
/**
 * Name the nearest structural region that contains an element.
 *
 * Walks up from the element's parent; the first ancestor matching a rule
 * (by tag name or role) decides the result. Within one ancestor the rules
 * are checked in the order listed below, then `<form>`.
 *
 * @param {Element} el - Element whose ancestors are inspected.
 * @returns {string} "nav", "header", "main", "footer", "sidebar", "article",
 *   "dialog", "form" / "form#<id>", or "content" when nothing matches.
 */
function getElementContext(el) {
  const regionRules = [
    { tag: "nav", roles: ["navigation"], context: "nav" },
    { tag: "header", roles: ["banner"], context: "header" },
    { tag: "main", roles: ["main"], context: "main" },
    { tag: "footer", roles: ["contentinfo"], context: "footer" },
    { tag: "aside", roles: ["complementary"], context: "sidebar" },
    { tag: "article", roles: ["article"], context: "article" },
    { tag: "dialog", roles: ["dialog", "alertdialog"], context: "dialog" }
  ];
  for (let ancestor = el.parentElement; ancestor; ancestor = ancestor.parentElement) {
    const tag = ancestor.tagName.toLowerCase();
    const role = ancestor.getAttribute("role");
    const matched = regionRules.find((rule) => rule.tag === tag || rule.roles.includes(role));
    if (matched) {
      return matched.context;
    }
    if (tag === "form") {
      return `form${ancestor.id ? `#${ancestor.id}` : ""}`;
    }
  }
  return "content";
}
2916
/**
 * Resolve a human-readable label for a form control.
 *
 * Sources are tried in order, and each is used only when it yields
 * non-empty text: a `<label for>` pointing at the control's id, a wrapping
 * `<label>` (with nested form controls stripped out), aria-label,
 * aria-labelledby, and finally placeholder.
 *
 * An associated `<label for>` with no text (for example an icon-only label)
 * no longer ends the lookup; the remaining sources are still consulted,
 * matching getInputLabelWithSource.
 *
 * @param {Element} el - Form control to label.
 * @returns {string|undefined} The label text, or undefined when none exists.
 */
function getInputLabel(el) {
  if (el.id) {
    const linkedLabel = document.querySelector(
      `label[for="${escapeSelectorValue(el.id)}"]`
    );
    const linkedText = getTrimmedText(linkedLabel?.textContent);
    if (linkedText) return linkedText;
  }
  const parentLabel = el.closest("label");
  if (parentLabel) {
    // Work on a clone so stripping nested controls never touches the page.
    const clone = parentLabel.cloneNode(true);
    clone.querySelectorAll("input, select, textarea").forEach((input) => {
      input.remove();
    });
    const text = getTrimmedText(clone.textContent);
    if (text) return text;
  }
  return getTrimmedText(el.getAttribute("aria-label")) || getNodeTextByIds(el.getAttribute("aria-labelledby")) || getTrimmedText(el.getAttribute("placeholder")) || void 0;
}
2934
/**
 * Resolve a label for a form control and report which source supplied it.
 *
 * Order: `<label for>`, wrapping `<label>` (nested controls stripped),
 * aria-label, aria-labelledby (reported as "label"), placeholder.
 *
 * @param {Element} el - Form control to label.
 * @returns {{label?: string, source?: string}} Empty object when no source
 *   yields text.
 */
function getInputLabelWithSource(el) {
  const fromLabel = (text) => ({ label: text, source: "label" });

  if (el.id) {
    const linkedLabel = document.querySelector(
      `label[for="${escapeSelectorValue(el.id)}"]`
    );
    const linkedText = getTrimmedText(linkedLabel?.textContent);
    if (linkedText) {
      return fromLabel(linkedText);
    }
  }

  const wrappingLabel = el.closest("label");
  if (wrappingLabel) {
    // Strip nested controls from a clone so only the caption text remains.
    const copy = wrappingLabel.cloneNode(true);
    for (const control of copy.querySelectorAll("input, select, textarea")) {
      control.remove();
    }
    const wrappingText = getTrimmedText(copy.textContent);
    if (wrappingText) {
      return fromLabel(wrappingText);
    }
  }

  const ariaText = getTrimmedText(el.getAttribute("aria-label"));
  if (ariaText) {
    return { label: ariaText, source: "aria-label" };
  }

  const labelledByText = getNodeTextByIds(el.getAttribute("aria-labelledby"));
  if (labelledByText) {
    return fromLabel(labelledByText);
  }

  const placeholderText = getTrimmedText(el.getAttribute("placeholder"));
  if (placeholderText) {
    return { label: placeholderText, source: "placeholder" };
  }

  return {};
}
2959
/**
 * Resolve a label for a non-native text field (contenteditable / ARIA
 * textbox) and report its source.
 *
 * Order: aria-label, aria-labelledby (reported as "label"), placeholder,
 * then the element's own text.
 *
 * @param {Element} el - Custom text field.
 * @returns {{label?: string, source?: string}} Empty object when no source
 *   yields text.
 */
function getCustomTextFieldLabelWithSource(el) {
  // Each resolver is evaluated lazily, in priority order.
  const resolvers = [
    ["aria-label", () => getTrimmedText(el.getAttribute("aria-label"))],
    ["label", () => getNodeTextByIds(el.getAttribute("aria-labelledby"))],
    ["placeholder", () => getTrimmedText(el.getAttribute("placeholder"))],
    ["text", () => getTrimmedText(el.textContent)]
  ];
  for (const [source, resolve] of resolvers) {
    const label = resolve();
    if (label) {
      return { label, source };
    }
  }
  return {};
}
2970
/**
 * Report whether an element is a native `<input>`, `<textarea>` or
 * `<select>`.
 *
 * @param {*} el - Value to test.
 * @returns {boolean}
 */
function isNativeFormField(el) {
  if (el instanceof HTMLInputElement) {
    return true;
  }
  if (el instanceof HTMLTextAreaElement) {
    return true;
  }
  return el instanceof HTMLSelectElement;
}
2973
/**
 * Decide whether a non-native element should be reported as a text field.
 *
 * The element must be a visible, enabled HTMLElement that is not a native
 * form field, and must either be content-editable or carry a textbox /
 * searchbox / combobox role.
 *
 * @param {*} el - Candidate element.
 * @returns {boolean}
 */
function shouldExposeCustomTextField(el) {
  if (!(el instanceof HTMLElement) || isNativeFormField(el)) {
    return false;
  }
  if (!isElementVisible(el) || isElementDisabled(el)) {
    return false;
  }
  const role = getElementRole(el);
  const editable = el.isContentEditable || el.hasAttribute("contenteditable") && el.getAttribute("contenteditable") !== "false";
  return editable || role === "textbox" || role === "searchbox" || role === "combobox";
}
2979
/**
 * Map a custom text field onto an input-type label.
 *
 * @param {Element} el - Custom text field.
 * @returns {"search"|"combobox"|"text"|undefined} Derived from the role;
 *   content-editable HTMLElements without such a role report "text".
 */
function getCustomTextFieldInputType(el) {
  switch (getElementRole(el)) {
    case "searchbox":
      return "search";
    case "combobox":
      return "combobox";
    case "textbox":
      return "text";
    default:
      return el instanceof HTMLElement && el.isContentEditable ? "text" : void 0;
  }
}
2986
/**
 * Report whether a custom text field currently holds any content, without
 * exposing the content itself.
 *
 * @param {Element} el - Custom text field.
 * @returns {true|undefined} True when text content, aria-valuetext or the
 *   `value` attribute is non-empty; otherwise undefined.
 */
function getCustomTextFieldHasValue(el) {
  const content = getTrimmedText(el.textContent) || getTrimmedText(el.getAttribute("aria-valuetext")) || getTrimmedText(el.getAttribute("value"));
  if (content) {
    return true;
  }
  return void 0;
}
2990
/**
 * Resolve the caption of a button-like control and report its source.
 *
 * Order: text content, value (the property for `<input>`/`<button>`, the
 * attribute otherwise), aria-label. Falls back to the literal "Button".
 *
 * @param {Element} el - Button-like control.
 * @returns {{text: string, source: string}}
 */
function getButtonTextWithSource(el) {
  const caption = getTrimmedText(el.textContent);
  if (caption) {
    return { text: caption, source: "text" };
  }

  const hasValueProperty = el instanceof HTMLInputElement || el instanceof HTMLButtonElement;
  const valueText = getTrimmedText(hasValueProperty ? el.value : el.getAttribute("value"));
  if (valueText) {
    return { text: valueText, source: "value" };
  }

  const ariaText = getTrimmedText(el.getAttribute("aria-label"));
  if (ariaText) {
    return { text: ariaText, source: "aria-label" };
  }

  return { text: "Button", source: "text" };
}
2999
/**
 * Find the selector of the first active overlay related to an element: the
 * overlay is the element itself, contains it, or is contained by it.
 *
 * @param {Element} el - Element to look up.
 * @returns {string|undefined} The overlay's selector, or undefined when no
 *   active overlay is related.
 */
function getParentOverlaySelector(el) {
  for (const candidate of activeOverlays) {
    const overlayEl = candidate.element;
    const related = overlayEl === el || overlayEl.contains(el) || el instanceof HTMLElement && el.contains(overlayEl);
    if (related) {
      return candidate.selector;
    }
  }
  return void 0;
}
3005
/**
 * Determine an element's role: the explicit `role` attribute when present,
 * otherwise "link" for `<a>` and "button" for `<button>`.
 *
 * @param {Element} el - Element to inspect.
 * @returns {string|undefined} Undefined for other tags without a role.
 */
function getElementRole(el) {
  const explicitRole = getTrimmedText(el.getAttribute("role"));
  if (explicitRole) {
    return explicitRole;
  }
  switch (el.tagName.toLowerCase()) {
    case "a":
      return "link";
    case "button":
      return "button";
    default:
      return void 0;
  }
}
3008
/**
 * Resolve an element's accessible description.
 *
 * Order: aria-description, text referenced by aria-describedby, `title`.
 *
 * @param {Element} el - Element to inspect.
 * @returns {string|undefined}
 */
function getElementDescription(el) {
  const inlineDescription = getTrimmedText(el.getAttribute("aria-description"));
  if (inlineDescription) {
    return inlineDescription;
  }
  const referencedDescription = getNodeTextByIds(el.getAttribute("aria-describedby"));
  if (referencedDescription) {
    return referencedDescription;
  }
  return getTrimmedText(el.getAttribute("title")) || void 0;
}
3011
/**
 * Decide whether a field's current value may be included in the output.
 *
 * Only `<input type="number">` fields whose name, id, placeholder,
 * aria-label or resolved label mention a quantity-like word qualify.
 *
 * @param {*} el - Candidate field.
 * @returns {boolean}
 */
function shouldExposeFieldValue(el) {
  const isNumberInput = el instanceof HTMLInputElement && (el.type || "").toLowerCase() === "number";
  if (!isNumberInput) {
    return false;
  }
  const resolvedLabel = getInputLabelWithSource(el).label;
  const hints = [
    el.name,
    el.id,
    el.getAttribute("placeholder"),
    el.getAttribute("aria-label"),
    resolvedLabel
  ];
  const haystack = hints.filter(Boolean).join(" ").toLowerCase();
  return /\b(qty|quantity|count|items?)\b/.test(haystack);
}
3029
/**
 * Return a field's value when it is allowed to be exposed.
 *
 * Password, checkbox and radio fields never expose a value; other input /
 * textarea fields do so only when shouldExposeFieldValue approves.
 *
 * @param {*} el - Candidate field.
 * @returns {string|undefined}
 */
function getElementValue(el) {
  const isTextControl = el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement;
  if (!isTextControl) {
    return void 0;
  }
  const withheldTypes = ["password", "checkbox", "radio"];
  if (withheldTypes.includes(el.type)) {
    return void 0;
  }
  if (!shouldExposeFieldValue(el)) {
    return void 0;
  }
  return getTrimmedText(el.value);
}
3038
/**
 * Report whether a native field currently holds a non-empty value, without
 * revealing the value.
 *
 * @param {*} el - Candidate field.
 * @returns {true|undefined} Undefined for empty fields, for password /
 *   checkbox / radio inputs, and for anything that is not a native field.
 */
function getElementHasValue(el) {
  const isTextControl = el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement;
  if (isTextControl) {
    const withheld = el.type === "password" || el.type === "checkbox" || el.type === "radio";
    if (withheld) {
      return void 0;
    }
  } else if (!(el instanceof HTMLSelectElement)) {
    return void 0;
  }
  return getTrimmedText(el.value) ? true : void 0;
}
3050
/**
 * List the options of a `<select>` as label/value pairs.
 *
 * The label is the option's trimmed text, falling back to its trimmed
 * value. Options with neither label nor value are dropped, and the list is
 * capped at MAX_OPTIONS_DISPLAY entries.
 *
 * @param {HTMLSelectElement} el - Select element.
 * @returns {Array<{label: string, value: string}>|undefined} Undefined when
 *   no usable options remain.
 */
function getSelectOptions(el) {
  const usable = [];
  for (const option of Array.from(el.options)) {
    const entry = {
      label: option.textContent?.trim() || option.value.trim(),
      value: option.value
    };
    if (entry.label || entry.value) {
      usable.push(entry);
    }
  }
  const limited = usable.slice(0, MAX_OPTIONS_DISPLAY);
  if (limited.length > 0) {
    return limited;
  }
  return void 0;
}
3057
/**
 * Read a tri-state ARIA attribute.
 *
 * @param {Element} el - Element carrying the attribute.
 * @param {string} attr - Attribute name, e.g. "aria-expanded".
 * @returns {boolean|undefined} True for "true", false for "false",
 *   undefined for anything else (including a missing attribute).
 */
function getAriaBoolean(el, attr) {
  switch (el.getAttribute(attr)) {
    case "true":
      return true;
    case "false":
      return false;
    default:
      return void 0;
  }
}
3063
/**
 * Find the focused element, following focus into nested shadow roots.
 *
 * @returns {Element|null} The innermost active element reachable through
 *   `shadowRoot.activeElement`, or whatever document.activeElement holds.
 */
function getDeepActiveElement() {
  let focused = document.activeElement;
  for (;;) {
    // Only HTMLElements with a shadow root holding focus can go deeper.
    const nested = focused instanceof HTMLElement ? focused.shadowRoot?.activeElement : null;
    if (!nested) {
      return focused;
    }
    focused = nested;
  }
}
3070
/**
 * Assemble the metadata shared by every reported element: structural
 * context, owning overlay, selector, index, role, description, visibility
 * state, disabled/focused flags and common ARIA states.
 *
 * Helpers are invoked in the same order as the resulting object's keys.
 *
 * @param {Element} el - Element to describe.
 * @returns {object} Metadata record.
 */
function buildBaseMetadata(el) {
  const context = getElementContext(el);
  const parentOverlay = getParentOverlaySelector(el);
  const selector = generateSelector(el);
  const index = assignIndex(el);
  const role = getElementRole(el);
  const description = getElementDescription(el);
  const visibility = getVisibilityState(el);
  const disabled = isElementDisabled(el);
  // `focused` is true or absent, never false.
  const focused = getDeepActiveElement() === el || void 0;
  return {
    context,
    parentOverlay,
    selector,
    index,
    role,
    description,
    ...visibility,
    disabled,
    focused,
    ariaExpanded: getAriaBoolean(el, "aria-expanded"),
    ariaPressed: getAriaBoolean(el, "aria-pressed"),
    ariaSelected: getAriaBoolean(el, "aria-selected")
  };
}
3086
/**
 * Decide whether an iframe `src` points at a real, navigable page.
 *
 * @param {string} src - Raw source URL.
 * @returns {boolean} False for blank values and for about:blank,
 *   javascript:, data:, blob: and file: URLs.
 */
function isNavigableEmbeddedSrc(src) {
  const normalized = src.trim().toLowerCase();
  if (!normalized) {
    return false;
  }
  const nonNavigable = /^(about:blank|javascript:|data:|blob:|file:)/i;
  return !nonNavigable.test(normalized);
}
3090
/**
 * Build a short label for an embedded iframe.
 *
 * Uses the frame's title, aria-label, name or id when present. Otherwise
 * the label is derived from the source host ("www." stripped), prefixed as
 * a ticketing page when the URL contains ticketing-related words.
 *
 * @param {HTMLIFrameElement} iframe - Frame to label.
 * @returns {string} Label of at most MAX_LABEL_LENGTH characters.
 */
function getEmbeddedFrameLabel(iframe) {
  const authoredLabel = getTrimmedText(iframe.getAttribute("title")) || getTrimmedText(iframe.getAttribute("aria-label")) || getTrimmedText(iframe.name) || getTrimmedText(iframe.id);
  if (authoredLabel) {
    return authoredLabel.slice(0, MAX_LABEL_LENGTH);
  }

  let hostName = "embedded page";
  try {
    const frameUrl = new URL(iframe.src, window.location.href);
    hostName = frameUrl.hostname.replace(/^www\./, "");
  } catch {
    // Unparseable src: keep the generic host placeholder.
  }

  const looksLikeTicketing = /\b(ticket|showtime|movie|cinema|theat(?:er|re))\b/.test(
    iframe.src.toLowerCase()
  );
  const prefix = looksLikeTicketing ? "Embedded ticketing page" : "Embedded page";
  return `${prefix}: ${hostName}`.slice(0, MAX_LABEL_LENGTH);
}
3109
/**
 * Collect the page's headings (h1-h6, as found by deepQuerySelectorAll).
 *
 * @returns {Array<{level: number, text: string}>} Headings with non-empty
 *   trimmed text, in the order the query returns them.
 */
function extractHeadings() {
  const headings = [];
  for (const el of deepQuerySelectorAll("h1, h2, h3, h4, h5, h6")) {
    const text = el.textContent?.trim() || "";
    if (!text) continue;
    // tagName is "H<n>"; its second character is the heading level.
    headings.push({
      level: Number.parseInt(el.tagName[1], 10),
      text
    });
  }
  return headings;
}
3119
/**
 * Collect navigation links from nav regions of the page.
 *
 * Links without text or pointing at a same-page fragment are ignored.
 * The result keeps only the first link seen for each href.
 *
 * @returns {Array<object>} Link descriptors with context "nav".
 */
function extractNavigation() {
  const candidates = [];
  const regions = deepQuerySelectorAll(
    'nav, [role="navigation"], header nav, [role="banner"] nav'
  );
  for (const region of regions) {
    for (const anchor of deepQuerySelectorAll("a[href]", region)) {
      const label = anchor.textContent?.trim();
      if (!label || anchor.getAttribute("href")?.startsWith("#")) continue;
      candidates.push({
        type: "link",
        text: label.slice(0, MAX_LABEL_LENGTH),
        href: anchor.href.slice(0, MAX_HREF_LENGTH),
        ...buildBaseMetadata(anchor),
        context: "nav"
      });
    }
  }

  const seenHrefs = /* @__PURE__ */ new Set();
  const unique = [];
  for (const item of candidates) {
    if (!item.href || seenHrefs.has(item.href)) continue;
    seenHrefs.add(item.href);
    unique.push(item);
  }
  return unique;
}
3144
/**
 * Collect optional form-field metadata; only properties that are present
 * are included.
 *
 * @param {Element} el - Form field.
 * @returns {{name?: string, autocomplete?: string, checked?: boolean,
 *   maxLength?: number, min?: string, max?: string, pattern?: string}}
 */
function getFieldMetadata(el) {
  const meta = {};
  // Copy an attribute onto `meta` under the same key when it is non-empty.
  const copyAttribute = (attr) => {
    const attrValue = el.getAttribute(attr);
    if (attrValue) meta[attr] = attrValue;
  };

  const fieldName = el.name;
  if (fieldName) {
    meta.name = fieldName;
  }
  copyAttribute("autocomplete");

  const isInput = el instanceof HTMLInputElement;
  if (isInput && (el.type === "checkbox" || el.type === "radio")) {
    meta.checked = el.checked;
  }
  // A negative maxLength is treated as "no limit" and left out.
  if ((isInput || el instanceof HTMLTextAreaElement) && el.maxLength >= 0) {
    meta.maxLength = el.maxLength;
  }
  if (isInput) {
    copyAttribute("min");
    copyAttribute("max");
    copyAttribute("pattern");
  }
  return meta;
}
3167
/**
 * Collect every interactive element on the page, in this order: action
 * controls (buttons), links outside navigation, navigable iframes (reported
 * as links), native form fields, and custom text fields.
 *
 * @returns {Array<object>} One descriptor per element.
 */
function extractInteractiveElements() {
  const collected = [];
  const fieldTypeForTag = (tag) => tag === "select" ? "select" : tag === "textarea" ? "textarea" : "input";

  // Buttons and other action controls.
  for (const control of deepQuerySelectorAll(ACTION_CONTROL_SELECTOR)) {
    const caption = getButtonTextWithSource(control);
    const controlRole = getElementRole(control);
    collected.push({
      type: "button",
      text: caption.text?.slice(0, MAX_LABEL_LENGTH),
      labelSource: caption.source,
      ...buildBaseMetadata(control),
      role: controlRole,
      looksCorrect: controlRole === "radio" ? looksLikeCorrectOption(caption.text) : void 0
    });
  }

  // Links; navigation links are reported separately and skipped here.
  for (const anchor of deepQuerySelectorAll("a[href]")) {
    const linkText = anchor.textContent?.trim();
    if (!linkText || anchor.getAttribute("href")?.startsWith("#")) continue;
    const linkContext = getElementContext(anchor);
    if (linkContext === "nav") continue;
    collected.push({
      type: "link",
      text: linkText.slice(0, MAX_LABEL_LENGTH),
      href: anchor.href.slice(0, MAX_HREF_LENGTH),
      ...buildBaseMetadata(anchor),
      context: linkContext
    });
  }

  // Embedded pages are surfaced as links to their source URL.
  for (const iframe of deepQuerySelectorAll("iframe[src]")) {
    if (!isNavigableEmbeddedSrc(iframe.src)) continue;
    collected.push({
      type: "link",
      text: getEmbeddedFrameLabel(iframe),
      href: iframe.src.slice(0, MAX_HREF_LENGTH),
      ...buildBaseMetadata(iframe),
      context: getElementContext(iframe)
    });
  }

  // Native form fields.
  const nativeFields = deepQuerySelectorAll(
    'input:not([type="hidden"]):not([type="submit"]):not([type="button"]), select, textarea'
  );
  for (const field of nativeFields) {
    const tag = field.tagName.toLowerCase();
    const labelInfo = getInputLabelWithSource(field);
    const fieldRole = getElementRole(field);
    const isRadio = fieldRole === "radio" || field instanceof HTMLInputElement && field.type === "radio";
    const radioText = isRadio ? getTrimmedText(
      field.getAttribute("value") || field.getAttribute("aria-label") || labelInfo.label
    ) : void 0;
    collected.push({
      type: fieldTypeForTag(tag),
      label: labelInfo.label?.slice(0, MAX_LABEL_LENGTH),
      labelSource: labelInfo.source,
      inputType: field.getAttribute("type") || void 0,
      placeholder: field.getAttribute("placeholder") || void 0,
      required: field.hasAttribute("required") || void 0,
      value: getElementValue(field),
      hasValue: getElementHasValue(field),
      options: field instanceof HTMLSelectElement ? getSelectOptions(field) : void 0,
      ...buildBaseMetadata(field),
      role: fieldRole,
      text: radioText?.slice(0, MAX_LABEL_LENGTH),
      looksCorrect: radioText || labelInfo.label ? looksLikeCorrectOption(radioText || labelInfo.label) : void 0,
      ...getFieldMetadata(field)
    });
  }

  // Content-editable / ARIA text fields.
  for (const custom of deepQuerySelectorAll(CUSTOM_TEXT_FIELD_SELECTOR)) {
    if (!shouldExposeCustomTextField(custom)) continue;
    const customLabel = getCustomTextFieldLabelWithSource(custom);
    const customRole = getElementRole(custom);
    collected.push({
      type: "input",
      label: customLabel.label?.slice(0, MAX_LABEL_LENGTH),
      labelSource: customLabel.source,
      inputType: getCustomTextFieldInputType(custom),
      hasValue: getCustomTextFieldHasValue(custom),
      ...buildBaseMetadata(custom),
      role: customRole
    });
  }

  return collected;
}
3249
/**
 * Describe every form on the page together with its fields and submit
 * controls.
 *
 * Fields are the native inputs/selects/textareas and exposable custom text
 * fields found inside the form; submit controls are the document's buttons
 * and submit/image inputs whose `form` property points at the form.
 *
 * @returns {Array<{id?: string, action?: string, method?: string, fields: Array<object>}>}
 */
function extractForms() {
  const describedForms = [];
  const fieldTypeForTag = (tag) => tag === "select" ? "select" : tag === "textarea" ? "textarea" : "input";

  // True when `el` is a submit-capable control owned by `form`.
  const isSubmitControlForForm = (el, form) => {
    if (el instanceof HTMLButtonElement) {
      // A <button> without an explicit type acts as a submit button.
      const declaredType = getTrimmedText(el.getAttribute("type"))?.toLowerCase();
      return (!declaredType || declaredType === "submit") && el.form === form;
    }
    return el instanceof HTMLInputElement && (el.type === "submit" || el.type === "image") && el.form === form;
  };

  for (const form of deepQuerySelectorAll("form")) {
    const fields = [];

    const fieldNodes = form.querySelectorAll(
      `input:not([type='hidden']):not([type='submit']):not([type='button']):not([type='image']), select, textarea, ${CUSTOM_TEXT_FIELD_SELECTOR}`
    );
    for (const field of fieldNodes) {
      const isNative = isNativeFormField(field);
      if (!isNative && !shouldExposeCustomTextField(field)) continue;

      const tag = field.tagName.toLowerCase();
      const labelInfo = getInputLabelWithSource(field);
      const customLabel = isNativeFormField(field) ? {} : getCustomTextFieldLabelWithSource(field);
      const fieldRole = getElementRole(field);
      const isRadio = fieldRole === "radio" || field instanceof HTMLInputElement && field.type === "radio";
      const radioText = isRadio ? getTrimmedText(
        field.getAttribute("value") || field.getAttribute("aria-label") || labelInfo.label
      ) : void 0;
      fields.push({
        type: fieldTypeForTag(tag),
        label: (labelInfo.label || customLabel.label)?.slice(0, MAX_LABEL_LENGTH),
        labelSource: labelInfo.source || customLabel.source,
        inputType: field.getAttribute("type") || getCustomTextFieldInputType(field) || void 0,
        placeholder: field.getAttribute("placeholder") || void 0,
        required: field.hasAttribute("required") || void 0,
        value: getElementValue(field),
        hasValue: isNativeFormField(field) ? getElementHasValue(field) : getCustomTextFieldHasValue(field),
        options: field instanceof HTMLSelectElement ? getSelectOptions(field) : void 0,
        ...buildBaseMetadata(field),
        role: fieldRole,
        text: radioText?.slice(0, MAX_LABEL_LENGTH),
        looksCorrect: radioText || labelInfo.label ? looksLikeCorrectOption(radioText || labelInfo.label) : void 0,
        ...getFieldMetadata(field)
      });
    }

    // Submit controls are searched document-wide and matched through their
    // `form` property rather than by DOM containment.
    const submitCandidates = Array.from(
      document.querySelectorAll(
        "button, input[type='submit'], input[type='image']"
      )
    );
    for (const control of submitCandidates) {
      if (!isSubmitControlForForm(control, form)) continue;
      const caption = getButtonTextWithSource(control);
      fields.push({
        type: "button",
        text: caption.text?.slice(0, MAX_LABEL_LENGTH),
        labelSource: caption.source,
        ...buildBaseMetadata(control)
      });
    }

    describedForms.push({
      id: form.id || void 0,
      action: form.getAttribute("action") || void 0,
      method: form.getAttribute("method") || void 0,
      fields
    });
  }
  return describedForms;
}
3314
/**
 * Collect the page's landmark regions (banner, navigation, main,
 * complementary, contentinfo, article, region, search, form, dialog).
 *
 * Each element is reported once, even when it matches several selector
 * groups (for example `<section role="dialog">` matches both the section
 * and the dialog group); the first matching group determines its position.
 *
 * @returns {Array<{role: string, label: (string|undefined), text: (string|undefined)}>}
 *   `role` is the explicit role attribute when set, otherwise the role
 *   implied by the tag name, otherwise "generic". `text` is truncated to
 *   MAX_ATTR_TEXT characters.
 */
function extractLandmarks() {
  const implicitRoleByTag = new Map([
    ["header", "banner"],
    ["nav", "navigation"],
    ["main", "main"],
    ["aside", "complementary"],
    ["footer", "contentinfo"],
    ["article", "article"],
    ["section", "region"],
    ["dialog", "dialog"]
  ]);
  const selectors = [
    "header, [role='banner']",
    "nav, [role='navigation']",
    "main, [role='main']",
    "aside, [role='complementary']",
    "footer, [role='contentinfo']",
    "article, [role='article']",
    "section, [role='region']",
    "[role='search']",
    "[role='form']",
    "dialog, [role='dialog'], [role='alertdialog']"
  ];
  const landmarks = [];
  const seen = /* @__PURE__ */ new Set();
  for (const selector of selectors) {
    for (const el of deepQuerySelectorAll(selector)) {
      // An element can match more than one selector group; list it once.
      if (seen.has(el)) continue;
      seen.add(el);
      const tag = el.tagName.toLowerCase();
      const role = el.getAttribute("role") || implicitRoleByTag.get(tag) || "generic";
      landmarks.push({
        role,
        label: getTrimmedText(el.getAttribute("aria-label")) || getNodeTextByIds(el.getAttribute("aria-labelledby")) || getTrimmedText(el.id),
        text: getTrimmedText(el.textContent)?.slice(0, MAX_ATTR_TEXT)
      });
    }
  }
  return landmarks;
}
3341
/**
 * Parses every `<script type="application/ld+json">` block in the document.
 *
 * A block whose JSON is an array contributes each object-valued item; any
 * other block contributes its parsed value when that value is an object.
 * Blocks that fail to parse are ignored.
 *
 * @returns {object[]} All collected JSON-LD objects, in document order.
 */
function extractJsonLd() {
  const isObjectLike = (candidate) => Boolean(candidate) && typeof candidate === "object";
  const collected = [];
  const scriptNodes = document.querySelectorAll(
    'script[type="application/ld+json"]'
  );
  scriptNodes.forEach((scriptNode) => {
    let payload;
    try {
      payload = JSON.parse(scriptNode.textContent || "");
    } catch {
      // Best effort: malformed JSON-LD is simply skipped.
      return;
    }
    const candidates = Array.isArray(payload) ? payload : [payload];
    for (const candidate of candidates) {
      if (isObjectLike(candidate)) collected.push(candidate);
    }
  });
  return collected;
}
3361
/**
 * Gathers a whitelisted subset of `<meta>` tags plus the canonical link.
 *
 * The key of a tag is its `property`, else `name`, else `itemprop`. Only
 * "description", "author" and keys prefixed with og:, article:, product:,
 * recipe: or twitter: are kept; the first occurrence of a key wins.
 *
 * @returns {Object<string, string>} Map of meta key to content, with an
 *   extra `canonical` entry when a canonical link with an href exists.
 */
function extractMetaTags() {
  const exactKeys = ["description", "author"];
  const keyPrefixes = ["og:", "article:", "product:", "recipe:", "twitter:"];
  const collected = {};
  const metaNodes = document.querySelectorAll("meta[name], meta[property], meta[itemprop]");
  for (const meta of metaNodes) {
    if (!(meta instanceof HTMLMetaElement)) continue;
    const key = getTrimmedText(meta.getAttribute("property")) || getTrimmedText(meta.getAttribute("name")) || getTrimmedText(meta.getAttribute("itemprop"));
    const value = getTrimmedText(meta.content);
    if (!key || !value) continue;
    // First occurrence wins; later duplicates are ignored.
    if (collected[key]) continue;
    const isWanted = exactKeys.includes(key) || keyPrefixes.some((prefix) => key.startsWith(prefix));
    if (isWanted) {
      collected[key] = value;
    }
  }
  const canonicalLink = document.querySelector('link[rel="canonical"]');
  if (canonicalLink instanceof HTMLLinkElement && canonicalLink.href) {
    collected.canonical = canonicalLink.href;
  }
  return collected;
}
3378
/**
 * Serializes HTML microdata into JSON-LD-like objects.
 *
 * Every `[itemscope]` element that is not itself an `itemprop` becomes a
 * top-level item. Its properties are the `[itemprop]` descendants that belong
 * directly to it; a descendant that opens its own `itemscope` is serialized
 * recursively (up to four levels) and embedded as the property value.
 *
 * @returns {object[]} Non-empty items in document order.
 */
function extractMicrodata() {
  const MAX_DEPTH = 3;
  const serializeItem = (scope, depth = 0) => {
    if (depth > MAX_DEPTH) return null;
    const item = {};
    const typeAttr = getTrimmedText(scope.getAttribute("itemtype"));
    const idAttr = getTrimmedText(scope.getAttribute("itemid"));
    if (typeAttr) {
      const typeList = typeAttr.split(/\s+/).filter(Boolean);
      item["@type"] = typeList.length === 1 ? typeList[0] : typeList;
    }
    if (idAttr) item["@id"] = idAttr;
    for (const node of scope.querySelectorAll("[itemprop]")) {
      if (!(node instanceof HTMLElement)) continue;
      const owningScope = node.closest("[itemscope]");
      const opensNestedItem = owningScope === node && node.hasAttribute("itemscope");
      if (opensNestedItem) {
        // A nested item only counts when its enclosing scope is the one being serialized.
        if (node.parentElement?.closest("[itemscope]") !== scope) continue;
      } else if (owningScope !== scope) {
        // Plain property that belongs to some deeper scope.
        continue;
      }
      const names = (node.getAttribute("itemprop") || "").split(/\s+/).map((name) => name.trim()).filter(Boolean);
      if (names.length === 0) continue;
      const value = opensNestedItem ? serializeItem(node, depth + 1) : getStructuredElementValue(node);
      if (value == null) continue;
      for (const name of names) {
        pushPropertyValue(item, name, value);
      }
    }
    return Object.keys(item).length > 0 ? item : null;
  };
  const items = [];
  for (const candidate of document.querySelectorAll("[itemscope]")) {
    if (!(candidate instanceof HTMLElement)) continue;
    if (candidate.hasAttribute("itemprop")) continue;
    const serialized = serializeItem(candidate);
    if (serialized !== null) items.push(serialized);
  }
  return items;
}
3411
/**
 * Serializes RDFa-annotated markup into JSON-LD-like objects.
 *
 * Every `[typeof]` element is a candidate entity. Its properties are the
 * `[property]` descendants whose nearest typed ancestor is that element; a
 * descendant that carries its own `typeof` is serialized recursively (up to
 * four levels) and embedded as the property value.
 *
 * An entity that has been embedded as a nested value is NOT repeated as a
 * top-level result. Typed elements that were not embedded anywhere (no typed
 * ancestor, no usable property name, or nested beyond the depth cap) are
 * still emitted at the top level, so no entity is lost.
 *
 * @returns {object[]} Non-empty entities in document order.
 */
function extractRdfa() {
  // Typed elements already embedded as the value of an enclosing entity's property.
  const embeddedEntityRoots = new Set();
  const serializeEntity = (scope, depth = 0) => {
    if (depth > 3) return null;
    const entity = {};
    const typeAttr = getTrimmedText(scope.getAttribute("typeof"));
    const about = getTrimmedText(scope.getAttribute("about")) || getTrimmedText(scope.getAttribute("resource")) || getTrimmedText(scope.getAttribute("href")) || getTrimmedText(scope.getAttribute("src"));
    if (typeAttr) {
      const types = typeAttr.split(/\s+/).filter(Boolean);
      entity["@type"] = types.length === 1 ? types[0] : types;
    }
    if (about) entity["@id"] = about;
    // querySelectorAll only yields descendants, so `node` is never `scope` itself.
    scope.querySelectorAll("[property]").forEach((node) => {
      if (!(node instanceof HTMLElement)) return;
      const nearestTypedAncestor = node.closest("[typeof]");
      const isNestedEntityRoot = nearestTypedAncestor === node && node.hasAttribute("typeof");
      if (nearestTypedAncestor !== scope && !isNestedEntityRoot) {
        // Plain property owned by a deeper entity.
        return;
      }
      if (isNestedEntityRoot && node.parentElement?.closest("[typeof]") !== scope) {
        // Nested entity whose enclosing entity is not the one being serialized.
        return;
      }
      const propNames = (node.getAttribute("property") || "").split(/\s+/).map((name) => name.trim()).filter(Boolean);
      if (propNames.length === 0) return;
      let value;
      if (isNestedEntityRoot) {
        value = serializeEntity(node, depth + 1);
        if (value != null) embeddedEntityRoots.add(node);
      } else {
        value = getStructuredElementValue(node);
      }
      if (value == null) return;
      propNames.forEach((name) => pushPropertyValue(entity, name, value));
    });
    return Object.keys(entity).length > 0 ? entity : null;
  };
  const entities = [];
  // Document order guarantees an enclosing entity is serialized (and its nested
  // entities recorded) before those nested elements are reached here.
  for (const node of document.querySelectorAll("[typeof]")) {
    if (!(node instanceof HTMLElement)) continue;
    if (embeddedEntityRoots.has(node)) continue;
    const entity = serializeEntity(node);
    if (entity !== null) entities.push(entity);
  }
  return entities;
}
3442
/**
 * Runs `read` while the Vessel highlight label elements are temporarily
 * detached from the DOM, then puts every label back where it was.
 *
 * Labels are restored in reverse removal order so that a label whose original
 * next sibling was another label is re-inserted after that sibling is back.
 * Restoration happens even when `read` throws.
 *
 * @param {Function} read Callback invoked with the labels detached.
 * @returns {*} Whatever `read` returns.
 */
function withHighlightLabelsRemoved(read) {
  const candidates = Array.from(
    document.querySelectorAll(
      ".__vessel-highlight-label[data-vessel-highlight]"
    )
  );
  const detached = [];
  for (const label of candidates) {
    if (!(label instanceof HTMLElement)) continue;
    const parent = label.parentNode;
    if (!parent) continue;
    // Remember the insertion point before the label leaves the tree.
    const nextSibling = label.nextSibling;
    parent.removeChild(label);
    detached.push({ label, parent, nextSibling });
  }
  try {
    return read();
  } finally {
    while (detached.length > 0) {
      const { label, parent, nextSibling } = detached.pop();
      parent.insertBefore(label, nextSibling);
    }
  }
}
3466
/**
 * Returns the page's rendered text with the highlight labels excluded.
 *
 * Reads `innerText` of the body, falling back to the document element and
 * finally to an empty string.
 *
 * @returns {string} The visible text.
 */
function getVisiblePageText() {
  const readInnerText = () => {
    const bodyText = document.body?.innerText;
    if (bodyText) return bodyText;
    return document.documentElement?.innerText || "";
  };
  return withHighlightLabelsRemoved(readInnerText);
}
3471
/**
 * Produces the full page snapshot exposed to the host as `extractContent`.
 *
 * Readability is run on a clone of the document to obtain the article; the
 * structured sections (headings, navigation, interactive elements, forms,
 * overlays, landmarks, JSON-LD, microdata, RDFa, meta tags) are then read
 * from the live document. If anything throws, the error is logged and the
 * structured extraction is attempted once more without an article.
 *
 * The element index state is reset before BOTH the primary pass and the
 * fallback pass. Without the second reset, a primary pass that failed midway
 * would leave a partially advanced `elementIndex` and stale entries in
 * `elementSelectors` / `indexedElementRefs` for the fallback to build on.
 *
 * @returns {object} Snapshot with title, content, htmlContent, byline,
 *   excerpt, url and the structured sections listed above.
 */
function vesselExtractContent() {
  // Resets the index counter and empties both index lookup tables in place
  // (the table objects are shared with other functions, so they are not replaced).
  const resetIndexState = () => {
    elementIndex = 0;
    activeOverlays = [];
    Object.keys(elementSelectors).forEach(
      (key) => delete elementSelectors[Number(key)]
    );
    Object.keys(indexedElementRefs).forEach(
      (key) => delete indexedElementRefs[Number(key)]
    );
  };
  const extractStructuredContent = (article) => {
    activeOverlays = detectOverlays();
    const readabilityText = article?.textContent || "";
    const visibleText = getVisiblePageText();
    // Prefer the Readability text only when it covers more than 30% of the visible text.
    const content = readabilityText.length > visibleText.length * 0.3 ? readabilityText : visibleText;
    return {
      title: article?.title || document.title,
      content,
      htmlContent: article?.content || "",
      byline: article?.byline || "",
      excerpt: article?.excerpt || "",
      url: window.location.href,
      headings: extractHeadings(),
      navigation: extractNavigation(),
      interactiveElements: extractInteractiveElements(),
      forms: extractForms(),
      viewport: getViewportSnapshot(),
      // Strip the live element reference and z-index before returning overlay data.
      overlays: activeOverlays.map(
        ({ element: _element, zIndex: _zIndex, ...overlay }) => overlay
      ),
      dormantOverlays: detectDormantOverlays(),
      landmarks: extractLandmarks(),
      jsonLd: extractJsonLd(),
      microdata: extractMicrodata(),
      rdfa: extractRdfa(),
      metaTags: extractMetaTags()
    };
  };
  try {
    resetIndexState();
    // Readability works on a clone so the live page is left untouched.
    const documentClone = document.cloneNode(true);
    const reader = new readabilityExports.Readability(documentClone);
    const article = reader.parse();
    return extractStructuredContent(article || void 0);
  } catch (error) {
    logger.error("Vessel content extraction error:", error);
    // Start the fallback pass from a clean index state.
    resetIndexState();
    return extractStructuredContent();
  }
}
3518
/**
 * Looks up the selector recorded for an element index.
 *
 * @param {number} index Element index.
 * @returns {*} The `elementSelectors` entry for that index, or null when the
 *   entry is missing or falsy.
 */
function resolveElementSelector(index) {
  const recorded = elementSelectors[index];
  return recorded ? recorded : null;
}
3521
/**
 * Finds the index previously assigned to the element a selector points at.
 *
 * Selectors containing " >>> " are resolved through `resolveShadowSelector`;
 * all others go through `document.querySelector`. Any exception raised while
 * resolving yields null.
 *
 * @param {string} selector Selector to resolve.
 * @returns {number|null} The element's index, or null when the selector is
 *   not a non-empty string, matches nothing, or the element has no numeric
 *   entry in `indexedElements`.
 */
function resolveElementIndexBySelector(selector) {
  if (typeof selector !== "string" || selector === "") return null;
  let target = null;
  try {
    target = selector.includes(" >>> ") ? resolveShadowSelector(selector) : document.querySelector(selector);
  } catch {
    return null;
  }
  if (!target) return null;
  const knownIndex = indexedElements.get(target);
  if (typeof knownIndex === "number") return knownIndex;
  return null;
}
3537
/**
 * Performs an action on the element registered under `index` and returns a
 * human-readable result string (errors start with "Error").
 *
 * Supported actions:
 *  - "click": scrolls the element into view, focuses and clicks it. Reports
 *    the resulting checked/selected state for checkboxes and radios (native
 *    or ARIA), otherwise the element's label and, when the element is or sits
 *    inside a link, the link's href.
 *  - "focus": scrolls the element into view and focuses it.
 *  - "value": sets the value of an input, textarea or select through the
 *    prototype's native setter, then dispatches bubbling "input" and "change"
 *    events.
 *
 * @param {number} index Index of the element in `indexedElementRefs`.
 * @param {string} action One of "click", "focus", "value".
 * @param {*} [value] New value; required for the "value" action.
 * @returns {string} Result or error description.
 */
function interactByIndex(index, action, value) {
  const el = indexedElementRefs[index];
  if (!el || !(el instanceof HTMLElement) || !document.contains(el)) {
    return "Error[stale-index]: Element not found — the page may have changed. Call read_page to refresh.";
  }
  // Short display name: aria-label, else truncated text, else tag name.
  const describeElement = () => el.getAttribute("aria-label") || el.textContent?.trim().slice(0, MAX_SHORT_TEXT) || el.tagName.toLowerCase();
  if (action === "click") {
    el.scrollIntoView({ behavior: "instant", block: "center", inline: "center" });
    const rect = el.getBoundingClientRect();
    if (rect.width <= 0 || rect.height <= 0) {
      return "Error[hidden]: Element has no visible area. It may be inside a collapsed, lazy-loaded, or virtual-scroll section. Scroll toward it, then call read_page to refresh visible elements.";
    }
    el.focus();
    el.click();
    // State is read AFTER the click so the message reflects the new state.
    if (el instanceof HTMLInputElement) {
      if (el.type === "checkbox") {
        const label = getInputLabel(el) || el.getAttribute("aria-label") || el.name || "checkbox";
        return `${el.checked ? "Checked" : "Unchecked"}: ${label}`;
      }
      if (el.type === "radio") {
        const label = getTrimmedText(el.value) || getInputLabel(el) || el.getAttribute("aria-label") || el.name || "radio";
        return `${el.checked ? "Selected" : "Clicked"}: ${label}`;
      }
    }
    const role = el.getAttribute("role");
    if (role === "checkbox" || role === "radio") {
      const label = getTrimmedText(el.getAttribute("aria-label")) || getTrimmedText(el.textContent) || el.tagName.toLowerCase();
      const ariaChecked = el.getAttribute("aria-checked");
      if (role === "checkbox") {
        return `${ariaChecked === "true" ? "Checked" : "Unchecked"}: ${label}`;
      }
      return `${ariaChecked === "true" ? "Selected" : "Clicked"}: ${label}`;
    }
    const anchor = el instanceof HTMLAnchorElement ? el : el.closest("a[href]");
    const href = anchor instanceof HTMLAnchorElement ? anchor.href : null;
    return "Clicked: " + describeElement() + (href ? "\nhref: " + href : "");
  }
  if (action === "focus") {
    el.scrollIntoView({ behavior: "instant", block: "center", inline: "center" });
    el.focus();
    return "Focused: " + describeElement();
  }
  if (action === "value") {
    if (value == null) {
      // Previously this fell through to the misleading "Unknown action" error.
      return "Error[missing-value]: No value was provided for the value action.";
    }
    if (!(el instanceof HTMLInputElement) && !(el instanceof HTMLTextAreaElement) && !(el instanceof HTMLSelectElement)) {
      return "Error[not-input]: Element is not a text input";
    }
    if (el instanceof HTMLSelectElement) {
      // Assigning a value with no matching option would silently clear the
      // selection; report it instead of claiming success.
      const wanted = String(value);
      const hasOption = Array.from(el.options).some((option) => option.value === wanted);
      if (!hasOption) {
        return `Error[no-option]: Select has no option with value "${wanted}"`;
      }
    }
    // Call the prototype's setter directly rather than assigning on the instance.
    // NOTE(review): presumably this bypasses per-instance value overrides
    // installed by UI frameworks — confirm.
    const proto = el instanceof HTMLTextAreaElement ? HTMLTextAreaElement.prototype : el instanceof HTMLSelectElement ? HTMLSelectElement.prototype : HTMLInputElement.prototype;
    const desc = Object.getOwnPropertyDescriptor(proto, "value");
    if (desc?.set) {
      desc.set.call(el, value);
    } else {
      el.value = value;
    }
    el.focus();
    el.dispatchEvent(new Event("input", { bubbles: true }));
    el.dispatchEvent(new Event("change", { bubbles: true }));
    return "Typed into: " + (el.getAttribute("aria-label") || el.placeholder || el.name || "input");
  }
  return "Error: Unknown action";
}
3596
// Publish the page-side API to the main world as `window.__vessel`.
electron.contextBridge.exposeInMainWorld("__vessel", {
  extractContent: vesselExtractContent,
  getElementSelector: resolveElementSelector,
  getElementIndexBySelector: resolveElementIndexBySelector,
  interactByIndex,
  resolveShadowSelector,
  // Forwards a non-blank selected text to the host over IPC.
  notifyHighlightSelection: (text) => {
    if (typeof text !== "string") return;
    const selection = text.trim();
    if (selection) {
      electron.ipcRenderer.send("vessel:highlight-selection", selection);
    }
  }
});
// Start the page diff observer right away when the document is already
// parsed, otherwise once on DOMContentLoaded.
if (document.readyState !== "loading") {
  startPageDiffObserver();
} else {
  window.addEventListener(
    "DOMContentLoaded",
    () => {
      startPageDiffObserver();
    },
    { once: true }
  );
}