fss-link 1.0.62 → 1.0.64

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/bundle/fss-link.js +589 -364
  2. package/package.json +5 -5
@@ -21920,7 +21920,7 @@ function createContentGeneratorConfig(config, authType) {
21920
21920
  return contentGeneratorConfig;
21921
21921
  }
21922
21922
  async function createContentGenerator(config, gcConfig, sessionId2) {
21923
- const version = "1.0.62";
21923
+ const version = "1.0.64";
21924
21924
  const userAgent = `FSS-Link/${version} (${process.platform}; ${process.arch})`;
21925
21925
  const baseHeaders = {
21926
21926
  "User-Agent": userAgent
@@ -55941,15 +55941,17 @@ var init_p_limit = __esm({
55941
55941
  }
55942
55942
  });
55943
55943
 
55944
- // packages/core/node_modules/@mozilla/readability/Readability.js
55944
+ // node_modules/@mozilla/readability/Readability.js
55945
55945
  var require_Readability = __commonJS({
55946
- "packages/core/node_modules/@mozilla/readability/Readability.js"(exports, module) {
55946
+ "node_modules/@mozilla/readability/Readability.js"(exports, module) {
55947
55947
  function Readability2(doc, options3) {
55948
55948
  if (options3 && options3.documentElement) {
55949
55949
  doc = options3;
55950
55950
  options3 = arguments[2];
55951
55951
  } else if (!doc || !doc.documentElement) {
55952
- throw new Error("First argument to Readability constructor should be a document object.");
55952
+ throw new Error(
55953
+ "First argument to Readability constructor should be a document object."
55954
+ );
55953
55955
  }
55954
55956
  options3 = options3 || {};
55955
55957
  this._doc = doc;
@@ -55959,17 +55961,21 @@ var require_Readability = __commonJS({
55959
55961
  this._articleDir = null;
55960
55962
  this._articleSiteName = null;
55961
55963
  this._attempts = [];
55964
+ this._metadata = {};
55962
55965
  this._debug = !!options3.debug;
55963
55966
  this._maxElemsToParse = options3.maxElemsToParse || this.DEFAULT_MAX_ELEMS_TO_PARSE;
55964
55967
  this._nbTopCandidates = options3.nbTopCandidates || this.DEFAULT_N_TOP_CANDIDATES;
55965
55968
  this._charThreshold = options3.charThreshold || this.DEFAULT_CHAR_THRESHOLD;
55966
- this._classesToPreserve = this.CLASSES_TO_PRESERVE.concat(options3.classesToPreserve || []);
55969
+ this._classesToPreserve = this.CLASSES_TO_PRESERVE.concat(
55970
+ options3.classesToPreserve || []
55971
+ );
55967
55972
  this._keepClasses = !!options3.keepClasses;
55968
55973
  this._serializer = options3.serializer || function(el) {
55969
55974
  return el.innerHTML;
55970
55975
  };
55971
55976
  this._disableJSONLD = !!options3.disableJSONLD;
55972
55977
  this._allowedVideoRegex = options3.allowedVideoRegex || this.REGEXPS.videos;
55978
+ this._linkDensityModifier = options3.linkDensityModifier || 0;
55973
55979
  this._flags = this.FLAG_STRIP_UNLIKELYS | this.FLAG_WEIGHT_CLASSES | this.FLAG_CLEAN_CONDITIONALLY;
55974
55980
  if (this._debug) {
55975
55981
  let logNode = function(node) {
@@ -55990,7 +55996,7 @@ var require_Readability = __commonJS({
55990
55996
  return arg;
55991
55997
  });
55992
55998
  args.unshift("Reader: (Readability)");
55993
- console.log.apply(console, args);
55999
+ console.log(...args);
55994
56000
  } else if (typeof dump !== "undefined") {
55995
56001
  var msg = Array.prototype.map.call(arguments, function(x) {
55996
56002
  return x && x.nodeName ? logNode(x) : x;
@@ -56027,7 +56033,7 @@ var require_Readability = __commonJS({
56027
56033
  unlikelyCandidates: /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,
56028
56034
  okMaybeItsACandidate: /and|article|body|column|content|main|shadow/i,
56029
56035
  positive: /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,
56030
- negative: /-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,
56036
+ negative: /-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|footer|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|widget/i,
56031
56037
  extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,
56032
56038
  byline: /byline|author|dateline|writtenby|p-author/i,
56033
56039
  replaceFonts: /<(\/?)font[^>]*>/gi,
@@ -56046,12 +56052,46 @@ var require_Readability = __commonJS({
56046
56052
  // see: https://en.wikipedia.org/wiki/Comma#Comma_variants
56047
56053
  commas: /\u002C|\u060C|\uFE50|\uFE10|\uFE11|\u2E41|\u2E34|\u2E32|\uFF0C/g,
56048
56054
  // See: https://schema.org/Article
56049
- jsonLdArticleTypes: /^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPosting|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/
56055
+ jsonLdArticleTypes: /^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPosting|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/,
56056
+ // used to see if a node's content matches words commonly used for ad blocks or loading indicators
56057
+ adWords: /^(ad(vertising|vertisement)?|pub(licité)?|werb(ung)?|广告|Реклама|Anuncio)$/iu,
56058
+ loadingWords: /^((loading|正在加载|Загрузка|chargement|cargando)(…|\.\.\.)?)$/iu
56050
56059
  },
56051
- UNLIKELY_ROLES: ["menu", "menubar", "complementary", "navigation", "alert", "alertdialog", "dialog"],
56052
- DIV_TO_P_ELEMS: /* @__PURE__ */ new Set(["BLOCKQUOTE", "DL", "DIV", "IMG", "OL", "P", "PRE", "TABLE", "UL"]),
56053
- ALTER_TO_DIV_EXCEPTIONS: ["DIV", "ARTICLE", "SECTION", "P"],
56054
- PRESENTATIONAL_ATTRIBUTES: ["align", "background", "bgcolor", "border", "cellpadding", "cellspacing", "frame", "hspace", "rules", "style", "valign", "vspace"],
56060
+ UNLIKELY_ROLES: [
56061
+ "menu",
56062
+ "menubar",
56063
+ "complementary",
56064
+ "navigation",
56065
+ "alert",
56066
+ "alertdialog",
56067
+ "dialog"
56068
+ ],
56069
+ DIV_TO_P_ELEMS: /* @__PURE__ */ new Set([
56070
+ "BLOCKQUOTE",
56071
+ "DL",
56072
+ "DIV",
56073
+ "IMG",
56074
+ "OL",
56075
+ "P",
56076
+ "PRE",
56077
+ "TABLE",
56078
+ "UL"
56079
+ ]),
56080
+ ALTER_TO_DIV_EXCEPTIONS: ["DIV", "ARTICLE", "SECTION", "P", "OL", "UL"],
56081
+ PRESENTATIONAL_ATTRIBUTES: [
56082
+ "align",
56083
+ "background",
56084
+ "bgcolor",
56085
+ "border",
56086
+ "cellpadding",
56087
+ "cellspacing",
56088
+ "frame",
56089
+ "hspace",
56090
+ "rules",
56091
+ "style",
56092
+ "valign",
56093
+ "vspace"
56094
+ ],
56055
56095
  DEPRECATED_SIZE_ATTRIBUTE_ELEMS: ["TABLE", "TH", "TD", "HR", "PRE"],
56056
56096
  // The commented out elements qualify as phrasing content but tend to be
56057
56097
  // removed by readability when put into paragraphs, so we ignore them here.
@@ -56101,19 +56141,19 @@ var require_Readability = __commonJS({
56101
56141
  CLASSES_TO_PRESERVE: ["page"],
56102
56142
  // These are the list of HTML entities that need to be escaped.
56103
56143
  HTML_ESCAPE_MAP: {
56104
- "lt": "<",
56105
- "gt": ">",
56106
- "amp": "&",
56107
- "quot": '"',
56108
- "apos": "'"
56144
+ lt: "<",
56145
+ gt: ">",
56146
+ amp: "&",
56147
+ quot: '"',
56148
+ apos: "'"
56109
56149
  },
56110
56150
  /**
56111
56151
  * Run any post-process modifications to article content as necessary.
56112
56152
  *
56113
56153
  * @param Element
56114
56154
  * @return void
56115
- **/
56116
- _postProcessContent: function(articleContent) {
56155
+ **/
56156
+ _postProcessContent(articleContent) {
56117
56157
  this._fixRelativeUris(articleContent);
56118
56158
  this._simplifyNestedElements(articleContent);
56119
56159
  if (!this._keepClasses) {
@@ -56130,7 +56170,7 @@ var require_Readability = __commonJS({
56130
56170
  * @param Function filterFn the function to use as a filter
56131
56171
  * @return void
56132
56172
  */
56133
- _removeNodes: function(nodeList, filterFn) {
56173
+ _removeNodes(nodeList, filterFn) {
56134
56174
  if (this._docJSDOMParser && nodeList._isLiveNodeList) {
56135
56175
  throw new Error("Do not pass live node lists to _removeNodes");
56136
56176
  }
@@ -56151,7 +56191,7 @@ var require_Readability = __commonJS({
56151
56191
  * @param String newTagName the new tag name to use
56152
56192
  * @return void
56153
56193
  */
56154
- _replaceNodeTags: function(nodeList, newTagName) {
56194
+ _replaceNodeTags(nodeList, newTagName) {
56155
56195
  if (this._docJSDOMParser && nodeList._isLiveNodeList) {
56156
56196
  throw new Error("Do not pass live node lists to _replaceNodeTags");
56157
56197
  }
@@ -56170,7 +56210,7 @@ var require_Readability = __commonJS({
56170
56210
  * @param Function fn The iterate function.
56171
56211
  * @return void
56172
56212
  */
56173
- _forEachNode: function(nodeList, fn) {
56213
+ _forEachNode(nodeList, fn) {
56174
56214
  Array.prototype.forEach.call(nodeList, fn, this);
56175
56215
  },
56176
56216
  /**
@@ -56184,7 +56224,7 @@ var require_Readability = __commonJS({
56184
56224
  * @param Function fn The test function.
56185
56225
  * @return void
56186
56226
  */
56187
- _findNode: function(nodeList, fn) {
56227
+ _findNode(nodeList, fn) {
56188
56228
  return Array.prototype.find.call(nodeList, fn, this);
56189
56229
  },
56190
56230
  /**
@@ -56198,7 +56238,7 @@ var require_Readability = __commonJS({
56198
56238
  * @param Function fn The iterate function.
56199
56239
  * @return Boolean
56200
56240
  */
56201
- _someNode: function(nodeList, fn) {
56241
+ _someNode(nodeList, fn) {
56202
56242
  return Array.prototype.some.call(nodeList, fn, this);
56203
56243
  },
56204
56244
  /**
@@ -56212,31 +56252,20 @@ var require_Readability = __commonJS({
56212
56252
  * @param Function fn The iterate function.
56213
56253
  * @return Boolean
56214
56254
  */
56215
- _everyNode: function(nodeList, fn) {
56255
+ _everyNode(nodeList, fn) {
56216
56256
  return Array.prototype.every.call(nodeList, fn, this);
56217
56257
  },
56218
- /**
56219
- * Concat all nodelists passed as arguments.
56220
- *
56221
- * @return ...NodeList
56222
- * @return Array
56223
- */
56224
- _concatNodeLists: function() {
56225
- var slice = Array.prototype.slice;
56226
- var args = slice.call(arguments);
56227
- var nodeLists = args.map(function(list2) {
56228
- return slice.call(list2);
56229
- });
56230
- return Array.prototype.concat.apply([], nodeLists);
56231
- },
56232
- _getAllNodesWithTag: function(node, tagNames) {
56258
+ _getAllNodesWithTag(node, tagNames) {
56233
56259
  if (node.querySelectorAll) {
56234
56260
  return node.querySelectorAll(tagNames.join(","));
56235
56261
  }
56236
- return [].concat.apply([], tagNames.map(function(tag2) {
56237
- var collection = node.getElementsByTagName(tag2);
56238
- return Array.isArray(collection) ? collection : Array.from(collection);
56239
- }));
56262
+ return [].concat.apply(
56263
+ [],
56264
+ tagNames.map(function(tag2) {
56265
+ var collection = node.getElementsByTagName(tag2);
56266
+ return Array.isArray(collection) ? collection : Array.from(collection);
56267
+ })
56268
+ );
56240
56269
  },
56241
56270
  /**
56242
56271
  * Removes the class="" attribute from every element in the given
@@ -56246,11 +56275,9 @@ var require_Readability = __commonJS({
56246
56275
  * @param Element
56247
56276
  * @return void
56248
56277
  */
56249
- _cleanClasses: function(node) {
56278
+ _cleanClasses(node) {
56250
56279
  var classesToPreserve = this._classesToPreserve;
56251
- var className = (node.getAttribute("class") || "").split(/\s+/).filter(function(cls) {
56252
- return classesToPreserve.indexOf(cls) != -1;
56253
- }).join(" ");
56280
+ var className = (node.getAttribute("class") || "").split(/\s+/).filter((cls) => classesToPreserve.includes(cls)).join(" ");
56254
56281
  if (className) {
56255
56282
  node.setAttribute("class", className);
56256
56283
  } else {
@@ -56260,6 +56287,20 @@ var require_Readability = __commonJS({
56260
56287
  this._cleanClasses(node);
56261
56288
  }
56262
56289
  },
56290
+ /**
56291
+ * Tests whether a string is a URL or not.
56292
+ *
56293
+ * @param {string} str The string to test
56294
+ * @return {boolean} true if str is a URL, false if not
56295
+ */
56296
+ _isUrl(str3) {
56297
+ try {
56298
+ new URL(str3);
56299
+ return true;
56300
+ } catch {
56301
+ return false;
56302
+ }
56303
+ },
56263
56304
  /**
56264
56305
  * Converts each <a> and <img> uri in the given element to an absolute URI,
56265
56306
  * ignoring #ref URIs.
@@ -56267,7 +56308,7 @@ var require_Readability = __commonJS({
56267
56308
  * @param Element
56268
56309
  * @return void
56269
56310
  */
56270
- _fixRelativeUris: function(articleContent) {
56311
+ _fixRelativeUris(articleContent) {
56271
56312
  var baseURI = this._doc.baseURI;
56272
56313
  var documentURI = this._doc.documentURI;
56273
56314
  function toAbsoluteURI(uri) {
@@ -56319,14 +56360,17 @@ var require_Readability = __commonJS({
56319
56360
  media.setAttribute("poster", toAbsoluteURI(poster));
56320
56361
  }
56321
56362
  if (srcset) {
56322
- var newSrcset = srcset.replace(this.REGEXPS.srcsetUrl, function(_, p1, p2, p3) {
56323
- return toAbsoluteURI(p1) + (p2 || "") + p3;
56324
- });
56363
+ var newSrcset = srcset.replace(
56364
+ this.REGEXPS.srcsetUrl,
56365
+ function(_, p1, p2, p3) {
56366
+ return toAbsoluteURI(p1) + (p2 || "") + p3;
56367
+ }
56368
+ );
56325
56369
  media.setAttribute("srcset", newSrcset);
56326
56370
  }
56327
56371
  });
56328
56372
  },
56329
- _simplifyNestedElements: function(articleContent) {
56373
+ _simplifyNestedElements(articleContent) {
56330
56374
  var node = articleContent;
56331
56375
  while (node) {
56332
56376
  if (node.parentNode && ["DIV", "SECTION"].includes(node.tagName) && !(node.id && node.id.startsWith("readability"))) {
@@ -56336,7 +56380,7 @@ var require_Readability = __commonJS({
56336
56380
  } else if (this._hasSingleTagInsideElement(node, "DIV") || this._hasSingleTagInsideElement(node, "SECTION")) {
56337
56381
  var child = node.children[0];
56338
56382
  for (var i = 0; i < node.attributes.length; i++) {
56339
- child.setAttribute(node.attributes[i].name, node.attributes[i].value);
56383
+ child.setAttributeNode(node.attributes[i].cloneNode());
56340
56384
  }
56341
56385
  node.parentNode.replaceChild(child, node);
56342
56386
  node = child;
@@ -56351,14 +56395,17 @@ var require_Readability = __commonJS({
56351
56395
  *
56352
56396
  * @return string
56353
56397
  **/
56354
- _getArticleTitle: function() {
56398
+ _getArticleTitle() {
56355
56399
  var doc = this._doc;
56356
56400
  var curTitle = "";
56357
56401
  var origTitle = "";
56358
56402
  try {
56359
56403
  curTitle = origTitle = doc.title.trim();
56360
- if (typeof curTitle !== "string")
56361
- curTitle = origTitle = this._getInnerText(doc.getElementsByTagName("title")[0]);
56404
+ if (typeof curTitle !== "string") {
56405
+ curTitle = origTitle = this._getInnerText(
56406
+ doc.getElementsByTagName("title")[0]
56407
+ );
56408
+ }
56362
56409
  } catch (e2) {
56363
56410
  }
56364
56411
  var titleHadHierarchicalSeparators = false;
@@ -56367,14 +56414,13 @@ var require_Readability = __commonJS({
56367
56414
  }
56368
56415
  if (/ [\|\-\\\/>»] /.test(curTitle)) {
56369
56416
  titleHadHierarchicalSeparators = / [\\\/>»] /.test(curTitle);
56370
- curTitle = origTitle.replace(/(.*)[\|\-\\\/>»] .*/gi, "$1");
56371
- if (wordCount(curTitle) < 3)
56372
- curTitle = origTitle.replace(/[^\|\-\\\/>»]*[\|\-\\\/>»](.*)/gi, "$1");
56373
- } else if (curTitle.indexOf(": ") !== -1) {
56374
- var headings = this._concatNodeLists(
56375
- doc.getElementsByTagName("h1"),
56376
- doc.getElementsByTagName("h2")
56377
- );
56417
+ let allSeparators = Array.from(origTitle.matchAll(/ [\|\-\\\/>»] /gi));
56418
+ curTitle = origTitle.substring(0, allSeparators.pop().index);
56419
+ if (wordCount(curTitle) < 3) {
56420
+ curTitle = origTitle.replace(/^[^\|\-\\\/>»]*[\|\-\\\/>»]/gi, "");
56421
+ }
56422
+ } else if (curTitle.includes(": ")) {
56423
+ var headings = this._getAllNodesWithTag(doc, ["h1", "h2"]);
56378
56424
  var trimmedTitle = curTitle.trim();
56379
56425
  var match2 = this._someNode(headings, function(heading2) {
56380
56426
  return heading2.textContent.trim() === trimmedTitle;
@@ -56389,8 +56435,9 @@ var require_Readability = __commonJS({
56389
56435
  }
56390
56436
  } else if (curTitle.length > 150 || curTitle.length < 15) {
56391
56437
  var hOnes = doc.getElementsByTagName("h1");
56392
- if (hOnes.length === 1)
56438
+ if (hOnes.length === 1) {
56393
56439
  curTitle = this._getInnerText(hOnes[0]);
56440
+ }
56394
56441
  }
56395
56442
  curTitle = curTitle.trim().replace(this.REGEXPS.normalize, " ");
56396
56443
  var curTitleWordCount = wordCount(curTitle);
@@ -56405,7 +56452,7 @@ var require_Readability = __commonJS({
56405
56452
  *
56406
56453
  * @return void
56407
56454
  **/
56408
- _prepDocument: function() {
56455
+ _prepDocument() {
56409
56456
  var doc = this._doc;
56410
56457
  this._removeNodes(this._getAllNodesWithTag(doc, ["style"]));
56411
56458
  if (doc.body) {
@@ -56418,7 +56465,7 @@ var require_Readability = __commonJS({
56418
56465
  * whitespace in between. If the given node is an element, the same node is
56419
56466
  * returned.
56420
56467
  */
56421
- _nextNode: function(node) {
56468
+ _nextNode(node) {
56422
56469
  var next = node;
56423
56470
  while (next && next.nodeType != this.ELEMENT_NODE && this.REGEXPS.whitespace.test(next.textContent)) {
56424
56471
  next = next.nextSibling;
@@ -56432,14 +56479,14 @@ var require_Readability = __commonJS({
56432
56479
  * will become:
56433
56480
  * <div>foo<br>bar<p>abc</p></div>
56434
56481
  */
56435
- _replaceBrs: function(elem) {
56482
+ _replaceBrs(elem) {
56436
56483
  this._forEachNode(this._getAllNodesWithTag(elem, ["br"]), function(br2) {
56437
56484
  var next = br2.nextSibling;
56438
56485
  var replaced = false;
56439
56486
  while ((next = this._nextNode(next)) && next.tagName == "BR") {
56440
56487
  replaced = true;
56441
56488
  var brSibling = next.nextSibling;
56442
- next.parentNode.removeChild(next);
56489
+ next.remove();
56443
56490
  next = brSibling;
56444
56491
  }
56445
56492
  if (replaced) {
@@ -56449,24 +56496,27 @@ var require_Readability = __commonJS({
56449
56496
  while (next) {
56450
56497
  if (next.tagName == "BR") {
56451
56498
  var nextElem = this._nextNode(next.nextSibling);
56452
- if (nextElem && nextElem.tagName == "BR")
56499
+ if (nextElem && nextElem.tagName == "BR") {
56453
56500
  break;
56501
+ }
56454
56502
  }
56455
- if (!this._isPhrasingContent(next))
56503
+ if (!this._isPhrasingContent(next)) {
56456
56504
  break;
56505
+ }
56457
56506
  var sibling = next.nextSibling;
56458
56507
  p.appendChild(next);
56459
56508
  next = sibling;
56460
56509
  }
56461
56510
  while (p.lastChild && this._isWhitespace(p.lastChild)) {
56462
- p.removeChild(p.lastChild);
56511
+ p.lastChild.remove();
56463
56512
  }
56464
- if (p.parentNode.tagName === "P")
56513
+ if (p.parentNode.tagName === "P") {
56465
56514
  this._setNodeTag(p.parentNode, "DIV");
56515
+ }
56466
56516
  }
56467
56517
  });
56468
56518
  },
56469
- _setNodeTag: function(node, tag2) {
56519
+ _setNodeTag(node, tag2) {
56470
56520
  this.log("_setNodeTag", node, tag2);
56471
56521
  if (this._docJSDOMParser) {
56472
56522
  node.localName = tag2.toLowerCase();
@@ -56478,13 +56528,11 @@ var require_Readability = __commonJS({
56478
56528
  replacement.appendChild(node.firstChild);
56479
56529
  }
56480
56530
  node.parentNode.replaceChild(replacement, node);
56481
- if (node.readability)
56531
+ if (node.readability) {
56482
56532
  replacement.readability = node.readability;
56533
+ }
56483
56534
  for (var i = 0; i < node.attributes.length; i++) {
56484
- try {
56485
- replacement.setAttribute(node.attributes[i].name, node.attributes[i].value);
56486
- } catch (ex) {
56487
- }
56535
+ replacement.setAttributeNode(node.attributes[i].cloneNode());
56488
56536
  }
56489
56537
  return replacement;
56490
56538
  },
@@ -56495,7 +56543,7 @@ var require_Readability = __commonJS({
56495
56543
  * @param Element
56496
56544
  * @return void
56497
56545
  **/
56498
- _prepArticle: function(articleContent) {
56546
+ _prepArticle(articleContent) {
56499
56547
  this._cleanStyles(articleContent);
56500
56548
  this._markDataTables(articleContent);
56501
56549
  this._fixLazyImages(articleContent);
@@ -56521,31 +56569,48 @@ var require_Readability = __commonJS({
56521
56569
  this._cleanConditionally(articleContent, "table");
56522
56570
  this._cleanConditionally(articleContent, "ul");
56523
56571
  this._cleanConditionally(articleContent, "div");
56524
- this._replaceNodeTags(this._getAllNodesWithTag(articleContent, ["h1"]), "h2");
56525
- this._removeNodes(this._getAllNodesWithTag(articleContent, ["p"]), function(paragraph2) {
56526
- var imgCount = paragraph2.getElementsByTagName("img").length;
56527
- var embedCount = paragraph2.getElementsByTagName("embed").length;
56528
- var objectCount = paragraph2.getElementsByTagName("object").length;
56529
- var iframeCount = paragraph2.getElementsByTagName("iframe").length;
56530
- var totalCount = imgCount + embedCount + objectCount + iframeCount;
56531
- return totalCount === 0 && !this._getInnerText(paragraph2, false);
56532
- });
56533
- this._forEachNode(this._getAllNodesWithTag(articleContent, ["br"]), function(br2) {
56534
- var next = this._nextNode(br2.nextSibling);
56535
- if (next && next.tagName == "P")
56536
- br2.parentNode.removeChild(br2);
56537
- });
56538
- this._forEachNode(this._getAllNodesWithTag(articleContent, ["table"]), function(table) {
56539
- var tbody = this._hasSingleTagInsideElement(table, "TBODY") ? table.firstElementChild : table;
56540
- if (this._hasSingleTagInsideElement(tbody, "TR")) {
56541
- var row = tbody.firstElementChild;
56542
- if (this._hasSingleTagInsideElement(row, "TD")) {
56543
- var cell = row.firstElementChild;
56544
- cell = this._setNodeTag(cell, this._everyNode(cell.childNodes, this._isPhrasingContent) ? "P" : "DIV");
56545
- table.parentNode.replaceChild(cell, table);
56572
+ this._replaceNodeTags(
56573
+ this._getAllNodesWithTag(articleContent, ["h1"]),
56574
+ "h2"
56575
+ );
56576
+ this._removeNodes(
56577
+ this._getAllNodesWithTag(articleContent, ["p"]),
56578
+ function(paragraph2) {
56579
+ var contentElementCount = this._getAllNodesWithTag(paragraph2, [
56580
+ "img",
56581
+ "embed",
56582
+ "object",
56583
+ "iframe"
56584
+ ]).length;
56585
+ return contentElementCount === 0 && !this._getInnerText(paragraph2, false);
56586
+ }
56587
+ );
56588
+ this._forEachNode(
56589
+ this._getAllNodesWithTag(articleContent, ["br"]),
56590
+ function(br2) {
56591
+ var next = this._nextNode(br2.nextSibling);
56592
+ if (next && next.tagName == "P") {
56593
+ br2.remove();
56546
56594
  }
56547
56595
  }
56548
- });
56596
+ );
56597
+ this._forEachNode(
56598
+ this._getAllNodesWithTag(articleContent, ["table"]),
56599
+ function(table) {
56600
+ var tbody = this._hasSingleTagInsideElement(table, "TBODY") ? table.firstElementChild : table;
56601
+ if (this._hasSingleTagInsideElement(tbody, "TR")) {
56602
+ var row = tbody.firstElementChild;
56603
+ if (this._hasSingleTagInsideElement(row, "TD")) {
56604
+ var cell = row.firstElementChild;
56605
+ cell = this._setNodeTag(
56606
+ cell,
56607
+ this._everyNode(cell.childNodes, this._isPhrasingContent) ? "P" : "DIV"
56608
+ );
56609
+ table.parentNode.replaceChild(cell, table);
56610
+ }
56611
+ }
56612
+ }
56613
+ );
56549
56614
  },
56550
56615
  /**
56551
56616
  * Initialize a node with the readability object. Also checks the
@@ -56553,9 +56618,9 @@ var require_Readability = __commonJS({
56553
56618
  *
56554
56619
  * @param Element
56555
56620
  * @return void
56556
- **/
56557
- _initializeNode: function(node) {
56558
- node.readability = { "contentScore": 0 };
56621
+ **/
56622
+ _initializeNode(node) {
56623
+ node.readability = { contentScore: 0 };
56559
56624
  switch (node.tagName) {
56560
56625
  case "DIV":
56561
56626
  node.readability.contentScore += 5;
@@ -56587,9 +56652,9 @@ var require_Readability = __commonJS({
56587
56652
  }
56588
56653
  node.readability.contentScore += this._getClassWeight(node);
56589
56654
  },
56590
- _removeAndGetNext: function(node) {
56655
+ _removeAndGetNext(node) {
56591
56656
  var nextNode = this._getNextNode(node, true);
56592
- node.parentNode.removeChild(node);
56657
+ node.remove();
56593
56658
  return nextNode;
56594
56659
  },
56595
56660
  /**
@@ -56598,8 +56663,12 @@ var require_Readability = __commonJS({
56598
56663
  * (and its kids) are going away, and we want the next node over.
56599
56664
  *
56600
56665
  * Calling this in a loop will traverse the DOM depth-first.
56666
+ *
56667
+ * @param {Element} node
56668
+ * @param {boolean} ignoreSelfAndKids
56669
+ * @return {Element}
56601
56670
  */
56602
- _getNextNode: function(node, ignoreSelfAndKids) {
56671
+ _getNextNode(node, ignoreSelfAndKids) {
56603
56672
  if (!ignoreSelfAndKids && node.firstElementChild) {
56604
56673
  return node.firstElementChild;
56605
56674
  }
@@ -56615,7 +56684,7 @@ var require_Readability = __commonJS({
56615
56684
  // 1 = same text, 0 = completely different text
56616
56685
  // works the way that it splits both texts into words and then finds words that are unique in second text
56617
56686
  // the result is given by the lower length of unique parts
56618
- _textSimilarity: function(textA, textB) {
56687
+ _textSimilarity(textA, textB) {
56619
56688
  var tokensA = textA.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean);
56620
56689
  var tokensB = textB.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean);
56621
56690
  if (!tokensA.length || !tokensB.length) {
@@ -56625,27 +56694,27 @@ var require_Readability = __commonJS({
56625
56694
  var distanceB = uniqTokensB.join(" ").length / tokensB.join(" ").length;
56626
56695
  return 1 - distanceB;
56627
56696
  },
56628
- _checkByline: function(node, matchString) {
56629
- if (this._articleByline) {
56630
- return false;
56631
- }
56632
- if (node.getAttribute !== void 0) {
56633
- var rel = node.getAttribute("rel");
56634
- var itemprop = node.getAttribute("itemprop");
56635
- }
56636
- if ((rel === "author" || itemprop && itemprop.indexOf("author") !== -1 || this.REGEXPS.byline.test(matchString)) && this._isValidByline(node.textContent)) {
56637
- this._articleByline = node.textContent.trim();
56638
- return true;
56639
- }
56640
- return false;
56697
+ /**
56698
+ * Checks whether an element node contains a valid byline
56699
+ *
56700
+ * @param node {Element}
56701
+ * @param matchString {string}
56702
+ * @return boolean
56703
+ */
56704
+ _isValidByline(node, matchString) {
56705
+ var rel = node.getAttribute("rel");
56706
+ var itemprop = node.getAttribute("itemprop");
56707
+ var bylineLength = node.textContent.trim().length;
56708
+ return (rel === "author" || itemprop && itemprop.includes("author") || this.REGEXPS.byline.test(matchString)) && !!bylineLength && bylineLength < 100;
56641
56709
  },
56642
- _getNodeAncestors: function(node, maxDepth) {
56710
+ _getNodeAncestors(node, maxDepth) {
56643
56711
  maxDepth = maxDepth || 0;
56644
56712
  var i = 0, ancestors = [];
56645
56713
  while (node.parentNode) {
56646
56714
  ancestors.push(node.parentNode);
56647
- if (maxDepth && ++i === maxDepth)
56715
+ if (maxDepth && ++i === maxDepth) {
56648
56716
  break;
56717
+ }
56649
56718
  node = node.parentNode;
56650
56719
  }
56651
56720
  return ancestors;
@@ -56656,8 +56725,9 @@ var require_Readability = __commonJS({
56656
56725
  *
56657
56726
  * @param page a document to run upon. Needs to be a full document, complete with body.
56658
56727
  * @return Element
56659
- **/
56660
- _grabArticle: function(page) {
56728
+ **/
56729
+ /* eslint-disable-next-line complexity */
56730
+ _grabArticle(page) {
56661
56731
  this.log("**** grabArticle ****");
56662
56732
  var doc = this._doc;
56663
56733
  var isPaging = page !== null;
@@ -56669,7 +56739,9 @@ var require_Readability = __commonJS({
56669
56739
  var pageCacheHtml = page.innerHTML;
56670
56740
  while (true) {
56671
56741
  this.log("Starting grabArticle loop");
56672
- var stripUnlikelyCandidates = this._flagIsActive(this.FLAG_STRIP_UNLIKELYS);
56742
+ var stripUnlikelyCandidates = this._flagIsActive(
56743
+ this.FLAG_STRIP_UNLIKELYS
56744
+ );
56673
56745
  var elementsToScore = [];
56674
56746
  var node = this._doc.documentElement;
56675
56747
  let shouldRemoveTitleHeader = true;
@@ -56687,12 +56759,29 @@ var require_Readability = __commonJS({
56687
56759
  node = this._removeAndGetNext(node);
56688
56760
  continue;
56689
56761
  }
56690
- if (this._checkByline(node, matchString)) {
56762
+ if (!this._articleByline && !this._metadata.byline && this._isValidByline(node, matchString)) {
56763
+ var endOfSearchMarkerNode = this._getNextNode(node, true);
56764
+ var next = this._getNextNode(node);
56765
+ var itemPropNameNode = null;
56766
+ while (next && next != endOfSearchMarkerNode) {
56767
+ var itemprop = next.getAttribute("itemprop");
56768
+ if (itemprop && itemprop.includes("name")) {
56769
+ itemPropNameNode = next;
56770
+ break;
56771
+ } else {
56772
+ next = this._getNextNode(next);
56773
+ }
56774
+ }
56775
+ this._articleByline = (itemPropNameNode ?? node).textContent.trim();
56691
56776
  node = this._removeAndGetNext(node);
56692
56777
  continue;
56693
56778
  }
56694
56779
  if (shouldRemoveTitleHeader && this._headerDuplicatesTitle(node)) {
56695
- this.log("Removing header: ", node.textContent.trim(), this._articleTitle.trim());
56780
+ this.log(
56781
+ "Removing header: ",
56782
+ node.textContent.trim(),
56783
+ this._articleTitle.trim()
56784
+ );
56696
56785
  shouldRemoveTitleHeader = false;
56697
56786
  node = this._removeAndGetNext(node);
56698
56787
  continue;
@@ -56704,7 +56793,9 @@ var require_Readability = __commonJS({
56704
56793
  continue;
56705
56794
  }
56706
56795
  if (this.UNLIKELY_ROLES.includes(node.getAttribute("role"))) {
56707
- this.log("Removing content with role " + node.getAttribute("role") + " - " + matchString);
56796
+ this.log(
56797
+ "Removing content with role " + node.getAttribute("role") + " - " + matchString
56798
+ );
56708
56799
  node = this._removeAndGetNext(node);
56709
56800
  continue;
56710
56801
  }
@@ -56713,7 +56804,7 @@ var require_Readability = __commonJS({
56713
56804
  node = this._removeAndGetNext(node);
56714
56805
  continue;
56715
56806
  }
56716
- if (this.DEFAULT_TAGS_TO_SCORE.indexOf(node.tagName) !== -1) {
56807
+ if (this.DEFAULT_TAGS_TO_SCORE.includes(node.tagName)) {
56717
56808
  elementsToScore.push(node);
56718
56809
  }
56719
56810
  if (node.tagName === "DIV") {
@@ -56731,7 +56822,7 @@ var require_Readability = __commonJS({
56731
56822
  }
56732
56823
  } else if (p !== null) {
56733
56824
  while (p.lastChild && this._isWhitespace(p.lastChild)) {
56734
- p.removeChild(p.lastChild);
56825
+ p.lastChild.remove();
56735
56826
  }
56736
56827
  p = null;
56737
56828
  }
@@ -56751,31 +56842,36 @@ var require_Readability = __commonJS({
56751
56842
  }
56752
56843
  var candidates = [];
56753
56844
  this._forEachNode(elementsToScore, function(elementToScore) {
56754
- if (!elementToScore.parentNode || typeof elementToScore.parentNode.tagName === "undefined")
56845
+ if (!elementToScore.parentNode || typeof elementToScore.parentNode.tagName === "undefined") {
56755
56846
  return;
56847
+ }
56756
56848
  var innerText = this._getInnerText(elementToScore);
56757
- if (innerText.length < 25)
56849
+ if (innerText.length < 25) {
56758
56850
  return;
56851
+ }
56759
56852
  var ancestors2 = this._getNodeAncestors(elementToScore, 5);
56760
- if (ancestors2.length === 0)
56853
+ if (ancestors2.length === 0) {
56761
56854
  return;
56855
+ }
56762
56856
  var contentScore = 0;
56763
56857
  contentScore += 1;
56764
56858
  contentScore += innerText.split(this.REGEXPS.commas).length;
56765
56859
  contentScore += Math.min(Math.floor(innerText.length / 100), 3);
56766
56860
  this._forEachNode(ancestors2, function(ancestor, level) {
56767
- if (!ancestor.tagName || !ancestor.parentNode || typeof ancestor.parentNode.tagName === "undefined")
56861
+ if (!ancestor.tagName || !ancestor.parentNode || typeof ancestor.parentNode.tagName === "undefined") {
56768
56862
  return;
56863
+ }
56769
56864
  if (typeof ancestor.readability === "undefined") {
56770
56865
  this._initializeNode(ancestor);
56771
56866
  candidates.push(ancestor);
56772
56867
  }
56773
- if (level === 0)
56868
+ if (level === 0) {
56774
56869
  var scoreDivider = 1;
56775
- else if (level === 1)
56870
+ } else if (level === 1) {
56776
56871
  scoreDivider = 2;
56777
- else
56872
+ } else {
56778
56873
  scoreDivider = level * 3;
56874
+ }
56779
56875
  ancestor.readability.contentScore += contentScore / scoreDivider;
56780
56876
  });
56781
56877
  });
@@ -56789,8 +56885,9 @@ var require_Readability = __commonJS({
56789
56885
  var aTopCandidate = topCandidates[t2];
56790
56886
  if (!aTopCandidate || candidateScore > aTopCandidate.readability.contentScore) {
56791
56887
  topCandidates.splice(t2, 0, candidate);
56792
- if (topCandidates.length > this._nbTopCandidates)
56888
+ if (topCandidates.length > this._nbTopCandidates) {
56793
56889
  topCandidates.pop();
56890
+ }
56794
56891
  break;
56795
56892
  }
56796
56893
  }
@@ -56811,7 +56908,9 @@ var require_Readability = __commonJS({
56811
56908
  var alternativeCandidateAncestors = [];
56812
56909
  for (var i = 1; i < topCandidates.length; i++) {
56813
56910
  if (topCandidates[i].readability.contentScore / topCandidate.readability.contentScore >= 0.75) {
56814
- alternativeCandidateAncestors.push(this._getNodeAncestors(topCandidates[i]));
56911
+ alternativeCandidateAncestors.push(
56912
+ this._getNodeAncestors(topCandidates[i])
56913
+ );
56815
56914
  }
56816
56915
  }
56817
56916
  var MINIMUM_TOPCANDIDATES = 3;
@@ -56820,7 +56919,11 @@ var require_Readability = __commonJS({
56820
56919
  while (parentOfTopCandidate.tagName !== "BODY") {
56821
56920
  var listsContainingThisAncestor = 0;
56822
56921
  for (var ancestorIndex = 0; ancestorIndex < alternativeCandidateAncestors.length && listsContainingThisAncestor < MINIMUM_TOPCANDIDATES; ancestorIndex++) {
56823
- listsContainingThisAncestor += Number(alternativeCandidateAncestors[ancestorIndex].includes(parentOfTopCandidate));
56922
+ listsContainingThisAncestor += Number(
56923
+ alternativeCandidateAncestors[ancestorIndex].includes(
56924
+ parentOfTopCandidate
56925
+ )
56926
+ );
56824
56927
  }
56825
56928
  if (listsContainingThisAncestor >= MINIMUM_TOPCANDIDATES) {
56826
56929
  topCandidate = parentOfTopCandidate;
@@ -56841,8 +56944,9 @@ var require_Readability = __commonJS({
56841
56944
  continue;
56842
56945
  }
56843
56946
  var parentScore = parentOfTopCandidate.readability.contentScore;
56844
- if (parentScore < scoreThreshold)
56947
+ if (parentScore < scoreThreshold) {
56845
56948
  break;
56949
+ }
56846
56950
  if (parentScore > lastScore) {
56847
56951
  topCandidate = parentOfTopCandidate;
56848
56952
  break;
@@ -56860,22 +56964,34 @@ var require_Readability = __commonJS({
56860
56964
  }
56861
56965
  }
56862
56966
  var articleContent = doc.createElement("DIV");
56863
- if (isPaging)
56967
+ if (isPaging) {
56864
56968
  articleContent.id = "readability-content";
56865
- var siblingScoreThreshold = Math.max(10, topCandidate.readability.contentScore * 0.2);
56969
+ }
56970
+ var siblingScoreThreshold = Math.max(
56971
+ 10,
56972
+ topCandidate.readability.contentScore * 0.2
56973
+ );
56866
56974
  parentOfTopCandidate = topCandidate.parentNode;
56867
56975
  var siblings = parentOfTopCandidate.children;
56868
56976
  for (var s2 = 0, sl = siblings.length; s2 < sl; s2++) {
56869
56977
  var sibling = siblings[s2];
56870
56978
  var append = false;
56871
- this.log("Looking at sibling node:", sibling, sibling.readability ? "with score " + sibling.readability.contentScore : "");
56872
- this.log("Sibling has score", sibling.readability ? sibling.readability.contentScore : "Unknown");
56979
+ this.log(
56980
+ "Looking at sibling node:",
56981
+ sibling,
56982
+ sibling.readability ? "with score " + sibling.readability.contentScore : ""
56983
+ );
56984
+ this.log(
56985
+ "Sibling has score",
56986
+ sibling.readability ? sibling.readability.contentScore : "Unknown"
56987
+ );
56873
56988
  if (sibling === topCandidate) {
56874
56989
  append = true;
56875
56990
  } else {
56876
56991
  var contentBonus = 0;
56877
- if (sibling.className === topCandidate.className && topCandidate.className !== "")
56992
+ if (sibling.className === topCandidate.className && topCandidate.className !== "") {
56878
56993
  contentBonus += topCandidate.readability.contentScore * 0.2;
56994
+ }
56879
56995
  if (sibling.readability && sibling.readability.contentScore + contentBonus >= siblingScoreThreshold) {
56880
56996
  append = true;
56881
56997
  } else if (sibling.nodeName === "P") {
@@ -56891,7 +57007,7 @@ var require_Readability = __commonJS({
56891
57007
  }
56892
57008
  if (append) {
56893
57009
  this.log("Appending node:", sibling);
56894
- if (this.ALTER_TO_DIV_EXCEPTIONS.indexOf(sibling.nodeName) === -1) {
57010
+ if (!this.ALTER_TO_DIV_EXCEPTIONS.includes(sibling.nodeName)) {
56895
57011
  this.log("Altering sibling:", sibling, "to div.");
56896
57012
  sibling = this._setNodeTag(sibling, "DIV");
56897
57013
  }
@@ -56901,11 +57017,13 @@ var require_Readability = __commonJS({
56901
57017
  sl -= 1;
56902
57018
  }
56903
57019
  }
56904
- if (this._debug)
57020
+ if (this._debug) {
56905
57021
  this.log("Article content pre-prep: " + articleContent.innerHTML);
57022
+ }
56906
57023
  this._prepArticle(articleContent);
56907
- if (this._debug)
57024
+ if (this._debug) {
56908
57025
  this.log("Article content post-prep: " + articleContent.innerHTML);
57026
+ }
56909
57027
  if (neededToCreateTopCandidate) {
56910
57028
  topCandidate.id = "readability-page-1";
56911
57029
  topCandidate.className = "page";
@@ -56918,24 +57036,25 @@ var require_Readability = __commonJS({
56918
57036
  }
56919
57037
  articleContent.appendChild(div);
56920
57038
  }
56921
- if (this._debug)
57039
+ if (this._debug) {
56922
57040
  this.log("Article content after paging: " + articleContent.innerHTML);
57041
+ }
56923
57042
  var parseSuccessful = true;
56924
57043
  var textLength = this._getInnerText(articleContent, true).length;
56925
57044
  if (textLength < this._charThreshold) {
56926
57045
  parseSuccessful = false;
56927
57046
  page.innerHTML = pageCacheHtml;
57047
+ this._attempts.push({
57048
+ articleContent,
57049
+ textLength
57050
+ });
56928
57051
  if (this._flagIsActive(this.FLAG_STRIP_UNLIKELYS)) {
56929
57052
  this._removeFlag(this.FLAG_STRIP_UNLIKELYS);
56930
- this._attempts.push({ articleContent, textLength });
56931
57053
  } else if (this._flagIsActive(this.FLAG_WEIGHT_CLASSES)) {
56932
57054
  this._removeFlag(this.FLAG_WEIGHT_CLASSES);
56933
- this._attempts.push({ articleContent, textLength });
56934
57055
  } else if (this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)) {
56935
57056
  this._removeFlag(this.FLAG_CLEAN_CONDITIONALLY);
56936
- this._attempts.push({ articleContent, textLength });
56937
57057
  } else {
56938
- this._attempts.push({ articleContent, textLength });
56939
57058
  this._attempts.sort(function(a, b) {
56940
57059
  return b.textLength - a.textLength;
56941
57060
  });
@@ -56947,10 +57066,13 @@ var require_Readability = __commonJS({
56947
57066
  }
56948
57067
  }
56949
57068
  if (parseSuccessful) {
56950
- var ancestors = [parentOfTopCandidate, topCandidate].concat(this._getNodeAncestors(parentOfTopCandidate));
57069
+ var ancestors = [parentOfTopCandidate, topCandidate].concat(
57070
+ this._getNodeAncestors(parentOfTopCandidate)
57071
+ );
56951
57072
  this._someNode(ancestors, function(ancestor) {
56952
- if (!ancestor.tagName)
57073
+ if (!ancestor.tagName) {
56953
57074
  return false;
57075
+ }
56954
57076
  var articleDir = ancestor.getAttribute("dir");
56955
57077
  if (articleDir) {
56956
57078
  this._articleDir = articleDir;
@@ -56962,37 +57084,25 @@ var require_Readability = __commonJS({
56962
57084
  }
56963
57085
  }
56964
57086
  },
56965
- /**
56966
- * Check whether the input string could be a byline.
56967
- * This verifies that the input is a string, and that the length
56968
- * is less than 100 chars.
56969
- *
56970
- * @param possibleByline {string} - a string to check whether its a byline.
56971
- * @return Boolean - whether the input string is a byline.
56972
- */
56973
- _isValidByline: function(byline) {
56974
- if (typeof byline == "string" || byline instanceof String) {
56975
- byline = byline.trim();
56976
- return byline.length > 0 && byline.length < 100;
56977
- }
56978
- return false;
56979
- },
56980
57087
  /**
56981
57088
  * Converts some of the common HTML entities in string to their corresponding characters.
56982
57089
  *
56983
57090
  * @param str {string} - a string to unescape.
56984
57091
  * @return string without HTML entity.
56985
57092
  */
56986
- _unescapeHtmlEntities: function(str3) {
57093
+ _unescapeHtmlEntities(str3) {
56987
57094
  if (!str3) {
56988
57095
  return str3;
56989
57096
  }
56990
57097
  var htmlEscapeMap = this.HTML_ESCAPE_MAP;
56991
57098
  return str3.replace(/&(quot|amp|apos|lt|gt);/g, function(_, tag2) {
56992
57099
  return htmlEscapeMap[tag2];
56993
- }).replace(/&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi, function(_, hex, numStr) {
57100
+ }).replace(/&#(?:x([0-9a-f]+)|([0-9]+));/gi, function(_, hex, numStr) {
56994
57101
  var num = parseInt(hex || numStr, hex ? 16 : 10);
56995
- return String.fromCharCode(num);
57102
+ if (num == 0 || num > 1114111 || num >= 55296 && num <= 57343) {
57103
+ num = 65533;
57104
+ }
57105
+ return String.fromCodePoint(num);
56996
57106
  });
56997
57107
  },
56998
57108
  /**
@@ -57000,22 +57110,33 @@ var require_Readability = __commonJS({
57000
57110
  * For now, only Schema.org objects of type Article or its subtypes are supported.
57001
57111
  * @return Object with any metadata that could be extracted (possibly none)
57002
57112
  */
57003
- _getJSONLD: function(doc) {
57113
+ _getJSONLD(doc) {
57004
57114
  var scripts = this._getAllNodesWithTag(doc, ["script"]);
57005
57115
  var metadata;
57006
57116
  this._forEachNode(scripts, function(jsonLdElement) {
57007
57117
  if (!metadata && jsonLdElement.getAttribute("type") === "application/ld+json") {
57008
57118
  try {
57009
- var content = jsonLdElement.textContent.replace(/^\s*<!\[CDATA\[|\]\]>\s*$/g, "");
57119
+ var content = jsonLdElement.textContent.replace(
57120
+ /^\s*<!\[CDATA\[|\]\]>\s*$/g,
57121
+ ""
57122
+ );
57010
57123
  var parsed = JSON.parse(content);
57011
- if (!parsed["@context"] || !parsed["@context"].match(/^https?\:\/\/schema\.org$/)) {
57124
+ if (Array.isArray(parsed)) {
57125
+ parsed = parsed.find((it) => {
57126
+ return it["@type"] && it["@type"].match(this.REGEXPS.jsonLdArticleTypes);
57127
+ });
57128
+ if (!parsed) {
57129
+ return;
57130
+ }
57131
+ }
57132
+ var schemaDotOrgRegex = /^https?\:\/\/schema\.org\/?$/;
57133
+ var matches = typeof parsed["@context"] === "string" && parsed["@context"].match(schemaDotOrgRegex) || typeof parsed["@context"] === "object" && typeof parsed["@context"]["@vocab"] == "string" && parsed["@context"]["@vocab"].match(schemaDotOrgRegex);
57134
+ if (!matches) {
57012
57135
  return;
57013
57136
  }
57014
57137
  if (!parsed["@type"] && Array.isArray(parsed["@graph"])) {
57015
- parsed = parsed["@graph"].find(function(it) {
57016
- return (it["@type"] || "").match(
57017
- this.REGEXPS.jsonLdArticleTypes
57018
- );
57138
+ parsed = parsed["@graph"].find((it) => {
57139
+ return (it["@type"] || "").match(this.REGEXPS.jsonLdArticleTypes);
57019
57140
  });
57020
57141
  }
57021
57142
  if (!parsed || !parsed["@type"] || !parsed["@type"].match(this.REGEXPS.jsonLdArticleTypes)) {
@@ -57056,7 +57177,6 @@ var require_Readability = __commonJS({
57056
57177
  if (typeof parsed.datePublished === "string") {
57057
57178
  metadata.datePublished = parsed.datePublished.trim();
57058
57179
  }
57059
- return;
57060
57180
  } catch (err) {
57061
57181
  this.log(err.message);
57062
57182
  }
@@ -57072,12 +57192,12 @@ var require_Readability = __commonJS({
57072
57192
  *
57073
57193
  * @return Object with optional "excerpt" and "byline" properties
57074
57194
  */
57075
- _getArticleMetadata: function(jsonld) {
57195
+ _getArticleMetadata(jsonld) {
57076
57196
  var metadata = {};
57077
57197
  var values = {};
57078
57198
  var metaElements = this._doc.getElementsByTagName("meta");
57079
57199
  var propertyPattern = /\s*(article|dc|dcterm|og|twitter)\s*:\s*(author|creator|description|published_time|title|site_name)\s*/gi;
57080
- var namePattern = /^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|site_name)\s*$/i;
57200
+ var namePattern = /^\s*(?:(dc|dcterm|og|twitter|parsely|weibo:(article|webpage))\s*[-\.:]\s*)?(author|creator|pub-date|description|title|site_name)\s*$/i;
57081
57201
  this._forEachNode(metaElements, function(element) {
57082
57202
  var elementName = element.getAttribute("name");
57083
57203
  var elementProperty = element.getAttribute("property");
@@ -57102,14 +57222,15 @@ var require_Readability = __commonJS({
57102
57222
  }
57103
57223
  }
57104
57224
  });
57105
- metadata.title = jsonld.title || values["dc:title"] || values["dcterm:title"] || values["og:title"] || values["weibo:article:title"] || values["weibo:webpage:title"] || values["title"] || values["twitter:title"];
57225
+ metadata.title = jsonld.title || values["dc:title"] || values["dcterm:title"] || values["og:title"] || values["weibo:article:title"] || values["weibo:webpage:title"] || values.title || values["twitter:title"] || values["parsely-title"];
57106
57226
  if (!metadata.title) {
57107
57227
  metadata.title = this._getArticleTitle();
57108
57228
  }
57109
- metadata.byline = jsonld.byline || values["dc:creator"] || values["dcterm:creator"] || values["author"];
57110
- metadata.excerpt = jsonld.excerpt || values["dc:description"] || values["dcterm:description"] || values["og:description"] || values["weibo:article:description"] || values["weibo:webpage:description"] || values["description"] || values["twitter:description"];
57229
+ const articleAuthor = typeof values["article:author"] === "string" && !this._isUrl(values["article:author"]) ? values["article:author"] : void 0;
57230
+ metadata.byline = jsonld.byline || values["dc:creator"] || values["dcterm:creator"] || values.author || values["parsely-author"] || articleAuthor;
57231
+ metadata.excerpt = jsonld.excerpt || values["dc:description"] || values["dcterm:description"] || values["og:description"] || values["weibo:article:description"] || values["weibo:webpage:description"] || values.description || values["twitter:description"];
57111
57232
  metadata.siteName = jsonld.siteName || values["og:site_name"];
57112
- metadata.publishedTime = jsonld.datePublished || values["article:published_time"] || null;
57233
+ metadata.publishedTime = jsonld.datePublished || values["article:published_time"] || values["parsely-pub-date"] || null;
57113
57234
  metadata.title = this._unescapeHtmlEntities(metadata.title);
57114
57235
  metadata.byline = this._unescapeHtmlEntities(metadata.byline);
57115
57236
  metadata.excerpt = this._unescapeHtmlEntities(metadata.excerpt);
@@ -57122,15 +57243,18 @@ var require_Readability = __commonJS({
57122
57243
  * whether as a direct child or as its descendants.
57123
57244
  *
57124
57245
  * @param Element
57125
- **/
57126
- _isSingleImage: function(node) {
57127
- if (node.tagName === "IMG") {
57128
- return true;
57129
- }
57130
- if (node.children.length !== 1 || node.textContent.trim() !== "") {
57131
- return false;
57246
+ **/
57247
+ _isSingleImage(node) {
57248
+ while (node) {
57249
+ if (node.tagName === "IMG") {
57250
+ return true;
57251
+ }
57252
+ if (node.children.length !== 1 || node.textContent.trim() !== "") {
57253
+ return false;
57254
+ }
57255
+ node = node.children[0];
57132
57256
  }
57133
- return this._isSingleImage(node.children[0]);
57257
+ return false;
57134
57258
  },
57135
57259
  /**
57136
57260
  * Find all <noscript> that are located after <img> nodes, and which contain only one
@@ -57139,8 +57263,8 @@ var require_Readability = __commonJS({
57139
57263
  * some sites (e.g. Medium).
57140
57264
  *
57141
57265
  * @param Element
57142
- **/
57143
- _unwrapNoscriptImages: function(doc) {
57266
+ **/
57267
+ _unwrapNoscriptImages(doc) {
57144
57268
  var imgs = Array.from(doc.getElementsByTagName("img"));
57145
57269
  this._forEachNode(imgs, function(img) {
57146
57270
  for (var i = 0; i < img.attributes.length; i++) {
@@ -57156,15 +57280,15 @@ var require_Readability = __commonJS({
57156
57280
  return;
57157
57281
  }
57158
57282
  }
57159
- img.parentNode.removeChild(img);
57283
+ img.remove();
57160
57284
  });
57161
57285
  var noscripts = Array.from(doc.getElementsByTagName("noscript"));
57162
57286
  this._forEachNode(noscripts, function(noscript) {
57163
- var tmp = doc.createElement("div");
57164
- tmp.innerHTML = noscript.innerHTML;
57165
- if (!this._isSingleImage(tmp)) {
57287
+ if (!this._isSingleImage(noscript)) {
57166
57288
  return;
57167
57289
  }
57290
+ var tmp = doc.createElement("div");
57291
+ tmp.innerHTML = noscript.innerHTML;
57168
57292
  var prevElement = noscript.previousElementSibling;
57169
57293
  if (prevElement && this._isSingleImage(prevElement)) {
57170
57294
  var prevImg = prevElement;
@@ -57196,8 +57320,8 @@ var require_Readability = __commonJS({
57196
57320
  * Removes script tags from the document.
57197
57321
  *
57198
57322
  * @param Element
57199
- **/
57200
- _removeScripts: function(doc) {
57323
+ **/
57324
+ _removeScripts(doc) {
57201
57325
  this._removeNodes(this._getAllNodesWithTag(doc, ["script", "noscript"]));
57202
57326
  },
57203
57327
  /**
@@ -57207,8 +57331,8 @@ var require_Readability = __commonJS({
57207
57331
  *
57208
57332
  * @param Element
57209
57333
  * @param string tag of child element
57210
- **/
57211
- _hasSingleTagInsideElement: function(element, tag2) {
57334
+ **/
57335
+ _hasSingleTagInsideElement(element, tag2) {
57212
57336
  if (element.children.length != 1 || element.children[0].tagName !== tag2) {
57213
57337
  return false;
57214
57338
  }
@@ -57216,15 +57340,15 @@ var require_Readability = __commonJS({
57216
57340
  return node.nodeType === this.TEXT_NODE && this.REGEXPS.hasContent.test(node.textContent);
57217
57341
  });
57218
57342
  },
57219
- _isElementWithoutContent: function(node) {
57220
- return node.nodeType === this.ELEMENT_NODE && node.textContent.trim().length == 0 && (node.children.length == 0 || node.children.length == node.getElementsByTagName("br").length + node.getElementsByTagName("hr").length);
57343
+ _isElementWithoutContent(node) {
57344
+ return node.nodeType === this.ELEMENT_NODE && !node.textContent.trim().length && (!node.children.length || node.children.length == node.getElementsByTagName("br").length + node.getElementsByTagName("hr").length);
57221
57345
  },
57222
57346
  /**
57223
57347
  * Determine whether element has any children block level elements.
57224
57348
  *
57225
57349
  * @param Element
57226
57350
  */
57227
- _hasChildBlockElement: function(element) {
57351
+ _hasChildBlockElement(element) {
57228
57352
  return this._someNode(element.childNodes, function(node) {
57229
57353
  return this.DIV_TO_P_ELEMS.has(node.tagName) || this._hasChildBlockElement(node);
57230
57354
  });
@@ -57232,11 +57356,11 @@ var require_Readability = __commonJS({
57232
57356
  /***
57233
57357
  * Determine if a node qualifies as phrasing content.
57234
57358
  * https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/Content_categories#Phrasing_content
57235
- **/
57236
- _isPhrasingContent: function(node) {
57237
- return node.nodeType === this.TEXT_NODE || this.PHRASING_ELEMS.indexOf(node.tagName) !== -1 || (node.tagName === "A" || node.tagName === "DEL" || node.tagName === "INS") && this._everyNode(node.childNodes, this._isPhrasingContent);
57359
+ **/
57360
+ _isPhrasingContent(node) {
57361
+ return node.nodeType === this.TEXT_NODE || this.PHRASING_ELEMS.includes(node.tagName) || (node.tagName === "A" || node.tagName === "DEL" || node.tagName === "INS") && this._everyNode(node.childNodes, this._isPhrasingContent);
57238
57362
  },
57239
- _isWhitespace: function(node) {
57363
+ _isWhitespace(node) {
57240
57364
  return node.nodeType === this.TEXT_NODE && node.textContent.trim().length === 0 || node.nodeType === this.ELEMENT_NODE && node.tagName === "BR";
57241
57365
  },
57242
57366
  /**
@@ -57246,8 +57370,8 @@ var require_Readability = __commonJS({
57246
57370
  * @param Element
57247
57371
  * @param Boolean normalizeSpaces (default: true)
57248
57372
  * @return string
57249
- **/
57250
- _getInnerText: function(e2, normalizeSpaces) {
57373
+ **/
57374
+ _getInnerText(e2, normalizeSpaces) {
57251
57375
  normalizeSpaces = typeof normalizeSpaces === "undefined" ? true : normalizeSpaces;
57252
57376
  var textContent2 = e2.textContent.trim();
57253
57377
  if (normalizeSpaces) {
@@ -57261,8 +57385,8 @@ var require_Readability = __commonJS({
57261
57385
  * @param Element
57262
57386
  * @param string - what to split on. Default is ","
57263
57387
  * @return number (integer)
57264
- **/
57265
- _getCharCount: function(e2, s2) {
57388
+ **/
57389
+ _getCharCount(e2, s2) {
57266
57390
  s2 = s2 || ",";
57267
57391
  return this._getInnerText(e2).split(s2).length - 1;
57268
57392
  },
@@ -57272,14 +57396,15 @@ var require_Readability = __commonJS({
57272
57396
  *
57273
57397
  * @param Element
57274
57398
  * @return void
57275
- **/
57276
- _cleanStyles: function(e2) {
57277
- if (!e2 || e2.tagName.toLowerCase() === "svg")
57399
+ **/
57400
+ _cleanStyles(e2) {
57401
+ if (!e2 || e2.tagName.toLowerCase() === "svg") {
57278
57402
  return;
57403
+ }
57279
57404
  for (var i = 0; i < this.PRESENTATIONAL_ATTRIBUTES.length; i++) {
57280
57405
  e2.removeAttribute(this.PRESENTATIONAL_ATTRIBUTES[i]);
57281
57406
  }
57282
- if (this.DEPRECATED_SIZE_ATTRIBUTE_ELEMS.indexOf(e2.tagName) !== -1) {
57407
+ if (this.DEPRECATED_SIZE_ATTRIBUTE_ELEMS.includes(e2.tagName)) {
57283
57408
  e2.removeAttribute("width");
57284
57409
  e2.removeAttribute("height");
57285
57410
  }
@@ -57295,11 +57420,12 @@ var require_Readability = __commonJS({
57295
57420
  *
57296
57421
  * @param Element
57297
57422
  * @return number (float)
57298
- **/
57299
- _getLinkDensity: function(element) {
57423
+ **/
57424
+ _getLinkDensity(element) {
57300
57425
  var textLength = this._getInnerText(element).length;
57301
- if (textLength === 0)
57426
+ if (textLength === 0) {
57302
57427
  return 0;
57428
+ }
57303
57429
  var linkLength = 0;
57304
57430
  this._forEachNode(element.getElementsByTagName("a"), function(linkNode) {
57305
57431
  var href = linkNode.getAttribute("href");
@@ -57314,22 +57440,27 @@ var require_Readability = __commonJS({
57314
57440
  *
57315
57441
  * @param Element
57316
57442
  * @return number (Integer)
57317
- **/
57318
- _getClassWeight: function(e2) {
57319
- if (!this._flagIsActive(this.FLAG_WEIGHT_CLASSES))
57443
+ **/
57444
+ _getClassWeight(e2) {
57445
+ if (!this._flagIsActive(this.FLAG_WEIGHT_CLASSES)) {
57320
57446
  return 0;
57447
+ }
57321
57448
  var weight = 0;
57322
57449
  if (typeof e2.className === "string" && e2.className !== "") {
57323
- if (this.REGEXPS.negative.test(e2.className))
57450
+ if (this.REGEXPS.negative.test(e2.className)) {
57324
57451
  weight -= 25;
57325
- if (this.REGEXPS.positive.test(e2.className))
57452
+ }
57453
+ if (this.REGEXPS.positive.test(e2.className)) {
57326
57454
  weight += 25;
57455
+ }
57327
57456
  }
57328
57457
  if (typeof e2.id === "string" && e2.id !== "") {
57329
- if (this.REGEXPS.negative.test(e2.id))
57458
+ if (this.REGEXPS.negative.test(e2.id)) {
57330
57459
  weight -= 25;
57331
- if (this.REGEXPS.positive.test(e2.id))
57460
+ }
57461
+ if (this.REGEXPS.positive.test(e2.id)) {
57332
57462
  weight += 25;
57463
+ }
57333
57464
  }
57334
57465
  return weight;
57335
57466
  },
@@ -57341,8 +57472,8 @@ var require_Readability = __commonJS({
57341
57472
  * @param string tag to clean
57342
57473
  * @return void
57343
57474
  **/
57344
- _clean: function(e2, tag2) {
57345
- var isEmbed = ["object", "embed", "iframe"].indexOf(tag2) !== -1;
57475
+ _clean(e2, tag2) {
57476
+ var isEmbed = ["object", "embed", "iframe"].includes(tag2);
57346
57477
  this._removeNodes(this._getAllNodesWithTag(e2, [tag2]), function(element) {
57347
57478
  if (isEmbed) {
57348
57479
  for (var i = 0; i < element.attributes.length; i++) {
@@ -57366,15 +57497,17 @@ var require_Readability = __commonJS({
57366
57497
  * @param Function filterFn a filter to invoke to determine whether this node 'counts'
57367
57498
  * @return Boolean
57368
57499
  */
57369
- _hasAncestorTag: function(node, tagName, maxDepth, filterFn) {
57500
+ _hasAncestorTag(node, tagName, maxDepth, filterFn) {
57370
57501
  maxDepth = maxDepth || 3;
57371
57502
  tagName = tagName.toUpperCase();
57372
57503
  var depth = 0;
57373
57504
  while (node.parentNode) {
57374
- if (maxDepth > 0 && depth > maxDepth)
57505
+ if (maxDepth > 0 && depth > maxDepth) {
57375
57506
  return false;
57376
- if (node.parentNode.tagName === tagName && (!filterFn || filterFn(node.parentNode)))
57507
+ }
57508
+ if (node.parentNode.tagName === tagName && (!filterFn || filterFn(node.parentNode))) {
57377
57509
  return true;
57510
+ }
57378
57511
  node = node.parentNode;
57379
57512
  depth++;
57380
57513
  }
@@ -57383,7 +57516,7 @@ var require_Readability = __commonJS({
57383
57516
  /**
57384
57517
  * Return an object indicating how many rows and columns this table has.
57385
57518
  */
57386
- _getRowAndColumnCount: function(table) {
57519
+ _getRowAndColumnCount(table) {
57387
57520
  var rows = 0;
57388
57521
  var columns = 0;
57389
57522
  var trs = table.getElementsByTagName("tr");
@@ -57411,7 +57544,7 @@ var require_Readability = __commonJS({
57411
57544
  * similar checks as
57412
57545
  * https://searchfox.org/mozilla-central/rev/f82d5c549f046cb64ce5602bfd894b7ae807c8f8/accessible/generic/TableAccessible.cpp#19
57413
57546
  */
57414
- _markDataTables: function(root) {
57547
+ _markDataTables(root) {
57415
57548
  var tables = root.getElementsByTagName("table");
57416
57549
  for (var i = 0; i < tables.length; i++) {
57417
57550
  var table = tables[i];
@@ -57431,7 +57564,7 @@ var require_Readability = __commonJS({
57431
57564
  continue;
57432
57565
  }
57433
57566
  var caption = table.getElementsByTagName("caption")[0];
57434
- if (caption && caption.childNodes.length > 0) {
57567
+ if (caption && caption.childNodes.length) {
57435
57568
  table._readabilityDataTable = true;
57436
57569
  continue;
57437
57570
  }
@@ -57449,6 +57582,10 @@ var require_Readability = __commonJS({
57449
57582
  continue;
57450
57583
  }
57451
57584
  var sizeInfo = this._getRowAndColumnCount(table);
57585
+ if (sizeInfo.columns == 1 || sizeInfo.rows == 1) {
57586
+ table._readabilityDataTable = false;
57587
+ continue;
57588
+ }
57452
57589
  if (sizeInfo.rows >= 10 || sizeInfo.columns > 4) {
57453
57590
  table._readabilityDataTable = true;
57454
57591
  continue;
@@ -57457,66 +57594,72 @@ var require_Readability = __commonJS({
57457
57594
  }
57458
57595
  },
57459
57596
  /* convert images and figures that have properties like data-src into images that can be loaded without JS */
57460
- _fixLazyImages: function(root) {
57461
- this._forEachNode(this._getAllNodesWithTag(root, ["img", "picture", "figure"]), function(elem) {
57462
- if (elem.src && this.REGEXPS.b64DataUrl.test(elem.src)) {
57463
- var parts = this.REGEXPS.b64DataUrl.exec(elem.src);
57464
- if (parts[1] === "image/svg+xml") {
57465
- return;
57466
- }
57467
- var srcCouldBeRemoved = false;
57468
- for (var i = 0; i < elem.attributes.length; i++) {
57469
- var attr = elem.attributes[i];
57470
- if (attr.name === "src") {
57471
- continue;
57597
+ _fixLazyImages(root) {
57598
+ this._forEachNode(
57599
+ this._getAllNodesWithTag(root, ["img", "picture", "figure"]),
57600
+ function(elem) {
57601
+ if (elem.src && this.REGEXPS.b64DataUrl.test(elem.src)) {
57602
+ var parts = this.REGEXPS.b64DataUrl.exec(elem.src);
57603
+ if (parts[1] === "image/svg+xml") {
57604
+ return;
57472
57605
  }
57473
- if (/\.(jpg|jpeg|png|webp)/i.test(attr.value)) {
57474
- srcCouldBeRemoved = true;
57475
- break;
57606
+ var srcCouldBeRemoved = false;
57607
+ for (var i = 0; i < elem.attributes.length; i++) {
57608
+ var attr = elem.attributes[i];
57609
+ if (attr.name === "src") {
57610
+ continue;
57611
+ }
57612
+ if (/\.(jpg|jpeg|png|webp)/i.test(attr.value)) {
57613
+ srcCouldBeRemoved = true;
57614
+ break;
57615
+ }
57476
57616
  }
57477
- }
57478
- if (srcCouldBeRemoved) {
57479
- var b64starts = elem.src.search(/base64\s*/i) + 7;
57480
- var b64length = elem.src.length - b64starts;
57481
- if (b64length < 133) {
57482
- elem.removeAttribute("src");
57617
+ if (srcCouldBeRemoved) {
57618
+ var b64starts = parts[0].length;
57619
+ var b64length = elem.src.length - b64starts;
57620
+ if (b64length < 133) {
57621
+ elem.removeAttribute("src");
57622
+ }
57483
57623
  }
57484
57624
  }
57485
- }
57486
- if ((elem.src || elem.srcset && elem.srcset != "null") && elem.className.toLowerCase().indexOf("lazy") === -1) {
57487
- return;
57488
- }
57489
- for (var j = 0; j < elem.attributes.length; j++) {
57490
- attr = elem.attributes[j];
57491
- if (attr.name === "src" || attr.name === "srcset" || attr.name === "alt") {
57492
- continue;
57493
- }
57494
- var copyTo = null;
57495
- if (/\.(jpg|jpeg|png|webp)\s+\d/.test(attr.value)) {
57496
- copyTo = "srcset";
57497
- } else if (/^\s*\S+\.(jpg|jpeg|png|webp)\S*\s*$/.test(attr.value)) {
57498
- copyTo = "src";
57625
+ if ((elem.src || elem.srcset && elem.srcset != "null") && !elem.className.toLowerCase().includes("lazy")) {
57626
+ return;
57499
57627
  }
57500
- if (copyTo) {
57501
- if (elem.tagName === "IMG" || elem.tagName === "PICTURE") {
57502
- elem.setAttribute(copyTo, attr.value);
57503
- } else if (elem.tagName === "FIGURE" && !this._getAllNodesWithTag(elem, ["img", "picture"]).length) {
57504
- var img = this._doc.createElement("img");
57505
- img.setAttribute(copyTo, attr.value);
57506
- elem.appendChild(img);
57628
+ for (var j = 0; j < elem.attributes.length; j++) {
57629
+ attr = elem.attributes[j];
57630
+ if (attr.name === "src" || attr.name === "srcset" || attr.name === "alt") {
57631
+ continue;
57632
+ }
57633
+ var copyTo = null;
57634
+ if (/\.(jpg|jpeg|png|webp)\s+\d/.test(attr.value)) {
57635
+ copyTo = "srcset";
57636
+ } else if (/^\s*\S+\.(jpg|jpeg|png|webp)\S*\s*$/.test(attr.value)) {
57637
+ copyTo = "src";
57638
+ }
57639
+ if (copyTo) {
57640
+ if (elem.tagName === "IMG" || elem.tagName === "PICTURE") {
57641
+ elem.setAttribute(copyTo, attr.value);
57642
+ } else if (elem.tagName === "FIGURE" && !this._getAllNodesWithTag(elem, ["img", "picture"]).length) {
57643
+ var img = this._doc.createElement("img");
57644
+ img.setAttribute(copyTo, attr.value);
57645
+ elem.appendChild(img);
57646
+ }
57507
57647
  }
57508
57648
  }
57509
57649
  }
57510
- });
57650
+ );
57511
57651
  },
57512
- _getTextDensity: function(e2, tags) {
57652
+ _getTextDensity(e2, tags) {
57513
57653
  var textLength = this._getInnerText(e2, true).length;
57514
57654
  if (textLength === 0) {
57515
57655
  return 0;
57516
57656
  }
57517
57657
  var childrenLength = 0;
57518
57658
  var children2 = this._getAllNodesWithTag(e2, tags);
57519
- this._forEachNode(children2, (child) => childrenLength += this._getInnerText(child, true).length);
57659
+ this._forEachNode(
57660
+ children2,
57661
+ (child) => childrenLength += this._getInnerText(child, true).length
57662
+ );
57520
57663
  return childrenLength / textLength;
57521
57664
  },
57522
57665
  /**
@@ -57525,9 +57668,10 @@ var require_Readability = __commonJS({
57525
57668
  *
57526
57669
  * @return void
57527
57670
  **/
57528
- _cleanConditionally: function(e2, tag2) {
57529
- if (!this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY))
57671
+ _cleanConditionally(e2, tag2) {
57672
+ if (!this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)) {
57530
57673
  return;
57674
+ }
57531
57675
  this._removeNodes(this._getAllNodesWithTag(e2, [tag2]), function(node) {
57532
57676
  var isDataTable2 = function(t2) {
57533
57677
  return t2._readabilityDataTable;
@@ -57536,7 +57680,10 @@ var require_Readability = __commonJS({
57536
57680
  if (!isList) {
57537
57681
  var listLength = 0;
57538
57682
  var listNodes = this._getAllNodesWithTag(node, ["ul", "ol"]);
57539
- this._forEachNode(listNodes, (list2) => listLength += this._getInnerText(list2).length);
57683
+ this._forEachNode(
57684
+ listNodes,
57685
+ (list2) => listLength += this._getInnerText(list2).length
57686
+ );
57540
57687
  isList = listLength / this._getInnerText(node).length > 0.9;
57541
57688
  }
57542
57689
  if (tag2 === "table" && isDataTable2(node)) {
@@ -57548,6 +57695,11 @@ var require_Readability = __commonJS({
57548
57695
  if (this._hasAncestorTag(node, "code")) {
57549
57696
  return false;
57550
57697
  }
57698
+ if ([...node.getElementsByTagName("table")].some(
57699
+ (tbl) => tbl._readabilityDataTable
57700
+ )) {
57701
+ return false;
57702
+ }
57551
57703
  var weight = this._getClassWeight(node);
57552
57704
  this.log("Cleaning Conditionally", node);
57553
57705
  var contentScore = 0;
@@ -57559,9 +57711,20 @@ var require_Readability = __commonJS({
57559
57711
  var img = node.getElementsByTagName("img").length;
57560
57712
  var li = node.getElementsByTagName("li").length - 100;
57561
57713
  var input = node.getElementsByTagName("input").length;
57562
- var headingDensity = this._getTextDensity(node, ["h1", "h2", "h3", "h4", "h5", "h6"]);
57714
+ var headingDensity = this._getTextDensity(node, [
57715
+ "h1",
57716
+ "h2",
57717
+ "h3",
57718
+ "h4",
57719
+ "h5",
57720
+ "h6"
57721
+ ]);
57563
57722
  var embedCount = 0;
57564
- var embeds = this._getAllNodesWithTag(node, ["object", "embed", "iframe"]);
57723
+ var embeds = this._getAllNodesWithTag(node, [
57724
+ "object",
57725
+ "embed",
57726
+ "iframe"
57727
+ ]);
57565
57728
  for (var i = 0; i < embeds.length; i++) {
57566
57729
  for (var j = 0; j < embeds[i].attributes.length; j++) {
57567
57730
  if (this._allowedVideoRegex.test(embeds[i].attributes[j].value)) {
@@ -57573,9 +57736,60 @@ var require_Readability = __commonJS({
57573
57736
  }
57574
57737
  embedCount++;
57575
57738
  }
57739
+ var innerText = this._getInnerText(node);
57740
+ if (this.REGEXPS.adWords.test(innerText) || this.REGEXPS.loadingWords.test(innerText)) {
57741
+ return true;
57742
+ }
57743
+ var contentLength = innerText.length;
57576
57744
  var linkDensity = this._getLinkDensity(node);
57577
- var contentLength = this._getInnerText(node).length;
57578
- var haveToRemove = img > 1 && p / img < 0.5 && !this._hasAncestorTag(node, "figure") || !isList && li > p || input > Math.floor(p / 3) || !isList && headingDensity < 0.9 && contentLength < 25 && (img === 0 || img > 2) && !this._hasAncestorTag(node, "figure") || !isList && weight < 25 && linkDensity > 0.2 || weight >= 25 && linkDensity > 0.5 || (embedCount === 1 && contentLength < 75 || embedCount > 1);
57745
+ var textishTags = ["SPAN", "LI", "TD"].concat(
57746
+ Array.from(this.DIV_TO_P_ELEMS)
57747
+ );
57748
+ var textDensity = this._getTextDensity(node, textishTags);
57749
+ var isFigureChild = this._hasAncestorTag(node, "figure");
57750
+ const shouldRemoveNode = () => {
57751
+ const errs = [];
57752
+ if (!isFigureChild && img > 1 && p / img < 0.5) {
57753
+ errs.push(`Bad p to img ratio (img=${img}, p=${p})`);
57754
+ }
57755
+ if (!isList && li > p) {
57756
+ errs.push(`Too many li's outside of a list. (li=${li} > p=${p})`);
57757
+ }
57758
+ if (input > Math.floor(p / 3)) {
57759
+ errs.push(`Too many inputs per p. (input=${input}, p=${p})`);
57760
+ }
57761
+ if (!isList && !isFigureChild && headingDensity < 0.9 && contentLength < 25 && (img === 0 || img > 2) && linkDensity > 0) {
57762
+ errs.push(
57763
+ `Suspiciously short. (headingDensity=${headingDensity}, img=${img}, linkDensity=${linkDensity})`
57764
+ );
57765
+ }
57766
+ if (!isList && weight < 25 && linkDensity > 0.2 + this._linkDensityModifier) {
57767
+ errs.push(
57768
+ `Low weight and a little linky. (linkDensity=${linkDensity})`
57769
+ );
57770
+ }
57771
+ if (weight >= 25 && linkDensity > 0.5 + this._linkDensityModifier) {
57772
+ errs.push(
57773
+ `High weight and mostly links. (linkDensity=${linkDensity})`
57774
+ );
57775
+ }
57776
+ if (embedCount === 1 && contentLength < 75 || embedCount > 1) {
57777
+ errs.push(
57778
+ `Suspicious embed. (embedCount=${embedCount}, contentLength=${contentLength})`
57779
+ );
57780
+ }
57781
+ if (img === 0 && textDensity === 0) {
57782
+ errs.push(
57783
+ `No useful content. (img=${img}, textDensity=${textDensity})`
57784
+ );
57785
+ }
57786
+ if (errs.length) {
57787
+ this.log("Checks failed", errs);
57788
+ return true;
57789
+ }
57790
+ return false;
57791
+ };
57792
+ var haveToRemove = shouldRemoveNode();
57579
57793
  if (isList && haveToRemove) {
57580
57794
  for (var x = 0; x < node.children.length; x++) {
57581
57795
  let child = node.children[x];
@@ -57600,7 +57814,7 @@ var require_Readability = __commonJS({
57600
57814
  * @param Function determines whether a node should be removed
57601
57815
  * @return void
57602
57816
  **/
57603
- _cleanMatchedNodes: function(e2, filter4) {
57817
+ _cleanMatchedNodes(e2, filter4) {
57604
57818
  var endOfSearchMarkerNode = this._getNextNode(e2, true);
57605
57819
  var next = this._getNextNode(e2);
57606
57820
  while (next && next != endOfSearchMarkerNode) {
@@ -57616,8 +57830,8 @@ var require_Readability = __commonJS({
57616
57830
  *
57617
57831
  * @param Element
57618
57832
  * @return void
57619
- **/
57620
- _cleanHeaders: function(e2) {
57833
+ **/
57834
+ _cleanHeaders(e2) {
57621
57835
  let headingNodes = this._getAllNodesWithTag(e2, ["h1", "h2"]);
57622
57836
  this._removeNodes(headingNodes, function(node) {
57623
57837
  let shouldRemove = this._getClassWeight(node) < 0;
@@ -57634,7 +57848,7 @@ var require_Readability = __commonJS({
57634
57848
  * @param Element the node to check.
57635
57849
  * @return boolean indicating whether this is a title-like header.
57636
57850
  */
57637
- _headerDuplicatesTitle: function(node) {
57851
+ _headerDuplicatesTitle(node) {
57638
57852
  if (node.tagName != "H1" && node.tagName != "H2") {
57639
57853
  return false;
57640
57854
  }
@@ -57642,14 +57856,15 @@ var require_Readability = __commonJS({
57642
57856
  this.log("Evaluating similarity of header:", heading2, this._articleTitle);
57643
57857
  return this._textSimilarity(this._articleTitle, heading2) > 0.75;
57644
57858
  },
57645
- _flagIsActive: function(flag) {
57859
+ _flagIsActive(flag) {
57646
57860
  return (this._flags & flag) > 0;
57647
57861
  },
57648
- _removeFlag: function(flag) {
57862
+ _removeFlag(flag) {
57649
57863
  this._flags = this._flags & ~flag;
57650
57864
  },
57651
- _isProbablyVisible: function(node) {
57652
- return (!node.style || node.style.display != "none") && (!node.style || node.style.visibility != "hidden") && !node.hasAttribute("hidden") && (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true" || node.className && node.className.indexOf && node.className.indexOf("fallback-image") !== -1);
57865
+ _isProbablyVisible(node) {
57866
+ return (!node.style || node.style.display != "none") && (!node.style || node.style.visibility != "hidden") && !node.hasAttribute("hidden") && //check for "fallback-image" so that wikimedia math images are displayed
57867
+ (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true" || node.className && node.className.includes && node.className.includes("fallback-image"));
57653
57868
  },
57654
57869
  /**
57655
57870
  * Runs readability.
@@ -57663,11 +57878,13 @@ var require_Readability = __commonJS({
57663
57878
  *
57664
57879
  * @return void
57665
57880
  **/
57666
- parse: function() {
57881
+ parse() {
57667
57882
  if (this._maxElemsToParse > 0) {
57668
57883
  var numTags = this._doc.getElementsByTagName("*").length;
57669
57884
  if (numTags > this._maxElemsToParse) {
57670
- throw new Error("Aborting parsing document; " + numTags + " elements found");
57885
+ throw new Error(
57886
+ "Aborting parsing document; " + numTags + " elements found"
57887
+ );
57671
57888
  }
57672
57889
  }
57673
57890
  this._unwrapNoscriptImages(this._doc);
@@ -57675,15 +57892,17 @@ var require_Readability = __commonJS({
57675
57892
  this._removeScripts(this._doc);
57676
57893
  this._prepDocument();
57677
57894
  var metadata = this._getArticleMetadata(jsonLd);
57895
+ this._metadata = metadata;
57678
57896
  this._articleTitle = metadata.title;
57679
57897
  var articleContent = this._grabArticle();
57680
- if (!articleContent)
57898
+ if (!articleContent) {
57681
57899
  return null;
57900
+ }
57682
57901
  this.log("Grabbed: " + articleContent.innerHTML);
57683
57902
  this._postProcessContent(articleContent);
57684
57903
  if (!metadata.excerpt) {
57685
57904
  var paragraphs = articleContent.getElementsByTagName("p");
57686
- if (paragraphs.length > 0) {
57905
+ if (paragraphs.length) {
57687
57906
  metadata.excerpt = paragraphs[0].textContent.trim();
57688
57907
  }
57689
57908
  }
@@ -57708,9 +57927,9 @@ var require_Readability = __commonJS({
57708
57927
  }
57709
57928
  });
57710
57929
 
57711
- // packages/core/node_modules/@mozilla/readability/Readability-readerable.js
57930
+ // node_modules/@mozilla/readability/Readability-readerable.js
57712
57931
  var require_Readability_readerable = __commonJS({
57713
- "packages/core/node_modules/@mozilla/readability/Readability-readerable.js"(exports, module) {
57932
+ "node_modules/@mozilla/readability/Readability-readerable.js"(exports, module) {
57714
57933
  var REGEXPS = {
57715
57934
  // NOTE: These two regular expressions are duplicated in
57716
57935
  // Readability.js. Please keep both copies in sync.
@@ -57718,13 +57937,18 @@ var require_Readability_readerable = __commonJS({
57718
57937
  okMaybeItsACandidate: /and|article|body|column|content|main|shadow/i
57719
57938
  };
57720
57939
  function isNodeVisible(node) {
57721
- return (!node.style || node.style.display != "none") && !node.hasAttribute("hidden") && (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true" || node.className && node.className.indexOf && node.className.indexOf("fallback-image") !== -1);
57940
+ return (!node.style || node.style.display != "none") && !node.hasAttribute("hidden") && //check for "fallback-image" so that wikimedia math images are displayed
57941
+ (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true" || node.className && node.className.includes && node.className.includes("fallback-image"));
57722
57942
  }
57723
57943
  function isProbablyReaderable(doc, options3 = {}) {
57724
57944
  if (typeof options3 == "function") {
57725
57945
  options3 = { visibilityChecker: options3 };
57726
57946
  }
57727
- var defaultOptions = { minScore: 20, minContentLength: 140, visibilityChecker: isNodeVisible };
57947
+ var defaultOptions = {
57948
+ minScore: 20,
57949
+ minContentLength: 140,
57950
+ visibilityChecker: isNodeVisible
57951
+ };
57728
57952
  options3 = Object.assign(defaultOptions, options3);
57729
57953
  var nodes = doc.querySelectorAll("p, pre, article");
57730
57954
  var brNodes = doc.querySelectorAll("div > br");
@@ -57764,9 +57988,9 @@ var require_Readability_readerable = __commonJS({
57764
57988
  }
57765
57989
  });
57766
57990
 
57767
- // packages/core/node_modules/@mozilla/readability/index.js
57991
+ // node_modules/@mozilla/readability/index.js
57768
57992
  var require_readability = __commonJS({
57769
- "packages/core/node_modules/@mozilla/readability/index.js"(exports, module) {
57993
+ "node_modules/@mozilla/readability/index.js"(exports, module) {
57770
57994
  var Readability2 = require_Readability();
57771
57995
  var isProbablyReaderable = require_Readability_readerable();
57772
57996
  module.exports = {
@@ -93428,7 +93652,7 @@ var require_xlsx = __commonJS({
93428
93652
  }
93429
93653
  return ws2;
93430
93654
  }
93431
- var utils2 = {
93655
+ var utils = {
93432
93656
  encode_col,
93433
93657
  encode_row,
93434
93658
  encode_cell,
@@ -93627,7 +93851,7 @@ var require_xlsx = __commonJS({
93627
93851
  XLSX3.writeFile = writeFileSync7;
93628
93852
  XLSX3.writeFileSync = writeFileSync7;
93629
93853
  XLSX3.writeFileAsync = writeFileAsync;
93630
- XLSX3.utils = utils2;
93854
+ XLSX3.utils = utils;
93631
93855
  XLSX3.writeXLSX = writeSyncXLSX;
93632
93856
  XLSX3.writeFileXLSX = writeFileSyncXLSX;
93633
93857
  XLSX3.SSF = SSF;
@@ -97968,13 +98192,13 @@ var init_memory_tracker = __esm({
97968
98192
  MemoryTracker = class {
97969
98193
  maxMemoryMB;
97970
98194
  startTime;
97971
- lastCheckTime;
97972
- checkInterval = 1e3;
98195
+ _lastCheckTime;
98196
+ _checkInterval = 1e3;
97973
98197
  // Check every second
97974
98198
  constructor(maxMemoryMB) {
97975
98199
  this.maxMemoryMB = maxMemoryMB;
97976
98200
  this.startTime = Date.now();
97977
- this.lastCheckTime = this.startTime;
98201
+ this._lastCheckTime = this.startTime;
97978
98202
  }
97979
98203
  /**
97980
98204
  * Get current memory usage in MB
@@ -98037,7 +98261,7 @@ var init_memory_tracker = __esm({
98037
98261
  */
98038
98262
  reset() {
98039
98263
  this.startTime = Date.now();
98040
- this.lastCheckTime = this.startTime;
98264
+ this._lastCheckTime = this.startTime;
98041
98265
  }
98042
98266
  /**
98043
98267
  * Log memory status for debugging
@@ -98063,11 +98287,11 @@ __export(excel_parser_exports, {
98063
98287
  ExcelParser: () => ExcelParser
98064
98288
  });
98065
98289
  import * as fs24 from "fs";
98066
- var XLSX, ExcelParser;
98290
+ var import_xlsx, ExcelParser;
98067
98291
  var init_excel_parser = __esm({
98068
98292
  "packages/core/dist/src/parsers/excel/excel-parser.js"() {
98069
98293
  "use strict";
98070
- XLSX = __toESM(require_xlsx(), 1);
98294
+ import_xlsx = __toESM(require_xlsx(), 1);
98071
98295
  init_lib2();
98072
98296
  init_js_yaml();
98073
98297
  init_safety_manager();
@@ -98230,9 +98454,9 @@ var init_excel_parser = __esm({
98230
98454
  return result;
98231
98455
  }
98232
98456
  // Enhanced streaming Excel file parser
98233
- async parseExcelFileStreaming(filePath) {
98457
+ async _parseExcelFileStreaming(filePath) {
98234
98458
  this.reportProgress("reading", filePath, 0, 1, 0, 0);
98235
- const workbook = XLSX.readFile(filePath, {
98459
+ const workbook = import_xlsx.default.readFile(filePath, {
98236
98460
  bookProps: true,
98237
98461
  bookSheets: true,
98238
98462
  sheetRows: 0
@@ -98265,7 +98489,7 @@ var init_excel_parser = __esm({
98265
98489
  }
98266
98490
  // Process worksheet with streaming and enhanced features
98267
98491
  async processWorksheetStreaming(workbook, sheetName, filePath) {
98268
- const sheetWorkbook = XLSX.readFile(filePath, {
98492
+ const sheetWorkbook = import_xlsx.default.readFile(filePath, {
98269
98493
  sheets: [sheetName],
98270
98494
  cellFormula: this.config.preserveFormulas,
98271
98495
  cellStyles: this.config.includeFormatting
@@ -98305,9 +98529,7 @@ var init_excel_parser = __esm({
98305
98529
  options3.cellFormula = this.config.preserveFormulas;
98306
98530
  if (this.config.includeFormatting !== void 0)
98307
98531
  options3.cellStyles = this.config.includeFormatting;
98308
- if (this.config.includeMetadata !== void 0)
98309
- options3.bookProps = this.config.includeMetadata;
98310
- const workbook = XLSX.readFile(filePath, options3);
98532
+ const workbook = import_xlsx.default.readFile(filePath, options3);
98311
98533
  const workbookData = {
98312
98534
  sheets: {}
98313
98535
  };
@@ -98328,6 +98550,9 @@ var init_excel_parser = __esm({
98328
98550
  metadata.sheetNames = workbook.SheetNames;
98329
98551
  workbookData.metadata = metadata;
98330
98552
  }
98553
+ if (!workbook.SheetNames || !Array.isArray(workbook.SheetNames) || workbook.SheetNames.length === 0) {
98554
+ throw new Error("No sheets found in workbook or SheetNames is invalid");
98555
+ }
98331
98556
  const sheetsToProcess = this.config.readAllSheets ? workbook.SheetNames : [workbook.SheetNames[0]];
98332
98557
  for (const sheetName of sheetsToProcess) {
98333
98558
  const worksheet = workbook.Sheets[sheetName];
@@ -98342,16 +98567,16 @@ var init_excel_parser = __esm({
98342
98567
  const targetRange = this.config.range || sheetRange;
98343
98568
  let range;
98344
98569
  try {
98345
- range = XLSX.utils.decode_range(targetRange);
98570
+ range = import_xlsx.default.utils.decode_range(targetRange);
98346
98571
  } catch (_error) {
98347
98572
  console.warn(`Invalid range specified: ${targetRange}, falling back to full sheet`);
98348
- range = XLSX.utils.decode_range(sheetRange);
98573
+ range = import_xlsx.default.utils.decode_range(sheetRange);
98349
98574
  }
98350
98575
  const data = [];
98351
98576
  for (let row = range.s.r; row <= range.e.r; row++) {
98352
98577
  const rowData = [];
98353
98578
  for (let col = range.s.c; col <= range.e.c; col++) {
98354
- const cellAddress = XLSX.utils.encode_cell({ r: row, c: col });
98579
+ const cellAddress = import_xlsx.default.utils.encode_cell({ r: row, c: col });
98355
98580
  const cell = worksheet[cellAddress];
98356
98581
  if (cell) {
98357
98582
  const cellData = {
@@ -98412,7 +98637,7 @@ var init_excel_parser = __esm({
98412
98637
  const sheetData = {
98413
98638
  name: "Sheet1",
98414
98639
  data,
98415
- range: `A1:${XLSX.utils.encode_cell({ r: data.length - 1, c: Math.max(0, (data[0]?.length || 1) - 1) })}`,
98640
+ range: `A1:${import_xlsx.default.utils.encode_cell({ r: data.length - 1, c: Math.max(0, (data[0]?.length || 1) - 1) })}`,
98416
98641
  rowCount: data.length,
98417
98642
  columnCount: data[0]?.length || 0
98418
98643
  };
@@ -98594,7 +98819,7 @@ var init_excel_parser = __esm({
98594
98819
  extractTableInfo(worksheet) {
98595
98820
  const tables = [];
98596
98821
  if (worksheet["!ref"]) {
98597
- const range = XLSX.utils.decode_range(worksheet["!ref"]);
98822
+ const range = import_xlsx.default.utils.decode_range(worksheet["!ref"]);
98598
98823
  const hasHeaders = this.detectHeaders(worksheet, range);
98599
98824
  if (hasHeaders) {
98600
98825
  const headers = this.extractTableHeaders(worksheet, range);
@@ -98623,7 +98848,7 @@ var init_excel_parser = __esm({
98623
98848
  // Detect if worksheet has headers
98624
98849
  detectHeaders(worksheet, range) {
98625
98850
  for (let col = range.s.c; col <= range.e.c; col++) {
98626
- const cellAddr = XLSX.utils.encode_cell({ r: range.s.r, c: col });
98851
+ const cellAddr = import_xlsx.default.utils.encode_cell({ r: range.s.r, c: col });
98627
98852
  const cell = worksheet[cellAddr];
98628
98853
  if (cell && typeof cell.v === "string") {
98629
98854
  return true;
@@ -98635,14 +98860,14 @@ var init_excel_parser = __esm({
98635
98860
  extractTableHeaders(worksheet, range) {
98636
98861
  const headers = [];
98637
98862
  for (let col = range.s.c; col <= range.e.c; col++) {
98638
- const cellAddr = XLSX.utils.encode_cell({ r: range.s.r, c: col });
98863
+ const cellAddr = import_xlsx.default.utils.encode_cell({ r: range.s.r, c: col });
98639
98864
  const cell = worksheet[cellAddr];
98640
98865
  headers.push(cell ? String(cell.v || `Column ${col + 1}`) : `Column ${col + 1}`);
98641
98866
  }
98642
98867
  return headers;
98643
98868
  }
98644
98869
  // Streaming CSV parser for large files
98645
- async parseCsvFileStreaming(filePath, delimiter2 = ",") {
98870
+ async _parseCsvFileStreaming(filePath, delimiter2 = ",") {
98646
98871
  return new Promise((resolve19, reject) => {
98647
98872
  const data = [];
98648
98873
  const fileStream = fs24.createReadStream(filePath);
@@ -98673,7 +98898,7 @@ var init_excel_parser = __esm({
98673
98898
  const sheetData = {
98674
98899
  name: "Sheet1",
98675
98900
  data,
98676
- range: `A1:${XLSX.utils.encode_cell({ r: data.length - 1, c: Math.max(0, (data[0]?.length || 1) - 1) })}`,
98901
+ range: `A1:${import_xlsx.default.utils.encode_cell({ r: data.length - 1, c: Math.max(0, (data[0]?.length || 1) - 1) })}`,
98677
98902
  rowCount: data.length,
98678
98903
  columnCount: data[0]?.length || 0
98679
98904
  };
@@ -98691,20 +98916,20 @@ var init_excel_parser = __esm({
98691
98916
  }
98692
98917
  // New method: Generate Excel file (round-trip functionality)
98693
98918
  async generateExcel(data, outputPath) {
98694
- const workbook = XLSX.utils.book_new();
98919
+ const workbook = import_xlsx.default.utils.book_new();
98695
98920
  for (const [sheetName, sheetData] of Object.entries(data.sheets)) {
98696
- const worksheet = XLSX.utils.aoa_to_sheet(sheetData.data.map((row) => row.map((cell) => cell.value)));
98921
+ const worksheet = import_xlsx.default.utils.aoa_to_sheet(sheetData.data.map((row) => row.map((cell) => cell.value)));
98697
98922
  if (sheetData.data.some((row) => row.some((cell) => cell.formula))) {
98698
98923
  sheetData.data.forEach((row, rowIndex) => {
98699
98924
  row.forEach((cell, colIndex) => {
98700
98925
  if (cell.formula) {
98701
- const cellAddr = XLSX.utils.encode_cell({ r: rowIndex, c: colIndex });
98926
+ const cellAddr = import_xlsx.default.utils.encode_cell({ r: rowIndex, c: colIndex });
98702
98927
  worksheet[cellAddr] = { ...worksheet[cellAddr], f: cell.formula };
98703
98928
  }
98704
98929
  });
98705
98930
  });
98706
98931
  }
98707
- XLSX.utils.book_append_sheet(workbook, worksheet, sheetName);
98932
+ import_xlsx.default.utils.book_append_sheet(workbook, worksheet, sheetName);
98708
98933
  }
98709
98934
  if (data.metadata) {
98710
98935
  workbook.Props = {
@@ -98716,7 +98941,7 @@ var init_excel_parser = __esm({
98716
98941
  Application: data.metadata.application
98717
98942
  };
98718
98943
  }
98719
- XLSX.writeFile(workbook, outputPath);
98944
+ import_xlsx.default.writeFile(workbook, outputPath);
98720
98945
  }
98721
98946
  };
98722
98947
  }
@@ -355281,7 +355506,7 @@ var require_utils3 = __commonJS({
355281
355506
  var require_scan = __commonJS({
355282
355507
  "node_modules/picomatch/lib/scan.js"(exports, module) {
355283
355508
  "use strict";
355284
- var utils2 = require_utils3();
355509
+ var utils = require_utils3();
355285
355510
  var {
355286
355511
  CHAR_ASTERISK: CHAR_ASTERISK2,
355287
355512
  /* * */
@@ -355542,9 +355767,9 @@ var require_scan = __commonJS({
355542
355767
  }
355543
355768
  }
355544
355769
  if (opts.unescape === true) {
355545
- if (glob2) glob2 = utils2.removeBackslashes(glob2);
355770
+ if (glob2) glob2 = utils.removeBackslashes(glob2);
355546
355771
  if (base && backslashes === true) {
355547
- base = utils2.removeBackslashes(base);
355772
+ base = utils.removeBackslashes(base);
355548
355773
  }
355549
355774
  }
355550
355775
  const state = {
@@ -355612,7 +355837,7 @@ var require_parse2 = __commonJS({
355612
355837
  "node_modules/picomatch/lib/parse.js"(exports, module) {
355613
355838
  "use strict";
355614
355839
  var constants2 = require_constants();
355615
- var utils2 = require_utils3();
355840
+ var utils = require_utils3();
355616
355841
  var {
355617
355842
  MAX_LENGTH,
355618
355843
  POSIX_REGEX_SOURCE,
@@ -355629,7 +355854,7 @@ var require_parse2 = __commonJS({
355629
355854
  try {
355630
355855
  new RegExp(value);
355631
355856
  } catch (ex) {
355632
- return args.map((v) => utils2.escapeRegex(v)).join("..");
355857
+ return args.map((v) => utils.escapeRegex(v)).join("..");
355633
355858
  }
355634
355859
  return value;
355635
355860
  };
@@ -355695,7 +355920,7 @@ var require_parse2 = __commonJS({
355695
355920
  globstar: false,
355696
355921
  tokens
355697
355922
  };
355698
- input = utils2.removePrefix(input, state);
355923
+ input = utils.removePrefix(input, state);
355699
355924
  len = input.length;
355700
355925
  const extglobs = [];
355701
355926
  const braces = [];
@@ -355834,7 +356059,7 @@ var require_parse2 = __commonJS({
355834
356059
  state.output = input;
355835
356060
  return state;
355836
356061
  }
355837
- state.output = utils2.wrapOutput(output, state, options3);
356062
+ state.output = utils.wrapOutput(output, state, options3);
355838
356063
  return state;
355839
356064
  }
355840
356065
  while (!eos()) {
@@ -355910,7 +356135,7 @@ var require_parse2 = __commonJS({
355910
356135
  continue;
355911
356136
  }
355912
356137
  if (state.quotes === 1 && value !== '"') {
355913
- value = utils2.escapeRegex(value);
356138
+ value = utils.escapeRegex(value);
355914
356139
  prev.value += value;
355915
356140
  append({ value });
355916
356141
  continue;
@@ -355971,10 +356196,10 @@ var require_parse2 = __commonJS({
355971
356196
  }
355972
356197
  prev.value += value;
355973
356198
  append({ value });
355974
- if (opts.literalBrackets === false || utils2.hasRegexChars(prevValue)) {
356199
+ if (opts.literalBrackets === false || utils.hasRegexChars(prevValue)) {
355975
356200
  continue;
355976
356201
  }
355977
- const escaped = utils2.escapeRegex(prev.value);
356202
+ const escaped = utils.escapeRegex(prev.value);
355978
356203
  state.output = state.output.slice(0, -prev.value.length);
355979
356204
  if (opts.literalBrackets === true) {
355980
356205
  state.output += escaped;
@@ -356279,17 +356504,17 @@ var require_parse2 = __commonJS({
356279
356504
  }
356280
356505
  while (state.brackets > 0) {
356281
356506
  if (opts.strictBrackets === true) throw new SyntaxError(syntaxError("closing", "]"));
356282
- state.output = utils2.escapeLast(state.output, "[");
356507
+ state.output = utils.escapeLast(state.output, "[");
356283
356508
  decrement("brackets");
356284
356509
  }
356285
356510
  while (state.parens > 0) {
356286
356511
  if (opts.strictBrackets === true) throw new SyntaxError(syntaxError("closing", ")"));
356287
- state.output = utils2.escapeLast(state.output, "(");
356512
+ state.output = utils.escapeLast(state.output, "(");
356288
356513
  decrement("parens");
356289
356514
  }
356290
356515
  while (state.braces > 0) {
356291
356516
  if (opts.strictBrackets === true) throw new SyntaxError(syntaxError("closing", "}"));
356292
- state.output = utils2.escapeLast(state.output, "{");
356517
+ state.output = utils.escapeLast(state.output, "{");
356293
356518
  decrement("braces");
356294
356519
  }
356295
356520
  if (opts.strictSlashes !== true && (prev.type === "star" || prev.type === "bracket")) {
@@ -356364,7 +356589,7 @@ var require_parse2 = __commonJS({
356364
356589
  }
356365
356590
  }
356366
356591
  };
356367
- const output = utils2.removePrefix(input, state);
356592
+ const output = utils.removePrefix(input, state);
356368
356593
  let source2 = create(output);
356369
356594
  if (source2 && opts.strictSlashes !== true) {
356370
356595
  source2 += `${SLASH_LITERAL}?`;
@@ -356381,7 +356606,7 @@ var require_picomatch = __commonJS({
356381
356606
  "use strict";
356382
356607
  var scan = require_scan();
356383
356608
  var parse6 = require_parse2();
356384
- var utils2 = require_utils3();
356609
+ var utils = require_utils3();
356385
356610
  var constants2 = require_constants();
356386
356611
  var isObject2 = (val) => val && typeof val === "object" && !Array.isArray(val);
356387
356612
  var picomatch3 = (glob2, options3, returnState = false) => {
@@ -356445,7 +356670,7 @@ var require_picomatch = __commonJS({
356445
356670
  return { isMatch: false, output: "" };
356446
356671
  }
356447
356672
  const opts = options3 || {};
356448
- const format = opts.format || (posix2 ? utils2.toPosixSlashes : null);
356673
+ const format = opts.format || (posix2 ? utils.toPosixSlashes : null);
356449
356674
  let match2 = input === glob2;
356450
356675
  let output = match2 && format ? format(input) : input;
356451
356676
  if (match2 === false) {
@@ -356463,7 +356688,7 @@ var require_picomatch = __commonJS({
356463
356688
  };
356464
356689
  picomatch3.matchBase = (input, glob2, options3) => {
356465
356690
  const regex = glob2 instanceof RegExp ? glob2 : picomatch3.makeRe(glob2, options3);
356466
- return regex.test(utils2.basename(input));
356691
+ return regex.test(utils.basename(input));
356467
356692
  };
356468
356693
  picomatch3.isMatch = (str3, patterns, options3) => picomatch3(patterns, options3)(str3);
356469
356694
  picomatch3.parse = (pattern, options3) => {
@@ -356520,10 +356745,10 @@ var require_picomatch2 = __commonJS({
356520
356745
  "node_modules/picomatch/index.js"(exports, module) {
356521
356746
  "use strict";
356522
356747
  var pico = require_picomatch();
356523
- var utils2 = require_utils3();
356748
+ var utils = require_utils3();
356524
356749
  function picomatch3(glob2, options3, returnState = false) {
356525
356750
  if (options3 && (options3.windows === null || options3.windows === void 0)) {
356526
- options3 = { ...options3, windows: utils2.isWindows() };
356751
+ options3 = { ...options3, windows: utils.isWindows() };
356527
356752
  }
356528
356753
  return pico(glob2, options3, returnState);
356529
356754
  }
@@ -372796,7 +373021,7 @@ async function getPackageJson() {
372796
373021
  // packages/cli/src/utils/version.ts
372797
373022
  async function getCliVersion() {
372798
373023
  const pkgJson = await getPackageJson();
372799
- return "1.0.62";
373024
+ return "1.0.64";
372800
373025
  }
372801
373026
 
372802
373027
  // packages/cli/src/ui/commands/aboutCommand.ts
@@ -372848,7 +373073,7 @@ import open4 from "open";
372848
373073
  import process11 from "node:process";
372849
373074
 
372850
373075
  // packages/cli/src/generated/git-commit.ts
372851
- var GIT_COMMIT_INFO = "81a9bcbc";
373076
+ var GIT_COMMIT_INFO = "24d4af6d";
372852
373077
 
372853
373078
  // packages/cli/src/ui/commands/bugCommand.ts
372854
373079
  init_dist2();
@@ -404288,7 +404513,7 @@ import { exec as exec7, execSync as execSync6, spawn as spawn6 } from "node:chil
404288
404513
  import os29 from "node:os";
404289
404514
  import path78 from "node:path";
404290
404515
  import fs66 from "node:fs";
404291
- import { readFile as readFile12 } from "node:fs/promises";
404516
+ import { readFile as readFile11 } from "node:fs/promises";
404292
404517
  import { promisify as promisify6 } from "util";
404293
404518
  var execAsync5 = promisify6(exec7);
404294
404519
  function getContainerPath(hostPath) {
@@ -404323,7 +404548,7 @@ async function shouldUseCurrentUserInSandbox() {
404323
404548
  }
404324
404549
  if (os29.platform() === "linux") {
404325
404550
  try {
404326
- const osReleaseContent = await readFile12("/etc/os-release", "utf8");
404551
+ const osReleaseContent = await readFile11("/etc/os-release", "utf8");
404327
404552
  if (osReleaseContent.includes("ID=debian") || osReleaseContent.includes("ID=ubuntu") || osReleaseContent.match(/^ID_LIKE=.*debian.*/m) || // Covers derivatives
404328
404553
  osReleaseContent.match(/^ID_LIKE=.*ubuntu.*/m)) {
404329
404554
  console.error(