fss-link 1.0.63 → 1.0.64
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bundle/fss-link.js +589 -364
- package/package.json +4 -12
package/bundle/fss-link.js
CHANGED
|
@@ -21920,7 +21920,7 @@ function createContentGeneratorConfig(config, authType) {
|
|
|
21920
21920
|
return contentGeneratorConfig;
|
|
21921
21921
|
}
|
|
21922
21922
|
async function createContentGenerator(config, gcConfig, sessionId2) {
|
|
21923
|
-
const version = "1.0.
|
|
21923
|
+
const version = "1.0.64";
|
|
21924
21924
|
const userAgent = `FSS-Link/${version} (${process.platform}; ${process.arch})`;
|
|
21925
21925
|
const baseHeaders = {
|
|
21926
21926
|
"User-Agent": userAgent
|
|
@@ -55941,15 +55941,17 @@ var init_p_limit = __esm({
|
|
|
55941
55941
|
}
|
|
55942
55942
|
});
|
|
55943
55943
|
|
|
55944
|
-
//
|
|
55944
|
+
// node_modules/@mozilla/readability/Readability.js
|
|
55945
55945
|
var require_Readability = __commonJS({
|
|
55946
|
-
"
|
|
55946
|
+
"node_modules/@mozilla/readability/Readability.js"(exports, module) {
|
|
55947
55947
|
function Readability2(doc, options3) {
|
|
55948
55948
|
if (options3 && options3.documentElement) {
|
|
55949
55949
|
doc = options3;
|
|
55950
55950
|
options3 = arguments[2];
|
|
55951
55951
|
} else if (!doc || !doc.documentElement) {
|
|
55952
|
-
throw new Error(
|
|
55952
|
+
throw new Error(
|
|
55953
|
+
"First argument to Readability constructor should be a document object."
|
|
55954
|
+
);
|
|
55953
55955
|
}
|
|
55954
55956
|
options3 = options3 || {};
|
|
55955
55957
|
this._doc = doc;
|
|
@@ -55959,17 +55961,21 @@ var require_Readability = __commonJS({
|
|
|
55959
55961
|
this._articleDir = null;
|
|
55960
55962
|
this._articleSiteName = null;
|
|
55961
55963
|
this._attempts = [];
|
|
55964
|
+
this._metadata = {};
|
|
55962
55965
|
this._debug = !!options3.debug;
|
|
55963
55966
|
this._maxElemsToParse = options3.maxElemsToParse || this.DEFAULT_MAX_ELEMS_TO_PARSE;
|
|
55964
55967
|
this._nbTopCandidates = options3.nbTopCandidates || this.DEFAULT_N_TOP_CANDIDATES;
|
|
55965
55968
|
this._charThreshold = options3.charThreshold || this.DEFAULT_CHAR_THRESHOLD;
|
|
55966
|
-
this._classesToPreserve = this.CLASSES_TO_PRESERVE.concat(
|
|
55969
|
+
this._classesToPreserve = this.CLASSES_TO_PRESERVE.concat(
|
|
55970
|
+
options3.classesToPreserve || []
|
|
55971
|
+
);
|
|
55967
55972
|
this._keepClasses = !!options3.keepClasses;
|
|
55968
55973
|
this._serializer = options3.serializer || function(el) {
|
|
55969
55974
|
return el.innerHTML;
|
|
55970
55975
|
};
|
|
55971
55976
|
this._disableJSONLD = !!options3.disableJSONLD;
|
|
55972
55977
|
this._allowedVideoRegex = options3.allowedVideoRegex || this.REGEXPS.videos;
|
|
55978
|
+
this._linkDensityModifier = options3.linkDensityModifier || 0;
|
|
55973
55979
|
this._flags = this.FLAG_STRIP_UNLIKELYS | this.FLAG_WEIGHT_CLASSES | this.FLAG_CLEAN_CONDITIONALLY;
|
|
55974
55980
|
if (this._debug) {
|
|
55975
55981
|
let logNode = function(node) {
|
|
@@ -55990,7 +55996,7 @@ var require_Readability = __commonJS({
|
|
|
55990
55996
|
return arg;
|
|
55991
55997
|
});
|
|
55992
55998
|
args.unshift("Reader: (Readability)");
|
|
55993
|
-
console.log
|
|
55999
|
+
console.log(...args);
|
|
55994
56000
|
} else if (typeof dump !== "undefined") {
|
|
55995
56001
|
var msg = Array.prototype.map.call(arguments, function(x) {
|
|
55996
56002
|
return x && x.nodeName ? logNode(x) : x;
|
|
@@ -56027,7 +56033,7 @@ var require_Readability = __commonJS({
|
|
|
56027
56033
|
unlikelyCandidates: /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,
|
|
56028
56034
|
okMaybeItsACandidate: /and|article|body|column|content|main|shadow/i,
|
|
56029
56035
|
positive: /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,
|
|
56030
|
-
negative: /-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|
|
|
56036
|
+
negative: /-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|footer|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|widget/i,
|
|
56031
56037
|
extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,
|
|
56032
56038
|
byline: /byline|author|dateline|writtenby|p-author/i,
|
|
56033
56039
|
replaceFonts: /<(\/?)font[^>]*>/gi,
|
|
@@ -56046,12 +56052,46 @@ var require_Readability = __commonJS({
|
|
|
56046
56052
|
// see: https://en.wikipedia.org/wiki/Comma#Comma_variants
|
|
56047
56053
|
commas: /\u002C|\u060C|\uFE50|\uFE10|\uFE11|\u2E41|\u2E34|\u2E32|\uFF0C/g,
|
|
56048
56054
|
// See: https://schema.org/Article
|
|
56049
|
-
jsonLdArticleTypes: /^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPosting|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference
|
|
56055
|
+
jsonLdArticleTypes: /^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPosting|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/,
|
|
56056
|
+
// used to see if a node's content matches words commonly used for ad blocks or loading indicators
|
|
56057
|
+
adWords: /^(ad(vertising|vertisement)?|pub(licité)?|werb(ung)?|广告|Реклама|Anuncio)$/iu,
|
|
56058
|
+
loadingWords: /^((loading|正在加载|Загрузка|chargement|cargando)(…|\.\.\.)?)$/iu
|
|
56050
56059
|
},
|
|
56051
|
-
UNLIKELY_ROLES: [
|
|
56052
|
-
|
|
56053
|
-
|
|
56054
|
-
|
|
56060
|
+
UNLIKELY_ROLES: [
|
|
56061
|
+
"menu",
|
|
56062
|
+
"menubar",
|
|
56063
|
+
"complementary",
|
|
56064
|
+
"navigation",
|
|
56065
|
+
"alert",
|
|
56066
|
+
"alertdialog",
|
|
56067
|
+
"dialog"
|
|
56068
|
+
],
|
|
56069
|
+
DIV_TO_P_ELEMS: /* @__PURE__ */ new Set([
|
|
56070
|
+
"BLOCKQUOTE",
|
|
56071
|
+
"DL",
|
|
56072
|
+
"DIV",
|
|
56073
|
+
"IMG",
|
|
56074
|
+
"OL",
|
|
56075
|
+
"P",
|
|
56076
|
+
"PRE",
|
|
56077
|
+
"TABLE",
|
|
56078
|
+
"UL"
|
|
56079
|
+
]),
|
|
56080
|
+
ALTER_TO_DIV_EXCEPTIONS: ["DIV", "ARTICLE", "SECTION", "P", "OL", "UL"],
|
|
56081
|
+
PRESENTATIONAL_ATTRIBUTES: [
|
|
56082
|
+
"align",
|
|
56083
|
+
"background",
|
|
56084
|
+
"bgcolor",
|
|
56085
|
+
"border",
|
|
56086
|
+
"cellpadding",
|
|
56087
|
+
"cellspacing",
|
|
56088
|
+
"frame",
|
|
56089
|
+
"hspace",
|
|
56090
|
+
"rules",
|
|
56091
|
+
"style",
|
|
56092
|
+
"valign",
|
|
56093
|
+
"vspace"
|
|
56094
|
+
],
|
|
56055
56095
|
DEPRECATED_SIZE_ATTRIBUTE_ELEMS: ["TABLE", "TH", "TD", "HR", "PRE"],
|
|
56056
56096
|
// The commented out elements qualify as phrasing content but tend to be
|
|
56057
56097
|
// removed by readability when put into paragraphs, so we ignore them here.
|
|
@@ -56101,19 +56141,19 @@ var require_Readability = __commonJS({
|
|
|
56101
56141
|
CLASSES_TO_PRESERVE: ["page"],
|
|
56102
56142
|
// These are the list of HTML entities that need to be escaped.
|
|
56103
56143
|
HTML_ESCAPE_MAP: {
|
|
56104
|
-
|
|
56105
|
-
|
|
56106
|
-
|
|
56107
|
-
|
|
56108
|
-
|
|
56144
|
+
lt: "<",
|
|
56145
|
+
gt: ">",
|
|
56146
|
+
amp: "&",
|
|
56147
|
+
quot: '"',
|
|
56148
|
+
apos: "'"
|
|
56109
56149
|
},
|
|
56110
56150
|
/**
|
|
56111
56151
|
* Run any post-process modifications to article content as necessary.
|
|
56112
56152
|
*
|
|
56113
56153
|
* @param Element
|
|
56114
56154
|
* @return void
|
|
56115
|
-
|
|
56116
|
-
_postProcessContent
|
|
56155
|
+
**/
|
|
56156
|
+
_postProcessContent(articleContent) {
|
|
56117
56157
|
this._fixRelativeUris(articleContent);
|
|
56118
56158
|
this._simplifyNestedElements(articleContent);
|
|
56119
56159
|
if (!this._keepClasses) {
|
|
@@ -56130,7 +56170,7 @@ var require_Readability = __commonJS({
|
|
|
56130
56170
|
* @param Function filterFn the function to use as a filter
|
|
56131
56171
|
* @return void
|
|
56132
56172
|
*/
|
|
56133
|
-
_removeNodes
|
|
56173
|
+
_removeNodes(nodeList, filterFn) {
|
|
56134
56174
|
if (this._docJSDOMParser && nodeList._isLiveNodeList) {
|
|
56135
56175
|
throw new Error("Do not pass live node lists to _removeNodes");
|
|
56136
56176
|
}
|
|
@@ -56151,7 +56191,7 @@ var require_Readability = __commonJS({
|
|
|
56151
56191
|
* @param String newTagName the new tag name to use
|
|
56152
56192
|
* @return void
|
|
56153
56193
|
*/
|
|
56154
|
-
_replaceNodeTags
|
|
56194
|
+
_replaceNodeTags(nodeList, newTagName) {
|
|
56155
56195
|
if (this._docJSDOMParser && nodeList._isLiveNodeList) {
|
|
56156
56196
|
throw new Error("Do not pass live node lists to _replaceNodeTags");
|
|
56157
56197
|
}
|
|
@@ -56170,7 +56210,7 @@ var require_Readability = __commonJS({
|
|
|
56170
56210
|
* @param Function fn The iterate function.
|
|
56171
56211
|
* @return void
|
|
56172
56212
|
*/
|
|
56173
|
-
_forEachNode
|
|
56213
|
+
_forEachNode(nodeList, fn) {
|
|
56174
56214
|
Array.prototype.forEach.call(nodeList, fn, this);
|
|
56175
56215
|
},
|
|
56176
56216
|
/**
|
|
@@ -56184,7 +56224,7 @@ var require_Readability = __commonJS({
|
|
|
56184
56224
|
* @param Function fn The test function.
|
|
56185
56225
|
* @return void
|
|
56186
56226
|
*/
|
|
56187
|
-
_findNode
|
|
56227
|
+
_findNode(nodeList, fn) {
|
|
56188
56228
|
return Array.prototype.find.call(nodeList, fn, this);
|
|
56189
56229
|
},
|
|
56190
56230
|
/**
|
|
@@ -56198,7 +56238,7 @@ var require_Readability = __commonJS({
|
|
|
56198
56238
|
* @param Function fn The iterate function.
|
|
56199
56239
|
* @return Boolean
|
|
56200
56240
|
*/
|
|
56201
|
-
_someNode
|
|
56241
|
+
_someNode(nodeList, fn) {
|
|
56202
56242
|
return Array.prototype.some.call(nodeList, fn, this);
|
|
56203
56243
|
},
|
|
56204
56244
|
/**
|
|
@@ -56212,31 +56252,20 @@ var require_Readability = __commonJS({
|
|
|
56212
56252
|
* @param Function fn The iterate function.
|
|
56213
56253
|
* @return Boolean
|
|
56214
56254
|
*/
|
|
56215
|
-
_everyNode
|
|
56255
|
+
_everyNode(nodeList, fn) {
|
|
56216
56256
|
return Array.prototype.every.call(nodeList, fn, this);
|
|
56217
56257
|
},
|
|
56218
|
-
|
|
56219
|
-
* Concat all nodelists passed as arguments.
|
|
56220
|
-
*
|
|
56221
|
-
* @return ...NodeList
|
|
56222
|
-
* @return Array
|
|
56223
|
-
*/
|
|
56224
|
-
_concatNodeLists: function() {
|
|
56225
|
-
var slice = Array.prototype.slice;
|
|
56226
|
-
var args = slice.call(arguments);
|
|
56227
|
-
var nodeLists = args.map(function(list2) {
|
|
56228
|
-
return slice.call(list2);
|
|
56229
|
-
});
|
|
56230
|
-
return Array.prototype.concat.apply([], nodeLists);
|
|
56231
|
-
},
|
|
56232
|
-
_getAllNodesWithTag: function(node, tagNames) {
|
|
56258
|
+
_getAllNodesWithTag(node, tagNames) {
|
|
56233
56259
|
if (node.querySelectorAll) {
|
|
56234
56260
|
return node.querySelectorAll(tagNames.join(","));
|
|
56235
56261
|
}
|
|
56236
|
-
return [].concat.apply(
|
|
56237
|
-
|
|
56238
|
-
|
|
56239
|
-
|
|
56262
|
+
return [].concat.apply(
|
|
56263
|
+
[],
|
|
56264
|
+
tagNames.map(function(tag2) {
|
|
56265
|
+
var collection = node.getElementsByTagName(tag2);
|
|
56266
|
+
return Array.isArray(collection) ? collection : Array.from(collection);
|
|
56267
|
+
})
|
|
56268
|
+
);
|
|
56240
56269
|
},
|
|
56241
56270
|
/**
|
|
56242
56271
|
* Removes the class="" attribute from every element in the given
|
|
@@ -56246,11 +56275,9 @@ var require_Readability = __commonJS({
|
|
|
56246
56275
|
* @param Element
|
|
56247
56276
|
* @return void
|
|
56248
56277
|
*/
|
|
56249
|
-
_cleanClasses
|
|
56278
|
+
_cleanClasses(node) {
|
|
56250
56279
|
var classesToPreserve = this._classesToPreserve;
|
|
56251
|
-
var className = (node.getAttribute("class") || "").split(/\s+/).filter(
|
|
56252
|
-
return classesToPreserve.indexOf(cls) != -1;
|
|
56253
|
-
}).join(" ");
|
|
56280
|
+
var className = (node.getAttribute("class") || "").split(/\s+/).filter((cls) => classesToPreserve.includes(cls)).join(" ");
|
|
56254
56281
|
if (className) {
|
|
56255
56282
|
node.setAttribute("class", className);
|
|
56256
56283
|
} else {
|
|
@@ -56260,6 +56287,20 @@ var require_Readability = __commonJS({
|
|
|
56260
56287
|
this._cleanClasses(node);
|
|
56261
56288
|
}
|
|
56262
56289
|
},
|
|
56290
|
+
/**
|
|
56291
|
+
* Tests whether a string is a URL or not.
|
|
56292
|
+
*
|
|
56293
|
+
* @param {string} str The string to test
|
|
56294
|
+
* @return {boolean} true if str is a URL, false if not
|
|
56295
|
+
*/
|
|
56296
|
+
_isUrl(str3) {
|
|
56297
|
+
try {
|
|
56298
|
+
new URL(str3);
|
|
56299
|
+
return true;
|
|
56300
|
+
} catch {
|
|
56301
|
+
return false;
|
|
56302
|
+
}
|
|
56303
|
+
},
|
|
56263
56304
|
/**
|
|
56264
56305
|
* Converts each <a> and <img> uri in the given element to an absolute URI,
|
|
56265
56306
|
* ignoring #ref URIs.
|
|
@@ -56267,7 +56308,7 @@ var require_Readability = __commonJS({
|
|
|
56267
56308
|
* @param Element
|
|
56268
56309
|
* @return void
|
|
56269
56310
|
*/
|
|
56270
|
-
_fixRelativeUris
|
|
56311
|
+
_fixRelativeUris(articleContent) {
|
|
56271
56312
|
var baseURI = this._doc.baseURI;
|
|
56272
56313
|
var documentURI = this._doc.documentURI;
|
|
56273
56314
|
function toAbsoluteURI(uri) {
|
|
@@ -56319,14 +56360,17 @@ var require_Readability = __commonJS({
|
|
|
56319
56360
|
media.setAttribute("poster", toAbsoluteURI(poster));
|
|
56320
56361
|
}
|
|
56321
56362
|
if (srcset) {
|
|
56322
|
-
var newSrcset = srcset.replace(
|
|
56323
|
-
|
|
56324
|
-
|
|
56363
|
+
var newSrcset = srcset.replace(
|
|
56364
|
+
this.REGEXPS.srcsetUrl,
|
|
56365
|
+
function(_, p1, p2, p3) {
|
|
56366
|
+
return toAbsoluteURI(p1) + (p2 || "") + p3;
|
|
56367
|
+
}
|
|
56368
|
+
);
|
|
56325
56369
|
media.setAttribute("srcset", newSrcset);
|
|
56326
56370
|
}
|
|
56327
56371
|
});
|
|
56328
56372
|
},
|
|
56329
|
-
_simplifyNestedElements
|
|
56373
|
+
_simplifyNestedElements(articleContent) {
|
|
56330
56374
|
var node = articleContent;
|
|
56331
56375
|
while (node) {
|
|
56332
56376
|
if (node.parentNode && ["DIV", "SECTION"].includes(node.tagName) && !(node.id && node.id.startsWith("readability"))) {
|
|
@@ -56336,7 +56380,7 @@ var require_Readability = __commonJS({
|
|
|
56336
56380
|
} else if (this._hasSingleTagInsideElement(node, "DIV") || this._hasSingleTagInsideElement(node, "SECTION")) {
|
|
56337
56381
|
var child = node.children[0];
|
|
56338
56382
|
for (var i = 0; i < node.attributes.length; i++) {
|
|
56339
|
-
child.
|
|
56383
|
+
child.setAttributeNode(node.attributes[i].cloneNode());
|
|
56340
56384
|
}
|
|
56341
56385
|
node.parentNode.replaceChild(child, node);
|
|
56342
56386
|
node = child;
|
|
@@ -56351,14 +56395,17 @@ var require_Readability = __commonJS({
|
|
|
56351
56395
|
*
|
|
56352
56396
|
* @return string
|
|
56353
56397
|
**/
|
|
56354
|
-
_getArticleTitle
|
|
56398
|
+
_getArticleTitle() {
|
|
56355
56399
|
var doc = this._doc;
|
|
56356
56400
|
var curTitle = "";
|
|
56357
56401
|
var origTitle = "";
|
|
56358
56402
|
try {
|
|
56359
56403
|
curTitle = origTitle = doc.title.trim();
|
|
56360
|
-
if (typeof curTitle !== "string")
|
|
56361
|
-
curTitle = origTitle = this._getInnerText(
|
|
56404
|
+
if (typeof curTitle !== "string") {
|
|
56405
|
+
curTitle = origTitle = this._getInnerText(
|
|
56406
|
+
doc.getElementsByTagName("title")[0]
|
|
56407
|
+
);
|
|
56408
|
+
}
|
|
56362
56409
|
} catch (e2) {
|
|
56363
56410
|
}
|
|
56364
56411
|
var titleHadHierarchicalSeparators = false;
|
|
@@ -56367,14 +56414,13 @@ var require_Readability = __commonJS({
|
|
|
56367
56414
|
}
|
|
56368
56415
|
if (/ [\|\-\\\/>»] /.test(curTitle)) {
|
|
56369
56416
|
titleHadHierarchicalSeparators = / [\\\/>»] /.test(curTitle);
|
|
56370
|
-
|
|
56371
|
-
|
|
56372
|
-
|
|
56373
|
-
|
|
56374
|
-
|
|
56375
|
-
|
|
56376
|
-
|
|
56377
|
-
);
|
|
56417
|
+
let allSeparators = Array.from(origTitle.matchAll(/ [\|\-\\\/>»] /gi));
|
|
56418
|
+
curTitle = origTitle.substring(0, allSeparators.pop().index);
|
|
56419
|
+
if (wordCount(curTitle) < 3) {
|
|
56420
|
+
curTitle = origTitle.replace(/^[^\|\-\\\/>»]*[\|\-\\\/>»]/gi, "");
|
|
56421
|
+
}
|
|
56422
|
+
} else if (curTitle.includes(": ")) {
|
|
56423
|
+
var headings = this._getAllNodesWithTag(doc, ["h1", "h2"]);
|
|
56378
56424
|
var trimmedTitle = curTitle.trim();
|
|
56379
56425
|
var match2 = this._someNode(headings, function(heading2) {
|
|
56380
56426
|
return heading2.textContent.trim() === trimmedTitle;
|
|
@@ -56389,8 +56435,9 @@ var require_Readability = __commonJS({
|
|
|
56389
56435
|
}
|
|
56390
56436
|
} else if (curTitle.length > 150 || curTitle.length < 15) {
|
|
56391
56437
|
var hOnes = doc.getElementsByTagName("h1");
|
|
56392
|
-
if (hOnes.length === 1)
|
|
56438
|
+
if (hOnes.length === 1) {
|
|
56393
56439
|
curTitle = this._getInnerText(hOnes[0]);
|
|
56440
|
+
}
|
|
56394
56441
|
}
|
|
56395
56442
|
curTitle = curTitle.trim().replace(this.REGEXPS.normalize, " ");
|
|
56396
56443
|
var curTitleWordCount = wordCount(curTitle);
|
|
@@ -56405,7 +56452,7 @@ var require_Readability = __commonJS({
|
|
|
56405
56452
|
*
|
|
56406
56453
|
* @return void
|
|
56407
56454
|
**/
|
|
56408
|
-
_prepDocument
|
|
56455
|
+
_prepDocument() {
|
|
56409
56456
|
var doc = this._doc;
|
|
56410
56457
|
this._removeNodes(this._getAllNodesWithTag(doc, ["style"]));
|
|
56411
56458
|
if (doc.body) {
|
|
@@ -56418,7 +56465,7 @@ var require_Readability = __commonJS({
|
|
|
56418
56465
|
* whitespace in between. If the given node is an element, the same node is
|
|
56419
56466
|
* returned.
|
|
56420
56467
|
*/
|
|
56421
|
-
_nextNode
|
|
56468
|
+
_nextNode(node) {
|
|
56422
56469
|
var next = node;
|
|
56423
56470
|
while (next && next.nodeType != this.ELEMENT_NODE && this.REGEXPS.whitespace.test(next.textContent)) {
|
|
56424
56471
|
next = next.nextSibling;
|
|
@@ -56432,14 +56479,14 @@ var require_Readability = __commonJS({
|
|
|
56432
56479
|
* will become:
|
|
56433
56480
|
* <div>foo<br>bar<p>abc</p></div>
|
|
56434
56481
|
*/
|
|
56435
|
-
_replaceBrs
|
|
56482
|
+
_replaceBrs(elem) {
|
|
56436
56483
|
this._forEachNode(this._getAllNodesWithTag(elem, ["br"]), function(br2) {
|
|
56437
56484
|
var next = br2.nextSibling;
|
|
56438
56485
|
var replaced = false;
|
|
56439
56486
|
while ((next = this._nextNode(next)) && next.tagName == "BR") {
|
|
56440
56487
|
replaced = true;
|
|
56441
56488
|
var brSibling = next.nextSibling;
|
|
56442
|
-
next.
|
|
56489
|
+
next.remove();
|
|
56443
56490
|
next = brSibling;
|
|
56444
56491
|
}
|
|
56445
56492
|
if (replaced) {
|
|
@@ -56449,24 +56496,27 @@ var require_Readability = __commonJS({
|
|
|
56449
56496
|
while (next) {
|
|
56450
56497
|
if (next.tagName == "BR") {
|
|
56451
56498
|
var nextElem = this._nextNode(next.nextSibling);
|
|
56452
|
-
if (nextElem && nextElem.tagName == "BR")
|
|
56499
|
+
if (nextElem && nextElem.tagName == "BR") {
|
|
56453
56500
|
break;
|
|
56501
|
+
}
|
|
56454
56502
|
}
|
|
56455
|
-
if (!this._isPhrasingContent(next))
|
|
56503
|
+
if (!this._isPhrasingContent(next)) {
|
|
56456
56504
|
break;
|
|
56505
|
+
}
|
|
56457
56506
|
var sibling = next.nextSibling;
|
|
56458
56507
|
p.appendChild(next);
|
|
56459
56508
|
next = sibling;
|
|
56460
56509
|
}
|
|
56461
56510
|
while (p.lastChild && this._isWhitespace(p.lastChild)) {
|
|
56462
|
-
p.
|
|
56511
|
+
p.lastChild.remove();
|
|
56463
56512
|
}
|
|
56464
|
-
if (p.parentNode.tagName === "P")
|
|
56513
|
+
if (p.parentNode.tagName === "P") {
|
|
56465
56514
|
this._setNodeTag(p.parentNode, "DIV");
|
|
56515
|
+
}
|
|
56466
56516
|
}
|
|
56467
56517
|
});
|
|
56468
56518
|
},
|
|
56469
|
-
_setNodeTag
|
|
56519
|
+
_setNodeTag(node, tag2) {
|
|
56470
56520
|
this.log("_setNodeTag", node, tag2);
|
|
56471
56521
|
if (this._docJSDOMParser) {
|
|
56472
56522
|
node.localName = tag2.toLowerCase();
|
|
@@ -56478,13 +56528,11 @@ var require_Readability = __commonJS({
|
|
|
56478
56528
|
replacement.appendChild(node.firstChild);
|
|
56479
56529
|
}
|
|
56480
56530
|
node.parentNode.replaceChild(replacement, node);
|
|
56481
|
-
if (node.readability)
|
|
56531
|
+
if (node.readability) {
|
|
56482
56532
|
replacement.readability = node.readability;
|
|
56533
|
+
}
|
|
56483
56534
|
for (var i = 0; i < node.attributes.length; i++) {
|
|
56484
|
-
|
|
56485
|
-
replacement.setAttribute(node.attributes[i].name, node.attributes[i].value);
|
|
56486
|
-
} catch (ex) {
|
|
56487
|
-
}
|
|
56535
|
+
replacement.setAttributeNode(node.attributes[i].cloneNode());
|
|
56488
56536
|
}
|
|
56489
56537
|
return replacement;
|
|
56490
56538
|
},
|
|
@@ -56495,7 +56543,7 @@ var require_Readability = __commonJS({
|
|
|
56495
56543
|
* @param Element
|
|
56496
56544
|
* @return void
|
|
56497
56545
|
**/
|
|
56498
|
-
_prepArticle
|
|
56546
|
+
_prepArticle(articleContent) {
|
|
56499
56547
|
this._cleanStyles(articleContent);
|
|
56500
56548
|
this._markDataTables(articleContent);
|
|
56501
56549
|
this._fixLazyImages(articleContent);
|
|
@@ -56521,31 +56569,48 @@ var require_Readability = __commonJS({
|
|
|
56521
56569
|
this._cleanConditionally(articleContent, "table");
|
|
56522
56570
|
this._cleanConditionally(articleContent, "ul");
|
|
56523
56571
|
this._cleanConditionally(articleContent, "div");
|
|
56524
|
-
this._replaceNodeTags(
|
|
56525
|
-
|
|
56526
|
-
|
|
56527
|
-
|
|
56528
|
-
|
|
56529
|
-
|
|
56530
|
-
|
|
56531
|
-
|
|
56532
|
-
|
|
56533
|
-
|
|
56534
|
-
|
|
56535
|
-
|
|
56536
|
-
|
|
56537
|
-
|
|
56538
|
-
|
|
56539
|
-
|
|
56540
|
-
|
|
56541
|
-
|
|
56542
|
-
|
|
56543
|
-
|
|
56544
|
-
|
|
56545
|
-
|
|
56572
|
+
this._replaceNodeTags(
|
|
56573
|
+
this._getAllNodesWithTag(articleContent, ["h1"]),
|
|
56574
|
+
"h2"
|
|
56575
|
+
);
|
|
56576
|
+
this._removeNodes(
|
|
56577
|
+
this._getAllNodesWithTag(articleContent, ["p"]),
|
|
56578
|
+
function(paragraph2) {
|
|
56579
|
+
var contentElementCount = this._getAllNodesWithTag(paragraph2, [
|
|
56580
|
+
"img",
|
|
56581
|
+
"embed",
|
|
56582
|
+
"object",
|
|
56583
|
+
"iframe"
|
|
56584
|
+
]).length;
|
|
56585
|
+
return contentElementCount === 0 && !this._getInnerText(paragraph2, false);
|
|
56586
|
+
}
|
|
56587
|
+
);
|
|
56588
|
+
this._forEachNode(
|
|
56589
|
+
this._getAllNodesWithTag(articleContent, ["br"]),
|
|
56590
|
+
function(br2) {
|
|
56591
|
+
var next = this._nextNode(br2.nextSibling);
|
|
56592
|
+
if (next && next.tagName == "P") {
|
|
56593
|
+
br2.remove();
|
|
56546
56594
|
}
|
|
56547
56595
|
}
|
|
56548
|
-
|
|
56596
|
+
);
|
|
56597
|
+
this._forEachNode(
|
|
56598
|
+
this._getAllNodesWithTag(articleContent, ["table"]),
|
|
56599
|
+
function(table) {
|
|
56600
|
+
var tbody = this._hasSingleTagInsideElement(table, "TBODY") ? table.firstElementChild : table;
|
|
56601
|
+
if (this._hasSingleTagInsideElement(tbody, "TR")) {
|
|
56602
|
+
var row = tbody.firstElementChild;
|
|
56603
|
+
if (this._hasSingleTagInsideElement(row, "TD")) {
|
|
56604
|
+
var cell = row.firstElementChild;
|
|
56605
|
+
cell = this._setNodeTag(
|
|
56606
|
+
cell,
|
|
56607
|
+
this._everyNode(cell.childNodes, this._isPhrasingContent) ? "P" : "DIV"
|
|
56608
|
+
);
|
|
56609
|
+
table.parentNode.replaceChild(cell, table);
|
|
56610
|
+
}
|
|
56611
|
+
}
|
|
56612
|
+
}
|
|
56613
|
+
);
|
|
56549
56614
|
},
|
|
56550
56615
|
/**
|
|
56551
56616
|
* Initialize a node with the readability object. Also checks the
|
|
@@ -56553,9 +56618,9 @@ var require_Readability = __commonJS({
|
|
|
56553
56618
|
*
|
|
56554
56619
|
* @param Element
|
|
56555
56620
|
* @return void
|
|
56556
|
-
|
|
56557
|
-
_initializeNode
|
|
56558
|
-
node.readability = {
|
|
56621
|
+
**/
|
|
56622
|
+
_initializeNode(node) {
|
|
56623
|
+
node.readability = { contentScore: 0 };
|
|
56559
56624
|
switch (node.tagName) {
|
|
56560
56625
|
case "DIV":
|
|
56561
56626
|
node.readability.contentScore += 5;
|
|
@@ -56587,9 +56652,9 @@ var require_Readability = __commonJS({
|
|
|
56587
56652
|
}
|
|
56588
56653
|
node.readability.contentScore += this._getClassWeight(node);
|
|
56589
56654
|
},
|
|
56590
|
-
_removeAndGetNext
|
|
56655
|
+
_removeAndGetNext(node) {
|
|
56591
56656
|
var nextNode = this._getNextNode(node, true);
|
|
56592
|
-
node.
|
|
56657
|
+
node.remove();
|
|
56593
56658
|
return nextNode;
|
|
56594
56659
|
},
|
|
56595
56660
|
/**
|
|
@@ -56598,8 +56663,12 @@ var require_Readability = __commonJS({
|
|
|
56598
56663
|
* (and its kids) are going away, and we want the next node over.
|
|
56599
56664
|
*
|
|
56600
56665
|
* Calling this in a loop will traverse the DOM depth-first.
|
|
56666
|
+
*
|
|
56667
|
+
* @param {Element} node
|
|
56668
|
+
* @param {boolean} ignoreSelfAndKids
|
|
56669
|
+
* @return {Element}
|
|
56601
56670
|
*/
|
|
56602
|
-
_getNextNode
|
|
56671
|
+
_getNextNode(node, ignoreSelfAndKids) {
|
|
56603
56672
|
if (!ignoreSelfAndKids && node.firstElementChild) {
|
|
56604
56673
|
return node.firstElementChild;
|
|
56605
56674
|
}
|
|
@@ -56615,7 +56684,7 @@ var require_Readability = __commonJS({
|
|
|
56615
56684
|
// 1 = same text, 0 = completely different text
|
|
56616
56685
|
// works the way that it splits both texts into words and then finds words that are unique in second text
|
|
56617
56686
|
// the result is given by the lower length of unique parts
|
|
56618
|
-
_textSimilarity
|
|
56687
|
+
_textSimilarity(textA, textB) {
|
|
56619
56688
|
var tokensA = textA.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean);
|
|
56620
56689
|
var tokensB = textB.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean);
|
|
56621
56690
|
if (!tokensA.length || !tokensB.length) {
|
|
@@ -56625,27 +56694,27 @@ var require_Readability = __commonJS({
|
|
|
56625
56694
|
var distanceB = uniqTokensB.join(" ").length / tokensB.join(" ").length;
|
|
56626
56695
|
return 1 - distanceB;
|
|
56627
56696
|
},
|
|
56628
|
-
|
|
56629
|
-
|
|
56630
|
-
|
|
56631
|
-
|
|
56632
|
-
|
|
56633
|
-
|
|
56634
|
-
|
|
56635
|
-
|
|
56636
|
-
|
|
56637
|
-
|
|
56638
|
-
|
|
56639
|
-
|
|
56640
|
-
return false;
|
|
56697
|
+
/**
|
|
56698
|
+
* Checks whether an element node contains a valid byline
|
|
56699
|
+
*
|
|
56700
|
+
* @param node {Element}
|
|
56701
|
+
* @param matchString {string}
|
|
56702
|
+
* @return boolean
|
|
56703
|
+
*/
|
|
56704
|
+
_isValidByline(node, matchString) {
|
|
56705
|
+
var rel = node.getAttribute("rel");
|
|
56706
|
+
var itemprop = node.getAttribute("itemprop");
|
|
56707
|
+
var bylineLength = node.textContent.trim().length;
|
|
56708
|
+
return (rel === "author" || itemprop && itemprop.includes("author") || this.REGEXPS.byline.test(matchString)) && !!bylineLength && bylineLength < 100;
|
|
56641
56709
|
},
|
|
56642
|
-
_getNodeAncestors
|
|
56710
|
+
_getNodeAncestors(node, maxDepth) {
|
|
56643
56711
|
maxDepth = maxDepth || 0;
|
|
56644
56712
|
var i = 0, ancestors = [];
|
|
56645
56713
|
while (node.parentNode) {
|
|
56646
56714
|
ancestors.push(node.parentNode);
|
|
56647
|
-
if (maxDepth && ++i === maxDepth)
|
|
56715
|
+
if (maxDepth && ++i === maxDepth) {
|
|
56648
56716
|
break;
|
|
56717
|
+
}
|
|
56649
56718
|
node = node.parentNode;
|
|
56650
56719
|
}
|
|
56651
56720
|
return ancestors;
|
|
@@ -56656,8 +56725,9 @@ var require_Readability = __commonJS({
|
|
|
56656
56725
|
*
|
|
56657
56726
|
* @param page a document to run upon. Needs to be a full document, complete with body.
|
|
56658
56727
|
* @return Element
|
|
56659
|
-
|
|
56660
|
-
|
|
56728
|
+
**/
|
|
56729
|
+
/* eslint-disable-next-line complexity */
|
|
56730
|
+
_grabArticle(page) {
|
|
56661
56731
|
this.log("**** grabArticle ****");
|
|
56662
56732
|
var doc = this._doc;
|
|
56663
56733
|
var isPaging = page !== null;
|
|
@@ -56669,7 +56739,9 @@ var require_Readability = __commonJS({
|
|
|
56669
56739
|
var pageCacheHtml = page.innerHTML;
|
|
56670
56740
|
while (true) {
|
|
56671
56741
|
this.log("Starting grabArticle loop");
|
|
56672
|
-
var stripUnlikelyCandidates = this._flagIsActive(
|
|
56742
|
+
var stripUnlikelyCandidates = this._flagIsActive(
|
|
56743
|
+
this.FLAG_STRIP_UNLIKELYS
|
|
56744
|
+
);
|
|
56673
56745
|
var elementsToScore = [];
|
|
56674
56746
|
var node = this._doc.documentElement;
|
|
56675
56747
|
let shouldRemoveTitleHeader = true;
|
|
@@ -56687,12 +56759,29 @@ var require_Readability = __commonJS({
|
|
|
56687
56759
|
node = this._removeAndGetNext(node);
|
|
56688
56760
|
continue;
|
|
56689
56761
|
}
|
|
56690
|
-
if (this.
|
|
56762
|
+
if (!this._articleByline && !this._metadata.byline && this._isValidByline(node, matchString)) {
|
|
56763
|
+
var endOfSearchMarkerNode = this._getNextNode(node, true);
|
|
56764
|
+
var next = this._getNextNode(node);
|
|
56765
|
+
var itemPropNameNode = null;
|
|
56766
|
+
while (next && next != endOfSearchMarkerNode) {
|
|
56767
|
+
var itemprop = next.getAttribute("itemprop");
|
|
56768
|
+
if (itemprop && itemprop.includes("name")) {
|
|
56769
|
+
itemPropNameNode = next;
|
|
56770
|
+
break;
|
|
56771
|
+
} else {
|
|
56772
|
+
next = this._getNextNode(next);
|
|
56773
|
+
}
|
|
56774
|
+
}
|
|
56775
|
+
this._articleByline = (itemPropNameNode ?? node).textContent.trim();
|
|
56691
56776
|
node = this._removeAndGetNext(node);
|
|
56692
56777
|
continue;
|
|
56693
56778
|
}
|
|
56694
56779
|
if (shouldRemoveTitleHeader && this._headerDuplicatesTitle(node)) {
|
|
56695
|
-
this.log(
|
|
56780
|
+
this.log(
|
|
56781
|
+
"Removing header: ",
|
|
56782
|
+
node.textContent.trim(),
|
|
56783
|
+
this._articleTitle.trim()
|
|
56784
|
+
);
|
|
56696
56785
|
shouldRemoveTitleHeader = false;
|
|
56697
56786
|
node = this._removeAndGetNext(node);
|
|
56698
56787
|
continue;
|
|
@@ -56704,7 +56793,9 @@ var require_Readability = __commonJS({
|
|
|
56704
56793
|
continue;
|
|
56705
56794
|
}
|
|
56706
56795
|
if (this.UNLIKELY_ROLES.includes(node.getAttribute("role"))) {
|
|
56707
|
-
this.log(
|
|
56796
|
+
this.log(
|
|
56797
|
+
"Removing content with role " + node.getAttribute("role") + " - " + matchString
|
|
56798
|
+
);
|
|
56708
56799
|
node = this._removeAndGetNext(node);
|
|
56709
56800
|
continue;
|
|
56710
56801
|
}
|
|
@@ -56713,7 +56804,7 @@ var require_Readability = __commonJS({
|
|
|
56713
56804
|
node = this._removeAndGetNext(node);
|
|
56714
56805
|
continue;
|
|
56715
56806
|
}
|
|
56716
|
-
if (this.DEFAULT_TAGS_TO_SCORE.
|
|
56807
|
+
if (this.DEFAULT_TAGS_TO_SCORE.includes(node.tagName)) {
|
|
56717
56808
|
elementsToScore.push(node);
|
|
56718
56809
|
}
|
|
56719
56810
|
if (node.tagName === "DIV") {
|
|
@@ -56731,7 +56822,7 @@ var require_Readability = __commonJS({
|
|
|
56731
56822
|
}
|
|
56732
56823
|
} else if (p !== null) {
|
|
56733
56824
|
while (p.lastChild && this._isWhitespace(p.lastChild)) {
|
|
56734
|
-
p.
|
|
56825
|
+
p.lastChild.remove();
|
|
56735
56826
|
}
|
|
56736
56827
|
p = null;
|
|
56737
56828
|
}
|
|
@@ -56751,31 +56842,36 @@ var require_Readability = __commonJS({
|
|
|
56751
56842
|
}
|
|
56752
56843
|
var candidates = [];
|
|
56753
56844
|
this._forEachNode(elementsToScore, function(elementToScore) {
|
|
56754
|
-
if (!elementToScore.parentNode || typeof elementToScore.parentNode.tagName === "undefined")
|
|
56845
|
+
if (!elementToScore.parentNode || typeof elementToScore.parentNode.tagName === "undefined") {
|
|
56755
56846
|
return;
|
|
56847
|
+
}
|
|
56756
56848
|
var innerText = this._getInnerText(elementToScore);
|
|
56757
|
-
if (innerText.length < 25)
|
|
56849
|
+
if (innerText.length < 25) {
|
|
56758
56850
|
return;
|
|
56851
|
+
}
|
|
56759
56852
|
var ancestors2 = this._getNodeAncestors(elementToScore, 5);
|
|
56760
|
-
if (ancestors2.length === 0)
|
|
56853
|
+
if (ancestors2.length === 0) {
|
|
56761
56854
|
return;
|
|
56855
|
+
}
|
|
56762
56856
|
var contentScore = 0;
|
|
56763
56857
|
contentScore += 1;
|
|
56764
56858
|
contentScore += innerText.split(this.REGEXPS.commas).length;
|
|
56765
56859
|
contentScore += Math.min(Math.floor(innerText.length / 100), 3);
|
|
56766
56860
|
this._forEachNode(ancestors2, function(ancestor, level) {
|
|
56767
|
-
if (!ancestor.tagName || !ancestor.parentNode || typeof ancestor.parentNode.tagName === "undefined")
|
|
56861
|
+
if (!ancestor.tagName || !ancestor.parentNode || typeof ancestor.parentNode.tagName === "undefined") {
|
|
56768
56862
|
return;
|
|
56863
|
+
}
|
|
56769
56864
|
if (typeof ancestor.readability === "undefined") {
|
|
56770
56865
|
this._initializeNode(ancestor);
|
|
56771
56866
|
candidates.push(ancestor);
|
|
56772
56867
|
}
|
|
56773
|
-
if (level === 0)
|
|
56868
|
+
if (level === 0) {
|
|
56774
56869
|
var scoreDivider = 1;
|
|
56775
|
-
else if (level === 1)
|
|
56870
|
+
} else if (level === 1) {
|
|
56776
56871
|
scoreDivider = 2;
|
|
56777
|
-
else
|
|
56872
|
+
} else {
|
|
56778
56873
|
scoreDivider = level * 3;
|
|
56874
|
+
}
|
|
56779
56875
|
ancestor.readability.contentScore += contentScore / scoreDivider;
|
|
56780
56876
|
});
|
|
56781
56877
|
});
|
|
@@ -56789,8 +56885,9 @@ var require_Readability = __commonJS({
|
|
|
56789
56885
|
var aTopCandidate = topCandidates[t2];
|
|
56790
56886
|
if (!aTopCandidate || candidateScore > aTopCandidate.readability.contentScore) {
|
|
56791
56887
|
topCandidates.splice(t2, 0, candidate);
|
|
56792
|
-
if (topCandidates.length > this._nbTopCandidates)
|
|
56888
|
+
if (topCandidates.length > this._nbTopCandidates) {
|
|
56793
56889
|
topCandidates.pop();
|
|
56890
|
+
}
|
|
56794
56891
|
break;
|
|
56795
56892
|
}
|
|
56796
56893
|
}
|
|
@@ -56811,7 +56908,9 @@ var require_Readability = __commonJS({
|
|
|
56811
56908
|
var alternativeCandidateAncestors = [];
|
|
56812
56909
|
for (var i = 1; i < topCandidates.length; i++) {
|
|
56813
56910
|
if (topCandidates[i].readability.contentScore / topCandidate.readability.contentScore >= 0.75) {
|
|
56814
|
-
alternativeCandidateAncestors.push(
|
|
56911
|
+
alternativeCandidateAncestors.push(
|
|
56912
|
+
this._getNodeAncestors(topCandidates[i])
|
|
56913
|
+
);
|
|
56815
56914
|
}
|
|
56816
56915
|
}
|
|
56817
56916
|
var MINIMUM_TOPCANDIDATES = 3;
|
|
@@ -56820,7 +56919,11 @@ var require_Readability = __commonJS({
|
|
|
56820
56919
|
while (parentOfTopCandidate.tagName !== "BODY") {
|
|
56821
56920
|
var listsContainingThisAncestor = 0;
|
|
56822
56921
|
for (var ancestorIndex = 0; ancestorIndex < alternativeCandidateAncestors.length && listsContainingThisAncestor < MINIMUM_TOPCANDIDATES; ancestorIndex++) {
|
|
56823
|
-
listsContainingThisAncestor += Number(
|
|
56922
|
+
listsContainingThisAncestor += Number(
|
|
56923
|
+
alternativeCandidateAncestors[ancestorIndex].includes(
|
|
56924
|
+
parentOfTopCandidate
|
|
56925
|
+
)
|
|
56926
|
+
);
|
|
56824
56927
|
}
|
|
56825
56928
|
if (listsContainingThisAncestor >= MINIMUM_TOPCANDIDATES) {
|
|
56826
56929
|
topCandidate = parentOfTopCandidate;
|
|
@@ -56841,8 +56944,9 @@ var require_Readability = __commonJS({
|
|
|
56841
56944
|
continue;
|
|
56842
56945
|
}
|
|
56843
56946
|
var parentScore = parentOfTopCandidate.readability.contentScore;
|
|
56844
|
-
if (parentScore < scoreThreshold)
|
|
56947
|
+
if (parentScore < scoreThreshold) {
|
|
56845
56948
|
break;
|
|
56949
|
+
}
|
|
56846
56950
|
if (parentScore > lastScore) {
|
|
56847
56951
|
topCandidate = parentOfTopCandidate;
|
|
56848
56952
|
break;
|
|
@@ -56860,22 +56964,34 @@ var require_Readability = __commonJS({
|
|
|
56860
56964
|
}
|
|
56861
56965
|
}
|
|
56862
56966
|
var articleContent = doc.createElement("DIV");
|
|
56863
|
-
if (isPaging)
|
|
56967
|
+
if (isPaging) {
|
|
56864
56968
|
articleContent.id = "readability-content";
|
|
56865
|
-
|
|
56969
|
+
}
|
|
56970
|
+
var siblingScoreThreshold = Math.max(
|
|
56971
|
+
10,
|
|
56972
|
+
topCandidate.readability.contentScore * 0.2
|
|
56973
|
+
);
|
|
56866
56974
|
parentOfTopCandidate = topCandidate.parentNode;
|
|
56867
56975
|
var siblings = parentOfTopCandidate.children;
|
|
56868
56976
|
for (var s2 = 0, sl = siblings.length; s2 < sl; s2++) {
|
|
56869
56977
|
var sibling = siblings[s2];
|
|
56870
56978
|
var append = false;
|
|
56871
|
-
this.log(
|
|
56872
|
-
|
|
56979
|
+
this.log(
|
|
56980
|
+
"Looking at sibling node:",
|
|
56981
|
+
sibling,
|
|
56982
|
+
sibling.readability ? "with score " + sibling.readability.contentScore : ""
|
|
56983
|
+
);
|
|
56984
|
+
this.log(
|
|
56985
|
+
"Sibling has score",
|
|
56986
|
+
sibling.readability ? sibling.readability.contentScore : "Unknown"
|
|
56987
|
+
);
|
|
56873
56988
|
if (sibling === topCandidate) {
|
|
56874
56989
|
append = true;
|
|
56875
56990
|
} else {
|
|
56876
56991
|
var contentBonus = 0;
|
|
56877
|
-
if (sibling.className === topCandidate.className && topCandidate.className !== "")
|
|
56992
|
+
if (sibling.className === topCandidate.className && topCandidate.className !== "") {
|
|
56878
56993
|
contentBonus += topCandidate.readability.contentScore * 0.2;
|
|
56994
|
+
}
|
|
56879
56995
|
if (sibling.readability && sibling.readability.contentScore + contentBonus >= siblingScoreThreshold) {
|
|
56880
56996
|
append = true;
|
|
56881
56997
|
} else if (sibling.nodeName === "P") {
|
|
@@ -56891,7 +57007,7 @@ var require_Readability = __commonJS({
|
|
|
56891
57007
|
}
|
|
56892
57008
|
if (append) {
|
|
56893
57009
|
this.log("Appending node:", sibling);
|
|
56894
|
-
if (this.ALTER_TO_DIV_EXCEPTIONS.
|
|
57010
|
+
if (!this.ALTER_TO_DIV_EXCEPTIONS.includes(sibling.nodeName)) {
|
|
56895
57011
|
this.log("Altering sibling:", sibling, "to div.");
|
|
56896
57012
|
sibling = this._setNodeTag(sibling, "DIV");
|
|
56897
57013
|
}
|
|
@@ -56901,11 +57017,13 @@ var require_Readability = __commonJS({
|
|
|
56901
57017
|
sl -= 1;
|
|
56902
57018
|
}
|
|
56903
57019
|
}
|
|
56904
|
-
if (this._debug)
|
|
57020
|
+
if (this._debug) {
|
|
56905
57021
|
this.log("Article content pre-prep: " + articleContent.innerHTML);
|
|
57022
|
+
}
|
|
56906
57023
|
this._prepArticle(articleContent);
|
|
56907
|
-
if (this._debug)
|
|
57024
|
+
if (this._debug) {
|
|
56908
57025
|
this.log("Article content post-prep: " + articleContent.innerHTML);
|
|
57026
|
+
}
|
|
56909
57027
|
if (neededToCreateTopCandidate) {
|
|
56910
57028
|
topCandidate.id = "readability-page-1";
|
|
56911
57029
|
topCandidate.className = "page";
|
|
@@ -56918,24 +57036,25 @@ var require_Readability = __commonJS({
|
|
|
56918
57036
|
}
|
|
56919
57037
|
articleContent.appendChild(div);
|
|
56920
57038
|
}
|
|
56921
|
-
if (this._debug)
|
|
57039
|
+
if (this._debug) {
|
|
56922
57040
|
this.log("Article content after paging: " + articleContent.innerHTML);
|
|
57041
|
+
}
|
|
56923
57042
|
var parseSuccessful = true;
|
|
56924
57043
|
var textLength = this._getInnerText(articleContent, true).length;
|
|
56925
57044
|
if (textLength < this._charThreshold) {
|
|
56926
57045
|
parseSuccessful = false;
|
|
56927
57046
|
page.innerHTML = pageCacheHtml;
|
|
57047
|
+
this._attempts.push({
|
|
57048
|
+
articleContent,
|
|
57049
|
+
textLength
|
|
57050
|
+
});
|
|
56928
57051
|
if (this._flagIsActive(this.FLAG_STRIP_UNLIKELYS)) {
|
|
56929
57052
|
this._removeFlag(this.FLAG_STRIP_UNLIKELYS);
|
|
56930
|
-
this._attempts.push({ articleContent, textLength });
|
|
56931
57053
|
} else if (this._flagIsActive(this.FLAG_WEIGHT_CLASSES)) {
|
|
56932
57054
|
this._removeFlag(this.FLAG_WEIGHT_CLASSES);
|
|
56933
|
-
this._attempts.push({ articleContent, textLength });
|
|
56934
57055
|
} else if (this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)) {
|
|
56935
57056
|
this._removeFlag(this.FLAG_CLEAN_CONDITIONALLY);
|
|
56936
|
-
this._attempts.push({ articleContent, textLength });
|
|
56937
57057
|
} else {
|
|
56938
|
-
this._attempts.push({ articleContent, textLength });
|
|
56939
57058
|
this._attempts.sort(function(a, b) {
|
|
56940
57059
|
return b.textLength - a.textLength;
|
|
56941
57060
|
});
|
|
@@ -56947,10 +57066,13 @@ var require_Readability = __commonJS({
|
|
|
56947
57066
|
}
|
|
56948
57067
|
}
|
|
56949
57068
|
if (parseSuccessful) {
|
|
56950
|
-
var ancestors = [parentOfTopCandidate, topCandidate].concat(
|
|
57069
|
+
var ancestors = [parentOfTopCandidate, topCandidate].concat(
|
|
57070
|
+
this._getNodeAncestors(parentOfTopCandidate)
|
|
57071
|
+
);
|
|
56951
57072
|
this._someNode(ancestors, function(ancestor) {
|
|
56952
|
-
if (!ancestor.tagName)
|
|
57073
|
+
if (!ancestor.tagName) {
|
|
56953
57074
|
return false;
|
|
57075
|
+
}
|
|
56954
57076
|
var articleDir = ancestor.getAttribute("dir");
|
|
56955
57077
|
if (articleDir) {
|
|
56956
57078
|
this._articleDir = articleDir;
|
|
@@ -56962,37 +57084,25 @@ var require_Readability = __commonJS({
|
|
|
56962
57084
|
}
|
|
56963
57085
|
}
|
|
56964
57086
|
},
|
|
56965
|
-
/**
|
|
56966
|
-
* Check whether the input string could be a byline.
|
|
56967
|
-
* This verifies that the input is a string, and that the length
|
|
56968
|
-
* is less than 100 chars.
|
|
56969
|
-
*
|
|
56970
|
-
* @param possibleByline {string} - a string to check whether its a byline.
|
|
56971
|
-
* @return Boolean - whether the input string is a byline.
|
|
56972
|
-
*/
|
|
56973
|
-
_isValidByline: function(byline) {
|
|
56974
|
-
if (typeof byline == "string" || byline instanceof String) {
|
|
56975
|
-
byline = byline.trim();
|
|
56976
|
-
return byline.length > 0 && byline.length < 100;
|
|
56977
|
-
}
|
|
56978
|
-
return false;
|
|
56979
|
-
},
|
|
56980
57087
|
/**
|
|
56981
57088
|
* Converts some of the common HTML entities in string to their corresponding characters.
|
|
56982
57089
|
*
|
|
56983
57090
|
* @param str {string} - a string to unescape.
|
|
56984
57091
|
* @return string without HTML entity.
|
|
56985
57092
|
*/
|
|
56986
|
-
_unescapeHtmlEntities
|
|
57093
|
+
_unescapeHtmlEntities(str3) {
|
|
56987
57094
|
if (!str3) {
|
|
56988
57095
|
return str3;
|
|
56989
57096
|
}
|
|
56990
57097
|
var htmlEscapeMap = this.HTML_ESCAPE_MAP;
|
|
56991
57098
|
return str3.replace(/&(quot|amp|apos|lt|gt);/g, function(_, tag2) {
|
|
56992
57099
|
return htmlEscapeMap[tag2];
|
|
56993
|
-
}).replace(/&#(?:x([0-9a-
|
|
57100
|
+
}).replace(/&#(?:x([0-9a-f]+)|([0-9]+));/gi, function(_, hex, numStr) {
|
|
56994
57101
|
var num = parseInt(hex || numStr, hex ? 16 : 10);
|
|
56995
|
-
|
|
57102
|
+
if (num == 0 || num > 1114111 || num >= 55296 && num <= 57343) {
|
|
57103
|
+
num = 65533;
|
|
57104
|
+
}
|
|
57105
|
+
return String.fromCodePoint(num);
|
|
56996
57106
|
});
|
|
56997
57107
|
},
|
|
56998
57108
|
/**
|
|
@@ -57000,22 +57110,33 @@ var require_Readability = __commonJS({
|
|
|
57000
57110
|
* For now, only Schema.org objects of type Article or its subtypes are supported.
|
|
57001
57111
|
* @return Object with any metadata that could be extracted (possibly none)
|
|
57002
57112
|
*/
|
|
57003
|
-
_getJSONLD
|
|
57113
|
+
_getJSONLD(doc) {
|
|
57004
57114
|
var scripts = this._getAllNodesWithTag(doc, ["script"]);
|
|
57005
57115
|
var metadata;
|
|
57006
57116
|
this._forEachNode(scripts, function(jsonLdElement) {
|
|
57007
57117
|
if (!metadata && jsonLdElement.getAttribute("type") === "application/ld+json") {
|
|
57008
57118
|
try {
|
|
57009
|
-
var content = jsonLdElement.textContent.replace(
|
|
57119
|
+
var content = jsonLdElement.textContent.replace(
|
|
57120
|
+
/^\s*<!\[CDATA\[|\]\]>\s*$/g,
|
|
57121
|
+
""
|
|
57122
|
+
);
|
|
57010
57123
|
var parsed = JSON.parse(content);
|
|
57011
|
-
if (
|
|
57124
|
+
if (Array.isArray(parsed)) {
|
|
57125
|
+
parsed = parsed.find((it) => {
|
|
57126
|
+
return it["@type"] && it["@type"].match(this.REGEXPS.jsonLdArticleTypes);
|
|
57127
|
+
});
|
|
57128
|
+
if (!parsed) {
|
|
57129
|
+
return;
|
|
57130
|
+
}
|
|
57131
|
+
}
|
|
57132
|
+
var schemaDotOrgRegex = /^https?\:\/\/schema\.org\/?$/;
|
|
57133
|
+
var matches = typeof parsed["@context"] === "string" && parsed["@context"].match(schemaDotOrgRegex) || typeof parsed["@context"] === "object" && typeof parsed["@context"]["@vocab"] == "string" && parsed["@context"]["@vocab"].match(schemaDotOrgRegex);
|
|
57134
|
+
if (!matches) {
|
|
57012
57135
|
return;
|
|
57013
57136
|
}
|
|
57014
57137
|
if (!parsed["@type"] && Array.isArray(parsed["@graph"])) {
|
|
57015
|
-
parsed = parsed["@graph"].find(
|
|
57016
|
-
return (it["@type"] || "").match(
|
|
57017
|
-
this.REGEXPS.jsonLdArticleTypes
|
|
57018
|
-
);
|
|
57138
|
+
parsed = parsed["@graph"].find((it) => {
|
|
57139
|
+
return (it["@type"] || "").match(this.REGEXPS.jsonLdArticleTypes);
|
|
57019
57140
|
});
|
|
57020
57141
|
}
|
|
57021
57142
|
if (!parsed || !parsed["@type"] || !parsed["@type"].match(this.REGEXPS.jsonLdArticleTypes)) {
|
|
@@ -57056,7 +57177,6 @@ var require_Readability = __commonJS({
|
|
|
57056
57177
|
if (typeof parsed.datePublished === "string") {
|
|
57057
57178
|
metadata.datePublished = parsed.datePublished.trim();
|
|
57058
57179
|
}
|
|
57059
|
-
return;
|
|
57060
57180
|
} catch (err) {
|
|
57061
57181
|
this.log(err.message);
|
|
57062
57182
|
}
|
|
@@ -57072,12 +57192,12 @@ var require_Readability = __commonJS({
|
|
|
57072
57192
|
*
|
|
57073
57193
|
* @return Object with optional "excerpt" and "byline" properties
|
|
57074
57194
|
*/
|
|
57075
|
-
_getArticleMetadata
|
|
57195
|
+
_getArticleMetadata(jsonld) {
|
|
57076
57196
|
var metadata = {};
|
|
57077
57197
|
var values = {};
|
|
57078
57198
|
var metaElements = this._doc.getElementsByTagName("meta");
|
|
57079
57199
|
var propertyPattern = /\s*(article|dc|dcterm|og|twitter)\s*:\s*(author|creator|description|published_time|title|site_name)\s*/gi;
|
|
57080
|
-
var namePattern = /^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[
|
|
57200
|
+
var namePattern = /^\s*(?:(dc|dcterm|og|twitter|parsely|weibo:(article|webpage))\s*[-\.:]\s*)?(author|creator|pub-date|description|title|site_name)\s*$/i;
|
|
57081
57201
|
this._forEachNode(metaElements, function(element) {
|
|
57082
57202
|
var elementName = element.getAttribute("name");
|
|
57083
57203
|
var elementProperty = element.getAttribute("property");
|
|
@@ -57102,14 +57222,15 @@ var require_Readability = __commonJS({
|
|
|
57102
57222
|
}
|
|
57103
57223
|
}
|
|
57104
57224
|
});
|
|
57105
|
-
metadata.title = jsonld.title || values["dc:title"] || values["dcterm:title"] || values["og:title"] || values["weibo:article:title"] || values["weibo:webpage:title"] || values["title"] || values["
|
|
57225
|
+
metadata.title = jsonld.title || values["dc:title"] || values["dcterm:title"] || values["og:title"] || values["weibo:article:title"] || values["weibo:webpage:title"] || values.title || values["twitter:title"] || values["parsely-title"];
|
|
57106
57226
|
if (!metadata.title) {
|
|
57107
57227
|
metadata.title = this._getArticleTitle();
|
|
57108
57228
|
}
|
|
57109
|
-
|
|
57110
|
-
metadata.
|
|
57229
|
+
const articleAuthor = typeof values["article:author"] === "string" && !this._isUrl(values["article:author"]) ? values["article:author"] : void 0;
|
|
57230
|
+
metadata.byline = jsonld.byline || values["dc:creator"] || values["dcterm:creator"] || values.author || values["parsely-author"] || articleAuthor;
|
|
57231
|
+
metadata.excerpt = jsonld.excerpt || values["dc:description"] || values["dcterm:description"] || values["og:description"] || values["weibo:article:description"] || values["weibo:webpage:description"] || values.description || values["twitter:description"];
|
|
57111
57232
|
metadata.siteName = jsonld.siteName || values["og:site_name"];
|
|
57112
|
-
metadata.publishedTime = jsonld.datePublished || values["article:published_time"] || null;
|
|
57233
|
+
metadata.publishedTime = jsonld.datePublished || values["article:published_time"] || values["parsely-pub-date"] || null;
|
|
57113
57234
|
metadata.title = this._unescapeHtmlEntities(metadata.title);
|
|
57114
57235
|
metadata.byline = this._unescapeHtmlEntities(metadata.byline);
|
|
57115
57236
|
metadata.excerpt = this._unescapeHtmlEntities(metadata.excerpt);
|
|
@@ -57122,15 +57243,18 @@ var require_Readability = __commonJS({
|
|
|
57122
57243
|
* whether as a direct child or as its descendants.
|
|
57123
57244
|
*
|
|
57124
57245
|
* @param Element
|
|
57125
|
-
|
|
57126
|
-
_isSingleImage
|
|
57127
|
-
|
|
57128
|
-
|
|
57129
|
-
|
|
57130
|
-
|
|
57131
|
-
|
|
57246
|
+
**/
|
|
57247
|
+
_isSingleImage(node) {
|
|
57248
|
+
while (node) {
|
|
57249
|
+
if (node.tagName === "IMG") {
|
|
57250
|
+
return true;
|
|
57251
|
+
}
|
|
57252
|
+
if (node.children.length !== 1 || node.textContent.trim() !== "") {
|
|
57253
|
+
return false;
|
|
57254
|
+
}
|
|
57255
|
+
node = node.children[0];
|
|
57132
57256
|
}
|
|
57133
|
-
return
|
|
57257
|
+
return false;
|
|
57134
57258
|
},
|
|
57135
57259
|
/**
|
|
57136
57260
|
* Find all <noscript> that are located after <img> nodes, and which contain only one
|
|
@@ -57139,8 +57263,8 @@ var require_Readability = __commonJS({
|
|
|
57139
57263
|
* some sites (e.g. Medium).
|
|
57140
57264
|
*
|
|
57141
57265
|
* @param Element
|
|
57142
|
-
|
|
57143
|
-
_unwrapNoscriptImages
|
|
57266
|
+
**/
|
|
57267
|
+
_unwrapNoscriptImages(doc) {
|
|
57144
57268
|
var imgs = Array.from(doc.getElementsByTagName("img"));
|
|
57145
57269
|
this._forEachNode(imgs, function(img) {
|
|
57146
57270
|
for (var i = 0; i < img.attributes.length; i++) {
|
|
@@ -57156,15 +57280,15 @@ var require_Readability = __commonJS({
|
|
|
57156
57280
|
return;
|
|
57157
57281
|
}
|
|
57158
57282
|
}
|
|
57159
|
-
img.
|
|
57283
|
+
img.remove();
|
|
57160
57284
|
});
|
|
57161
57285
|
var noscripts = Array.from(doc.getElementsByTagName("noscript"));
|
|
57162
57286
|
this._forEachNode(noscripts, function(noscript) {
|
|
57163
|
-
|
|
57164
|
-
tmp.innerHTML = noscript.innerHTML;
|
|
57165
|
-
if (!this._isSingleImage(tmp)) {
|
|
57287
|
+
if (!this._isSingleImage(noscript)) {
|
|
57166
57288
|
return;
|
|
57167
57289
|
}
|
|
57290
|
+
var tmp = doc.createElement("div");
|
|
57291
|
+
tmp.innerHTML = noscript.innerHTML;
|
|
57168
57292
|
var prevElement = noscript.previousElementSibling;
|
|
57169
57293
|
if (prevElement && this._isSingleImage(prevElement)) {
|
|
57170
57294
|
var prevImg = prevElement;
|
|
@@ -57196,8 +57320,8 @@ var require_Readability = __commonJS({
|
|
|
57196
57320
|
* Removes script tags from the document.
|
|
57197
57321
|
*
|
|
57198
57322
|
* @param Element
|
|
57199
|
-
|
|
57200
|
-
_removeScripts
|
|
57323
|
+
**/
|
|
57324
|
+
_removeScripts(doc) {
|
|
57201
57325
|
this._removeNodes(this._getAllNodesWithTag(doc, ["script", "noscript"]));
|
|
57202
57326
|
},
|
|
57203
57327
|
/**
|
|
@@ -57207,8 +57331,8 @@ var require_Readability = __commonJS({
|
|
|
57207
57331
|
*
|
|
57208
57332
|
* @param Element
|
|
57209
57333
|
* @param string tag of child element
|
|
57210
|
-
|
|
57211
|
-
_hasSingleTagInsideElement
|
|
57334
|
+
**/
|
|
57335
|
+
_hasSingleTagInsideElement(element, tag2) {
|
|
57212
57336
|
if (element.children.length != 1 || element.children[0].tagName !== tag2) {
|
|
57213
57337
|
return false;
|
|
57214
57338
|
}
|
|
@@ -57216,15 +57340,15 @@ var require_Readability = __commonJS({
|
|
|
57216
57340
|
return node.nodeType === this.TEXT_NODE && this.REGEXPS.hasContent.test(node.textContent);
|
|
57217
57341
|
});
|
|
57218
57342
|
},
|
|
57219
|
-
_isElementWithoutContent
|
|
57220
|
-
return node.nodeType === this.ELEMENT_NODE && node.textContent.trim().length
|
|
57343
|
+
_isElementWithoutContent(node) {
|
|
57344
|
+
return node.nodeType === this.ELEMENT_NODE && !node.textContent.trim().length && (!node.children.length || node.children.length == node.getElementsByTagName("br").length + node.getElementsByTagName("hr").length);
|
|
57221
57345
|
},
|
|
57222
57346
|
/**
|
|
57223
57347
|
* Determine whether element has any children block level elements.
|
|
57224
57348
|
*
|
|
57225
57349
|
* @param Element
|
|
57226
57350
|
*/
|
|
57227
|
-
_hasChildBlockElement
|
|
57351
|
+
_hasChildBlockElement(element) {
|
|
57228
57352
|
return this._someNode(element.childNodes, function(node) {
|
|
57229
57353
|
return this.DIV_TO_P_ELEMS.has(node.tagName) || this._hasChildBlockElement(node);
|
|
57230
57354
|
});
|
|
@@ -57232,11 +57356,11 @@ var require_Readability = __commonJS({
|
|
|
57232
57356
|
/***
|
|
57233
57357
|
* Determine if a node qualifies as phrasing content.
|
|
57234
57358
|
* https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/Content_categories#Phrasing_content
|
|
57235
|
-
|
|
57236
|
-
_isPhrasingContent
|
|
57237
|
-
return node.nodeType === this.TEXT_NODE || this.PHRASING_ELEMS.
|
|
57359
|
+
**/
|
|
57360
|
+
_isPhrasingContent(node) {
|
|
57361
|
+
return node.nodeType === this.TEXT_NODE || this.PHRASING_ELEMS.includes(node.tagName) || (node.tagName === "A" || node.tagName === "DEL" || node.tagName === "INS") && this._everyNode(node.childNodes, this._isPhrasingContent);
|
|
57238
57362
|
},
|
|
57239
|
-
_isWhitespace
|
|
57363
|
+
_isWhitespace(node) {
|
|
57240
57364
|
return node.nodeType === this.TEXT_NODE && node.textContent.trim().length === 0 || node.nodeType === this.ELEMENT_NODE && node.tagName === "BR";
|
|
57241
57365
|
},
|
|
57242
57366
|
/**
|
|
@@ -57246,8 +57370,8 @@ var require_Readability = __commonJS({
|
|
|
57246
57370
|
* @param Element
|
|
57247
57371
|
* @param Boolean normalizeSpaces (default: true)
|
|
57248
57372
|
* @return string
|
|
57249
|
-
|
|
57250
|
-
_getInnerText
|
|
57373
|
+
**/
|
|
57374
|
+
_getInnerText(e2, normalizeSpaces) {
|
|
57251
57375
|
normalizeSpaces = typeof normalizeSpaces === "undefined" ? true : normalizeSpaces;
|
|
57252
57376
|
var textContent2 = e2.textContent.trim();
|
|
57253
57377
|
if (normalizeSpaces) {
|
|
@@ -57261,8 +57385,8 @@ var require_Readability = __commonJS({
|
|
|
57261
57385
|
* @param Element
|
|
57262
57386
|
* @param string - what to split on. Default is ","
|
|
57263
57387
|
* @return number (integer)
|
|
57264
|
-
|
|
57265
|
-
_getCharCount
|
|
57388
|
+
**/
|
|
57389
|
+
_getCharCount(e2, s2) {
|
|
57266
57390
|
s2 = s2 || ",";
|
|
57267
57391
|
return this._getInnerText(e2).split(s2).length - 1;
|
|
57268
57392
|
},
|
|
@@ -57272,14 +57396,15 @@ var require_Readability = __commonJS({
|
|
|
57272
57396
|
*
|
|
57273
57397
|
* @param Element
|
|
57274
57398
|
* @return void
|
|
57275
|
-
|
|
57276
|
-
_cleanStyles
|
|
57277
|
-
if (!e2 || e2.tagName.toLowerCase() === "svg")
|
|
57399
|
+
**/
|
|
57400
|
+
_cleanStyles(e2) {
|
|
57401
|
+
if (!e2 || e2.tagName.toLowerCase() === "svg") {
|
|
57278
57402
|
return;
|
|
57403
|
+
}
|
|
57279
57404
|
for (var i = 0; i < this.PRESENTATIONAL_ATTRIBUTES.length; i++) {
|
|
57280
57405
|
e2.removeAttribute(this.PRESENTATIONAL_ATTRIBUTES[i]);
|
|
57281
57406
|
}
|
|
57282
|
-
if (this.DEPRECATED_SIZE_ATTRIBUTE_ELEMS.
|
|
57407
|
+
if (this.DEPRECATED_SIZE_ATTRIBUTE_ELEMS.includes(e2.tagName)) {
|
|
57283
57408
|
e2.removeAttribute("width");
|
|
57284
57409
|
e2.removeAttribute("height");
|
|
57285
57410
|
}
|
|
@@ -57295,11 +57420,12 @@ var require_Readability = __commonJS({
|
|
|
57295
57420
|
*
|
|
57296
57421
|
* @param Element
|
|
57297
57422
|
* @return number (float)
|
|
57298
|
-
|
|
57299
|
-
_getLinkDensity
|
|
57423
|
+
**/
|
|
57424
|
+
_getLinkDensity(element) {
|
|
57300
57425
|
var textLength = this._getInnerText(element).length;
|
|
57301
|
-
if (textLength === 0)
|
|
57426
|
+
if (textLength === 0) {
|
|
57302
57427
|
return 0;
|
|
57428
|
+
}
|
|
57303
57429
|
var linkLength = 0;
|
|
57304
57430
|
this._forEachNode(element.getElementsByTagName("a"), function(linkNode) {
|
|
57305
57431
|
var href = linkNode.getAttribute("href");
|
|
@@ -57314,22 +57440,27 @@ var require_Readability = __commonJS({
|
|
|
57314
57440
|
*
|
|
57315
57441
|
* @param Element
|
|
57316
57442
|
* @return number (Integer)
|
|
57317
|
-
|
|
57318
|
-
_getClassWeight
|
|
57319
|
-
if (!this._flagIsActive(this.FLAG_WEIGHT_CLASSES))
|
|
57443
|
+
**/
|
|
57444
|
+
_getClassWeight(e2) {
|
|
57445
|
+
if (!this._flagIsActive(this.FLAG_WEIGHT_CLASSES)) {
|
|
57320
57446
|
return 0;
|
|
57447
|
+
}
|
|
57321
57448
|
var weight = 0;
|
|
57322
57449
|
if (typeof e2.className === "string" && e2.className !== "") {
|
|
57323
|
-
if (this.REGEXPS.negative.test(e2.className))
|
|
57450
|
+
if (this.REGEXPS.negative.test(e2.className)) {
|
|
57324
57451
|
weight -= 25;
|
|
57325
|
-
|
|
57452
|
+
}
|
|
57453
|
+
if (this.REGEXPS.positive.test(e2.className)) {
|
|
57326
57454
|
weight += 25;
|
|
57455
|
+
}
|
|
57327
57456
|
}
|
|
57328
57457
|
if (typeof e2.id === "string" && e2.id !== "") {
|
|
57329
|
-
if (this.REGEXPS.negative.test(e2.id))
|
|
57458
|
+
if (this.REGEXPS.negative.test(e2.id)) {
|
|
57330
57459
|
weight -= 25;
|
|
57331
|
-
|
|
57460
|
+
}
|
|
57461
|
+
if (this.REGEXPS.positive.test(e2.id)) {
|
|
57332
57462
|
weight += 25;
|
|
57463
|
+
}
|
|
57333
57464
|
}
|
|
57334
57465
|
return weight;
|
|
57335
57466
|
},
|
|
@@ -57341,8 +57472,8 @@ var require_Readability = __commonJS({
|
|
|
57341
57472
|
* @param string tag to clean
|
|
57342
57473
|
* @return void
|
|
57343
57474
|
**/
|
|
57344
|
-
_clean
|
|
57345
|
-
var isEmbed = ["object", "embed", "iframe"].
|
|
57475
|
+
_clean(e2, tag2) {
|
|
57476
|
+
var isEmbed = ["object", "embed", "iframe"].includes(tag2);
|
|
57346
57477
|
this._removeNodes(this._getAllNodesWithTag(e2, [tag2]), function(element) {
|
|
57347
57478
|
if (isEmbed) {
|
|
57348
57479
|
for (var i = 0; i < element.attributes.length; i++) {
|
|
@@ -57366,15 +57497,17 @@ var require_Readability = __commonJS({
|
|
|
57366
57497
|
* @param Function filterFn a filter to invoke to determine whether this node 'counts'
|
|
57367
57498
|
* @return Boolean
|
|
57368
57499
|
*/
|
|
57369
|
-
_hasAncestorTag
|
|
57500
|
+
_hasAncestorTag(node, tagName, maxDepth, filterFn) {
|
|
57370
57501
|
maxDepth = maxDepth || 3;
|
|
57371
57502
|
tagName = tagName.toUpperCase();
|
|
57372
57503
|
var depth = 0;
|
|
57373
57504
|
while (node.parentNode) {
|
|
57374
|
-
if (maxDepth > 0 && depth > maxDepth)
|
|
57505
|
+
if (maxDepth > 0 && depth > maxDepth) {
|
|
57375
57506
|
return false;
|
|
57376
|
-
|
|
57507
|
+
}
|
|
57508
|
+
if (node.parentNode.tagName === tagName && (!filterFn || filterFn(node.parentNode))) {
|
|
57377
57509
|
return true;
|
|
57510
|
+
}
|
|
57378
57511
|
node = node.parentNode;
|
|
57379
57512
|
depth++;
|
|
57380
57513
|
}
|
|
@@ -57383,7 +57516,7 @@ var require_Readability = __commonJS({
|
|
|
57383
57516
|
/**
|
|
57384
57517
|
* Return an object indicating how many rows and columns this table has.
|
|
57385
57518
|
*/
|
|
57386
|
-
_getRowAndColumnCount
|
|
57519
|
+
_getRowAndColumnCount(table) {
|
|
57387
57520
|
var rows = 0;
|
|
57388
57521
|
var columns = 0;
|
|
57389
57522
|
var trs = table.getElementsByTagName("tr");
|
|
@@ -57411,7 +57544,7 @@ var require_Readability = __commonJS({
|
|
|
57411
57544
|
* similar checks as
|
|
57412
57545
|
* https://searchfox.org/mozilla-central/rev/f82d5c549f046cb64ce5602bfd894b7ae807c8f8/accessible/generic/TableAccessible.cpp#19
|
|
57413
57546
|
*/
|
|
57414
|
-
_markDataTables
|
|
57547
|
+
_markDataTables(root) {
|
|
57415
57548
|
var tables = root.getElementsByTagName("table");
|
|
57416
57549
|
for (var i = 0; i < tables.length; i++) {
|
|
57417
57550
|
var table = tables[i];
|
|
@@ -57431,7 +57564,7 @@ var require_Readability = __commonJS({
|
|
|
57431
57564
|
continue;
|
|
57432
57565
|
}
|
|
57433
57566
|
var caption = table.getElementsByTagName("caption")[0];
|
|
57434
|
-
if (caption && caption.childNodes.length
|
|
57567
|
+
if (caption && caption.childNodes.length) {
|
|
57435
57568
|
table._readabilityDataTable = true;
|
|
57436
57569
|
continue;
|
|
57437
57570
|
}
|
|
@@ -57449,6 +57582,10 @@ var require_Readability = __commonJS({
|
|
|
57449
57582
|
continue;
|
|
57450
57583
|
}
|
|
57451
57584
|
var sizeInfo = this._getRowAndColumnCount(table);
|
|
57585
|
+
if (sizeInfo.columns == 1 || sizeInfo.rows == 1) {
|
|
57586
|
+
table._readabilityDataTable = false;
|
|
57587
|
+
continue;
|
|
57588
|
+
}
|
|
57452
57589
|
if (sizeInfo.rows >= 10 || sizeInfo.columns > 4) {
|
|
57453
57590
|
table._readabilityDataTable = true;
|
|
57454
57591
|
continue;
|
|
@@ -57457,66 +57594,72 @@ var require_Readability = __commonJS({
|
|
|
57457
57594
|
}
|
|
57458
57595
|
},
|
|
57459
57596
|
/* convert images and figures that have properties like data-src into images that can be loaded without JS */
|
|
57460
|
-
_fixLazyImages
|
|
57461
|
-
this._forEachNode(
|
|
57462
|
-
|
|
57463
|
-
|
|
57464
|
-
if (
|
|
57465
|
-
|
|
57466
|
-
|
|
57467
|
-
|
|
57468
|
-
for (var i = 0; i < elem.attributes.length; i++) {
|
|
57469
|
-
var attr = elem.attributes[i];
|
|
57470
|
-
if (attr.name === "src") {
|
|
57471
|
-
continue;
|
|
57597
|
+
_fixLazyImages(root) {
|
|
57598
|
+
this._forEachNode(
|
|
57599
|
+
this._getAllNodesWithTag(root, ["img", "picture", "figure"]),
|
|
57600
|
+
function(elem) {
|
|
57601
|
+
if (elem.src && this.REGEXPS.b64DataUrl.test(elem.src)) {
|
|
57602
|
+
var parts = this.REGEXPS.b64DataUrl.exec(elem.src);
|
|
57603
|
+
if (parts[1] === "image/svg+xml") {
|
|
57604
|
+
return;
|
|
57472
57605
|
}
|
|
57473
|
-
|
|
57474
|
-
|
|
57475
|
-
|
|
57606
|
+
var srcCouldBeRemoved = false;
|
|
57607
|
+
for (var i = 0; i < elem.attributes.length; i++) {
|
|
57608
|
+
var attr = elem.attributes[i];
|
|
57609
|
+
if (attr.name === "src") {
|
|
57610
|
+
continue;
|
|
57611
|
+
}
|
|
57612
|
+
if (/\.(jpg|jpeg|png|webp)/i.test(attr.value)) {
|
|
57613
|
+
srcCouldBeRemoved = true;
|
|
57614
|
+
break;
|
|
57615
|
+
}
|
|
57476
57616
|
}
|
|
57477
|
-
|
|
57478
|
-
|
|
57479
|
-
|
|
57480
|
-
|
|
57481
|
-
|
|
57482
|
-
|
|
57617
|
+
if (srcCouldBeRemoved) {
|
|
57618
|
+
var b64starts = parts[0].length;
|
|
57619
|
+
var b64length = elem.src.length - b64starts;
|
|
57620
|
+
if (b64length < 133) {
|
|
57621
|
+
elem.removeAttribute("src");
|
|
57622
|
+
}
|
|
57483
57623
|
}
|
|
57484
57624
|
}
|
|
57485
|
-
|
|
57486
|
-
|
|
57487
|
-
return;
|
|
57488
|
-
}
|
|
57489
|
-
for (var j = 0; j < elem.attributes.length; j++) {
|
|
57490
|
-
attr = elem.attributes[j];
|
|
57491
|
-
if (attr.name === "src" || attr.name === "srcset" || attr.name === "alt") {
|
|
57492
|
-
continue;
|
|
57493
|
-
}
|
|
57494
|
-
var copyTo = null;
|
|
57495
|
-
if (/\.(jpg|jpeg|png|webp)\s+\d/.test(attr.value)) {
|
|
57496
|
-
copyTo = "srcset";
|
|
57497
|
-
} else if (/^\s*\S+\.(jpg|jpeg|png|webp)\S*\s*$/.test(attr.value)) {
|
|
57498
|
-
copyTo = "src";
|
|
57625
|
+
if ((elem.src || elem.srcset && elem.srcset != "null") && !elem.className.toLowerCase().includes("lazy")) {
|
|
57626
|
+
return;
|
|
57499
57627
|
}
|
|
57500
|
-
|
|
57501
|
-
|
|
57502
|
-
|
|
57503
|
-
|
|
57504
|
-
|
|
57505
|
-
|
|
57506
|
-
|
|
57628
|
+
for (var j = 0; j < elem.attributes.length; j++) {
|
|
57629
|
+
attr = elem.attributes[j];
|
|
57630
|
+
if (attr.name === "src" || attr.name === "srcset" || attr.name === "alt") {
|
|
57631
|
+
continue;
|
|
57632
|
+
}
|
|
57633
|
+
var copyTo = null;
|
|
57634
|
+
if (/\.(jpg|jpeg|png|webp)\s+\d/.test(attr.value)) {
|
|
57635
|
+
copyTo = "srcset";
|
|
57636
|
+
} else if (/^\s*\S+\.(jpg|jpeg|png|webp)\S*\s*$/.test(attr.value)) {
|
|
57637
|
+
copyTo = "src";
|
|
57638
|
+
}
|
|
57639
|
+
if (copyTo) {
|
|
57640
|
+
if (elem.tagName === "IMG" || elem.tagName === "PICTURE") {
|
|
57641
|
+
elem.setAttribute(copyTo, attr.value);
|
|
57642
|
+
} else if (elem.tagName === "FIGURE" && !this._getAllNodesWithTag(elem, ["img", "picture"]).length) {
|
|
57643
|
+
var img = this._doc.createElement("img");
|
|
57644
|
+
img.setAttribute(copyTo, attr.value);
|
|
57645
|
+
elem.appendChild(img);
|
|
57646
|
+
}
|
|
57507
57647
|
}
|
|
57508
57648
|
}
|
|
57509
57649
|
}
|
|
57510
|
-
|
|
57650
|
+
);
|
|
57511
57651
|
},
|
|
57512
|
-
_getTextDensity
|
|
57652
|
+
_getTextDensity(e2, tags) {
|
|
57513
57653
|
var textLength = this._getInnerText(e2, true).length;
|
|
57514
57654
|
if (textLength === 0) {
|
|
57515
57655
|
return 0;
|
|
57516
57656
|
}
|
|
57517
57657
|
var childrenLength = 0;
|
|
57518
57658
|
var children2 = this._getAllNodesWithTag(e2, tags);
|
|
57519
|
-
this._forEachNode(
|
|
57659
|
+
this._forEachNode(
|
|
57660
|
+
children2,
|
|
57661
|
+
(child) => childrenLength += this._getInnerText(child, true).length
|
|
57662
|
+
);
|
|
57520
57663
|
return childrenLength / textLength;
|
|
57521
57664
|
},
|
|
57522
57665
|
/**
|
|
@@ -57525,9 +57668,10 @@ var require_Readability = __commonJS({
|
|
|
57525
57668
|
*
|
|
57526
57669
|
* @return void
|
|
57527
57670
|
**/
|
|
57528
|
-
_cleanConditionally
|
|
57529
|
-
if (!this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY))
|
|
57671
|
+
_cleanConditionally(e2, tag2) {
|
|
57672
|
+
if (!this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)) {
|
|
57530
57673
|
return;
|
|
57674
|
+
}
|
|
57531
57675
|
this._removeNodes(this._getAllNodesWithTag(e2, [tag2]), function(node) {
|
|
57532
57676
|
var isDataTable2 = function(t2) {
|
|
57533
57677
|
return t2._readabilityDataTable;
|
|
@@ -57536,7 +57680,10 @@ var require_Readability = __commonJS({
|
|
|
57536
57680
|
if (!isList) {
|
|
57537
57681
|
var listLength = 0;
|
|
57538
57682
|
var listNodes = this._getAllNodesWithTag(node, ["ul", "ol"]);
|
|
57539
|
-
this._forEachNode(
|
|
57683
|
+
this._forEachNode(
|
|
57684
|
+
listNodes,
|
|
57685
|
+
(list2) => listLength += this._getInnerText(list2).length
|
|
57686
|
+
);
|
|
57540
57687
|
isList = listLength / this._getInnerText(node).length > 0.9;
|
|
57541
57688
|
}
|
|
57542
57689
|
if (tag2 === "table" && isDataTable2(node)) {
|
|
@@ -57548,6 +57695,11 @@ var require_Readability = __commonJS({
|
|
|
57548
57695
|
if (this._hasAncestorTag(node, "code")) {
|
|
57549
57696
|
return false;
|
|
57550
57697
|
}
|
|
57698
|
+
if ([...node.getElementsByTagName("table")].some(
|
|
57699
|
+
(tbl) => tbl._readabilityDataTable
|
|
57700
|
+
)) {
|
|
57701
|
+
return false;
|
|
57702
|
+
}
|
|
57551
57703
|
var weight = this._getClassWeight(node);
|
|
57552
57704
|
this.log("Cleaning Conditionally", node);
|
|
57553
57705
|
var contentScore = 0;
|
|
@@ -57559,9 +57711,20 @@ var require_Readability = __commonJS({
|
|
|
57559
57711
|
var img = node.getElementsByTagName("img").length;
|
|
57560
57712
|
var li = node.getElementsByTagName("li").length - 100;
|
|
57561
57713
|
var input = node.getElementsByTagName("input").length;
|
|
57562
|
-
var headingDensity = this._getTextDensity(node, [
|
|
57714
|
+
var headingDensity = this._getTextDensity(node, [
|
|
57715
|
+
"h1",
|
|
57716
|
+
"h2",
|
|
57717
|
+
"h3",
|
|
57718
|
+
"h4",
|
|
57719
|
+
"h5",
|
|
57720
|
+
"h6"
|
|
57721
|
+
]);
|
|
57563
57722
|
var embedCount = 0;
|
|
57564
|
-
var embeds = this._getAllNodesWithTag(node, [
|
|
57723
|
+
var embeds = this._getAllNodesWithTag(node, [
|
|
57724
|
+
"object",
|
|
57725
|
+
"embed",
|
|
57726
|
+
"iframe"
|
|
57727
|
+
]);
|
|
57565
57728
|
for (var i = 0; i < embeds.length; i++) {
|
|
57566
57729
|
for (var j = 0; j < embeds[i].attributes.length; j++) {
|
|
57567
57730
|
if (this._allowedVideoRegex.test(embeds[i].attributes[j].value)) {
|
|
@@ -57573,9 +57736,60 @@ var require_Readability = __commonJS({
|
|
|
57573
57736
|
}
|
|
57574
57737
|
embedCount++;
|
|
57575
57738
|
}
|
|
57739
|
+
var innerText = this._getInnerText(node);
|
|
57740
|
+
if (this.REGEXPS.adWords.test(innerText) || this.REGEXPS.loadingWords.test(innerText)) {
|
|
57741
|
+
return true;
|
|
57742
|
+
}
|
|
57743
|
+
var contentLength = innerText.length;
|
|
57576
57744
|
var linkDensity = this._getLinkDensity(node);
|
|
57577
|
-
var
|
|
57578
|
-
|
|
57745
|
+
var textishTags = ["SPAN", "LI", "TD"].concat(
|
|
57746
|
+
Array.from(this.DIV_TO_P_ELEMS)
|
|
57747
|
+
);
|
|
57748
|
+
var textDensity = this._getTextDensity(node, textishTags);
|
|
57749
|
+
var isFigureChild = this._hasAncestorTag(node, "figure");
|
|
57750
|
+
const shouldRemoveNode = () => {
|
|
57751
|
+
const errs = [];
|
|
57752
|
+
if (!isFigureChild && img > 1 && p / img < 0.5) {
|
|
57753
|
+
errs.push(`Bad p to img ratio (img=${img}, p=${p})`);
|
|
57754
|
+
}
|
|
57755
|
+
if (!isList && li > p) {
|
|
57756
|
+
errs.push(`Too many li's outside of a list. (li=${li} > p=${p})`);
|
|
57757
|
+
}
|
|
57758
|
+
if (input > Math.floor(p / 3)) {
|
|
57759
|
+
errs.push(`Too many inputs per p. (input=${input}, p=${p})`);
|
|
57760
|
+
}
|
|
57761
|
+
if (!isList && !isFigureChild && headingDensity < 0.9 && contentLength < 25 && (img === 0 || img > 2) && linkDensity > 0) {
|
|
57762
|
+
errs.push(
|
|
57763
|
+
`Suspiciously short. (headingDensity=${headingDensity}, img=${img}, linkDensity=${linkDensity})`
|
|
57764
|
+
);
|
|
57765
|
+
}
|
|
57766
|
+
if (!isList && weight < 25 && linkDensity > 0.2 + this._linkDensityModifier) {
|
|
57767
|
+
errs.push(
|
|
57768
|
+
`Low weight and a little linky. (linkDensity=${linkDensity})`
|
|
57769
|
+
);
|
|
57770
|
+
}
|
|
57771
|
+
if (weight >= 25 && linkDensity > 0.5 + this._linkDensityModifier) {
|
|
57772
|
+
errs.push(
|
|
57773
|
+
`High weight and mostly links. (linkDensity=${linkDensity})`
|
|
57774
|
+
);
|
|
57775
|
+
}
|
|
57776
|
+
if (embedCount === 1 && contentLength < 75 || embedCount > 1) {
|
|
57777
|
+
errs.push(
|
|
57778
|
+
`Suspicious embed. (embedCount=${embedCount}, contentLength=${contentLength})`
|
|
57779
|
+
);
|
|
57780
|
+
}
|
|
57781
|
+
if (img === 0 && textDensity === 0) {
|
|
57782
|
+
errs.push(
|
|
57783
|
+
`No useful content. (img=${img}, textDensity=${textDensity})`
|
|
57784
|
+
);
|
|
57785
|
+
}
|
|
57786
|
+
if (errs.length) {
|
|
57787
|
+
this.log("Checks failed", errs);
|
|
57788
|
+
return true;
|
|
57789
|
+
}
|
|
57790
|
+
return false;
|
|
57791
|
+
};
|
|
57792
|
+
var haveToRemove = shouldRemoveNode();
|
|
57579
57793
|
if (isList && haveToRemove) {
|
|
57580
57794
|
for (var x = 0; x < node.children.length; x++) {
|
|
57581
57795
|
let child = node.children[x];
|
|
@@ -57600,7 +57814,7 @@ var require_Readability = __commonJS({
|
|
|
57600
57814
|
* @param Function determines whether a node should be removed
|
|
57601
57815
|
* @return void
|
|
57602
57816
|
**/
|
|
57603
|
-
_cleanMatchedNodes
|
|
57817
|
+
_cleanMatchedNodes(e2, filter4) {
|
|
57604
57818
|
var endOfSearchMarkerNode = this._getNextNode(e2, true);
|
|
57605
57819
|
var next = this._getNextNode(e2);
|
|
57606
57820
|
while (next && next != endOfSearchMarkerNode) {
|
|
@@ -57616,8 +57830,8 @@ var require_Readability = __commonJS({
|
|
|
57616
57830
|
*
|
|
57617
57831
|
* @param Element
|
|
57618
57832
|
* @return void
|
|
57619
|
-
|
|
57620
|
-
_cleanHeaders
|
|
57833
|
+
**/
|
|
57834
|
+
_cleanHeaders(e2) {
|
|
57621
57835
|
let headingNodes = this._getAllNodesWithTag(e2, ["h1", "h2"]);
|
|
57622
57836
|
this._removeNodes(headingNodes, function(node) {
|
|
57623
57837
|
let shouldRemove = this._getClassWeight(node) < 0;
|
|
@@ -57634,7 +57848,7 @@ var require_Readability = __commonJS({
|
|
|
57634
57848
|
* @param Element the node to check.
|
|
57635
57849
|
* @return boolean indicating whether this is a title-like header.
|
|
57636
57850
|
*/
|
|
57637
|
-
_headerDuplicatesTitle
|
|
57851
|
+
_headerDuplicatesTitle(node) {
|
|
57638
57852
|
if (node.tagName != "H1" && node.tagName != "H2") {
|
|
57639
57853
|
return false;
|
|
57640
57854
|
}
|
|
@@ -57642,14 +57856,15 @@ var require_Readability = __commonJS({
|
|
|
57642
57856
|
this.log("Evaluating similarity of header:", heading2, this._articleTitle);
|
|
57643
57857
|
return this._textSimilarity(this._articleTitle, heading2) > 0.75;
|
|
57644
57858
|
},
|
|
57645
|
-
_flagIsActive
|
|
57859
|
+
_flagIsActive(flag) {
|
|
57646
57860
|
return (this._flags & flag) > 0;
|
|
57647
57861
|
},
|
|
57648
|
-
_removeFlag
|
|
57862
|
+
_removeFlag(flag) {
|
|
57649
57863
|
this._flags = this._flags & ~flag;
|
|
57650
57864
|
},
|
|
57651
|
-
_isProbablyVisible
|
|
57652
|
-
return (!node.style || node.style.display != "none") && (!node.style || node.style.visibility != "hidden") && !node.hasAttribute("hidden") &&
|
|
57865
|
+
_isProbablyVisible(node) {
|
|
57866
|
+
return (!node.style || node.style.display != "none") && (!node.style || node.style.visibility != "hidden") && !node.hasAttribute("hidden") && //check for "fallback-image" so that wikimedia math images are displayed
|
|
57867
|
+
(!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true" || node.className && node.className.includes && node.className.includes("fallback-image"));
|
|
57653
57868
|
},
|
|
57654
57869
|
/**
|
|
57655
57870
|
* Runs readability.
|
|
@@ -57663,11 +57878,13 @@ var require_Readability = __commonJS({
|
|
|
57663
57878
|
*
|
|
57664
57879
|
* @return void
|
|
57665
57880
|
**/
|
|
57666
|
-
parse
|
|
57881
|
+
parse() {
|
|
57667
57882
|
if (this._maxElemsToParse > 0) {
|
|
57668
57883
|
var numTags = this._doc.getElementsByTagName("*").length;
|
|
57669
57884
|
if (numTags > this._maxElemsToParse) {
|
|
57670
|
-
throw new Error(
|
|
57885
|
+
throw new Error(
|
|
57886
|
+
"Aborting parsing document; " + numTags + " elements found"
|
|
57887
|
+
);
|
|
57671
57888
|
}
|
|
57672
57889
|
}
|
|
57673
57890
|
this._unwrapNoscriptImages(this._doc);
|
|
@@ -57675,15 +57892,17 @@ var require_Readability = __commonJS({
|
|
|
57675
57892
|
this._removeScripts(this._doc);
|
|
57676
57893
|
this._prepDocument();
|
|
57677
57894
|
var metadata = this._getArticleMetadata(jsonLd);
|
|
57895
|
+
this._metadata = metadata;
|
|
57678
57896
|
this._articleTitle = metadata.title;
|
|
57679
57897
|
var articleContent = this._grabArticle();
|
|
57680
|
-
if (!articleContent)
|
|
57898
|
+
if (!articleContent) {
|
|
57681
57899
|
return null;
|
|
57900
|
+
}
|
|
57682
57901
|
this.log("Grabbed: " + articleContent.innerHTML);
|
|
57683
57902
|
this._postProcessContent(articleContent);
|
|
57684
57903
|
if (!metadata.excerpt) {
|
|
57685
57904
|
var paragraphs = articleContent.getElementsByTagName("p");
|
|
57686
|
-
if (paragraphs.length
|
|
57905
|
+
if (paragraphs.length) {
|
|
57687
57906
|
metadata.excerpt = paragraphs[0].textContent.trim();
|
|
57688
57907
|
}
|
|
57689
57908
|
}
|
|
@@ -57708,9 +57927,9 @@ var require_Readability = __commonJS({
|
|
|
57708
57927
|
}
|
|
57709
57928
|
});
|
|
57710
57929
|
|
|
57711
|
-
//
|
|
57930
|
+
// node_modules/@mozilla/readability/Readability-readerable.js
|
|
57712
57931
|
var require_Readability_readerable = __commonJS({
|
|
57713
|
-
"
|
|
57932
|
+
"node_modules/@mozilla/readability/Readability-readerable.js"(exports, module) {
|
|
57714
57933
|
var REGEXPS = {
|
|
57715
57934
|
// NOTE: These two regular expressions are duplicated in
|
|
57716
57935
|
// Readability.js. Please keep both copies in sync.
|
|
@@ -57718,13 +57937,18 @@ var require_Readability_readerable = __commonJS({
|
|
|
57718
57937
|
okMaybeItsACandidate: /and|article|body|column|content|main|shadow/i
|
|
57719
57938
|
};
|
|
57720
57939
|
function isNodeVisible(node) {
|
|
57721
|
-
return (!node.style || node.style.display != "none") && !node.hasAttribute("hidden") &&
|
|
57940
|
+
return (!node.style || node.style.display != "none") && !node.hasAttribute("hidden") && //check for "fallback-image" so that wikimedia math images are displayed
|
|
57941
|
+
(!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true" || node.className && node.className.includes && node.className.includes("fallback-image"));
|
|
57722
57942
|
}
|
|
57723
57943
|
function isProbablyReaderable(doc, options3 = {}) {
|
|
57724
57944
|
if (typeof options3 == "function") {
|
|
57725
57945
|
options3 = { visibilityChecker: options3 };
|
|
57726
57946
|
}
|
|
57727
|
-
var defaultOptions = {
|
|
57947
|
+
var defaultOptions = {
|
|
57948
|
+
minScore: 20,
|
|
57949
|
+
minContentLength: 140,
|
|
57950
|
+
visibilityChecker: isNodeVisible
|
|
57951
|
+
};
|
|
57728
57952
|
options3 = Object.assign(defaultOptions, options3);
|
|
57729
57953
|
var nodes = doc.querySelectorAll("p, pre, article");
|
|
57730
57954
|
var brNodes = doc.querySelectorAll("div > br");
|
|
@@ -57764,9 +57988,9 @@ var require_Readability_readerable = __commonJS({
|
|
|
57764
57988
|
}
|
|
57765
57989
|
});
|
|
57766
57990
|
|
|
57767
|
-
//
|
|
57991
|
+
// node_modules/@mozilla/readability/index.js
|
|
57768
57992
|
var require_readability = __commonJS({
|
|
57769
|
-
"
|
|
57993
|
+
"node_modules/@mozilla/readability/index.js"(exports, module) {
|
|
57770
57994
|
var Readability2 = require_Readability();
|
|
57771
57995
|
var isProbablyReaderable = require_Readability_readerable();
|
|
57772
57996
|
module.exports = {
|
|
@@ -93428,7 +93652,7 @@ var require_xlsx = __commonJS({
|
|
|
93428
93652
|
}
|
|
93429
93653
|
return ws2;
|
|
93430
93654
|
}
|
|
93431
|
-
var
|
|
93655
|
+
var utils = {
|
|
93432
93656
|
encode_col,
|
|
93433
93657
|
encode_row,
|
|
93434
93658
|
encode_cell,
|
|
@@ -93627,7 +93851,7 @@ var require_xlsx = __commonJS({
|
|
|
93627
93851
|
XLSX3.writeFile = writeFileSync7;
|
|
93628
93852
|
XLSX3.writeFileSync = writeFileSync7;
|
|
93629
93853
|
XLSX3.writeFileAsync = writeFileAsync;
|
|
93630
|
-
XLSX3.utils =
|
|
93854
|
+
XLSX3.utils = utils;
|
|
93631
93855
|
XLSX3.writeXLSX = writeSyncXLSX;
|
|
93632
93856
|
XLSX3.writeFileXLSX = writeFileSyncXLSX;
|
|
93633
93857
|
XLSX3.SSF = SSF;
|
|
@@ -97968,13 +98192,13 @@ var init_memory_tracker = __esm({
|
|
|
97968
98192
|
MemoryTracker = class {
|
|
97969
98193
|
maxMemoryMB;
|
|
97970
98194
|
startTime;
|
|
97971
|
-
|
|
97972
|
-
|
|
98195
|
+
_lastCheckTime;
|
|
98196
|
+
_checkInterval = 1e3;
|
|
97973
98197
|
// Check every second
|
|
97974
98198
|
constructor(maxMemoryMB) {
|
|
97975
98199
|
this.maxMemoryMB = maxMemoryMB;
|
|
97976
98200
|
this.startTime = Date.now();
|
|
97977
|
-
this.
|
|
98201
|
+
this._lastCheckTime = this.startTime;
|
|
97978
98202
|
}
|
|
97979
98203
|
/**
|
|
97980
98204
|
* Get current memory usage in MB
|
|
@@ -98037,7 +98261,7 @@ var init_memory_tracker = __esm({
|
|
|
98037
98261
|
*/
|
|
98038
98262
|
reset() {
|
|
98039
98263
|
this.startTime = Date.now();
|
|
98040
|
-
this.
|
|
98264
|
+
this._lastCheckTime = this.startTime;
|
|
98041
98265
|
}
|
|
98042
98266
|
/**
|
|
98043
98267
|
* Log memory status for debugging
|
|
@@ -98063,11 +98287,11 @@ __export(excel_parser_exports, {
|
|
|
98063
98287
|
ExcelParser: () => ExcelParser
|
|
98064
98288
|
});
|
|
98065
98289
|
import * as fs24 from "fs";
|
|
98066
|
-
var
|
|
98290
|
+
var import_xlsx, ExcelParser;
|
|
98067
98291
|
var init_excel_parser = __esm({
|
|
98068
98292
|
"packages/core/dist/src/parsers/excel/excel-parser.js"() {
|
|
98069
98293
|
"use strict";
|
|
98070
|
-
|
|
98294
|
+
import_xlsx = __toESM(require_xlsx(), 1);
|
|
98071
98295
|
init_lib2();
|
|
98072
98296
|
init_js_yaml();
|
|
98073
98297
|
init_safety_manager();
|
|
@@ -98230,9 +98454,9 @@ var init_excel_parser = __esm({
|
|
|
98230
98454
|
return result;
|
|
98231
98455
|
}
|
|
98232
98456
|
// Enhanced streaming Excel file parser
|
|
98233
|
-
async
|
|
98457
|
+
async _parseExcelFileStreaming(filePath) {
|
|
98234
98458
|
this.reportProgress("reading", filePath, 0, 1, 0, 0);
|
|
98235
|
-
const workbook =
|
|
98459
|
+
const workbook = import_xlsx.default.readFile(filePath, {
|
|
98236
98460
|
bookProps: true,
|
|
98237
98461
|
bookSheets: true,
|
|
98238
98462
|
sheetRows: 0
|
|
@@ -98265,7 +98489,7 @@ var init_excel_parser = __esm({
|
|
|
98265
98489
|
}
|
|
98266
98490
|
// Process worksheet with streaming and enhanced features
|
|
98267
98491
|
async processWorksheetStreaming(workbook, sheetName, filePath) {
|
|
98268
|
-
const sheetWorkbook =
|
|
98492
|
+
const sheetWorkbook = import_xlsx.default.readFile(filePath, {
|
|
98269
98493
|
sheets: [sheetName],
|
|
98270
98494
|
cellFormula: this.config.preserveFormulas,
|
|
98271
98495
|
cellStyles: this.config.includeFormatting
|
|
@@ -98305,9 +98529,7 @@ var init_excel_parser = __esm({
|
|
|
98305
98529
|
options3.cellFormula = this.config.preserveFormulas;
|
|
98306
98530
|
if (this.config.includeFormatting !== void 0)
|
|
98307
98531
|
options3.cellStyles = this.config.includeFormatting;
|
|
98308
|
-
|
|
98309
|
-
options3.bookProps = this.config.includeMetadata;
|
|
98310
|
-
const workbook = XLSX.readFile(filePath, options3);
|
|
98532
|
+
const workbook = import_xlsx.default.readFile(filePath, options3);
|
|
98311
98533
|
const workbookData = {
|
|
98312
98534
|
sheets: {}
|
|
98313
98535
|
};
|
|
@@ -98328,6 +98550,9 @@ var init_excel_parser = __esm({
|
|
|
98328
98550
|
metadata.sheetNames = workbook.SheetNames;
|
|
98329
98551
|
workbookData.metadata = metadata;
|
|
98330
98552
|
}
|
|
98553
|
+
if (!workbook.SheetNames || !Array.isArray(workbook.SheetNames) || workbook.SheetNames.length === 0) {
|
|
98554
|
+
throw new Error("No sheets found in workbook or SheetNames is invalid");
|
|
98555
|
+
}
|
|
98331
98556
|
const sheetsToProcess = this.config.readAllSheets ? workbook.SheetNames : [workbook.SheetNames[0]];
|
|
98332
98557
|
for (const sheetName of sheetsToProcess) {
|
|
98333
98558
|
const worksheet = workbook.Sheets[sheetName];
|
|
@@ -98342,16 +98567,16 @@ var init_excel_parser = __esm({
|
|
|
98342
98567
|
const targetRange = this.config.range || sheetRange;
|
|
98343
98568
|
let range;
|
|
98344
98569
|
try {
|
|
98345
|
-
range =
|
|
98570
|
+
range = import_xlsx.default.utils.decode_range(targetRange);
|
|
98346
98571
|
} catch (_error) {
|
|
98347
98572
|
console.warn(`Invalid range specified: ${targetRange}, falling back to full sheet`);
|
|
98348
|
-
range =
|
|
98573
|
+
range = import_xlsx.default.utils.decode_range(sheetRange);
|
|
98349
98574
|
}
|
|
98350
98575
|
const data = [];
|
|
98351
98576
|
for (let row = range.s.r; row <= range.e.r; row++) {
|
|
98352
98577
|
const rowData = [];
|
|
98353
98578
|
for (let col = range.s.c; col <= range.e.c; col++) {
|
|
98354
|
-
const cellAddress =
|
|
98579
|
+
const cellAddress = import_xlsx.default.utils.encode_cell({ r: row, c: col });
|
|
98355
98580
|
const cell = worksheet[cellAddress];
|
|
98356
98581
|
if (cell) {
|
|
98357
98582
|
const cellData = {
|
|
@@ -98412,7 +98637,7 @@ var init_excel_parser = __esm({
|
|
|
98412
98637
|
const sheetData = {
|
|
98413
98638
|
name: "Sheet1",
|
|
98414
98639
|
data,
|
|
98415
|
-
range: `A1:${
|
|
98640
|
+
range: `A1:${import_xlsx.default.utils.encode_cell({ r: data.length - 1, c: Math.max(0, (data[0]?.length || 1) - 1) })}`,
|
|
98416
98641
|
rowCount: data.length,
|
|
98417
98642
|
columnCount: data[0]?.length || 0
|
|
98418
98643
|
};
|
|
@@ -98594,7 +98819,7 @@ var init_excel_parser = __esm({
|
|
|
98594
98819
|
extractTableInfo(worksheet) {
|
|
98595
98820
|
const tables = [];
|
|
98596
98821
|
if (worksheet["!ref"]) {
|
|
98597
|
-
const range =
|
|
98822
|
+
const range = import_xlsx.default.utils.decode_range(worksheet["!ref"]);
|
|
98598
98823
|
const hasHeaders = this.detectHeaders(worksheet, range);
|
|
98599
98824
|
if (hasHeaders) {
|
|
98600
98825
|
const headers = this.extractTableHeaders(worksheet, range);
|
|
@@ -98623,7 +98848,7 @@ var init_excel_parser = __esm({
|
|
|
98623
98848
|
// Detect if worksheet has headers
|
|
98624
98849
|
detectHeaders(worksheet, range) {
|
|
98625
98850
|
for (let col = range.s.c; col <= range.e.c; col++) {
|
|
98626
|
-
const cellAddr =
|
|
98851
|
+
const cellAddr = import_xlsx.default.utils.encode_cell({ r: range.s.r, c: col });
|
|
98627
98852
|
const cell = worksheet[cellAddr];
|
|
98628
98853
|
if (cell && typeof cell.v === "string") {
|
|
98629
98854
|
return true;
|
|
@@ -98635,14 +98860,14 @@ var init_excel_parser = __esm({
|
|
|
98635
98860
|
extractTableHeaders(worksheet, range) {
|
|
98636
98861
|
const headers = [];
|
|
98637
98862
|
for (let col = range.s.c; col <= range.e.c; col++) {
|
|
98638
|
-
const cellAddr =
|
|
98863
|
+
const cellAddr = import_xlsx.default.utils.encode_cell({ r: range.s.r, c: col });
|
|
98639
98864
|
const cell = worksheet[cellAddr];
|
|
98640
98865
|
headers.push(cell ? String(cell.v || `Column ${col + 1}`) : `Column ${col + 1}`);
|
|
98641
98866
|
}
|
|
98642
98867
|
return headers;
|
|
98643
98868
|
}
|
|
98644
98869
|
// Streaming CSV parser for large files
|
|
98645
|
-
async
|
|
98870
|
+
async _parseCsvFileStreaming(filePath, delimiter2 = ",") {
|
|
98646
98871
|
return new Promise((resolve19, reject) => {
|
|
98647
98872
|
const data = [];
|
|
98648
98873
|
const fileStream = fs24.createReadStream(filePath);
|
|
@@ -98673,7 +98898,7 @@ var init_excel_parser = __esm({
|
|
|
98673
98898
|
const sheetData = {
|
|
98674
98899
|
name: "Sheet1",
|
|
98675
98900
|
data,
|
|
98676
|
-
range: `A1:${
|
|
98901
|
+
range: `A1:${import_xlsx.default.utils.encode_cell({ r: data.length - 1, c: Math.max(0, (data[0]?.length || 1) - 1) })}`,
|
|
98677
98902
|
rowCount: data.length,
|
|
98678
98903
|
columnCount: data[0]?.length || 0
|
|
98679
98904
|
};
|
|
@@ -98691,20 +98916,20 @@ var init_excel_parser = __esm({
|
|
|
98691
98916
|
}
|
|
98692
98917
|
// New method: Generate Excel file (round-trip functionality)
|
|
98693
98918
|
async generateExcel(data, outputPath) {
|
|
98694
|
-
const workbook =
|
|
98919
|
+
const workbook = import_xlsx.default.utils.book_new();
|
|
98695
98920
|
for (const [sheetName, sheetData] of Object.entries(data.sheets)) {
|
|
98696
|
-
const worksheet =
|
|
98921
|
+
const worksheet = import_xlsx.default.utils.aoa_to_sheet(sheetData.data.map((row) => row.map((cell) => cell.value)));
|
|
98697
98922
|
if (sheetData.data.some((row) => row.some((cell) => cell.formula))) {
|
|
98698
98923
|
sheetData.data.forEach((row, rowIndex) => {
|
|
98699
98924
|
row.forEach((cell, colIndex) => {
|
|
98700
98925
|
if (cell.formula) {
|
|
98701
|
-
const cellAddr =
|
|
98926
|
+
const cellAddr = import_xlsx.default.utils.encode_cell({ r: rowIndex, c: colIndex });
|
|
98702
98927
|
worksheet[cellAddr] = { ...worksheet[cellAddr], f: cell.formula };
|
|
98703
98928
|
}
|
|
98704
98929
|
});
|
|
98705
98930
|
});
|
|
98706
98931
|
}
|
|
98707
|
-
|
|
98932
|
+
import_xlsx.default.utils.book_append_sheet(workbook, worksheet, sheetName);
|
|
98708
98933
|
}
|
|
98709
98934
|
if (data.metadata) {
|
|
98710
98935
|
workbook.Props = {
|
|
@@ -98716,7 +98941,7 @@ var init_excel_parser = __esm({
|
|
|
98716
98941
|
Application: data.metadata.application
|
|
98717
98942
|
};
|
|
98718
98943
|
}
|
|
98719
|
-
|
|
98944
|
+
import_xlsx.default.writeFile(workbook, outputPath);
|
|
98720
98945
|
}
|
|
98721
98946
|
};
|
|
98722
98947
|
}
|
|
@@ -355281,7 +355506,7 @@ var require_utils3 = __commonJS({
|
|
|
355281
355506
|
var require_scan = __commonJS({
|
|
355282
355507
|
"node_modules/picomatch/lib/scan.js"(exports, module) {
|
|
355283
355508
|
"use strict";
|
|
355284
|
-
var
|
|
355509
|
+
var utils = require_utils3();
|
|
355285
355510
|
var {
|
|
355286
355511
|
CHAR_ASTERISK: CHAR_ASTERISK2,
|
|
355287
355512
|
/* * */
|
|
@@ -355542,9 +355767,9 @@ var require_scan = __commonJS({
|
|
|
355542
355767
|
}
|
|
355543
355768
|
}
|
|
355544
355769
|
if (opts.unescape === true) {
|
|
355545
|
-
if (glob2) glob2 =
|
|
355770
|
+
if (glob2) glob2 = utils.removeBackslashes(glob2);
|
|
355546
355771
|
if (base && backslashes === true) {
|
|
355547
|
-
base =
|
|
355772
|
+
base = utils.removeBackslashes(base);
|
|
355548
355773
|
}
|
|
355549
355774
|
}
|
|
355550
355775
|
const state = {
|
|
@@ -355612,7 +355837,7 @@ var require_parse2 = __commonJS({
|
|
|
355612
355837
|
"node_modules/picomatch/lib/parse.js"(exports, module) {
|
|
355613
355838
|
"use strict";
|
|
355614
355839
|
var constants2 = require_constants();
|
|
355615
|
-
var
|
|
355840
|
+
var utils = require_utils3();
|
|
355616
355841
|
var {
|
|
355617
355842
|
MAX_LENGTH,
|
|
355618
355843
|
POSIX_REGEX_SOURCE,
|
|
@@ -355629,7 +355854,7 @@ var require_parse2 = __commonJS({
|
|
|
355629
355854
|
try {
|
|
355630
355855
|
new RegExp(value);
|
|
355631
355856
|
} catch (ex) {
|
|
355632
|
-
return args.map((v) =>
|
|
355857
|
+
return args.map((v) => utils.escapeRegex(v)).join("..");
|
|
355633
355858
|
}
|
|
355634
355859
|
return value;
|
|
355635
355860
|
};
|
|
@@ -355695,7 +355920,7 @@ var require_parse2 = __commonJS({
|
|
|
355695
355920
|
globstar: false,
|
|
355696
355921
|
tokens
|
|
355697
355922
|
};
|
|
355698
|
-
input =
|
|
355923
|
+
input = utils.removePrefix(input, state);
|
|
355699
355924
|
len = input.length;
|
|
355700
355925
|
const extglobs = [];
|
|
355701
355926
|
const braces = [];
|
|
@@ -355834,7 +356059,7 @@ var require_parse2 = __commonJS({
|
|
|
355834
356059
|
state.output = input;
|
|
355835
356060
|
return state;
|
|
355836
356061
|
}
|
|
355837
|
-
state.output =
|
|
356062
|
+
state.output = utils.wrapOutput(output, state, options3);
|
|
355838
356063
|
return state;
|
|
355839
356064
|
}
|
|
355840
356065
|
while (!eos()) {
|
|
@@ -355910,7 +356135,7 @@ var require_parse2 = __commonJS({
|
|
|
355910
356135
|
continue;
|
|
355911
356136
|
}
|
|
355912
356137
|
if (state.quotes === 1 && value !== '"') {
|
|
355913
|
-
value =
|
|
356138
|
+
value = utils.escapeRegex(value);
|
|
355914
356139
|
prev.value += value;
|
|
355915
356140
|
append({ value });
|
|
355916
356141
|
continue;
|
|
@@ -355971,10 +356196,10 @@ var require_parse2 = __commonJS({
|
|
|
355971
356196
|
}
|
|
355972
356197
|
prev.value += value;
|
|
355973
356198
|
append({ value });
|
|
355974
|
-
if (opts.literalBrackets === false ||
|
|
356199
|
+
if (opts.literalBrackets === false || utils.hasRegexChars(prevValue)) {
|
|
355975
356200
|
continue;
|
|
355976
356201
|
}
|
|
355977
|
-
const escaped =
|
|
356202
|
+
const escaped = utils.escapeRegex(prev.value);
|
|
355978
356203
|
state.output = state.output.slice(0, -prev.value.length);
|
|
355979
356204
|
if (opts.literalBrackets === true) {
|
|
355980
356205
|
state.output += escaped;
|
|
@@ -356279,17 +356504,17 @@ var require_parse2 = __commonJS({
|
|
|
356279
356504
|
}
|
|
356280
356505
|
while (state.brackets > 0) {
|
|
356281
356506
|
if (opts.strictBrackets === true) throw new SyntaxError(syntaxError("closing", "]"));
|
|
356282
|
-
state.output =
|
|
356507
|
+
state.output = utils.escapeLast(state.output, "[");
|
|
356283
356508
|
decrement("brackets");
|
|
356284
356509
|
}
|
|
356285
356510
|
while (state.parens > 0) {
|
|
356286
356511
|
if (opts.strictBrackets === true) throw new SyntaxError(syntaxError("closing", ")"));
|
|
356287
|
-
state.output =
|
|
356512
|
+
state.output = utils.escapeLast(state.output, "(");
|
|
356288
356513
|
decrement("parens");
|
|
356289
356514
|
}
|
|
356290
356515
|
while (state.braces > 0) {
|
|
356291
356516
|
if (opts.strictBrackets === true) throw new SyntaxError(syntaxError("closing", "}"));
|
|
356292
|
-
state.output =
|
|
356517
|
+
state.output = utils.escapeLast(state.output, "{");
|
|
356293
356518
|
decrement("braces");
|
|
356294
356519
|
}
|
|
356295
356520
|
if (opts.strictSlashes !== true && (prev.type === "star" || prev.type === "bracket")) {
|
|
@@ -356364,7 +356589,7 @@ var require_parse2 = __commonJS({
|
|
|
356364
356589
|
}
|
|
356365
356590
|
}
|
|
356366
356591
|
};
|
|
356367
|
-
const output =
|
|
356592
|
+
const output = utils.removePrefix(input, state);
|
|
356368
356593
|
let source2 = create(output);
|
|
356369
356594
|
if (source2 && opts.strictSlashes !== true) {
|
|
356370
356595
|
source2 += `${SLASH_LITERAL}?`;
|
|
@@ -356381,7 +356606,7 @@ var require_picomatch = __commonJS({
|
|
|
356381
356606
|
"use strict";
|
|
356382
356607
|
var scan = require_scan();
|
|
356383
356608
|
var parse6 = require_parse2();
|
|
356384
|
-
var
|
|
356609
|
+
var utils = require_utils3();
|
|
356385
356610
|
var constants2 = require_constants();
|
|
356386
356611
|
var isObject2 = (val) => val && typeof val === "object" && !Array.isArray(val);
|
|
356387
356612
|
var picomatch3 = (glob2, options3, returnState = false) => {
|
|
@@ -356445,7 +356670,7 @@ var require_picomatch = __commonJS({
|
|
|
356445
356670
|
return { isMatch: false, output: "" };
|
|
356446
356671
|
}
|
|
356447
356672
|
const opts = options3 || {};
|
|
356448
|
-
const format = opts.format || (posix2 ?
|
|
356673
|
+
const format = opts.format || (posix2 ? utils.toPosixSlashes : null);
|
|
356449
356674
|
let match2 = input === glob2;
|
|
356450
356675
|
let output = match2 && format ? format(input) : input;
|
|
356451
356676
|
if (match2 === false) {
|
|
@@ -356463,7 +356688,7 @@ var require_picomatch = __commonJS({
|
|
|
356463
356688
|
};
|
|
356464
356689
|
picomatch3.matchBase = (input, glob2, options3) => {
|
|
356465
356690
|
const regex = glob2 instanceof RegExp ? glob2 : picomatch3.makeRe(glob2, options3);
|
|
356466
|
-
return regex.test(
|
|
356691
|
+
return regex.test(utils.basename(input));
|
|
356467
356692
|
};
|
|
356468
356693
|
picomatch3.isMatch = (str3, patterns, options3) => picomatch3(patterns, options3)(str3);
|
|
356469
356694
|
picomatch3.parse = (pattern, options3) => {
|
|
@@ -356520,10 +356745,10 @@ var require_picomatch2 = __commonJS({
|
|
|
356520
356745
|
"node_modules/picomatch/index.js"(exports, module) {
|
|
356521
356746
|
"use strict";
|
|
356522
356747
|
var pico = require_picomatch();
|
|
356523
|
-
var
|
|
356748
|
+
var utils = require_utils3();
|
|
356524
356749
|
function picomatch3(glob2, options3, returnState = false) {
|
|
356525
356750
|
if (options3 && (options3.windows === null || options3.windows === void 0)) {
|
|
356526
|
-
options3 = { ...options3, windows:
|
|
356751
|
+
options3 = { ...options3, windows: utils.isWindows() };
|
|
356527
356752
|
}
|
|
356528
356753
|
return pico(glob2, options3, returnState);
|
|
356529
356754
|
}
|
|
@@ -372796,7 +373021,7 @@ async function getPackageJson() {
|
|
|
372796
373021
|
// packages/cli/src/utils/version.ts
|
|
372797
373022
|
async function getCliVersion() {
|
|
372798
373023
|
const pkgJson = await getPackageJson();
|
|
372799
|
-
return "1.0.
|
|
373024
|
+
return "1.0.64";
|
|
372800
373025
|
}
|
|
372801
373026
|
|
|
372802
373027
|
// packages/cli/src/ui/commands/aboutCommand.ts
|
|
@@ -372848,7 +373073,7 @@ import open4 from "open";
|
|
|
372848
373073
|
import process11 from "node:process";
|
|
372849
373074
|
|
|
372850
373075
|
// packages/cli/src/generated/git-commit.ts
|
|
372851
|
-
var GIT_COMMIT_INFO = "
|
|
373076
|
+
var GIT_COMMIT_INFO = "24d4af6d";
|
|
372852
373077
|
|
|
372853
373078
|
// packages/cli/src/ui/commands/bugCommand.ts
|
|
372854
373079
|
init_dist2();
|
|
@@ -404288,7 +404513,7 @@ import { exec as exec7, execSync as execSync6, spawn as spawn6 } from "node:chil
|
|
|
404288
404513
|
import os29 from "node:os";
|
|
404289
404514
|
import path78 from "node:path";
|
|
404290
404515
|
import fs66 from "node:fs";
|
|
404291
|
-
import { readFile as
|
|
404516
|
+
import { readFile as readFile11 } from "node:fs/promises";
|
|
404292
404517
|
import { promisify as promisify6 } from "util";
|
|
404293
404518
|
var execAsync5 = promisify6(exec7);
|
|
404294
404519
|
function getContainerPath(hostPath) {
|
|
@@ -404323,7 +404548,7 @@ async function shouldUseCurrentUserInSandbox() {
|
|
|
404323
404548
|
}
|
|
404324
404549
|
if (os29.platform() === "linux") {
|
|
404325
404550
|
try {
|
|
404326
|
-
const osReleaseContent = await
|
|
404551
|
+
const osReleaseContent = await readFile11("/etc/os-release", "utf8");
|
|
404327
404552
|
if (osReleaseContent.includes("ID=debian") || osReleaseContent.includes("ID=ubuntu") || osReleaseContent.match(/^ID_LIKE=.*debian.*/m) || // Covers derivatives
|
|
404328
404553
|
osReleaseContent.match(/^ID_LIKE=.*ubuntu.*/m)) {
|
|
404329
404554
|
console.error(
|