@jocmp/mercury-parser 3.0.2 → 3.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +8 -10
- package/cli.js +4 -4
- package/dist/generate-custom-parser.js +138 -6
- package/dist/generate-custom-parser.js.map +1 -1
- package/dist/mercury.js +132 -5
- package/dist/mercury.js.map +1 -1
- package/dist/mercury.web.js +1 -1
- package/dist/mercury.web.js.map +1 -1
- package/package.json +19 -11
package/dist/mercury.js
CHANGED
|
@@ -1003,7 +1003,7 @@ function addScore($node, $, amount) {
|
|
|
1003
1003
|
try {
|
|
1004
1004
|
var score = getOrInitScore($node, $) + amount;
|
|
1005
1005
|
setScore($node, $, score);
|
|
1006
|
-
} catch (
|
|
1006
|
+
} catch (_unused) {
|
|
1007
1007
|
// Ignoring; error occurs in scoreNode
|
|
1008
1008
|
}
|
|
1009
1009
|
return $node;
|
|
@@ -1409,7 +1409,7 @@ function convertLazyLoadedImages($) {
|
|
|
1409
1409
|
var _JSON$parse = JSON.parse(str),
|
|
1410
1410
|
src = _JSON$parse.src;
|
|
1411
1411
|
if (typeof src === 'string') return src;
|
|
1412
|
-
} catch (
|
|
1412
|
+
} catch (_unused) {
|
|
1413
1413
|
return false;
|
|
1414
1414
|
}
|
|
1415
1415
|
return false;
|
|
@@ -1609,6 +1609,44 @@ function addExtractor(extractor) {
|
|
|
1609
1609
|
return apiExtractors;
|
|
1610
1610
|
}
|
|
1611
1611
|
|
|
1612
|
+
var BalloonJuiceComExtractor = {
|
|
1613
|
+
domain: 'balloon-juice.com',
|
|
1614
|
+
title: {
|
|
1615
|
+
selectors: ['h1.entry-title']
|
|
1616
|
+
},
|
|
1617
|
+
author: {
|
|
1618
|
+
selectors: ['.entry-author-name']
|
|
1619
|
+
},
|
|
1620
|
+
date_published: {
|
|
1621
|
+
selectors: [['meta[property="article:published_time"]', 'content'], ['meta[name="article:published_time"]', 'value']]
|
|
1622
|
+
},
|
|
1623
|
+
lead_image_url: {
|
|
1624
|
+
selectors: [['meta[property="og:image"]', 'content'], ['meta[name="og:image"]', 'value']]
|
|
1625
|
+
},
|
|
1626
|
+
content: {
|
|
1627
|
+
selectors: ['.entry-content', 'article'],
|
|
1628
|
+
transforms: {
|
|
1629
|
+
// Handle JS-rendered iframes
|
|
1630
|
+
'iframe[src*="embed.bsky.app"]': function iframeSrcEmbedBskyApp($node) {
|
|
1631
|
+
$node.addClass('mercury-parser-keep iframe-embed-bsky');
|
|
1632
|
+
$node.parent('.bluesky-embed').addClass('mercury-parser-keep');
|
|
1633
|
+
},
|
|
1634
|
+
// Handle no-JS blockquote fallbacks - convert to iframes
|
|
1635
|
+
'blockquote.bluesky-embed[data-bluesky-uri]': function blockquoteBlueskyEmbedDataBlueskyUri($node, $) {
|
|
1636
|
+
var uri = $node.attr('data-bluesky-uri');
|
|
1637
|
+
if (uri) {
|
|
1638
|
+
// Convert at://did:plc:.../app.bsky.feed.post/... to embed URL
|
|
1639
|
+
var embedPath = uri.replace('at://', '');
|
|
1640
|
+
var src = "https://embed.bsky.app/embed/".concat(embedPath);
|
|
1641
|
+
var $iframe = $("<iframe src=\"".concat(src, "\" class=\"mercury-parser-keep iframe-embed-bsky\" width=\"100%\" frameborder=\"0\"></iframe>"));
|
|
1642
|
+
$node.replaceWith($iframe);
|
|
1643
|
+
}
|
|
1644
|
+
}
|
|
1645
|
+
},
|
|
1646
|
+
clean: ['.shared-counts-wrap', '.entry-meta']
|
|
1647
|
+
}
|
|
1648
|
+
};
|
|
1649
|
+
|
|
1612
1650
|
var BloggerExtractor = {
|
|
1613
1651
|
domain: 'blogspot.com',
|
|
1614
1652
|
content: {
|
|
@@ -6990,8 +7028,96 @@ var NewsPtsOrgTwExtractor = {
|
|
|
6990
7028
|
}
|
|
6991
7029
|
};
|
|
6992
7030
|
|
|
7031
|
+
var WwwThedriveComExtractor = {
|
|
7032
|
+
domain: 'www.thedrive.com',
|
|
7033
|
+
title: {
|
|
7034
|
+
selectors: [['meta[name="og:title"]', 'value']]
|
|
7035
|
+
},
|
|
7036
|
+
author: {
|
|
7037
|
+
selectors: [['meta[name="author"]', 'value']]
|
|
7038
|
+
},
|
|
7039
|
+
date_published: {
|
|
7040
|
+
selectors: [['meta[name="article:published_time"]', 'value']]
|
|
7041
|
+
},
|
|
7042
|
+
lead_image_url: {
|
|
7043
|
+
selectors: [['meta[name="og:image"]', 'value']]
|
|
7044
|
+
},
|
|
7045
|
+
content: {
|
|
7046
|
+
selectors: ['.entry-content', 'article'],
|
|
7047
|
+
transforms: {
|
|
7048
|
+
img: function img(node) {
|
|
7049
|
+
node.removeAttr('sizes');
|
|
7050
|
+
},
|
|
7051
|
+
h2: function h2(node) {
|
|
7052
|
+
return node.attr('class', 'mercury-parser-keep');
|
|
7053
|
+
},
|
|
7054
|
+
h3: function h3(node) {
|
|
7055
|
+
return node.attr('class', 'mercury-parser-keep');
|
|
7056
|
+
}
|
|
7057
|
+
},
|
|
7058
|
+
clean: ['.product-disclosure', '.recurrent-newsletter-block', '.pw-incontent-commerce-ad', '#author-widgets']
|
|
7059
|
+
}
|
|
7060
|
+
};
|
|
7061
|
+
|
|
7062
|
+
var ChicagoyimbyComExtractor = {
|
|
7063
|
+
domain: 'chicagoyimby.com',
|
|
7064
|
+
title: {
|
|
7065
|
+
selectors: ['h1.post-title']
|
|
7066
|
+
},
|
|
7067
|
+
author: {
|
|
7068
|
+
selectors: ['.entry-meta-author a']
|
|
7069
|
+
},
|
|
7070
|
+
date_published: {
|
|
7071
|
+
selectors: [['meta[name="article:published_time"]', 'value']]
|
|
7072
|
+
},
|
|
7073
|
+
lead_image_url: {
|
|
7074
|
+
selectors: [['meta[name="og:image"]', 'value']]
|
|
7075
|
+
},
|
|
7076
|
+
content: {
|
|
7077
|
+
selectors: ['.entry-content'],
|
|
7078
|
+
transforms: {
|
|
7079
|
+
img: function img(node) {
|
|
7080
|
+
node.removeAttr('sizes');
|
|
7081
|
+
}
|
|
7082
|
+
},
|
|
7083
|
+
clean: ['.breadcrumb']
|
|
7084
|
+
}
|
|
7085
|
+
};
|
|
7086
|
+
|
|
7087
|
+
var WwwJalopnikComExtractor = {
|
|
7088
|
+
domain: 'www.jalopnik.com',
|
|
7089
|
+
title: {
|
|
7090
|
+
selectors: [['meta[name="og:title"]', 'value']]
|
|
7091
|
+
},
|
|
7092
|
+
author: {
|
|
7093
|
+
selectors: [['meta[name="article:author"]', 'value']]
|
|
7094
|
+
},
|
|
7095
|
+
date_published: {
|
|
7096
|
+
selectors: [['meta[name="article:published_time"]', 'value']]
|
|
7097
|
+
},
|
|
7098
|
+
dek: {
|
|
7099
|
+
selectors: [['meta[name="og:description"]', 'value']]
|
|
7100
|
+
},
|
|
7101
|
+
lead_image_url: {
|
|
7102
|
+
selectors: [['meta[name="og:image"]', 'value']]
|
|
7103
|
+
},
|
|
7104
|
+
content: {
|
|
7105
|
+
selectors: ['article.news-post'],
|
|
7106
|
+
transforms: {
|
|
7107
|
+
h2: function h2(node) {
|
|
7108
|
+
return node.attr('class', 'mercury-parser-keep');
|
|
7109
|
+
},
|
|
7110
|
+
'.slide-key': function slideKey(node) {
|
|
7111
|
+
return node.attr('class', 'mercury-parser-keep');
|
|
7112
|
+
}
|
|
7113
|
+
},
|
|
7114
|
+
clean: ['.breadcrumbs', '.byline-container']
|
|
7115
|
+
}
|
|
7116
|
+
};
|
|
7117
|
+
|
|
6993
7118
|
var CustomExtractors = /*#__PURE__*/Object.freeze({
|
|
6994
7119
|
__proto__: null,
|
|
7120
|
+
BalloonJuiceComExtractor: BalloonJuiceComExtractor,
|
|
6995
7121
|
BloggerExtractor: BloggerExtractor,
|
|
6996
7122
|
NYMagExtractor: NYMagExtractor,
|
|
6997
7123
|
WikipediaExtractor: WikipediaExtractor,
|
|
@@ -7179,7 +7305,10 @@ var CustomExtractors = /*#__PURE__*/Object.freeze({
|
|
|
7179
7305
|
WwwVideogameschronicleComExtractor: WwwVideogameschronicleComExtractor,
|
|
7180
7306
|
WwwNumeramaComExtractor: WwwNumeramaComExtractor,
|
|
7181
7307
|
TerminaltroveComExtractor: TerminaltroveComExtractor,
|
|
7182
|
-
NewsPtsOrgTwExtractor: NewsPtsOrgTwExtractor
|
|
7308
|
+
NewsPtsOrgTwExtractor: NewsPtsOrgTwExtractor,
|
|
7309
|
+
WwwThedriveComExtractor: WwwThedriveComExtractor,
|
|
7310
|
+
ChicagoyimbyComExtractor: ChicagoyimbyComExtractor,
|
|
7311
|
+
WwwJalopnikComExtractor: WwwJalopnikComExtractor
|
|
7183
7312
|
});
|
|
7184
7313
|
|
|
7185
7314
|
function ownKeys$5(e, r) { var t = _Object$keys__default["default"](e); if (_Object$getOwnPropertySymbols__default["default"]) { var o = _Object$getOwnPropertySymbols__default["default"](e); r && (o = o.filter(function (r) { return _Object$getOwnPropertyDescriptor__default["default"](e, r).enumerable; })), t.push.apply(t, o); } return t; }
|
|
@@ -8966,8 +9095,6 @@ function _collectAllPages() {
|
|
|
8966
9095
|
result = _objectSpread$1(_objectSpread$1({}, result), {}, {
|
|
8967
9096
|
content: "".concat(result.content, "<hr><h4>Page ").concat(pages, "</h4>").concat(nextPageResult.content)
|
|
8968
9097
|
});
|
|
8969
|
-
|
|
8970
|
-
// eslint-disable-next-line prefer-destructuring
|
|
8971
9098
|
next_page_url = nextPageResult.next_page_url;
|
|
8972
9099
|
_context.next = 1;
|
|
8973
9100
|
break;
|