@jocmp/mercury-parser 2.4.3 → 2.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/generate-custom-parser.js +110 -45
- package/dist/generate-custom-parser.js.map +1 -1
- package/dist/mercury.js +113 -46
- package/dist/mercury.js.map +1 -1
- package/dist/mercury.web.js +1 -1
- package/dist/mercury.web.js.map +1 -1
- package/package.json +1 -1
package/dist/mercury.js
CHANGED
|
@@ -2645,50 +2645,6 @@ var MoneyCnnComExtractor = {
|
|
|
2645
2645
|
}
|
|
2646
2646
|
};
|
|
2647
2647
|
|
|
2648
|
-
// Custom extractor definition for www.theverge.com. It also lists
// www.polygon.com under supportedDomains.
// NOTE(review): two-element selector entries such as
// ['meta[name="author"]', 'value'] look like [selector, attribute] pairs —
// confirm against the selector-resolution code, which is not visible here.
var WwwThevergeComExtractor = {
  domain: 'www.theverge.com',
  supportedDomains: ['www.polygon.com'],
  title: {
    selectors: ['h1']
  },
  author: {
    selectors: [['meta[name="author"]', 'value']]
  },
  date_published: {
    selectors: [['meta[name="article:published_time"]', 'value']]
  },
  dek: {
    selectors: ['.p-dek']
  },
  lead_image_url: {
    selectors: [['meta[name="og:image"]', 'value']]
  },
  content: {
    selectors: [
      // feature template multi-match
      ['.c-entry-hero .e-image', '.c-entry-intro', '.c-entry-content'],
      // regular post multi-match
      ['.e-image--hero', '.c-entry-content'],
      // feature template fallback
      '.l-wrapper .l-feature',
      // regular post fallback
      'div.c-entry-content'
    ],
    // Transform lazy-loaded images
    transforms: {
      // Returns 'span' when the <noscript> wraps exactly one <img> child,
      // otherwise null.
      // NOTE(review): the caller presumably rewrites the node to the returned
      // tag name — confirm in the transform runner.
      noscript: function noscript($node) {
        var $children = $node.children();

        if ($children.length !== 1) {
          return null;
        }

        // Compare case-insensitively: DOM-backed elements report tagName in
        // upper case ('IMG'), so a strict comparison with 'img' never matches
        // there.
        var tagName = $children.get(0).tagName;

        if (typeof tagName === 'string' && tagName.toLowerCase() === 'img') {
          return 'span';
        }

        return null;
      }
    },
    // Is there anything that is in the result that shouldn't be?
    // The clean selectors will remove anything that matches from
    // the result
    clean: [
      '.aside',
      'img.c-dynamic-image' // images come from noscript transform
    ]
  }
};
|
|
2691
|
-
|
|
2692
2648
|
var WwwCnnComExtractor = {
|
|
2693
2649
|
domain: 'www.cnn.com',
|
|
2694
2650
|
title: {
|
|
@@ -6802,6 +6758,115 @@ var WwwVortezNetExtractor = {
|
|
|
6802
6758
|
}
|
|
6803
6759
|
};
|
|
6804
6760
|
|
|
6761
|
+
// Custom extractor definition for www.polygon.com.
// NOTE(review): two-element selector entries such as
// ['meta[name="og:title"]', 'value'] look like [selector, attribute] pairs —
// confirm against the selector-resolution code, which is not visible here.
var WwwPolygonComExtractor = {
  domain: 'www.polygon.com',
  title: {
    selectors: [['meta[name="og:title"]', 'value']]
  },
  author: {
    selectors: [['meta[name="author"]', 'value']]
  },
  date_published: {
    selectors: [['meta[name="article:published_time"]', 'value']]
  },
  lead_image_url: {
    selectors: [['meta[name="og:image"]', 'value']]
  },
  content: {
    selectors: ['article'],
    transforms: {
      // Sets class="mercury-parser-keep" on the heading and returns the
      // result of the attr() call.
      // NOTE(review): presumably this marks the heading so later cleaning
      // passes preserve it — confirm where the class is consumed.
      h2: function h2($node) {
        return $node.attr('class', 'mercury-parser-keep');
      },
      h3: function h3($node) {
        return $node.attr('class', 'mercury-parser-keep');
      },
      // Replaces the image's parent element with a bare
      // <figure><img srcset=... src=...></figure>, deriving `src` from the
      // first srcset candidate. Images without a usable srcset are left
      // untouched. Returns undefined.
      img: function img($node) {
        var srcset = $node.attr('srcset');

        // A srcset candidate is "<url> <descriptor>". Take only the URL:
        // skip leading whitespace/commas, read up to the first whitespace,
        // and drop commas trailing the URL (a candidate without descriptor).
        // Splitting on ',' alone leaves the descriptor (e.g. " 256w") inside
        // src and truncates URLs that themselves contain commas.
        var firstToken = (srcset || '').replace(/^[\s,]+/, '').split(/\s+/)[0];
        var src = firstToken.replace(/,+$/, '');

        if (!src) {
          return;
        }

        // The values come from the scraped page; escape the characters that
        // are significant inside a double-quoted HTML attribute.
        var escapeAttr = function escapeAttr(value) {
          return value.replace(/&/g, '&amp;').replace(/"/g, '&quot;');
        };

        $node.parent().replaceWith(
          '<figure><img srcset="' + escapeAttr(srcset) +
          '" src="' + escapeAttr(src) + '"/></figure>'
        );
      }
    },
    clean: ['cite', '.duet--ad--native-ad-rail', '.duet--layout--rail', '.duet--article--table-of-contents']
  }
};
|
|
6799
|
+
|
|
6800
|
+
// Custom extractor definition for www.theverge.com.
// NOTE(review): two-element selector entries such as
// ['meta[name="og:title"]', 'value'] look like [selector, attribute] pairs —
// confirm against the selector-resolution code, which is not visible here.
var WwwThevergeComExtractor = {
  domain: 'www.theverge.com',
  title: {
    selectors: [['meta[name="og:title"]', 'value']]
  },
  author: {
    selectors: [['meta[name="author"]', 'value']]
  },
  date_published: {
    selectors: [['meta[name="article:published_time"]', 'value']]
  },
  lead_image_url: {
    selectors: [['meta[name="og:image"]', 'value']]
  },
  content: {
    selectors: ['#zephr-anchor', 'article'],
    transforms: {
      // Each heading transform sets class="mercury-parser-keep" on the node
      // and returns the result of the attr() call.
      // NOTE(review): presumably this marks the heading so later cleaning
      // passes preserve it — confirm where the class is consumed.
      h2: function h2($node) {
        return $node.attr('class', 'mercury-parser-keep');
      },
      h3: function h3($node) {
        return $node.attr('class', 'mercury-parser-keep');
      },
      h4: function h4($node) {
        return $node.attr('class', 'mercury-parser-keep');
      },
      // Replaces the image's parent element with a bare
      // <figure><img srcset=... src=...></figure>, deriving `src` from the
      // first srcset candidate. Images without a usable srcset are left
      // untouched. Returns undefined.
      img: function img($node) {
        var srcset = $node.attr('srcset');

        // A srcset candidate is "<url> <descriptor>". Take only the URL:
        // skip leading whitespace/commas, read up to the first whitespace,
        // and drop commas trailing the URL (a candidate without descriptor).
        // Splitting on ',' alone leaves the descriptor (e.g. " 256w") inside
        // src and truncates URLs that themselves contain commas.
        var firstToken = (srcset || '').replace(/^[\s,]+/, '').split(/\s+/)[0];
        var src = firstToken.replace(/,+$/, '');

        if (!src) {
          return;
        }

        // The values come from the scraped page; escape the characters that
        // are significant inside a double-quoted HTML attribute.
        var escapeAttr = function escapeAttr(value) {
          return value.replace(/&/g, '&amp;').replace(/"/g, '&quot;');
        };

        $node.parent().replaceWith(
          '<figure><img srcset="' + escapeAttr(srcset) +
          '" src="' + escapeAttr(src) + '"/></figure>'
        );
      }
    },
    clean: []
  }
};
|
|
6841
|
+
|
|
6842
|
+
// Custom extractor definition for www.techpowerup.com.
// NOTE(review): two-element selector entries such as
// ['.byline time[datetime]', 'datetime'] look like [selector, attribute]
// pairs — confirm against the selector-resolution code, not visible here.
var WwwTechpowerupComExtractor = {
  domain: 'www.techpowerup.com',

  title: {
    selectors: [
      ['meta[name="og:title"]', 'value']
    ]
  },

  author: {
    selectors: [
      '.byline address'
    ]
  },

  date_published: {
    selectors: [
      ['.byline time[datetime]', 'datetime']
    ]
  },

  lead_image_url: {
    selectors: [
      ['meta[name="og:image"]', 'value']
    ]
  },

  content: {
    selectors: [
      '.contnt'
    ],

    transforms: {
      // Sets class="mercury-parser-keep" on the heading and hands back
      // whatever attr() returned.
      h2: function h2($heading) {
        var tagged = $heading.attr('class', 'mercury-parser-keep');
        return tagged;
      }
    },

    // Selectors listed for removal from the extracted content.
    clean: [
      'header',
      'footer'
    ]
  },

  next_page_url: {
    selectors: [
      ['.nextpage-bottom', 'href']
    ]
  }
};
|
|
6869
|
+
|
|
6805
6870
|
|
|
6806
6871
|
|
|
6807
6872
|
var CustomExtractors = /*#__PURE__*/Object.freeze({
|
|
@@ -6828,7 +6893,6 @@ var CustomExtractors = /*#__PURE__*/Object.freeze({
|
|
|
6828
6893
|
WwwHuffingtonpostComExtractor: WwwHuffingtonpostComExtractor,
|
|
6829
6894
|
NewrepublicComExtractor: NewrepublicComExtractor,
|
|
6830
6895
|
MoneyCnnComExtractor: MoneyCnnComExtractor,
|
|
6831
|
-
WwwThevergeComExtractor: WwwThevergeComExtractor,
|
|
6832
6896
|
WwwCnnComExtractor: WwwCnnComExtractor,
|
|
6833
6897
|
WwwAolComExtractor: WwwAolComExtractor,
|
|
6834
6898
|
WwwYoutubeComExtractor: WwwYoutubeComExtractor,
|
|
@@ -6980,7 +7044,10 @@ var CustomExtractors = /*#__PURE__*/Object.freeze({
|
|
|
6980
7044
|
Nineto5googleComExtractor: Nineto5googleComExtractor,
|
|
6981
7045
|
WwwEngadgetComExtractor: WwwEngadgetComExtractor,
|
|
6982
7046
|
TarnkappeInfoExtractor: TarnkappeInfoExtractor,
|
|
6983
|
-
WwwVortezNetExtractor: WwwVortezNetExtractor
|
|
7047
|
+
WwwVortezNetExtractor: WwwVortezNetExtractor,
|
|
7048
|
+
WwwPolygonComExtractor: WwwPolygonComExtractor,
|
|
7049
|
+
WwwThevergeComExtractor: WwwThevergeComExtractor,
|
|
7050
|
+
WwwTechpowerupComExtractor: WwwTechpowerupComExtractor
|
|
6984
7051
|
});
|
|
6985
7052
|
|
|
6986
7053
|
var Extractors = _Object$keys(CustomExtractors).reduce(function (acc, key) {
|