@jocmp/mercury-parser 2.4.2 → 2.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/generate-custom-parser.js +114 -45
- package/dist/generate-custom-parser.js.map +1 -1
- package/dist/mercury.js +117 -46
- package/dist/mercury.js.map +1 -1
- package/dist/mercury.web.js +1 -1
- package/dist/mercury.web.js.map +1 -1
- package/package.json +1 -1
package/dist/mercury.js
CHANGED
|
@@ -2645,50 +2645,6 @@ var MoneyCnnComExtractor = {
|
|
|
2645
2645
|
}
|
|
2646
2646
|
};
|
|
2647
2647
|
|
|
2648
|
-
var WwwThevergeComExtractor = {
|
|
2649
|
-
domain: 'www.theverge.com',
|
|
2650
|
-
supportedDomains: ['www.polygon.com'],
|
|
2651
|
-
title: {
|
|
2652
|
-
selectors: ['h1']
|
|
2653
|
-
},
|
|
2654
|
-
author: {
|
|
2655
|
-
selectors: [['meta[name="author"]', 'value']]
|
|
2656
|
-
},
|
|
2657
|
-
date_published: {
|
|
2658
|
-
selectors: [['meta[name="article:published_time"]', 'value']]
|
|
2659
|
-
},
|
|
2660
|
-
dek: {
|
|
2661
|
-
selectors: ['.p-dek']
|
|
2662
|
-
},
|
|
2663
|
-
lead_image_url: {
|
|
2664
|
-
selectors: [['meta[name="og:image"]', 'value']]
|
|
2665
|
-
},
|
|
2666
|
-
content: {
|
|
2667
|
-
selectors: [// feature template multi-match
|
|
2668
|
-
['.c-entry-hero .e-image', '.c-entry-intro', '.c-entry-content'], // regular post multi-match
|
|
2669
|
-
['.e-image--hero', '.c-entry-content'], // feature template fallback
|
|
2670
|
-
'.l-wrapper .l-feature', // regular post fallback
|
|
2671
|
-
'div.c-entry-content'],
|
|
2672
|
-
// Transform lazy-loaded images
|
|
2673
|
-
transforms: {
|
|
2674
|
-
noscript: function noscript($node) {
|
|
2675
|
-
var $children = $node.children();
|
|
2676
|
-
|
|
2677
|
-
if ($children.length === 1 && $children.get(0).tagName === 'img') {
|
|
2678
|
-
return 'span';
|
|
2679
|
-
}
|
|
2680
|
-
|
|
2681
|
-
return null;
|
|
2682
|
-
}
|
|
2683
|
-
},
|
|
2684
|
-
// Is there anything that is in the result that shouldn't be?
|
|
2685
|
-
// The clean selectors will remove anything that matches from
|
|
2686
|
-
// the result
|
|
2687
|
-
clean: ['.aside', 'img.c-dynamic-image' // images come from noscript transform
|
|
2688
|
-
]
|
|
2689
|
-
}
|
|
2690
|
-
};
|
|
2691
|
-
|
|
2692
2648
|
var WwwCnnComExtractor = {
|
|
2693
2649
|
domain: 'www.cnn.com',
|
|
2694
2650
|
title: {
|
|
@@ -6770,6 +6726,119 @@ var TarnkappeInfoExtractor = {
|
|
|
6770
6726
|
}
|
|
6771
6727
|
};
|
|
6772
6728
|
|
|
6729
|
+
var WwwVortezNetExtractor = {
|
|
6730
|
+
domain: 'www.vortez.net',
|
|
6731
|
+
title: {
|
|
6732
|
+
selectors: ['title']
|
|
6733
|
+
},
|
|
6734
|
+
author: {
|
|
6735
|
+
selectors: []
|
|
6736
|
+
},
|
|
6737
|
+
date_published: {
|
|
6738
|
+
selectors: []
|
|
6739
|
+
},
|
|
6740
|
+
dek: {
|
|
6741
|
+
selectors: []
|
|
6742
|
+
},
|
|
6743
|
+
lead_image_url: {
|
|
6744
|
+
selectors: [['meta[name="og:image"]', 'value']]
|
|
6745
|
+
},
|
|
6746
|
+
next_page_url: {
|
|
6747
|
+
selectors: ['.pagelink:nth-child(2) > a']
|
|
6748
|
+
},
|
|
6749
|
+
content: {
|
|
6750
|
+
selectors: ['.main-content', '.the-article-content'],
|
|
6751
|
+
transforms: {
|
|
6752
|
+
strong: 'p',
|
|
6753
|
+
h2: function h2(node) {
|
|
6754
|
+
return node.attr('class', 'mercury-parser-keep');
|
|
6755
|
+
}
|
|
6756
|
+
},
|
|
6757
|
+
clean: ['.article-header', '.panel-title', 'select', 'br']
|
|
6758
|
+
}
|
|
6759
|
+
};
|
|
6760
|
+
|
|
6761
|
+
var WwwPolygonComExtractor = {
|
|
6762
|
+
domain: 'www.polygon.com',
|
|
6763
|
+
title: {
|
|
6764
|
+
selectors: [['meta[name="og:title"]', 'value']]
|
|
6765
|
+
},
|
|
6766
|
+
author: {
|
|
6767
|
+
selectors: [['meta[name="author"]', 'value']]
|
|
6768
|
+
},
|
|
6769
|
+
date_published: {
|
|
6770
|
+
selectors: [['meta[name="article:published_time"]', 'value']]
|
|
6771
|
+
},
|
|
6772
|
+
lead_image_url: {
|
|
6773
|
+
selectors: [['meta[name="og:image"]', 'value']]
|
|
6774
|
+
},
|
|
6775
|
+
content: {
|
|
6776
|
+
selectors: ['article'],
|
|
6777
|
+
transforms: {
|
|
6778
|
+
h2: function h2($node) {
|
|
6779
|
+
return $node.attr('class', 'mercury-parser-keep');
|
|
6780
|
+
},
|
|
6781
|
+
h3: function h3($node) {
|
|
6782
|
+
return $node.attr('class', 'mercury-parser-keep');
|
|
6783
|
+
},
|
|
6784
|
+
img: function img($node) {
|
|
6785
|
+
var srcset = $node.attr('srcset');
|
|
6786
|
+
|
|
6787
|
+
var _split = (srcset || '').split(','),
|
|
6788
|
+
_split2 = _slicedToArray(_split, 1),
|
|
6789
|
+
src = _split2[0];
|
|
6790
|
+
|
|
6791
|
+
if (src) {
|
|
6792
|
+
$node.parent().replaceWith("<figure><img srcset=\"".concat(srcset, "\" src=\"").concat(src, "\"/></figure>"));
|
|
6793
|
+
}
|
|
6794
|
+
}
|
|
6795
|
+
},
|
|
6796
|
+
clean: ['cite', '.duet--ad--native-ad-rail', '.duet--layout--rail', '.duet--article--table-of-contents']
|
|
6797
|
+
}
|
|
6798
|
+
};
|
|
6799
|
+
|
|
6800
|
+
var WwwThevergeComExtractor = {
|
|
6801
|
+
domain: 'www.theverge.com',
|
|
6802
|
+
title: {
|
|
6803
|
+
selectors: [['meta[name="og:title"]', 'value']]
|
|
6804
|
+
},
|
|
6805
|
+
author: {
|
|
6806
|
+
selectors: [['meta[name="author"]', 'value']]
|
|
6807
|
+
},
|
|
6808
|
+
date_published: {
|
|
6809
|
+
selectors: [['meta[name="article:published_time"]', 'value']]
|
|
6810
|
+
},
|
|
6811
|
+
lead_image_url: {
|
|
6812
|
+
selectors: [['meta[name="og:image"]', 'value']]
|
|
6813
|
+
},
|
|
6814
|
+
content: {
|
|
6815
|
+
selectors: ['#zephr-anchor', 'article'],
|
|
6816
|
+
transforms: {
|
|
6817
|
+
h2: function h2($node) {
|
|
6818
|
+
return $node.attr('class', 'mercury-parser-keep');
|
|
6819
|
+
},
|
|
6820
|
+
h3: function h3($node) {
|
|
6821
|
+
return $node.attr('class', 'mercury-parser-keep');
|
|
6822
|
+
},
|
|
6823
|
+
h4: function h4($node) {
|
|
6824
|
+
return $node.attr('class', 'mercury-parser-keep');
|
|
6825
|
+
},
|
|
6826
|
+
img: function img($node) {
|
|
6827
|
+
var srcset = $node.attr('srcset');
|
|
6828
|
+
|
|
6829
|
+
var _split = (srcset || '').split(','),
|
|
6830
|
+
_split2 = _slicedToArray(_split, 1),
|
|
6831
|
+
src = _split2[0];
|
|
6832
|
+
|
|
6833
|
+
if (src) {
|
|
6834
|
+
$node.parent().replaceWith("<figure><img srcset=\"".concat(srcset, "\" src=\"").concat(src, "\"/></figure>"));
|
|
6835
|
+
}
|
|
6836
|
+
}
|
|
6837
|
+
},
|
|
6838
|
+
clean: []
|
|
6839
|
+
}
|
|
6840
|
+
};
|
|
6841
|
+
|
|
6773
6842
|
|
|
6774
6843
|
|
|
6775
6844
|
var CustomExtractors = /*#__PURE__*/Object.freeze({
|
|
@@ -6796,7 +6865,6 @@ var CustomExtractors = /*#__PURE__*/Object.freeze({
|
|
|
6796
6865
|
WwwHuffingtonpostComExtractor: WwwHuffingtonpostComExtractor,
|
|
6797
6866
|
NewrepublicComExtractor: NewrepublicComExtractor,
|
|
6798
6867
|
MoneyCnnComExtractor: MoneyCnnComExtractor,
|
|
6799
|
-
WwwThevergeComExtractor: WwwThevergeComExtractor,
|
|
6800
6868
|
WwwCnnComExtractor: WwwCnnComExtractor,
|
|
6801
6869
|
WwwAolComExtractor: WwwAolComExtractor,
|
|
6802
6870
|
WwwYoutubeComExtractor: WwwYoutubeComExtractor,
|
|
@@ -6947,7 +7015,10 @@ var CustomExtractors = /*#__PURE__*/Object.freeze({
|
|
|
6947
7015
|
WwwTagesschauDeExtractor: WwwTagesschauDeExtractor,
|
|
6948
7016
|
Nineto5googleComExtractor: Nineto5googleComExtractor,
|
|
6949
7017
|
WwwEngadgetComExtractor: WwwEngadgetComExtractor,
|
|
6950
|
-
TarnkappeInfoExtractor: TarnkappeInfoExtractor
|
|
7018
|
+
TarnkappeInfoExtractor: TarnkappeInfoExtractor,
|
|
7019
|
+
WwwVortezNetExtractor: WwwVortezNetExtractor,
|
|
7020
|
+
WwwPolygonComExtractor: WwwPolygonComExtractor,
|
|
7021
|
+
WwwThevergeComExtractor: WwwThevergeComExtractor
|
|
6951
7022
|
});
|
|
6952
7023
|
|
|
6953
7024
|
var Extractors = _Object$keys(CustomExtractors).reduce(function (acc, key) {
|