@jocmp/mercury-parser 2.4.2 → 2.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/mercury.js CHANGED
@@ -2645,50 +2645,6 @@ var MoneyCnnComExtractor = {
2645
2645
  }
2646
2646
  };
2647
2647
 
2648
- var WwwThevergeComExtractor = {
2649
- domain: 'www.theverge.com',
2650
- supportedDomains: ['www.polygon.com'],
2651
- title: {
2652
- selectors: ['h1']
2653
- },
2654
- author: {
2655
- selectors: [['meta[name="author"]', 'value']]
2656
- },
2657
- date_published: {
2658
- selectors: [['meta[name="article:published_time"]', 'value']]
2659
- },
2660
- dek: {
2661
- selectors: ['.p-dek']
2662
- },
2663
- lead_image_url: {
2664
- selectors: [['meta[name="og:image"]', 'value']]
2665
- },
2666
- content: {
2667
- selectors: [// feature template multi-match
2668
- ['.c-entry-hero .e-image', '.c-entry-intro', '.c-entry-content'], // regular post multi-match
2669
- ['.e-image--hero', '.c-entry-content'], // feature template fallback
2670
- '.l-wrapper .l-feature', // regular post fallback
2671
- 'div.c-entry-content'],
2672
- // Transform lazy-loaded images
2673
- transforms: {
2674
- noscript: function noscript($node) {
2675
- var $children = $node.children();
2676
-
2677
- if ($children.length === 1 && $children.get(0).tagName === 'img') {
2678
- return 'span';
2679
- }
2680
-
2681
- return null;
2682
- }
2683
- },
2684
- // Is there anything that is in the result that shouldn't be?
2685
- // The clean selectors will remove anything that matches from
2686
- // the result
2687
- clean: ['.aside', 'img.c-dynamic-image' // images come from noscript transform
2688
- ]
2689
- }
2690
- };
2691
-
2692
2648
  var WwwCnnComExtractor = {
2693
2649
  domain: 'www.cnn.com',
2694
2650
  title: {
@@ -6770,6 +6726,119 @@ var TarnkappeInfoExtractor = {
6770
6726
  }
6771
6727
  };
6772
6728
 
6729
+ var WwwVortezNetExtractor = {
6730
+ domain: 'www.vortez.net',
6731
+ title: {
6732
+ selectors: ['title']
6733
+ },
6734
+ author: {
6735
+ selectors: []
6736
+ },
6737
+ date_published: {
6738
+ selectors: []
6739
+ },
6740
+ dek: {
6741
+ selectors: []
6742
+ },
6743
+ lead_image_url: {
6744
+ selectors: [['meta[name="og:image"]', 'value']]
6745
+ },
6746
+ next_page_url: {
6747
+ selectors: ['.pagelink:nth-child(2) > a']
6748
+ },
6749
+ content: {
6750
+ selectors: ['.main-content', '.the-article-content'],
6751
+ transforms: {
6752
+ strong: 'p',
6753
+ h2: function h2(node) {
6754
+ return node.attr('class', 'mercury-parser-keep');
6755
+ }
6756
+ },
6757
+ clean: ['.article-header', '.panel-title', 'select', 'br']
6758
+ }
6759
+ };
6760
+
6761
+ var WwwPolygonComExtractor = {
6762
+ domain: 'www.polygon.com',
6763
+ title: {
6764
+ selectors: [['meta[name="og:title"]', 'value']]
6765
+ },
6766
+ author: {
6767
+ selectors: [['meta[name="author"]', 'value']]
6768
+ },
6769
+ date_published: {
6770
+ selectors: [['meta[name="article:published_time"]', 'value']]
6771
+ },
6772
+ lead_image_url: {
6773
+ selectors: [['meta[name="og:image"]', 'value']]
6774
+ },
6775
+ content: {
6776
+ selectors: ['article'],
6777
+ transforms: {
6778
+ h2: function h2($node) {
6779
+ return $node.attr('class', 'mercury-parser-keep');
6780
+ },
6781
+ h3: function h3($node) {
6782
+ return $node.attr('class', 'mercury-parser-keep');
6783
+ },
6784
+ img: function img($node) {
6785
+ var srcset = $node.attr('srcset');
6786
+
6787
+ var _split = (srcset || '').split(','),
6788
+ _split2 = _slicedToArray(_split, 1),
6789
+ src = _split2[0];
6790
+
6791
+ if (src) {
6792
+ $node.parent().replaceWith("<figure><img srcset=\"".concat(srcset, "\" src=\"").concat(src, "\"/></figure>"));
6793
+ }
6794
+ }
6795
+ },
6796
+ clean: ['cite', '.duet--ad--native-ad-rail', '.duet--layout--rail', '.duet--article--table-of-contents']
6797
+ }
6798
+ };
6799
+
6800
+ var WwwThevergeComExtractor = {
6801
+ domain: 'www.theverge.com',
6802
+ title: {
6803
+ selectors: [['meta[name="og:title"]', 'value']]
6804
+ },
6805
+ author: {
6806
+ selectors: [['meta[name="author"]', 'value']]
6807
+ },
6808
+ date_published: {
6809
+ selectors: [['meta[name="article:published_time"]', 'value']]
6810
+ },
6811
+ lead_image_url: {
6812
+ selectors: [['meta[name="og:image"]', 'value']]
6813
+ },
6814
+ content: {
6815
+ selectors: ['#zephr-anchor', 'article'],
6816
+ transforms: {
6817
+ h2: function h2($node) {
6818
+ return $node.attr('class', 'mercury-parser-keep');
6819
+ },
6820
+ h3: function h3($node) {
6821
+ return $node.attr('class', 'mercury-parser-keep');
6822
+ },
6823
+ h4: function h4($node) {
6824
+ return $node.attr('class', 'mercury-parser-keep');
6825
+ },
6826
+ img: function img($node) {
6827
+ var srcset = $node.attr('srcset');
6828
+
6829
+ var _split = (srcset || '').split(','),
6830
+ _split2 = _slicedToArray(_split, 1),
6831
+ src = _split2[0];
6832
+
6833
+ if (src) {
6834
+ $node.parent().replaceWith("<figure><img srcset=\"".concat(srcset, "\" src=\"").concat(src, "\"/></figure>"));
6835
+ }
6836
+ }
6837
+ },
6838
+ clean: []
6839
+ }
6840
+ };
6841
+
6773
6842
 
6774
6843
 
6775
6844
  var CustomExtractors = /*#__PURE__*/Object.freeze({
@@ -6796,7 +6865,6 @@ var CustomExtractors = /*#__PURE__*/Object.freeze({
6796
6865
  WwwHuffingtonpostComExtractor: WwwHuffingtonpostComExtractor,
6797
6866
  NewrepublicComExtractor: NewrepublicComExtractor,
6798
6867
  MoneyCnnComExtractor: MoneyCnnComExtractor,
6799
- WwwThevergeComExtractor: WwwThevergeComExtractor,
6800
6868
  WwwCnnComExtractor: WwwCnnComExtractor,
6801
6869
  WwwAolComExtractor: WwwAolComExtractor,
6802
6870
  WwwYoutubeComExtractor: WwwYoutubeComExtractor,
@@ -6947,7 +7015,10 @@ var CustomExtractors = /*#__PURE__*/Object.freeze({
6947
7015
  WwwTagesschauDeExtractor: WwwTagesschauDeExtractor,
6948
7016
  Nineto5googleComExtractor: Nineto5googleComExtractor,
6949
7017
  WwwEngadgetComExtractor: WwwEngadgetComExtractor,
6950
- TarnkappeInfoExtractor: TarnkappeInfoExtractor
7018
+ TarnkappeInfoExtractor: TarnkappeInfoExtractor,
7019
+ WwwVortezNetExtractor: WwwVortezNetExtractor,
7020
+ WwwPolygonComExtractor: WwwPolygonComExtractor,
7021
+ WwwThevergeComExtractor: WwwThevergeComExtractor
6951
7022
  });
6952
7023
 
6953
7024
  var Extractors = _Object$keys(CustomExtractors).reduce(function (acc, key) {