@jocmp/mercury-parser 2.4.3 → 2.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/mercury.js CHANGED
@@ -2645,50 +2645,6 @@ var MoneyCnnComExtractor = {
2645
2645
  }
2646
2646
  };
2647
2647
 
2648
- var WwwThevergeComExtractor = {
2649
- domain: 'www.theverge.com',
2650
- supportedDomains: ['www.polygon.com'],
2651
- title: {
2652
- selectors: ['h1']
2653
- },
2654
- author: {
2655
- selectors: [['meta[name="author"]', 'value']]
2656
- },
2657
- date_published: {
2658
- selectors: [['meta[name="article:published_time"]', 'value']]
2659
- },
2660
- dek: {
2661
- selectors: ['.p-dek']
2662
- },
2663
- lead_image_url: {
2664
- selectors: [['meta[name="og:image"]', 'value']]
2665
- },
2666
- content: {
2667
- selectors: [// feature template multi-match
2668
- ['.c-entry-hero .e-image', '.c-entry-intro', '.c-entry-content'], // regular post multi-match
2669
- ['.e-image--hero', '.c-entry-content'], // feature template fallback
2670
- '.l-wrapper .l-feature', // regular post fallback
2671
- 'div.c-entry-content'],
2672
- // Transform lazy-loaded images
2673
- transforms: {
2674
- noscript: function noscript($node) {
2675
- var $children = $node.children();
2676
-
2677
- if ($children.length === 1 && $children.get(0).tagName === 'img') {
2678
- return 'span';
2679
- }
2680
-
2681
- return null;
2682
- }
2683
- },
2684
- // Is there anything that is in the result that shouldn't be?
2685
- // The clean selectors will remove anything that matches from
2686
- // the result
2687
- clean: ['.aside', 'img.c-dynamic-image' // images come from noscript transform
2688
- ]
2689
- }
2690
- };
2691
-
2692
2648
  var WwwCnnComExtractor = {
2693
2649
  domain: 'www.cnn.com',
2694
2650
  title: {
@@ -6802,6 +6758,115 @@ var WwwVortezNetExtractor = {
6802
6758
  }
6803
6759
  };
6804
6760
 
6761
+ var WwwPolygonComExtractor = {
6762
+ domain: 'www.polygon.com',
6763
+ title: {
6764
+ selectors: [['meta[name="og:title"]', 'value']]
6765
+ },
6766
+ author: {
6767
+ selectors: [['meta[name="author"]', 'value']]
6768
+ },
6769
+ date_published: {
6770
+ selectors: [['meta[name="article:published_time"]', 'value']]
6771
+ },
6772
+ lead_image_url: {
6773
+ selectors: [['meta[name="og:image"]', 'value']]
6774
+ },
6775
+ content: {
6776
+ selectors: ['article'],
6777
+ transforms: {
6778
+ h2: function h2($node) {
6779
+ return $node.attr('class', 'mercury-parser-keep');
6780
+ },
6781
+ h3: function h3($node) {
6782
+ return $node.attr('class', 'mercury-parser-keep');
6783
+ },
6784
+ img: function img($node) {
6785
+ var srcset = $node.attr('srcset');
6786
+
6787
+ var _split = (srcset || '').split(','),
6788
+ _split2 = _slicedToArray(_split, 1),
6789
+ src = _split2[0];
6790
+
6791
+ if (src) {
6792
+ $node.parent().replaceWith("<figure><img srcset=\"".concat(srcset, "\" src=\"").concat(src, "\"/></figure>"));
6793
+ }
6794
+ }
6795
+ },
6796
+ clean: ['cite', '.duet--ad--native-ad-rail', '.duet--layout--rail', '.duet--article--table-of-contents']
6797
+ }
6798
+ };
6799
+
6800
+ var WwwThevergeComExtractor = {
6801
+ domain: 'www.theverge.com',
6802
+ title: {
6803
+ selectors: [['meta[name="og:title"]', 'value']]
6804
+ },
6805
+ author: {
6806
+ selectors: [['meta[name="author"]', 'value']]
6807
+ },
6808
+ date_published: {
6809
+ selectors: [['meta[name="article:published_time"]', 'value']]
6810
+ },
6811
+ lead_image_url: {
6812
+ selectors: [['meta[name="og:image"]', 'value']]
6813
+ },
6814
+ content: {
6815
+ selectors: ['#zephr-anchor', 'article'],
6816
+ transforms: {
6817
+ h2: function h2($node) {
6818
+ return $node.attr('class', 'mercury-parser-keep');
6819
+ },
6820
+ h3: function h3($node) {
6821
+ return $node.attr('class', 'mercury-parser-keep');
6822
+ },
6823
+ h4: function h4($node) {
6824
+ return $node.attr('class', 'mercury-parser-keep');
6825
+ },
6826
+ img: function img($node) {
6827
+ var srcset = $node.attr('srcset');
6828
+
6829
+ var _split = (srcset || '').split(','),
6830
+ _split2 = _slicedToArray(_split, 1),
6831
+ src = _split2[0];
6832
+
6833
+ if (src) {
6834
+ $node.parent().replaceWith("<figure><img srcset=\"".concat(srcset, "\" src=\"").concat(src, "\"/></figure>"));
6835
+ }
6836
+ }
6837
+ },
6838
+ clean: []
6839
+ }
6840
+ };
6841
+
6842
+ var WwwTechpowerupComExtractor = {
6843
+ domain: 'www.techpowerup.com',
6844
+ title: {
6845
+ selectors: [['meta[name="og:title"]', 'value']]
6846
+ },
6847
+ author: {
6848
+ selectors: ['.byline address']
6849
+ },
6850
+ date_published: {
6851
+ selectors: [['.byline time[datetime]', 'datetime']]
6852
+ },
6853
+ lead_image_url: {
6854
+ selectors: [['meta[name="og:image"]', 'value']]
6855
+ },
6856
+ content: {
6857
+ selectors: ['.contnt'],
6858
+ transforms: {
6859
+ h2: function h2(node) {
6860
+ return node.attr('class', 'mercury-parser-keep');
6861
+ }
6862
+ },
6863
+ clean: ['header', 'footer']
6864
+ },
6865
+ next_page_url: {
6866
+ selectors: [['.nextpage-bottom', 'href']]
6867
+ }
6868
+ };
6869
+
6805
6870
 
6806
6871
 
6807
6872
  var CustomExtractors = /*#__PURE__*/Object.freeze({
@@ -6828,7 +6893,6 @@ var CustomExtractors = /*#__PURE__*/Object.freeze({
6828
6893
  WwwHuffingtonpostComExtractor: WwwHuffingtonpostComExtractor,
6829
6894
  NewrepublicComExtractor: NewrepublicComExtractor,
6830
6895
  MoneyCnnComExtractor: MoneyCnnComExtractor,
6831
- WwwThevergeComExtractor: WwwThevergeComExtractor,
6832
6896
  WwwCnnComExtractor: WwwCnnComExtractor,
6833
6897
  WwwAolComExtractor: WwwAolComExtractor,
6834
6898
  WwwYoutubeComExtractor: WwwYoutubeComExtractor,
@@ -6980,7 +7044,10 @@ var CustomExtractors = /*#__PURE__*/Object.freeze({
6980
7044
  Nineto5googleComExtractor: Nineto5googleComExtractor,
6981
7045
  WwwEngadgetComExtractor: WwwEngadgetComExtractor,
6982
7046
  TarnkappeInfoExtractor: TarnkappeInfoExtractor,
6983
- WwwVortezNetExtractor: WwwVortezNetExtractor
7047
+ WwwVortezNetExtractor: WwwVortezNetExtractor,
7048
+ WwwPolygonComExtractor: WwwPolygonComExtractor,
7049
+ WwwThevergeComExtractor: WwwThevergeComExtractor,
7050
+ WwwTechpowerupComExtractor: WwwTechpowerupComExtractor
6984
7051
  });
6985
7052
 
6986
7053
  var Extractors = _Object$keys(CustomExtractors).reduce(function (acc, key) {