@jocmp/mercury-parser 2.3.4 → 2.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/mercury.js CHANGED
@@ -6453,20 +6453,20 @@ var WwwHeiseDeExtractor = {
6453
6453
  return $node.attr('class', 'mercury-parser-keep');
6454
6454
  }
6455
6455
  },
6456
- clean: []
6456
+ clean: ['.ad-mobile-group-1', '.branding', '[data-component="RecommendationBox"]']
6457
6457
  }
6458
6458
  };
6459
6459
 
6460
6460
  var TldrTechExtractor = {
6461
6461
  domain: 'tldr.tech',
6462
6462
  title: {
6463
- selectors: [['meta[name="og:title"]', 'value'], 'title']
6463
+ selectors: ['h1']
6464
6464
  },
6465
6465
  lead_image_url: {
6466
6466
  selectors: [['meta[name="twitter:image"]', 'value']]
6467
6467
  },
6468
6468
  content: {
6469
- selectors: ['body'],
6469
+ selectors: ['.content-center', 'body'],
6470
6470
  transforms: {
6471
6471
  h2: function h2($node) {
6472
6472
  return $node.attr('class', 'mercury-parser-keep');
@@ -6479,6 +6479,153 @@ var TldrTechExtractor = {
6479
6479
  }
6480
6480
  };
6481
6481
 
6482
+ var BskyAppExtractor = {
6483
+ domain: 'bsky.app',
6484
+ title: {
6485
+ selectors: [['meta[name="og:title"]', 'value']]
6486
+ },
6487
+ author: null,
6488
+ date_published: null,
6489
+ lead_image_url: {
6490
+ selectors: [['meta[property="og:image"]', 'content'], ['meta[name="og:image"]', 'value']]
6491
+ },
6492
+ content: {
6493
+ selectors: ['noscript'],
6494
+ transforms: {
6495
+ noscript: function noscript($node, $) {
6496
+ var innerHtml = $.browser ? $node.text() : $node.html();
6497
+ var summary = $(innerHtml).find('#bsky_post_text');
6498
+ $node.replaceWith(summary.html());
6499
+ }
6500
+ },
6501
+ clean: []
6502
+ }
6503
+ };
6504
+
6505
+ var WwwNtvDeExtractor = {
6506
+ domain: 'www.n-tv.de',
6507
+ title: {
6508
+ selectors: [['meta[name="og:title"]', 'value']]
6509
+ },
6510
+ date_published: {
6511
+ selectors: [['meta[name="date"]', 'value']]
6512
+ },
6513
+ lead_image_url: {
6514
+ selectors: [['meta[name="og:image"]', 'value']]
6515
+ },
6516
+ content: {
6517
+ selectors: ['.article__text', 'article'],
6518
+ transforms: {},
6519
+ clean: ['.article__share-main']
6520
+ }
6521
+ };
6522
+
6523
+ var SportSePlExtractor = {
6524
+ domain: 'sport.se.pl',
6525
+ title: {
6526
+ selectors: [['meta[name="og:title"]', 'value']]
6527
+ },
6528
+ author: {
6529
+ selectors: ['.article_author']
6530
+ },
6531
+ date_published: {
6532
+ selectors: ['#timezone'],
6533
+ timezone: 'Europe/Warsaw'
6534
+ },
6535
+ lead_image_url: {
6536
+ selectors: [['meta[name="og:image"]', 'value']]
6537
+ },
6538
+ content: {
6539
+ selectors: ['article'],
6540
+ transforms: {
6541
+ h2: function h2(node) {
6542
+ return node.attr('class', 'mercury-parser-keep');
6543
+ }
6544
+ },
6545
+ clean: ['#timezone', '.article__author__croppimg', '.article_authors_with_thumbnail', '.related_articles__elements', '.gl_plugin.socials', '.gl_plugin.player', '.gl_plugin.video_player', '.gl_plugin + video']
6546
+ }
6547
+ };
6548
+
6549
+ var WwwSePlExtractor = {
6550
+ domain: 'www.se.pl',
6551
+ title: {
6552
+ selectors: [['meta[name="og:title"]', 'value']]
6553
+ },
6554
+ author: {
6555
+ selectors: ['.article_author:first-of-type']
6556
+ },
6557
+ date_published: {
6558
+ selectors: ['#timezone'],
6559
+ timezone: 'Europe/Warsaw'
6560
+ },
6561
+ lead_image_url: {
6562
+ selectors: [['meta[name="og:image"]', 'value']]
6563
+ },
6564
+ content: {
6565
+ selectors: ['article'],
6566
+ transforms: {
6567
+ h2: function h2(node) {
6568
+ return node.attr('class', 'mercury-parser-keep');
6569
+ }
6570
+ },
6571
+ clean: ['#timezone', '.article__author__croppimg', '.article_authors_with_thumbnail', '.related_articles__elements', '.gl_plugin.socials', '.gl_plugin.player', '.gl_plugin.video_player', '.gl_plugin + video']
6572
+ }
6573
+ };
6574
+
6575
+ var PolitykaSePlExtractor = {
6576
+ domain: 'polityka.se.pl',
6577
+ title: {
6578
+ selectors: [['meta[name="og:title"]', 'value']]
6579
+ },
6580
+ author: {
6581
+ selectors: ['.article_author:first-of-type']
6582
+ },
6583
+ date_published: {
6584
+ selectors: ['#timezone'],
6585
+ timezone: 'Europe/Warsaw'
6586
+ },
6587
+ lead_image_url: {
6588
+ selectors: [['meta[name="og:image"]', 'value']]
6589
+ },
6590
+ content: {
6591
+ selectors: ['article'],
6592
+ transforms: {
6593
+ h2: function h2(node) {
6594
+ return node.attr('class', 'mercury-parser-keep');
6595
+ }
6596
+ },
6597
+ clean: ['.article__author__croppimg', // author photo
6598
+ '.related_articles__elements', '.gl_plugin.socials', '.gl_plugin.player', '.gl_plugin.video_player', '.gl_plugin + video']
6599
+ }
6600
+ };
6601
+
6602
+ var SuperserialeSePlExtractor = {
6603
+ domain: 'superseriale.se.pl',
6604
+ title: {
6605
+ selectors: [['meta[name="og:title"]', 'value']]
6606
+ },
6607
+ author: {
6608
+ selectors: ['.article_author:first-of-type']
6609
+ },
6610
+ date_published: {
6611
+ selectors: ['#timezone'],
6612
+ timezone: 'Europe/Warsaw'
6613
+ },
6614
+ lead_image_url: {
6615
+ selectors: [['meta[name="og:image"]', 'value']]
6616
+ },
6617
+ content: {
6618
+ selectors: ['article'],
6619
+ transforms: {
6620
+ h2: function h2(node) {
6621
+ return node.attr('class', 'mercury-parser-keep');
6622
+ }
6623
+ },
6624
+ clean: ['#timezone', '.article__author__croppimg', // author photo
6625
+ '.related_articles__elements', '.gl_plugin.socials', '.gl_plugin.player', '.gl_plugin.video_player', '.gl_plugin + video']
6626
+ }
6627
+ };
6628
+
6482
6629
 
6483
6630
 
6484
6631
  var CustomExtractors = /*#__PURE__*/Object.freeze({
@@ -6635,7 +6782,13 @@ var CustomExtractors = /*#__PURE__*/Object.freeze({
6635
6782
  WwwChannelnewsasiaComExtractor: WwwChannelnewsasiaComExtractor,
6636
6783
  WccftechComExtractor: WccftechComExtractor,
6637
6784
  WwwHeiseDeExtractor: WwwHeiseDeExtractor,
6638
- TldrTechExtractor: TldrTechExtractor
6785
+ TldrTechExtractor: TldrTechExtractor,
6786
+ BskyAppExtractor: BskyAppExtractor,
6787
+ WwwNtvDeExtractor: WwwNtvDeExtractor,
6788
+ SportSePlExtractor: SportSePlExtractor,
6789
+ WwwSePlExtractor: WwwSePlExtractor,
6790
+ PolitykaSePlExtractor: PolitykaSePlExtractor,
6791
+ SuperserialeSePlExtractor: SuperserialeSePlExtractor
6639
6792
  });
6640
6793
 
6641
6794
  var Extractors = _Object$keys(CustomExtractors).reduce(function (acc, key) {