@jocmp/mercury-parser 2.3.3 → 2.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/generate-custom-parser.js +198 -1
- package/dist/generate-custom-parser.js.map +1 -1
- package/dist/mercury.js +206 -1
- package/dist/mercury.js.map +1 -1
- package/dist/mercury.web.js +1 -1
- package/dist/mercury.web.js.map +1 -1
- package/package.json +1 -2
package/dist/mercury.js
CHANGED
|
@@ -6429,6 +6429,203 @@ var WccftechComExtractor = {
|
|
|
6429
6429
|
}
|
|
6430
6430
|
};
|
|
6431
6431
|
|
|
6432
|
+
// Custom extractor configuration for pages on www.heise.de.
// Entries written as [selector, 'value'] pair a CSS selector with an attribute
// name (presumably the attribute whose text is extracted; the code that
// resolves these pairs is not visible in this chunk).
var WwwHeiseDeExtractor = {
  domain: 'www.heise.de',

  title: {
    selectors: [
      ['meta[name="og:title"]', 'value']
    ]
  },

  author: {
    selectors: [
      ['meta[name="author"]', 'value']
    ]
  },

  date_published: {
    selectors: [
      ['meta[name="date"]', 'value']
    ]
  },

  dek: {
    selectors: [
      '.a-article-header__lead'
    ]
  },

  lead_image_url: {
    selectors: [
      ['meta[name="og:image"]', 'value']
    ]
  },

  content: {
    selectors: [
      '.article-layout__content'
    ],

    transforms: {
      // Sets class="mercury-parser-keep" on each h3 and returns the result of
      // the attr() call (presumably so later cleaning leaves the heading in place).
      h3: function h3($heading) {
        var $tagged = $heading.attr('class', 'mercury-parser-keep');
        return $tagged;
      }
    },

    // Selectors listed for removal from the extracted content.
    clean: [
      '.ad-mobile-group-1',
      '.branding',
      '[data-component="RecommendationBox"]'
    ]
  }
};
|
|
6459
|
+
|
|
6460
|
+
// Custom extractor configuration for pages on tldr.tech.
var TldrTechExtractor = {
  domain: 'tldr.tech',

  title: {
    selectors: [
      'h1'
    ]
  },

  lead_image_url: {
    selectors: [
      ['meta[name="twitter:image"]', 'value']
    ]
  },

  content: {
    // Two candidate selectors: '.content-center' first, then 'body'
    // (presumably tried in order; the resolution code is not visible here).
    selectors: [
      '.content-center',
      'body'
    ],

    transforms: {
      // Both heading transforms set class="mercury-parser-keep" and return
      // whatever the attr() call returns.
      h2: function h2($heading) {
        var $tagged = $heading.attr('class', 'mercury-parser-keep');
        return $tagged;
      },
      h3: function h3($heading) {
        var $tagged = $heading.attr('class', 'mercury-parser-keep');
        return $tagged;
      }
    },

    // No selectors are listed for removal.
    clean: []
  }
};
|
|
6481
|
+
|
|
6482
|
+
// Custom extractor configuration for pages on bsky.app.
// author and date_published are explicitly set to null.
var BskyAppExtractor = {
  domain: 'bsky.app',

  title: {
    selectors: [
      ['meta[name="og:title"]', 'value']
    ]
  },

  author: null,

  date_published: null,

  lead_image_url: {
    selectors: [
      ['meta[property="og:image"]', 'content'],
      ['meta[name="og:image"]', 'value']
    ]
  },

  content: {
    selectors: [
      'noscript'
    ],

    transforms: {
      // Replaces the <noscript> node with the inner HTML of the
      // '#bsky_post_text' element found inside the noscript's own markup.
      // The markup is read with text() when $.browser is truthy and with
      // html() otherwise (presumably because browsers expose noscript
      // content as text; confirm against the browser build).
      // Returns nothing.
      noscript: function noscript($noscript, $) {
        var markup;
        if ($.browser) {
          markup = $noscript.text();
        } else {
          markup = $noscript.html();
        }

        var $postText = $(markup).find('#bsky_post_text');
        $noscript.replaceWith($postText.html());
      }
    },

    // No selectors are listed for removal.
    clean: []
  }
};
|
|
6504
|
+
|
|
6505
|
+
// Custom extractor configuration for pages on www.n-tv.de.
var WwwNtvDeExtractor = {
  domain: 'www.n-tv.de',

  title: {
    selectors: [
      ['meta[name="og:title"]', 'value']
    ]
  },

  date_published: {
    selectors: [
      ['meta[name="date"]', 'value']
    ]
  },

  lead_image_url: {
    selectors: [
      ['meta[name="og:image"]', 'value']
    ]
  },

  content: {
    // Two candidate selectors: '.article__text' first, then 'article'.
    selectors: [
      '.article__text',
      'article'
    ],

    // No element transforms are defined for this site.
    transforms: {},

    // Selectors listed for removal from the extracted content.
    clean: [
      '.article__share-main'
    ]
  }
};
|
|
6522
|
+
|
|
6523
|
+
// Custom extractor configuration for pages on sport.se.pl.
var SportSePlExtractor = {
  domain: 'sport.se.pl',

  title: {
    selectors: [
      ['meta[name="og:title"]', 'value']
    ]
  },

  author: {
    selectors: [
      '.article_author'
    ]
  },

  // The date is taken from the '#timezone' element, with the
  // 'Europe/Warsaw' timezone supplied alongside it.
  date_published: {
    selectors: [
      '#timezone'
    ],
    timezone: 'Europe/Warsaw'
  },

  lead_image_url: {
    selectors: [
      ['meta[name="og:image"]', 'value']
    ]
  },

  content: {
    selectors: [
      'article'
    ],

    transforms: {
      // Sets class="mercury-parser-keep" on each h2 and returns the result
      // of the attr() call.
      h2: function h2($heading) {
        var tagged = $heading.attr('class', 'mercury-parser-keep');
        return tagged;
      }
    },

    // Selectors listed for removal from the extracted content.
    clean: [
      '#timezone',
      '.article__author__croppimg',
      '.article_authors_with_thumbnail',
      '.related_articles__elements',
      '.gl_plugin.socials',
      '.gl_plugin.player',
      '.gl_plugin.video_player',
      '.gl_plugin + video'
    ]
  }
};
|
|
6548
|
+
|
|
6549
|
+
// Custom extractor configuration for pages on www.se.pl.
var WwwSePlExtractor = {
  domain: 'www.se.pl',

  title: {
    selectors: [
      ['meta[name="og:title"]', 'value']
    ]
  },

  author: {
    selectors: [
      '.article_author:first-of-type'
    ]
  },

  // The date is taken from the '#timezone' element, with the
  // 'Europe/Warsaw' timezone supplied alongside it.
  date_published: {
    selectors: [
      '#timezone'
    ],
    timezone: 'Europe/Warsaw'
  },

  lead_image_url: {
    selectors: [
      ['meta[name="og:image"]', 'value']
    ]
  },

  content: {
    selectors: [
      'article'
    ],

    transforms: {
      // Sets class="mercury-parser-keep" on each h2 and returns the result
      // of the attr() call.
      h2: function h2($heading) {
        var tagged = $heading.attr('class', 'mercury-parser-keep');
        return tagged;
      }
    },

    // Selectors listed for removal from the extracted content.
    clean: [
      '#timezone',
      '.article__author__croppimg',
      '.article_authors_with_thumbnail',
      '.related_articles__elements',
      '.gl_plugin.socials',
      '.gl_plugin.player',
      '.gl_plugin.video_player',
      '.gl_plugin + video'
    ]
  }
};
|
|
6574
|
+
|
|
6575
|
+
// Custom extractor configuration for pages on polityka.se.pl.
var PolitykaSePlExtractor = {
  domain: 'polityka.se.pl',

  title: {
    selectors: [
      ['meta[name="og:title"]', 'value']
    ]
  },

  author: {
    selectors: [
      '.article_author:first-of-type'
    ]
  },

  // The date is taken from the '#timezone' element, with the
  // 'Europe/Warsaw' timezone supplied alongside it.
  date_published: {
    selectors: [
      '#timezone'
    ],
    timezone: 'Europe/Warsaw'
  },

  lead_image_url: {
    selectors: [
      ['meta[name="og:image"]', 'value']
    ]
  },

  content: {
    selectors: [
      'article'
    ],

    transforms: {
      // Sets class="mercury-parser-keep" on each h2 and returns the result
      // of the attr() call.
      h2: function h2($heading) {
        var tagged = $heading.attr('class', 'mercury-parser-keep');
        return tagged;
      }
    },

    // Selectors listed for removal from the extracted content.
    clean: [
      '.article__author__croppimg', // author photo
      '.related_articles__elements',
      '.gl_plugin.socials',
      '.gl_plugin.player',
      '.gl_plugin.video_player',
      '.gl_plugin + video'
    ]
  }
};
|
|
6601
|
+
|
|
6602
|
+
// Custom extractor configuration for pages on superseriale.se.pl.
var SuperserialeSePlExtractor = {
  domain: 'superseriale.se.pl',

  title: {
    selectors: [
      ['meta[name="og:title"]', 'value']
    ]
  },

  author: {
    selectors: [
      '.article_author:first-of-type'
    ]
  },

  // The date is taken from the '#timezone' element, with the
  // 'Europe/Warsaw' timezone supplied alongside it.
  date_published: {
    selectors: [
      '#timezone'
    ],
    timezone: 'Europe/Warsaw'
  },

  lead_image_url: {
    selectors: [
      ['meta[name="og:image"]', 'value']
    ]
  },

  content: {
    selectors: [
      'article'
    ],

    transforms: {
      // Sets class="mercury-parser-keep" on each h2 and returns the result
      // of the attr() call.
      h2: function h2($heading) {
        var tagged = $heading.attr('class', 'mercury-parser-keep');
        return tagged;
      }
    },

    // Selectors listed for removal from the extracted content.
    clean: [
      '#timezone',
      '.article__author__croppimg', // author photo
      '.related_articles__elements',
      '.gl_plugin.socials',
      '.gl_plugin.player',
      '.gl_plugin.video_player',
      '.gl_plugin + video'
    ]
  }
};
|
|
6628
|
+
|
|
6432
6629
|
|
|
6433
6630
|
|
|
6434
6631
|
var CustomExtractors = /*#__PURE__*/Object.freeze({
|
|
@@ -6583,7 +6780,15 @@ var CustomExtractors = /*#__PURE__*/Object.freeze({
|
|
|
6583
6780
|
WwwSpiegelDeExtractor: WwwSpiegelDeExtractor,
|
|
6584
6781
|
MobilesyrupComExtractor: MobilesyrupComExtractor,
|
|
6585
6782
|
WwwChannelnewsasiaComExtractor: WwwChannelnewsasiaComExtractor,
|
|
6586
|
-
WccftechComExtractor: WccftechComExtractor
|
|
6783
|
+
WccftechComExtractor: WccftechComExtractor,
|
|
6784
|
+
WwwHeiseDeExtractor: WwwHeiseDeExtractor,
|
|
6785
|
+
TldrTechExtractor: TldrTechExtractor,
|
|
6786
|
+
BskyAppExtractor: BskyAppExtractor,
|
|
6787
|
+
WwwNtvDeExtractor: WwwNtvDeExtractor,
|
|
6788
|
+
SportSePlExtractor: SportSePlExtractor,
|
|
6789
|
+
WwwSePlExtractor: WwwSePlExtractor,
|
|
6790
|
+
PolitykaSePlExtractor: PolitykaSePlExtractor,
|
|
6791
|
+
SuperserialeSePlExtractor: SuperserialeSePlExtractor
|
|
6587
6792
|
});
|
|
6588
6793
|
|
|
6589
6794
|
var Extractors = _Object$keys(CustomExtractors).reduce(function (acc, key) {
|