@jocmp/mercury-parser 2.3.7 → 2.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/generate-custom-parser.js +161 -16
- package/dist/generate-custom-parser.js.map +1 -1
- package/dist/mercury.js +151 -1
- package/dist/mercury.js.map +1 -1
- package/dist/mercury.web.js +1 -1
- package/dist/mercury.web.js.map +1 -1
- package/package.json +1 -1
package/dist/mercury.js
CHANGED
|
@@ -6606,6 +6606,150 @@ var BialystokSePlExtractor = _objectSpread({}, WwwSePlExtractor, {
|
|
|
6606
6606
|
domain: 'bialystok.se.pl'
|
|
6607
6607
|
});
|
|
6608
6608
|
|
|
6609
|
+
/**
 * Custom extractor configuration for www.lebensmittelwarnung.de.
 *
 * Every field lists the selectors to try for that piece of article data.
 * A nested array such as ['selector', 'attr'] presumably means "read this
 * attribute from the matched element" -- confirm against the selector
 * resolver, which is not visible here.
 */
var WwwLebensmittelwarnungDeExtractor = {
  domain: 'www.lebensmittelwarnung.de',
  title: {
    selectors: ['.lmw-intro__heading', 'title']
  },
  date_published: {
    selectors: [['.lmw-intro__meta > time', 'datetime']],
    timezone: 'Europe/Berlin'
  },
  lead_image_url: {
    selectors: [['meta[name="og:image"]', 'value']]
  },
  content: {
    selectors: ['main'],
    // Keys are selectors; each function receives a node wrapper exposing
    // find/text/attr (presumably the matched element -- confirm against
    // the transform runner).
    transforms: {
      /**
       * When the heading wraps a <button>, drop the toggle icon and replace
       * the heading's content with the button's trimmed text. The heading
       * is always tagged with the 'mercury-parser-keep' class.
       */
      h2: function h2(node) {
        // Query once and reuse the result for both the check and the text.
        var button = node.find('button');

        if (button.length > 0) {
          // Remove the icon first so it does not contribute to button.text().
          node.find('.lmw-section__toggle-icon').remove();
          node.text(button.text().trim());
        }

        node.attr('class', 'mercury-parser-keep');
      },
      // Tag lists with the keep class.
      ul: function ul(node) {
        node.attr('class', 'mercury-parser-keep');
      },
      '.lmw-bodytext': function lmwBodytext(node) {
        // Contact information
        node.attr('class', 'mercury-parser-keep');
      },
      // Tag description-list items with the keep class.
      '.lmw-description-list__item': function lmwDescriptionList__item(node) {
        node.attr('class', 'mercury-parser-keep');
      }
    },
    clean: []
  }
};
|
|
6648
|
+
|
|
6649
|
+
/**
 * Custom extractor configuration for www.qbitai.com.
 * Title is looked up via 'title' then 'h1'; content comes from '.article'
 * with '.article_info' listed for cleaning.
 */
var WwwQbitaiComExtractor = {
  domain: 'www.qbitai.com',
  title: { selectors: ['title', 'h1'] },
  content: {
    selectors: ['.article'],
    transforms: {
      // Re-class the '.zhaiyao' node and hand back whatever attr() returns.
      '.zhaiyao': function zhaiyao(summary) {
        var result = summary.attr('class', 'mercury-parser-keep');
        return result;
      }
    },
    clean: ['.article_info']
  }
};
|
|
6664
|
+
|
|
6665
|
+
/**
 * Custom extractor configuration for economictimes.indiatimes.com.
 * Pure data: selector lists per field, no transforms, and a single
 * selector ('span.imgAgency') listed for cleaning.
 */
var EconomictimesIndiatimesComExtractor = {
  domain: 'economictimes.indiatimes.com',
  title: { selectors: ['title', ['meta[name="og:title"]', 'value']] },
  author: { selectors: ['a[rel="author"]'] },
  lead_image_url: { selectors: [['meta[name="og:image"]', 'value']] },
  content: {
    selectors: ['article'],
    transforms: {},
    clean: ['span.imgAgency']
  }
};
|
|
6682
|
+
|
|
6683
|
+
/**
 * Custom extractor configuration for factorio.com.
 *
 * Content is selected with the pair ['.blog-post', 'div:nth-child(2)'];
 * how the parser combines the two selectors is not visible here.
 */
var FactorioComExtractor = {
  domain: 'factorio.com',
  title: {
    selectors: ['title']
  },
  lead_image_url: {
    selectors: [['meta[name="og:image"]', 'value']]
  },
  content: {
    selectors: [['.blog-post', 'div:nth-child(2)']],
    transforms: {
      /**
       * Moves the text of an <author> element found inside the heading into
       * a new <p> placed after the heading, then removes the <author>
       * element. Does nothing when the author text is empty.
       */
      h3: function h3(node) {
        var author = node.find('author');
        var authorText = author.text();

        if (authorText) {
          // text() yields decoded characters; re-escape them so a byline
          // containing '<', '>' or '&' is inserted as text, not as markup.
          var escaped = authorText
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;');
          node.after("<p>".concat(escaped, "</p>"));
          author.remove();
        }
      }
    },
    clean: ['.logo-expansion-space-age']
  }
};
|
|
6706
|
+
|
|
6707
|
+
/**
 * Custom extractor configuration for www.tagesschau.de.
 *
 * Lists the selectors to try for title, author, publication date and lead
 * image, plus the content root ('article') and the selectors listed for
 * cleaning.
 */
var WwwTagesschauDeExtractor = {
  domain: 'www.tagesschau.de',
  title: {
    selectors: ['.seitenkopf__headline--text', 'title']
  },
  author: {
    // 'authorline__link' is a class name, so it needs the leading dot;
    // without it the selector targets a non-existent <authorline__link>
    // element type and can never match.
    selectors: ['.authorline__author .authorline__link:first-child']
  },
  date_published: {
    selectors: [['meta[name="date"]', 'value'], '.metatextline'],
    timezone: 'UTC'
  },
  lead_image_url: {
    selectors: [['meta[name="og:image"]', 'value']]
  },
  content: {
    selectors: ['article'],
    clean: ['[data-config]', '.seitenkopf__headline', '.authorline__author', '.metatextline']
  }
};
|
|
6727
|
+
|
|
6728
|
+
/**
 * Custom extractor configuration for 9to5google.com.
 * Author, publication date and lead image are all read from <meta> tags;
 * content is rooted at 'main' with '.post-meta' listed for cleaning.
 */
var Nineto5googleComExtractor = {
  domain: '9to5google.com',
  title: { selectors: ['title', 'h1'] },
  author: { selectors: [['meta[name="author"]', 'value']] },
  date_published: { selectors: [['meta[name="article:published_time"]', 'value']] },
  lead_image_url: { selectors: [['meta[name="og:image"]', 'value']] },
  content: {
    selectors: ['main'],
    transforms: {
      // Strip the 'sizes' attribute from every image node handed in.
      img: function img(image) {
        image.removeAttr('sizes');
      }
    },
    clean: ['.post-meta']
  }
};
|
|
6752
|
+
|
|
6609
6753
|
|
|
6610
6754
|
|
|
6611
6755
|
var CustomExtractors = /*#__PURE__*/Object.freeze({
|
|
@@ -6776,7 +6920,13 @@ var CustomExtractors = /*#__PURE__*/Object.freeze({
|
|
|
6776
6920
|
LodzSePlExtractor: LodzSePlExtractor,
|
|
6777
6921
|
WroclawSePlExtractor: WroclawSePlExtractor,
|
|
6778
6922
|
LublinSePlExtractor: LublinSePlExtractor,
|
|
6779
|
-
BialystokSePlExtractor: BialystokSePlExtractor
|
|
6923
|
+
BialystokSePlExtractor: BialystokSePlExtractor,
|
|
6924
|
+
WwwLebensmittelwarnungDeExtractor: WwwLebensmittelwarnungDeExtractor,
|
|
6925
|
+
WwwQbitaiComExtractor: WwwQbitaiComExtractor,
|
|
6926
|
+
EconomictimesIndiatimesComExtractor: EconomictimesIndiatimesComExtractor,
|
|
6927
|
+
FactorioComExtractor: FactorioComExtractor,
|
|
6928
|
+
WwwTagesschauDeExtractor: WwwTagesschauDeExtractor,
|
|
6929
|
+
Nineto5googleComExtractor: Nineto5googleComExtractor
|
|
6780
6930
|
});
|
|
6781
6931
|
|
|
6782
6932
|
var Extractors = _Object$keys(CustomExtractors).reduce(function (acc, key) {
|