rsshub 1.0.0-master.f97cdb2 → 1.0.0-master.fa3428f
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/lib/config.js +3 -1
- package/lib/radar-rules.js +0 -92
- package/lib/router.js +68 -71
- package/lib/routes/tencent/wechat/miniprogram/framework.js +4 -2
- package/lib/utils/common-config.js +4 -0
- package/lib/utils/parse-date.js +8 -7
- package/lib/utils/rand-user-agent.js +29 -0
- package/lib/utils/request-wrapper.js +2 -5
- package/lib/utils/wechat-mp.js +106 -10
- package/lib/v2/agirls/index.js +6 -0
- package/lib/v2/bilibili/video.js +1 -0
- package/lib/v2/bookfere/category.js +36 -0
- package/lib/v2/bookfere/maintainer.js +3 -0
- package/lib/v2/bookfere/radar.js +11 -0
- package/lib/v2/bookfere/router.js +3 -0
- package/lib/v2/cankaoxiaoxi/index.js +81 -0
- package/lib/v2/cankaoxiaoxi/maintainer.js +3 -0
- package/lib/v2/cankaoxiaoxi/radar.js +61 -0
- package/lib/v2/cankaoxiaoxi/router.js +3 -0
- package/lib/{routes → v2}/cctv/category.js +1 -1
- package/lib/{routes → v2}/cctv/jx.js +6 -1
- package/lib/{routes → v2}/cctv/lm.js +1 -1
- package/lib/v2/cctv/maintainer.js +7 -0
- package/lib/v2/cctv/radar.js +41 -0
- package/lib/v2/cctv/router.js +7 -0
- package/lib/{routes → v2}/cctv/special.js +6 -4
- package/lib/{routes → v2}/cctv/utils/mzzlbg.js +8 -6
- package/lib/{routes → v2}/cctv/utils/news.js +25 -22
- package/lib/{routes → v2}/cctv/utils/xinwen1j1.js +20 -17
- package/lib/v2/cctv/xwlb.js +50 -0
- package/lib/v2/chinacef/experts.js +38 -0
- package/lib/v2/chinacef/hot.js +38 -0
- package/lib/v2/chinacef/index.js +39 -0
- package/lib/v2/chinacef/maintainer.js +5 -0
- package/lib/v2/chinacef/radar.js +25 -0
- package/lib/v2/chinacef/router.js +5 -0
- package/lib/v2/{wechat → chinacef}/templates/description.art +0 -0
- package/lib/v2/chinacef/utils.js +55 -0
- package/lib/v2/chinanews/index.js +65 -0
- package/lib/v2/chinanews/maintainer.js +3 -0
- package/lib/v2/chinanews/radar.js +13 -0
- package/lib/v2/chinanews/router.js +3 -0
- package/lib/v2/cnbeta/index.js +2 -1
- package/lib/v2/cnbeta/utils.js +1 -0
- package/lib/v2/dapenti/maintainer.js +4 -0
- package/lib/v2/dapenti/radar.js +27 -0
- package/lib/v2/dapenti/router.js +4 -0
- package/lib/{routes → v2}/dapenti/subject.js +1 -1
- package/lib/{routes → v2}/dapenti/tugua.js +1 -1
- package/lib/v2/dapenti/utils.js +75 -0
- package/lib/{routes → v2}/dongqiudi/daily.js +10 -13
- package/lib/v2/dongqiudi/maintainer.js +8 -0
- package/lib/{routes → v2}/dongqiudi/player_news.js +0 -0
- package/lib/v2/dongqiudi/radar.js +45 -0
- package/lib/{routes → v2}/dongqiudi/result.js +2 -1
- package/lib/v2/dongqiudi/router.js +8 -0
- package/lib/{routes → v2}/dongqiudi/special.js +0 -0
- package/lib/{routes → v2}/dongqiudi/team_news.js +0 -0
- package/lib/{routes → v2}/dongqiudi/top_news.js +12 -15
- package/lib/{routes → v2}/dongqiudi/utils.js +47 -28
- package/lib/v2/dut/defaults.js +53 -0
- package/lib/v2/dut/index.js +78 -0
- package/lib/v2/dut/maintainer.js +14 -0
- package/lib/v2/dut/radar.js +109 -0
- package/lib/v2/dut/router.js +4 -0
- package/lib/v2/dut/shortcuts.js +77 -0
- package/lib/{routes → v2}/dx2025/index.js +0 -0
- package/lib/v2/dx2025/maintainer.js +3 -0
- package/lib/v2/dx2025/radar.js +19 -0
- package/lib/v2/dx2025/router.js +3 -0
- package/lib/v2/e-hentai/radar.js +21 -3
- package/lib/v2/eagle/blog.js +47 -0
- package/lib/{routes → v2}/eagle/changelog.js +3 -1
- package/lib/v2/eagle/maintainer.js +4 -0
- package/lib/v2/eagle/radar.js +101 -0
- package/lib/v2/eagle/router.js +4 -0
- package/lib/{routes → v2}/ehentai/ehapi.js +50 -21
- package/lib/{routes → v2}/ehentai/favorites.js +4 -2
- package/lib/v2/ehentai/maintainer.js +5 -0
- package/lib/v2/ehentai/router.js +5 -0
- package/lib/{routes → v2}/ehentai/search.js +5 -3
- package/lib/{routes → v2}/ehentai/tag.js +4 -2
- package/lib/v2/firefox/index.js +7 -2
- package/lib/v2/fortunechina/index.js +1 -1
- package/lib/v2/github/comments.js +7 -0
- package/lib/v2/gitpod/blog.js +8 -0
- package/lib/v2/gov/maintainer.js +1 -0
- package/lib/v2/gov/nrta/news.js +63 -0
- package/lib/v2/gov/radar.js +11 -0
- package/lib/v2/gov/router.js +1 -0
- package/lib/v2/guancha/index.js +1 -1
- package/lib/v2/guangdiu/cheaps.js +28 -0
- package/lib/v2/guangdiu/index.js +44 -0
- package/lib/v2/guangdiu/maintainer.js +5 -0
- package/lib/v2/guangdiu/radar.js +25 -0
- package/lib/v2/guangdiu/rank.js +38 -0
- package/lib/v2/guangdiu/router.js +5 -0
- package/lib/{routes/guokr/calendar.js → v2/guokr/channel.js} +2 -2
- package/lib/v2/guokr/maintainer.js +4 -0
- package/lib/v2/guokr/radar.js +19 -0
- package/lib/v2/guokr/router.js +4 -0
- package/lib/{routes → v2}/guokr/scientific.js +3 -3
- package/lib/v2/hkej/index.js +9 -15
- package/lib/v2/hkepc/index.js +8 -0
- package/lib/v2/hket/index.js +10 -2
- package/lib/v2/huangz/index.js +29 -0
- package/lib/v2/huangz/maintainer.js +3 -0
- package/lib/v2/huangz/radar.js +13 -0
- package/lib/v2/huangz/router.js +3 -0
- package/lib/v2/huanqiu/index.js +65 -0
- package/lib/v2/huanqiu/maintainer.js +3 -0
- package/lib/v2/huanqiu/radar.js +13 -0
- package/lib/v2/huanqiu/router.js +3 -0
- package/lib/v2/miris/blog.js +19 -0
- package/lib/v2/miris/maintainer.js +3 -0
- package/lib/v2/miris/radar.js +11 -0
- package/lib/v2/miris/router.js +3 -0
- package/lib/v2/news/whxw.js +2 -2
- package/lib/v2/npm/package.js +1 -1
- package/lib/v2/npm/router.js +1 -1
- package/lib/v2/qbittorrent/maintainer.js +3 -0
- package/lib/v2/qbittorrent/news.js +59 -0
- package/lib/v2/qbittorrent/radar.js +13 -0
- package/lib/v2/qbittorrent/router.js +3 -0
- package/lib/v2/rsshub/sponsors.js +1 -0
- package/lib/{routes → v2}/sciencenet/blog.js +17 -12
- package/lib/v2/sciencenet/maintainer.js +4 -0
- package/lib/v2/sciencenet/radar.js +19 -0
- package/lib/v2/sciencenet/router.js +4 -0
- package/lib/v2/sciencenet/user.js +65 -0
- package/lib/v2/sdu/cmse.js +59 -0
- package/lib/v2/sdu/cs.js +57 -0
- package/lib/v2/sdu/data.js +110 -0
- package/lib/v2/sdu/epe.js +55 -0
- package/lib/v2/sdu/extractor/index.js +15 -0
- package/lib/v2/sdu/extractor/sdrj.js +21 -0
- package/lib/v2/sdu/extractor/view.js +21 -0
- package/lib/v2/sdu/extractor/wh/jwc.js +24 -0
- package/lib/v2/sdu/extractor/wh/news.js +21 -0
- package/lib/v2/sdu/maintainer.js +9 -0
- package/lib/v2/sdu/mech.js +60 -0
- package/lib/v2/sdu/radar.js +159 -0
- package/lib/v2/sdu/router.js +9 -0
- package/lib/v2/sdu/sc.js +61 -0
- package/lib/v2/sdu/wh/jwc.js +42 -0
- package/lib/v2/sdu/wh/news.js +38 -0
- package/lib/v2/stratechery/index.js +20 -0
- package/lib/v2/stratechery/maintainer.js +3 -0
- package/lib/v2/stratechery/radar.js +11 -0
- package/lib/v2/stratechery/router.js +3 -0
- package/lib/{routes → v2}/taptap/changelog.js +2 -1
- package/lib/v2/taptap/maintainer.js +5 -0
- package/lib/v2/taptap/radar.js +25 -0
- package/lib/v2/taptap/review.js +69 -0
- package/lib/v2/taptap/router.js +5 -0
- package/lib/v2/taptap/templates/videoPost.art +2 -0
- package/lib/v2/taptap/topic.js +79 -0
- package/lib/v2/taptap/utils.js +48 -0
- package/lib/v2/thecover/channel.js +66 -0
- package/lib/v2/thecover/maintainer.js +3 -0
- package/lib/v2/thecover/radar.js +13 -0
- package/lib/v2/thecover/router.js +3 -0
- package/lib/{routes → v2}/twreporter/category.js +7 -12
- package/lib/{routes → v2}/twreporter/fetch_article.js +5 -4
- package/lib/v2/twreporter/maintainer.js +5 -0
- package/lib/{routes → v2}/twreporter/newest.js +8 -13
- package/lib/v2/twreporter/photography.js +46 -0
- package/lib/v2/twreporter/radar.js +25 -0
- package/lib/v2/twreporter/router.js +5 -0
- package/lib/{routes/tencent → v2}/wechat/announce.js +3 -2
- package/lib/v2/wechat/ce.js +69 -0
- package/lib/v2/wechat/data258.js +137 -0
- package/lib/v2/wechat/ershcimi.js +35 -0
- package/lib/{routes/tencent → v2}/wechat/feeds.js +0 -0
- package/lib/v2/wechat/maintainer.js +11 -0
- package/lib/{routes/tencent → v2}/wechat/mp.js +0 -0
- package/lib/{routes/tencent → v2}/wechat/msgalbum.js +0 -0
- package/lib/v2/wechat/radar.js +76 -1
- package/lib/v2/wechat/router.js +11 -0
- package/lib/{routes/tencent → v2}/wechat/tgchannel.js +0 -0
- package/lib/{routes/tencent → v2}/wechat/uread.js +0 -0
- package/lib/{routes/tencent → v2}/wechat/wemp.js +0 -0
- package/lib/{routes/tencent → v2}/wechat/wxnmh.js +0 -0
- package/lib/{routes → v2}/xueqiu/favorite.js +2 -1
- package/lib/v2/xueqiu/fund.js +50 -0
- package/lib/{routes → v2}/xueqiu/hots.js +2 -1
- package/lib/v2/xueqiu/maintainer.js +10 -0
- package/lib/v2/xueqiu/radar.js +60 -0
- package/lib/v2/xueqiu/router.js +10 -0
- package/lib/{routes → v2}/xueqiu/snb.js +2 -1
- package/lib/{routes → v2}/xueqiu/stock_comments.js +4 -20
- package/lib/{routes → v2}/xueqiu/stock_info.js +2 -1
- package/lib/v2/xueqiu/templates/comments_description.art +10 -0
- package/lib/{routes → v2}/xueqiu/user.js +2 -1
- package/lib/{routes → v2}/xueqiu/user_stock.js +3 -2
- package/lib/v2/yunspe/maintainer.js +3 -0
- package/lib/v2/yunspe/newsflash.js +42 -0
- package/lib/v2/yunspe/radar.js +13 -0
- package/lib/v2/yunspe/router.js +3 -0
- package/lib/{routes → v2}/yystv/category.js +8 -8
- package/lib/{routes → v2}/yystv/docs.js +2 -2
- package/lib/v2/yystv/maintainer.js +4 -0
- package/lib/v2/yystv/radar.js +49 -0
- package/lib/v2/yystv/router.js +4 -0
- package/lib/{routes/universities → v2}/zju/career/index.js +14 -13
- package/lib/{routes/universities → v2}/zju/cst/custom.js +2 -1
- package/lib/{routes/universities → v2}/zju/cst/index.js +20 -24
- package/lib/v2/zju/grs/index.js +43 -0
- package/lib/v2/zju/list.js +66 -0
- package/lib/v2/zju/maintainer.js +8 -0
- package/lib/v2/zju/physics/index.js +52 -0
- package/lib/v2/zju/radar.js +139 -0
- package/lib/v2/zju/router.js +8 -0
- package/lib/views/atom.art +13 -1
- package/lib/views/welcome.art +4 -1
- package/package.json +13 -12
- package/lib/routes/cctv/xwlb.js +0 -48
- package/lib/routes/dapenti/utils.js +0 -73
- package/lib/routes/taptap/review.js +0 -39
- package/lib/routes/taptap/topic.js +0 -85
- package/lib/routes/tencent/wechat/_README +0 -1
- package/lib/routes/tencent/wechat/ce.js +0 -40
- package/lib/routes/tencent/wechat/ershcimi.js +0 -43
- package/lib/routes/twreporter/photography.js +0 -57
- package/lib/routes/universities/dut/index.js +0 -60
- package/lib/routes/universities/dut/subsite.js +0 -50
- package/lib/routes/universities/sdu/cmse.js +0 -55
- package/lib/routes/universities/sdu/cs.js +0 -55
- package/lib/routes/universities/sdu/epe.js +0 -54
- package/lib/routes/universities/sdu/mech.js +0 -58
- package/lib/routes/universities/sdu/sc.js +0 -56
- package/lib/routes/universities/zju/grs/index.js +0 -44
- package/lib/routes/universities/zju/list.js +0 -78
- package/lib/routes/universities/zju/physics/index.js +0 -55
- package/lib/routes/xueqiu/fund.js +0 -71
- package/lib/v2/wechat/templates/image.art +0 -1
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
const got = require('@/utils/got');
|
|
2
|
+
const cheerio = require('cheerio');
|
|
3
|
+
const { parseDate } = require('@/utils/parse-date');
|
|
4
|
+
const timezone = require('@/utils/timezone');
|
|
5
|
+
// Site origin; channel and article paths are appended to it.
const rootUrl = 'https://www.thecover.cn';

// Channel id -> channel display name (used to build the feed title).
// Listed in ascending id order, which is also the order integer-like keys enumerate in.
const nodes = {
    1: '国内',
    4: '商业',
    11: '云招考',
    12: '文娱',
    17: '封面号',
    21: '千面',
    46: '宽窄',
    3560: '四川',
    3686: '国际',
    3689: '体育',
    3889: '拍客',
    3892: '天下',
    3902: '30秒',
    3909: '辟谣',
    4002: '帮扶铁军',
};
|
|
24
|
+
|
|
25
|
+
module.exports = async (ctx) => {
|
|
26
|
+
const id = ctx.params.id ?? '3892';
|
|
27
|
+
const targetUrl = rootUrl.concat(`/channel_${id}`);
|
|
28
|
+
const resp = await got({
|
|
29
|
+
method: 'get',
|
|
30
|
+
url: targetUrl,
|
|
31
|
+
});
|
|
32
|
+
const $ = cheerio.load(resp.data);
|
|
33
|
+
const list = $('a.link-to-article')
|
|
34
|
+
.filter(function () {
|
|
35
|
+
return $(this).attr('href').startsWith('/');
|
|
36
|
+
})
|
|
37
|
+
.map((_, item) => ({
|
|
38
|
+
link: rootUrl.concat($(item).attr('href')),
|
|
39
|
+
}))
|
|
40
|
+
.get();
|
|
41
|
+
const items = await Promise.all(
|
|
42
|
+
list.map((item) =>
|
|
43
|
+
ctx.cache.tryGet(item.link, async () => {
|
|
44
|
+
const detailResponse = await got({
|
|
45
|
+
method: 'get',
|
|
46
|
+
url: item.link,
|
|
47
|
+
});
|
|
48
|
+
const content = cheerio.load(detailResponse.data);
|
|
49
|
+
item.title = content('h1', '.main-article').text();
|
|
50
|
+
item.description = content('section.article-content').html();
|
|
51
|
+
const info = content('span', '.props-of-title');
|
|
52
|
+
item.author = info.eq(0).text();
|
|
53
|
+
item.pubDate = timezone(parseDate(info.eq(1).text(), 'YYYY-MM-DD HH:mm'), +8);
|
|
54
|
+
return item;
|
|
55
|
+
})
|
|
56
|
+
)
|
|
57
|
+
);
|
|
58
|
+
|
|
59
|
+
ctx.state.data = {
|
|
60
|
+
title: `${nodes[id]}-封面新闻`,
|
|
61
|
+
link: targetUrl,
|
|
62
|
+
description: `封面新闻作为华西都市报深度融合转型和打造新型主流媒体的载体,牢固确立移动优先战略,创新移动新闻产品,打造移动传播矩阵,封面新闻的传播力、引导力、影响力和公信力不断得到各方肯定。封面新闻突破千万的用户下载量,呈现出以四川为主阵地的全国分布态势,用户年龄构成以20-35岁为主,“亿万年轻人的生活方式”的定位初步得到体现。`,
|
|
63
|
+
language: 'zh-cn',
|
|
64
|
+
item: items,
|
|
65
|
+
};
|
|
66
|
+
};
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
module.exports = {
|
|
2
|
+
'thecover.cn': {
|
|
3
|
+
_name: '封面新闻',
|
|
4
|
+
'.': [
|
|
5
|
+
{
|
|
6
|
+
title: '频道',
|
|
7
|
+
docs: 'https://docs.rsshub.app/new-media.html#the-cover',
|
|
8
|
+
source: ['/:id', '/'],
|
|
9
|
+
target: (params) => `/thecover/channel/${params.id.replace('channel_', '')}`,
|
|
10
|
+
},
|
|
11
|
+
],
|
|
12
|
+
},
|
|
13
|
+
};
|
|
@@ -6,26 +6,21 @@ const fetch = require('./fetch_article');
|
|
|
6
6
|
module.exports = async (ctx) => {
|
|
7
7
|
const baseURL = 'https://www.twreporter.org';
|
|
8
8
|
const url = baseURL + `/categories/${ctx.params.cid}`;
|
|
9
|
-
const res = await got
|
|
9
|
+
const res = await got(url);
|
|
10
10
|
const $ = cheerio.load(res.data);
|
|
11
11
|
const list = $('.lnKPLr').get();
|
|
12
12
|
const category = $('.kCfkTU').text();
|
|
13
13
|
|
|
14
14
|
const out = await Promise.all(
|
|
15
|
-
list.map(
|
|
15
|
+
list.map((item) => {
|
|
16
16
|
const $ = cheerio.load(item);
|
|
17
17
|
const address = baseURL + $('a').attr('href');
|
|
18
18
|
const title = $('.list-item__Title-sc-1dx5lew-5').text();
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
const single = await fetch(address);
|
|
25
|
-
single.title = title;
|
|
26
|
-
|
|
27
|
-
ctx.cache.set(address, JSON.stringify(single));
|
|
28
|
-
return Promise.resolve(single);
|
|
19
|
+
return ctx.cache.tryGet(address, async () => {
|
|
20
|
+
const single = await fetch(address);
|
|
21
|
+
single.title = title;
|
|
22
|
+
return single;
|
|
23
|
+
});
|
|
29
24
|
})
|
|
30
25
|
);
|
|
31
26
|
ctx.state.data = {
|
|
@@ -1,18 +1,19 @@
|
|
|
1
1
|
const cheerio = require('cheerio');
|
|
2
2
|
const got = require('@/utils/got');
|
|
3
|
+
const { parseDate } = require('@/utils/parse-date');
|
|
3
4
|
|
|
4
5
|
module.exports = async function fetch(address) {
|
|
5
|
-
const res = await got
|
|
6
|
+
const res = await got(address);
|
|
6
7
|
const capture = cheerio.load(res.data);
|
|
7
8
|
capture('.gIMvvS').remove();
|
|
8
9
|
|
|
9
|
-
let metaInfoBox = capture('.
|
|
10
|
+
let metaInfoBox = capture('.ffAPnj')
|
|
10
11
|
.filter((index) => index === 0)
|
|
11
12
|
.get();
|
|
12
13
|
|
|
13
14
|
// For photography
|
|
14
15
|
if (metaInfoBox.length === 0) {
|
|
15
|
-
metaInfoBox = capture('.
|
|
16
|
+
metaInfoBox = capture('.deNvJY')
|
|
16
17
|
.filter((index) => index === 0)
|
|
17
18
|
.get();
|
|
18
19
|
}
|
|
@@ -61,6 +62,6 @@ module.exports = async function fetch(address) {
|
|
|
61
62
|
description: contents,
|
|
62
63
|
link: address,
|
|
63
64
|
guid: address,
|
|
64
|
-
pubDate:
|
|
65
|
+
pubDate: parseDate(time, 'M/D/YYYY'),
|
|
65
66
|
};
|
|
66
67
|
};
|
|
@@ -5,25 +5,20 @@ const fetch = require('./fetch_article');
|
|
|
5
5
|
|
|
6
6
|
module.exports = async (ctx) => {
|
|
7
7
|
const url = 'https://www.twreporter.org';
|
|
8
|
-
const res = await got
|
|
8
|
+
const res = await got(url);
|
|
9
9
|
const $ = cheerio.load(res.data);
|
|
10
10
|
const list = $('.gKMjSz').get();
|
|
11
11
|
|
|
12
12
|
const out = await Promise.all(
|
|
13
|
-
list.map(
|
|
13
|
+
list.map((item) => {
|
|
14
14
|
const $ = cheerio.load(item);
|
|
15
15
|
const address = url + $('a').attr('href');
|
|
16
|
-
const title = $('.
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
const single = await fetch(address);
|
|
23
|
-
single.title = title;
|
|
24
|
-
|
|
25
|
-
ctx.cache.set(address, JSON.stringify(single));
|
|
26
|
-
return Promise.resolve(single);
|
|
16
|
+
const title = $('.latest-section__Title-hzxpx3-6').text();
|
|
17
|
+
return ctx.cache.tryGet(address, async () => {
|
|
18
|
+
const single = await fetch(address);
|
|
19
|
+
single.title = title;
|
|
20
|
+
return single;
|
|
21
|
+
});
|
|
27
22
|
})
|
|
28
23
|
);
|
|
29
24
|
ctx.state.data = {
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
const cheerio = require('cheerio');
|
|
2
|
+
const got = require('@/utils/got');
|
|
3
|
+
|
|
4
|
+
const fetch = require('./fetch_article');
|
|
5
|
+
|
|
6
|
+
module.exports = async (ctx) => {
|
|
7
|
+
const baseURL = 'https://www.twreporter.org';
|
|
8
|
+
const url = baseURL + `/photography`;
|
|
9
|
+
const res = await got(url);
|
|
10
|
+
const $ = cheerio.load(res.data);
|
|
11
|
+
const coverList = $('.WPJvn').get();
|
|
12
|
+
const commonList = $('.eVNsZf').get();
|
|
13
|
+
|
|
14
|
+
const coverView = await Promise.all(
|
|
15
|
+
coverList.map((item) => {
|
|
16
|
+
const $ = cheerio.load(item);
|
|
17
|
+
const address = baseURL + $('li > a').attr('href');
|
|
18
|
+
const title = $('.sc-1aojo4z-4').text();
|
|
19
|
+
return ctx.cache.tryGet(address, async () => {
|
|
20
|
+
const single = await fetch(address);
|
|
21
|
+
single.title = title;
|
|
22
|
+
return single;
|
|
23
|
+
});
|
|
24
|
+
})
|
|
25
|
+
);
|
|
26
|
+
|
|
27
|
+
const listView = await Promise.all(
|
|
28
|
+
commonList.map((item) => {
|
|
29
|
+
const $ = cheerio.load(item);
|
|
30
|
+
const address = baseURL + $('li > a').attr('href');
|
|
31
|
+
const title = $('.ii0887-4').text();
|
|
32
|
+
|
|
33
|
+
return ctx.cache.tryGet(address, async () => {
|
|
34
|
+
const single = await fetch(address);
|
|
35
|
+
single.title = title;
|
|
36
|
+
return single;
|
|
37
|
+
});
|
|
38
|
+
})
|
|
39
|
+
);
|
|
40
|
+
|
|
41
|
+
ctx.state.data = {
|
|
42
|
+
title: `報導者 | 影像`,
|
|
43
|
+
link: url,
|
|
44
|
+
item: coverView.concat(listView),
|
|
45
|
+
};
|
|
46
|
+
};
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
module.exports = {
|
|
2
|
+
'twreporter.org': {
|
|
3
|
+
_name: '報導者',
|
|
4
|
+
'.': [
|
|
5
|
+
{
|
|
6
|
+
title: '最新',
|
|
7
|
+
docs: 'https://docs.rsshub.app/new-media.html#bao-dao-zhe',
|
|
8
|
+
source: ['/'],
|
|
9
|
+
target: '/twreporter/newest',
|
|
10
|
+
},
|
|
11
|
+
{
|
|
12
|
+
title: '摄影',
|
|
13
|
+
docs: 'https://docs.rsshub.app/new-media.html#bao-dao-zhe',
|
|
14
|
+
source: ['/photography'],
|
|
15
|
+
target: '/twreporter',
|
|
16
|
+
},
|
|
17
|
+
{
|
|
18
|
+
title: '分类',
|
|
19
|
+
docs: 'https://docs.rsshub.app/new-media.html#bao-dao-zhe',
|
|
20
|
+
source: ['/categories/:tid'],
|
|
21
|
+
target: '/twreporter/category/:tid',
|
|
22
|
+
},
|
|
23
|
+
],
|
|
24
|
+
},
|
|
25
|
+
};
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
const got = require('@/utils/got');
|
|
2
2
|
const cheerio = require('cheerio');
|
|
3
|
+
const { parseDate } = require('@/utils/parse-date');
|
|
3
4
|
|
|
4
5
|
module.exports = async (ctx) => {
|
|
5
6
|
const { data: htmlString } = await got({
|
|
@@ -20,13 +21,13 @@ module.exports = async (ctx) => {
|
|
|
20
21
|
title: `${time} ${title}`,
|
|
21
22
|
link: `https://mp.weixin.qq.com${$link.attr('href')}`,
|
|
22
23
|
description: title,
|
|
23
|
-
pubDate:
|
|
24
|
+
pubDate: parseDate(time),
|
|
24
25
|
});
|
|
25
26
|
});
|
|
26
27
|
|
|
27
28
|
ctx.state.data = {
|
|
28
29
|
title: '微信公众平台-系统公告栏目',
|
|
29
|
-
link: 'https://mp.weixin.qq.com/cgi-bin/announce?action=getannouncementlist&lang=zh_CN
|
|
30
|
+
link: 'https://mp.weixin.qq.com/cgi-bin/announce?action=getannouncementlist&lang=zh_CN',
|
|
30
31
|
item: announceList,
|
|
31
32
|
};
|
|
32
33
|
};
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
const parser = require('@/utils/rss-parser');
|
|
2
|
+
const got = require('@/utils/got');
|
|
3
|
+
const cheerio = require('cheerio');
|
|
4
|
+
const { fixArticleContent } = require('@/utils/wechat-mp');
|
|
5
|
+
const { parseDate } = require('@/utils/parse-date');
|
|
6
|
+
|
|
7
|
+
// any UA containing "RSS" can pass the check
|
|
8
|
+
// mark the UA as a desktop UA with "(X11; Linux x86_64)"
|
|
9
|
+
const UA = 'Mozilla/5.0 (X11; Linux x86_64) RSS Reader';
|
|
10
|
+
|
|
11
|
+
module.exports = async (ctx) => {
|
|
12
|
+
const { id } = ctx.params;
|
|
13
|
+
|
|
14
|
+
const feed = await parser.parseString(
|
|
15
|
+
await got
|
|
16
|
+
.get(`https://posts.careerengine.us/author/${id}/rss`, {
|
|
17
|
+
headers: {
|
|
18
|
+
'User-Agent': UA,
|
|
19
|
+
},
|
|
20
|
+
})
|
|
21
|
+
.then((_) => _.data)
|
|
22
|
+
);
|
|
23
|
+
|
|
24
|
+
const items = await Promise.all(
|
|
25
|
+
feed.items.splice(0, 10).map(async (item) => {
|
|
26
|
+
// generally speaking, changing `item.link` of an existing route could potentially break `item.guid`
|
|
27
|
+
// but since the route has been down for at least 8 months, it's probably safe
|
|
28
|
+
item.link = item.link.replace(/^http:\/\//, 'https://');
|
|
29
|
+
return await ctx.cache.tryGet(item.link, async () => {
|
|
30
|
+
const response = await got.get(item.link, {
|
|
31
|
+
headers: {
|
|
32
|
+
'User-Agent': UA,
|
|
33
|
+
},
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
const $ = cheerio.load(response.data);
|
|
37
|
+
|
|
38
|
+
const description = fixArticleContent($('.post'));
|
|
39
|
+
|
|
40
|
+
let pubDate = item.pubDate;
|
|
41
|
+
if (!pubDate || pubDate === 'Invalid Date') {
|
|
42
|
+
// sometimes the pubDate is not available in the official feed
|
|
43
|
+
const postDate = $('.post-date')
|
|
44
|
+
.text()
|
|
45
|
+
.replace(/\s+|发表/g, '');
|
|
46
|
+
// the date format is "发表 YYYY年MM月DD日 "
|
|
47
|
+
// following the official feed behavior: imprecise date is in UTC
|
|
48
|
+
// `<pubDate>Mon, 04 Apr 2022 00:00:00 GMT</pubDate>`
|
|
49
|
+
pubDate = parseDate(postDate, 'YYYY年MM月DD日');
|
|
50
|
+
pubDate = new Date(pubDate.getTime() - pubDate.getTimezoneOffset() * 60 * 1000);
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
return {
|
|
54
|
+
title: item.title,
|
|
55
|
+
description,
|
|
56
|
+
pubDate,
|
|
57
|
+
link: item.link,
|
|
58
|
+
};
|
|
59
|
+
});
|
|
60
|
+
})
|
|
61
|
+
);
|
|
62
|
+
|
|
63
|
+
ctx.state.data = {
|
|
64
|
+
title: `微信公众号 - ${feed.title}`,
|
|
65
|
+
link: `https://posts.careerengine.us/author/${id}/posts`,
|
|
66
|
+
description: feed.description,
|
|
67
|
+
item: items,
|
|
68
|
+
};
|
|
69
|
+
};
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
const got = require('@/utils/got');
|
|
2
|
+
const cheerio = require('cheerio');
|
|
3
|
+
const { parseDate } = require('@/utils/parse-date');
|
|
4
|
+
const timezone = require('@/utils/timezone');
|
|
5
|
+
const { finishArticleItem } = require('@/utils/wechat-mp');
|
|
6
|
+
const { RequestInProgressError } = require('@/errors');
|
|
7
|
+
const wait = require('@/utils/wait');
|
|
8
|
+
|
|
9
|
+
/**
 * Extracts title, link and publish date from one list entry.
 *
 * @param {object} $item - cheerio selection wrapping the list entry.
 * @param {string} hyperlinkSelector - selector of the anchor holding the title and href.
 * @param {string} timeSelector - selector of the element whose text is the 'YYYY-MM-DD HH:mm' time.
 * @returns {{title: string, link: string, pubDate: *}} the parsed entry; the time is passed through `timezone(..., 8)`.
 */
const parsePage = ($item, hyperlinkSelector, timeSelector) => {
    const $anchor = $item.find(hyperlinkSelector);
    const rawTime = $item.find(timeSelector).text();
    return {
        title: $anchor.text(),
        link: $anchor.attr('href'),
        pubDate: timezone(parseDate(rawTime, 'YYYY-MM-DD HH:mm'), 8),
    };
};
|
|
20
|
+
|
|
21
|
+
module.exports = async (ctx) => {
|
|
22
|
+
// !!! here we must use a lock to prevent other requests to break the anti-anti-crawler workarounds !!!
|
|
23
|
+
if ((await ctx.cache.get('data258:lock', false)) === '1') {
|
|
24
|
+
throw new RequestInProgressError('Another request is in progress, please try again later.');
|
|
25
|
+
}
|
|
26
|
+
// !!! here no need to acquire the lock, because the MP/category page has no crawler detection !!!
|
|
27
|
+
|
|
28
|
+
const id = ctx.params.id;
|
|
29
|
+
|
|
30
|
+
const limit = ctx.query.limit ? parseInt(ctx.query.limit) : 5;
|
|
31
|
+
|
|
32
|
+
const rootUrl = 'https://mp.data258.com';
|
|
33
|
+
const pageUrl = id ? `${rootUrl}/article/category/${id}` : rootUrl;
|
|
34
|
+
|
|
35
|
+
const response = await got(pageUrl);
|
|
36
|
+
const $ = cheerio.load(response.data);
|
|
37
|
+
|
|
38
|
+
const title = $('head title').text();
|
|
39
|
+
// title = title.endsWith('-微阅读') ? title.slice(0, title.length - 4) : title;
|
|
40
|
+
const description = $('meta[name="description"]').attr('content');
|
|
41
|
+
|
|
42
|
+
const categoryPage = $('ul.fly-list');
|
|
43
|
+
|
|
44
|
+
let items;
|
|
45
|
+
if (categoryPage && categoryPage.length) {
|
|
46
|
+
// got a category page
|
|
47
|
+
items = $(categoryPage)
|
|
48
|
+
.find('li')
|
|
49
|
+
.map((_, item) => parsePage($(item), 'h2 a', '.fly-list-info span'))
|
|
50
|
+
.get();
|
|
51
|
+
} else {
|
|
52
|
+
// got an MP page
|
|
53
|
+
items = $('ul.jie-row li')
|
|
54
|
+
.map((_, item) => parsePage($(item), 'a.jie-title', '.layui-hide-xs'))
|
|
55
|
+
.get();
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
items = items.slice(0, limit); // limit to avoid being anti-crawled
|
|
59
|
+
|
|
60
|
+
// !!! double-check !!!
|
|
61
|
+
if ((await ctx.cache.get('data258:lock', false)) === '1') {
|
|
62
|
+
throw new RequestInProgressError('Another request is in progress, please try again later.');
|
|
63
|
+
} else {
|
|
64
|
+
// !!! here we acquire the lock because the jump page has crawler detection !!!
|
|
65
|
+
await ctx.cache.set('data258:lock', '1', 60);
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
// !!! here we must use a for-loop to ensure the concurrency is 1 !!!
|
|
69
|
+
// !!! please do note that if you try to increase the concurrency, your IP will be banned for a long time !!!
|
|
70
|
+
|
|
71
|
+
let err; // !!! let RSSHub throw an anti-crawler prompt if the route is empty !!!
|
|
72
|
+
|
|
73
|
+
/* eslint-disable no-await-in-loop */
|
|
74
|
+
for (const item of items) {
|
|
75
|
+
// https://mp.data258.com/wx?id=${id}&t={token}, id is a permanent hex, token is a temporary base64
|
|
76
|
+
const cacheId = item.link.match(/id=([\da-f]+)/)[1];
|
|
77
|
+
item.link = item.link.startsWith('http') ? item.link : `${rootUrl}${item.link}`;
|
|
78
|
+
const realLink = await ctx.cache.tryGet(`data258:${cacheId}`, async () => {
|
|
79
|
+
try {
|
|
80
|
+
// !!! here we must sleep 1s to avoid being anti-crawled !!!
|
|
81
|
+
// !!! please do note that if the interval is less than 1s, your IP will be banned for a long time !!!
|
|
82
|
+
await wait(1000);
|
|
83
|
+
|
|
84
|
+
const response = await got.get(item.link, {
|
|
85
|
+
headers: {
|
|
86
|
+
Referer: pageUrl, // essential
|
|
87
|
+
},
|
|
88
|
+
});
|
|
89
|
+
if (response.data.includes('今日浏览次数已达上限')) {
|
|
90
|
+
// !!! as long as cache hits, the link will not be crawled and consume the limit !!!
|
|
91
|
+
// !!! so that's not a big problem if the RSSHub instance is self-hosted !!!
|
|
92
|
+
err = new got.RequestError(response.data, {}, response.request);
|
|
93
|
+
return null;
|
|
94
|
+
}
|
|
95
|
+
const $ = cheerio.load(response.data);
|
|
96
|
+
const jmpJS = $('script')
|
|
97
|
+
.filter((_, e) => $(e).html().includes('location.href'))
|
|
98
|
+
.html();
|
|
99
|
+
return jmpJS.match(/location\.href='([^']+)'/)[1];
|
|
100
|
+
} catch (e) {
|
|
101
|
+
err = e;
|
|
102
|
+
return null;
|
|
103
|
+
}
|
|
104
|
+
});
|
|
105
|
+
if (realLink) {
|
|
106
|
+
item.link = realLink;
|
|
107
|
+
} else {
|
|
108
|
+
break; // being anti-crawled, immediately cancel following operations
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
/* eslint-enable no-await-in-loop */
|
|
112
|
+
|
|
113
|
+
// !!! release the lock, let it expire immediately since no need to keep it in cache !!!
|
|
114
|
+
await ctx.cache.set('data258:lock', '0', 1);
|
|
115
|
+
|
|
116
|
+
// jump links are valid only for a short period of time, drop those un-jumped items
|
|
117
|
+
// http://mp.weixin.qq.com/s
|
|
118
|
+
items = items.filter((item) => item.link.match(/^https?:\/\/mp\.weixin\.qq\.com\/s/));
|
|
119
|
+
|
|
120
|
+
if (items.length === 0 && err) {
|
|
121
|
+
// !!! if each request is anti-crawled, the filtered items array will be empty !!!
|
|
122
|
+
// !!! let RSSHub throw an anti-crawler prompt !!!
|
|
123
|
+
throw err;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
await Promise.all(items.map(async (item) => await finishArticleItem(ctx, item, !!categoryPage)));
|
|
127
|
+
|
|
128
|
+
ctx.state.data = {
|
|
129
|
+
title,
|
|
130
|
+
link: pageUrl,
|
|
131
|
+
description,
|
|
132
|
+
item: items,
|
|
133
|
+
};
|
|
134
|
+
};
|
|
135
|
+
|
|
136
|
+
// TODO: login? the valid time for cookies seems to be short, and abusing account will probably get banned...
|
|
137
|
+
// TODO: fetch full article for the official RSS feed? unless someone who is VIP contributes their RSS feed for test...
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
const got = require('@/utils/got');
|
|
2
|
+
const cheerio = require('cheerio');
|
|
3
|
+
const { finishArticleItem } = require('@/utils/wechat-mp');
|
|
4
|
+
const { parseDate } = require('@/utils/parse-date');
|
|
5
|
+
const timezone = require('@/utils/timezone');
|
|
6
|
+
|
|
7
|
+
module.exports = async (ctx) => {
|
|
8
|
+
const { id } = ctx.params;
|
|
9
|
+
const rootUrl = 'https://www.cimidata.com';
|
|
10
|
+
|
|
11
|
+
const url = `${rootUrl}/a/${id}`;
|
|
12
|
+
const response = await got(url);
|
|
13
|
+
const $ = cheerio.load(response.data);
|
|
14
|
+
const items = $('.weui_media_box')
|
|
15
|
+
.map((_, ele) => {
|
|
16
|
+
const $item = cheerio.load(ele);
|
|
17
|
+
const link = $item('.weui_media_title a').attr('href');
|
|
18
|
+
return {
|
|
19
|
+
title: $item('.weui_media_title a').text(),
|
|
20
|
+
description: $item('.weui_media_desc').text(),
|
|
21
|
+
link,
|
|
22
|
+
pubDate: timezone(parseDate($item('.weui_media_extra_info').attr('title')), +8),
|
|
23
|
+
};
|
|
24
|
+
})
|
|
25
|
+
.get();
|
|
26
|
+
|
|
27
|
+
await Promise.all(items.map(async (item) => await finishArticleItem(ctx, item)));
|
|
28
|
+
|
|
29
|
+
ctx.state.data = {
|
|
30
|
+
title: `微信公众号 - ${$('span.name').text()}`,
|
|
31
|
+
link: url,
|
|
32
|
+
description: $('div.Profile-sideColumnItemValue').text(),
|
|
33
|
+
item: items,
|
|
34
|
+
};
|
|
35
|
+
};
|
|
File without changes
|
|
@@ -1,3 +1,14 @@
|
|
|
1
1
|
module.exports = {
|
|
2
|
+
'/announce': ['xyqfer'],
|
|
3
|
+
'/ce/:id': ['HenryQW'],
|
|
4
|
+
'/ershicimi/:id': ['sanmmm'],
|
|
5
|
+
'/data258/:id?': ['Rongronggg9'],
|
|
2
6
|
'/feeddd/:id': ['TonyRL', 'Rongronggg9'],
|
|
7
|
+
'/feeds': ['tylinux'],
|
|
8
|
+
'/mp/homepage/:biz/:hid/:cid?': ['MisteryMonster'],
|
|
9
|
+
'/mp/msgalbum/:biz/:aid': ['MisteryMonster'],
|
|
10
|
+
'/tgchannel/:id/:mpName?/:searchQueryType?': ['LogicJake', 'Rongronggg9'],
|
|
11
|
+
'/uread/:userid': ['kt286'],
|
|
12
|
+
'/wemp/:id': ['HenryQW'],
|
|
13
|
+
'/wxnmh/:id': ['laampui'],
|
|
3
14
|
};
|
|
File without changes
|
|
File without changes
|
package/lib/v2/wechat/radar.js
CHANGED
|
@@ -1,12 +1,87 @@
|
|
|
1
1
|
module.exports = {
|
|
2
|
+
'careerengine.us': {
|
|
3
|
+
_name: '微信',
|
|
4
|
+
posts: [
|
|
5
|
+
{
|
|
6
|
+
title: '公众号(CareerEngine 来源)',
|
|
7
|
+
docs: 'https://docs.rsshub.app/new-media.html#wei-xin',
|
|
8
|
+
source: ['/author/*id/posts'],
|
|
9
|
+
target: (params) => `/wechat/ce/${params.id}`,
|
|
10
|
+
},
|
|
11
|
+
],
|
|
12
|
+
},
|
|
13
|
+
'cimidata.com': {
|
|
14
|
+
_name: '微信',
|
|
15
|
+
'.': [
|
|
16
|
+
{
|
|
17
|
+
title: '公众号(二十次幂来源)',
|
|
18
|
+
docs: 'https://docs.rsshub.app/new-media.html#wei-xin',
|
|
19
|
+
source: ['/a/:id'],
|
|
20
|
+
target: '/wechat/ce/:id',
|
|
21
|
+
},
|
|
22
|
+
],
|
|
23
|
+
},
|
|
24
|
+
'data258.com': {
|
|
25
|
+
_name: '微信',
|
|
26
|
+
mp: [
|
|
27
|
+
{
|
|
28
|
+
title: '公众号(微阅读来源)',
|
|
29
|
+
docs: 'https://docs.rsshub.app/new-media.html#wei-xin',
|
|
30
|
+
source: ['/', '/article/category/:id'],
|
|
31
|
+
target: '/wechat/data258/:id?',
|
|
32
|
+
},
|
|
33
|
+
],
|
|
34
|
+
},
|
|
2
35
|
'feeddd.org': {
|
|
3
36
|
_name: '微信',
|
|
4
37
|
'.': [
|
|
5
38
|
{
|
|
6
|
-
title: '
|
|
39
|
+
title: '公众号(feeddd 来源)',
|
|
7
40
|
docs: 'https://docs.rsshub.app/new-media.html#wei-xin',
|
|
8
41
|
source: ['/'],
|
|
9
42
|
},
|
|
10
43
|
],
|
|
11
44
|
},
|
|
45
|
+
'mp.weixin.qq.com': {
|
|
46
|
+
_name: '微信',
|
|
47
|
+
'.': [
|
|
48
|
+
{
|
|
49
|
+
title: '公众平台系统公告栏目',
|
|
50
|
+
docs: 'https://docs.rsshub.app/new-media.html#wei-xin',
|
|
51
|
+
source: ['/cgi-bin/announce'],
|
|
52
|
+
target: '/wechat/announce',
|
|
53
|
+
},
|
|
54
|
+
],
|
|
55
|
+
},
|
|
56
|
+
'privacyhide.com': {
|
|
57
|
+
_name: '微信',
|
|
58
|
+
wechat: [
|
|
59
|
+
{
|
|
60
|
+
title: '公众号(wechat-feeds 来源)',
|
|
61
|
+
docs: 'https://docs.rsshub.app/new-media.html#wei-xin',
|
|
62
|
+
},
|
|
63
|
+
],
|
|
64
|
+
},
|
|
65
|
+
'wemp.app': {
|
|
66
|
+
_name: '微信',
|
|
67
|
+
'.': [
|
|
68
|
+
{
|
|
69
|
+
title: '公众号(wemp.app 来源)',
|
|
70
|
+
docs: 'https://docs.rsshub.app/new-media.html#wei-xin',
|
|
71
|
+
source: ['/accounts/:id'],
|
|
72
|
+
target: '/wechat/wemp/:id',
|
|
73
|
+
},
|
|
74
|
+
],
|
|
75
|
+
},
|
|
76
|
+
'wxnmh.com': {
|
|
77
|
+
_name: '微信',
|
|
78
|
+
'.': [
|
|
79
|
+
{
|
|
80
|
+
title: '公众号(wxnmh.com 来源)',
|
|
81
|
+
docs: 'https://docs.rsshub.app/new-media.html#wei-xin',
|
|
82
|
+
source: ['/:id'],
|
|
83
|
+
target: (params) => `/wechat/wxnmh/${params.id.replace('user-', '').replace('.htm', '')}`,
|
|
84
|
+
},
|
|
85
|
+
],
|
|
86
|
+
},
|
|
12
87
|
};
|