rsshub 1.0.0-master.f6cb490 → 1.0.0-master.f6f0273
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/api/index.ts +1 -6
- package/lib/routes/2048/index.ts +24 -23
- package/lib/routes/anthropic/news.ts +27 -13
- package/lib/routes/asianfanfics/namespace.ts +7 -0
- package/lib/routes/asianfanfics/tag.ts +89 -0
- package/lib/routes/asianfanfics/text-search.ts +68 -0
- package/lib/routes/blockworks/index.ts +128 -0
- package/lib/routes/blockworks/namespace.ts +7 -0
- package/lib/routes/cmu/andypavlo/blog.ts +55 -0
- package/lib/routes/cmu/namespace.ts +7 -0
- package/lib/routes/coindesk/{index.ts → consensus-magazine.ts} +17 -21
- package/lib/routes/coindesk/namespace.ts +2 -1
- package/lib/routes/coindesk/news.ts +47 -0
- package/lib/routes/coindesk/utils.ts +26 -0
- package/lib/routes/cointelegraph/index.ts +106 -0
- package/lib/routes/cointelegraph/namespace.ts +7 -0
- package/lib/routes/collabo-cafe/category.ts +37 -0
- package/lib/routes/collabo-cafe/index.ts +35 -0
- package/lib/routes/collabo-cafe/namespace.ts +9 -0
- package/lib/routes/collabo-cafe/parser.ts +29 -0
- package/lib/routes/collabo-cafe/tag.ts +37 -0
- package/lib/routes/cryptoslate/index.ts +98 -0
- package/lib/routes/cryptoslate/namespace.ts +7 -0
- package/lib/routes/decrypt/index.ts +115 -0
- package/lib/routes/decrypt/namespace.ts +7 -0
- package/lib/routes/discuz/discuz.ts +7 -9
- package/lib/routes/fangchan/list.ts +224 -0
- package/lib/routes/fangchan/namespace.ts +9 -0
- package/lib/routes/fangchan/templates/description.art +7 -0
- package/lib/routes/foreignaffairs/namespace.ts +7 -0
- package/lib/routes/foreignaffairs/rss.ts +55 -0
- package/lib/routes/forklog/index.ts +72 -0
- package/lib/routes/forklog/namespace.ts +7 -0
- package/lib/routes/gcores/categories.ts +129 -0
- package/lib/routes/gcores/collections.ts +129 -0
- package/lib/routes/gcores/topics.ts +63 -0
- package/lib/routes/gov/moa/gjs.ts +210 -0
- package/lib/routes/gov/tianjin/tjftz.ts +53 -0
- package/lib/routes/gov/tianjin/tjrcgzw.ts +51 -0
- package/lib/routes/grainoil/category.ts +207 -0
- package/lib/routes/grainoil/namespace.ts +9 -0
- package/lib/routes/huxiu/util.ts +11 -9
- package/lib/routes/ifanr/category.ts +7 -2
- package/lib/routes/ifanr/digest.ts +1 -1
- package/lib/routes/ifanr/index.ts +1 -1
- package/lib/routes/instructables/projects.ts +20 -15
- package/lib/routes/juejin/collections.ts +1 -1
- package/lib/routes/komiic/comic.ts +88 -0
- package/lib/routes/komiic/namespace.ts +7 -0
- package/lib/routes/leagueoflegends/namespace.ts +8 -0
- package/lib/routes/leagueoflegends/patch-notes.ts +76 -0
- package/lib/routes/likeshop/index.ts +43 -0
- package/lib/routes/likeshop/namespace.ts +7 -0
- package/lib/routes/ltaaa/article.ts +180 -0
- package/lib/routes/ltaaa/namespace.ts +9 -0
- package/lib/routes/ltaaa/templates/description.art +7 -0
- package/lib/routes/mashiro/index.ts +1 -0
- package/lib/routes/nhentai/util.ts +4 -1
- package/lib/routes/pinterest/user.ts +9 -0
- package/lib/routes/sohu/mp.ts +3 -2
- package/lib/routes/spotify/show.ts +1 -1
- package/lib/routes/stcn/index.ts +241 -136
- package/lib/routes/stcn/kx.ts +144 -0
- package/lib/routes/swjtu/namespace.ts +1 -1
- package/lib/routes/swjtu/{scai/bks.ts → scai.ts} +34 -20
- package/lib/routes/swjtu/sports.ts +77 -0
- package/lib/routes/theblock/index.ts +142 -0
- package/lib/routes/theblock/namespace.ts +7 -0
- package/lib/routes/theverge/index.ts +73 -62
- package/lib/routes/theverge/templates/header.art +19 -0
- package/lib/routes/threads/index.ts +73 -54
- package/lib/routes/threads/utils.ts +60 -78
- package/lib/routes/tmtpost/column.ts +298 -0
- package/lib/routes/tmtpost/new.ts +4 -199
- package/lib/routes/tmtpost/util.ts +207 -0
- package/lib/routes/toranoana/namespace.ts +7 -0
- package/lib/routes/toranoana/news.ts +110 -0
- package/lib/routes/wainao/templates/description.art +9 -0
- package/lib/routes/wainao/topics.ts +214 -0
- package/lib/routes/xiaoyuzhou/podcast.ts +27 -27
- package/lib/routes/xjtu/yz.ts +74 -0
- package/lib/routes/youmemark/index.ts +6 -6
- package/lib/routes/zaobao/util.ts +11 -3
- package/lib/routes/zhihu/answers.ts +26 -54
- package/package.json +36 -35
- package/lib/routes/gcores/category.ts +0 -171
- package/lib/routes/gcores/collection.ts +0 -161
- package/lib/routes-deprecated/ltaaa/index.js +0 -69
package/lib/api/index.ts
CHANGED
|
@@ -29,11 +29,6 @@ for (const path in docs.paths) {
|
|
|
29
29
|
delete docs.paths[path];
|
|
30
30
|
}
|
|
31
31
|
app.get('/openapi.json', (ctx) => ctx.json(docs));
|
|
32
|
-
app.get(
|
|
33
|
-
'/reference',
|
|
34
|
-
apiReference({
|
|
35
|
-
spec: { content: docs },
|
|
36
|
-
})
|
|
37
|
-
);
|
|
32
|
+
app.get('/reference', apiReference({ content: docs }));
|
|
38
33
|
|
|
39
34
|
export default app;
|
package/lib/routes/2048/index.ts
CHANGED
|
@@ -67,31 +67,27 @@ async function handler(ctx) {
|
|
|
67
67
|
const id = ctx.req.param('id') ?? '3';
|
|
68
68
|
|
|
69
69
|
const rootUrl = 'https://hjd2048.com';
|
|
70
|
-
|
|
70
|
+
// 获取地址发布页指向的 URL
|
|
71
71
|
const domainInfo = (await cache.tryGet('2048:domainInfo', async () => {
|
|
72
|
-
const response = await ofetch('https://
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
};
|
|
88
|
-
})) as { url: string; cookie: string };
|
|
89
|
-
|
|
90
|
-
const currentUrl = `${domainInfo.url}thread.php?fid-${id}.html`;
|
|
72
|
+
const response = await ofetch('https://2048.info');
|
|
73
|
+
const $ = load(response);
|
|
74
|
+
const onclickValue = $('.button').first().attr('onclick');
|
|
75
|
+
const targetUrl = onclickValue.match(/window\.open\('([^']+)'/)[1];
|
|
76
|
+
|
|
77
|
+
return { url: targetUrl };
|
|
78
|
+
})) as { url: string };
|
|
79
|
+
// 获取重定向后的url和safeid
|
|
80
|
+
const redirectResponse = await ofetch.raw(domainInfo.url);
|
|
81
|
+
const currentUrl = `${redirectResponse.url}thread.php?fid-${id}.html`;
|
|
82
|
+
const redirectPageContent = load(redirectResponse._data);
|
|
83
|
+
const safeId =
|
|
84
|
+
redirectPageContent('script')
|
|
85
|
+
.text()
|
|
86
|
+
.match(/var safeid='(.*?)',/)?.[1] ?? '';
|
|
91
87
|
|
|
92
88
|
const response = await ofetch.raw(currentUrl, {
|
|
93
89
|
headers: {
|
|
94
|
-
cookie: `_safe=${
|
|
90
|
+
cookie: `_safe=${safeId}`,
|
|
95
91
|
},
|
|
96
92
|
});
|
|
97
93
|
|
|
@@ -121,7 +117,7 @@ async function handler(ctx) {
|
|
|
121
117
|
cache.tryGet(item.guid, async () => {
|
|
122
118
|
const detailResponse = await ofetch(item.link, {
|
|
123
119
|
headers: {
|
|
124
|
-
cookie: `_safe=${
|
|
120
|
+
cookie: `_safe=${safeId}`,
|
|
125
121
|
},
|
|
126
122
|
});
|
|
127
123
|
|
|
@@ -130,7 +126,12 @@ async function handler(ctx) {
|
|
|
130
126
|
content('.ads, .tips').remove();
|
|
131
127
|
|
|
132
128
|
content('ignore_js_op').each(function () {
|
|
133
|
-
|
|
129
|
+
const img = content(this).find('img');
|
|
130
|
+
const originalSrc = img.attr('data-original');
|
|
131
|
+
const fallbackSrc = img.attr('src');
|
|
132
|
+
// 判断是否有 data-original 属性,若有则使用其值,否则使用 src 属性值
|
|
133
|
+
const imgSrc = originalSrc || fallbackSrc;
|
|
134
|
+
content(this).replaceWith(`<img src="${imgSrc}">`);
|
|
134
135
|
});
|
|
135
136
|
|
|
136
137
|
item.author = content('.fl.black').first().text();
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import
|
|
1
|
+
import ofetch from '@/utils/ofetch';
|
|
2
2
|
import { load } from 'cheerio';
|
|
3
3
|
import cache from '@/utils/cache';
|
|
4
4
|
import { Route } from '@/types';
|
|
@@ -10,28 +10,28 @@ export const route: Route = {
|
|
|
10
10
|
parameters: {},
|
|
11
11
|
radar: [
|
|
12
12
|
{
|
|
13
|
-
source: ['anthropic.com'],
|
|
13
|
+
source: ['www.anthropic.com/news', 'www.anthropic.com'],
|
|
14
14
|
},
|
|
15
15
|
],
|
|
16
16
|
name: 'News',
|
|
17
17
|
maintainers: ['etShaw-zh'],
|
|
18
18
|
handler,
|
|
19
|
-
url: 'anthropic.com/news',
|
|
19
|
+
url: 'www.anthropic.com/news',
|
|
20
20
|
};
|
|
21
21
|
|
|
22
22
|
async function handler() {
|
|
23
|
-
const link = 'https://anthropic.com/news';
|
|
24
|
-
const response = await
|
|
25
|
-
const $ = load(response
|
|
23
|
+
const link = 'https://www.anthropic.com/news';
|
|
24
|
+
const response = await ofetch(link);
|
|
25
|
+
const $ = load(response);
|
|
26
26
|
|
|
27
27
|
const list = $('.contentFadeUp a')
|
|
28
28
|
.toArray()
|
|
29
29
|
.map((e) => {
|
|
30
30
|
e = $(e);
|
|
31
|
-
const title = e.find('h3
|
|
32
|
-
const href = e.attr('href');
|
|
33
|
-
const pubDate = e.find('
|
|
34
|
-
const fullLink = href.startsWith('http') ? href : `https://anthropic.com${href}`;
|
|
31
|
+
const title = e.find('h3[class^="PostCard_post-heading__"]').text().trim();
|
|
32
|
+
const href = e.attr('href');
|
|
33
|
+
const pubDate = e.find('div[class^="PostList_post-date__"]').text().trim();
|
|
34
|
+
const fullLink = href.startsWith('http') ? href : `https://www.anthropic.com${href}`;
|
|
35
35
|
return {
|
|
36
36
|
title,
|
|
37
37
|
link: fullLink,
|
|
@@ -42,10 +42,24 @@ async function handler() {
|
|
|
42
42
|
const out = await Promise.all(
|
|
43
43
|
list.map((item) =>
|
|
44
44
|
cache.tryGet(item.link, async () => {
|
|
45
|
-
const response = await
|
|
46
|
-
const $ = load(response
|
|
45
|
+
const response = await ofetch(item.link);
|
|
46
|
+
const $ = load(response);
|
|
47
47
|
|
|
48
|
-
|
|
48
|
+
$('div[class^="PostDetail_b-social-share"]').remove();
|
|
49
|
+
|
|
50
|
+
const content = $('div[class*="PostDetail_post-detail__"]');
|
|
51
|
+
content.find('img').each((_, e) => {
|
|
52
|
+
const $e = $(e);
|
|
53
|
+
$e.removeAttr('style srcset');
|
|
54
|
+
const src = $e.attr('src');
|
|
55
|
+
const params = new URLSearchParams(src);
|
|
56
|
+
const newSrc = params.get('/_next/image?url');
|
|
57
|
+
if (newSrc) {
|
|
58
|
+
$e.attr('src', newSrc);
|
|
59
|
+
}
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
item.description = content.html();
|
|
49
63
|
|
|
50
64
|
return item;
|
|
51
65
|
})
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import { DataItem, Route } from '@/types';
|
|
2
|
+
|
|
3
|
+
import { config } from '@/config';
|
|
4
|
+
import ofetch from '@/utils/ofetch';
|
|
5
|
+
import { parseDate } from '@/utils/parse-date';
|
|
6
|
+
import { load } from 'cheerio';
|
|
7
|
+
|
|
8
|
+
// test url http://localhost:1200/asianfanfics/tag/milklove/N
|
|
9
|
+
|
|
10
|
+
/**
 * Route metadata for the Asianfanfics "browse by tag" feed.
 *
 * `:tag` selects the tag and `:type` selects the ordering; the accepted
 * ordering codes are listed in `description`.
 */
export const route: Route = {
    name: '标签',
    path: '/tag/:tag/:type',
    example: '/asianfanfics/tag/milklove/N',
    categories: ['reading'],
    maintainers: ['KazooTTT'],
    parameters: { tag: '标签', type: '排序类型' },
    description: `匹配asianfanfics标签,支持排序类型:
- L: Latest 最近更新
- N: Newest 最近发布
- O: Oldest 最早发布
- C: Completed 已完成
- OS: One Shots 短篇
`,
    // Maps the site's own browse URL onto this route.
    radar: [{ source: ['www.asianfanfics.com/browse/tag/:tag/:type'], target: '/tag/:tag/:type' }],
    handler,
};
|
|
35
|
+
|
|
36
|
+
/** Ordering codes accepted by the tag route's `:type` parameter. */
type Type = 'L' | 'N' | 'O' | 'C' | 'OS';

/**
 * Human-readable label for each ordering code, used in the feed title.
 *
 * Typed as `Record<Type, string>` so that adding a code to `Type` without a
 * label (or misspelling a key) is a compile-time error.
 */
const typeToText: Record<Type, string> = {
    L: '最近更新',
    N: '最近发布',
    O: '最早发布',
    C: '已完成',
    OS: '短篇',
};
|
|
45
|
+
|
|
46
|
+
/**
 * Builds the feed for an Asianfanfics tag listing.
 *
 * Reads the `tag` and `type` route parameters, fetches the matching browse
 * page and turns every story excerpt that has a title link into a feed item.
 *
 * @param ctx - Request context; only `ctx.req.param` is read.
 * @returns Feed data containing `title`, `link` and `item`.
 * @throws Error when `type` is missing or is not one of L, N, O, C, OS.
 */
async function handler(ctx) {
    const tag = ctx.req.param('tag');
    const type = ctx.req.param('type') as Type;

    if (!type || !['L', 'N', 'O', 'C', 'OS'].includes(type)) {
        throw new Error('无效的排序类型');
    }

    const rootUrl = 'https://www.asianfanfics.com';
    // Encode the tag so characters such as spaces, `/`, `?` or `#` cannot change the request path.
    const link = `${rootUrl}/browse/tag/${encodeURIComponent(tag)}/${type}`;

    const response = await ofetch(link, {
        headers: {
            'user-agent': config.trueUA,
            Referer: 'https://www.asianfanfics.com/',
        },
    });
    const $ = load(response);

    const items: DataItem[] = $('.primary-container .excerpt')
        .toArray()
        .flatMap((element) => {
            const $element = $(element);
            const $titleLink = $element.find('.excerpt__title a');
            const href = $titleLink.attr('href');
            // Excerpts without a usable title link cannot become feed items.
            if (!href) {
                return [];
            }

            const datetime = $element.find('time').attr('datetime');

            return [
                {
                    title: $titleLink.text(),
                    // Resolves relative hrefs against the site root and leaves absolute ones untouched.
                    link: new URL(href, rootUrl).href,
                    author: $element.find('.excerpt__meta__name a').text().trim(),
                    // Leave the date unset instead of parsing an empty string.
                    pubDate: datetime ? parseDate(datetime) : undefined,
                },
            ];
        });

    return {
        title: `Asianfanfics - 标签:${tag} - ${typeToText[type]}`,
        link,
        item: items,
    };
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import { config } from '@/config';
|
|
2
|
+
import { DataItem, Route } from '@/types';
|
|
3
|
+
import ofetch from '@/utils/ofetch';
|
|
4
|
+
import { parseDate } from '@/utils/parse-date';
|
|
5
|
+
import { load } from 'cheerio';
|
|
6
|
+
|
|
7
|
+
// test url http://localhost:1200/asianfanfics/text-search/milklove
|
|
8
|
+
|
|
9
|
+
/**
 * Route metadata for the Asianfanfics full-text search feed.
 *
 * `:keyword` is the search term forwarded to the site's text search.
 */
export const route: Route = {
    name: '关键词',
    path: '/text-search/:keyword',
    example: '/asianfanfics/text-search/milklove',
    categories: ['reading'],
    maintainers: ['KazooTTT'],
    parameters: { keyword: '关键词' },
    description: '匹配asianfanfics搜索关键词',
    radar: [{ source: ['www.asianfanfics.com/browse/text_search?q=:keyword'], target: '/text-search/:keyword' }],
    handler,
};
|
|
27
|
+
|
|
28
|
+
/**
 * Builds the feed for an Asianfanfics text search.
 *
 * Reads the `keyword` route parameter, fetches the site's search result page
 * and turns every story excerpt that has a title link into a feed item.
 *
 * @param ctx - Request context; only `ctx.req.param` is read.
 * @returns Feed data containing `title`, `link` and `item`.
 * @throws Error when the keyword is blank.
 */
async function handler(ctx) {
    const keyword = ctx.req.param('keyword');
    if (keyword.trim() === '') {
        throw new Error('关键词不能为空');
    }

    const rootUrl = 'https://www.asianfanfics.com';
    // Encode the keyword so `&`, `#`, `+` and similar characters stay part of the query value.
    // The trailing `+` from the original request URL is kept as-is.
    const link = `${rootUrl}/browse/text_search?q=${encodeURIComponent(keyword)}+`;

    const response = await ofetch(link, {
        headers: {
            'user-agent': config.trueUA,
        },
    });
    const $ = load(response);

    const items: DataItem[] = $('.primary-container .excerpt')
        .toArray()
        .flatMap((element) => {
            const $element = $(element);
            const $titleLink = $element.find('.excerpt__title a');
            const href = $titleLink.attr('href');
            // Excerpts without a usable title link cannot become feed items.
            if (!href) {
                return [];
            }

            const datetime = $element.find('time').attr('datetime');

            return [
                {
                    title: $titleLink.text(),
                    // Resolves relative hrefs against the site root and leaves absolute ones untouched.
                    link: new URL(href, rootUrl).href,
                    author: $element.find('.excerpt__meta__name a').text().trim(),
                    // Leave the date unset instead of parsing an empty string.
                    pubDate: datetime ? parseDate(datetime) : undefined,
                },
            ];
        });

    return {
        title: `Asianfanfics - 关键词:${keyword}`,
        link,
        item: items,
    };
}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import { Route, Data, DataItem } from '@/types';
|
|
2
|
+
import cache from '@/utils/cache';
|
|
3
|
+
import ofetch from '@/utils/ofetch';
|
|
4
|
+
import { parseDate } from '@/utils/parse-date';
|
|
5
|
+
import { load } from 'cheerio';
|
|
6
|
+
import logger from '@/utils/logger';
|
|
7
|
+
import parser from '@/utils/rss-parser';
|
|
8
|
+
import { config } from '@/config';
|
|
9
|
+
|
|
10
|
+
/**
 * Route metadata for the Blockworks news feed, served at the namespace root.
 */
export const route: Route = {
    name: 'News',
    path: '/',
    example: '/blockworks',
    categories: ['finance'],
    maintainers: ['pseudoyu'],
    description: 'Blockworks news with full text support.',
    parameters: {},
    // Every feature flag is explicitly switched off for this route.
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    radar: [{ source: ['blockworks.co/'], target: '/' }],
    handler,
};
|
|
34
|
+
|
|
35
|
+
/**
 * Builds the Blockworks news feed.
 *
 * Parses the site's RSS feed, keeps at most `limit` entries (query parameter,
 * default 20) and enriches each one with the full article text obtained via
 * `extractFullText`. Enriched items are cached under their link.
 *
 * @param ctx - Request context; only `ctx.req.query('limit')` is read.
 * @returns Feed data assembled from the RSS channel and the enriched items.
 */
async function handler(ctx): Promise<Data> {
    const rssUrl = 'https://blockworks.co/feed';
    const feed = await parser.parseURL(rssUrl);

    // A missing or non-numeric `limit` falls back to 20 instead of producing NaN (and an empty feed).
    const requestedLimit = Number.parseInt(ctx.req.query('limit') ?? '', 10);
    const limit = Number.isNaN(requestedLimit) ? 20 : requestedLimit;

    const buildId = await getBuildId();

    const items = await Promise.all(
        feed.items
            .slice(0, limit)
            .flatMap((item) => {
                // Drop the query string; entries without a link cannot be fetched or cached, so skip them.
                const link = item.link?.split('?')[0];
                return link ? [{ ...item, link }] : [];
            })
            .map((item) =>
                cache.tryGet(item.link, async () => {
                    // The article slug is the last non-empty path segment (tolerates a trailing slash).
                    const slug = item.link.split('/').filter(Boolean).pop() ?? '';
                    const content = await extractFullText(slug, buildId);

                    return {
                        title: item.title || 'Untitled',
                        pubDate: item.isoDate ? parseDate(item.isoDate) : undefined,
                        link: item.link,
                        // Prefer the extracted full text, then whatever the RSS entry carried.
                        description: content.description || item.content || item.contentSnippet || item.summary || '',
                        author: item.author,
                        category: content.category,
                        media: content.imageUrl
                            ? {
                                  content: { url: content.imageUrl },
                              }
                            : undefined,
                    } as DataItem;
                })
            )
    );

    return {
        title: feed.title || 'Blockworks News',
        link: feed.link || 'https://blockworks.co',
        description: feed.description || 'Latest news from Blockworks',
        item: items,
        language: feed.language || 'en',
    };
}
|
|
80
|
+
|
|
81
|
+
/**
 * Fetches one article from the Blockworks Next.js data endpoint and returns
 * its cleaned HTML body together with the lead image and categories.
 *
 * Any failure (network error, unexpected payload) is logged and an empty
 * result is returned so the caller can fall back to the RSS content.
 *
 * @param slug - Article slug as it appears in the article URL (used verbatim).
 * @param buildId - Next.js build id that is part of the data endpoint path.
 * @returns `description` (HTML), `imageUrl` and de-duplicated `category` list.
 */
async function extractFullText(slug: string, buildId: string): Promise<{ description: string; imageUrl: string; category: string[] }> {
    try {
        const response = await ofetch(`https://blockworks.co/_next/data/${buildId}/news/${slug}.json?slug=${slug}`);
        const article = response.pageProps.article;
        const $ = load(article.content, null, false);

        // Remove promotional content at the end
        $('hr').remove();
        $('p > em, p > strong').each((_, el) => {
            const $el = $(el);
            const text = $el.text();
            if (text.includes('To read full editions') || text.includes('Get the news in your inbox')) {
                $el.parent().remove();
            }
        });
        // A list that links to the daily newsletter is promotional as a whole.
        $('ul.wp-block-list > li > a').each((_, el) => {
            const $el = $(el);
            if ($el.attr('href') === 'https://blockworks.co/newsletter/daily') {
                $el.parent().parent().remove();
            }
        });

        return {
            description: $.html(),
            imageUrl: article.imageUrl ?? '',
            // Treat absent lists as empty so a missing field does not discard the extracted text.
            category: [...new Set([...(article.categories ?? []), ...(article.tags ?? [])])],
        };
    } catch (error) {
        logger.error('Error extracting full text from Blockworks:', error);
        return { description: '', imageUrl: '', category: [] };
    }
}
|
|
112
|
+
|
|
113
|
+
/**
 * Returns the Next.js build id of blockworks.co.
 *
 * The id is read from the `__NEXT_DATA__` script on the homepage and stored
 * under the cache key `blockworks:buildId` with `config.cache.routeExpire`;
 * an empty string is returned when no build id is found.
 */
function getBuildId(): Promise<string> {
    const readBuildIdFromHomepage = async () => {
        const homepage = await ofetch('https://blockworks.co');
        const nextData = load(homepage)('script#__NEXT_DATA__').text();
        const matched = nextData.match(/"buildId":"(.*?)",/);

        return matched?.[1] || '';
    };

    return cache.tryGet('blockworks:buildId', readBuildIdFromHomepage, config.cache.routeExpire, false) as Promise<string>;
}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import { Route } from '@/types';
|
|
2
|
+
import got from '@/utils/got';
|
|
3
|
+
import { load } from 'cheerio';
|
|
4
|
+
import { parseDate } from '@/utils/parse-date';
|
|
5
|
+
|
|
6
|
+
/**
 * Scrapes the index page of Andy Pavlo's blog and returns one feed item per
 * `.row.mb-3` entry.
 *
 * @returns Items with `title`, `link`, `description`, `pubDate` and `guid`.
 */
async function getArticles() {
    const url = 'https://www.cs.cmu.edu/~pavlo/blog/index.html';
    const { data: res } = await got(url);
    const $ = load(res);

    return $('.row.mb-3')
        .toArray()
        .map((element) => {
            const $item = $(element);
            const $title = $item.find('h4 a');
            const href = $title.attr('href');
            // The date is read from the `title` attribute of the muted element.
            const dateText = $item.find('.text-muted').attr('title');

            return {
                title: $title.text().trim(),
                // Resolve relative hrefs against the index page; absolute hrefs pass through unchanged.
                link: href ? new URL(href, url).href : undefined,
                description: $item.find('p').text().trim(),
                // Only parse when a date string is present, rather than handing `undefined` to parseDate.
                pubDate: dateText ? parseDate(dateText) : undefined,
                guid: href,
            };
        });
}
|
|
29
|
+
|
|
30
|
+
/**
 * Route metadata for the Andy Pavlo blog feed under the CMU namespace.
 */
export const route: Route = {
    name: 'Andy Pavlo Blog',
    path: '/andypavlo/blog',
    example: '/cmu/andypavlo/blog',
    categories: ['blog'],
    maintainers: ['mocusez'],
    parameters: {},
    // Every feature flag is explicitly switched off for this route.
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    handler,
};
|
|
47
|
+
|
|
48
|
+
/**
 * Produces the feed for Andy Pavlo's blog: a fixed title and link plus the
 * items returned by `getArticles`.
 */
async function handler() {
    const item = await getArticles();

    return {
        title: 'Andy Pavlo - Carnegie Mellon University',
        link: 'https://www.cs.cmu.edu/~pavlo/blog/index.html',
        item,
    };
}
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { Route } from '@/types';
|
|
2
|
-
import
|
|
2
|
+
import ofetch from '@/utils/ofetch';
|
|
3
|
+
import cache from '@/utils/cache';
|
|
3
4
|
import { load } from 'cheerio';
|
|
5
|
+
import { parseItem } from './utils';
|
|
4
6
|
const rootUrl = 'https://www.coindesk.com';
|
|
5
7
|
|
|
6
8
|
export const route: Route = {
|
|
@@ -27,29 +29,23 @@ export const route: Route = {
|
|
|
27
29
|
url: 'coindesk.com/',
|
|
28
30
|
};
|
|
29
31
|
|
|
30
|
-
async function handler(
|
|
31
|
-
const channel =
|
|
32
|
+
async function handler() {
|
|
33
|
+
const channel = 'consensus-magazine';
|
|
32
34
|
|
|
33
|
-
const response = await
|
|
34
|
-
const $ = load(response
|
|
35
|
-
const content = JSON.parse(
|
|
36
|
-
$('#fusion-metadata')
|
|
37
|
-
.text()
|
|
38
|
-
.match(/Fusion\.contentCache=(.*?);Fusion\.layout/)[1]
|
|
39
|
-
);
|
|
35
|
+
const response = await ofetch(`${rootUrl}/${channel}`);
|
|
36
|
+
const $ = load(response);
|
|
40
37
|
|
|
41
|
-
const
|
|
42
|
-
|
|
43
|
-
|
|
38
|
+
const list = $('div h2')
|
|
39
|
+
.toArray()
|
|
40
|
+
.map((item) => {
|
|
41
|
+
const $item = $(item);
|
|
42
|
+
return {
|
|
43
|
+
title: $item.text(),
|
|
44
|
+
link: rootUrl + $item.parent().attr('href'),
|
|
45
|
+
};
|
|
46
|
+
});
|
|
44
47
|
|
|
45
|
-
const
|
|
46
|
-
|
|
47
|
-
const items = list.map((item) => ({
|
|
48
|
-
title: item.headlines.basic,
|
|
49
|
-
link: rootUrl + item.canonical_url,
|
|
50
|
-
description: item.subheadlines.basic,
|
|
51
|
-
pubDate: item.display_date,
|
|
52
|
-
}));
|
|
48
|
+
const items = await Promise.all(list.map((item) => cache.tryGet(item.link, () => parseItem(item))));
|
|
53
49
|
|
|
54
50
|
return {
|
|
55
51
|
title: 'CoinDesk Consensus Magazine',
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import type { Namespace } from '@/types';
|
|
2
2
|
|
|
3
3
|
export const namespace: Namespace = {
|
|
4
|
-
name: 'CoinDesk
|
|
4
|
+
name: 'CoinDesk',
|
|
5
5
|
url: 'coindesk.com',
|
|
6
6
|
lang: 'en',
|
|
7
|
+
description: 'CoinDesk is a news site specializing in bitcoin and digital currencies, delivering news, analysis, and information about the blockchain ecosystem.',
|
|
7
8
|
};
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { Route, Data, DataItem } from '@/types';
|
|
2
|
+
import cache from '@/utils/cache';
|
|
3
|
+
import parser from '@/utils/rss-parser';
|
|
4
|
+
import { parseItem } from './utils';
|
|
5
|
+
|
|
6
|
+
/**
 * Route metadata for the CoinDesk news feed.
 */
export const route: Route = {
    name: 'News',
    path: '/news',
    example: '/coindesk/news',
    categories: ['finance'],
    maintainers: ['pseudoyu'],
    description: 'Get latest news from CoinDesk with full text.',
    parameters: {},
    // Every feature flag is explicitly switched off for this route.
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    radar: [{ source: ['coindesk.com/'], target: '/news' }],
    handler,
};
|
|
30
|
+
|
|
31
|
+
/**
 * Builds the CoinDesk news feed.
 *
 * Parses the FeedBurner RSS feed and passes every entry through `parseItem`,
 * caching each result under the entry's link.
 *
 * @returns Feed data assembled from the RSS channel and the parsed items.
 */
async function handler(): Promise<Data> {
    const rssUrl = 'https://feeds.feedburner.com/Coindesk';
    const feed = await parser.parseURL(rssUrl);

    // The link doubles as cache key and fetch target, so entries without one are skipped.
    const linkedItems = feed.items.filter((item) => Boolean(item.link));

    const items = await Promise.all(linkedItems.map((item) => cache.tryGet(item.link, () => parseItem(item))));

    // Filter out null items
    const validItems = items.filter((item): item is DataItem => item !== null);

    return {
        title: feed.title || 'CoinDesk News',
        link: feed.link || 'https://coindesk.com',
        description: feed.description || 'Latest news from CoinDesk',
        language: feed.language || 'en',
        item: validItems,
    };
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import ofetch from '@/utils/ofetch';
|
|
2
|
+
import { load } from 'cheerio';
|
|
3
|
+
import { parseDate } from '@/utils/parse-date';
|
|
4
|
+
|
|
5
|
+
/**
 * Returns the first JSON-LD object on the page that carries `datePublished`.
 *
 * Each `application/ld+json` script is parsed on its own, so a page with
 * several JSON-LD scripts no longer yields one unparsable concatenation.
 * Scripts that are not valid JSON are skipped.
 */
const findArticleLdJson = ($) => {
    for (const script of $('script[type="application/ld+json"]').toArray()) {
        let parsed;
        try {
            parsed = JSON.parse($(script).text());
        } catch {
            continue;
        }
        const candidates = Array.isArray(parsed) ? parsed : [parsed];
        const article = candidates.find((entry) => entry?.datePublished);
        if (article) {
            return article;
        }
    }
    return undefined;
};

/**
 * Fetches a CoinDesk article and fills in the item's full-text fields.
 *
 * Mutates and returns `item`, setting `description` (cover figure markup plus
 * every `.document-body` section), `pubDate`, and, when the JSON-LD metadata
 * provides them, `author` and `image`.
 *
 * @param item - Feed item; `item.link` is the article URL to fetch.
 * @returns The same `item` object with the fields above populated.
 * @throws Error when the page contains no JSON-LD object with `datePublished`.
 */
export const parseItem = async (item) => {
    const response = await ofetch(item.link);
    const $ = load(response);

    const ldJson = findArticleLdJson($);
    if (!ldJson) {
        throw new Error(`No article JSON-LD metadata found for ${item.link}`);
    }

    $('.article-ad, #strategy-rules-player-wrapper, [data-module-name="newsletter-article-sign-up-module"], div.flex.flex-col.gap-2').remove();

    const cover = $('.article-content-wrapper figure');
    const coverImg = cover.find('img');
    // The image address is read from the `url` attribute, without its query string.
    const coverUrl = coverImg.attr('url')?.split('?')[0];
    if (coverUrl) {
        coverImg.attr('src', coverUrl);
    }
    coverImg.removeAttr('style srcset url');

    const body = $('.document-body')
        .toArray()
        .map((section) => $(section).html())
        .join('');
    // `.html()` is null when the article has no cover figure; avoid prepending the text "null".
    item.description = (cover.parent().html() ?? '') + body;

    item.pubDate = parseDate(ldJson.datePublished);

    // `author` and `image` may be a single value or a list; normalise both to arrays.
    const authors = [ldJson.author ?? []].flat();
    if (authors.length > 0) {
        item.author = authors.map((a) => ({ name: a.name }));
    }

    const image = [ldJson.image ?? []].flat()[0];
    const imageUrl = typeof image === 'string' ? image : image?.url;
    if (imageUrl) {
        item.image = imageUrl.split('?')[0];
    }

    return item;
};
|