rssany 0.1.2 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -50
- package/app/plugins/builtin/agi-eval-evaluation.rssany.js +188 -0
- package/app/plugins/builtin/amii-research-talent.rssany.js +73 -0
- package/app/plugins/builtin/anthropic-research.rssany.js +155 -0
- package/app/plugins/builtin/appen-resources.rssany.js +155 -0
- package/app/plugins/builtin/baai-wudao-paper-article.rssany.js +185 -0
- package/app/plugins/builtin/baaidata-csdn.rssany.js +242 -0
- package/app/plugins/builtin/baidu-research.rssany.js +222 -0
- package/app/plugins/builtin/brightdata-blog.rssany.js +301 -0
- package/app/plugins/builtin/bytedance-seed-research.rssany.js +231 -0
- package/app/plugins/builtin/five-radar.rssany.js +490 -0
- package/app/plugins/builtin/flageval-news.rssany.js +118 -0
- package/app/plugins/builtin/google-deepmind-research.rssany.js +223 -0
- package/app/plugins/builtin/google-research-datasets.rssany.js +171 -0
- package/app/plugins/builtin/google-research.rssany.js +220 -0
- package/app/plugins/builtin/google.rssany.js +187 -0
- package/app/plugins/builtin/hacker-news-newest.rssany.js +130 -0
- package/app/plugins/builtin/harvard-dataverse.rssany.js +166 -0
- package/app/plugins/builtin/huaweicloud-bbs-blogs.rssany.js +185 -0
- package/app/plugins/builtin/lingowhale.rssany.js +119 -0
- package/app/plugins/builtin/meituan-tech.rssany.js +130 -0
- package/app/plugins/builtin/meta-ai-publications.rssany.js +221 -0
- package/app/plugins/builtin/mila-quebec.rssany.js +199 -0
- package/app/plugins/builtin/mit-csail-research.rssany.js +208 -0
- package/app/plugins/builtin/moonshot.rssany.js +127 -0
- package/app/plugins/builtin/opendatalab-news.rssany.js +174 -0
- package/app/plugins/builtin/opendatalab.rssany.js +109 -0
- package/app/plugins/builtin/opendrivelab-autonomous-driving.rssany.js +114 -0
- package/app/plugins/builtin/opendrivelab-embodiedai.rssany.js +114 -0
- package/app/plugins/builtin/opendrivelab-publications.rssany.js +130 -0
- package/app/plugins/builtin/opendrivelab.rssany.js +333 -0
- package/app/plugins/builtin/paperswithcode.rssany.js +227 -0
- package/app/plugins/builtin/pjlab-adg-publications.rssany.js +202 -0
- package/app/plugins/builtin/rss.rssany.js +11 -1
- package/app/plugins/builtin/selectdataset.rssany.js +206 -0
- package/app/plugins/builtin/sensetime-tech-achievements.rssany.js +154 -0
- package/app/plugins/builtin/supervisely-blog.rssany.js +159 -0
- package/app/plugins/builtin/uci-ml-repository.rssany.js +111 -0
- package/app/plugins/builtin/venturebeat.rssany.js +97 -0
- package/app/plugins/builtin/worldlabs.rssany.js +129 -0
- package/app/plugins/builtin/x.rssany.js +159 -0
- package/app/plugins/builtin/xiaohongshu.rssany.js +283 -0
- package/app/plugins/builtin/zhipu-research.rssany.js +334 -0
- package/dist/index.js +79 -9
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/webui/build/200.html +6 -6
- package/webui/build/_app/immutable/assets/0.BB88QFoe.css +1 -0
- package/webui/build/_app/immutable/assets/{homeFeedPanelStore.BopJZtHu.css → homeFeedPanelStore.iOmfP2qL.css} +1 -1
- package/webui/build/_app/immutable/chunks/CZD-YNDw.js +31 -0
- package/webui/build/_app/immutable/chunks/{DcAshVxe.js → D6VIKef0.js} +1 -1
- package/webui/build/_app/immutable/chunks/{EIZIMsXK.js → Dbqx2mXq.js} +1 -1
- package/webui/build/_app/immutable/chunks/DeX-oq5W.js +41 -0
- package/webui/build/_app/immutable/chunks/{BXCWEhUd.js → dhB8G5Is.js} +1 -1
- package/webui/build/_app/immutable/entry/{app.DdgnooOk.js → app.XPso7q7g.js} +2 -2
- package/webui/build/_app/immutable/entry/start.Db4snNCd.js +1 -0
- package/webui/build/_app/immutable/nodes/0.BKTQePmA.js +11 -0
- package/webui/build/_app/immutable/nodes/{1.5DFDaT4c.js → 1.BS3_Rfxm.js} +1 -1
- package/webui/build/_app/immutable/nodes/{10.OVK4i9XE.js → 10.CyyxDCIS.js} +1 -1
- package/webui/build/_app/immutable/nodes/{11.Dhn_rO4A.js → 11.CtYgIaGj.js} +1 -1
- package/webui/build/_app/immutable/nodes/{14.B_KpJLxn.js → 14.D5OEGPR2.js} +1 -1
- package/webui/build/_app/immutable/nodes/{15.RaWaA-0I.js → 15.B4dFN1Gk.js} +1 -1
- package/webui/build/_app/immutable/nodes/{16.DSUgqolV.js → 16.M7ZII7tl.js} +1 -1
- package/webui/build/_app/immutable/nodes/{3.wQvGs9w-.js → 3.7r8v7qkm.js} +1 -1
- package/webui/build/_app/immutable/nodes/{5.CCtn90c0.js → 5.CHIzoGrb.js} +1 -1
- package/webui/build/_app/immutable/nodes/{6.C2_mjW1u.js → 6.BDBqx-GY.js} +1 -1
- package/webui/build/_app/immutable/nodes/{7.Dwz6W7A1.js → 7.D5czsDmz.js} +1 -1
- package/webui/build/_app/immutable/nodes/{8.DzkEw6rx.js → 8.pjVNsCdV.js} +1 -1
- package/webui/build/_app/immutable/nodes/{9.DtlXEwe1.js → 9.CsARv1BH.js} +1 -1
- package/webui/build/_app/version.json +1 -1
- package/webui/build/_app/immutable/assets/0.C6Q_nuW9.css +0 -1
- package/webui/build/_app/immutable/chunks/CkUAV0m0.js +0 -41
- package/webui/build/_app/immutable/chunks/CtijX1u3.js +0 -31
- package/webui/build/_app/immutable/entry/start.DhJaJZhR.js +0 -1
- package/webui/build/_app/immutable/nodes/0.BE05Cuc4.js +0 -11
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
// Host-provided dependencies; assigned from ctx.deps at the start of fetchItems.
let _deps;

// ByteDance Seed research page plugin: scrapes research paper and feed entries (no enrich step).

// Origin used when the page URL cannot be parsed.
const DEFAULT_ORIGIN = "https://seed.bytedance.com";
// Text that precedes the router-data JSON object embedded in the page HTML.
const ROUTER_DATA_MARKER = "window._ROUTER_DATA = ";
// Key under routerData.loaderData that holds the research page payload.
const RESEARCH_PAGE_KEY = "(locale$)/research/page";
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
/**
 * Collapses every run of whitespace into a single space and trims the ends.
 *
 * Strings are normalized as-is and finite numbers are stringified first, so a
 * numeric JSON field does not crash the caller. Any other value (null,
 * undefined, objects, NaN, ...) yields an empty string.
 *
 * @param {unknown} text - Value to normalize.
 * @returns {string} The normalized text, or "" when there is no usable text.
 */
function normalizeText(text) {
  let value = "";
  if (typeof text === "string") {
    value = text;
  } else if (typeof text === "number" && Number.isFinite(text)) {
    value = String(text);
  }
  return value.replace(/\s+/g, " ").trim();
}
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
/**
 * Builds a GUID as the hex-encoded SHA-256 digest of `input`.
 * Reads the module-level `_deps`, which fetchItems assigns before any item is built.
 *
 * @param {string} input - Text to hash.
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const digest = _deps.createHash("sha256").update(input).digest("hex");
  return digest;
}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
/**
 * Returns the origin of `url`, or DEFAULT_ORIGIN when `url` cannot be parsed.
 *
 * @param {string} url - Absolute URL.
 * @returns {string} Origin string.
 */
function toOrigin(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return DEFAULT_ORIGIN;
  }
  return parsed.origin;
}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
/**
 * Detects the page locale from the URL path.
 *
 * Returns "en" when the path is exactly "/en" or starts with "/en/"
 * (case-insensitive); every other path, and any unparseable URL, yields "zh".
 *
 * @param {string} url - Absolute page URL.
 * @returns {"en" | "zh"} Detected locale.
 */
function detectLocale(url) {
  try {
    const path = new URL(url).pathname.toLowerCase();
    // Accept the bare "/en" root as well as any "/en/..." sub-path.
    if (path === "/en" || path.startsWith("/en/")) return "en";
  } catch {
    // Unparseable URL: fall through to the default locale.
  }
  return "zh";
}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
/**
 * Chooses between the Chinese and English variants of a field.
 *
 * Both values are normalized first. The variant matching `locale` is preferred
 * ("zh" prefers Chinese, anything else prefers English); the other variant is
 * used when the preferred one is empty.
 *
 * @param {unknown} zhValue - Chinese text.
 * @param {unknown} enValue - English text.
 * @param {string} locale - Active locale.
 * @returns {string} Selected text, possibly "".
 */
function pickLocalized(zhValue, enValue, locale) {
  const zhText = normalizeText(zhValue);
  const enText = normalizeText(enValue);
  const [preferred, alternate] = locale === "zh" ? [zhText, enText] : [enText, zhText];
  return preferred || alternate;
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
/**
 * Converts a raw publish-date value into a Date.
 *
 * A positive finite number (or numeric string) is passed to `new Date(number)`,
 * i.e. read as epoch milliseconds. Otherwise the value is parsed as a date
 * string. When neither produces a valid date, the current time is returned.
 *
 * @param {unknown} raw - Raw publish date from the page payload.
 * @returns {Date} Always a valid Date.
 */
function parsePublishDate(raw) {
  const isValid = (date) => !Number.isNaN(date.getTime());

  const epoch = Number(raw);
  if (Number.isFinite(epoch) && epoch > 0) {
    const fromEpoch = new Date(epoch);
    if (isValid(fromEpoch)) return fromEpoch;
  }

  const fromText = new Date(String(raw ?? ""));
  return isValid(fromText) ? fromText : new Date();
}
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
/**
 * Extracts the first brace-balanced `{...}` span that follows `marker` in `raw`.
 *
 * Braces inside double-quoted strings are ignored, and backslash escapes inside
 * strings are honoured so an escaped quote does not end the string.
 *
 * @param {string} raw - Text to search (e.g. page HTML).
 * @param {string} marker - Literal text that precedes the object.
 * @returns {string | undefined} The balanced span, or undefined when the marker
 *   or an opening brace is missing, or the braces never balance.
 */
function extractBalancedJson(raw, marker) {
  const markerAt = raw.indexOf(marker);
  if (markerAt === -1) return undefined;

  const open = raw.indexOf("{", markerAt + marker.length);
  if (open === -1) return undefined;

  let nesting = 0;
  let insideString = false;
  let skipNext = false;

  for (let pos = open; pos < raw.length; pos += 1) {
    const ch = raw.charAt(pos);

    if (insideString) {
      if (skipNext) {
        // Character after a backslash is consumed verbatim.
        skipNext = false;
      } else if (ch === "\\") {
        skipNext = true;
      } else if (ch === "\"") {
        insideString = false;
      }
      continue;
    }

    switch (ch) {
      case "\"":
        insideString = true;
        break;
      case "{":
        nesting += 1;
        break;
      case "}":
        nesting -= 1;
        if (nesting === 0) return raw.slice(open, pos + 1);
        break;
      default:
        break;
    }
  }

  return undefined;
}
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
/**
 * Locates the object that follows ROUTER_DATA_MARKER in the page HTML and parses it as JSON.
 *
 * @param {string} html - Page HTML.
 * @returns {unknown} Parsed data, or undefined when the block is missing or is not valid JSON.
 */
function parseRouterData(html) {
  const jsonText = extractBalancedJson(html, ROUTER_DATA_MARKER);
  let parsed;
  if (jsonText) {
    try {
      parsed = JSON.parse(jsonText);
    } catch {
      parsed = undefined;
    }
  }
  return parsed;
}
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
/**
 * Parses the JSON content of the `<script id="__MODERN_DATA__">` element.
 *
 * @param {string} html - Page HTML.
 * @returns {unknown} Parsed data, or undefined when the script tag is missing
 *   or its content is not valid JSON.
 */
function parseModernData(html) {
  const scriptPattern = /<script[^>]*id="__MODERN_DATA__"[^>]*>([\s\S]*?)<\/script>/i;
  const [, scriptBody] = html.match(scriptPattern) ?? [];
  if (scriptBody === undefined) return undefined;

  let parsed;
  try {
    parsed = JSON.parse(scriptBody);
  } catch {
    parsed = undefined;
  }
  return parsed;
}
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
/**
 * Finds the research page payload in the HTML.
 *
 * Lookup order: the router data entry under RESEARCH_PAGE_KEY, then the
 * `data` property of the __MODERN_DATA__ script, then the __MODERN_DATA__
 * object itself. Only object values are accepted.
 *
 * @param {string} html - Page HTML.
 * @returns {object | undefined} Payload object, or undefined when none is found.
 */
function getResearchPayload(html) {
  const isObject = (value) => Boolean(value) && typeof value === "object";

  const routed = parseRouterData(html)?.loaderData?.[RESEARCH_PAGE_KEY];
  if (isObject(routed)) return routed;

  const modern = parseModernData(html);
  if (isObject(modern?.data)) return modern.data;
  return isObject(modern) ? modern : undefined;
}
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
/**
 * Converts one payload entry into a feed item.
 *
 * @param {object} entry - Payload entry; reads ArticleMeta, ArticleSubContentZh
 *   and ArticleSubContentEn.
 * @param {string} kind - "public_papers" selects the paper URL path; any other
 *   value uses the blog path. Also namespaces the GUID.
 * @param {string} locale - Locale used for text selection and the link path.
 * @param {string} origin - Site origin used to build absolute links.
 * @returns {object | null} Item with guid, title, link, pubDate, author and
 *   summary, or null when the entry has no usable title.
 */
function buildItem(entry, kind, locale, origin) {
  const meta = entry?.ArticleMeta ?? {};
  const zh = entry?.ArticleSubContentZh ?? {};
  const en = entry?.ArticleSubContentEn ?? {};

  // Fall back to ShortTitle when Title is missing OR blank; `??` alone would
  // keep an empty-string Title and drop the entry.
  const zhTitle = normalizeText(zh.Title) || zh.ShortTitle;
  const enTitle = normalizeText(en.Title) || en.ShortTitle;
  const title = pickLocalized(zhTitle, enTitle, locale);
  if (!title) return null;

  const abstract = pickLocalized(zh.Abstract, en.Abstract, locale);
  const titleKey = pickLocalized(zh.TitleKey, en.TitleKey, locale);
  const articleId = String(meta.ArticleID ?? meta.ID ?? "").trim();

  let link;
  if (titleKey) {
    const section = kind === "public_papers" ? "public_papers" : "blog";
    link = new URL(`/${locale}/${section}/${encodeURIComponent(titleKey)}`, origin).href;
  } else {
    // No slug available: point at the research page with a fragment so the link stays unique.
    const fallback = articleId || title;
    link = `${origin}/${locale}/research#${encodeURIComponent(fallback)}`;
  }

  const author = normalizeText(meta.Author);
  const pubDate = parsePublishDate(meta.PublishDate);

  return {
    guid: hashGuid(`${kind}|${articleId || titleKey || title}`),
    title,
    link,
    pubDate,
    author: author || undefined,
    summary: abstract || undefined,
  };
}
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
/**
 * Drops duplicate items and orders the rest newest-first.
 *
 * Two items are duplicates when they share the same link (or the same guid
 * when the link is empty); the first occurrence is kept.
 *
 * @param {Iterable<object>} items - Items with link, guid and pubDate.
 * @returns {object[]} New array sorted by pubDate, descending.
 */
function dedupeAndSort(items) {
  const seenKeys = new Set();
  const unique = [...items].filter((item) => {
    const key = item.link || item.guid;
    if (seenKeys.has(key)) return false;
    seenKeys.add(key);
    return true;
  });
  return unique.sort((a, b) => b.pubDate.getTime() - a.pubDate.getTime());
}
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
/**
 * Plugin entry point: fetches the research page and returns its feed items.
 *
 * @param {string} sourceId - URL of the research page.
 * @param {object} ctx - Host context; uses ctx.deps and ctx.fetchHtml.
 * @returns {Promise<object[]>} Deduplicated items, newest first.
 * @throws {Error} When no data block is found in the page, or when the data
 *   block yields no valid items.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;

  // The site's core entry data lives in script-embedded JSON, so purify must be disabled to read it.
  const fetched = await ctx.fetchHtml(sourceId, { waitMs: 4500, purify: false });
  const pageUrl = fetched.finalUrl || sourceId;
  const locale = detectLocale(pageUrl);
  const origin = toOrigin(pageUrl);

  const payload = getResearchPayload(fetched.html);
  if (!payload) {
    throw new Error("[bytedance-seed-research] 未找到研究页数据块(window._ROUTER_DATA / __MODERN_DATA__)");
  }

  // Papers first, then feed entries; each list is optional in the payload.
  const sections = [
    ["public_papers", payload.article_list],
    ["blog", payload.feedList],
  ];
  const parsed = sections.flatMap(([kind, list]) =>
    (Array.isArray(list) ? list : [])
      .map((entry) => buildItem(entry, kind, locale, origin))
      .filter(Boolean)
  );

  const items = dedupeAndSort(parsed);
  if (items.length === 0) {
    throw new Error("[bytedance-seed-research] 研究页数据存在,但未解析到有效条目");
  }
  return items;
}
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
// Plugin descriptor: id, the list-page URL pattern (zh/en research page, with an
// optional trailing slash and query string), and the fetch entry point.
const bytedanceSeedResearchPlugin = {
  id: "bytedance-seed-research",
  listUrlPattern: /^https?:\/\/seed\.bytedance\.com\/(zh|en)\/research(?:\/)?(\?.*)?$/i,
  fetchItems,
};

export default bytedanceSeedResearchPlugin;
|
|
@@ -0,0 +1,490 @@
|
|
|
1
|
+
// Host-provided dependencies; assigned from _ctx.deps at the start of fetchItems.
let _deps;

// Plugin id; also used as the prefix of error messages and of id-based GUIDs.
const SITE_ID = "five-radar";
// Default page_size sent to /api/news/list.
const DEFAULT_PAGE_SIZE = 30;
// Default page_size sent to /api/themes/news-list.
const THEME_PAGE_SIZE = 20;
// Page sizes tried in order when the theme API rejects page_size with a validation error.
const THEME_PAGE_SIZE_CANDIDATES = [20, 10, 5];
// Pages fetched per theme when the source URL does not specify pages/maxPages.
const DEFAULT_MAX_PAGES = 3;
// Upper bound applied to any requested page count.
const MAX_PAGES_LIMIT = 10;
// Matches a pathname of the form "/<theme>/news" (optional trailing slash) and captures <theme>.
const THEME_PAGE_RE = /^\/([A-Za-z0-9_-]+)\/news\/?$/;
// title_en values fetched when the plugin runs in "all_tabs" mode.
const ALL_TAB_KEYS = [
  "dataresourceregistration",
  "dataresources",
  "publicdatadevelop",
  "dataproducts",
  "dplists",
  "dataproperty",
  "dataintellectualproperty",
  "internationaldatamarket",
  "dataassets",
  "dataopensource",
];
// title_en values the plugin recognizes. Unknown tokens that contain no Chinese
// characters are still forwarded to the API as title_en by resolveThemeOptions.
const KNOWN_TITLE_EN = new Set([
  "classicproject",
  "dataassets",
  "dataassetvalue",
  "dataintellectualproperty",
  "dataopensource",
  "dataproducts",
  "dataproperty",
  "dataresourceregistration",
  "dataresources",
  "dplists",
  "internationaldatamarket",
  "latestbusiness",
  "latesttech",
  "latesttrend",
  "publicdatadevelop",
  "salon",
  "techforum",
]);
// Maps user-facing tokens (Chinese tab names and a few keywords) to a title_en
// value or to one of the special modes "recommend", "all" and "latest".
const TAB_ALIASES = new Map([
  ["公共数据", "dataresourceregistration"],
  ["公共数据资源登记", "dataresourceregistration"],
  ["资源入表", "dataresources"],
  ["数据资源入表", "dataresources"],
  ["授权运营", "publicdatadevelop"],
  ["公共数据授权运营", "publicdatadevelop"],
  ["产品交易", "dataproducts"],
  ["数据产品交易", "dataproducts"],
  ["产品上架", "dplists"],
  ["数据产品上架", "dplists"],
  ["推荐", "recommend"],
  ["产权登记", "dataproperty"],
  ["数据产权登记", "dataproperty"],
  ["知识产权", "dataintellectualproperty"],
  ["数据知识产权登记", "dataintellectualproperty"],
  ["商业市场", "internationaldatamarket"],
  ["商业数据市场", "internationaldatamarket"],
  ["资产融资", "dataassets"],
  ["数据资产融资", "dataassets"],
  ["开源市场", "dataopensource"],
  ["开源数据市场", "dataopensource"],
  ["全部", "all"],
  ["所有目录", "all"],
  ["all", "all"],
  ["alltabs", "all"],
  ["all-tabs", "all"],
  ["最新", "latest"],
  ["latest", "latest"],
  ["new", "latest"],
]);
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
/**
 * Collapses every run of whitespace into a single space and trims the ends.
 *
 * Strings are normalized as-is and finite numbers are stringified first, so a
 * numeric field in an API row does not crash the caller. Any other value
 * (null, undefined, objects, NaN, ...) yields an empty string.
 *
 * @param {unknown} text - Value to normalize.
 * @returns {string} The normalized text, or "" when there is no usable text.
 */
function normalizeText(text) {
  let value = "";
  if (typeof text === "string") {
    value = text;
  } else if (typeof text === "number" && Number.isFinite(text)) {
    value = String(text);
  }
  return value.replace(/\s+/g, " ").trim();
}
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
/**
 * Builds a GUID as the hex-encoded SHA-256 digest of `input`.
 * Reads the module-level `_deps`, which fetchItems assigns before any row is mapped.
 *
 * @param {string} input - Text to hash.
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const digest = _deps.createHash("sha256").update(input).digest("hex");
  return digest;
}
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
/**
 * Parses a published-at value into a Date.
 *
 * Text shaped like "YYYY-M-D" with an optional " H:M[:S]" or "TH:M[:S]" part is
 * zero-padded and interpreted at UTC+08:00. Anything else goes through the
 * Date constructor. Empty or unparseable input yields the current time.
 *
 * @param {unknown} input - Raw timestamp from the API row.
 * @returns {Date} Always a valid Date.
 */
function parsePublishedAt(input) {
  const text = normalizeText(input);
  if (!text) return new Date();

  const parts = text.match(
    /^(\d{4})-(\d{1,2})-(\d{1,2})(?:[ T](\d{1,2}):(\d{1,2})(?::(\d{1,2}))?)?$/
  );
  if (parts !== null) {
    // Missing time components default to zero.
    const pad = (value) => (value ?? "0").padStart(2, "0");
    const datePart = [parts[1], pad(parts[2]), pad(parts[3])].join("-");
    const timePart = [pad(parts[4]), pad(parts[5]), pad(parts[6])].join(":");
    const zoned = new Date(`${datePart}T${timePart}+08:00`);
    if (!Number.isNaN(zoned.getTime())) return zoned;
  }

  const loose = new Date(text);
  return Number.isNaN(loose.getTime()) ? new Date() : loose;
}
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
/**
 * Normalizes a theme token for comparison: whitespace-normalized, then lowercased.
 *
 * @param {unknown} input - Raw token.
 * @returns {string} Comparable token.
 */
function normalizeThemeToken(input) {
  const collapsed = normalizeText(input);
  return collapsed.toLowerCase();
}
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
/**
 * Reports whether `input` contains at least one character in the range
 * U+3400–U+9FFF, which this plugin treats as Chinese text.
 *
 * @param {string} input - Token to inspect.
 * @returns {boolean} True when such a character is present.
 */
function isChineseToken(input) {
  const cjkRange = /[\u3400-\u9fff]/;
  return cjkRange.test(input);
}
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
/**
 * Normalizes a URL token and percent-decodes it.
 *
 * @param {unknown} input - Raw token taken from a URL.
 * @returns {string} Decoded token; the normalized (undecoded) token when it
 *   contains a malformed escape; "" for empty input.
 */
function decodeUrlToken(input) {
  const token = normalizeText(input);
  if (token === "") return "";

  let decoded = token;
  try {
    decoded = decodeURIComponent(token);
  } catch {
    // Malformed percent-escape: keep the normalized token unchanged.
  }
  return decoded;
}
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
/**
 * Determines how many pages to fetch per theme.
 *
 * Reads `pages` or `maxPages` from the query string first, then from the URL
 * fragment parsed as a query string. A value that is not an integer yields
 * DEFAULT_MAX_PAGES; otherwise the value is clamped to 1..MAX_PAGES_LIMIT.
 *
 * @param {URL} sourceUrl - Parsed source URL.
 * @returns {number} Page count.
 */
function resolveMaxPages(sourceUrl) {
  const fragment = normalizeText(sourceUrl.hash.replace(/^#/, ""));
  const hashParams = new URLSearchParams(fragment.startsWith("?") ? fragment : `?${fragment}`);

  // Checked in priority order; the first non-empty value wins.
  const lookups = [
    [sourceUrl.searchParams, "pages"],
    [sourceUrl.searchParams, "maxPages"],
    [hashParams, "pages"],
    [hashParams, "maxPages"],
  ];
  let raw = null;
  for (const [params, name] of lookups) {
    raw = params.get(name);
    if (raw) break;
  }

  const requested = Number.parseInt(normalizeText(raw), 10);
  if (Number.isNaN(requested)) return DEFAULT_MAX_PAGES;
  if (requested < 1) return 1;
  return requested > MAX_PAGES_LIMIT ? MAX_PAGES_LIMIT : requested;
}
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
/**
 * Extracts a theme token from a URL fragment.
 *
 * Supported forms:
 *   - query style: "#theme=x", "#title_en=x", "#tab=x", "#category=x"
 *     (checked in that order, with an optional leading "?");
 *   - bare token: "#dataassets" or "#dataassets&pages=2".
 *
 * Segments of the form key=value are never used as a bare token, so an
 * option-only fragment such as "#pages=5" (read by resolveMaxPages) is not
 * mistaken for a theme name.
 *
 * @param {string} hash - URL fragment, with or without the leading "#".
 * @returns {string} Theme token, or "" when the fragment carries none.
 */
function getThemeTokenFromHash(hash) {
  const raw = decodeUrlToken(hash.replace(/^#/, ""));
  if (!raw) return "";

  const params = new URLSearchParams(raw.startsWith("?") ? raw : `?${raw}`);
  const queryToken = normalizeText(
    params.get("theme") ||
      params.get("title_en") ||
      params.get("tab") ||
      params.get("category")
  );
  if (queryToken) return queryToken;

  // Bare-token form: first segment that is not a key=value pair.
  const bareToken = raw
    .replace(/^\?/, "")
    .split("&")
    .map((segment) => normalizeText(segment))
    .find((segment) => segment !== "" && !segment.includes("="));
  return bareToken ?? "";
}
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
/**
 * Extracts the theme token from a source URL.
 *
 * Priority: query parameters `theme`, `title_en`, `tab`, `category`; then the
 * URL fragment; then a pathname matching THEME_PAGE_RE.
 *
 * @param {URL} sourceUrl - Parsed source URL.
 * @returns {string} Theme token, or "" when none is present.
 */
function getThemeTokenFromUrl(sourceUrl) {
  const firstQueryValue = ["theme", "title_en", "tab", "category"]
    .map((name) => sourceUrl.searchParams.get(name))
    .find(Boolean);
  const fromQuery = normalizeText(firstQueryValue);
  if (fromQuery) return fromQuery;

  const fromHash = getThemeTokenFromHash(sourceUrl.hash || "");
  if (fromHash) return fromHash;

  const captured = sourceUrl.pathname.match(THEME_PAGE_RE)?.[1];
  return captured ? normalizeText(captured) : "";
}
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
/**
 * Translates the source URL into fetch options.
 *
 * Result shapes:
 *   - { mode: "all_tabs", label } when no token is given or it resolves to "all";
 *   - { mode: "latest", label } when it resolves to "latest";
 *   - { mode: "theme", titleEn, isRecommend, label } otherwise
 *     (the "recommend" alias uses titleEn "" with isRecommend 1).
 *
 * Aliases are matched against the token as written and against its lowercased
 * form, so keyword aliases such as "NEW" or "AllTabs" resolve regardless of case.
 *
 * @param {URL} sourceUrl - Parsed source URL.
 * @returns {object} Options consumed by fetchItems and the theme fetchers.
 * @throws {Error} When a token containing Chinese characters is neither an
 *   alias nor a known title_en.
 */
function resolveThemeOptions(sourceUrl) {
  const tokenRaw = decodeUrlToken(getThemeTokenFromUrl(sourceUrl));
  if (!tokenRaw) {
    return { mode: "all_tabs", label: "全部目录" };
  }

  const token = normalizeThemeToken(tokenRaw);
  // Keyword alias keys are lowercase, so fall back to the lowercased token.
  const alias = TAB_ALIASES.get(normalizeText(tokenRaw)) ?? TAB_ALIASES.get(token);
  const resolved = alias ?? token;

  switch (resolved) {
    case "recommend":
      return { mode: "theme", titleEn: "", isRecommend: 1, label: "推荐" };
    case "all":
      return { mode: "all_tabs", label: "全部目录" };
    case "latest":
      return { mode: "latest", label: "latest" };
    default:
      break;
  }

  // An unrecognized Chinese name cannot be a title_en; unknown non-Chinese
  // tokens are passed through so new upstream themes keep working.
  if (!KNOWN_TITLE_EN.has(resolved) && isChineseToken(tokenRaw)) {
    throw new Error(
      `[${SITE_ID}] 不支持的目录: ${tokenRaw}。` +
        "可用中文目录示例:公共数据、资源入表、授权运营、产品交易、产品上架、推荐、产权登记、知识产权、商业市场、资产融资、开源市场"
    );
  }

  return {
    mode: "theme",
    titleEn: resolved,
    isRecommend: 0,
    label: tokenRaw,
  };
}
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
/**
 * Produces an absolute link for an API row.
 *
 * Uses `rawUrl` when it resolves (relative to `origin`) to an http/https URL
 * that is not a bare homepage link. Otherwise builds "/detail?id=<row.id>" on
 * `origin`.
 *
 * @param {unknown} rawUrl - URL supplied by the API row.
 * @param {string} origin - Site origin used as the base.
 * @param {object} row - API row; only `id` is read.
 * @returns {string | null} Absolute URL, or null when neither a usable URL nor an id exists.
 */
function toAbsoluteLink(rawUrl, origin, row) {
  const candidate = normalizeText(rawUrl);
  if (candidate) {
    let resolved = null;
    try {
      resolved = new URL(candidate, origin);
    } catch {
      // ignore malformed url from upstream API
    }
    if (resolved && /^https?:$/i.test(resolved.protocol) && !isHomepageLink(resolved)) {
      return resolved.href;
    }
  }

  const id = String(row?.id ?? "").trim();
  if (!id) return null;

  const detail = new URL("/detail", origin);
  detail.searchParams.set("id", id);
  return detail.href;
}
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
/**
 * Reports whether `url` points at the bare site root: path "/" with no query
 * string and no fragment.
 *
 * @param {URL} url - Parsed URL.
 * @returns {boolean} True for a bare homepage link.
 */
function isHomepageLink(url) {
  const { pathname, search, hash } = url;
  if (pathname !== "/") return false;
  return [search, hash].every((part) => !part);
}
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
/**
 * Maps one API row to a feed item.
 *
 * The GUID is "<SITE_ID>-<id>" when the row has an id, otherwise a hash of the
 * link. The author is taken from `source`, falling back to `author`.
 *
 * @param {object} row - API row; reads title, url, id, summary, source, author
 *   and published_at.
 * @param {string} origin - Site origin used to resolve links.
 * @returns {object | null} Feed item, or null when the row lacks a title or a usable link.
 */
function mapRowToFeedItem(row, origin) {
  const title = normalizeText(row?.title);
  const link = toAbsoluteLink(row?.url, origin, row);
  if (!title || !link) return null;

  const summary = normalizeText(row?.summary);
  const author = normalizeText(row?.source || row?.author);
  const idText = String(row?.id ?? "").trim();

  return {
    guid: idText ? `${SITE_ID}-${idText}` : hashGuid(link),
    title,
    link,
    pubDate: parsePublishedAt(row?.published_at),
    author: author || undefined,
    summary: summary || undefined,
  };
}
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
/**
 * Fetches the first page of the "new" listing from /api/news/list.
 *
 * @param {string} origin - Site origin.
 * @param {number} [pageSize=DEFAULT_PAGE_SIZE] - Value sent as page_size.
 * @returns {Promise<object[]>} `payload.data` when it is an array, otherwise [].
 * @throws {Error} On a non-OK HTTP status or a non-JSON body.
 */
async function fetchNewsRows(origin, pageSize = DEFAULT_PAGE_SIZE) {
  const endpoint = new URL("/api/news/list", origin);
  const query = { page: "1", page_size: String(pageSize), key: "new" };
  for (const [name, value] of Object.entries(query)) {
    endpoint.searchParams.set(name, value);
  }

  const headers = {
    Accept: "application/json,text/plain,*/*",
    "User-Agent":
      "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
  };
  const response = await fetch(endpoint, { headers });
  if (!response.ok) {
    throw new Error(`[${SITE_ID}] 拉取新闻接口失败: HTTP ${response.status}`);
  }

  let payload;
  try {
    payload = await response.json();
  } catch {
    throw new Error(`[${SITE_ID}] 新闻接口返回非 JSON 数据`);
  }

  return Array.isArray(payload?.data) ? payload.data : [];
}
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
/**
 * Fetches a single page from /api/themes/news-list.
 *
 * @param {string} origin - Site origin.
 * @param {object} opts - Theme options; reads `titleEn` and `isRecommend`.
 * @param {number} page - Page number to request.
 * @param {number} [pageSize=THEME_PAGE_SIZE] - Value sent as page_size.
 * @returns {Promise<{rows: object[], currentPage: number, totalPages: number}>}
 *   Rows plus pagination info; missing pagination fields default to the
 *   requested page.
 * @throws {Error} On a non-OK HTTP status, a non-JSON body, or an API-level
 *   error (status 422 or success === false). When the API message reports a
 *   page_size validation failure, the error's name is "PageSizeValidationError".
 */
async function fetchThemeRowsPage(origin, opts, page, pageSize = THEME_PAGE_SIZE) {
  const endpoint = new URL("/api/themes/news-list", origin);
  const { searchParams } = endpoint;
  searchParams.set("page", String(page));
  searchParams.set("page_size", String(pageSize));
  searchParams.set("title_en", opts.titleEn ?? "");
  searchParams.set("is_recommend", String(opts.isRecommend ?? 0));

  // Theme label shown in error messages; an empty titleEn is the recommend feed.
  const themeLabel = opts.titleEn || "recommend";

  const response = await fetch(endpoint, {
    headers: {
      Accept: "application/json,text/plain,*/*",
      "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
    },
  });
  if (!response.ok) {
    throw new Error(
      `[${SITE_ID}] 拉取目录接口失败: HTTP ${response.status} (title_en=${themeLabel})`
    );
  }

  let payload;
  try {
    payload = await response.json();
  } catch {
    throw new Error(`[${SITE_ID}] 目录接口返回非 JSON 数据`);
  }

  const apiFailed = payload?.status === 422 || payload?.success === false;
  if (apiFailed) {
    const message = normalizeText(payload?.error?.message || payload?.message || "unknown error");
    const failure = new Error(
      `[${SITE_ID}] 目录接口返回错误 (title_en=${themeLabel}): ${message}`
    );
    // Tag page_size rejections so the caller can retry with a smaller size.
    if (isPageSizeValidationError(message)) failure.name = "PageSizeValidationError";
    throw failure;
  }

  const toInt = (value, fallback) => Number.parseInt(String(value ?? fallback), 10) || fallback;
  const rows = Array.isArray(payload?.data) ? payload.data : [];
  const currentPage = toInt(payload?.pagination?.current_page, page);
  const totalPages = toInt(payload?.pagination?.total_pages, currentPage);

  return { rows, currentPage, totalPages };
}
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
/**
 * Fetches a theme's rows, retrying with smaller page sizes when the API
 * rejects page_size.
 *
 * Tries `pageSize` first, then each entry of THEME_PAGE_SIZE_CANDIDATES. Only
 * page-size validation errors trigger the next attempt; any other error is
 * rethrown immediately.
 *
 * @param {string} origin - Site origin.
 * @param {object} opts - Theme options passed through to the page fetcher.
 * @param {number} [pageSize=THEME_PAGE_SIZE] - Preferred page size.
 * @param {number} [maxPages=DEFAULT_MAX_PAGES] - Maximum pages to fetch.
 * @returns {Promise<object[]>} Rows collected with the first accepted page size.
 * @throws {Error} The last page-size validation error when every size is rejected.
 */
async function fetchThemeRowsPaged(origin, opts, pageSize = THEME_PAGE_SIZE, maxPages = DEFAULT_MAX_PAGES) {
  const sizesToTry = dedupeValidPageSizes([pageSize, ...THEME_PAGE_SIZE_CANDIDATES]);
  let failure;

  for (let attempt = 0; attempt < sizesToTry.length; attempt += 1) {
    try {
      return await fetchThemeRowsPagedWithPageSize(origin, opts, sizesToTry[attempt], maxPages);
    } catch (err) {
      failure = err;
      if (isPageSizeValidationErr(err)) continue;
      throw err;
    }
  }

  throw failure ?? new Error(`[${SITE_ID}] 目录抓取失败: 未知错误`);
}
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
/**
 * Filters a list of page sizes down to unique positive integers, keeping
 * first-occurrence order.
 *
 * @param {Iterable<unknown>} sizes - Candidate page sizes.
 * @returns {number[]} Valid sizes, or [THEME_PAGE_SIZE] when none are valid.
 */
function dedupeValidPageSizes(sizes) {
  const unique = new Set(
    [...sizes].filter((size) => Number.isInteger(size) && size > 0)
  );
  return unique.size > 0 ? [...unique] : [THEME_PAGE_SIZE];
}
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
/**
 * Reports whether an API error message describes a page_size limit violation:
 * it must mention both "page_size" and "validation.max.numeric" (case-insensitive).
 *
 * @param {unknown} message - Error message returned by the API.
 * @returns {boolean} True when both markers are present.
 */
function isPageSizeValidationError(message) {
  const haystack = normalizeText(message).toLowerCase();
  return ["page_size", "validation.max.numeric"].every((needle) => haystack.includes(needle));
}
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
/**
 * Reports whether `err` is an Error whose name is "PageSizeValidationError".
 *
 * @param {unknown} err - Caught value.
 * @returns {boolean} True for a tagged page-size validation error.
 */
function isPageSizeValidationErr(err) {
  if (!(err instanceof Error)) return false;
  return err.name === "PageSizeValidationError";
}
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
/**
 * Fetches consecutive pages of a theme with a fixed page size.
 *
 * Pages are requested sequentially starting at 1. Fetching stops after
 * `maxPages` pages (clamped to 1..MAX_PAGES_LIMIT), on an empty page, or once
 * the API reports the current page as the last one.
 *
 * @param {string} origin - Site origin.
 * @param {object} opts - Theme options passed through to the page fetcher.
 * @param {number} [pageSize=THEME_PAGE_SIZE] - Page size for every request.
 * @param {number} [maxPages=DEFAULT_MAX_PAGES] - Maximum pages to fetch.
 * @returns {Promise<object[]>} Rows of all fetched pages, in page order.
 */
async function fetchThemeRowsPagedWithPageSize(
  origin,
  opts,
  pageSize = THEME_PAGE_SIZE,
  maxPages = DEFAULT_MAX_PAGES
) {
  const pageCap = Math.min(MAX_PAGES_LIMIT, Math.max(1, maxPages));
  const collected = [];

  let page = 1;
  while (page <= pageCap) {
    const result = await fetchThemeRowsPage(origin, opts, page, pageSize);
    collected.push(...result.rows);

    const exhausted = result.rows.length === 0 || result.currentPage >= result.totalPages;
    if (exhausted) break;
    page += 1;
  }

  return collected;
}
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
/**
 * Fetches every theme listed in ALL_TAB_KEYS concurrently and merges the rows.
 *
 * Failures of individual themes are tolerated as long as at least one row is
 * obtained overall.
 *
 * @param {string} origin - Site origin.
 * @param {number} [pageSize=THEME_PAGE_SIZE] - Preferred page size.
 * @param {number} [maxPages=DEFAULT_MAX_PAGES] - Maximum pages per theme.
 * @returns {Promise<object[]>} Rows of all successful themes, in ALL_TAB_KEYS order.
 * @throws {Error} When no rows were obtained; the message lists each failed theme.
 */
async function fetchAllThemeRows(origin, pageSize = THEME_PAGE_SIZE, maxPages = DEFAULT_MAX_PAGES) {
  const tasks = ALL_TAB_KEYS.map((titleEn) =>
    fetchThemeRowsPaged(origin, { titleEn, isRecommend: 0, label: titleEn }, pageSize, maxPages)
  );
  const outcomes = await Promise.allSettled(tasks);

  const rows = [];
  const failures = [];
  outcomes.forEach((outcome, index) => {
    if (outcome.status === "fulfilled") {
      rows.push(...outcome.value);
      return;
    }
    const { reason } = outcome;
    const detail = reason instanceof Error ? reason.message : String(reason);
    failures.push(`${ALL_TAB_KEYS[index]}: ${detail}`);
  });

  if (rows.length > 0) return rows;

  throw new Error(
    `[${SITE_ID}] 全目录抓取失败:` +
      (failures.length > 0 ? failures.join(" | ") : "没有可用数据")
  );
}
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
/**
 * Plugin entry point: resolves the requested theme from the source URL,
 * fetches the matching rows and returns them as feed items.
 *
 * @param {string} sourceId - Source URL (http/https).
 * @param {object} _ctx - Host context; only `_ctx.deps` is used.
 * @returns {Promise<object[]>} Items deduplicated by link, newest first.
 * @throws {Error} For an invalid or non-http(s) URL, or when no items could be parsed.
 */
async function fetchItems(sourceId, _ctx) {
  _deps = _ctx.deps;

  let sourceUrl;
  try {
    sourceUrl = new URL(sourceId);
  } catch {
    throw new Error(`[${SITE_ID}] 无效 URL: ${sourceId}`);
  }
  if (!/^https?:$/i.test(sourceUrl.protocol)) {
    throw new Error(`[${SITE_ID}] 仅支持 http/https URL`);
  }

  const { origin } = sourceUrl;
  const themeOptions = resolveThemeOptions(sourceUrl);
  const maxPages = resolveMaxPages(sourceUrl);

  let rows;
  switch (themeOptions.mode) {
    case "theme":
      rows = await fetchThemeRowsPaged(origin, themeOptions, THEME_PAGE_SIZE, maxPages);
      break;
    case "all_tabs":
      rows = await fetchAllThemeRows(origin, THEME_PAGE_SIZE, maxPages);
      break;
    default:
      rows = await fetchNewsRows(origin);
      break;
  }

  // Keep the first item seen for each link.
  const byLink = new Map();
  for (const row of rows) {
    const item = mapRowToFeedItem(row, origin);
    if (item && !byLink.has(item.link)) byLink.set(item.link, item);
  }

  const items = [...byLink.values()];
  items.sort((a, b) => b.pubDate.getTime() - a.pubDate.getTime());

  if (items.length > 0) return items;

  if (themeOptions.mode === "theme") {
    throw new Error(`[${SITE_ID}] 目录 ${themeOptions.label} 未解析到条目`);
  }
  throw new Error(`[${SITE_ID}] 未解析到条目,接口结构可能已变化`);
}
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
// Plugin descriptor: id, refresh interval, the list-page URL pattern (site root
// or "/<theme>/news", with optional query string or fragment), and the fetch entry point.
const fiveRadarPlugin = {
  id: SITE_ID,
  refreshInterval: "10min",
  listUrlPattern: /^https?:\/\/(?:www\.)?5radar\.com(?:\/[A-Za-z0-9_-]+\/news)?\/?(?:[?#].*)?$/i,
  fetchItems,
};

export default fiveRadarPlugin;
|