rssany 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/app/plugins/builtin/agi-eval-evaluation.rssany.js +188 -0
  2. package/app/plugins/builtin/amii-research-talent.rssany.js +73 -0
  3. package/app/plugins/builtin/anthropic-research.rssany.js +155 -0
  4. package/app/plugins/builtin/appen-resources.rssany.js +155 -0
  5. package/app/plugins/builtin/baai-wudao-paper-article.rssany.js +185 -0
  6. package/app/plugins/builtin/baaidata-csdn.rssany.js +242 -0
  7. package/app/plugins/builtin/baidu-research.rssany.js +222 -0
  8. package/app/plugins/builtin/brightdata-blog.rssany.js +301 -0
  9. package/app/plugins/builtin/bytedance-seed-research.rssany.js +231 -0
  10. package/app/plugins/builtin/five-radar.rssany.js +490 -0
  11. package/app/plugins/builtin/flageval-news.rssany.js +118 -0
  12. package/app/plugins/builtin/google-deepmind-research.rssany.js +223 -0
  13. package/app/plugins/builtin/google-research-datasets.rssany.js +171 -0
  14. package/app/plugins/builtin/google-research.rssany.js +220 -0
  15. package/app/plugins/builtin/google.rssany.js +187 -0
  16. package/app/plugins/builtin/hacker-news-newest.rssany.js +130 -0
  17. package/app/plugins/builtin/harvard-dataverse.rssany.js +166 -0
  18. package/app/plugins/builtin/huaweicloud-bbs-blogs.rssany.js +185 -0
  19. package/app/plugins/builtin/lingowhale.rssany.js +119 -0
  20. package/app/plugins/builtin/meituan-tech.rssany.js +130 -0
  21. package/app/plugins/builtin/meta-ai-publications.rssany.js +221 -0
  22. package/app/plugins/builtin/mila-quebec.rssany.js +199 -0
  23. package/app/plugins/builtin/mit-csail-research.rssany.js +208 -0
  24. package/app/plugins/builtin/moonshot.rssany.js +127 -0
  25. package/app/plugins/builtin/opendatalab-news.rssany.js +174 -0
  26. package/app/plugins/builtin/opendatalab.rssany.js +109 -0
  27. package/app/plugins/builtin/opendrivelab-autonomous-driving.rssany.js +114 -0
  28. package/app/plugins/builtin/opendrivelab-embodiedai.rssany.js +114 -0
  29. package/app/plugins/builtin/opendrivelab-publications.rssany.js +130 -0
  30. package/app/plugins/builtin/opendrivelab.rssany.js +333 -0
  31. package/app/plugins/builtin/paperswithcode.rssany.js +227 -0
  32. package/app/plugins/builtin/pjlab-adg-publications.rssany.js +202 -0
  33. package/app/plugins/builtin/rss.rssany.js +11 -1
  34. package/app/plugins/builtin/selectdataset.rssany.js +206 -0
  35. package/app/plugins/builtin/sensetime-tech-achievements.rssany.js +154 -0
  36. package/app/plugins/builtin/supervisely-blog.rssany.js +159 -0
  37. package/app/plugins/builtin/theinformation-briefings.rssany.js +136 -0
  38. package/app/plugins/builtin/uci-ml-repository.rssany.js +111 -0
  39. package/app/plugins/builtin/venturebeat.rssany.js +97 -0
  40. package/app/plugins/builtin/worldlabs.rssany.js +129 -0
  41. package/app/plugins/builtin/x.rssany.js +328 -0
  42. package/app/plugins/builtin/xiaohongshu.rssany.js +283 -0
  43. package/app/plugins/builtin/zhipu-research.rssany.js +334 -0
  44. package/dist/index.js +62 -4
  45. package/dist/index.js.map +1 -1
  46. package/package.json +1 -1
  47. package/webui/build/200.html +6 -6
  48. package/webui/build/_app/immutable/assets/{0.DjU2hdCQ.css → 0.BB88QFoe.css} +1 -1
  49. package/webui/build/_app/immutable/assets/homeFeedPanelStore.CSvlNcpm.css +1 -0
  50. package/webui/build/_app/immutable/chunks/BwlaCkNX.js +36 -0
  51. package/webui/build/_app/immutable/chunks/C0J2-L94.js +1 -0
  52. package/webui/build/_app/immutable/chunks/CLOXMsDk.js +36 -0
  53. package/webui/build/_app/immutable/chunks/{C85CNwD2.js → DgceFEv5.js} +1 -1
  54. package/webui/build/_app/immutable/chunks/{CllQAdvt.js → SqCUd34O.js} +1 -1
  55. package/webui/build/_app/immutable/entry/{app.BcD2eSsQ.js → app.B8zBPipq.js} +2 -2
  56. package/webui/build/_app/immutable/entry/start.CxRCKeCl.js +1 -0
  57. package/webui/build/_app/immutable/nodes/0.ChLNE3xy.js +11 -0
  58. package/webui/build/_app/immutable/nodes/{1.DU9aYGAb.js → 1.1N74-4Io.js} +1 -1
  59. package/webui/build/_app/immutable/nodes/{10.Db6vw7Ih.js → 10.DY30t9Ib.js} +1 -1
  60. package/webui/build/_app/immutable/nodes/{11.BaAcorz3.js → 11.ITuxnukH.js} +1 -1
  61. package/webui/build/_app/immutable/nodes/12.qLzWqB1c.js +1 -0
  62. package/webui/build/_app/immutable/nodes/{14.DqT4pcrQ.js → 14.BHnIxbVM.js} +1 -1
  63. package/webui/build/_app/immutable/nodes/{15.CCLbjxnH.js → 15.CLjT9il3.js} +1 -1
  64. package/webui/build/_app/immutable/nodes/{16.DiigpVdP.js → 16.BD-mKCLN.js} +1 -1
  65. package/webui/build/_app/immutable/nodes/{3.DEcYOQc-.js → 3.Dt5o2Fmz.js} +1 -1
  66. package/webui/build/_app/immutable/nodes/{5.CvM1TkLG.js → 5.Dy3vSsIP.js} +1 -1
  67. package/webui/build/_app/immutable/nodes/{6.Dscr6LkS.js → 6.DvclsL6H.js} +1 -1
  68. package/webui/build/_app/immutable/nodes/{7.Bp60MobD.js → 7.D2nJy-Uz.js} +1 -1
  69. package/webui/build/_app/immutable/nodes/{8.DwSg0MHh.js → 8.C75mhrqs.js} +1 -1
  70. package/webui/build/_app/immutable/nodes/{9.BeYOUjxR.js → 9.Bp_QXw3w.js} +1 -1
  71. package/webui/build/_app/version.json +1 -1
  72. package/webui/build/_app/immutable/assets/homeFeedPanelStore.BopJZtHu.css +0 -1
  73. package/webui/build/_app/immutable/chunks/CdMsRjxJ.js +0 -1
  74. package/webui/build/_app/immutable/chunks/CtijX1u3.js +0 -31
  75. package/webui/build/_app/immutable/chunks/Dv1VCsiB.js +0 -41
  76. package/webui/build/_app/immutable/entry/start.CbkdJdz1.js +0 -1
  77. package/webui/build/_app/immutable/nodes/0.DSUDmOx2.js +0 -11
  78. package/webui/build/_app/immutable/nodes/12.Cg8AeCSH.js +0 -1
@@ -0,0 +1,208 @@
1
+ let _deps;
2
+
3
+ // MIT CSAIL Research plugin: warm up via homepage, then parse /research list items.
4
+
5
+
6
+
7
+ const SITE_ID = "mit-csail-research";
8
+ const CSAIL_HOME_URL = "https://www.csail.mit.edu/";
9
+ const CSAIL_RESEARCH_PATH = "/research";
10
+ const SUMMARY_SELECTOR = "div, p, span, h2, h3, h4, a";
11
+ const BLOCKED_HINTS = [
12
+ "http error 403",
13
+ "request to access",
14
+ "access denied",
15
+ "request denied",
16
+ "body class=\"neterror\"",
17
+ "\u60a8\u672a\u83b7\u6388\u6743\uff0c\u65e0\u6cd5\u67e5\u770b\u6b64\u7f51\u9875\u3002",
18
+ "\u8bbf\u95ee <span>www.csail.mit.edu</span> \u7684\u8bf7\u6c42\u906d\u5230\u62d2\u7edd",
19
+ ];
20
+
21
+
22
/**
 * Collapse every run of whitespace into one space and trim both ends.
 *
 * @param {string|null|undefined} text - Raw text; nullish input is treated as "".
 * @returns {string} The normalized text.
 */
function normalizeText(text) {
  const raw = text ?? "";
  const collapsed = raw.replace(/\s+/g, " ");
  return collapsed.trim();
}
25
+
26
+
27
/**
 * Derive a stable item GUID: the hex SHA-256 digest of `input`, computed with
 * the `createHash` supplied through the plugin dependencies.
 *
 * @param {string} input - Value to hash (callers pass the item link).
 * @returns {string} Hex-encoded digest.
 */
function hashGuid(input) {
  const hashed = _deps.createHash("sha256").update(input);
  return hashed.digest("hex");
}
30
+
31
+
32
/**
 * Resolve an href against `baseUrl` and keep it only if it is http(s).
 *
 * @param {string|null|undefined} rawHref - The href attribute value.
 * @param {string} baseUrl - Base used to resolve relative hrefs.
 * @returns {string|null} Absolute URL, or null for empty hrefs, fragments,
 *   `javascript:` / `mailto:` links, unparsable URLs and non-http protocols.
 */
function toAbsoluteHttpUrl(rawHref, baseUrl) {
  const href = rawHref ? rawHref.trim() : "";
  if (href === "") return null;

  const ignoredPrefixes = ["#", "javascript:", "mailto:"];
  if (ignoredPrefixes.some((prefix) => href.startsWith(prefix))) return null;

  let resolved;
  try {
    resolved = new URL(href, baseUrl);
  } catch {
    return null;
  }
  return /^https?:$/i.test(resolved.protocol) ? resolved.href : null;
}
44
+
45
+
46
/**
 * Reduce a category label to a comparison token: normalized, lower-cased and
 * stripped of everything except ASCII letters and digits.
 *
 * @param {string|null|undefined} text - Category label.
 * @returns {string} The token (possibly empty).
 */
function normalizeCategoryToken(text) {
  const lowered = normalizeText(text).toLowerCase();
  return lowered.replace(/[^a-z0-9]+/g, "");
}
49
+
50
+
51
/**
 * Decide whether a card's category satisfies the requested category filter.
 *
 * An empty request matches everything; a card without a category never matches
 * a non-empty request. If the request mentions "group", "center" or
 * "community" (checked in that order), the card only has to contain that same
 * keyword; otherwise the card token must contain the whole requested token.
 *
 * @param {string} cardCategory - Category text found on the card.
 * @param {string} queryCategory - Category requested by the caller.
 * @returns {boolean}
 */
function matchesRequestedCategory(cardCategory, queryCategory) {
  const wanted = normalizeCategoryToken(queryCategory);
  if (wanted === "") return true;

  const card = normalizeCategoryToken(cardCategory);
  if (card === "") return false;

  const family = ["group", "center", "community"].find((keyword) => wanted.includes(keyword));
  return card.includes(family ?? wanted);
}
62
+
63
+
64
/**
 * Heuristic check for a denied or error response.
 *
 * @param {number} status - HTTP status reported for the fetch.
 * @param {string|null|undefined} html - Page body.
 * @returns {boolean} True when status is >= 400 or the lower-cased body
 *   contains any entry of BLOCKED_HINTS.
 */
function looksLikeBlockedPage(status, html) {
  if (status >= 400) return true;

  const haystack = (html ?? "").toLowerCase();
  for (const hint of BLOCKED_HINTS) {
    if (haystack.includes(hint.toLowerCase())) return true;
  }
  return false;
}
69
+
70
+
71
/**
 * Find the research listing URL among the homepage links.
 *
 * Scans anchors whose text mentions "research" and returns the first one that
 * points at a csail.mit.edu host with path CSAIL_RESEARCH_PATH (trailing
 * slashes ignored), with query string and fragment removed. If no anchor
 * qualifies, CSAIL_RESEARCH_PATH is resolved against `homeUrl`.
 *
 * @param {string} homeHtml - Homepage markup.
 * @param {string} homeUrl - URL the homepage was loaded from.
 * @returns {string} Absolute URL of the research listing.
 */
function resolveResearchUrlFromHome(homeHtml, homeUrl) {
  const dom = _deps.parseHtml(homeHtml);

  for (const anchor of dom.querySelectorAll("a[href]")) {
    const label = normalizeText(anchor.textContent).toLowerCase();
    if (!label.includes("research")) continue;

    const absolute = toAbsoluteHttpUrl(anchor.getAttribute("href"), homeUrl);
    if (!absolute) continue;

    try {
      const candidate = new URL(absolute);
      const onCsailHost = /(^|\.)csail\.mit\.edu$/i.test(candidate.hostname);
      const trimmedPath = candidate.pathname.replace(/\/+$/, "") || "/";
      if (onCsailHost && trimmedPath === CSAIL_RESEARCH_PATH) {
        candidate.search = "";
        candidate.hash = "";
        return candidate.href;
      }
    } catch {
      // Malformed link: skip it and keep scanning.
    }
  }

  return new URL(CSAIL_RESEARCH_PATH, homeUrl).href;
}
93
+
94
+
95
/**
 * Collect the normalized text of every "leaf" element under `node`: elements
 * matching SUMMARY_SELECTOR that contain no further SUMMARY_SELECTOR match.
 * Elements whose text is empty after normalization are skipped.
 *
 * @param {object} node - Parsed element exposing querySelectorAll.
 * @returns {string[]} Leaf texts in document order.
 */
function getLeafTexts(node) {
  const leafTexts = [];
  for (const el of node.querySelectorAll(SUMMARY_SELECTOR)) {
    if (el.querySelector(SUMMARY_SELECTOR) != null) continue;
    const text = normalizeText(el.textContent);
    if (text) leafTexts.push(text);
  }
  return leafTexts;
}
102
+
103
+
104
/**
 * Pick a summary sentence for a card.
 *
 * Returns the first leaf text that is not the title, not the category, not
 * one of the UI fragments ("lead", "+N", "...more") and is 20 to 600
 * characters long.
 *
 * @param {object} card - Parsed card element.
 * @param {string} title - Card title to exclude.
 * @param {string} category - Card category to exclude.
 * @returns {string|undefined} The summary, or undefined when nothing fits.
 */
function extractSummary(card, title, category) {
  const noisePatterns = [/^lead$/i, /^\+\s*\d+$/i, /^\.{3}more$/i];

  return getLeafTexts(card).find((text) => {
    if (text === title || text === category) return false;
    if (noisePatterns.some((pattern) => pattern.test(text))) return false;
    return text.length >= 20 && text.length <= 600;
  });
}
113
+
114
+
115
/**
 * Build the author string from the links inside the card's `app-lead-bar`.
 *
 * Names are normalized, the "...more" expander is dropped, duplicates are
 * removed (first occurrence wins) and the rest are joined with ", ".
 *
 * @param {object} card - Parsed card element.
 * @returns {string|undefined} Joined names, or undefined when there are none.
 */
function extractAuthor(card) {
  const unique = new Set();
  for (const anchor of card.querySelectorAll("app-lead-bar a[href]")) {
    const name = normalizeText(anchor.textContent);
    if (name && name !== "...more") unique.add(name);
  }
  return unique.size > 0 ? [...unique].join(", ") : undefined;
}
123
+
124
+
125
/**
 * Convert one listing card into a feed item.
 *
 * The card link is the `rel="bookmark"` anchor, or failing that the first
 * anchor whose href contains "/research/". The title comes from the anchor's
 * `h2` (falling back to the anchor text) and the category from the card's
 * `h4`. `pubDate` is set to the current time.
 *
 * @param {object} card - Parsed card element.
 * @param {string} baseUrl - Base for resolving the card link.
 * @param {string} requestedCategory - Category filter ("" disables it).
 * @returns {object|null} Feed item, or null when the card has no usable
 *   link/title or does not match the requested category.
 */
function parseCardItem(card, baseUrl, requestedCategory) {
  const anchor =
    card.querySelector('a[rel="bookmark"][href]') ?? card.querySelector('a[href*="/research/"]');
  if (!anchor) return null;

  const heading = anchor.querySelector("h2");
  const title = normalizeText(heading?.textContent || anchor.textContent);
  const link = title ? toAbsoluteHttpUrl(anchor.getAttribute("href"), baseUrl) : null;
  if (!link) return null;

  const category = normalizeText(card.querySelector("h4")?.textContent);
  if (!matchesRequestedCategory(category, requestedCategory)) return null;

  const summaryText = extractSummary(card, title, category);
  const authorText = extractAuthor(card);

  return {
    guid: hashGuid(link),
    title,
    link,
    pubDate: new Date(),
    author: authorText,
    summary: summaryText || undefined,
  };
}
152
+
153
+
154
/**
 * Parse every `<article>` card of the listing page into feed items, keeping
 * only the first item for each distinct link.
 *
 * @param {string} html - Listing page markup.
 * @param {string} finalUrl - Base URL for resolving card links.
 * @param {string} requestedCategory - Category filter ("" disables it).
 * @returns {object[]} Feed items in document order.
 */
function parseItems(html, finalUrl, requestedCategory) {
  const byLink = new Map();

  for (const card of _deps.parseHtml(html).querySelectorAll("article")) {
    const item = parseCardItem(card, finalUrl, requestedCategory);
    if (item && !byLink.has(item.link)) {
      byLink.set(item.link, item);
    }
  }

  return [...byLink.values()];
}
170
+
171
+
172
/**
 * Plugin entry point for the MIT CSAIL research listing.
 *
 * Loads the homepage first, derives the listing URL from it, then loads the
 * listing. If the first listing load yields no items it is loaded once more
 * with a longer wait. The optional `category` query parameter of `sourceId`
 * filters the cards.
 *
 * @param {string} sourceId - Source URL; may carry `?category=...`.
 * @param {object} ctx - Plugin context providing `deps` and `fetchHtml`.
 * @returns {Promise<object[]>} Parsed feed items (never empty).
 * @throws {Error} When a page looks blocked or no items can be parsed.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const requestedCategory = new URL(sourceId).searchParams.get("category") ?? "";

  const home = await ctx.fetchHtml(CSAIL_HOME_URL, { waitMs: 3000 });
  if (looksLikeBlockedPage(home.status, home.html)) {
    throw new Error(`[${SITE_ID}] \u8bbf\u95ee CSAIL \u9996\u9875\u88ab\u62d2\u7edd\uff08HTTP ${home.status}\uff09`);
  }
  const researchUrl = resolveResearchUrlFromHome(home.html, home.finalUrl || CSAIL_HOME_URL);

  // First attempt with a 4500 ms wait; a single retry at 6500 ms if it yields nothing.
  for (const waitMs of [4500, 6500]) {
    const page = await ctx.fetchHtml(researchUrl, { waitMs });
    if (looksLikeBlockedPage(page.status, page.html)) {
      throw new Error(`[${SITE_ID}] \u8bbf\u95ee research \u5217\u8868\u88ab\u62d2\u7edd\uff08HTTP ${page.status}\uff09`);
    }
    const items = parseItems(page.html, page.finalUrl || researchUrl, requestedCategory);
    if (items.length > 0) return items;
  }

  const withCategory = requestedCategory ? ` (category=${requestedCategory})` : "";
  throw new Error(`[${SITE_ID}] \u672a\u89e3\u6790\u5230\u7814\u7a76\u6761\u76ee${withCategory}\uff0c\u9875\u9762\u7ed3\u6784\u53ef\u80fd\u5df2\u53d8\u5316`);
}
202
+
203
+
204
/**
 * Plugin descriptor for the MIT CSAIL research listing.
 *
 * `listUrlPattern` accepts http/https, an optional `www.` prefix, the
 * `/research` path with or without a trailing slash, and an optional query
 * string (used for `?category=...`).
 */
const mitCsailResearchPlugin = {
  id: SITE_ID,
  // Optional trailing slash: "/research/" names the same listing as "/research".
  listUrlPattern: /^https?:\/\/(www\.)?csail\.mit\.edu\/research\/?(?:\?.*)?$/i,
  fetchItems,
};

export default mitCsailResearchPlugin;
@@ -0,0 +1,127 @@
1
+ let _deps;
2
+
3
+ // Moonshot 官方站插件:抓取首页“最新研究”列表,输出 FeedItem(不含 enrich)
4
+
5
+
6
+
7
+ const DATE_RE = /^(\d{4})-(\d{1,2})-(\d{1,2})$/;
8
+ const RESEARCH_HEADING_RE = /最新研究|latest\s+research/i;
9
+
10
+
11
/**
 * Normalize text to single-space-separated words with no leading or trailing
 * whitespace. Nullish input yields "".
 *
 * @param {string|null|undefined} text - Raw text.
 * @returns {string} Normalized text.
 */
function normalizeText(text) {
  const words = (text ?? "").split(/\s+/).filter(Boolean);
  return words.join(" ");
}
14
+
15
+
16
/**
 * Hex SHA-256 digest of `input`, used as the item GUID. Hashing goes through
 * the `createHash` found on the injected plugin dependencies.
 *
 * @param {string} input - Value to hash.
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const algorithm = "sha256";
  const encoding = "hex";
  return _deps.createHash(algorithm).update(input).digest(encoding);
}
19
+
20
+
21
/**
 * Resolve an href against `baseUrl`, accepting only http(s) results.
 *
 * @param {string|null|undefined} rawHref - The href attribute value.
 * @param {string} baseUrl - Base for relative hrefs.
 * @returns {string|null} Absolute URL, or null for empty hrefs, fragments,
 *   `javascript:` links, unparsable URLs and non-http protocols.
 */
function toAbsoluteHttpUrl(rawHref, baseUrl) {
  if (!rawHref) return null;

  const href = rawHref.trim();
  const unusable = href === "" || href.startsWith("#") || href.startsWith("javascript:");
  if (unusable) return null;

  try {
    const { protocol, href: absolute } = new URL(href, baseUrl);
    return /^https?:$/i.test(protocol) ? absolute : null;
  } catch {
    return null;
  }
}
33
+
34
+
35
/**
 * Parse a `YYYY-M-D` date string into a Date at 12:00 UTC of that day.
 *
 * `Date.UTC` silently rolls impossible dates over (2024-02-31 would become
 * March 2), so the components are read back and compared with the input; a
 * mismatch is treated as "no date".
 *
 * @param {string|null|undefined} dateText - Text expected to match DATE_RE.
 * @returns {Date|undefined} The parsed date, or undefined when the text does
 *   not match DATE_RE or does not name a real calendar day.
 */
function parseDate(dateText) {
  const match = normalizeText(dateText).match(DATE_RE);
  if (!match) return undefined;

  const [year, month, day] = match.slice(1).map(Number);
  const date = new Date(Date.UTC(year, month - 1, day, 12, 0, 0));

  const isSameDay =
    date.getUTCFullYear() === year &&
    date.getUTCMonth() === month - 1 &&
    date.getUTCDate() === day;
  return isSameDay ? date : undefined;
}
44
+
45
+
46
/**
 * Collect the title fragments of a research entry.
 *
 * Uses the non-empty normalized texts of the anchor's `h2` elements; if there
 * are none, falls back to the anchor's whole text.
 *
 * @param {object} anchor - Parsed anchor element.
 * @returns {string[]} Title fragments; empty when the anchor has no text.
 */
function extractTitleParts(anchor) {
  const headings = [];
  for (const h2 of anchor.querySelectorAll("h2")) {
    const text = normalizeText(h2.textContent);
    if (text) headings.push(text);
  }
  if (headings.length > 0) return headings;

  const wholeText = normalizeText(anchor.textContent);
  return wholeText ? [wholeText] : [];
}
56
+
57
+
58
/**
 * Turn one candidate anchor into a feed item.
 *
 * An anchor qualifies only if it has an http(s) link, at least one title
 * fragment and a `<p>` whose text matches DATE_RE. The first title fragment
 * becomes the title; remaining fragments are joined into the summary.
 *
 * @param {object} anchor - Parsed anchor element.
 * @param {string} finalUrl - Base URL for resolving the href.
 * @returns {object|null} Feed item, or null when the anchor does not qualify.
 */
function parseResearchItem(anchor, finalUrl) {
  const link = toAbsoluteHttpUrl(anchor.getAttribute("href"), finalUrl);
  if (!link) return null;

  const [title, ...extraParts] = extractTitleParts(anchor);
  if (title === undefined) return null;

  let dateText;
  for (const paragraph of anchor.querySelectorAll("p")) {
    const text = normalizeText(paragraph.textContent);
    if (DATE_RE.test(text)) {
      dateText = text;
      break;
    }
  }
  if (!dateText) return null;

  // Falls back to the current time if the matched text cannot be parsed.
  const pubDate = parseDate(dateText) ?? new Date();
  const subtitle = extraParts.join(" ");

  return {
    guid: hashGuid(link),
    title,
    link,
    pubDate,
    summary: subtitle || undefined,
  };
}
83
+
84
+
85
/**
 * Collect the anchors that may be research entries.
 *
 * Prefers the anchors inside the parent of the first h1/h2/h3 whose text
 * matches RESEARCH_HEADING_RE. If no such heading exists, or its parent
 * contains no anchors at all (for example when the heading sits in its own
 * wrapper next to the list), every anchor on the page is returned so the
 * per-anchor checks downstream can still pick out entries.
 *
 * @param {object} root - Parsed document root.
 * @returns {object[]} Candidate anchor elements.
 */
function collectCandidateAnchors(root) {
  const heading = root
    .querySelectorAll("h1, h2, h3")
    .find((node) => RESEARCH_HEADING_RE.test(normalizeText(node.textContent)));

  const section = heading?.parentNode;
  if (section && typeof section.querySelectorAll === "function") {
    const scoped = section.querySelectorAll("a[href]");
    // An empty scoped result would guarantee failure; widen the search instead.
    if (scoped.length > 0) return scoped;
  }

  return root.querySelectorAll("a[href]");
}
96
+
97
+
98
/**
 * Plugin entry point: load the Moonshot page and return its research entries.
 *
 * Links are resolved against the final URL reported by `fetchHtml`; when none
 * is reported, `sourceId` is used so relative links are still resolved rather
 * than discarded. Items are de-duplicated by link, first occurrence wins.
 *
 * @param {string} sourceId - URL of the page to load.
 * @param {object} ctx - Plugin context providing `deps` and `fetchHtml`.
 * @returns {Promise<object[]>} Parsed feed items (never empty).
 * @throws {Error} When no entry can be parsed from the page.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 4500 });
  const baseUrl = finalUrl || sourceId;
  const root = _deps.parseHtml(html);

  const byLink = new Map();
  for (const anchor of collectCandidateAnchors(root)) {
    const item = parseResearchItem(anchor, baseUrl);
    if (item && !byLink.has(item.link)) {
      byLink.set(item.link, item);
    }
  }

  const items = [...byLink.values()];
  if (items.length === 0) {
    throw new Error("[moonshot] 未解析到“最新研究”条目，页面结构可能已变化");
  }
  return items;
}
121
+
122
+
123
+ export default {
124
+ id: "moonshot",
125
+ listUrlPattern: /^https?:\/\/(www\.)?moonshot\.ai(?:\/[a-z]{2}(?:-[a-z]{2})?)?\/?(\?.*)?$/i,
126
+ fetchItems,
127
+ };
@@ -0,0 +1,174 @@
1
+ let _deps;
2
+
3
+
4
+ const API_ROOT = "https://static.openxlab.org.cn/opendatalab/dynamics";
5
+ const DETAIL_ROOT = "https://opendatalab.org.cn/news/details";
6
+
7
+ const SOURCES = [
8
+ {
9
+ label: "featuredArticles",
10
+ url: `${API_ROOT}/featuredArticles/data.json?t=12`,
11
+ detailType: "article",
12
+ category: "精选文章",
13
+ },
14
+ {
15
+ label: "banner",
16
+ url: `${API_ROOT}/banner/data.json?t=1`,
17
+ detailType: "banner",
18
+ category: "轮播",
19
+ },
20
+ {
21
+ label: "AItalk",
22
+ url: `${API_ROOT}/talkArticles/AItalk/data.json?t=1`,
23
+ detailType: "AItalk",
24
+ category: "AI Talk",
25
+ },
26
+ {
27
+ label: "JStalk",
28
+ url: `${API_ROOT}/talkArticles/JStalk/data.json?t=1`,
29
+ detailType: "JStalk",
30
+ category: "解数 Talk",
31
+ },
32
+ ];
33
+
34
/**
 * Collapse whitespace runs to single spaces and trim.
 *
 * In this plugin the helper is fed fields taken from parsed JSON, which are
 * not guaranteed to be strings. Finite numbers and bigints are stringified;
 * any other non-string value (null, undefined, objects, arrays, booleans, NaN)
 * is treated as empty text instead of throwing.
 *
 * @param {unknown} text - Value to normalize.
 * @returns {string} Normalized text, "" when there is nothing usable.
 */
function normalizeText(text) {
  let value = "";
  if (typeof text === "string") {
    value = text;
  } else if ((typeof text === "number" && Number.isFinite(text)) || typeof text === "bigint") {
    value = String(text);
  }
  return value.replace(/\s+/g, " ").trim();
}
37
+
38
/**
 * Compute the item GUID: hex SHA-256 of `input`, via the injected
 * `createHash` dependency.
 *
 * @param {string} input - Value to hash.
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const digest = _deps.createHash("sha256").update(input).digest("hex");
  return digest;
}
41
+
42
/**
 * Build the publication date from a record's date and optional start time.
 *
 * The date is read as `YYYY-M-D` or `YYYY/M/D` and the time as `H:MM`; both
 * are interpreted at UTC+08:00. Without a usable time, 12:00 is used. The
 * result is always a valid Date: an out-of-range time (e.g. "25:00") falls
 * back to noon, and a missing or out-of-range date (e.g. month 13) falls back
 * to the current time, so callers can sort on `getTime()` safely.
 *
 * @param {unknown} rawDate - Record date field.
 * @param {unknown} rawStartTime - Record start-time field (optional).
 * @returns {Date} A valid Date.
 */
function parsePubDate(rawDate, rawStartTime) {
  const asText = (value) => normalizeText(typeof value === "string" ? value : "");
  const isValid = (date) => !Number.isNaN(date.getTime());

  const dateMatch = asText(rawDate).match(/(\d{4})[/-](\d{1,2})[/-](\d{1,2})/);
  if (!dateMatch) return new Date();

  const [, year, month, day] = dateMatch;
  const datePart = `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;

  const timeMatch = asText(rawStartTime).match(/(\d{1,2}):(\d{2})/);
  if (timeMatch) {
    const [, hour, minute] = timeMatch;
    const withTime = new Date(`${datePart}T${hour.padStart(2, "0")}:${minute}:00+08:00`);
    if (isValid(withTime)) return withTime;
  }

  const atNoon = new Date(`${datePart}T12:00:00+08:00`);
  return isValid(atNoon) ? atNoon : new Date();
}
72
+
73
/**
 * Compose the one-line summary of a record from its schedule fields.
 *
 * @param {object|null|undefined} item - Record with optional `date`,
 *   `start_time` and `end_time`.
 * @returns {string|undefined} A live-time line when both start and end are
 *   present, a publish-time line when only the date is, undefined without a date.
 */
function buildSummary(item) {
  const [dateText, start, end] = [item?.date, item?.start_time, item?.end_time].map((value) =>
    normalizeText(value),
  );
  if (!dateText) return undefined;

  return start && end ? `直播时间: ${dateText} ${start}-${end}` : `发布时间: ${dateText}`;
}
81
+
82
/**
 * GET a JSON document that must contain an array.
 *
 * Failures are rethrown with a message naming `label`; for network and JSON
 * parsing failures the original error is kept on `cause` so its stack is not
 * lost.
 *
 * @param {string} url - Address of the JSON document.
 * @param {string} label - Short source name used in error messages.
 * @returns {Promise<unknown[]>} The parsed array.
 * @throws {Error} On a network failure, a non-2xx status, invalid JSON, or a
 *   payload that is not an array.
 */
async function fetchJson(url, label) {
  const describe = (err) => (err instanceof Error ? err.message : String(err));

  let response;
  try {
    response = await fetch(url, {
      headers: {
        "Accept": "application/json,text/plain,*/*",
        "User-Agent": "RssAny/1.0 (+https://github.com/rssany/rssany)",
      },
    });
  } catch (err) {
    throw new Error(`[opendatalab-news] 请求 ${label} 失败: ${describe(err)}`, { cause: err });
  }

  if (!response.ok) {
    throw new Error(`[opendatalab-news] 请求 ${label} 失败: HTTP ${response.status}`);
  }

  let data;
  try {
    data = await response.json();
  } catch (err) {
    throw new Error(`[opendatalab-news] 解析 ${label} JSON 失败: ${describe(err)}`, { cause: err });
  }

  if (!Array.isArray(data)) {
    throw new Error(`[opendatalab-news] ${label} 返回结构异常，期望数组`);
  }

  return data;
}
114
+
115
/**
 * Convert raw JSON records of one source into feed items.
 *
 * A record is kept only if it is an object with a usable numeric id and a
 * non-empty string title. The id must be a finite number or a non-blank
 * numeric string: `Number()` alone would turn null, "" and booleans into 0 or
 * 1 and produce links to records that do not exist.
 *
 * @param {unknown[]} records - Parsed JSON array for the source.
 * @param {{detailType: string}} source - Source descriptor; `detailType`
 *   selects the detail-page path segment.
 * @returns {object[]} Feed items in input order.
 */
function mapItems(records, source) {
  const toRecordId = (value) => {
    if (typeof value === "number") return Number.isFinite(value) ? value : null;
    if (typeof value === "string" && value.trim() !== "") {
      const parsed = Number(value);
      return Number.isFinite(parsed) ? parsed : null;
    }
    return null;
  };

  const items = [];

  for (const raw of records) {
    if (typeof raw !== "object" || raw == null) continue;

    const id = toRecordId(raw.id);
    // Non-string titles are skipped rather than allowed to abort the whole batch.
    const title = typeof raw.title === "string" ? normalizeText(raw.title) : "";
    if (id === null || !title) continue;

    const link = `${DETAIL_ROOT}/${source.detailType}/${id}`;

    items.push({
      guid: hashGuid(link),
      title,
      link,
      pubDate: parsePubDate(raw.date, raw.start_time),
      summary: buildSummary(raw),
      sourceId: "opendatalab-news",
    });
  }

  return items;
}
141
+
142
/**
 * Plugin entry point: gather items from every entry of SOURCES.
 *
 * The sources are independent, so they are requested concurrently; results
 * are combined in SOURCES order, so on duplicate links the item from the
 * earlier source is kept. The merged list is sorted newest first. An item
 * whose `pubDate` is not a valid date sorts as the oldest instead of making
 * the comparator return NaN.
 *
 * @param {string} _sourceId - Unused; the data URLs are fixed in SOURCES.
 * @param {object} _ctx - Plugin context providing `deps`.
 * @returns {Promise<object[]>} De-duplicated items, newest first (never empty).
 * @throws {Error} When any source fails to load or no item is produced.
 */
async function fetchItems(_sourceId, _ctx) {
  _deps = _ctx.deps;

  const batches = await Promise.all(
    SOURCES.map(async (source) => mapItems(await fetchJson(source.url, source.label), source)),
  );

  const byLink = new Map();
  for (const item of batches.flat()) {
    if (!byLink.has(item.link)) byLink.set(item.link, item);
  }
  const deduped = [...byLink.values()];

  const timeOf = (item) => {
    const time = item.pubDate.getTime();
    return Number.isNaN(time) ? 0 : time;
  };
  deduped.sort((a, b) => timeOf(b) - timeOf(a));

  if (deduped.length === 0) {
    throw new Error("[opendatalab-news] 未解析到条目，页面数据源可能已变化");
  }

  return deduped;
}
168
+
169
+ export default {
170
+ id: "opendatalab-news",
171
+ listUrlPattern: /^https?:\/\/(www\.)?opendatalab\.(org\.cn|com)\/news\/?(\?.*)?$/i,
172
+ refreshInterval: "1h",
173
+ fetchItems,
174
+ };
@@ -0,0 +1,109 @@
1
+ let _deps;
2
+
3
+
4
+ const OPENDATALAB_ORIGIN = "https://opendatalab.org.cn";
5
+ const OPENDATALAB_LIST_API = `${OPENDATALAB_ORIGIN}/datasets/api/v3/datasets/list`;
6
+
7
/**
 * Collapse whitespace runs to single spaces and trim.
 *
 * Callers in this plugin pass fields of an API response, which may be
 * something other than a string. Strings are used as-is, finite numbers and
 * bigints are stringified, and every other value (nullish, objects, arrays,
 * booleans, NaN) counts as empty text rather than raising a TypeError.
 *
 * @param {unknown} text - Value to normalize.
 * @returns {string} Normalized text, "" when there is nothing usable.
 */
function normalizeText(text) {
  const isNumeric = (typeof text === "number" && Number.isFinite(text)) || typeof text === "bigint";
  if (typeof text !== "string" && !isNumeric) return "";
  return String(text).replace(/\s+/g, " ").trim();
}
10
+
11
/**
 * GUID helper: hex-encoded SHA-256 of `input`, produced with the `createHash`
 * function from the injected dependencies.
 *
 * @param {string} input - Value to hash.
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const sha256 = _deps.createHash("sha256");
  return sha256.update(input).digest("hex");
}
14
+
15
/**
 * Convert an epoch timestamp (number or numeric string) to a Date.
 *
 * Values below 1e12 are multiplied by 1000 before use; larger values are used
 * directly as milliseconds.
 *
 * @param {unknown} value - Candidate timestamp.
 * @returns {Date|undefined} The date, or undefined for non-numeric,
 *   non-finite, non-positive or out-of-range values.
 */
function toDate(value) {
  const numeric = typeof value === "string" ? Number(value) : value;
  const usable = typeof numeric === "number" && Number.isFinite(numeric) && numeric > 0;
  if (!usable) return undefined;

  const date = new Date(numeric < 1e12 ? numeric * 1000 : numeric);
  return Number.isNaN(date.getTime()) ? undefined : date;
}
22
+
23
/**
 * Choose the publication date of a dataset record.
 *
 * Tries `lastUpdateTime`, `updatedAt`, `publicTime` and `createdAt` in that
 * order and returns the first one `toDate` can convert; falls back to the
 * current time.
 *
 * @param {object|null|undefined} record - Dataset record.
 * @returns {Date}
 */
function pickPubDate(record) {
  const fields = ["lastUpdateTime", "updatedAt", "publicTime", "createdAt"];
  for (const field of fields) {
    const parsed = toDate(record?.[field]);
    if (parsed) return parsed;
  }
  return new Date();
}
31
+
32
/**
 * Build the dataset page URL from a record name of the form "owner/dataset".
 *
 * The name is split on "/", each segment is normalized and URI-encoded, and
 * the segments are appended to OPENDATALAB_ORIGIN.
 *
 * @param {unknown} name - Record name.
 * @returns {string|null} The URL, or null when fewer than two non-empty
 *   segments remain.
 */
function toDatasetLink(name) {
  const segments = normalizeText(name)
    .split("/")
    .map((segment) => normalizeText(segment))
    .filter(Boolean);
  if (segments.length < 2) return null;

  const encodedPath = segments.map((segment) => encodeURIComponent(segment)).join("/");
  return `${OPENDATALAB_ORIGIN}/${encodedPath}`;
}
40
+
41
/**
 * Convert one dataset record from the list API into a feed item.
 *
 * Title: `displayName`, else `name`. Link: derived from `name`. Summary:
 * `introduction.zh`, else `introduction.en`. Author: `createdBy.name`, else
 * `updatedBy`. Empty author/summary are emitted as undefined.
 *
 * @param {unknown} record - Raw list entry.
 * @returns {object|null} Feed item, or null when the record is not an object
 *   or lacks a title or a usable link.
 */
function toFeedItem(record) {
  const isObject = Boolean(record) && typeof record === "object";
  if (!isObject) return null;

  const title = normalizeText(record.displayName || record.name || "");
  const link = toDatasetLink(record.name);
  if (!(title && link)) return null;

  const introduction = record?.introduction;
  const summaryText = normalizeText(introduction?.zh || introduction?.en || "");
  const authorText = normalizeText(record?.createdBy?.name || record?.updatedBy || "");

  return {
    guid: hashGuid(link),
    title,
    link,
    pubDate: pickPubDate(record),
    author: authorText || undefined,
    summary: summaryText || undefined,
    sourceId: "opendatalab",
  };
}
60
+
61
/**
 * Read `pageNo` / `pageSize` from the query string of `sourceId`.
 *
 * `pageNo` must be a positive integer and `pageSize` an integer from 1 to 100;
 * anything else falls back to the defaults (1 and 30), as does a `sourceId`
 * that is not a parsable URL.
 *
 * @param {string} sourceId - Source URL.
 * @returns {{pageNo: number, pageSize: number}}
 */
function parsePaginationFromSourceId(sourceId) {
  const defaults = { pageNo: 1, pageSize: 30 };

  let params;
  try {
    params = new URL(sourceId).searchParams;
  } catch {
    return defaults;
  }

  const readNumber = (key) => Number(params.get(key) ?? defaults[key]);
  const pageNo = readNumber("pageNo");
  const pageSize = readNumber("pageSize");

  const pageNoOk = Number.isInteger(pageNo) && pageNo > 0;
  const pageSizeOk = Number.isInteger(pageSize) && pageSize >= 1 && pageSize <= 100;

  return {
    pageNo: pageNoOk ? pageNo : defaults.pageNo,
    pageSize: pageSizeOk ? pageSize : defaults.pageSize,
  };
}
74
+
75
/**
 * Plugin entry point: request one page of the dataset list API and convert
 * the entries to feed items.
 *
 * Pagination comes from the `pageNo` / `pageSize` query parameters of
 * `sourceId`. A response body that cannot be parsed as JSON is handled like a
 * response without a list.
 *
 * @param {string} sourceId - Source URL.
 * @param {object} ctx - Plugin context providing `deps`.
 * @returns {Promise<object[]>} Feed items (never empty).
 * @throws {Error} On a non-2xx status, a response without `data.list`, or
 *   when no entry converts to an item.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const pagination = parsePaginationFromSourceId(sourceId);

  const requestInit = {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Accept": "application/json",
    },
    body: JSON.stringify({ pageNo: pagination.pageNo, pageSize: pagination.pageSize }),
  };
  const response = await fetch(OPENDATALAB_LIST_API, requestInit);

  if (!response.ok) {
    throw new Error(`[opendatalab] 请求列表接口失败: HTTP ${response.status}`);
  }

  let payload;
  try {
    payload = await response.json();
  } catch {
    // Unreadable body: reported below as an unusable response.
    payload = null;
  }

  const list = payload?.data?.list;
  if (!Array.isArray(list)) {
    const msg = normalizeText(payload?.msg) || "返回结构异常";
    throw new Error(`[opendatalab] 列表接口响应不可用: ${msg}`);
  }

  const items = [];
  for (const record of list) {
    const item = toFeedItem(record);
    if (item) items.push(item);
  }
  if (items.length === 0) {
    throw new Error("[opendatalab] 未解析到条目，接口结构可能已变化");
  }
  return items;
}
104
+
105
+ export default {
106
+ id: "opendatalab",
107
+ listUrlPattern: /^https?:\/\/(www\.)?opendatalab\.(org\.cn|com)\/?(?:datasets\/?)?(?:\?.*)?$/i,
108
+ fetchItems,
109
+ };