rssany 0.1.2 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -50
- package/app/plugins/builtin/agi-eval-evaluation.rssany.js +188 -0
- package/app/plugins/builtin/amii-research-talent.rssany.js +73 -0
- package/app/plugins/builtin/anthropic-research.rssany.js +155 -0
- package/app/plugins/builtin/appen-resources.rssany.js +155 -0
- package/app/plugins/builtin/baai-wudao-paper-article.rssany.js +185 -0
- package/app/plugins/builtin/baaidata-csdn.rssany.js +242 -0
- package/app/plugins/builtin/baidu-research.rssany.js +222 -0
- package/app/plugins/builtin/brightdata-blog.rssany.js +301 -0
- package/app/plugins/builtin/bytedance-seed-research.rssany.js +231 -0
- package/app/plugins/builtin/five-radar.rssany.js +490 -0
- package/app/plugins/builtin/flageval-news.rssany.js +118 -0
- package/app/plugins/builtin/google-deepmind-research.rssany.js +223 -0
- package/app/plugins/builtin/google-research-datasets.rssany.js +171 -0
- package/app/plugins/builtin/google-research.rssany.js +220 -0
- package/app/plugins/builtin/google.rssany.js +187 -0
- package/app/plugins/builtin/hacker-news-newest.rssany.js +130 -0
- package/app/plugins/builtin/harvard-dataverse.rssany.js +166 -0
- package/app/plugins/builtin/huaweicloud-bbs-blogs.rssany.js +185 -0
- package/app/plugins/builtin/lingowhale.rssany.js +119 -0
- package/app/plugins/builtin/meituan-tech.rssany.js +130 -0
- package/app/plugins/builtin/meta-ai-publications.rssany.js +221 -0
- package/app/plugins/builtin/mila-quebec.rssany.js +199 -0
- package/app/plugins/builtin/mit-csail-research.rssany.js +208 -0
- package/app/plugins/builtin/moonshot.rssany.js +127 -0
- package/app/plugins/builtin/opendatalab-news.rssany.js +174 -0
- package/app/plugins/builtin/opendatalab.rssany.js +109 -0
- package/app/plugins/builtin/opendrivelab-autonomous-driving.rssany.js +114 -0
- package/app/plugins/builtin/opendrivelab-embodiedai.rssany.js +114 -0
- package/app/plugins/builtin/opendrivelab-publications.rssany.js +130 -0
- package/app/plugins/builtin/opendrivelab.rssany.js +333 -0
- package/app/plugins/builtin/paperswithcode.rssany.js +227 -0
- package/app/plugins/builtin/pjlab-adg-publications.rssany.js +202 -0
- package/app/plugins/builtin/rss.rssany.js +11 -1
- package/app/plugins/builtin/selectdataset.rssany.js +206 -0
- package/app/plugins/builtin/sensetime-tech-achievements.rssany.js +154 -0
- package/app/plugins/builtin/supervisely-blog.rssany.js +159 -0
- package/app/plugins/builtin/uci-ml-repository.rssany.js +111 -0
- package/app/plugins/builtin/venturebeat.rssany.js +97 -0
- package/app/plugins/builtin/worldlabs.rssany.js +129 -0
- package/app/plugins/builtin/x.rssany.js +159 -0
- package/app/plugins/builtin/xiaohongshu.rssany.js +283 -0
- package/app/plugins/builtin/zhipu-research.rssany.js +334 -0
- package/dist/index.js +79 -9
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/webui/build/200.html +6 -6
- package/webui/build/_app/immutable/assets/0.BB88QFoe.css +1 -0
- package/webui/build/_app/immutable/assets/{homeFeedPanelStore.BopJZtHu.css → homeFeedPanelStore.iOmfP2qL.css} +1 -1
- package/webui/build/_app/immutable/chunks/CZD-YNDw.js +31 -0
- package/webui/build/_app/immutable/chunks/{DcAshVxe.js → D6VIKef0.js} +1 -1
- package/webui/build/_app/immutable/chunks/{EIZIMsXK.js → Dbqx2mXq.js} +1 -1
- package/webui/build/_app/immutable/chunks/DeX-oq5W.js +41 -0
- package/webui/build/_app/immutable/chunks/{BXCWEhUd.js → dhB8G5Is.js} +1 -1
- package/webui/build/_app/immutable/entry/{app.DdgnooOk.js → app.XPso7q7g.js} +2 -2
- package/webui/build/_app/immutable/entry/start.Db4snNCd.js +1 -0
- package/webui/build/_app/immutable/nodes/0.BKTQePmA.js +11 -0
- package/webui/build/_app/immutable/nodes/{1.5DFDaT4c.js → 1.BS3_Rfxm.js} +1 -1
- package/webui/build/_app/immutable/nodes/{10.OVK4i9XE.js → 10.CyyxDCIS.js} +1 -1
- package/webui/build/_app/immutable/nodes/{11.Dhn_rO4A.js → 11.CtYgIaGj.js} +1 -1
- package/webui/build/_app/immutable/nodes/{14.B_KpJLxn.js → 14.D5OEGPR2.js} +1 -1
- package/webui/build/_app/immutable/nodes/{15.RaWaA-0I.js → 15.B4dFN1Gk.js} +1 -1
- package/webui/build/_app/immutable/nodes/{16.DSUgqolV.js → 16.M7ZII7tl.js} +1 -1
- package/webui/build/_app/immutable/nodes/{3.wQvGs9w-.js → 3.7r8v7qkm.js} +1 -1
- package/webui/build/_app/immutable/nodes/{5.CCtn90c0.js → 5.CHIzoGrb.js} +1 -1
- package/webui/build/_app/immutable/nodes/{6.C2_mjW1u.js → 6.BDBqx-GY.js} +1 -1
- package/webui/build/_app/immutable/nodes/{7.Dwz6W7A1.js → 7.D5czsDmz.js} +1 -1
- package/webui/build/_app/immutable/nodes/{8.DzkEw6rx.js → 8.pjVNsCdV.js} +1 -1
- package/webui/build/_app/immutable/nodes/{9.DtlXEwe1.js → 9.CsARv1BH.js} +1 -1
- package/webui/build/_app/version.json +1 -1
- package/webui/build/_app/immutable/assets/0.C6Q_nuW9.css +0 -1
- package/webui/build/_app/immutable/chunks/CkUAV0m0.js +0 -41
- package/webui/build/_app/immutable/chunks/CtijX1u3.js +0 -31
- package/webui/build/_app/immutable/entry/start.DhJaJZhR.js +0 -1
- package/webui/build/_app/immutable/nodes/0.BE05Cuc4.js +0 -11
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
let _deps;
|
|
2
|
+
|
|
3
|
+
// Mila (Quebec AI Institute) news-list plugin: supports the home page /en and the news page /en/news
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
/** Site origin; used as the last-resort base when resolving relative links. */
const MILA_ORIGIN = "https://mila.quebec";

/** Pathname of a single news article: /en/news/<slug>, optional trailing slash. */
const NEWS_PATH_RE = /^\/en\/news\/[^/?#]+\/?$/i;

/** Anchor texts that are call-to-action labels rather than headlines. */
const NOISE_TITLE_RE = /^(read the (story|news)|see more news)$/i;

/** Lowercase three-letter English month abbreviation -> zero-based month index. */
const MONTH_TO_INDEX = Object.fromEntries(
  ["jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"].map(
    (abbreviation, index) => [abbreviation, index],
  ),
);
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
/**
 * Collapses every run of whitespace to one space and strips both ends.
 * @param {string|null|undefined} text - Raw text; null/undefined count as empty.
 * @returns {string} The single-spaced, trimmed text.
 */
function normalizeText(text) {
  const words = (text ?? "").split(/\s+/).filter(Boolean);
  return words.join(" ");
}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
/**
 * Derives a stable item GUID as the hex SHA-256 digest of the input.
 * Relies on `_deps.createHash`, which fetchItems assigns from `ctx.deps`.
 * @param {string} input - Value to hash (callers pass the item link).
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const hasher = _deps.createHash("sha256");
  const fed = hasher.update(input);
  return fed.digest("hex");
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
/**
 * Resolves an href against a base URL and keeps it only if it is http(s).
 * @param {string|null|undefined} rawHref - Value of an anchor's href attribute.
 * @param {string} baseUrl - Base used to resolve relative hrefs.
 * @returns {string|null} Absolute URL, or null for empty, fragment-only,
 *   `javascript:`, unparsable or non-http(s) hrefs.
 */
function toAbsoluteHttpUrl(rawHref, baseUrl) {
  const candidate = rawHref ? rawHref.trim() : "";
  const unusable =
    candidate === "" || ["#", "javascript:"].some((prefix) => candidate.startsWith(prefix));
  if (unusable) return null;

  let resolved;
  try {
    resolved = new URL(candidate, baseUrl);
  } catch {
    // Unparsable href, or a relative href without a usable base.
    return null;
  }
  return /^https?:$/i.test(resolved.protocol) ? resolved.href : null;
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
/**
 * Tells whether a URL's pathname matches NEWS_PATH_RE (a single news article).
 * @param {string} url - Absolute URL to inspect.
 * @returns {boolean} False for non-matching paths and for unparsable URLs.
 */
function isNewsArticleUrl(url) {
  let pathname;
  try {
    ({ pathname } = new URL(url));
  } catch {
    return false;
  }
  return NEWS_PATH_RE.test(pathname);
}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
/**
 * Finds the first valid day-first English date ("12 January 2024",
 * "3 Sept, 2023") in a piece of text.
 *
 * Commas are treated as whitespace. A match that is not a real calendar day
 * (e.g. "31 Feb 2024", "0 Jan 2024") is rejected instead of being rolled over
 * to a neighbouring date by Date.UTC, and the search continues with the next
 * match in the text.
 *
 * @param {string|null|undefined} text - Text that may contain a date.
 * @returns {Date|undefined} The date at 12:00 UTC, or undefined when no valid
 *   date is present.
 */
function extractDateFromText(text) {
  const normalized = normalizeText(text).replace(/,/g, " ");
  if (!normalized) return undefined;

  const dateRe =
    /(?:^|\b)(\d{1,2})\s+(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+(\d{4})(?:\b|$)/gi;

  for (const m of normalized.matchAll(dateRe)) {
    const day = Number(m[1]);
    const monthIdx = MONTH_TO_INDEX[m[2].slice(0, 3).toLowerCase()];
    const year = Number(m[3]);
    if (monthIdx == null || !Number.isFinite(day) || !Number.isFinite(year)) continue;

    // Noon UTC keeps the calendar day stable across most time zones.
    const d = new Date(Date.UTC(year, monthIdx, day, 12, 0, 0));
    // Date.UTC normalises overflow (31 Feb -> 2 Mar); accept only exact round-trips.
    const roundTrips =
      d.getUTCFullYear() === year && d.getUTCMonth() === monthIdx && d.getUTCDate() === day;
    if (roundTrips) return d;
  }
  return undefined;
}
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
/**
 * Looks for a publication date attached to, or near, a DOM-like node.
 *
 * First tries a descendant `<time datetime>` attribute. If that is absent or
 * unparsable, scans the text of the node and then of its ancestors (six
 * levels in total, the node included) with extractDateFromText.
 *
 * @param {object} node - Node exposing textContent / parentNode and,
 *   optionally, querySelector.
 * @returns {Date|undefined} The first date found, or undefined.
 */
function extractDateNearNode(node) {
  const stamp = node.querySelector?.("time[datetime]")?.getAttribute("datetime");
  const fromAttribute = stamp ? new Date(stamp) : null;
  if (fromAttribute && !Number.isNaN(fromAttribute.getTime())) return fromAttribute;

  let cursor = node;
  let level = 0;
  while (level < 6 && cursor) {
    const found = extractDateFromText(cursor.textContent);
    if (found) return found;
    cursor = cursor.parentNode ?? null;
    level += 1;
  }
  return undefined;
}
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
/**
 * Ranks a candidate title so the best anchor text for a link can be kept.
 * @param {string|null|undefined} text - Candidate title text.
 * @returns {number} 0 for empty text, 1 for a call-to-action label
 *   (NOISE_TITLE_RE), 2 for text shorter than 5 characters, otherwise
 *   10 plus the length capped at 120.
 */
function scoreTitle(text) {
  const cleaned = normalizeText(text);
  if (cleaned === "") return 0;
  if (NOISE_TITLE_RE.test(cleaned.toLowerCase())) return 1;
  return cleaned.length < 5 ? 2 : 10 + Math.min(cleaned.length, 120);
}
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
/**
 * Builds a readable fallback title from the last path segment of a URL
 * ("/en/news/new-ai_lab" -> "new ai lab").
 *
 * Returns "Mila News" whenever no usable slug exists: the URL cannot be
 * parsed, the segment has malformed percent-encoding, the path is empty, or
 * the slug consists only of separators. The original returned an empty
 * string in the last two cases.
 *
 * @param {string} link - Absolute article URL.
 * @returns {string} A non-empty title.
 */
function titleFromUrl(link) {
  const fallbackTitle = "Mila News";
  try {
    const lastSegment = new URL(link).pathname.split("/").filter(Boolean).pop() ?? "";
    const slug = decodeURIComponent(lastSegment);
    // Hyphen/underscore runs are word separators in slugs.
    return normalizeText(slug.replace(/[-_]+/g, " ")) || fallbackTitle;
  } catch {
    return fallbackTitle;
  }
}
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
/**
 * Picks a summary paragraph for an article link.
 *
 * Walks from the node up through its ancestors (four levels in total, the
 * node included). At each level it returns the first non-empty `<p>` text
 * that is neither the title (case-insensitive) nor a call-to-action label.
 *
 * @param {object} node - Node exposing parentNode and, optionally, querySelectorAll.
 * @param {string} title - Title to exclude from the candidates.
 * @returns {string|undefined} The chosen text, or undefined if none qualifies.
 */
function chooseSummary(node, title) {
  const isUsable = (text) => {
    const lower = text.toLowerCase();
    return lower !== title.toLowerCase() && !NOISE_TITLE_RE.test(lower);
  };

  let scope = node;
  let level = 0;
  while (level < 4 && scope) {
    const paragraphs = scope.querySelectorAll?.("p");
    const texts = paragraphs?.map((p) => normalizeText(p.textContent))?.filter(Boolean) ?? [];
    const picked = texts.find(isUsable);
    if (picked !== undefined) return picked;
    scope = scope.parentNode ?? null;
    level += 1;
  }
  return undefined;
}
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
/**
 * Plugin entry point: loads the listing page and returns one feed item per
 * distinct news-article link found on it.
 *
 * Several anchors may point at the same article. For each link the
 * best-scoring anchor text becomes the title, and the first date and first
 * summary found are kept. Titles that are empty or call-to-action labels are
 * replaced by one derived from the URL. Items without a date get the current
 * time. The result is sorted newest first.
 *
 * @param {string} sourceId - URL of the list page.
 * @param {object} ctx - Plugin context; this function uses `ctx.deps`
 *   (parseHtml, createHash) and `ctx.fetchHtml`.
 * @returns {Promise<object[]>} Items with guid, title, link, pubDate, author, summary.
 * @throws {Error} When no article link is found on the page.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 4000 });
  const pageUrl = finalUrl || sourceId || MILA_ORIGIN;
  const entriesByLink = new Map();

  for (const anchor of _deps.parseHtml(html).querySelectorAll("a[href]")) {
    const link = toAbsoluteHttpUrl(anchor.getAttribute("href"), pageUrl);
    if (!link || !isNewsArticleUrl(link)) continue;

    const anchorTitle = normalizeText(anchor.textContent);
    const anchorScore = scoreTitle(anchorTitle);
    const anchorDate = extractDateNearNode(anchor);
    const anchorSummary = chooseSummary(anchor, anchorTitle || "");

    let entry = entriesByLink.get(link);
    if (entry == null) {
      entry = { link, title: "", titleScore: 0, pubDate: undefined, summary: undefined };
      entriesByLink.set(link, entry);
    }

    // Keep the best title seen so far; date and summary are first-wins.
    if (anchorScore > entry.titleScore) {
      entry.title = anchorTitle;
      entry.titleScore = anchorScore;
    }
    if (!entry.pubDate && anchorDate) entry.pubDate = anchorDate;
    if (!entry.summary && anchorSummary) entry.summary = anchorSummary;
  }

  const items = [];
  for (const entry of entriesByLink.values()) {
    const hasRealTitle = entry.title && !NOISE_TITLE_RE.test(entry.title.toLowerCase());
    const title = hasRealTitle ? entry.title : titleFromUrl(entry.link);
    // A summary that merely repeats the title adds nothing.
    const keepSummary = entry.summary && normalizeText(entry.summary) !== normalizeText(title);
    items.push({
      guid: hashGuid(entry.link),
      title,
      link: entry.link,
      pubDate: entry.pubDate ?? new Date(),
      author: "Mila",
      summary: keepSummary ? entry.summary : undefined,
    });
  }

  items.sort((a, b) => b.pubDate.getTime() - a.pubDate.getTime());

  if (items.length === 0) {
    throw new Error("[mila-quebec] 未解析到新闻条目,页面结构可能已变化");
  }

  return items;
}
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
/**
 * Plugin descriptor for the Mila site: matches the /en home page and the
 * /en/news list page (optional "www.", trailing slash and query string).
 */
const milaQuebecPlugin = {
  id: "mila-quebec",
  listUrlPattern: /^https?:\/\/(www\.)?mila\.quebec\/en(?:\/news)?(?:\/)?(?:\?.*)?$/i,
  fetchItems,
};

export default milaQuebecPlugin;
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
let _deps;
|
|
2
|
+
|
|
3
|
+
// MIT CSAIL Research plugin: warm up via homepage, then parse /research list items.
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
/** Plugin id; also used as the prefix of thrown error messages. */
const SITE_ID = "mit-csail-research";

/** Home page fetched first, before the research list. */
const CSAIL_HOME_URL = "https://www.csail.mit.edu/";

/** Pathname of the research list page. */
const CSAIL_RESEARCH_PATH = "/research";

/** Elements whose text is considered when looking for a card summary. */
const SUMMARY_SELECTOR = ["div", "p", "span", "h2", "h3", "h4", "a"].join(", ");

/** Substrings (matched case-insensitively) that mark a blocked or error page. */
const BLOCKED_HINTS = [
  "http error 403",
  "request to access",
  "access denied",
  "request denied",
  "body class=\"neterror\"",
  "\u60a8\u672a\u83b7\u6388\u6743\uff0c\u65e0\u6cd5\u67e5\u770b\u6b64\u7f51\u9875\u3002",
  "\u8bbf\u95ee <span>www.csail.mit.edu</span> \u7684\u8bf7\u6c42\u906d\u5230\u62d2\u7edd",
];
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
/**
 * Normalises whitespace: every run becomes a single space, ends are trimmed.
 * @param {string|null|undefined} text - Raw text; null/undefined count as empty.
 * @returns {string} The cleaned text.
 */
function normalizeText(text) {
  const raw = text ?? "";
  const collapsed = raw.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
/**
 * Hex SHA-256 digest of the input, used as the item GUID.
 * Relies on `_deps.createHash`, which fetchItems assigns from `ctx.deps`.
 * @param {string} input - Value to hash (callers pass the item link).
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const algorithm = "sha256";
  const encoding = "hex";
  return _deps.createHash(algorithm).update(input).digest(encoding);
}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
/**
 * Resolves an href against a base URL and keeps it only if it is http(s).
 * @param {string|null|undefined} rawHref - Value of an anchor's href attribute.
 * @param {string} baseUrl - Base used to resolve relative hrefs.
 * @returns {string|null} Absolute URL, or null for empty, fragment-only,
 *   `javascript:`, `mailto:`, unparsable or non-http(s) hrefs.
 */
function toAbsoluteHttpUrl(rawHref, baseUrl) {
  if (!rawHref) return null;

  const trimmed = rawHref.trim();
  if (trimmed === "") return null;
  if (/^(?:#|javascript:|mailto:)/.test(trimmed)) return null;

  try {
    const { protocol, href } = new URL(trimmed, baseUrl);
    return /^https?:$/i.test(protocol) ? href : null;
  } catch {
    return null;
  }
}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
/**
 * Reduces a category label to a comparison token: lowercase, ASCII letters
 * and digits only.
 * @param {string|null|undefined} text - Category label.
 * @returns {string} The token; empty when nothing alphanumeric remains.
 */
function normalizeCategoryToken(text) {
  const lowered = normalizeText(text).toLowerCase();
  return lowered.replace(/[^a-z0-9]+/g, "");
}
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
/**
 * Decides whether a card belongs to the category requested in the source URL.
 *
 * An empty request matches everything. Requests mentioning "group", "center"
 * or "community" (checked in that order) match cards whose category contains
 * that same word. Any other request must appear as a substring of the card's
 * category token.
 *
 * @param {string} cardCategory - Category label found on the card.
 * @param {string} queryCategory - Requested category; may be empty.
 * @returns {boolean} True when the card should be kept.
 */
function matchesRequestedCategory(cardCategory, queryCategory) {
  const wanted = normalizeCategoryToken(queryCategory);
  if (wanted === "") return true;

  const card = normalizeCategoryToken(cardCategory);
  if (card === "") return false;

  const family = ["group", "center", "community"].find((keyword) => wanted.includes(keyword));
  return card.includes(family ?? wanted);
}
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
/**
 * Heuristic check for a blocked or error response.
 * @param {number|undefined} status - HTTP status reported for the fetch.
 * @param {string|null|undefined} html - Response body.
 * @returns {boolean} True when status is 400 or above, or when the body
 *   contains any BLOCKED_HINTS entry (case-insensitive).
 */
function looksLikeBlockedPage(status, html) {
  if (status >= 400) return true;

  const haystack = (html ?? "").toLowerCase();
  for (const hint of BLOCKED_HINTS) {
    if (haystack.includes(hint.toLowerCase())) return true;
  }
  return false;
}
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
/**
 * Finds the research list URL by scanning the home page's links.
 *
 * The first anchor whose text mentions "research", whose host is
 * csail.mit.edu (or a subdomain) and whose path is /research (trailing
 * slashes ignored) wins. It is returned without query string and fragment.
 * If no anchor qualifies, /research is resolved against `homeUrl`.
 *
 * @param {string} homeHtml - HTML of the home page.
 * @param {string} homeUrl - URL the home page was loaded from.
 * @returns {string} Absolute URL of the research list page.
 */
function resolveResearchUrlFromHome(homeHtml, homeUrl) {
  const toResearchUrl = (anchor) => {
    const label = normalizeText(anchor.textContent).toLowerCase();
    if (!label.includes("research")) return null;

    const absolute = toAbsoluteHttpUrl(anchor.getAttribute("href"), homeUrl);
    if (!absolute) return null;

    let parsed;
    try {
      parsed = new URL(absolute);
    } catch {
      // ignore malformed link
      return null;
    }
    if (!/(^|\.)csail\.mit\.edu$/i.test(parsed.hostname)) return null;

    const path = parsed.pathname.replace(/\/+$/, "") || "/";
    if (path !== CSAIL_RESEARCH_PATH) return null;

    parsed.search = "";
    parsed.hash = "";
    return parsed.href;
  };

  for (const anchor of _deps.parseHtml(homeHtml).querySelectorAll("a[href]")) {
    const researchUrl = toResearchUrl(anchor);
    if (researchUrl) return researchUrl;
  }
  return new URL(CSAIL_RESEARCH_PATH, homeUrl).href;
}
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
/**
 * Collects the text of "leaf" elements inside a node: elements matching
 * SUMMARY_SELECTOR that contain no further element matching it.
 * @param {object} node - Node exposing querySelectorAll.
 * @returns {string[]} Non-empty, whitespace-normalised texts in document order.
 */
function getLeafTexts(node) {
  const leafTexts = [];
  for (const element of node.querySelectorAll(SUMMARY_SELECTOR)) {
    // Skip wrappers; their text is covered by their matching descendants.
    if (element.querySelector(SUMMARY_SELECTOR) != null) continue;
    const text = normalizeText(element.textContent);
    if (text) leafTexts.push(text);
  }
  return leafTexts;
}
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
/**
 * Chooses a summary for a research card: the first leaf text that is not the
 * title, the category, or a UI label ("Lead", "+ 3", "...more"), and whose
 * length is between 20 and 600 characters inclusive.
 * @param {object} card - Card node passed to getLeafTexts.
 * @param {string} title - Card title to exclude.
 * @param {string} category - Card category to exclude.
 * @returns {string|undefined} The summary, or undefined if none qualifies.
 */
function extractSummary(card, title, category) {
  const uiLabels = [/^lead$/i, /^\+\s*\d+$/i, /^\.{3}more$/i];
  return getLeafTexts(card).find(
    (text) =>
      text !== title &&
      text !== category &&
      !uiLabels.some((label) => label.test(text)) &&
      text.length >= 20 &&
      text.length <= 600,
  );
}
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
/**
 * Builds the author string from the links inside the card's `app-lead-bar`.
 * @param {object} card - Card node exposing querySelectorAll.
 * @returns {string|undefined} Distinct names in first-seen order joined by
 *   ", " (the "...more" link is ignored), or undefined when there are none.
 */
function extractAuthor(card) {
  const distinctNames = new Set();
  for (const link of card.querySelectorAll("app-lead-bar a[href]")) {
    const name = normalizeText(link.textContent);
    if (name && name !== "...more") distinctNames.add(name);
  }
  return distinctNames.size > 0 ? [...distinctNames].join(", ") : undefined;
}
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
/**
 * Converts one `<article>` card into a feed item.
 *
 * The title link is the card's `a[rel="bookmark"]`, or failing that the first
 * link containing "/research/". Its `<h2>` text, or its whole text when there
 * is no `<h2>` text, is the title. The card's `<h4>` text is its category.
 *
 * @param {object} card - Card node.
 * @param {string} baseUrl - Base for resolving the card's link.
 * @param {string} requestedCategory - Category filter; empty keeps every card.
 * @returns {object|null} Item with guid, title, link, pubDate (current time),
 *   author and summary; null when the card lacks a link or title, or is
 *   filtered out by category.
 */
function parseCardItem(card, baseUrl, requestedCategory) {
  const titleLink =
    card.querySelector('a[rel="bookmark"][href]') ?? card.querySelector('a[href*="/research/"]');
  if (!titleLink) return null;

  const headingText = titleLink.querySelector("h2")?.textContent;
  const title = normalizeText(headingText || titleLink.textContent);
  if (!title) return null;

  const link = toAbsoluteHttpUrl(titleLink.getAttribute("href"), baseUrl);
  if (!link) return null;

  const category = normalizeText(card.querySelector("h4")?.textContent);
  if (!matchesRequestedCategory(category, requestedCategory)) return null;

  const summary = extractSummary(card, title, category);
  const author = extractAuthor(card);

  return {
    guid: hashGuid(link),
    title,
    link,
    // The card carries no date; the fetch time is used instead.
    pubDate: new Date(),
    author,
    summary: summary || undefined,
  };
}
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
/**
 * Parses the research list HTML into feed items, one per distinct link.
 * @param {string} html - HTML of the research list page.
 * @param {string} finalUrl - Base for resolving card links.
 * @param {string} requestedCategory - Category filter; empty keeps every card.
 * @returns {object[]} Items in document order; for duplicate links the first wins.
 */
function parseItems(html, finalUrl, requestedCategory) {
  const itemsByLink = new Map();
  for (const card of _deps.parseHtml(html).querySelectorAll("article")) {
    const item = parseCardItem(card, finalUrl, requestedCategory);
    if (item && !itemsByLink.has(item.link)) itemsByLink.set(item.link, item);
  }
  return [...itemsByLink.values()];
}
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
/**
 * Plugin entry point: loads the CSAIL home page, locates the research list,
 * and returns its cards as feed items.
 *
 * The list is fetched with a 4500 ms wait. If that yields no items it is
 * fetched once more with a 6500 ms wait. An optional `category` query
 * parameter on `sourceId` filters the cards.
 *
 * @param {string} sourceId - Source URL; must be parsable by `new URL`.
 * @param {object} ctx - Plugin context; this function uses `ctx.deps` and
 *   `ctx.fetchHtml`.
 * @returns {Promise<object[]>} The parsed items.
 * @throws {Error} When a page looks blocked, or no item can be parsed.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const requestedCategory = new URL(sourceId).searchParams.get("category") ?? "";

  const home = await ctx.fetchHtml(CSAIL_HOME_URL, { waitMs: 3000 });
  if (looksLikeBlockedPage(home.status, home.html)) {
    throw new Error(`[${SITE_ID}] \u8bbf\u95ee CSAIL \u9996\u9875\u88ab\u62d2\u7edd\uff08HTTP ${home.status}\uff09`);
  }

  const researchUrl = resolveResearchUrlFromHome(home.html, home.finalUrl || CSAIL_HOME_URL);

  // One attempt at the research list: fetch, reject blocked pages, parse.
  const loadItems = async (waitMs) => {
    const page = await ctx.fetchHtml(researchUrl, { waitMs });
    if (looksLikeBlockedPage(page.status, page.html)) {
      throw new Error(`[${SITE_ID}] \u8bbf\u95ee research \u5217\u8868\u88ab\u62d2\u7edd\uff08HTTP ${page.status}\uff09`);
    }
    return parseItems(page.html, page.finalUrl || researchUrl, requestedCategory);
  };

  let items = await loadItems(4500);
  if (items.length === 0) {
    items = await loadItems(6500);
  }
  if (items.length === 0) {
    const withCategory = requestedCategory ? ` (category=${requestedCategory})` : "";
    throw new Error(`[${SITE_ID}] \u672a\u89e3\u6790\u5230\u7814\u7a76\u6761\u76ee${withCategory}\uff0c\u9875\u9762\u7ed3\u6784\u53ef\u80fd\u5df2\u53d8\u5316`);
  }
  return items;
}
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
/**
 * Plugin descriptor for the MIT CSAIL research list: matches
 * csail.mit.edu/research (optional "www." and query string).
 */
const mitCsailResearchPlugin = {
  id: SITE_ID,
  listUrlPattern: /^https?:\/\/(www\.)?csail\.mit\.edu\/research(?:\?.*)?$/i,
  fetchItems,
};

export default mitCsailResearchPlugin;
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
let _deps;
|
|
2
|
+
|
|
3
|
+
// Moonshot official-site plugin: scrapes the home page's "Latest Research" (最新研究) list and emits FeedItem objects (no enrich step)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
/** A whole string of the form YYYY-M-D or YYYY-MM-DD; captures year, month, day. */
const DATE_RE = /^(\d{4})-(\d{1,2})-(\d{1,2})$/;

/** Heading text of the research section, in Chinese or English. */
const RESEARCH_HEADING_RE = /最新研究|latest\s+research/i;
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
/**
 * Trims the text and squeezes inner whitespace runs to single spaces.
 * @param {string|null|undefined} text - Raw text; null/undefined count as empty.
 * @returns {string} The cleaned text.
 */
function normalizeText(text) {
  if (text == null) return "";
  return text.trim().replace(/\s+/g, " ");
}
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
/**
 * SHA-256 of the input as a hex string; serves as the item GUID.
 * Relies on `_deps.createHash`, which fetchItems assigns from `ctx.deps`.
 * @param {string} input - Value to hash (callers pass the item link).
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const sha256 = _deps.createHash("sha256");
  const updated = sha256.update(input);
  return updated.digest("hex");
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
/**
 * Resolves an href against a base URL and keeps it only if it is http(s).
 * @param {string|null|undefined} rawHref - Value of an anchor's href attribute.
 * @param {string} baseUrl - Base used to resolve relative hrefs.
 * @returns {string|null} Absolute URL, or null for empty, fragment-only,
 *   `javascript:`, unparsable or non-http(s) hrefs.
 */
function toAbsoluteHttpUrl(rawHref, baseUrl) {
  if (!rawHref) return null;

  const cleaned = rawHref.trim();
  if (cleaned.length === 0) return null;
  if (cleaned[0] === "#") return null;
  if (cleaned.slice(0, "javascript:".length) === "javascript:") return null;

  try {
    const { protocol, href } = new URL(cleaned, baseUrl);
    return /^https?:$/i.test(protocol) ? href : null;
  } catch {
    return null;
  }
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
/**
 * Parses a "YYYY-M-D" / "YYYY-MM-DD" string into a Date at 12:00 UTC.
 *
 * Components that do not name a real calendar day (month 13, day 0,
 * 31 February, ...) yield undefined. Date.UTC would otherwise roll them over
 * into a different, plausible-looking date, and the original NaN check could
 * never catch that.
 *
 * @param {string|null|undefined} dateText - Text expected to be exactly a date.
 * @returns {Date|undefined} The parsed date, or undefined when the text is
 *   not a valid date in that format.
 */
function parseDate(dateText) {
  const m = normalizeText(dateText).match(DATE_RE);
  if (!m) return undefined;

  const year = Number(m[1]);
  const month = Number(m[2]);
  const day = Number(m[3]);

  // Noon UTC keeps the calendar day stable across most time zones.
  const date = new Date(Date.UTC(year, month - 1, day, 12, 0, 0));
  const roundTrips =
    date.getUTCFullYear() === year &&
    date.getUTCMonth() === month - 1 &&
    date.getUTCDate() === day;
  return roundTrips ? date : undefined;
}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
/**
 * Collects the title fragments of a research card link.
 * @param {object} anchor - Anchor node exposing querySelectorAll and textContent.
 * @returns {string[]} The non-empty `<h2>` texts inside the anchor. If there
 *   are none, the anchor's whole text as a single entry, or an empty array
 *   when that is empty too.
 */
function extractTitleParts(anchor) {
  const headingTexts = [];
  for (const heading of anchor.querySelectorAll("h2")) {
    const text = normalizeText(heading.textContent);
    if (text) headingTexts.push(text);
  }
  if (headingTexts.length > 0) return headingTexts;

  const wholeText = normalizeText(anchor.textContent);
  return wholeText ? [wholeText] : [];
}
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
/**
 * Converts one anchor of the research section into a feed item.
 *
 * An anchor qualifies only if it resolves to an http(s) link, has at least
 * one title fragment, and contains a `<p>` whose text is exactly a
 * YYYY-M-D date. The first fragment is the title; any further fragments,
 * joined by spaces, form the summary.
 *
 * @param {object} anchor - Anchor node.
 * @param {string} finalUrl - Base for resolving the anchor's href.
 * @returns {object|null} Item with guid, title, link, pubDate and summary;
 *   null when the anchor does not qualify.
 */
function parseResearchItem(anchor, finalUrl) {
  const link = toAbsoluteHttpUrl(anchor.getAttribute("href"), finalUrl);
  if (!link) return null;

  const [title, ...subtitleParts] = extractTitleParts(anchor);
  if (title === undefined) return null;

  const paragraphTexts = anchor.querySelectorAll("p").map((p) => normalizeText(p.textContent));
  const dateText = paragraphTexts.find((text) => DATE_RE.test(text));
  if (!dateText) return null;

  // Fall back to the fetch time if the matched text cannot be turned into a date.
  const pubDate = parseDate(dateText) ?? new Date();

  return {
    guid: hashGuid(link),
    title,
    link,
    pubDate,
    summary: subtitleParts.length > 0 ? subtitleParts.join(" ") : undefined,
  };
}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
/**
 * Returns the links to examine for research items.
 *
 * If an h1/h2/h3 matches RESEARCH_HEADING_RE and its parent can be queried,
 * only the links inside that parent are returned. Otherwise every link in
 * the document is returned.
 *
 * @param {object} root - Parsed document root exposing querySelectorAll.
 * @returns {object[]} Anchor nodes carrying an href attribute.
 */
function collectCandidateAnchors(root) {
  const headings = root.querySelectorAll("h1, h2, h3");
  const section = headings.find((node) =>
    RESEARCH_HEADING_RE.test(normalizeText(node.textContent)),
  )?.parentNode;

  const scope = section && "querySelectorAll" in section ? section : root;
  return scope.querySelectorAll("a[href]");
}
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
/**
 * Plugin entry point: loads the Moonshot page and returns the items of its
 * research section, one per distinct link, in document order.
 *
 * Relative links are resolved against the URL reported by the fetch and,
 * when the fetch reports none, against `sourceId`. Previously a missing
 * `finalUrl` made every relative link unresolvable, so the plugin failed
 * with "no items" even though the page was parsed correctly.
 *
 * @param {string} sourceId - URL of the page to scrape.
 * @param {object} ctx - Plugin context; this function uses `ctx.deps`
 *   (parseHtml, createHash) and `ctx.fetchHtml`.
 * @returns {Promise<object[]>} The parsed items.
 * @throws {Error} When no research item can be parsed.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 4500 });
  const root = _deps.parseHtml(html);
  const baseUrl = finalUrl || sourceId;

  const seen = new Set();
  const items = [];

  for (const anchor of collectCandidateAnchors(root)) {
    const item = parseResearchItem(anchor, baseUrl);
    if (!item || seen.has(item.link)) continue;
    seen.add(item.link);
    items.push(item);
  }

  if (items.length === 0) {
    throw new Error("[moonshot] 未解析到“最新研究”条目,页面结构可能已变化");
  }

  return items;
}
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
/**
 * Plugin descriptor for the Moonshot site: matches the site root, optionally
 * under a locale prefix such as /en or /zh-cn, with optional "www.",
 * trailing slash and query string.
 */
const moonshotPlugin = {
  id: "moonshot",
  listUrlPattern: /^https?:\/\/(www\.)?moonshot\.ai(?:\/[a-z]{2}(?:-[a-z]{2})?)?\/?(\?.*)?$/i,
  fetchItems,
};

export default moonshotPlugin;
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
let _deps;
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
/** Root of the static JSON feeds. */
const API_ROOT = "https://static.openxlab.org.cn/opendatalab/dynamics";

/** Root of the detail pages; a link is DETAIL_ROOT/<detailType>/<id>. */
const DETAIL_ROOT = "https://opendatalab.org.cn/news/details";

/**
 * Feeds to aggregate. Columns: label (used in error messages), feed path
 * below API_ROOT, detailType (link path segment), category (display name).
 */
const SOURCES = [
  ["featuredArticles", "featuredArticles/data.json?t=12", "article", "精选文章"],
  ["banner", "banner/data.json?t=1", "banner", "轮播"],
  ["AItalk", "talkArticles/AItalk/data.json?t=1", "AItalk", "AI Talk"],
  ["JStalk", "talkArticles/JStalk/data.json?t=1", "JStalk", "解数 Talk"],
].map(([label, path, detailType, category]) => ({
  label,
  url: `${API_ROOT}/${path}`,
  detailType,
  category,
}));
|
|
33
|
+
|
|
34
|
+
/**
 * Collapses whitespace runs to single spaces and removes leading/trailing space.
 * @param {string|null|undefined} text - Raw text; null/undefined count as empty.
 * @returns {string} The cleaned text.
 */
function normalizeText(text) {
  const source = text ?? "";
  // After collapsing, at most one space can remain at either end.
  return source.replace(/\s+/g, " ").replace(/^ | $/g, "");
}
|
|
37
|
+
|
|
38
|
+
/**
 * Hex-encoded SHA-256 of the input; used as the item GUID.
 * Relies on `_deps.createHash`, which fetchItems assigns from the plugin context.
 * @param {string} input - Value to hash (callers pass the item link).
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const digestOf = (value) => _deps.createHash("sha256").update(value).digest("hex");
  return digestOf(input);
}
|
|
41
|
+
|
|
42
|
+
/**
 * Builds a publication date from a record's date and optional start time,
 * both interpreted at UTC+08:00.
 *
 * Always returns a valid Date. An unusable start time falls back to 12:00 on
 * the given day. An unusable or missing date falls back to the current time.
 * Previously out-of-range components (e.g. month 13) produced an Invalid
 * Date, which in turn made the caller's sort comparator return NaN.
 *
 * @param {string|null|undefined} rawDate - Text containing YYYY-M-D or YYYY/M/D.
 * @param {string|null|undefined} rawStartTime - Text containing H:MM or HH:MM.
 * @returns {Date} A valid Date.
 */
function parsePubDate(rawDate, rawStartTime) {
  const dateMatch = normalizeText(rawDate).match(/(\d{4})[/-](\d{1,2})[/-](\d{1,2})/);
  if (!dateMatch) return new Date();

  const pad2 = (value) => String(Number(value)).padStart(2, "0");
  const datePart = `${Number(dateMatch[1])}-${pad2(dateMatch[2])}-${pad2(dateMatch[3])}`;

  // Returns undefined instead of an Invalid Date so callers can chain fallbacks.
  const atTime = (hhmm) => {
    const parsed = new Date(`${datePart}T${hhmm}:00+08:00`);
    return Number.isNaN(parsed.getTime()) ? undefined : parsed;
  };

  const timeMatch = normalizeText(rawStartTime).match(/(\d{1,2}):(\d{2})/);
  const withStartTime = timeMatch ? atTime(`${pad2(timeMatch[1])}:${pad2(timeMatch[2])}`) : undefined;

  return withStartTime ?? atTime("12:00") ?? new Date();
}
|
|
72
|
+
|
|
73
|
+
/**
 * Builds the one-line summary of a record from its date and time fields.
 * @param {object|null|undefined} item - Raw record; reads date, start_time, end_time.
 * @returns {string|undefined} A "直播时间" (live time) line when both start and
 *   end time are present, otherwise a "发布时间" (publish time) line;
 *   undefined when the record has no date.
 */
function buildSummary(item) {
  const [dateText, start, end] = [item?.date, item?.start_time, item?.end_time].map((value) =>
    normalizeText(value),
  );
  if (!dateText) return undefined;
  return start && end ? `直播时间: ${dateText} ${start}-${end}` : `发布时间: ${dateText}`;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Downloads a JSON feed and checks that its top level is an array.
 *
 * Network and JSON-parse failures are re-thrown with a plugin-prefixed
 * message. The original error is attached as `cause` so its stack and type
 * are not lost.
 *
 * @param {string} url - Feed URL.
 * @param {string} label - Feed name used in error messages.
 * @returns {Promise<Array>} The parsed array.
 * @throws {Error} On network failure, non-2xx status, invalid JSON, or a
 *   non-array payload.
 */
async function fetchJson(url, label) {
  const describe = (err) => (err instanceof Error ? err.message : String(err));

  let response;
  try {
    response = await fetch(url, {
      headers: {
        "Accept": "application/json,text/plain,*/*",
        "User-Agent": "RssAny/1.0 (+https://github.com/rssany/rssany)",
      },
    });
  } catch (err) {
    throw new Error(`[opendatalab-news] 请求 ${label} 失败: ${describe(err)}`, { cause: err });
  }

  if (!response.ok) {
    throw new Error(`[opendatalab-news] 请求 ${label} 失败: HTTP ${response.status}`);
  }

  let data;
  try {
    data = await response.json();
  } catch (err) {
    throw new Error(`[opendatalab-news] 解析 ${label} JSON 失败: ${describe(err)}`, { cause: err });
  }

  if (!Array.isArray(data)) {
    throw new Error(`[opendatalab-news] ${label} 返回结构异常,期望数组`);
  }

  return data;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Converts the raw records of one feed into feed items.
 *
 * A record is kept only if it is an object with a non-empty title and an id
 * that is a finite number or a numeric string. Ids that are null, empty
 * strings or booleans are rejected explicitly. `Number()` would coerce them
 * to 0 or 1 and yield a link to an unrelated detail page.
 *
 * @param {Array} records - Parsed feed payload.
 * @param {{detailType: string}} source - Feed descriptor; detailType becomes
 *   a path segment of each link.
 * @returns {object[]} Items with guid, title, link, pubDate, summary, sourceId.
 */
function mapItems(records, source) {
  const items = [];

  for (const raw of records) {
    if (typeof raw !== "object" || raw == null) continue;

    const rawId = raw.id;
    const idIsUsable =
      typeof rawId === "number" || (typeof rawId === "string" && rawId.trim() !== "");
    const id = idIsUsable ? Number(rawId) : Number.NaN;
    const title = normalizeText(raw.title);
    if (!Number.isFinite(id) || !title) continue;

    const link = `${DETAIL_ROOT}/${source.detailType}/${id}`;
    const pubDate = parsePubDate(raw.date, raw.start_time);
    const summary = buildSummary(raw);

    items.push({
      guid: hashGuid(link),
      title,
      link,
      pubDate,
      summary,
      sourceId: "opendatalab-news",
    });
  }

  return items;
}
|
|
141
|
+
|
|
142
|
+
/**
 * Plugin entry point: downloads every feed in SOURCES, merges them, drops
 * duplicate links (the earliest source wins) and sorts newest first.
 *
 * The feeds are independent, so they are requested concurrently rather than
 * one after another. Results are merged in SOURCES order, so de-duplication
 * is unaffected. If any feed fails, the call rejects with that feed's error.
 *
 * @param {string} _sourceId - Unused; the feed URLs are fixed.
 * @param {object} _ctx - Plugin context; only `_ctx.deps` is used.
 * @returns {Promise<object[]>} The merged, de-duplicated, sorted items.
 * @throws {Error} When a feed cannot be loaded or no item is produced.
 */
async function fetchItems(_sourceId, _ctx) {
  _deps = _ctx.deps;

  const batches = await Promise.all(
    SOURCES.map(async (source) => mapItems(await fetchJson(source.url, source.label), source)),
  );

  const itemsByLink = new Map();
  for (const item of batches.flat()) {
    if (!itemsByLink.has(item.link)) itemsByLink.set(item.link, item);
  }

  const deduped = [...itemsByLink.values()];
  deduped.sort((a, b) => b.pubDate.getTime() - a.pubDate.getTime());

  if (deduped.length === 0) {
    throw new Error("[opendatalab-news] 未解析到条目,页面数据源可能已变化");
  }

  return deduped;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Plugin descriptor for the OpenDataLab news page: matches /news on
 * opendatalab.org.cn or opendatalab.com (optional "www.", trailing slash and
 * query string) and declares a refresh interval of "1h".
 */
const opendatalabNewsPlugin = {
  id: "opendatalab-news",
  listUrlPattern: /^https?:\/\/(www\.)?opendatalab\.(org\.cn|com)\/news\/?(\?.*)?$/i,
  refreshInterval: "1h",
  fetchItems,
};

export default opendatalabNewsPlugin;
|