rssany 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/app/plugins/builtin/agi-eval-evaluation.rssany.js +188 -0
  2. package/app/plugins/builtin/amii-research-talent.rssany.js +73 -0
  3. package/app/plugins/builtin/anthropic-research.rssany.js +155 -0
  4. package/app/plugins/builtin/appen-resources.rssany.js +155 -0
  5. package/app/plugins/builtin/baai-wudao-paper-article.rssany.js +185 -0
  6. package/app/plugins/builtin/baaidata-csdn.rssany.js +242 -0
  7. package/app/plugins/builtin/baidu-research.rssany.js +222 -0
  8. package/app/plugins/builtin/brightdata-blog.rssany.js +301 -0
  9. package/app/plugins/builtin/bytedance-seed-research.rssany.js +231 -0
  10. package/app/plugins/builtin/five-radar.rssany.js +490 -0
  11. package/app/plugins/builtin/flageval-news.rssany.js +118 -0
  12. package/app/plugins/builtin/google-deepmind-research.rssany.js +223 -0
  13. package/app/plugins/builtin/google-research-datasets.rssany.js +171 -0
  14. package/app/plugins/builtin/google-research.rssany.js +220 -0
  15. package/app/plugins/builtin/google.rssany.js +187 -0
  16. package/app/plugins/builtin/hacker-news-newest.rssany.js +130 -0
  17. package/app/plugins/builtin/harvard-dataverse.rssany.js +166 -0
  18. package/app/plugins/builtin/huaweicloud-bbs-blogs.rssany.js +185 -0
  19. package/app/plugins/builtin/lingowhale.rssany.js +119 -0
  20. package/app/plugins/builtin/meituan-tech.rssany.js +130 -0
  21. package/app/plugins/builtin/meta-ai-publications.rssany.js +221 -0
  22. package/app/plugins/builtin/mila-quebec.rssany.js +199 -0
  23. package/app/plugins/builtin/mit-csail-research.rssany.js +208 -0
  24. package/app/plugins/builtin/moonshot.rssany.js +127 -0
  25. package/app/plugins/builtin/opendatalab-news.rssany.js +174 -0
  26. package/app/plugins/builtin/opendatalab.rssany.js +109 -0
  27. package/app/plugins/builtin/opendrivelab-autonomous-driving.rssany.js +114 -0
  28. package/app/plugins/builtin/opendrivelab-embodiedai.rssany.js +114 -0
  29. package/app/plugins/builtin/opendrivelab-publications.rssany.js +130 -0
  30. package/app/plugins/builtin/opendrivelab.rssany.js +333 -0
  31. package/app/plugins/builtin/paperswithcode.rssany.js +227 -0
  32. package/app/plugins/builtin/pjlab-adg-publications.rssany.js +202 -0
  33. package/app/plugins/builtin/rss.rssany.js +11 -1
  34. package/app/plugins/builtin/selectdataset.rssany.js +206 -0
  35. package/app/plugins/builtin/sensetime-tech-achievements.rssany.js +154 -0
  36. package/app/plugins/builtin/supervisely-blog.rssany.js +159 -0
  37. package/app/plugins/builtin/uci-ml-repository.rssany.js +111 -0
  38. package/app/plugins/builtin/venturebeat.rssany.js +97 -0
  39. package/app/plugins/builtin/worldlabs.rssany.js +129 -0
  40. package/app/plugins/builtin/x.rssany.js +159 -0
  41. package/app/plugins/builtin/xiaohongshu.rssany.js +283 -0
  42. package/app/plugins/builtin/zhipu-research.rssany.js +334 -0
  43. package/dist/index.js +62 -4
  44. package/dist/index.js.map +1 -1
  45. package/package.json +1 -1
  46. package/webui/build/200.html +6 -6
  47. package/webui/build/_app/immutable/assets/{0.DjU2hdCQ.css → 0.BB88QFoe.css} +1 -1
  48. package/webui/build/_app/immutable/assets/{homeFeedPanelStore.BopJZtHu.css → homeFeedPanelStore.iOmfP2qL.css} +1 -1
  49. package/webui/build/_app/immutable/chunks/CZD-YNDw.js +31 -0
  50. package/webui/build/_app/immutable/chunks/{C85CNwD2.js → D6VIKef0.js} +1 -1
  51. package/webui/build/_app/immutable/chunks/{CllQAdvt.js → Dbqx2mXq.js} +1 -1
  52. package/webui/build/_app/immutable/chunks/DeX-oq5W.js +41 -0
  53. package/webui/build/_app/immutable/chunks/{CdMsRjxJ.js → dhB8G5Is.js} +1 -1
  54. package/webui/build/_app/immutable/entry/{app.BcD2eSsQ.js → app.XPso7q7g.js} +2 -2
  55. package/webui/build/_app/immutable/entry/start.Db4snNCd.js +1 -0
  56. package/webui/build/_app/immutable/nodes/0.BKTQePmA.js +11 -0
  57. package/webui/build/_app/immutable/nodes/{1.DU9aYGAb.js → 1.BS3_Rfxm.js} +1 -1
  58. package/webui/build/_app/immutable/nodes/{10.Db6vw7Ih.js → 10.CyyxDCIS.js} +1 -1
  59. package/webui/build/_app/immutable/nodes/{11.BaAcorz3.js → 11.CtYgIaGj.js} +1 -1
  60. package/webui/build/_app/immutable/nodes/{14.DqT4pcrQ.js → 14.D5OEGPR2.js} +1 -1
  61. package/webui/build/_app/immutable/nodes/{15.CCLbjxnH.js → 15.B4dFN1Gk.js} +1 -1
  62. package/webui/build/_app/immutable/nodes/{16.DiigpVdP.js → 16.M7ZII7tl.js} +1 -1
  63. package/webui/build/_app/immutable/nodes/{3.DEcYOQc-.js → 3.7r8v7qkm.js} +1 -1
  64. package/webui/build/_app/immutable/nodes/{5.CvM1TkLG.js → 5.CHIzoGrb.js} +1 -1
  65. package/webui/build/_app/immutable/nodes/{6.Dscr6LkS.js → 6.BDBqx-GY.js} +1 -1
  66. package/webui/build/_app/immutable/nodes/{7.Bp60MobD.js → 7.D5czsDmz.js} +1 -1
  67. package/webui/build/_app/immutable/nodes/{8.DwSg0MHh.js → 8.pjVNsCdV.js} +1 -1
  68. package/webui/build/_app/immutable/nodes/{9.BeYOUjxR.js → 9.CsARv1BH.js} +1 -1
  69. package/webui/build/_app/version.json +1 -1
  70. package/webui/build/_app/immutable/chunks/CtijX1u3.js +0 -31
  71. package/webui/build/_app/immutable/chunks/Dv1VCsiB.js +0 -41
  72. package/webui/build/_app/immutable/entry/start.CbkdJdz1.js +0 -1
  73. package/webui/build/_app/immutable/nodes/0.DSUDmOx2.js +0 -11
@@ -0,0 +1,223 @@
1
// Host-provided dependencies; assigned from ctx.deps at the start of fetchItems.
let _deps;

// Google DeepMind Research plugin: scrapes the latest research entries from the
// research page (no enrichment step).

const DEEPMIND_RESEARCH_URL = "https://deepmind.google/research/";
const DEEPMIND_ORIGIN = "https://deepmind.google";

// Lower-case English month name -> zero-based month index (the form Date.UTC expects).
const MONTH_TO_INDEX = Object.fromEntries(
  [
    "january",
    "february",
    "march",
    "april",
    "may",
    "june",
    "july",
    "august",
    "september",
    "october",
    "november",
    "december",
  ].map((monthName, monthIndex) => [monthName, monthIndex]),
);
23
+
24
+
25
/**
 * Collapses every run of whitespace into a single space and trims both ends.
 *
 * @param {string | null | undefined} text - Raw text; null/undefined counts as "".
 * @returns {string} The normalized text (possibly empty).
 */
function normalizeText(text) {
  const raw = text ?? "";
  const collapsed = raw.replace(/\s+/g, " ");
  return collapsed.trim();
}
28
+
29
+
30
/**
 * Derives a stable item GUID: the hex SHA-256 digest of the input.
 * Uses the createHash function supplied through the injected _deps.
 *
 * @param {string} input - Value to hash (callers pass the item link).
 * @returns {string} Hex-encoded digest.
 */
function hashGuid(input) {
  const hasher = _deps.createHash("sha256");
  const fed = hasher.update(input);
  return fed.digest("hex");
}
33
+
34
+
35
/**
 * Resolves an href against a base URL and returns it as an absolute string,
 * but only when the result uses http or https.
 *
 * @param {string | null | undefined} rawHref - The href attribute value.
 * @param {string | URL} baseUrl - Base used to resolve relative hrefs.
 * @returns {string | null} Absolute URL string, or null for empty hrefs,
 *   fragment-only hrefs, javascript: hrefs, unparsable values, or other schemes.
 */
function toAbsoluteHttpUrl(rawHref, baseUrl) {
  const href = rawHref ? rawHref.trim() : "";
  const unusable = href === "" || href.startsWith("#") || href.startsWith("javascript:");
  if (unusable) return null;

  let resolved;
  try {
    resolved = new URL(href, baseUrl);
  } catch {
    // Not parseable even with the base: treat as "no link".
    return null;
  }
  return /^https?:$/i.test(resolved.protocol) ? resolved.href : null;
}
47
+
48
+
49
/**
 * Turns a raw date string into a Date.
 *
 * A bare "<MonthName> <YYYY>" value is mapped to the 1st of that month at
 * 12:00 UTC. Anything else is handed to the Date constructor.
 *
 * @param {string | null | undefined} rawDate - Text or datetime attribute value.
 * @returns {Date | undefined} Parsed date, or undefined when the text is empty
 *   or the Date constructor cannot parse it.
 */
function parsePubDate(rawDate) {
  const text = normalizeText(rawDate);
  if (!text) return undefined;

  const match = /^([A-Za-z]+)\s+(\d{4})$/.exec(text);
  if (match) {
    const [, rawMonth, rawYear] = match;
    const monthKey = rawMonth?.toLowerCase();
    const yearNum = Number(rawYear);
    const monthIdx = monthKey ? MONTH_TO_INDEX[monthKey] : undefined;
    if (monthIdx != null && Number.isFinite(yearNum)) {
      return new Date(Date.UTC(yearNum, monthIdx, 1, 12, 0, 0));
    }
  }

  // Fallback: let the engine parse it (e.g. ISO timestamps from datetime attributes).
  const parsed = new Date(text);
  return Number.isNaN(parsed.getTime()) ? undefined : parsed;
}
67
+
68
+
69
/**
 * Decides whether an absolute URL points at a DeepMind research entry.
 *
 * Accepted shapes:
 *  - deepmind.google/research/publications/<digits>[/]
 *  - deepmind.google/blog/<anything>
 *  - blog.google/technology/google-deepmind/<anything>
 *
 * @param {string} link - Absolute URL string.
 * @returns {boolean} True for an accepted shape; false otherwise, including unparsable input.
 */
function isResearchLink(link) {
  let parsed;
  try {
    parsed = new URL(link);
  } catch {
    return false;
  }

  const { hostname, pathname } = parsed;
  switch (hostname) {
    case "deepmind.google":
      return (
        /^\/research\/publications\/\d+\/?$/i.test(pathname) ||
        /^\/blog\/[^?#]+/i.test(pathname)
      );
    case "blog.google":
      return /^\/technology\/google-deepmind\/[^?#]+/i.test(pathname);
    default:
      return false;
  }
}
89
+
90
+
91
/**
 * Reads the title of a card from its most prominent heading.
 * Heading levels are tried in order h1, h2, h3, h4; the first one that
 * exists wins, even if its text turns out to be empty.
 *
 * @param {{ querySelector(selector: string): any }} container - Card element.
 * @returns {string} Normalized heading text, or "" when there is none.
 */
function extractTitle(container) {
  for (const tag of ["h1", "h2", "h3", "h4"]) {
    const node = container.querySelector(tag);
    if (node != null) return normalizeText(node.textContent);
  }
  return "";
}
101
+
102
+
103
/**
 * Uses the first <p> inside the container as the item summary.
 *
 * @param {{ querySelector(selector: string): any }} container - Card element.
 * @param {string} title - Already extracted title; a summary equal to it is dropped.
 * @returns {string | undefined} Normalized paragraph text, or undefined when it
 *   is missing, empty, or identical to the title.
 */
function extractSummary(container, title) {
  const paragraph = container.querySelector("p");
  const text = normalizeText(paragraph?.textContent);
  return text && text !== title ? text : undefined;
}
109
+
110
+
111
/**
 * Detects button-like labels ("Learn more", "View all", ...) that should not
 * be treated as item titles: the text must start with one of the listed verbs
 * as a whole word, case-insensitively.
 *
 * @param {string} title - Candidate title.
 * @returns {boolean} True when the text starts with a call-to-action verb.
 */
function isCallToActionTitle(title) {
  const ctaPrefix = /^(learn|view|see|read|watch)\b/i;
  return ctaPrefix.test(title);
}
114
+
115
+
116
/**
 * Builds feed items from the <article> elements of the page.
 *
 * An article is kept when it has a heading title that is not a call-to-action
 * label and its first anchor resolves to a research link not seen before.
 *
 * @param {{ querySelectorAll(selector: string): Iterable<any> }} root - Parsed document.
 * @param {string | URL} baseUrl - Base for resolving relative hrefs.
 * @returns {Array<object>} Items in document order (may be empty).
 */
function parseItemsFromArticles(root, baseUrl) {
  const items = [];
  const seen = new Set();

  for (const article of root.querySelectorAll("article")) {
    const title = extractTitle(article);
    if (!title || isCallToActionTitle(title)) continue;

    const anchor = article.querySelector("a[href]");
    const link = toAbsoluteHttpUrl(anchor?.getAttribute("href"), baseUrl);
    if (!link || !isResearchLink(link)) continue;
    if (seen.has(link)) continue;
    seen.add(link);

    // `||` rather than `??`: an empty datetime="" attribute must still fall
    // back to the visible text of the <time> element instead of yielding "".
    const timeEl = article.querySelector("time");
    const dateRaw = timeEl?.getAttribute("datetime") || timeEl?.textContent || "";
    // Entries without a parseable date are stamped with the fetch time.
    const pubDate = parsePubDate(dateRaw) ?? new Date();
    const summary = extractSummary(article, title);

    items.push({
      guid: hashGuid(link),
      title,
      link,
      pubDate,
      author: "Google DeepMind",
      summary,
      sourceId: "google-deepmind-research",
    });
  }

  return items;
}
151
+
152
+
153
/**
 * Finds a title for a bare anchor.
 *
 * Preference order: a heading (h1-h4) inside the anchor, then a heading inside
 * the anchor's parent, then the anchor's own text if it is at least 8
 * characters long and not a call-to-action label.
 *
 * @param {any} anchor - Anchor element.
 * @returns {string} The title, or "" when nothing suitable is found.
 */
function findTitleAroundAnchor(anchor) {
  const headingSelector = "h1, h2, h3, h4";
  const headingNode =
    anchor.querySelector(headingSelector) ??
    anchor.parentNode?.querySelector?.(headingSelector);
  const headingTitle = normalizeText(headingNode?.textContent);
  if (headingTitle) return headingTitle;

  const anchorText = normalizeText(anchor.textContent);
  const usable =
    Boolean(anchorText) && anchorText.length >= 8 && !isCallToActionTitle(anchorText);
  return usable ? anchorText : "";
}
164
+
165
+
166
/**
 * Fallback parser: builds feed items from every research-looking anchor on
 * the page, using the anchor's parent as the surrounding "card".
 *
 * A link is marked as seen only after a title was found for it, so an
 * untitled anchor does not block a later titled anchor with the same URL.
 *
 * @param {{ querySelectorAll(selector: string): Iterable<any> }} root - Parsed document.
 * @param {string | URL} baseUrl - Base for resolving relative hrefs.
 * @returns {Array<object>} Items in document order (may be empty).
 */
function parseItemsFromAnchors(root, baseUrl) {
  const items = [];
  const seen = new Set();

  for (const anchor of root.querySelectorAll("a[href]")) {
    const link = toAbsoluteHttpUrl(anchor.getAttribute("href"), baseUrl);
    if (!link || !isResearchLink(link)) continue;
    if (seen.has(link)) continue;

    const title = findTitleAroundAnchor(anchor);
    if (!title) continue;

    const container = anchor.parentNode ?? anchor;
    // `||` rather than `??`: an empty datetime="" attribute must still fall
    // back to the visible text of the <time> element instead of yielding "".
    const timeEl = container.querySelector?.("time");
    const dateRaw = timeEl?.getAttribute?.("datetime") || timeEl?.textContent || "";
    // Entries without a parseable date are stamped with the fetch time.
    const pubDate = parsePubDate(dateRaw) ?? new Date();
    const summary = extractSummary(container, title);

    seen.add(link);
    items.push({
      guid: hashGuid(link),
      title,
      link,
      pubDate,
      author: "Google DeepMind",
      summary,
      sourceId: "google-deepmind-research",
    });
  }

  return items;
}
201
+
202
+
203
/**
 * Plugin entry point: fetches the page and extracts research items.
 *
 * The <article>-based parser runs first; the anchor-based parser is used only
 * when the first one yields nothing.
 *
 * @param {string} sourceId - Passed through to ctx.fetchHtml.
 * @param {{ deps: any, fetchHtml: Function }} ctx - Host context.
 * @returns {Promise<Array<object>>} The parsed items (never empty).
 * @throws {Error} When neither parser finds any item.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const page = await ctx.fetchHtml(sourceId, { waitMs: 4500 });
  const root = _deps.parseHtml(page.html);
  const baseUrl = page.finalUrl || DEEPMIND_ORIGIN;

  for (const parse of [parseItemsFromArticles, parseItemsFromAnchors]) {
    const found = parse(root, baseUrl);
    if (found.length > 0) return found;
  }

  throw new Error("[google-deepmind-research] 未解析到研究条目,页面结构可能已变化");
}
217
+
218
+
219
// Plugin descriptor: id, the list-page URL pattern this plugin handles
// (deepmind.google/research with optional trailing slash and query), and the fetcher.
const googleDeepmindResearchPlugin = {
  id: "google-deepmind-research",
  listUrlPattern: /^https?:\/\/deepmind\.google\/research\/?(?:\?.*)?$/i,
  fetchItems,
};

export default googleDeepmindResearchPlugin;
@@ -0,0 +1,171 @@
1
// Host-provided dependencies; assigned from ctx.deps at the start of fetchItems.
let _deps;

// Author label attached to every item produced by this plugin.
const GOOGLE_RESEARCH_AUTHOR = "Google Research Datasets";

// Last-resort base URL when the fetch reports neither a final URL nor a source id.
const DATASETS_URL = "https://research.google/resources/datasets/";

// A leaf text must be at least this long to be accepted as a summary.
const MIN_SUMMARY_LENGTH = 24;
8
+
9
+
10
/**
 * Squeezes whitespace runs down to one space and strips leading/trailing space.
 *
 * @param {string | null | undefined} text - Raw text; nullish input becomes "".
 * @returns {string} Normalized text.
 */
function normalizeText(text) {
  const source = text ?? "";
  const singleSpaced = source.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
13
+
14
+
15
/**
 * Computes the hex SHA-256 digest of the input via the injected createHash.
 *
 * @param {string} input - Value to hash (callers pass the item link).
 * @returns {string} Hex-encoded digest used as the item GUID.
 */
function hashGuid(input) {
  const hasher = _deps.createHash("sha256");
  const fed = hasher.update(input);
  return fed.digest("hex");
}
18
+
19
+
20
/**
 * Tells whether a hostname is google.<tld> or a subdomain of it.
 *
 * The suffix after "google." is restricted to the shapes Google's own domains
 * use: a single 2-3 letter TLD (google.com, google.de) or "co."/"com." plus a
 * 2-letter country code (google.co.uk, google.com.hk). A looser "anything
 * after google." check would also accept hosts such as google.evil.com, whose
 * registrable domain is not Google's. A single trailing root dot is tolerated.
 *
 * @param {string} hostname - Hostname as reported by URL#hostname.
 * @returns {boolean} True when the hostname looks like a Google domain.
 */
function isGoogleHost(hostname) {
  return /^(?:[a-z0-9-]+\.)*google\.(?:[a-z]{2,3}|com?\.[a-z]{2})\.?$/i.test(hostname);
}
23
+
24
+
25
/**
 * Resolves an href against a base and returns the parsed URL object, provided
 * the scheme is http or https.
 *
 * @param {string | null | undefined} rawHref - The href value.
 * @param {string | URL} baseUrl - Base used for relative hrefs.
 * @returns {URL | null} Parsed URL, or null for empty hrefs, fragment-only
 *   hrefs, javascript: hrefs, unparsable values, or non-http(s) schemes.
 */
function resolveHttpUrl(rawHref, baseUrl) {
  const href = rawHref ? rawHref.trim() : "";
  const unusable = href === "" || href.startsWith("#") || href.startsWith("javascript:");
  if (unusable) return null;

  let resolved;
  try {
    resolved = new URL(href, baseUrl);
  } catch {
    // Not parseable even with the base: treat as "no link".
    return null;
  }
  return /^https?:$/i.test(resolved.protocol) ? resolved : null;
}
38
+
39
+
40
/**
 * Resolves an href to the absolute link an item should point at.
 *
 * Google redirect links (a Google host with path "/url") are unwrapped: the
 * real target is read from the "q" query parameter, or "url" when "q" is absent.
 *
 * @param {string | null | undefined} rawHref - The href value.
 * @param {string | URL} baseUrl - Base used for relative hrefs.
 * @returns {string | null} Absolute http(s) URL string, or null when the href
 *   (or the wrapped target of a redirect) is unusable.
 */
function resolveResultLink(rawHref, baseUrl) {
  const resolved = resolveHttpUrl(rawHref, baseUrl);
  if (!resolved) return null;

  const isRedirect = isGoogleHost(resolved.hostname) && resolved.pathname === "/url";
  if (!isRedirect) return resolved.href;

  const params = resolved.searchParams;
  const wrapped = params.get("q") ?? params.get("url");
  const target = resolveHttpUrl(wrapped, resolved.href);
  return target?.href ?? null;
}
50
+
51
+
52
/**
 * Normalizes a list of texts and removes empties and case-insensitive
 * duplicates, keeping the first occurrence and the original order.
 *
 * @param {Iterable<string | null | undefined>} texts - Raw texts.
 * @returns {string[]} Unique normalized texts.
 */
function dedupeTexts(texts) {
  // Map preserves insertion order, so values() yields first occurrences in order.
  const firstByKey = new Map();
  for (const raw of texts) {
    const clean = normalizeText(raw);
    if (!clean) continue;
    const key = clean.toLowerCase();
    if (!firstByKey.has(key)) firstByKey.set(key, clean);
  }
  return [...firstByKey.values()];
}
65
+
66
+
67
/**
 * Collects the distinct texts of the "leaf" text elements inside an anchor:
 * headings, paragraphs, spans and divs that contain no further element of
 * those kinds.
 *
 * @param {any} anchor - Anchor element; its querySelectorAll result must
 *   support Array#filter (i.e. be a real array).
 * @returns {string[]} Normalized, de-duplicated leaf texts in document order.
 */
function extractLeafTexts(anchor) {
  const textTags = "h1,h2,h3,h4,h5,h6,p,span,div";
  const hasNoTextChild = (node) => node.querySelector(textTags) == null;

  const candidates = anchor.querySelectorAll(textTags);
  const leafTexts = candidates
    .filter(hasNoTextChild)
    .map((node) => normalizeText(node.textContent))
    .filter((text) => text !== "");
  return dedupeTexts(leafTexts);
}
75
+
76
+
77
/**
 * Finds the most recent plausible year mentioned in a text.
 *
 * Only standalone four-digit tokens starting with "20" are considered, and
 * only those between 2000 and next calendar year (UTC) inclusive.
 *
 * @param {string | null | undefined} text - Text to scan.
 * @returns {number | undefined} The largest qualifying year, or undefined.
 */
function parseYearFromText(text) {
  if (!text) return undefined;

  const latestAllowed = new Date().getUTCFullYear() + 1;
  let best;
  for (const token of text.match(/\b20\d{2}\b/g) ?? []) {
    const year = Number(token);
    if (!Number.isFinite(year) || year < 2000 || year > latestAllowed) continue;
    if (best === undefined || year > best) best = year;
  }
  return best;
}
89
+
90
+
91
/**
 * Assembles one feed item.
 *
 * The publication date is January 1st, 12:00 UTC of the latest year found in
 * the title/summary. When no year is found, the current time minus `index`
 * seconds is used, so items without a year get strictly decreasing timestamps
 * in the order they were found.
 *
 * @param {string} title - Item title.
 * @param {string} link - Absolute item URL; also the GUID hash input.
 * @param {string | undefined} summary - Item summary; empty values become undefined.
 * @param {number} index - Position of the item among those already collected.
 * @returns {object} Item with guid, title, link, pubDate, author and summary.
 */
function buildItem(title, link, summary, index) {
  const year = parseYearFromText(`${title} ${summary ?? ""}`);

  let pubDate;
  if (year == null) {
    pubDate = new Date(Date.now() - index * 1000);
  } else {
    pubDate = new Date(Date.UTC(year, 0, 1, 12, 0, 0));
  }

  return {
    guid: hashGuid(link),
    title,
    link,
    pubDate,
    author: GOOGLE_RESEARCH_AUTHOR,
    summary: summary || undefined,
  };
}
105
+
106
+
107
/**
 * Structure-agnostic parser: treats every anchor that carries at least two
 * distinct leaf texts as a dataset card. The first leaf text is the title;
 * the first other text of at least MIN_SUMMARY_LENGTH characters is the summary.
 *
 * @param {string} html - Page HTML.
 * @param {string | URL} finalUrl - Base for resolving relative hrefs.
 * @returns {Array<object>} Items in document order, one per distinct link.
 */
function parseFromPurifiedHtml(html, finalUrl) {
  const items = [];
  const seenLinks = new Set();
  const document = _deps.parseHtml(html);

  for (const anchor of document.querySelectorAll("a[href]")) {
    const link = resolveResultLink(anchor.getAttribute("href"), finalUrl);
    if (!link || seenLinks.has(link)) continue;

    const texts = extractLeafTexts(anchor);
    if (texts.length < 2) continue;

    const [title] = texts;
    const summary = texts.find(
      (candidate) => candidate !== title && candidate.length >= MIN_SUMMARY_LENGTH,
    );
    if (!(title && summary)) continue;

    // Only links that actually produced an item are remembered.
    seenLinks.add(link);
    items.push(buildItem(title, link, summary, items.length));
  }

  return items;
}
129
+
130
+
131
/**
 * Class-based parser: reads dataset cards marked up as `a.row-card`, taking
 * the title from `.row-card__heading` and the summary from the first
 * `.row-card__subheading__item`.
 *
 * @param {string} html - Page HTML.
 * @param {string | URL} finalUrl - Base for resolving relative hrefs.
 * @returns {Array<object>} Items in document order, one per distinct link.
 */
function parseFromRawHtml(html, finalUrl) {
  const items = [];
  const seenLinks = new Set();
  const cards = _deps.parseHtml(html).querySelectorAll("a.row-card[href]");

  for (const card of cards) {
    const link = resolveResultLink(card.getAttribute("href"), finalUrl);
    if (!link || seenLinks.has(link)) continue;

    const textOf = (selector) => normalizeText(card.querySelector(selector)?.textContent);
    const title = textOf(".row-card__heading");
    const summary = textOf(".row-card__subheading__item");
    if (!(title && summary)) continue;

    // Only links that actually produced an item are remembered.
    seenLinks.add(link);
    items.push(buildItem(title, link, summary, items.length));
  }

  return items;
}
151
+
152
+
153
/**
 * Plugin entry point: fetches the datasets page and extracts dataset items.
 *
 * First attempt: default fetch + structure-agnostic parser. If that yields
 * nothing, the page is fetched again with `purify: false` and parsed by CSS
 * class. (Presumably the default fetch returns purified HTML; confirm against
 * the ctx.fetchHtml implementation.)
 *
 * @param {string} sourceId - Passed to ctx.fetchHtml; also used as a base-URL fallback.
 * @param {{ deps: any, fetchHtml: Function }} ctx - Host context.
 * @returns {Promise<Array<object>>} The parsed items (never empty).
 * @throws {Error} When neither attempt finds any item.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;

  const first = await ctx.fetchHtml(sourceId, { waitMs: 3500 });
  const firstItems = parseFromPurifiedHtml(
    first.html,
    first.finalUrl || sourceId || DATASETS_URL,
  );
  if (firstItems.length !== 0) return firstItems;

  const second = await ctx.fetchHtml(sourceId, { waitMs: 3500, purify: false });
  const secondItems = parseFromRawHtml(
    second.html,
    second.finalUrl || sourceId || DATASETS_URL,
  );
  if (secondItems.length !== 0) return secondItems;

  throw new Error("[google-research-datasets] 未解析到数据集条目,页面结构可能已变化");
}
165
+
166
+
167
// Plugin descriptor: id, the list-page URL pattern this plugin handles
// (research.google/resources/datasets with optional trailing slash and query), and the fetcher.
const googleResearchDatasetsPlugin = {
  id: "google-research-datasets",
  listUrlPattern: /^https?:\/\/research\.google\/resources\/datasets\/?(\?.*)?$/i,
  fetchItems,
};

export default googleResearchDatasetsPlugin;
@@ -0,0 +1,220 @@
1
// Host-provided dependencies; assigned from ctx.deps at the start of fetchItems.
let _deps;

// Google Research home-page plugin: scrapes the latest blog/publication entries
// from the research.google home page (no enrichment step).

// /blog/<slug> detail pages; excludes the RSS feed, label listings and
// year archive paths such as /blog/2024/.
const BLOG_DETAIL_PATH_RE = /^\/blog\/(?!rss\/?$|label\/|\d{4}(?:\/|$))[^?#]+/i;

// /pubs/<slug> detail pages; excludes the bare /pubs/ listing.
const PUBS_DETAIL_PATH_RE = /^\/pubs\/(?!$|\/?$)[^?#]+/i;

// Titles starting with these verbs are candidate call-to-action labels.
const CTA_TITLE_RE = /^(see|learn|explore|read|watch)\b/i;

// "<Month> <day>, <year>" or "<Month> <day> <year>"; groups: month, day, year.
const MONTH_NAME_RE =
  /(january|february|march|april|may|june|july|august|september|october|november|december)\s+(\d{1,2})(?:,\s*|\s+)(\d{4})/i;

// "<Month> <day>" without a year; groups: month, day.
const MONTH_DAY_RE =
  /(january|february|march|april|may|june|july|august|september|october|november|december)\s+(\d{1,2})\b/i;

// Lower-case English month name -> zero-based month index (the form Date.UTC expects).
const MONTH_INDEX = Object.fromEntries(
  [
    "january",
    "february",
    "march",
    "april",
    "may",
    "june",
    "july",
    "august",
    "september",
    "october",
    "november",
    "december",
  ].map((monthName, monthIndex) => [monthName, monthIndex]),
);
28
+
29
+
30
/**
 * Cleans text for comparison and display: removes zero-width characters
 * (U+200B..U+200D and U+FEFF), collapses whitespace runs to one space and
 * trims both ends.
 *
 * @param {string | null | undefined} text - Raw text; nullish input becomes "".
 * @returns {string} Normalized text.
 */
function normalizeText(text) {
  const raw = text ?? "";
  const withoutZeroWidth = raw.replace(/[\u200B-\u200D\uFEFF]/g, "");
  const singleSpaced = withoutZeroWidth.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
36
+
37
+
38
/**
 * Splits text on newlines and returns the normalized, non-empty lines,
 * dropping lines that consist only of the "·" separator.
 *
 * @param {string | null | undefined} text - Raw multi-line text.
 * @returns {string[]} Meaningful lines in their original order.
 */
function splitMeaningfulLines(text) {
  const kept = [];
  for (const piece of (text ?? "").split(/\n+/)) {
    const line = normalizeText(piece);
    if (line && line !== "·") kept.push(line);
  }
  return kept;
}
45
+
46
+
47
/**
 * Turns a URL slug into a readable title: hyphen/underscore runs become
 * spaces and every lower-case letter at a word boundary is upper-cased.
 *
 * @param {string} slug - Last path segment of a URL.
 * @returns {string} Title-like text (empty for an empty slug).
 */
function decodeSlug(slug) {
  const spaced = slug.replace(/[-_]+/g, " ");
  const capitalized = spaced.replace(/\b([a-z])/g, (letter) => letter.toUpperCase());
  return normalizeText(capitalized);
}
54
+
55
+
56
/**
 * Resolves an href against the page URL and returns the parsed URL object,
 * provided the scheme is http or https.
 *
 * @param {string | null | undefined} rawHref - The href value.
 * @param {string | URL} pageUrl - Base used for relative hrefs.
 * @returns {URL | null} Parsed URL, or null for empty hrefs, fragment-only
 *   hrefs, javascript: hrefs, unparsable values, or non-http(s) schemes.
 */
function toAbsoluteHttpUrl(rawHref, pageUrl) {
  const href = rawHref ? rawHref.trim() : "";
  const unusable = href === "" || href.startsWith("#") || href.startsWith("javascript:");
  if (unusable) return null;

  let resolved;
  try {
    resolved = new URL(href, pageUrl);
  } catch {
    // Not parseable even with the base: treat as "no link".
    return null;
  }
  return /^https?:$/i.test(resolved.protocol) ? resolved : null;
}
68
+
69
+
70
/**
 * Checks whether a URL path is a blog-post or publication detail page.
 *
 * @param {string} pathname - URL pathname.
 * @returns {boolean} True when either detail-path pattern matches.
 */
function isResearchItemPath(pathname) {
  const detailPatterns = [BLOG_DETAIL_PATH_RE, PUBS_DETAIL_PATH_RE];
  return detailPatterns.some((pattern) => pattern.test(pathname));
}
73
+
74
+
75
/**
 * Picks a title for an anchor, trying progressively weaker sources:
 *  1. the first heading (h1-h6) inside the anchor;
 *  2. the first <span>, if it has at least 6 characters and no "<Month> <day>";
 *  3. the first text line of at least 3 characters that is neither a date
 *     nor the "BLOG"/"PUBLICATIONS" section label;
 *  4. the last path segment of the link, prettified.
 *
 * @param {any} anchor - Anchor element.
 * @param {URL} linkUrl - Resolved link of the anchor.
 * @returns {string} The chosen title (may be "" if the URL has no path segment).
 */
function extractTitle(anchor, linkUrl) {
  const fromHeading = normalizeText(
    anchor.querySelector("h1, h2, h3, h4, h5, h6")?.textContent,
  );
  if (fromHeading) return fromHeading;

  const fromSpan = normalizeText(anchor.querySelector("span")?.textContent);
  if (fromSpan && fromSpan.length >= 6 && !MONTH_DAY_RE.test(fromSpan)) return fromSpan;

  const sectionLabels = ["BLOG", "PUBLICATIONS"];
  const firstUsableLine = splitMeaningfulLines(anchor.textContent).find((line) => {
    const looksLikeDate = MONTH_NAME_RE.test(line) || MONTH_DAY_RE.test(line);
    return line.length >= 3 && !looksLikeDate && !sectionLabels.includes(line.toUpperCase());
  });
  if (firstUsableLine !== undefined) return firstUsableLine;

  const lastSegment = linkUrl.pathname.split("/").filter(Boolean).at(-1) ?? "";
  return decodeSlug(lastSegment);
}
95
+
96
+
97
/**
 * Extracts a publication date from free text, trying in order:
 *  1. "<Month> <day>, <year>" (or without the comma) -> that day, 00:00 UTC;
 *  2. "<Month> <day>" without a year -> the most recent such day that is not
 *     in the future, 00:00 UTC;
 *  3. a parenthesised year "(YYYY)" between 1990 and next year -> January 1st,
 *     00:00 UTC.
 *
 * For case 2 the current year is assumed, but when that would place the date
 * more than one day ahead of now (e.g. "December 15" read in January) the
 * previous year is used instead, so feed entries never get a far-future date.
 * All year arithmetic is done in UTC to match Date.UTC.
 *
 * @param {string | null | undefined} text - Text that may contain a date.
 * @returns {Date | undefined} The parsed date, or undefined when nothing matches.
 */
function parseDateFromText(text) {
  const normalized = normalizeText(text);

  const monthWithYear = normalized.match(MONTH_NAME_RE);
  if (monthWithYear) {
    const monthName = monthWithYear[1]?.toLowerCase();
    const day = Number(monthWithYear[2]);
    const year = Number(monthWithYear[3]);
    const month = monthName ? MONTH_INDEX[monthName] : undefined;
    if (month != null && Number.isFinite(day) && Number.isFinite(year)) {
      return new Date(Date.UTC(year, month, day, 0, 0, 0));
    }
  }

  const monthDayOnly = normalized.match(MONTH_DAY_RE);
  if (monthDayOnly) {
    const monthName = monthDayOnly[1]?.toLowerCase();
    const day = Number(monthDayOnly[2]);
    const month = monthName ? MONTH_INDEX[monthName] : undefined;
    if (month != null && Number.isFinite(day)) {
      const now = new Date();
      const currentYear = now.getUTCFullYear();
      const oneDayMs = 24 * 60 * 60 * 1000;
      const inCurrentYear = Date.UTC(currentYear, month, day, 0, 0, 0);
      // One day of slack covers pages dated in a time zone ahead of UTC.
      const year = inCurrentYear - now.getTime() > oneDayMs ? currentYear - 1 : currentYear;
      return new Date(Date.UTC(year, month, day, 0, 0, 0));
    }
  }

  const yearMatch = normalized.match(/\((19|20)\d{2}\)/);
  if (yearMatch) {
    const year = Number(yearMatch[0].replace(/[()]/g, ""));
    if (year >= 1990 && year <= new Date().getUTCFullYear() + 1) {
      return new Date(Date.UTC(year, 0, 1, 0, 0, 0));
    }
  }
  return undefined;
}
130
+
131
+
132
/**
 * Builds a short summary from the anchor's text: the first two lines that are
 * neither the title nor a date, joined with " · ".
 *
 * @param {any} anchor - Anchor element.
 * @param {string} title - Already extracted title, excluded from the summary.
 * @returns {string | undefined} The summary, or undefined when no line qualifies.
 */
function extractSummary(anchor, title) {
  const kept = [];
  for (const line of splitMeaningfulLines(anchor.textContent)) {
    if (!line || line === title) continue;
    if (MONTH_NAME_RE.test(line) || MONTH_DAY_RE.test(line)) continue;
    kept.push(line);
  }
  if (kept.length === 0) return undefined;

  const joined = normalizeText(kept.slice(0, 2).join(" · "));
  return joined || undefined;
}
143
+
144
+
145
/**
 * Gathers the text surrounding an anchor: the normalized text of the anchor
 * itself and of up to three of its ancestors, innermost first, joined by spaces.
 *
 * @param {any} anchor - Anchor element.
 * @returns {string} Concatenated context text (may be empty).
 */
function collectContextText(anchor) {
  const parts = [];
  let node = anchor;
  let level = 0;
  while (level < 4 && node) {
    const text = normalizeText(node.textContent);
    if (text) parts.push(text);
    node = node.parentNode ?? null;
    level += 1;
  }
  return parts.join(" ");
}
155
+
156
+
157
/**
 * Decides whether a title is really a button label and should be skipped.
 *
 * Empty titles count as call-to-action. Otherwise the title must start with a
 * call-to-action verb and be at most 8 space-separated words long; longer
 * titles that merely begin with such a verb are kept.
 *
 * @param {string | null | undefined} title - Candidate title.
 * @returns {boolean} True when the title should be rejected.
 */
function isCallToActionTitle(title) {
  const text = normalizeText(title);
  if (text === "") return true;

  const wordCount = text.split(" ").length;
  return CTA_TITLE_RE.test(text) && wordCount <= 8;
}
163
+
164
+
165
/**
 * Builds feed items from the anchors of the research.google home page.
 *
 * An anchor is kept when it points at a research.google blog/publication
 * detail page and yields a title that is not a call-to-action label. The date
 * is searched in the text of the anchor and its nearest ancestors; entries
 * without one are stamped with the fetch time.
 *
 * A link is marked as seen only once an item has been accepted for it. Cards
 * commonly contain several anchors to the same URL (e.g. a "Read more" button
 * next to the titled link); a rejected anchor must not suppress a later,
 * properly titled anchor for the same link.
 *
 * @param {string} html - Page HTML.
 * @param {string | URL} pageUrl - Base for resolving relative hrefs.
 * @returns {Array<object>} Items in document order, one per distinct link.
 */
function parseItemsFromHome(html, pageUrl) {
  const root = _deps.parseHtml(html);
  const seen = new Set();
  const items = [];

  for (const anchor of root.querySelectorAll("a[href]")) {
    const linkUrl = toAbsoluteHttpUrl(anchor.getAttribute("href"), pageUrl);
    if (!linkUrl) continue;
    if (linkUrl.hostname !== "research.google") continue;
    if (!isResearchItemPath(linkUrl.pathname)) continue;

    const link = linkUrl.href;
    if (seen.has(link)) continue;

    const title = extractTitle(anchor, linkUrl);
    if (!title) continue;
    if (isCallToActionTitle(title)) continue;
    seen.add(link);

    const context = collectContextText(anchor);
    const pubDate = parseDateFromText(context) ?? new Date();
    const summary = extractSummary(anchor, title);

    items.push({
      guid: _deps.createHash("sha256").update(link).digest("hex"),
      title,
      link,
      pubDate,
      author: "Google Research",
      summary,
      sourceId: "google-research",
    });
  }

  return items;
}
202
+
203
+
204
/**
 * Plugin entry point: fetches the research.google home page and extracts
 * the blog/publication items linked from it.
 *
 * The base URL for resolving relative links is the fetch's final URL. When
 * the fetch does not report one, the source id is used (assumed to be the
 * list URL, as the sibling datasets plugin also assumes), then the site root,
 * instead of failing inside `new URL(undefined)`.
 *
 * @param {string} sourceId - Passed to ctx.fetchHtml; also a base-URL fallback.
 * @param {{ deps: any, fetchHtml: Function }} ctx - Host context.
 * @returns {Promise<Array<object>>} The parsed items (never empty).
 * @throws {Error} When no item could be parsed from the page.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 4000 });
  const pageUrl = new URL(finalUrl || sourceId || "https://research.google/");
  const items = parseItemsFromHome(html, pageUrl);
  if (items.length === 0) {
    throw new Error("[google-research] 未解析到条目,页面结构可能已变化");
  }
  return items;
}
214
+
215
+
216
// Plugin descriptor: id, the list-page URL pattern this plugin handles
// (the research.google root with optional trailing slash and query), and the fetcher.
const googleResearchPlugin = {
  id: "google-research",
  listUrlPattern: /^https?:\/\/research\.google\/?(?:\?.*)?$/i,
  fetchItems,
};

export default googleResearchPlugin;