rssany 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/app/plugins/builtin/agi-eval-evaluation.rssany.js +188 -0
  2. package/app/plugins/builtin/amii-research-talent.rssany.js +73 -0
  3. package/app/plugins/builtin/anthropic-research.rssany.js +155 -0
  4. package/app/plugins/builtin/appen-resources.rssany.js +155 -0
  5. package/app/plugins/builtin/baai-wudao-paper-article.rssany.js +185 -0
  6. package/app/plugins/builtin/baaidata-csdn.rssany.js +242 -0
  7. package/app/plugins/builtin/baidu-research.rssany.js +222 -0
  8. package/app/plugins/builtin/brightdata-blog.rssany.js +301 -0
  9. package/app/plugins/builtin/bytedance-seed-research.rssany.js +231 -0
  10. package/app/plugins/builtin/five-radar.rssany.js +490 -0
  11. package/app/plugins/builtin/flageval-news.rssany.js +118 -0
  12. package/app/plugins/builtin/google-deepmind-research.rssany.js +223 -0
  13. package/app/plugins/builtin/google-research-datasets.rssany.js +171 -0
  14. package/app/plugins/builtin/google-research.rssany.js +220 -0
  15. package/app/plugins/builtin/google.rssany.js +187 -0
  16. package/app/plugins/builtin/hacker-news-newest.rssany.js +130 -0
  17. package/app/plugins/builtin/harvard-dataverse.rssany.js +166 -0
  18. package/app/plugins/builtin/huaweicloud-bbs-blogs.rssany.js +185 -0
  19. package/app/plugins/builtin/lingowhale.rssany.js +119 -0
  20. package/app/plugins/builtin/meituan-tech.rssany.js +130 -0
  21. package/app/plugins/builtin/meta-ai-publications.rssany.js +221 -0
  22. package/app/plugins/builtin/mila-quebec.rssany.js +199 -0
  23. package/app/plugins/builtin/mit-csail-research.rssany.js +208 -0
  24. package/app/plugins/builtin/moonshot.rssany.js +127 -0
  25. package/app/plugins/builtin/opendatalab-news.rssany.js +174 -0
  26. package/app/plugins/builtin/opendatalab.rssany.js +109 -0
  27. package/app/plugins/builtin/opendrivelab-autonomous-driving.rssany.js +114 -0
  28. package/app/plugins/builtin/opendrivelab-embodiedai.rssany.js +114 -0
  29. package/app/plugins/builtin/opendrivelab-publications.rssany.js +130 -0
  30. package/app/plugins/builtin/opendrivelab.rssany.js +333 -0
  31. package/app/plugins/builtin/paperswithcode.rssany.js +227 -0
  32. package/app/plugins/builtin/pjlab-adg-publications.rssany.js +202 -0
  33. package/app/plugins/builtin/rss.rssany.js +11 -1
  34. package/app/plugins/builtin/selectdataset.rssany.js +206 -0
  35. package/app/plugins/builtin/sensetime-tech-achievements.rssany.js +154 -0
  36. package/app/plugins/builtin/supervisely-blog.rssany.js +159 -0
  37. package/app/plugins/builtin/theinformation-briefings.rssany.js +136 -0
  38. package/app/plugins/builtin/uci-ml-repository.rssany.js +111 -0
  39. package/app/plugins/builtin/venturebeat.rssany.js +97 -0
  40. package/app/plugins/builtin/worldlabs.rssany.js +129 -0
  41. package/app/plugins/builtin/x.rssany.js +328 -0
  42. package/app/plugins/builtin/xiaohongshu.rssany.js +283 -0
  43. package/app/plugins/builtin/zhipu-research.rssany.js +334 -0
  44. package/dist/index.js +62 -4
  45. package/dist/index.js.map +1 -1
  46. package/package.json +1 -1
  47. package/webui/build/200.html +6 -6
  48. package/webui/build/_app/immutable/assets/{0.DjU2hdCQ.css → 0.BB88QFoe.css} +1 -1
  49. package/webui/build/_app/immutable/assets/homeFeedPanelStore.CSvlNcpm.css +1 -0
  50. package/webui/build/_app/immutable/chunks/BwlaCkNX.js +36 -0
  51. package/webui/build/_app/immutable/chunks/C0J2-L94.js +1 -0
  52. package/webui/build/_app/immutable/chunks/CLOXMsDk.js +36 -0
  53. package/webui/build/_app/immutable/chunks/{C85CNwD2.js → DgceFEv5.js} +1 -1
  54. package/webui/build/_app/immutable/chunks/{CllQAdvt.js → SqCUd34O.js} +1 -1
  55. package/webui/build/_app/immutable/entry/{app.BcD2eSsQ.js → app.B8zBPipq.js} +2 -2
  56. package/webui/build/_app/immutable/entry/start.CxRCKeCl.js +1 -0
  57. package/webui/build/_app/immutable/nodes/0.ChLNE3xy.js +11 -0
  58. package/webui/build/_app/immutable/nodes/{1.DU9aYGAb.js → 1.1N74-4Io.js} +1 -1
  59. package/webui/build/_app/immutable/nodes/{10.Db6vw7Ih.js → 10.DY30t9Ib.js} +1 -1
  60. package/webui/build/_app/immutable/nodes/{11.BaAcorz3.js → 11.ITuxnukH.js} +1 -1
  61. package/webui/build/_app/immutable/nodes/12.qLzWqB1c.js +1 -0
  62. package/webui/build/_app/immutable/nodes/{14.DqT4pcrQ.js → 14.BHnIxbVM.js} +1 -1
  63. package/webui/build/_app/immutable/nodes/{15.CCLbjxnH.js → 15.CLjT9il3.js} +1 -1
  64. package/webui/build/_app/immutable/nodes/{16.DiigpVdP.js → 16.BD-mKCLN.js} +1 -1
  65. package/webui/build/_app/immutable/nodes/{3.DEcYOQc-.js → 3.Dt5o2Fmz.js} +1 -1
  66. package/webui/build/_app/immutable/nodes/{5.CvM1TkLG.js → 5.Dy3vSsIP.js} +1 -1
  67. package/webui/build/_app/immutable/nodes/{6.Dscr6LkS.js → 6.DvclsL6H.js} +1 -1
  68. package/webui/build/_app/immutable/nodes/{7.Bp60MobD.js → 7.D2nJy-Uz.js} +1 -1
  69. package/webui/build/_app/immutable/nodes/{8.DwSg0MHh.js → 8.C75mhrqs.js} +1 -1
  70. package/webui/build/_app/immutable/nodes/{9.BeYOUjxR.js → 9.Bp_QXw3w.js} +1 -1
  71. package/webui/build/_app/version.json +1 -1
  72. package/webui/build/_app/immutable/assets/homeFeedPanelStore.BopJZtHu.css +0 -1
  73. package/webui/build/_app/immutable/chunks/CdMsRjxJ.js +0 -1
  74. package/webui/build/_app/immutable/chunks/CtijX1u3.js +0 -31
  75. package/webui/build/_app/immutable/chunks/Dv1VCsiB.js +0 -41
  76. package/webui/build/_app/immutable/entry/start.CbkdJdz1.js +0 -1
  77. package/webui/build/_app/immutable/nodes/0.DSUDmOx2.js +0 -11
  78. package/webui/build/_app/immutable/nodes/12.Cg8AeCSH.js +0 -1
@@ -0,0 +1,114 @@
1
// Host-provided dependencies; fetchItems assigns ctx.deps here before any helper runs.
let _deps;

// OpenDriveLab Autonomous Driving plugin: scrapes timeline entries and emits FeedItem objects (no enrich step).
4
+
5
+
6
/**
 * Collapses every run of whitespace into one space and trims both ends.
 * @param {string | null | undefined} text - Raw text; nullish counts as empty.
 * @returns {string} The single-spaced, trimmed text.
 */
function normalizeText(text) {
  const raw = text ?? "";
  const collapsed = raw.replace(/\s+/g, " ");
  return collapsed.trim();
}
9
+
10
/**
 * Derives a stable item GUID as the hex SHA-256 digest of the input.
 * Relies on the module-level `_deps.createHash` set by fetchItems.
 * @param {string} input - Value to hash (the item link in this plugin).
 * @returns {string} Hex-encoded digest.
 */
function hashGuid(input) {
  const hasher = _deps.createHash("sha256");
  return hasher.update(input).digest("hex");
}
13
+
14
/**
 * Resolves an href against a base URL and keeps it only if it is http(s).
 * @param {string | null | undefined} rawHref - The anchor's href attribute.
 * @param {string} baseUrl - URL used to resolve relative hrefs.
 * @returns {string | null} Absolute URL string, or null for empty, fragment-only,
 *   `javascript:`, unparseable or non-http(s) hrefs.
 */
function toAbsoluteHttpUrl(rawHref, baseUrl) {
  const href = rawHref ? rawHref.trim() : "";
  const isSkippable = href === "" || href.startsWith("#") || href.startsWith("javascript:");
  if (isSkippable) return null;

  let resolved;
  try {
    resolved = new URL(href, baseUrl);
  } catch {
    // Unparseable href or base: treat as "no link".
    return null;
  }
  return /^https?:$/i.test(resolved.protocol) ? resolved.href : null;
}
26
+
27
/**
 * Parses a `YYYY.MM.DD` / `YYYY-MM-DD` / `YYYY/MM/DD` date found anywhere in the text.
 *
 * The previous `Number.isFinite` guards could never fail (the pattern only captures
 * digits), so parts such as month 13 or day 0 were handed to `Date.UTC`, which
 * silently rolls them over into another month or year. Out-of-range parts are now
 * rejected and handled like unparseable text.
 *
 * @param {string | null | undefined} dateText - Text expected to contain a date.
 * @returns {Date} The parsed date at 12:00 UTC, or the current time when no
 *   plausible date is present.
 */
function parsePubDate(dateText) {
  const normalized = normalizeText(dateText);
  const m = normalized.match(/(\d{4})[./-](\d{1,2})[./-](\d{1,2})/);
  if (!m) return new Date();

  const year = Number(m[1]);
  const month = Number(m[2]);
  const day = Number(m[3]);
  const isPlausible = month >= 1 && month <= 12 && day >= 1 && day <= 31;
  if (!isPlausible) return new Date();

  // Always use noon UTC so a date-only value does not shift to a neighbouring day across time zones.
  return new Date(Date.UTC(year, month - 1, day, 12, 0, 0));
}
40
+
41
// Lower-cased anchor texts treated as auxiliary links (paper/code/video buttons, ...)
// rather than the entry title; findTitleAnchor skips them when choosing a title.
const AUX_LINK_TEXTS = new Set([
  "paper", "page", "github", "dataset",
  "hugging face", "video", "blog", "poster",
  "slides", "arxiv", "code", "demo",
  "解讀",
]);
56
+
57
/**
 * Picks the anchor inside a list row that most likely carries the entry title.
 *
 * The first anchor with non-empty text and a valid http(s) link is remembered as a
 * fallback. The first anchor whose text is not an auxiliary label and is at least
 * 8 characters long wins outright.
 *
 * @param {object} li - Row node exposing `querySelectorAll`.
 * @param {string} finalUrl - Base URL for resolving relative hrefs.
 * @returns {{ title: string, link: string } | null} Chosen title/link, or null when
 *   the row has no usable anchor.
 */
function findTitleAnchor(li, finalUrl) {
  let firstUsable = null;

  for (const node of li.querySelectorAll("a[href]")) {
    const title = normalizeText(node.textContent);
    const link = toAbsoluteHttpUrl(node.getAttribute("href"), finalUrl);
    if (title && link) {
      const candidate = { title, link };
      if (firstUsable === null) firstUsable = candidate;

      const isAuxiliary = AUX_LINK_TEXTS.has(title.toLowerCase());
      if (!isAuxiliary && title.length >= 8) return candidate;
    }
  }

  return firstUsable;
}
74
+
75
/**
 * Fetches the Autonomous Driving page and turns each dated list row into a feed item.
 *
 * Only `<li>` rows with non-empty `<time>` text are considered; rows are
 * de-duplicated by resolved link.
 *
 * @param {string} sourceId - Source identifier forwarded to `ctx.fetchHtml`.
 * @param {object} ctx - Host context providing `deps` and `fetchHtml`.
 * @returns {Promise<object[]>} The extracted feed items.
 * @throws {Error} When no item could be extracted from the page.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const page = await ctx.fetchHtml(sourceId, { waitMs: 3500 });
  const document = _deps.parseHtml(page.html);

  const emittedLinks = new Set();
  const items = [];

  for (const row of document.querySelectorAll("li")) {
    const dateText = normalizeText(row.querySelector("time")?.textContent);
    if (!dateText) continue;

    const entry = findTitleAnchor(row, page.finalUrl);
    if (!entry || emittedLinks.has(entry.link)) continue;
    emittedLinks.add(entry.link);

    // The row's <i> element, when present, supplies the summary.
    const summaryText = normalizeText(row.querySelector("i")?.textContent);
    items.push({
      guid: hashGuid(entry.link),
      title: entry.title,
      link: entry.link,
      pubDate: parsePubDate(dateText),
      author: "OpenDriveLab",
      summary: summaryText || undefined,
      sourceId: "opendrivelab-autonomous-driving",
    });
  }

  if (items.length === 0) {
    throw new Error("[opendrivelab-autonomous-driving] 未解析到条目,页面结构可能已变化");
  }
  return items;
}
109
+
110
// Default export: the plugin descriptor (id, URL matcher and fetch entry point).
export default {
  id: "opendrivelab-autonomous-driving",
  // Matches http(s)://[www.]opendrivelab.com/AutonomousDriving, with an optional
  // trailing slash and optional query string; case-insensitive.
  listUrlPattern: /^https?:\/\/(www\.)?opendrivelab\.com\/AutonomousDriving\/?(\?.*)?$/i,
  fetchItems,
};
@@ -0,0 +1,114 @@
1
// Host-provided dependencies; fetchItems assigns ctx.deps here before any helper runs.
let _deps;

// Plugin id, also used as the prefix of thrown error messages.
const SITE_ID = "opendrivelab-embodiedai";

// A 20xx date written as Y.M.D, Y/M/D or Y-M-D, delimited by word boundaries.
const DATE_RE = /\b(20\d{2})[./-](\d{1,2})[./-](\d{1,2})\b/;

// Lower-cased anchor texts that label auxiliary action links rather than entry titles.
const ACTION_LINK_LABELS = new Set([
  "paper", "page", "blog",
  "github", "video", "dataset",
  "challenge", "hugging face", "hardware guide",
]);
17
+
18
/**
 * Returns the text with whitespace runs reduced to single spaces and no
 * leading/trailing whitespace; nullish input yields an empty string.
 * @param {string | null | undefined} text - Raw text.
 * @returns {string} Normalized text.
 */
function normalizeText(text) {
  if (text == null) return "";
  return text.replace(/\s+/g, " ").trim();
}
21
+
22
/**
 * Computes the hex SHA-256 digest used as an item GUID.
 * Requires the module-level `_deps.createHash` to have been set by fetchItems.
 * @param {string} input - Value to hash.
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const digest = _deps.createHash("sha256").update(input);
  return digest.digest("hex");
}
25
+
26
/**
 * Turns an anchor href into an absolute http(s) URL string.
 * @param {string | null | undefined} rawHref - Raw href attribute value.
 * @param {string} baseUrl - Base used for relative hrefs.
 * @returns {string | null} The absolute URL, or null when the href is missing,
 *   a fragment, a `javascript:` link, unparseable, or not http(s).
 */
function toAbsoluteHttpUrl(rawHref, baseUrl) {
  const trimmed = rawHref ? rawHref.trim() : "";
  if (trimmed === "") return null;
  if (["#", "javascript:"].some((prefix) => trimmed.startsWith(prefix))) return null;

  try {
    const { protocol, href } = new URL(trimmed, baseUrl);
    return /^https?:$/i.test(protocol) ? href : null;
  } catch {
    return null;
  }
}
38
+
39
/**
 * Extracts the first `DATE_RE` date (20xx with `.`, `/` or `-` separators) from the text.
 *
 * The old NaN check could never fire for digit captures, so an impossible month or
 * day (e.g. month 13, day 0) was rolled over by `Date.UTC` into a different month
 * or year. Such values are now rejected instead.
 *
 * @param {string | null | undefined} dateText - Text expected to contain a date.
 * @returns {Date | undefined} The date at 12:00 UTC, or undefined when the text
 *   holds no plausible date.
 */
function parseDate(dateText) {
  const m = normalizeText(dateText).match(DATE_RE);
  if (!m) return undefined;

  const year = Number(m[1]);
  const month = Number(m[2]);
  const day = Number(m[3]);
  if (month < 1 || month > 12 || day < 1 || day > 31) return undefined;

  // Noon UTC keeps a date-only value on the same calendar day in every time zone.
  return new Date(Date.UTC(year, month - 1, day, 12, 0, 0));
}
47
+
48
/**
 * Tells whether the text is one of the known action-link labels (case-insensitive,
 * whitespace-normalized).
 * @param {string | null | undefined} text - Anchor text.
 * @returns {boolean} True when the text is in ACTION_LINK_LABELS.
 */
function isActionLabel(text) {
  return ACTION_LINK_LABELS.has(normalizeText(text).toLowerCase());
}
52
+
53
/**
 * Finds the anchor in a list row that most likely holds the entry title.
 *
 * Anchors with empty text, action-label text, or no valid http(s) link are ignored.
 * The first remaining anchor with at least 12 characters of text wins; otherwise
 * the first remaining anchor is returned.
 *
 * @param {object} liNode - Row node exposing `querySelectorAll`.
 * @param {string} finalUrl - Base URL for resolving relative hrefs.
 * @returns {{ anchor: object, link: string, title: string } | null} The chosen
 *   anchor with its resolved link and title, or null when none qualifies.
 */
function findTitleAnchor(liNode, finalUrl) {
  let shortCandidate = null;

  for (const anchor of liNode.querySelectorAll("a[href]")) {
    const title = normalizeText(anchor.textContent);
    const hasUsableTitle = title !== "" && !isActionLabel(title);
    const link = hasUsableTitle ? toAbsoluteHttpUrl(anchor.getAttribute("href"), finalUrl) : null;
    if (link) {
      const candidate = { anchor, link, title };
      if (title.length >= 12) return candidate;
      if (shortCandidate === null) shortCandidate = candidate;
    }
  }

  return shortCandidate;
}
68
+
69
/**
 * Parses the page HTML and builds one feed item per dated list row.
 *
 * Only `<li>` rows with non-empty `<time>` text and a usable title anchor are
 * kept; rows are de-duplicated by resolved link. When the date text cannot be
 * parsed the current time is used.
 *
 * @param {string} html - Page markup.
 * @param {string} finalUrl - Base URL for resolving relative hrefs.
 * @returns {object[]} Extracted items (possibly empty).
 */
function buildItemsFromHtml(html, finalUrl) {
  const document = _deps.parseHtml(html);
  const emittedLinks = new Set();
  const items = [];

  for (const li of document.querySelectorAll("li")) {
    const dateText = normalizeText(li.querySelector("time")?.textContent);
    if (!dateText) continue;

    const entry = findTitleAnchor(li, finalUrl);
    if (!entry || emittedLinks.has(entry.link)) continue;

    const summary = normalizeText(li.querySelector("i")?.textContent);
    const item = {
      guid: hashGuid(entry.link),
      title: entry.title,
      link: entry.link,
      pubDate: parseDate(dateText) ?? new Date(),
      summary: summary || undefined,
    };
    items.push(item);
    emittedLinks.add(entry.link);
  }

  return items;
}
96
+
97
/**
 * Fetches the Embodied AI page and returns its feed items.
 *
 * When nothing can be extracted, the page text is checked for challenge-page
 * wording so the thrown error distinguishes a blocked request from a layout change.
 *
 * @param {string} sourceId - Source identifier forwarded to `ctx.fetchHtml`.
 * @param {object} ctx - Host context providing `deps` and `fetchHtml`.
 * @returns {Promise<object[]>} The extracted feed items (never empty).
 * @throws {Error} When no item could be extracted.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 3500 });

  const items = buildItemsFromHtml(html, finalUrl);
  if (items.length === 0) {
    const pageText = normalizeText(_deps.parseHtml(html).textContent).toLowerCase();
    const looksChallenged = ["just a moment", "checking your browser"].some((marker) =>
      pageText.includes(marker),
    );
    const reason = looksChallenged
      ? `[${SITE_ID}] 命中站点风控验证页,当前会话无法稳定抓取`
      : `[${SITE_ID}] 未解析到 Embodied AI 条目,页面结构可能已变化`;
    throw new Error(reason);
  }
  return items;
}
109
+
110
// Default export: the plugin descriptor (id, URL matcher and fetch entry point).
export default {
  id: SITE_ID,
  // Matches http(s)://[www.]opendrivelab.com/EmbodiedAI, with an optional
  // trailing slash and optional query string; case-insensitive.
  listUrlPattern: /^https?:\/\/(www\.)?opendrivelab\.com\/EmbodiedAI\/?(\?.*)?$/i,
  fetchItems,
};
@@ -0,0 +1,130 @@
1
// Host-provided dependencies; fetchItems assigns ctx.deps here before any helper runs.
let _deps;

// Matches text containing a 20xx year or a venue/preprint keyword anywhere in it
// (unanchored substring match, case-insensitive).
const VENUE_HINT_RE = /(20\d{2}|cvpr|iccv|eccv|neurips|iclr|aaai|ijcv|tpami|icra|rss|corl|preprint|arxiv)/i;
// Hostnames (exact or any subdomain) whose links are never emitted as paper links:
// Google Scholar, GitHub, shields.io badges, YouTube, Zhihu, WeChat articles and
// cvprYYYY.thecvf.com pages.
const BLOCKED_HOST_RE = /(^|\.)scholar\.google\.com$|(^|\.)github\.com$|(^|\.)img\.shields\.io$|(^|\.)youtube\.com$|(^|\.)youtu\.be$|(^|\.)zhihu\.com$|(^|\.)mp\.weixin\.qq\.com$|(^|\.)cvpr\d{4}\.thecvf\.com$/i;
// Matches the whole word "award" (case-insensitive); used to exclude award texts.
const AWARD_RE = /\baward\b/i;
8
+
9
+
10
/**
 * Normalizes whitespace: splits on whitespace runs, drops empty pieces and rejoins
 * with single spaces. Nullish input yields an empty string.
 * @param {string | null | undefined} text - Raw text.
 * @returns {string} Normalized text.
 */
function normalizeText(text) {
  const source = text ?? "";
  return source.split(/\s+/).filter(Boolean).join(" ");
}
13
+
14
+
15
/**
 * Resolves an href against a base URL and returns the parsed URL when it is http(s).
 * @param {string | null | undefined} rawHref - Raw href attribute value.
 * @param {string} baseUrl - Base used for relative hrefs.
 * @returns {URL | null} The parsed URL object, or null when the href is missing,
 *   unparseable, or uses another protocol.
 */
function toHttpUrl(rawHref, baseUrl) {
  if (!rawHref) return null;

  let parsed;
  try {
    parsed = new URL(rawHref, baseUrl);
  } catch {
    return null;
  }
  const isHttp = /^https?:$/i.test(parsed.protocol);
  return isHttp ? parsed : null;
}
25
+
26
+
27
/**
 * Builds an item GUID: the hex SHA-256 digest of the input.
 * Requires the module-level `_deps.createHash` to have been set by fetchItems.
 * @param {string} input - Value to hash.
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const sha256 = _deps.createHash("sha256");
  const fed = sha256.update(input);
  return fed.digest("hex");
}
30
+
31
+
32
/**
 * Heuristic filter for anchor text that looks like a paper title.
 *
 * A title qualifies when it is at least 20 characters long, does not contain the
 * word "award", and has at least four ASCII alphanumeric words (a hyphenated
 * token counts as one word).
 *
 * @param {string} title - Normalized anchor text.
 * @returns {boolean} True when the text passes all three checks.
 */
function isLikelyPaperTitle(title) {
  const isLongEnough = Boolean(title) && title.length >= 20;
  if (!isLongEnough || AWARD_RE.test(title)) return false;

  const wordCount = (title.match(/[A-Za-z0-9][A-Za-z0-9-]*/g) ?? []).length;
  return wordCount >= 4;
}
38
+
39
+
40
/**
 * Returns the first standalone 20xx year found in the text.
 *
 * The pattern itself restricts matches to 2000–2099, so the former explicit range
 * check was unreachable and has been removed.
 *
 * @param {string | null | undefined} text - Text to scan.
 * @returns {number | undefined} The year, or undefined when none is present.
 */
function findYear(text) {
  const m = normalizeText(text).match(/\b(20\d{2})\b/);
  return m ? Number(m[1]) : undefined;
}
47
+
48
+
49
/**
 * Collects summary, venue/category and year for a paper link by walking from the
 * anchor up through at most 8 nodes (the anchor and its ancestors).
 *
 * At each level: the first `<i>` text of 24+ characters becomes the summary, the
 * first `<span>` text matching VENUE_HINT_RE (and not AWARD_RE) becomes the
 * category, and the first `<span>` text containing a year supplies the year.
 * A value found at a lower level is never overwritten.
 *
 * @param {object} anchor - The paper's anchor node.
 * @returns {{ summary: string | undefined, category: string | undefined, year: number | undefined }}
 */
function extractContext(anchor) {
  const found = { summary: undefined, category: undefined, year: undefined };

  let current = anchor;
  let depth = 0;
  while (depth < 8 && current) {
    if (!found.summary) {
      const italicText = normalizeText(current.querySelector?.("i")?.textContent);
      if (italicText && italicText.length >= 24) found.summary = italicText;
    }

    const spanTexts = (current.querySelectorAll?.("span") ?? []).flatMap((el) => {
      const text = normalizeText(el.textContent);
      return text ? [text] : [];
    });

    if (!found.category) {
      found.category = spanTexts.find((text) => VENUE_HINT_RE.test(text) && !AWARD_RE.test(text));
    }
    if (found.year == null) {
      const firstYear = spanTexts.map((text) => findYear(text)).find((value) => value != null);
      if (firstYear != null) found.year = firstYear;
    }

    current = current.parentNode ?? null;
    depth += 1;
  }

  return found;
}
84
+
85
+
86
/**
 * Fetches the publications page and emits one feed item per paper-like link.
 *
 * Anchors are taken from `<main>` (or the whole document when absent), filtered by
 * the title heuristic and the blocked-host list, and de-duplicated by URL. The
 * publication date is 1 January (UTC) of the year found near the link, or the
 * current time when no year is found.
 *
 * @param {string} sourceId - Source identifier forwarded to `ctx.fetchHtml`.
 * @param {object} ctx - Host context providing `deps` and `fetchHtml`.
 * @returns {Promise<object[]>} The extracted feed items (never empty).
 * @throws {Error} When no paper entry could be extracted.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const page = await ctx.fetchHtml(sourceId, { waitMs: 3500 });
  const root = _deps.parseHtml(page.html);
  const scope = root.querySelector("main") ?? root;

  const visited = new Set();
  const items = [];

  for (const anchor of scope.querySelectorAll("a[href]")) {
    const title = normalizeText(anchor.textContent);
    if (!isLikelyPaperTitle(title)) continue;

    const url = toHttpUrl(anchor.getAttribute("href"), page.finalUrl);
    if (!url || BLOCKED_HOST_RE.test(url.hostname)) continue;

    const link = url.href;
    if (visited.has(link)) continue;
    visited.add(link);

    const context = extractContext(anchor);
    const pubDate = context.year == null ? new Date() : new Date(Date.UTC(context.year, 0, 1));

    items.push({
      guid: hashGuid(link),
      title,
      link,
      pubDate,
      summary: context.summary || undefined,
    });
  }

  if (items.length === 0) {
    throw new Error("[opendrivelab-publications] 未解析到论文条目,页面结构可能已变化");
  }
  return items;
}
124
+
125
+
126
// Default export: the plugin descriptor (id, URL matcher and fetch entry point).
export default {
  id: "opendrivelab-publications",
  // Matches http(s)://[www.]opendrivelab.com/publications, with an optional
  // trailing slash and optional query string; case-insensitive.
  listUrlPattern: /^https?:\/\/(www\.)?opendrivelab\.com\/publications\/?(\?.*)?$/i,
  fetchItems,
};
@@ -0,0 +1,333 @@
1
// Host-provided dependencies; fetchItems assigns ctx.deps here before any helper runs.
let _deps;

// OpenDriveLab home-page plugin: parses the content shown on the home page and
// emits FeedItem objects (no enrich step).

// Plugin id; also written to each item's sourceId and used in error messages.
const SITE_ID = "opendrivelab";

// Lower-cased heading texts that are site navigation, not content titles.
const NAVIGATION_TITLES = new Set([
  "news", "recruit", "research", "publication",
  "dataset", "event", "more", "team",
  "sponsor", "opendrivelab", "embodied ai", "autonomous driving",
]);

// Lower-cased link texts that label an action button rather than a title.
const ACTION_LABELS = new Set([
  "paper", "page", "blog", "code",
  "github", "dataset", "demo", "video",
  "poster", "slides", "community", "cite",
  "checkout at mmlab.hk/mm-hand",
]);

// Normalized (lower-cased, no trailing slash) paths of site navigation pages.
const NAVIGATION_PATHS = new Set([
  "/", "/embodiedai", "/autonomousdriving", "/publications",
  "/events", "/team", "/recruit", "/ccai9025",
]);
46
+
47
/**
 * Squeezes whitespace runs to single spaces and strips leading/trailing whitespace.
 * @param {string | null | undefined} text - Raw text; nullish counts as empty.
 * @returns {string} Normalized text.
 */
function normalizeText(text) {
  const value = text ?? "";
  const singleSpaced = value.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
50
+
51
/**
 * Hashes the input with SHA-256 and returns the hex digest used as an item GUID.
 * Requires the module-level `_deps.createHash` to have been set by fetchItems.
 * @param {string} input - Value to hash.
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const hash = _deps.createHash("sha256");
  return hash.update(input).digest("hex");
}
54
+
55
/**
 * Resolves an href to an absolute http(s) URL string.
 * @param {string | null | undefined} rawHref - Raw href attribute value.
 * @param {string} baseUrl - Base used for relative hrefs.
 * @returns {string | null} The absolute URL, or null for missing, fragment-only,
 *   `javascript:`, `mailto:`, unparseable or non-http(s) hrefs.
 */
function toAbsoluteHttpUrl(rawHref, baseUrl) {
  const href = rawHref ? rawHref.trim() : "";
  const ignoredPrefixes = ["#", "javascript:", "mailto:"];
  if (href === "" || ignoredPrefixes.some((prefix) => href.startsWith(prefix))) return null;

  let resolved;
  try {
    resolved = new URL(href, baseUrl);
  } catch {
    return null;
  }
  return /^https?:$/i.test(resolved.protocol) ? resolved.href : null;
}
67
+
68
/**
 * Canonicalizes a URL path for comparison: strips trailing slashes and lower-cases.
 * @param {string | null | undefined} pathname - URL pathname.
 * @returns {string} The normalized path; "/" when empty or only slashes.
 */
function normalizePath(pathname) {
  const withoutTrailingSlashes = pathname ? pathname.replace(/\/+$/, "") : "";
  return withoutTrailingSlashes === "" ? "/" : withoutTrailingSlashes.toLowerCase();
}
73
+
74
/**
 * Detects whether the fetched page looks like an anti-bot challenge page.
 *
 * Checks the final URL for a challenge path, the raw HTML for challenge script
 * markers, and the visible text for typical challenge wording.
 *
 * @param {object} root - Parsed document exposing `textContent`.
 * @param {string | null | undefined} html - Raw page markup.
 * @param {string | null | undefined} finalUrl - URL the fetch ended on.
 * @returns {boolean} True when any challenge marker is present.
 */
function isBlockedPage(root, html, finalUrl) {
  const visibleText = normalizeText(root.textContent).toLowerCase();
  const markup = (html ?? "").toLowerCase();
  const address = (finalUrl ?? "").toLowerCase();

  const markupMarkers = ["__cf_chl_opt", "/cdn-cgi/challenge-platform"];
  const textMarkers = ["just a moment", "checking your browser", "attention required", "captcha"];

  return (
    address.includes("/cdn-cgi/challenge") ||
    markupMarkers.some((marker) => markup.includes(marker)) ||
    textMarkers.some((marker) => visibleText.includes(marker))
  );
}
86
+
87
/**
 * Tells whether a heading/anchor text should be ignored as a content title.
 *
 * Noise is: empty text, a known navigation label, a pager such as "1 / 5", or any
 * text shorter than 8 characters.
 *
 * @param {string | null | undefined} text - Candidate title.
 * @returns {boolean} True when the text is noise.
 */
function isNoiseTitle(text) {
  const title = normalizeText(text);
  const lower = title.toLowerCase();
  return (
    title === "" ||
    NAVIGATION_TITLES.has(lower) ||
    /^\d+\s*\/\s*\d+$/.test(lower) ||
    title.length < 8
  );
}
96
+
97
/**
 * Tells whether a text is an action/badge label rather than descriptive content.
 *
 * Empty text, any known action label, and any text mentioning "best paper",
 * "award", "finalist" or "position paper" count as labels.
 *
 * @param {string | null | undefined} text - Candidate text.
 * @returns {boolean} True when the text is a label.
 */
function isActionLabel(text) {
  const label = normalizeText(text).toLowerCase();
  return (
    label === "" ||
    ACTION_LABELS.has(label) ||
    /(best paper|award|finalist|position paper)/i.test(label)
  );
}
104
+
105
/**
 * Walks from a node up through at most 8 levels looking for the nearest element
 * that contains between 1 and 20 links, which is taken as the content card.
 * Non-element nodes are skipped but still count towards the 8 levels.
 *
 * @param {object} node - Starting node (heading or anchor).
 * @returns {object} The matching element, or the node's parent (or the node
 *   itself when it has no parent) if none matches.
 */
function findContentContainer(node) {
  let candidate = node;
  let steps = 0;

  while (steps < 8 && candidate) {
    steps += 1;
    if (candidate.nodeType === _deps.NodeType.ELEMENT_NODE) {
      const linkCount = (candidate.querySelectorAll?.("a[href]") ?? []).length;
      if (linkCount >= 1 && linkCount <= 20) return candidate;
    }
    candidate = candidate.parentNode ?? null;
  }

  return node.parentNode ?? node;
}
118
+
119
/**
 * Extracts a date from free text, trying three formats in order:
 *   1. numeric `20YY.MM.DD` (also `/` or `-` separators),
 *   2. English month name followed by a 20xx year (day defaults to the 1st),
 *   3. a bare 20xx year (defaults to 1 January).
 *
 * Previously the numeric tier accepted any 1–2 digit month/day and let `Date.UTC`
 * roll impossible values over into another month or year (the NaN guards could
 * never fire). Month and day are now range-checked, and an impossible numeric date
 * falls through to the coarser tiers.
 *
 * @param {string | null | undefined} text - Text to scan.
 * @returns {Date | undefined} The date at 12:00 UTC, or undefined when no date is found.
 */
function parseDateFromText(text) {
  const normalized = normalizeText(text);
  if (!normalized) return undefined;

  // Noon UTC keeps a date-only value on the same calendar day in every time zone.
  const atNoonUtc = (year, monthIndex, day) => new Date(Date.UTC(year, monthIndex, day, 12, 0, 0));

  const numeric = normalized.match(/\b(20\d{2})[./-](\d{1,2})[./-](\d{1,2})\b/);
  if (numeric) {
    const year = Number(numeric[1]);
    const month = Number(numeric[2]);
    const day = Number(numeric[3]);
    if (month >= 1 && month <= 12 && day >= 1 && day <= 31) {
      return atNoonUtc(year, month - 1, day);
    }
  }

  const named = normalized.match(/\b(January|February|March|April|May|June|July|August|September|October|November|December)\s*,?\s*(20\d{2})\b/i);
  if (named) {
    const monthNames = [
      "january", "february", "march", "april", "may", "june",
      "july", "august", "september", "october", "november", "december",
    ];
    // The pattern only matches names from this list, so indexOf cannot return -1.
    return atNoonUtc(Number(named[2]), monthNames.indexOf(named[1].toLowerCase()), 1);
  }

  const yearOnly = normalized.match(/\b(20\d{2})\b/);
  if (yearOnly) return atNoonUtc(Number(yearOnly[1]), 0, 1);

  return undefined;
}
160
+
161
/**
 * Derives an approximate publication date from an arXiv link.
 *
 * New-style arXiv identifiers start with YYMM, so the year and month can be read
 * from the URL path. Besides `/abs/` links this now also accepts `/pdf/` and
 * `/html/` links, which carry the same identifier and were previously ignored.
 *
 * @param {string} link - Absolute URL of the item.
 * @returns {Date | undefined} The first day of the identifier's month at 12:00 UTC,
 *   or undefined for non-arXiv links, unparseable URLs or unrecognised identifiers.
 */
function parseDateFromLink(link) {
  let url;
  try {
    url = new URL(link);
  } catch {
    return undefined;
  }
  if (!/(^|\.)arxiv\.org$/i.test(url.hostname)) return undefined;

  const m = url.pathname.match(/\/(?:abs|pdf|html)\/(\d{2})(\d{2})\.\d+/);
  if (!m) return undefined;

  const year = 2000 + Number(m[1]);
  const month = Number(m[2]);
  if (month < 1 || month > 12) return undefined;

  return new Date(Date.UTC(year, month - 1, 1, 12, 0, 0));
}
176
+
177
/**
 * Chooses the most representative link inside a content container.
 *
 * Preference order: an anchor whose text equals the title (case-insensitive), then
 * the first anchor with an action-label text, then the first anchor that does not
 * point to a navigation path, and finally the first usable anchor.
 *
 * @param {object} container - Node exposing `querySelectorAll`.
 * @param {string} title - Title of the entry the container belongs to.
 * @param {string} baseUrl - Base URL for resolving relative hrefs.
 * @returns {string | null} The chosen absolute URL, or null when the container has
 *   no anchor with both text and a valid http(s) link.
 */
function pickPrimaryLink(container, title, baseUrl) {
  const candidates = [];
  for (const anchor of container.querySelectorAll("a[href]")) {
    const text = normalizeText(anchor.textContent);
    const link = text ? toAbsoluteHttpUrl(anchor.getAttribute("href"), baseUrl) : null;
    if (!link) continue;
    candidates.push({
      text,
      textLower: text.toLowerCase(),
      link,
      path: normalizePath(new URL(link).pathname),
    });
  }
  if (candidates.length === 0) return null;

  const wantedTitle = normalizeText(title).toLowerCase();
  const chosen =
    candidates.find((item) => item.textLower === wantedTitle) ??
    candidates.find((item) => ACTION_LABELS.has(item.textLower)) ??
    candidates.find((item) => !NAVIGATION_PATHS.has(item.path)) ??
    candidates[0];
  return chosen.link;
}
202
+
203
/**
 * Picks a short descriptive text from a content container.
 *
 * Texts are gathered from `i`, `p`, `h2`, `h3` and `span` elements, in that
 * selector order. Empty texts, the title itself, action labels, pagers such as
 * "1 / 5" and short date-like texts (24 characters or fewer) are skipped. The first
 * distinct remaining text of 20–400 characters is returned.
 *
 * @param {object} container - Node exposing `querySelectorAll`.
 * @param {string} title - The entry title, excluded from the candidates.
 * @returns {string | undefined} The summary, or undefined when nothing qualifies.
 */
function pickSummary(container, title) {
  const isCandidate = (text) => {
    if (!text || text === title) return false;
    if (isActionLabel(text)) return false;
    if (/^\d+\s*\/\s*\d+$/.test(text)) return false;
    const isShortDate = Boolean(parseDateFromText(text)) && text.length <= 24;
    return !isShortDate;
  };

  // A Set keeps first-seen order, so it both de-duplicates and preserves priority.
  const distinctTexts = new Set();
  for (const selector of ["i", "p", "h2", "h3", "span"]) {
    for (const node of container.querySelectorAll(selector)) {
      const text = normalizeText(node.textContent);
      if (isCandidate(text)) distinctTexts.add(text);
    }
  }

  for (const text of distinctTexts) {
    if (text.length >= 20 && text.length <= 400) return text;
  }
  return undefined;
}
219
+
220
/**
 * Determines an entry's publication date from surrounding text, then from its link.
 *
 * Text sources are tried in order: the heading's parent, the container, then up to
 * three ancestors of the container (ancestors with more than 3000 characters of
 * text are skipped). If none yields a date, an arXiv-style link date is used, and
 * finally the current time.
 *
 * @param {object} headingNode - Heading or anchor node of the entry.
 * @param {object} container - The entry's content container.
 * @param {string} link - The entry's absolute URL.
 * @returns {Date} The best available date.
 */
function extractPubDate(headingNode, container, link) {
  const sources = [
    normalizeText(headingNode.parentNode?.textContent),
    normalizeText(container.textContent),
  ].filter(Boolean);

  let ancestor = container.parentNode ?? null;
  for (let depth = 0; depth < 3 && ancestor; depth += 1) {
    const ancestorText = normalizeText(ancestor.textContent);
    if (ancestorText !== "" && ancestorText.length <= 3000) sources.push(ancestorText);
    ancestor = ancestor.parentNode ?? null;
  }

  for (const source of sources) {
    const parsed = parseDateFromText(source);
    if (parsed) return parsed;
  }

  return parseDateFromLink(link) ?? new Date();
}
241
+
242
/**
 * Assembles a feed item for this site from the extracted fields.
 *
 * The GUID is the hash of the link, the author is fixed to "OpenDriveLab", and an
 * empty summary is stored as undefined.
 *
 * @param {{ title: string, link: string, pubDate: Date, summary?: string }} fields
 * @returns {object} The feed item.
 */
function toFeedItem({ title, link, pubDate, summary }) {
  const guid = hashGuid(link);
  const item = {
    guid,
    title,
    link,
    pubDate,
    author: "OpenDriveLab",
    summary: summary || undefined,
    sourceId: SITE_ID,
  };
  return item;
}
253
+
254
/**
 * Builds feed items from the page's h1/h2/h3 headings.
 *
 * Each non-noise heading is paired with the primary link of its content container;
 * headings without a link, or whose link was already emitted, are skipped.
 *
 * @param {object} root - Parsed document.
 * @param {string} finalUrl - Base URL for resolving relative hrefs.
 * @param {Set<string>} seen - Links already emitted; updated in place.
 * @returns {object[]} The items found via headings.
 */
function parseFromHeadings(root, finalUrl, seen) {
  const items = [];

  for (const heading of root.querySelectorAll("h1, h2, h3")) {
    const title = normalizeText(heading.textContent);
    if (isNoiseTitle(title)) continue;

    const container = findContentContainer(heading);
    const link = pickPrimaryLink(container, title, finalUrl);
    const isNewLink = Boolean(link) && !seen.has(link);
    if (!isNewLink) continue;

    seen.add(link);
    const summary = pickSummary(container, title);
    const pubDate = extractPubDate(heading, container, link);
    items.push(toFeedItem({ title, link, summary, pubDate }));
  }

  return items;
}
279
+
280
/**
 * Builds feed items from anchors whose own text looks like a title.
 *
 * An anchor qualifies when its text is at least 20 characters, is neither noise
 * nor an action label, resolves to a not-yet-seen http(s) link, and does not point
 * to a site navigation path.
 *
 * @param {object} root - Parsed document.
 * @param {string} finalUrl - Base URL for resolving relative hrefs.
 * @param {Set<string>} seen - Links already emitted; updated in place.
 * @returns {object[]} The items found via title-like anchors.
 */
function parseFromTitleAnchors(root, finalUrl, seen) {
  const items = [];

  for (const anchor of root.querySelectorAll("a[href]")) {
    const title = normalizeText(anchor.textContent);
    const looksLikeTitle =
      Boolean(title) && title.length >= 20 && !isNoiseTitle(title) && !isActionLabel(title);
    if (!looksLikeTitle) continue;

    const link = toAbsoluteHttpUrl(anchor.getAttribute("href"), finalUrl);
    if (!link || seen.has(link)) continue;

    const path = normalizePath(new URL(link).pathname);
    if (NAVIGATION_PATHS.has(path)) continue;

    const container = findContentContainer(anchor);
    seen.add(link);
    const summary = pickSummary(container, title);
    const pubDate = extractPubDate(anchor, container, link);
    items.push(toFeedItem({ title, link, summary, pubDate }));
  }

  return items;
}
309
+
310
/**
 * Fetches the OpenDriveLab home page and returns its feed items.
 *
 * Items are collected from headings first and then from title-like anchors, sharing
 * one set of seen links so the same link is emitted only once. When nothing is
 * found, the error distinguishes a challenge page from a layout change.
 *
 * @param {string} sourceId - Source identifier forwarded to `ctx.fetchHtml`.
 * @param {object} ctx - Host context providing `deps` and `fetchHtml`.
 * @returns {Promise<object[]>} The extracted feed items (never empty).
 * @throws {Error} When no item could be extracted.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 4500 });
  const root = _deps.parseHtml(html);

  const seenLinks = new Set();
  const headingItems = parseFromHeadings(root, finalUrl, seenLinks);
  const anchorItems = parseFromTitleAnchors(root, finalUrl, seenLinks);
  const items = headingItems.concat(anchorItems);

  if (items.length === 0) {
    const reason = isBlockedPage(root, html, finalUrl)
      ? `[${SITE_ID}] 命中站点风控验证页,当前会话无法稳定抓取`
      : `[${SITE_ID}] 未解析到首页条目,页面结构可能已变化`;
    throw new Error(reason);
  }
  return items;
}
328
+
329
// Default export: the plugin descriptor (id, URL matcher and fetch entry point).
export default {
  id: SITE_ID,
  // Matches only the site root: http(s)://[www.]opendrivelab.com, with an optional
  // trailing slash and optional query string; case-insensitive.
  listUrlPattern: /^https?:\/\/(www\.)?opendrivelab\.com\/?(\?.*)?$/i,
  fetchItems,
};