rssany 0.1.2 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/README.md +28 -50
  2. package/app/plugins/builtin/agi-eval-evaluation.rssany.js +188 -0
  3. package/app/plugins/builtin/amii-research-talent.rssany.js +73 -0
  4. package/app/plugins/builtin/anthropic-research.rssany.js +155 -0
  5. package/app/plugins/builtin/appen-resources.rssany.js +155 -0
  6. package/app/plugins/builtin/baai-wudao-paper-article.rssany.js +185 -0
  7. package/app/plugins/builtin/baaidata-csdn.rssany.js +242 -0
  8. package/app/plugins/builtin/baidu-research.rssany.js +222 -0
  9. package/app/plugins/builtin/brightdata-blog.rssany.js +301 -0
  10. package/app/plugins/builtin/bytedance-seed-research.rssany.js +231 -0
  11. package/app/plugins/builtin/five-radar.rssany.js +490 -0
  12. package/app/plugins/builtin/flageval-news.rssany.js +118 -0
  13. package/app/plugins/builtin/google-deepmind-research.rssany.js +223 -0
  14. package/app/plugins/builtin/google-research-datasets.rssany.js +171 -0
  15. package/app/plugins/builtin/google-research.rssany.js +220 -0
  16. package/app/plugins/builtin/google.rssany.js +187 -0
  17. package/app/plugins/builtin/hacker-news-newest.rssany.js +130 -0
  18. package/app/plugins/builtin/harvard-dataverse.rssany.js +166 -0
  19. package/app/plugins/builtin/huaweicloud-bbs-blogs.rssany.js +185 -0
  20. package/app/plugins/builtin/lingowhale.rssany.js +119 -0
  21. package/app/plugins/builtin/meituan-tech.rssany.js +130 -0
  22. package/app/plugins/builtin/meta-ai-publications.rssany.js +221 -0
  23. package/app/plugins/builtin/mila-quebec.rssany.js +199 -0
  24. package/app/plugins/builtin/mit-csail-research.rssany.js +208 -0
  25. package/app/plugins/builtin/moonshot.rssany.js +127 -0
  26. package/app/plugins/builtin/opendatalab-news.rssany.js +174 -0
  27. package/app/plugins/builtin/opendatalab.rssany.js +109 -0
  28. package/app/plugins/builtin/opendrivelab-autonomous-driving.rssany.js +114 -0
  29. package/app/plugins/builtin/opendrivelab-embodiedai.rssany.js +114 -0
  30. package/app/plugins/builtin/opendrivelab-publications.rssany.js +130 -0
  31. package/app/plugins/builtin/opendrivelab.rssany.js +333 -0
  32. package/app/plugins/builtin/paperswithcode.rssany.js +227 -0
  33. package/app/plugins/builtin/pjlab-adg-publications.rssany.js +202 -0
  34. package/app/plugins/builtin/rss.rssany.js +11 -1
  35. package/app/plugins/builtin/selectdataset.rssany.js +206 -0
  36. package/app/plugins/builtin/sensetime-tech-achievements.rssany.js +154 -0
  37. package/app/plugins/builtin/supervisely-blog.rssany.js +159 -0
  38. package/app/plugins/builtin/uci-ml-repository.rssany.js +111 -0
  39. package/app/plugins/builtin/venturebeat.rssany.js +97 -0
  40. package/app/plugins/builtin/worldlabs.rssany.js +129 -0
  41. package/app/plugins/builtin/x.rssany.js +159 -0
  42. package/app/plugins/builtin/xiaohongshu.rssany.js +283 -0
  43. package/app/plugins/builtin/zhipu-research.rssany.js +334 -0
  44. package/dist/index.js +79 -9
  45. package/dist/index.js.map +1 -1
  46. package/package.json +1 -1
  47. package/webui/build/200.html +6 -6
  48. package/webui/build/_app/immutable/assets/0.BB88QFoe.css +1 -0
  49. package/webui/build/_app/immutable/assets/{homeFeedPanelStore.BopJZtHu.css → homeFeedPanelStore.iOmfP2qL.css} +1 -1
  50. package/webui/build/_app/immutable/chunks/CZD-YNDw.js +31 -0
  51. package/webui/build/_app/immutable/chunks/{DcAshVxe.js → D6VIKef0.js} +1 -1
  52. package/webui/build/_app/immutable/chunks/{EIZIMsXK.js → Dbqx2mXq.js} +1 -1
  53. package/webui/build/_app/immutable/chunks/DeX-oq5W.js +41 -0
  54. package/webui/build/_app/immutable/chunks/{BXCWEhUd.js → dhB8G5Is.js} +1 -1
  55. package/webui/build/_app/immutable/entry/{app.DdgnooOk.js → app.XPso7q7g.js} +2 -2
  56. package/webui/build/_app/immutable/entry/start.Db4snNCd.js +1 -0
  57. package/webui/build/_app/immutable/nodes/0.BKTQePmA.js +11 -0
  58. package/webui/build/_app/immutable/nodes/{1.5DFDaT4c.js → 1.BS3_Rfxm.js} +1 -1
  59. package/webui/build/_app/immutable/nodes/{10.OVK4i9XE.js → 10.CyyxDCIS.js} +1 -1
  60. package/webui/build/_app/immutable/nodes/{11.Dhn_rO4A.js → 11.CtYgIaGj.js} +1 -1
  61. package/webui/build/_app/immutable/nodes/{14.B_KpJLxn.js → 14.D5OEGPR2.js} +1 -1
  62. package/webui/build/_app/immutable/nodes/{15.RaWaA-0I.js → 15.B4dFN1Gk.js} +1 -1
  63. package/webui/build/_app/immutable/nodes/{16.DSUgqolV.js → 16.M7ZII7tl.js} +1 -1
  64. package/webui/build/_app/immutable/nodes/{3.wQvGs9w-.js → 3.7r8v7qkm.js} +1 -1
  65. package/webui/build/_app/immutable/nodes/{5.CCtn90c0.js → 5.CHIzoGrb.js} +1 -1
  66. package/webui/build/_app/immutable/nodes/{6.C2_mjW1u.js → 6.BDBqx-GY.js} +1 -1
  67. package/webui/build/_app/immutable/nodes/{7.Dwz6W7A1.js → 7.D5czsDmz.js} +1 -1
  68. package/webui/build/_app/immutable/nodes/{8.DzkEw6rx.js → 8.pjVNsCdV.js} +1 -1
  69. package/webui/build/_app/immutable/nodes/{9.DtlXEwe1.js → 9.CsARv1BH.js} +1 -1
  70. package/webui/build/_app/version.json +1 -1
  71. package/webui/build/_app/immutable/assets/0.C6Q_nuW9.css +0 -1
  72. package/webui/build/_app/immutable/chunks/CkUAV0m0.js +0 -41
  73. package/webui/build/_app/immutable/chunks/CtijX1u3.js +0 -31
  74. package/webui/build/_app/immutable/entry/start.DhJaJZhR.js +0 -1
  75. package/webui/build/_app/immutable/nodes/0.BE05Cuc4.js +0 -11
@@ -0,0 +1,185 @@
1
// Huawei Cloud community blog plugin: scrapes list entries from
// https://bbs.huaweicloud.com/blogs (list only by default, no enrichment).

/** Host-provided helpers (`createHash`, `parseHtml`); assigned from `ctx.deps` in fetchItems. */
let _deps;

/** Origin used as the base when resolving the fetched page URL. */
const HUAWEICLOUD_ORIGIN = "https://bbs.huaweicloud.com";

/** Matches a blog post path with no trailing slash: `/blogs/<digits>`. */
const BLOG_PATH_RE = /^\/blogs\/\d+$/;

/** Captures year, month and day from `YYYY-M-D` or `YYYY/M/D` anywhere in a string. */
const DATE_RE = /(\d{4})[/-](\d{1,2})[/-](\d{1,2})/;
10
+
11
+
12
/**
 * Collapses every run of whitespace into one space and trims both ends.
 * @param {string|null|undefined} text - Raw text; nullish is treated as "".
 * @returns {string} The flattened text.
 */
function normalizeText(text) {
  const raw = text ?? "";
  const singleSpaced = raw.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
15
+
16
+
17
/**
 * Normalizes a title and drops a trailing "HOT" marker (case-insensitive)
 * when it is separated from the title by whitespace.
 * @param {string|null|undefined} text - Raw title text.
 * @returns {string} The cleaned title.
 */
function cleanTitle(text) {
  const flattened = normalizeText(text);
  return flattened.replace(/\s+HOT$/i, "");
}
20
+
21
+
22
/**
 * Derives a stable GUID from a string as its hex-encoded SHA-256 digest.
 * Relies on `_deps.createHash`, so fetchItems must have run first.
 * @param {string} input - Value to hash (the item link in this plugin).
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const hasher = _deps.createHash("sha256");
  return hasher.update(input).digest("hex");
}
25
+
26
+
27
/**
 * Resolves an href against a base and returns it only if it is http(s).
 * @param {string|null|undefined} href - Possibly relative link.
 * @param {string|URL|undefined} baseUrl - Base used for relative resolution.
 * @returns {string|null} Absolute URL, or null when empty, unparsable, or not http(s).
 */
function toAbsoluteUrl(href, baseUrl) {
  if (!href) return null;
  let resolved;
  try {
    resolved = new URL(href, baseUrl);
  } catch {
    return null;
  }
  const isWeb = /^https?:$/i.test(resolved.protocol);
  return isWeb ? resolved.href : null;
}
37
+
38
+
39
/**
 * Extracts the `/blogs/<id>` path from a link, ignoring trailing slashes.
 * @param {string|null|undefined} href - Possibly relative link.
 * @param {string|URL|undefined} baseUrl - Base used for relative resolution.
 * @returns {string|null} Normalized blog path, or null if the link is not a blog post.
 */
function getBlogPath(href, baseUrl) {
  const absolute = toAbsoluteUrl(href, baseUrl);
  if (!absolute) return null;
  let pathname;
  try {
    ({ pathname } = new URL(absolute));
  } catch {
    return null;
  }
  const trimmed = pathname.replace(/\/+$/, "");
  if (!BLOG_PATH_RE.test(trimmed)) return null;
  return trimmed;
}
50
+
51
+
52
/**
 * Finds the first `YYYY-M-D` / `YYYY/M/D` date in the text and interprets it
 * as midnight at UTC+08:00.
 * @param {string|null|undefined} text - Text that may contain a date.
 * @returns {Date|undefined} Parsed date, or undefined when none is found or it is invalid.
 */
function parseDate(text) {
  const match = DATE_RE.exec(normalizeText(text));
  if (match === null) return undefined;
  const year = match[1];
  const month = match[2].padStart(2, "0");
  const day = match[3].padStart(2, "0");
  const parsed = new Date(`${year}-${month}-${day}T00:00:00+08:00`);
  if (Number.isNaN(parsed.getTime())) return undefined;
  return parsed;
}
61
+
62
+
63
/**
 * Looks for a publication date anywhere in a card's text.
 * @param {object|null|undefined} card - Card element exposing `textContent`.
 * @returns {Date|undefined} The first date found in the card text, if any.
 */
function extractDate(card) {
  const cardText = card?.textContent;
  return parseDate(normalizeText(cardText));
}
67
+
68
+
69
/**
 * Reads the author name from a card, preferring the dedicated author anchor
 * and falling back to any link into the user profile area.
 * @param {object|null|undefined} card - Card element.
 * @returns {string|undefined} Author name, or undefined when absent or empty.
 */
function extractAuthor(card) {
  if (!card) return undefined;
  let anchor = card.querySelector('a[id^="ydcomm_blog_author_"]');
  if (anchor == null) {
    anchor = card.querySelector('a[href^="/community/usersnew/"]');
  }
  const name = normalizeText(anchor?.textContent);
  return name ? name : undefined;
}
75
+
76
+
77
/**
 * Picks a summary for a card: the longest text among anchors that point at the
 * same blog post as the title but whose text differs from the title.
 * @param {object|null|undefined} card - Card element.
 * @param {string} title - Already-extracted title, excluded from candidates.
 * @param {string} linkPath - Normalized `/blogs/<id>` path of the post.
 * @param {string|URL} pageUrl - Base for resolving relative hrefs.
 * @returns {string|undefined} Summary text, or undefined when no candidate exists.
 */
function extractSummary(card, title, linkPath, pageUrl) {
  if (!card) return undefined;
  const candidates = [];
  for (const anchor of card.querySelectorAll("a[href]")) {
    const hrefPath = getBlogPath(anchor.getAttribute("href") || "", pageUrl);
    if (!hrefPath || hrefPath !== linkPath) continue;
    const text = normalizeText(anchor.textContent);
    if (text && text !== title) candidates.push(text);
  }
  // Strict ">" keeps the earliest candidate when several share the maximum length.
  const longest = candidates.reduce(
    (best, text) => (text.length > best.length ? text : best),
    ""
  );
  return longest || undefined;
}
91
+
92
+
93
/**
 * Converts one blog card element into a feed item.
 * @param {object|null|undefined} card - Card element (or a fallback container).
 * @param {string|URL} pageUrl - Base for resolving relative hrefs.
 * @returns {object|null} Item with guid, title, link, pubDate, summary and author,
 *   or null when the card has no usable blog link or title.
 */
function mapCardToFeedItem(card, pageUrl) {
  const titleAnchor =
    card?.querySelector('a[id^="ydcomm_blog_title_"][href]') ??
    card?.querySelector('a[title][href^="/blogs/"], a[title][href*="/blogs/"]');

  const linkPath = getBlogPath(titleAnchor?.getAttribute("href") ?? "", pageUrl);
  if (!linkPath) return null;

  // Rebuild the link from the normalized path so query/hash noise is dropped.
  const link = toAbsoluteUrl(linkPath, pageUrl);
  if (!link) return null;

  // Prefer the `title` attribute; fall back to the anchor's visible text.
  let title = cleanTitle(titleAnchor?.getAttribute("title"));
  if (!title) title = cleanTitle(titleAnchor?.textContent);
  if (!title) return null;

  // Cards without a recognizable date are stamped with the fetch time.
  const pubDate = extractDate(card) ?? new Date();
  const summary = extractSummary(card, title, linkPath, pageUrl);
  const author = extractAuthor(card);

  const feedItem = {
    guid: hashGuid(link),
    title,
    link,
    pubDate,
    summary: summary || undefined,
    author,
  };
  return feedItem;
}
119
+
120
+
121
/**
 * Walks up from a node (inspecting the node itself plus up to 7 ancestors)
 * looking for the element whose id starts with "ydcomm_blog_content_".
 * @param {object|null|undefined} node - Starting node.
 * @returns {object|null} The card root, or null when none is found in range.
 */
function findCardRoot(node) {
  const MAX_HOPS = 8;
  let cursor = node ?? null;
  let hops = 0;
  while (hops < MAX_HOPS && cursor) {
    // Some node types (e.g. text nodes) may not expose getAttribute.
    if (typeof cursor.getAttribute === "function") {
      const idAttr = cursor.getAttribute("id") || "";
      if (idAttr.startsWith("ydcomm_blog_content_")) return cursor;
    }
    cursor = cursor.parentNode ?? null;
    hops += 1;
  }
  return null;
}
132
+
133
+
134
/**
 * Primary parser: maps every `div[id^="ydcomm_blog_content_"]` card to a feed
 * item, keeping only the first item per link.
 * @param {object} root - Parsed document root.
 * @param {string|URL} pageUrl - Base for resolving relative hrefs.
 * @returns {object[]} Feed items in document order.
 */
function parseFromCardBlocks(root, pageUrl) {
  // A Map preserves insertion order, so values() yields document order.
  const byLink = new Map();
  for (const card of root.querySelectorAll('div[id^="ydcomm_blog_content_"]')) {
    const item = mapCardToFeedItem(card, pageUrl);
    if (item && !byLink.has(item.link)) byLink.set(item.link, item);
  }
  return [...byLink.values()];
}
146
+
147
+
148
/**
 * Fallback parser: starts from title anchors and maps the enclosing card
 * (or, failing that, the anchor's parent) to a feed item, first link wins.
 * @param {object} root - Parsed document root.
 * @param {string|URL} pageUrl - Base for resolving relative hrefs.
 * @returns {object[]} Feed items in document order.
 */
function parseFromTitleAnchors(root, pageUrl) {
  const byLink = new Map();
  for (const anchor of root.querySelectorAll('a[id^="ydcomm_blog_title_"][href]')) {
    const container = findCardRoot(anchor) ?? anchor.parentNode;
    const item = mapCardToFeedItem(container, pageUrl);
    if (item && !byLink.has(item.link)) byLink.set(item.link, item);
  }
  return [...byLink.values()];
}
161
+
162
+
163
/**
 * Fetches the blog list page and parses it into feed items.
 * Card blocks are tried first; title anchors are used only if no card yields an item.
 * @param {string} sourceId - List page URL.
 * @param {object} ctx - Host context providing `deps` and `fetchHtml`.
 * @returns {Promise<object[]>} Parsed feed items (never empty).
 * @throws {Error} When neither parsing strategy finds any blog entry.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const page = await ctx.fetchHtml(sourceId, { waitMs: 4500 });
  const root = _deps.parseHtml(page.html);
  const pageUrl = new URL(page.finalUrl || sourceId, HUAWEICLOUD_ORIGIN);

  let items = parseFromCardBlocks(root, pageUrl);
  if (items.length === 0) {
    items = parseFromTitleAnchors(root, pageUrl);
  }
  if (items.length === 0) {
    throw new Error("[huaweicloud-bbs-blogs] 未解析到博客条目，页面结构可能已变化");
  }
  return items;
}
178
+
179
+
180
/**
 * Plugin descriptor for the Huawei Cloud community blog list.
 * `listUrlPattern` accepts the /blogs list URL with an optional trailing slash and query.
 */
const huaweicloudBbsBlogsPlugin = {
  id: "huaweicloud-bbs-blogs",
  listUrlPattern: /^https?:\/\/bbs\.huaweicloud\.com\/blogs\/?(\?.*)?$/i,
  refreshInterval: "1h",
  fetchItems,
};

export default huaweicloudBbsBlogsPlugin;
@@ -0,0 +1,119 @@
1
+ // 语鲸 Open API 插件:将语鲸推荐文章 / 今日文章转换为条目列表
2
+ //
3
+ // sourceId 格式:
4
+ // lingowhale://articles — 推荐文章(默认拉取 2 页,可用 ?pages=5 调整)
5
+ // lingowhale://today — 今日文章(自动翻页拉完)
6
+ //
7
+ // 认证参数可内联于 URL query:
8
+ // lingowhale://articles?app_id=xxx&app_secret=yyy
9
+ // 也可通过环境变量提供(优先级低于 URL 参数):
10
+ // LINGOWHALE_APP_ID / LINGOWHALE_APP_SECRET
11
+
12
+
13
/** Root of the Lingowhale Open API (v1); endpoint paths are appended to it. */
const BASE_URL = "https://open.lingowhale.com/open-api/v1";
14
+
15
+
16
/**
 * Resolves the API credentials for a source.
 *
 * Non-empty `app_id` / `app_secret` query parameters on the sourceId take
 * priority; otherwise the LINGOWHALE_APP_ID / LINGOWHALE_APP_SECRET
 * environment variables are used. An empty query parameter (e.g. `?app_id=`)
 * is treated as absent so it cannot mask a valid environment variable.
 *
 * @param {string} sourceId - e.g. `lingowhale://articles?app_id=x&app_secret=y`.
 * @returns {{appId: string, appSecret: string}} The resolved credentials.
 * @throws {Error} When either credential is missing from both sources.
 */
function resolveCredentials(sourceId) {
  let appId = process.env.LINGOWHALE_APP_ID ?? "";
  let appSecret = process.env.LINGOWHALE_APP_SECRET ?? "";
  try {
    // Swap the custom scheme for http so the WHATWG URL parser exposes the query.
    const parsed = new URL(sourceId.replace(/^lingowhale:/, "http://x"));
    // `||` (not `??`): get() returns "" for an empty parameter, which must not win.
    appId = parsed.searchParams.get("app_id") || appId;
    appSecret = parsed.searchParams.get("app_secret") || appSecret;
  } catch { /* unparsable sourceId: fall back to env vars */ }
  if (!appId || !appSecret) {
    throw new Error(
      "[LingowhalePlugin] 缺少认证信息：请在 sourceId 中提供 ?app_id=&app_secret= 参数，" +
      "或设置环境变量 LINGOWHALE_APP_ID / LINGOWHALE_APP_SECRET"
    );
  }
  return { appId, appSecret };
}
32
+
33
/**
 * Chooses the API endpoint for a source.
 * @param {string} sourceId - Source identifier.
 * @returns {"today"|"articles"} "today" when the sourceId contains "://today", else "articles".
 */
function resolveEndpoint(sourceId) {
  if (sourceId.includes("://today")) {
    return "today";
  }
  return "articles";
}
36
+
37
/**
 * Reads the page limit from the `pages` query parameter of a sourceId.
 * @param {string} sourceId - e.g. `lingowhale://articles?pages=5`.
 * @returns {number} The positive integer from `pages`, or 2 when it is
 *   missing, non-numeric, not positive, or the sourceId cannot be parsed.
 */
function resolveMaxPages(sourceId) {
  const DEFAULT_PAGES = 2;
  let requested;
  try {
    const { searchParams } = new URL(sourceId.replace(/^lingowhale:/, "http://x"));
    requested = Number.parseInt(searchParams.get("pages") ?? "", 10);
  } catch {
    return DEFAULT_PAGES;
  }
  const isUsable = !Number.isNaN(requested) && requested > 0;
  return isUsable ? requested : DEFAULT_PAGES;
}
45
+
46
/**
 * Builds the request headers: the two credential headers plus JSON
 * content negotiation.
 * @param {string} appId - Value for `X-App-ID`.
 * @param {string} appSecret - Value for `X-App-Secret`.
 * @returns {Record<string, string>} Header map.
 */
function buildHeaders(appId, appSecret) {
  const credentials = {
    "X-App-ID": appId,
    "X-App-Secret": appSecret,
  };
  const jsonNegotiation = {
    "Content-Type": "application/json",
    "Accept": "application/json",
  };
  return { ...credentials, ...jsonNegotiation };
}
54
+
55
/**
 * Removes literal `<hl>` and `</hl>` tags (lowercase only) from a string,
 * keeping the text between them.
 * @param {string} text - Text that may contain highlight tags.
 * @returns {string} Text without the tags.
 */
function stripHl(text) {
  const pieces = text.split(/<\/?hl>/);
  return pieces.join("");
}
58
+
59
/**
 * Converts one API article record into a feed item.
 *
 * Empty strings are treated as missing for every optional field, so an empty
 * `orig_url` falls back to the lingowhale.com article page and an empty
 * `title` falls back to the placeholder (matching how `abstract`,
 * `description` and `html` are already handled).
 *
 * @param {object} article - API record; reads entry_id, orig_url, title,
 *   pub_time, abstract, description and html.
 * @returns {object} Feed item with guid, title, link, pubDate, summary, content.
 */
function mapArticle(article) {
  const link = article.orig_url || `https://lingowhale.com/article/${article.entry_id}`;
  const rawSummary = article.abstract || article.description;
  return {
    guid: article.entry_id,
    title: article.title || "(无标题)",
    link,
    // NOTE(review): pub_time is multiplied by 1000, i.e. presumably epoch seconds — confirm against the API.
    pubDate: article.pub_time ? new Date(article.pub_time * 1000) : new Date(),
    summary: rawSummary ? stripHl(rawSummary) : undefined,
    content: article.html || undefined,
  };
}
71
+
72
/**
 * Pulls recommended articles page by page (100 per page).
 * Stops at `maxPages`, when a page comes back empty, or once the number of
 * collected items reaches the `total` reported by the API.
 * @param {string} appId - API app id.
 * @param {string} appSecret - API app secret.
 * @param {number} maxPages - Maximum number of pages to request.
 * @returns {Promise<object[]>} Mapped feed items.
 * @throws {Error} On a non-OK HTTP status or a non-zero API `code`.
 */
async function fetchArticles(appId, appSecret, maxPages) {
  const requestHeaders = buildHeaders(appId, appSecret);
  const collected = [];
  let pageNo = 1;
  while (pageNo <= maxPages) {
    const response = await fetch(`${BASE_URL}/articles?page=${pageNo}&page_size=100`, {
      headers: requestHeaders,
    });
    if (!response.ok) {
      throw new Error(`[LingowhalePlugin] HTTP ${response.status} 拉取文章列表失败`);
    }
    const payload = await response.json();
    if (payload.code !== 0) {
      throw new Error(`[LingowhalePlugin] API 错误：${payload.message}`);
    }
    const batch = payload.data.items ?? [];
    collected.push(...batch.map(mapArticle));
    // A missing total counts as 0, which ends paging after the first page.
    const declaredTotal = payload.data.total ?? 0;
    const isDone = collected.length >= declaredTotal || batch.length === 0;
    if (isDone) break;
    pageNo += 1;
  }
  return collected;
}
87
+
88
/**
 * Pulls today's articles using cursor pagination (20 per request).
 * Stops when the API returns no next cursor, a page is empty, or after
 * 10 requests.
 * @param {string} appId - API app id.
 * @param {string} appSecret - API app secret.
 * @returns {Promise<object[]>} Mapped feed items.
 * @throws {Error} On a non-OK HTTP status or a non-zero API `code`.
 */
async function fetchTodayArticles(appId, appSecret) {
  const MAX_ROUNDS = 10;
  const requestHeaders = buildHeaders(appId, appSecret);
  const collected = [];
  let cursor;
  let round = 0;
  while (round < MAX_ROUNDS) {
    const cursorQuery = cursor ? `&cursor=${encodeURIComponent(cursor)}` : "";
    const response = await fetch(`${BASE_URL}/articles/today?page_size=20${cursorQuery}`, {
      headers: requestHeaders,
    });
    if (!response.ok) {
      throw new Error(`[LingowhalePlugin] HTTP ${response.status} 拉取今日文章失败`);
    }
    const payload = await response.json();
    if (payload.code !== 0) {
      throw new Error(`[LingowhalePlugin] API 错误：${payload.message}`);
    }
    const batch = payload.data.items ?? [];
    collected.push(...batch.map(mapArticle));
    cursor = payload.data.nextCursor;
    if (!cursor || batch.length === 0) break;
    round += 1;
  }
  return collected;
}
105
+
106
+
107
/**
 * Plugin descriptor for the Lingowhale Open API.
 * Handles any `lingowhale://` sourceId and dispatches to the "today" or
 * "articles" fetcher.
 */
const lingowhalePlugin = {
  id: "lingowhale",
  listUrlPattern: /^lingowhale:\/\//,
  refreshInterval: "1h",

  /**
   * @param {string} sourceId - `lingowhale://articles` or `lingowhale://today`, with optional query.
   * @param {object} _ctx - Host context (unused by this plugin).
   * @returns {Promise<object[]>} Feed items.
   */
  async fetchItems(sourceId, _ctx) {
    const { appId, appSecret } = resolveCredentials(sourceId);
    if (resolveEndpoint(sourceId) === "today") {
      return fetchTodayArticles(appId, appSecret);
    }
    // The page limit only applies to the paginated "articles" endpoint.
    return fetchArticles(appId, appSecret, resolveMaxPages(sourceId));
  },
};

export default lingowhalePlugin;
@@ -0,0 +1,130 @@
1
/** Host-provided helpers (`createHash`, `parseHtml`); assigned from `ctx.deps` in fetchItems. */
let _deps;
2
+
3
+ // 美团技术团队博客:https://tech.meituan.com/
4
+ // 列表页解析 .post-container,可选 enrichItem 拉取正文
5
+
6
+
7
/**
 * Squeezes whitespace runs to single spaces and trims the result.
 * @param {string|null|undefined} text - Raw text; nullish becomes "".
 * @returns {string} Normalized text.
 */
function normalizeText(text) {
  const value = text ?? "";
  return value.replace(/\s+/g, " ").trim();
}
10
+
11
/**
 * Resolves an href against a base URL, accepting only http(s) results.
 * @param {string|null|undefined} href - Possibly relative link.
 * @param {string|undefined} baseUrl - Base used for relative resolution.
 * @returns {string|null} Absolute URL, or null when empty, unparsable, or not http(s).
 */
function toAbsoluteUrl(href, baseUrl) {
  if (!href) return null;
  try {
    const { protocol, href: absolute } = new URL(href, baseUrl);
    return /^https?:$/i.test(protocol) ? absolute : null;
  } catch {
    return null;
  }
}
21
+
22
/**
 * Parses dates such as "2026年03月13日" or "2026-03-13" (also "/" separated)
 * and interprets them as midnight at UTC+08:00.
 * @param {string|null|undefined} text - Text that may contain a date.
 * @returns {Date|undefined} Parsed date, or undefined when none is found or it is invalid.
 */
function parseDate(text) {
  const match = /(\d{4})[年/-](\d{1,2})[月/-](\d{1,2})/.exec(normalizeText(text));
  if (!match) return undefined;
  const [year, month, day] = match.slice(1);
  const paddedMonth = month.padStart(2, "0");
  const paddedDay = day.padStart(2, "0");
  const parsed = new Date(`${year}-${paddedMonth}-${paddedDay}T00:00:00+08:00`);
  if (Number.isNaN(parsed.getTime())) return undefined;
  return parsed;
}
32
+
33
/**
 * Derives a stable GUID from an article link as its hex-encoded SHA-256 digest.
 * Relies on `_deps.createHash`, so fetchItems must have run first.
 * @param {string} link - Absolute article URL.
 * @returns {string} Hex digest.
 */
function hashGuid(link) {
  const hasher = _deps.createHash("sha256");
  return hasher.update(link).digest("hex");
}
36
+
37
/**
 * Scrapes the Meituan tech blog list page into feed items.
 *
 * Each `.post-container` contributes one item: link (title anchor, falling
 * back to the "more" link), title, date, author, summary and tags. Links are
 * de-duplicated and restricted to tech.meituan.com and its subdomains.
 *
 * @param {string} sourceId - List page URL; also stored on each item as `sourceRef`.
 * @param {object} ctx - Host context providing `deps` and `fetchHtml`.
 * @returns {Promise<object[]>} Parsed feed items (never empty).
 * @throws {Error} When no article entry could be parsed from the page.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, {
    waitMs: 5000,
    waitForSelector: ".post-container",
    waitForSelectorTimeoutMs: 25000,
  });
  const root = _deps.parseHtml(html);
  const baseUrl = finalUrl || sourceId;

  const items = [];
  const seen = new Set();

  // Supports both .row.post-container-wrapper > .col-md-6 > .post-container
  // and a bare .post-container.
  let containers = root.querySelectorAll(".post-container");
  if (containers.length === 0) {
    containers = root.querySelectorAll(".post-container-wrapper .post-container");
  }
  for (const el of containers) {
    const titleEl = el.querySelector(".post-title a[href]");
    let href = titleEl?.getAttribute("href");
    if (!href) {
      const moreLink = el.querySelector("a.more-link[href]");
      if (moreLink) href = moreLink.getAttribute("href");
    }
    if (!href) continue;

    const link = toAbsoluteUrl(href, baseUrl);
    if (!link) continue;
    try {
      const { hostname } = new URL(link);
      // Exact host or a true subdomain only; a bare endsWith() would also
      // accept look-alike hosts such as "nottech.meituan.com".
      const isMeituanTech =
        hostname === "tech.meituan.com" || hostname.endsWith(".tech.meituan.com");
      if (!isMeituanTech) continue;
    } catch {
      continue;
    }
    if (seen.has(link)) continue;
    seen.add(link);

    const title =
      normalizeText(titleEl?.textContent) ||
      normalizeText(el.querySelector(".post-title")?.textContent);
    if (!title) continue;

    // Entries without a recognizable date are stamped with the fetch time.
    const dateEl = el.querySelector(".m-post-date");
    const pubDate = parseDate(dateEl?.textContent) ?? new Date();

    const authorEl = el.querySelector(".m-post-nick");
    const authorRaw = normalizeText(authorEl?.textContent);
    const author = authorRaw ? [authorRaw] : undefined;

    const summaryEl = el.querySelector(".post-content.post-expect");
    let summary = "";
    if (summaryEl) {
      // Work on a copy so removing the "more" link does not alter the parsed tree.
      // NOTE(review): clone()/remove() are assumed to be provided by the host's HTML parser — confirm.
      const clone = summaryEl.clone();
      const moreLink = clone.querySelector("a.more-link");
      if (moreLink) moreLink.remove();
      summary = normalizeText(clone.textContent);
    }

    const tagLinks = el.querySelectorAll(".tag-links a[rel='tag']");
    const categories = tagLinks.length
      ? Array.from(tagLinks).map((a) => normalizeText(a.textContent)).filter(Boolean)
      : undefined;

    items.push({
      guid: hashGuid(link),
      title,
      link,
      pubDate,
      author,
      summary: summary || undefined,
      categories,
      sourceRef: sourceId,
    });
  }

  if (items.length === 0) {
    throw new Error("[meituan-tech] 未解析到文章条目，请检查列表页结构是否变化");
  }

  return items;
}
117
+
118
/**
 * Optional enrichment step: delegates to the host's `ctx.extractItem` to
 * obtain the detail-page content for an item.
 * @param {object} item - Feed item produced by fetchItems.
 * @param {object} ctx - Host context providing `extractItem`.
 * @returns {Promise<object>} Whatever `ctx.extractItem(item)` resolves to.
 */
async function enrichItem(item, ctx) {
  const enriched = await ctx.extractItem(item);
  return enriched;
}
122
+
123
/**
 * Plugin descriptor for the Meituan tech blog.
 * `listUrlPattern` covers any page on the site; `detailUrlPattern` covers
 * article pages of the form /YYYY/MM/DD/<slug>.html.
 */
const meituanTechPlugin = {
  id: "meituan-tech",
  listUrlPattern: /^https?:\/\/(www\.)?tech\.meituan\.com(\/.*)?$/i,
  detailUrlPattern: /^https?:\/\/(www\.)?tech\.meituan\.com\/\d{4}\/\d{2}\/\d{2}\/[^/]+\.html(?:\?.*)?$/i,
  refreshInterval: "1day",
  fetchItems,
  enrichItem,
};

export default meituanTechPlugin;
@@ -0,0 +1,221 @@
1
// Meta AI Publications plugin: scrapes publication entries from the results
// page (no content enrichment).

/** Host-provided helpers (`createHash`, `parseHtml`); assigned from `ctx.deps` in fetchItems. */
let _deps;

/** Matches a publication detail path under /research/publications/. */
const PUBLICATION_PATH_RE = /^\/research\/publications\/[^?#]+\/?$/i;

/** Matches the ai.meta.com results page filtered to `content_types[0]=publication`. */
const PUBLICATION_RESULTS_URL_RE =
  /^https?:\/\/ai\.meta\.com\/results\/?\?.*content_types(?:%5B0%5D|\[0\])=publication(?:&.*)?$/i;

/** Maps lowercase English month names to zero-based month indexes. */
const MONTH_TO_INDEX = Object.fromEntries(
  [
    "january",
    "february",
    "march",
    "april",
    "may",
    "june",
    "july",
    "august",
    "september",
    "october",
    "november",
    "december",
  ].map((name, index) => [name, index])
);
24
+
25
+
26
/**
 * Strips zero-width characters (U+200B–U+200D, U+FEFF), collapses
 * whitespace runs to single spaces, and trims.
 * @param {string|null|undefined} text - Raw text; nullish becomes "".
 * @returns {string} Normalized text.
 */
function normalizeText(text) {
  const withoutZeroWidth = (text ?? "").replace(/[\u200B-\u200D\uFEFF]/g, "");
  const singleSpaced = withoutZeroWidth.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
32
+
33
+
34
/**
 * Derives a stable GUID from a string as its hex-encoded SHA-256 digest.
 * Relies on `_deps.createHash`, so fetchItems must have run first.
 * @param {string} input - Value to hash (the publication link in this plugin).
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const hasher = _deps.createHash("sha256");
  return hasher.update(input).digest("hex");
}
37
+
38
+
39
/**
 * Resolves an href and returns it only when it is an http(s) link on
 * ai.meta.com whose path is a publication detail page.
 * @param {string|null|undefined} rawHref - Raw `href` attribute value.
 * @param {string} pageUrl - Base used for relative resolution.
 * @returns {string|null} Absolute publication URL, or null when rejected.
 */
function toAbsolutePublicationUrl(rawHref, pageUrl) {
  if (!rawHref) return null;
  const href = rawHref.trim();
  const isUnusable = href === "" || href.startsWith("#") || href.startsWith("javascript:");
  if (isUnusable) return null;

  let url;
  try {
    url = new URL(href, pageUrl);
  } catch {
    return null;
  }
  const isPublication =
    /^https?:$/i.test(url.protocol) &&
    url.hostname === "ai.meta.com" &&
    PUBLICATION_PATH_RE.test(url.pathname);
  return isPublication ? url.href : null;
}
53
+
54
+
55
/**
 * Parses a date written as "<Month name> <day>, <year>" (e.g. "March 5, 2024")
 * into midnight UTC of that day.
 *
 * The whole (normalized) string must be the date. Impossible calendar days
 * such as "February 31, 2024" or "March 0, 2024" are rejected instead of
 * being silently rolled over into a neighbouring month by `Date.UTC`.
 *
 * @param {string|null|undefined} rawDate - Candidate date text.
 * @returns {Date|undefined} Parsed date, or undefined when the text is not a valid date.
 */
function parsePubDate(rawDate) {
  const normalized = normalizeText(rawDate);
  if (!normalized) return undefined;
  const m = normalized.match(
    /^(January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{1,2}),\s*(\d{4})$/i
  );
  if (!m) return undefined;
  const month = MONTH_TO_INDEX[m[1].toLowerCase()];
  const day = Number(m[2]);
  const year = Number(m[3]);
  if (month == null || !Number.isFinite(day) || !Number.isFinite(year)) return undefined;

  const date = new Date(Date.UTC(year, month, day, 0, 0, 0));
  // Round-trip check: Date.UTC normalizes out-of-range days rather than failing.
  const isRealDay =
    date.getUTCFullYear() === year &&
    date.getUTCMonth() === month &&
    date.getUTCDate() === day;
  return isRealDay ? date : undefined;
}
69
+
70
+
71
/**
 * Builds a fallback title from the last path segment of a link: URL-decodes
 * it, turns runs of "-" / "_" into spaces, and upper-cases the first
 * lowercase letter of each word.
 * @param {string} link - Absolute URL.
 * @returns {string} Derived title, or "" when the link has no slug or anything throws.
 */
function decodeTitleFromLink(link) {
  try {
    const segments = new URL(link).pathname.split("/").filter(Boolean);
    const slug = segments.at(-1) ?? "";
    if (!slug) return "";
    const words = decodeURIComponent(slug).replace(/[-_]+/g, " ");
    const titleCased = words.replace(/\b([a-z])/g, (letter) => letter.toUpperCase());
    return normalizeText(titleCased);
  } catch {
    return "";
  }
}
82
+
83
+
84
/**
 * Heuristic: does this text look like a category label rather than a title?
 * Rejects empty text, text over 80 chars, text containing ". ! ?", a
 * 4-digit run, or no ASCII letters. Otherwise accepts when at least 70% of
 * the ASCII letters are uppercase or the text contains "|".
 * @param {string} text - Normalized text.
 * @returns {boolean} True when the text looks like a category label.
 */
function looksLikeCategory(text) {
  if (!text || text.length > 80) return false;
  if (/[.!?]/.test(text) || /\d{4}/.test(text)) return false;

  const letters = text.match(/[A-Za-z]/g) ?? [];
  if (letters.length === 0) return false;

  const upperCount = letters.filter((ch) => ch >= "A" && ch <= "Z").length;
  const isMostlyUpper = upperCount / letters.length >= 0.7;
  return isMostlyUpper || text.includes("|");
}
94
+
95
+
96
/**
 * Heuristic: does this text look like a comma-separated author list?
 * Requires 8–700 characters, at least one comma, at least one ASCII letter,
 * and none of ". ! ?".
 * @param {string} text - Normalized text.
 * @returns {boolean} True when the text looks like an author line.
 */
function looksLikeAuthorLine(text) {
  if (!text) return false;
  const hasPlausibleLength = text.length >= 8 && text.length <= 700;
  if (!hasPlausibleLength || !text.includes(",")) return false;
  const hasSentencePunctuation = /[.!?]/.test(text);
  return !hasSentencePunctuation && /[A-Za-z]/.test(text);
}
103
+
104
+
105
/**
 * Collects the normalized, non-empty text of each node, dropping duplicates
 * while keeping first-seen order.
 * @param {Iterable<object>} nodes - Nodes exposing `textContent`.
 * @returns {string[]} Unique texts in encounter order.
 */
function collectUniqueTexts(nodes) {
  // A Set iterates in insertion order, so spreading it preserves first-seen order.
  const unique = new Set();
  for (const node of nodes) {
    const text = normalizeText(node.textContent);
    if (text) unique.add(text);
  }
  return [...unique];
}
116
+
117
+
118
/**
 * Finds the element that most plausibly wraps one publication card, walking
 * up from the anchor (the anchor itself plus up to 7 ancestors).
 *
 * Preferred match: the nearest node with at least two distinct headings and
 * a paragraph that parses as a date. Fallback: the NEAREST node with at
 * least one heading and three distinct paragraphs. Keeping the nearest
 * (rather than the outermost) fallback avoids selecting a container that
 * spans several cards, which would give every anchor the first card's text.
 *
 * @param {object} anchor - Publication link element.
 * @returns {object} The chosen card root; the anchor's parent (or the anchor
 *   itself) when nothing qualifies.
 */
function findCardRoot(anchor) {
  let cur = anchor;
  let fallback = null;
  for (let i = 0; i < 8 && cur; i += 1) {
    // Nodes without querySelectorAll contribute no headings or paragraphs.
    const headings = collectUniqueTexts(cur.querySelectorAll?.("h1, h2, h3, h4, h5, h6") ?? []);
    const paragraphs = collectUniqueTexts(cur.querySelectorAll?.("p") ?? []);
    const hasDate = paragraphs.some((p) => parsePubDate(p) != null);
    if (headings.length >= 2 && hasDate) return cur;
    if (fallback === null && headings.length >= 1 && paragraphs.length >= 3) fallback = cur;
    cur = cur.parentNode ?? null;
  }
  return fallback ?? anchor.parentNode ?? anchor;
}
131
+
132
+
133
/**
 * Chooses a title from the card's headings, ignoring boilerplate
 * ("Publication", "Read the paper") and category-like labels. Prefers the
 * first heading of at least 8 characters, then the last remaining heading,
 * and finally a title derived from the link slug.
 * @param {object} card - Card root element.
 * @param {string} link - Absolute publication URL (used for the slug fallback).
 * @returns {string} The title, possibly "" when nothing can be derived.
 */
function extractTitle(card, link) {
  const isBoilerplate = (text) => /^(publication|read the paper)$/i.test(text);
  const candidates = collectUniqueTexts(card.querySelectorAll("h1, h2, h3, h4, h5, h6")).filter(
    (text) => !isBoilerplate(text) && !looksLikeCategory(text)
  );
  const chosen = candidates.find((text) => text.length >= 8) ?? candidates.at(-1) ?? "";
  return chosen || decodeTitleFromLink(link);
}
142
+
143
+
144
/**
 * Extracts the publication date from a card: first from a valid
 * `<time datetime>` attribute, otherwise from the first p/span/div whose
 * full text parses as "<Month> <day>, <year>".
 * @param {object} card - Card root element.
 * @returns {Date|undefined} The date, or undefined when none is found.
 */
function extractPubDateFromCard(card) {
  const stamp = card.querySelector("time[datetime]")?.getAttribute("datetime");
  if (stamp) {
    const fromAttribute = new Date(stamp);
    const isValid = !Number.isNaN(fromAttribute.getTime());
    if (isValid) return fromAttribute;
  }

  const texts = collectUniqueTexts(card.querySelectorAll("p, span, div"));
  for (const candidate of texts) {
    const fromText = parsePubDate(candidate);
    if (fromText) return fromText;
  }
  return undefined;
}
157
+
158
+
159
/**
 * Picks the card's summary: the first paragraph of at least 40 characters
 * that is not boilerplate, a date, an author line, a category label, or the
 * title itself.
 * @param {object} card - Card root element.
 * @param {string} title - Already-extracted title.
 * @returns {string|undefined} Summary text, or undefined when none qualifies.
 */
function extractSummary(card, title) {
  const isSummaryCandidate = (text) =>
    !/^(publication|read the paper)$/i.test(text) &&
    parsePubDate(text) == null &&
    !looksLikeAuthorLine(text) &&
    !looksLikeCategory(text) &&
    text !== title;

  const paragraphs = collectUniqueTexts(card.querySelectorAll("p"));
  const summary = paragraphs.find((text) => text.length >= 40 && isSummaryCandidate(text));
  return summary || undefined;
}
170
+
171
+
172
/**
 * Fetches the Meta AI results page and turns each distinct publication link
 * into a feed item (title, date, summary taken from the surrounding card).
 * @param {string} sourceId - Results page URL.
 * @param {object} ctx - Host context providing `deps` and `fetchHtml`.
 * @returns {Promise<object[]>} Parsed feed items (never empty).
 * @throws {Error} On an HTTP status >= 400, or when no publication is parsed.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const response = await ctx.fetchHtml(sourceId, { waitMs: 3500, purify: false });
  if (response.status >= 400) {
    throw new Error(`[meta-ai-publications] 抓取失败: HTTP ${response.status}`);
  }

  const root = _deps.parseHtml(response.html);
  const pageUrl = response.finalUrl || sourceId;
  const visited = new Set();
  const items = [];

  for (const anchor of root.querySelectorAll('a[href*="/research/publications/"]')) {
    const link = toAbsolutePublicationUrl(anchor.getAttribute("href"), pageUrl);
    if (!link || visited.has(link)) continue;
    // Marked before the title check: a link that fails once is not retried
    // through a later anchor pointing at the same URL.
    visited.add(link);

    const card = findCardRoot(anchor);
    const title = extractTitle(card, link);
    if (!title) continue;

    // Cards without a recognizable date are stamped with the fetch time.
    const pubDate = extractPubDateFromCard(card) ?? new Date();
    const summary = extractSummary(card, title);

    items.push({
      guid: hashGuid(link),
      title,
      link,
      pubDate,
      author: "Meta AI",
      summary,
      sourceId: "meta-ai-publications",
    });
  }

  if (items.length === 0) {
    throw new Error(
      "[meta-ai-publications] 未解析到 publication 条目，可能是页面结构变化，或需要更长等待时间/可用浏览器会话"
    );
  }
  return items;
}
215
+
216
+
217
/**
 * Plugin descriptor for Meta AI publications; handles the ai.meta.com
 * results page filtered to publications.
 */
const metaAiPublicationsPlugin = {
  id: "meta-ai-publications",
  listUrlPattern: PUBLICATION_RESULTS_URL_RE,
  fetchItems,
};

export default metaAiPublicationsPlugin;