rssany 0.1.2 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/README.md +28 -50
  2. package/app/plugins/builtin/agi-eval-evaluation.rssany.js +188 -0
  3. package/app/plugins/builtin/amii-research-talent.rssany.js +73 -0
  4. package/app/plugins/builtin/anthropic-research.rssany.js +155 -0
  5. package/app/plugins/builtin/appen-resources.rssany.js +155 -0
  6. package/app/plugins/builtin/baai-wudao-paper-article.rssany.js +185 -0
  7. package/app/plugins/builtin/baaidata-csdn.rssany.js +242 -0
  8. package/app/plugins/builtin/baidu-research.rssany.js +222 -0
  9. package/app/plugins/builtin/brightdata-blog.rssany.js +301 -0
  10. package/app/plugins/builtin/bytedance-seed-research.rssany.js +231 -0
  11. package/app/plugins/builtin/five-radar.rssany.js +490 -0
  12. package/app/plugins/builtin/flageval-news.rssany.js +118 -0
  13. package/app/plugins/builtin/google-deepmind-research.rssany.js +223 -0
  14. package/app/plugins/builtin/google-research-datasets.rssany.js +171 -0
  15. package/app/plugins/builtin/google-research.rssany.js +220 -0
  16. package/app/plugins/builtin/google.rssany.js +187 -0
  17. package/app/plugins/builtin/hacker-news-newest.rssany.js +130 -0
  18. package/app/plugins/builtin/harvard-dataverse.rssany.js +166 -0
  19. package/app/plugins/builtin/huaweicloud-bbs-blogs.rssany.js +185 -0
  20. package/app/plugins/builtin/lingowhale.rssany.js +119 -0
  21. package/app/plugins/builtin/meituan-tech.rssany.js +130 -0
  22. package/app/plugins/builtin/meta-ai-publications.rssany.js +221 -0
  23. package/app/plugins/builtin/mila-quebec.rssany.js +199 -0
  24. package/app/plugins/builtin/mit-csail-research.rssany.js +208 -0
  25. package/app/plugins/builtin/moonshot.rssany.js +127 -0
  26. package/app/plugins/builtin/opendatalab-news.rssany.js +174 -0
  27. package/app/plugins/builtin/opendatalab.rssany.js +109 -0
  28. package/app/plugins/builtin/opendrivelab-autonomous-driving.rssany.js +114 -0
  29. package/app/plugins/builtin/opendrivelab-embodiedai.rssany.js +114 -0
  30. package/app/plugins/builtin/opendrivelab-publications.rssany.js +130 -0
  31. package/app/plugins/builtin/opendrivelab.rssany.js +333 -0
  32. package/app/plugins/builtin/paperswithcode.rssany.js +227 -0
  33. package/app/plugins/builtin/pjlab-adg-publications.rssany.js +202 -0
  34. package/app/plugins/builtin/rss.rssany.js +11 -1
  35. package/app/plugins/builtin/selectdataset.rssany.js +206 -0
  36. package/app/plugins/builtin/sensetime-tech-achievements.rssany.js +154 -0
  37. package/app/plugins/builtin/supervisely-blog.rssany.js +159 -0
  38. package/app/plugins/builtin/uci-ml-repository.rssany.js +111 -0
  39. package/app/plugins/builtin/venturebeat.rssany.js +97 -0
  40. package/app/plugins/builtin/worldlabs.rssany.js +129 -0
  41. package/app/plugins/builtin/x.rssany.js +159 -0
  42. package/app/plugins/builtin/xiaohongshu.rssany.js +283 -0
  43. package/app/plugins/builtin/zhipu-research.rssany.js +334 -0
  44. package/dist/index.js +79 -9
  45. package/dist/index.js.map +1 -1
  46. package/package.json +1 -1
  47. package/webui/build/200.html +6 -6
  48. package/webui/build/_app/immutable/assets/0.BB88QFoe.css +1 -0
  49. package/webui/build/_app/immutable/assets/{homeFeedPanelStore.BopJZtHu.css → homeFeedPanelStore.iOmfP2qL.css} +1 -1
  50. package/webui/build/_app/immutable/chunks/CZD-YNDw.js +31 -0
  51. package/webui/build/_app/immutable/chunks/{DcAshVxe.js → D6VIKef0.js} +1 -1
  52. package/webui/build/_app/immutable/chunks/{EIZIMsXK.js → Dbqx2mXq.js} +1 -1
  53. package/webui/build/_app/immutable/chunks/DeX-oq5W.js +41 -0
  54. package/webui/build/_app/immutable/chunks/{BXCWEhUd.js → dhB8G5Is.js} +1 -1
  55. package/webui/build/_app/immutable/entry/{app.DdgnooOk.js → app.XPso7q7g.js} +2 -2
  56. package/webui/build/_app/immutable/entry/start.Db4snNCd.js +1 -0
  57. package/webui/build/_app/immutable/nodes/0.BKTQePmA.js +11 -0
  58. package/webui/build/_app/immutable/nodes/{1.5DFDaT4c.js → 1.BS3_Rfxm.js} +1 -1
  59. package/webui/build/_app/immutable/nodes/{10.OVK4i9XE.js → 10.CyyxDCIS.js} +1 -1
  60. package/webui/build/_app/immutable/nodes/{11.Dhn_rO4A.js → 11.CtYgIaGj.js} +1 -1
  61. package/webui/build/_app/immutable/nodes/{14.B_KpJLxn.js → 14.D5OEGPR2.js} +1 -1
  62. package/webui/build/_app/immutable/nodes/{15.RaWaA-0I.js → 15.B4dFN1Gk.js} +1 -1
  63. package/webui/build/_app/immutable/nodes/{16.DSUgqolV.js → 16.M7ZII7tl.js} +1 -1
  64. package/webui/build/_app/immutable/nodes/{3.wQvGs9w-.js → 3.7r8v7qkm.js} +1 -1
  65. package/webui/build/_app/immutable/nodes/{5.CCtn90c0.js → 5.CHIzoGrb.js} +1 -1
  66. package/webui/build/_app/immutable/nodes/{6.C2_mjW1u.js → 6.BDBqx-GY.js} +1 -1
  67. package/webui/build/_app/immutable/nodes/{7.Dwz6W7A1.js → 7.D5czsDmz.js} +1 -1
  68. package/webui/build/_app/immutable/nodes/{8.DzkEw6rx.js → 8.pjVNsCdV.js} +1 -1
  69. package/webui/build/_app/immutable/nodes/{9.DtlXEwe1.js → 9.CsARv1BH.js} +1 -1
  70. package/webui/build/_app/version.json +1 -1
  71. package/webui/build/_app/immutable/assets/0.C6Q_nuW9.css +0 -1
  72. package/webui/build/_app/immutable/chunks/CkUAV0m0.js +0 -41
  73. package/webui/build/_app/immutable/chunks/CtijX1u3.js +0 -31
  74. package/webui/build/_app/immutable/entry/start.DhJaJZhR.js +0 -1
  75. package/webui/build/_app/immutable/nodes/0.BE05Cuc4.js +0 -11
package/README.md CHANGED
@@ -14,13 +14,12 @@
14
14
  ## 功能概览
15
15
 
16
16
  - **统一订阅**:在 `.rssany/sources.json` 中配置网站列表、标准 RSS、IMAP 邮件等,由调度器按 `refresh` 策略拉取。
17
- - **可插拔信源**:`app/plugins/builtin/` 与 `.rssany/plugins/` 中的 **Site** 插件(`.rssany.js` / `.rssany.ts`),自定义列表解析与详情规则。
17
+ - **可插拔信源**:**Site / Source** 插件(`.rssany.js` / `.rssany.ts`),见 **[插件配置说明](./docs/plugins.md)**。
18
18
  - **正文与解析**:在信源 `fetchItems`(及需要的 `ctx.extractItem` 等)内完成;入库后跑 pipeline。
19
19
  - **固定 pipeline**:`app/pipeline/` 中打标签、翻译等,由 `.rssany/config.json` 的 `pipeline.steps` 开关(**不是**用户目录下的 pipeline 插件)。
20
20
  - **LLM 辅助**:解析、提取、标签、翻译等可按配置走 OpenAI 兼容接口。
21
21
  - **站点登录**:需登录的站点通过 Puppeteer 管理 Cookie(与产品用户账号无关)。
22
22
  - **可选远端投递**:若 `config.json` 中 **`deliver.url`** 非空,在写库与 pipeline 完成后将条目以 **`{ sourceRef, items }`** JSON **POST** 到该 URL(由 `app/deliver/post.ts` 发送);留空则仅本地消费。
23
- - **MCP**:条目检索等能力以 MCP 暴露,供 Cursor、Claude 等使用。
24
23
  - **Web 界面**:SvelteKit 构建产物由后端托管;**Feeds** 等需 **邮箱校验**;**`/admin`** 需 **`users.role === 'admin'`**(可从 **`/me`** 进入)。
25
24
 
26
25
  ---
@@ -42,82 +41,62 @@
42
41
 
43
42
  ## 快速开始
44
43
 
45
- ### 环境要求
44
+ 日常使用只需 **Node.js 20.x–23.x**(与 `package.json` 的 `engines` 一致):
46
45
 
47
- - Node.js **20.x–23.x**(与 `package.json` 的 `engines` 字段一致)
48
- - **pnpm**
49
-
50
- ### 安装依赖
46
+ ### 全局安装(推荐)
51
47
 
52
48
  ```bash
53
- pnpm install
54
- pnpm run webui:install
49
+ npm install -g rssany # 与 npm i -g rssany 相同
50
+ rssany
55
51
  ```
56
52
 
57
- ### 配置
58
-
59
- 1. 复制环境变量示例并按需填写(JWT、OAuth、SMTP、LLM 等):
60
- ```bash
61
- cp .env.example .env
62
- ```
63
- 2. 信源与全局配置:首次启动会在 **`~/.rssany/`**(Windows:`%USERPROFILE%\.rssany\`)下自动从包内 **`init/`** 目录中的默认数据复制生成 `sources.json`、`config.json`(若已存在则不会覆盖)。也可手动复制仓库里的 `init/sources.json`、`init/config.json`。
64
- 3. (可选)LLM:在 `.env` 中设置 `OPENAI_API_KEY`、`OPENAI_BASE_URL`、`OPENAI_MODEL` 等。
53
+ 安装包内已包含构建好的后端与 Web 界面;启动后用浏览器打开终端里提示的地址(默认 **`http://127.0.0.1:18473/`**,端口可在**运行命令时当前目录**下的 `.env` 里设置 `PORT`)。
65
54
 
66
- ### 运行
55
+ - **数据目录**:首次运行会在 **`~/.rssany/`**(Windows:`%USERPROFILE%\.rssany\`)自动从包内 **`init/`** 生成 `sources.json`、`config.json` 等(已存在则不会覆盖)。
56
+ - **可选配置**:在启动 `rssany` 时的**当前目录**放置 `.env`(可参考仓库里的 `.env.example`),用于 JWT、OAuth、SMTP、LLM(如 `OPENAI_API_KEY` / `OPENAI_BASE_URL` / `OPENAI_MODEL`)等。
57
+ - **重置全部本地数据**(结束占用 `PORT` 的进程并删除用户目录,慎用):执行 **`rssany reset`**;在含 `.env` 的目录下运行可读取 `PORT` / `RSSANY_USER_DIR`,或事先在环境里导出这些变量。
67
58
 
68
- **开发**(后端根路径托管 `webui` 构建产物,改前端需重新构建或 watch):
69
-
70
- ```bash
71
- # 推荐:API + 前端 watch(修改 Svelte 后自动写入构建目录,刷新浏览器即可)
72
- pnpm run dev:all
73
-
74
- # 或分步:先打一次前端再起后端
75
- pnpm run webui:build
76
- pnpm dev
77
- ```
59
+ 等价于在项目里执行 `node node_modules/rssany/dist/index.js`;CLI 名为 **`rssany`**。
78
60
 
79
- 默认监听 `**http://127.0.0.1:18473/**`(端口见 `.env.example` 中 `PORT`,避免与常见开发端口冲突)。
61
+ ### 从源码运行(开发 / 贡献)
80
62
 
81
- **重置本地数据**(结束占用 `PORT` 的监听进程,并删除 **`~/.rssany/`**,或 `RSSANY_USER_DIR` 所设目录):
63
+ 需要 **pnpm**:
82
64
 
83
65
  ```bash
84
- pnpm reset
66
+ pnpm install
67
+ pnpm run webui:install
68
+ cp .env.example .env # 按需修改
85
69
  ```
86
70
 
87
- **仅调试 WebUI 热更新**(可选):`cd webui && pnpm dev`(Vite 代理到本机后端,见 `webui/vite.config.ts`)。
88
-
89
- **生产**:
71
+ **开发**(后端托管 `webui` 构建目录;改前端可 watch):
90
72
 
91
73
  ```bash
92
- pnpm run webui:build && pnpm start
74
+ pnpm run dev:all
93
75
  ```
94
76
 
95
- ### npm 全局安装(或 `npx`)
77
+ 或分步:先执行 `pnpm run webui:build`,再执行 `pnpm dev`。
96
78
 
97
- 发布包时 `prepublishOnly` 会执行 `build:all`(后端 `vite build` + `webui:build`)。安装后:
79
+ **仅调试 WebUI 热更新**(可选):`cd webui && pnpm dev`(Vite 代理到本机后端,见 `webui/vite.config.ts`)。
98
80
 
99
- ```bash
100
- npm install -g rssany
101
- rssany
102
- ```
81
+ **生产**(本仓库):`pnpm run webui:build && pnpm start`。
103
82
 
104
- 重置数据(结束 `PORT` 监听进程并删除用户目录):**`rssany reset`**(与仓库内 **`pnpm reset`** 相同逻辑;可在含 `.env` 的目录下执行以读取 `PORT` / `RSSANY_USER_DIR`)。
83
+ **重置本地数据**(与全局安装的 `rssany reset` 逻辑相同):`pnpm reset`。
105
84
 
106
- 用户数据在 **`~/.rssany/`**(Windows:`%USERPROFILE%\.rssany`),与工作目录无关。可选环境变量 **`RSSANY_USER_DIR`** 可指定其它路径。等价于 `node node_modules/rssany/dist/index.js`;CLI 名称为 `rssany`。内置 `app/plugins/builtin/`、`statics/`、`webui/build` 随包安装路径解析。
85
+ 发布到 npm 时,`prepublishOnly` 会执行 `build:all`(后端 `vite build` + `webui:build`)。
107
86
 
108
87
  ---
109
88
 
110
89
  ## 数据流(简图)
111
90
 
112
91
  ```
113
- sources.json / Site 插件
92
+ sources.json / 信源插件
114
93
  → 调度器触发 fetchItems
115
94
  → upsertItems
116
95
  → pipeline(每条一次)
117
96
  → [可选] deliver.url POST(出站,非入站 API)
118
97
  ```
119
98
 
120
- 消费侧:**RSS/XML**、`**/api/*`**、**MCP**、Web UI。
99
+ 消费侧:**RSS/XML**、**`/api/*`**、Web UI。
121
100
 
122
101
  ---
123
102
 
@@ -130,11 +109,9 @@ sources.json / Site 插件
130
109
 
131
110
  ---
132
111
 
133
- ## 插件与配置
134
-
135
- ### 信源插件(Site)
112
+ ## 配置
136
113
 
137
- 放置于 `**app/plugins/builtin/**` `**.rssany/plugins/**`(扁平),用户插件可与内置插件同 `id` 覆盖。最小约定包括 `id`、`listUrlPattern` 等(详见 `app/scraper/sources/web/site.ts`)。
114
+ **信源插件(Site / Source)**:目录约定、`listUrlPattern` / `pattern`、`fetchItems`、与 `sources.json` 的关系等,见 **[docs/plugins.md](./docs/plugins.md)**。
138
115
 
139
116
  ### Pipeline(固定代码)
140
117
 
@@ -173,8 +150,9 @@ sources.json / Site 插件
173
150
  ## 仓库目录(摘要)
174
151
 
175
152
  ```
176
- ├── app/ # 后端:路由、feeder、scraper、pipeline、mcp、db、auth…
153
+ ├── app/ # 后端:路由、feeder、scraper、pipeline、db、auth…
177
154
  │ └── plugins/builtin/ # 内置信源 *.rssany.js
155
+ ├── docs/ # 用户文档(如 plugins.md)
178
156
  └── webui/ # SvelteKit 前端
179
157
 
180
158
  ~/.rssany/ # 运行时用户数据(首次启动创建;或 RSSANY_USER_DIR)
@@ -0,0 +1,188 @@
1
// Host-provided helpers (e.g. createHash). Assigned from ctx.deps at the
// start of every fetchItems call; undefined until the first call.
let _deps;

// Origin used when the configured source URL is not a parseable http(s) URL.
const DEFAULT_ORIGIN = "https://agi-eval.org";

// Values sent as `source` to the evaluation endpoint when the URL names none.
const DEFAULT_SOURCES = ["PUBLIC", "PRIVATE"];

// Lookup set used to validate source tokens parsed from the URL.
const VALID_SOURCES = new Set(DEFAULT_SOURCES);
7
+
8
/**
 * Collapse every run of whitespace to a single space and trim both ends.
 *
 * This plugin feeds raw JSON fields (names, messages, date values) into this
 * helper, and those are not guaranteed to be strings. Non-string input is
 * therefore coerced with String() instead of failing on a missing `.replace`.
 *
 * @param {unknown} text - Value to normalise; null and undefined become "".
 * @returns {string} Single-line, trimmed text.
 */
function normalizeText(text) {
  if (text == null) return "";
  return String(text).replace(/\s+/g, " ").trim();
}
11
+
12
/**
 * Derive a stable item guid: the hex-encoded SHA-256 digest of `input`.
 *
 * Relies on the module-level `_deps`, which fetchItems assigns from ctx.deps;
 * calling this before fetchItems has run fails because `_deps` is undefined.
 *
 * @param {string} input - Seed text to hash.
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const digest = _deps.createHash("sha256").update(input).digest("hex");
  return digest;
}
15
+
16
/**
 * Best-effort conversion of an API date field into a Date.
 *
 * The original passed every value straight to normalizeText, which throws on
 * numbers; a single numeric timestamp in the payload would abort the whole
 * fetch. Finite numbers are now handed to the Date constructor (which reads
 * them as epoch milliseconds), Date instances are accepted as-is, and any
 * other non-string value is treated as "no date".
 * NOTE(review): the unit of numeric timestamps from this API is not visible
 * here — confirm milliseconds is correct if the endpoint ever returns numbers.
 *
 * @param {unknown} value - Raw field value (string, number, Date, or missing).
 * @returns {Date|undefined} A valid Date, or undefined when nothing usable was given.
 */
function parseDate(value) {
  if (value instanceof Date) {
    return Number.isNaN(value.getTime()) ? undefined : value;
  }
  if (typeof value === "number") {
    if (!Number.isFinite(value)) return undefined;
    const fromEpoch = new Date(value);
    return Number.isNaN(fromEpoch.getTime()) ? undefined : fromEpoch;
  }
  if (typeof value !== "string") return undefined;

  const text = normalizeText(value);
  if (!text) return undefined;
  const date = new Date(text);
  return Number.isNaN(date.getTime()) ? undefined : date;
}
22
+
23
/**
 * Shorten `text` to at most `maxLen` UTF-16 code units, ending with "…" when
 * something was cut.
 *
 * Cutting by code units can land in the middle of a surrogate pair (emoji and
 * other astral characters), which left a lone high surrogate in front of the
 * ellipsis. That dangling half is now dropped.
 *
 * @param {string} text - Text to clamp.
 * @param {number} [maxLen=300] - Maximum length of the result, ellipsis included.
 * @returns {string} The original text when it fits, otherwise the clamped text.
 */
function clampText(text, maxLen = 300) {
  if (text.length <= maxLen) return text;

  let head = text.slice(0, maxLen - 1);
  const lastUnit = head.charCodeAt(head.length - 1);
  // 0xD800–0xDBFF is the high-surrogate range: its partner was sliced away.
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    head = head.slice(0, -1);
  }
  return `${head.trim()}…`;
}
27
+
28
/**
 * Work out which origin API calls should target.
 *
 * @param {string} sourceId - Configured source URL.
 * @returns {string} The URL's origin when it parses as http(s); DEFAULT_ORIGIN otherwise.
 */
function pickOrigin(sourceId) {
  let parsed;
  try {
    parsed = new URL(sourceId);
  } catch {
    // Not an absolute URL: use the default origin.
    return DEFAULT_ORIGIN;
  }
  return /^https?:$/i.test(parsed.protocol) ? parsed.origin : DEFAULT_ORIGIN;
}
37
+
38
/**
 * Read the requested evaluation sources from a source URL.
 *
 * Tokens are gathered, in this order, from the `source` query parameter, the
 * `sources` query parameter, and the path segment following
 * `/evaluation/home/`. Each value may hold several tokens separated by
 * commas, plus signs, whitespace, pipes or slashes. Tokens are upper-cased,
 * checked against VALID_SOURCES and de-duplicated in first-seen order.
 *
 * @param {string} sourceId - Configured source URL.
 * @returns {string[]} Valid sources, or a copy of DEFAULT_SOURCES when none were found.
 */
function parseSources(sourceId) {
  const tokens = [];
  const collect = (value) => {
    tokens.push(...value.split(/[,+\s|/]+/g));
  };

  try {
    const parsed = new URL(sourceId);
    for (const key of ["source", "sources"]) {
      const value = parsed.searchParams.get(key);
      if (value) collect(value);
    }

    const segment = /^\/evaluation\/home\/([^/?#]+)/i.exec(parsed.pathname);
    if (segment) collect(decodeURIComponent(segment[1]));
  } catch {
    // Unparseable URL or malformed escape: keep whatever was collected so far.
  }

  // A Set keeps insertion order, so the first occurrence of each source wins.
  const accepted = new Set();
  for (const token of tokens) {
    const name = normalizeText(token).toUpperCase();
    if (VALID_SOURCES.has(name)) accepted.add(name);
  }
  return accepted.size > 0 ? [...accepted] : [...DEFAULT_SOURCES];
}
69
+
70
/**
 * Request the evaluation list for one source from the site's JSON endpoint.
 *
 * Sends a POST to `${origin}/commWebApi/evaluation/home` with `{ source }`
 * as the JSON body.
 *
 * @param {string} origin - Site origin, e.g. "https://agi-eval.org".
 * @param {string} source - Source value sent in the request body.
 * @returns {Promise<Array>} The `data.evaluationList` array from the response.
 * @throws {Error} On a non-2xx status, an unreadable body, `rescode !== 0`,
 *   or when `data.evaluationList` is not an array.
 */
async function fetchBySource(origin, source) {
  const tag = `[agi-eval-evaluation] source=${source}`;
  const requestInit = {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Accept": "application/json",
    },
    body: JSON.stringify({ source }),
  };

  const response = await fetch(`${origin}/commWebApi/evaluation/home`, requestInit);
  if (!response.ok) {
    throw new Error(`${tag} 请求失败: HTTP ${response.status}`);
  }

  let payload;
  try {
    payload = await response.json();
  } catch {
    // An unparseable body is reported below as a generic API failure.
    payload = null;
  }

  if (!payload || payload.rescode !== 0) {
    const message = normalizeText(payload?.msg) || "接口返回异常";
    throw new Error(`${tag} 请求失败: ${message}`);
  }

  const list = payload?.data?.evaluationList;
  if (Array.isArray(list)) return list;
  throw new Error(`${tag} 响应结构异常`);
}
96
+
97
/**
 * Build the public page URL for an evaluation record.
 *
 * - id and name present: `${origin}/evaluation/<name>?id=<id>`
 * - only id present:     `${origin}/evaluation/detail?id=<id>`
 * - no id:               `${origin}/evaluation/home`
 *
 * @param {string} origin - Site origin.
 * @param {object} record - Raw record; `id` and `name` are read.
 * @returns {string} Absolute URL.
 */
function buildLink(origin, record) {
  const id = String(record?.id ?? "").trim();
  const name = normalizeText(record?.name);

  if (!id) return `${origin}/evaluation/home`;
  if (!name) return `${origin}/evaluation/detail?id=${encodeURIComponent(id)}`;
  return `${origin}/evaluation/${encodeURIComponent(name)}?id=${encodeURIComponent(id)}`;
}
108
+
109
/**
 * Choose a publication date for a record.
 *
 * Tries `publishTime`, then `createdAt`, then `updatedAt`, and uses the first
 * one parseDate accepts. Later fields are not parsed once one has succeeded.
 *
 * @param {object} record - Raw record.
 * @returns {Date} The first parseable date, or the current time when none parse.
 */
function pickPubDate(record) {
  return (
    parseDate(record?.publishTime) ??
    parseDate(record?.createdAt) ??
    parseDate(record?.updatedAt) ??
    new Date()
  );
}
117
+
118
/**
 * Build the short summary shown for an evaluation record.
 *
 * The description comes from the first non-empty value among `description`,
 * `introduction`, `zhData.description` and `enData.description`; a fixed
 * placeholder is used when all are empty. A view count is appended when the
 * record actually carries one.
 *
 * `Number(null)` and `Number("")` both evaluate to 0, so the original reported
 * "浏览量 0" for records whose `views` was null or blank. Only numbers and
 * non-blank strings are now considered a view count.
 *
 * @param {object} record - Raw record.
 * @returns {string} Summary text, clamped by clampText.
 */
function buildSummary(record) {
  const description = normalizeText(
    record?.description ||
      record?.introduction ||
      record?.zhData?.description ||
      record?.enData?.description ||
      ""
  );
  const detail = description || "AGI-Eval 评测条目";

  const rawViews = record?.views;
  const hasViews =
    typeof rawViews === "number" ||
    (typeof rawViews === "string" && rawViews.trim() !== "");
  const views = hasViews ? Number(rawViews) : Number.NaN;

  if (Number.isFinite(views) && views >= 0) {
    return clampText(`${detail} | 浏览量 ${views}`);
  }
  return clampText(detail);
}
133
+
134
/**
 * Convert one raw evaluation record into a feed item.
 *
 * Records without an id all resolve to the same generic
 * `${origin}/evaluation/home` link. The original hashed that link alone, so
 * every id-less record received the same guid and fetchItems' de-duplication
 * kept only the first one. The title is now part of the seed in that case.
 * Records with an id keep their existing `agi-eval:<id>` seed.
 *
 * @param {object} record - Raw record from the API.
 * @param {string} origin - Site origin used to build the item link.
 * @param {string} source - Source the record was fetched for. Currently not
 *   used by this function; retained so the call signature stays unchanged.
 * @returns {object|null} Feed item, or null when the record is not an object
 *   or has no usable name.
 */
function toFeedItem(record, origin, source) {
  if (!record || typeof record !== "object") return null;
  const title = normalizeText(record.name);
  if (!title) return null;

  const link = buildLink(origin, record);
  const id = String(record.id ?? "").trim();
  // Without an id the link is shared by all such records, so add the title.
  const guidSeed = id ? `agi-eval:${id}` : `${link}#${title}`;

  return {
    guid: hashGuid(guidSeed),
    title,
    link,
    pubDate: pickPubDate(record),
    author: "AGI-Eval",
    summary: buildSummary(record),
    sourceId: "agi-eval-evaluation",
  };
}
153
+
154
/**
 * Plugin entry point: fetch the evaluation lists for every requested source
 * and return them as feed items, newest first.
 *
 * Sources are requested one after another. All lists are downloaded before
 * any record is converted, and the first item seen for a guid is the one kept.
 *
 * @param {string} sourceId - Configured source URL.
 * @param {object} ctx - Host context; `ctx.deps` is stored in module-level `_deps`.
 * @returns {Promise<object[]>} Feed items sorted by pubDate, descending.
 * @throws {Error} When a request fails or no item could be built.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const origin = pickOrigin(sourceId);

  // Phase 1: download every list, remembering which source each record came from.
  const pending = [];
  for (const source of parseSources(sourceId)) {
    const records = await fetchBySource(origin, source);
    for (const record of records) {
      pending.push([source, record]);
    }
  }

  // Phase 2: convert and de-duplicate by guid, keeping the first occurrence.
  const byGuid = new Map();
  for (const [source, record] of pending) {
    const item = toFeedItem(record, origin, source);
    if (item && !byGuid.has(item.guid)) {
      byGuid.set(item.guid, item);
    }
  }

  if (byGuid.size === 0) {
    throw new Error("[agi-eval-evaluation] 未解析到条目,接口结构可能已变化");
  }

  const items = [...byGuid.values()];
  items.sort((a, b) => b.pubDate.getTime() - a.pubDate.getTime());
  return items;
}
183
+
184
/**
 * Plugin descriptor for AGI-Eval evaluation list pages.
 *
 * `listUrlPattern` accepts http(s) URLs on agi-eval.org / agi-eval.cn whose
 * path is `/evaluation/home`, optionally followed by one extra path segment,
 * a trailing slash and a query string.
 */
const agiEvalEvaluationPlugin = {
  id: "agi-eval-evaluation",
  listUrlPattern: /^https?:\/\/agi-eval\.(org|cn)\/evaluation\/home(?:\/[^/?#]+)?\/?(?:\?.*)?$/i,
  fetchItems,
};

export default agiEvalEvaluationPlugin;
@@ -0,0 +1,73 @@
1
// Amii Research & Talent plugin: scrapes the list of people cards
// (no article-body enrichment).

// Host-provided helpers (createHash, parseHtml). Assigned from ctx.deps at the
// start of every fetchItems call; undefined until the first call.
let _deps;

// Matches a profile path: "/people/<slug>" with an optional trailing slash.
const PEOPLE_PATH_RE = /^\/people\/[^/?#]+\/?$/i;
7
+
8
/**
 * Collapse whitespace runs to single spaces and trim the result.
 *
 * @param {string|null|undefined} text - Text to clean; null/undefined count as "".
 * @returns {string} Cleaned single-line text.
 */
function normalizeText(text) {
  const raw = text ?? "";
  return raw.replace(/\s+/g, " ").trim();
}
11
+
12
/**
 * Hex-encoded SHA-256 digest of `input`, used as the item guid.
 *
 * Uses the module-level `_deps` set by fetchItems, so it cannot be called
 * before fetchItems has run.
 *
 * @param {string} input - Seed text (here: the profile link).
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const digest = _deps.createHash("sha256").update(input).digest("hex");
  return digest;
}
15
+
16
/**
 * Resolve an anchor href against the page URL and keep it only when it is an
 * http(s) URL whose path matches PEOPLE_PATH_RE.
 *
 * @param {string|null|undefined} rawHref - Raw `href` attribute value.
 * @param {URL|string} pageUrl - Base used to resolve relative hrefs.
 * @returns {string|null} Absolute profile URL, or null when the href is
 *   missing, unparseable, not http(s), or not a profile path.
 */
function resolvePeopleLink(rawHref, pageUrl) {
  if (!rawHref) return null;

  let resolved;
  try {
    resolved = new URL(rawHref, pageUrl);
  } catch {
    return null;
  }

  const isHttp = /^https?:$/i.test(resolved.protocol);
  return isHttp && PEOPLE_PATH_RE.test(resolved.pathname) ? resolved.href : null;
}
27
+
28
/**
 * Collect one feed item per distinct profile link found in the parsed page.
 *
 * Looks at every anchor whose href contains "/people/". The title comes from
 * the anchor's `<h3>` and the optional summary from its `<p>`. Anchors with
 * no heading text are skipped without reserving their link, so a later anchor
 * for the same profile can still produce the item.
 *
 * @param {object} root - Parsed document exposing `querySelectorAll`.
 * @param {URL|string} pageUrl - Base URL for resolving relative hrefs.
 * @returns {object[]} Items in document order; `pubDate` is the extraction time.
 */
function extractPeopleItems(root, pageUrl) {
  const byLink = new Map();

  for (const anchor of root.querySelectorAll('a[href*="/people/"]')) {
    const link = resolvePeopleLink(anchor.getAttribute("href"), pageUrl);
    if (!link || byLink.has(link)) continue;

    const title = normalizeText(anchor.querySelector("h3")?.textContent);
    if (!title) continue;

    const blurb = normalizeText(anchor.querySelector("p")?.textContent);
    byLink.set(link, {
      guid: hashGuid(link),
      title,
      link,
      pubDate: new Date(),
      author: "Amii",
      summary: blurb || undefined,
    });
  }

  return [...byLink.values()];
}
54
+
55
/**
 * Plugin entry point: load the Research & Talent page and return one feed
 * item per person card.
 *
 * The original built the base URL with `new URL(finalUrl)`, which throws when
 * the fetch result carries no final URL. The sibling anthropic-research plugin
 * guards the same field with a fallback, so this one now falls back to the
 * requested `sourceId` as well.
 *
 * @param {string} sourceId - URL of the list page.
 * @param {object} ctx - Host context providing `deps` (stored in the
 *   module-level `_deps`) and `fetchHtml`.
 * @returns {Promise<object[]>} Extracted items.
 * @throws {Error} When no person item could be extracted.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 3000 });
  const root = _deps.parseHtml(html);
  // Relative hrefs resolve against the post-redirect URL when it is known.
  const pageUrl = new URL(finalUrl || sourceId);

  const items = extractPeopleItems(root, pageUrl);
  if (items.length === 0) {
    throw new Error("[amii-research-talent] 未解析到人物条目,页面结构可能已变化");
  }

  return items;
}
68
+
69
/**
 * Plugin descriptor for the Amii "Research & Talent" page.
 *
 * `listUrlPattern` accepts http(s) URLs on amii.ca (with or without "www.")
 * whose path is `/research-talent`, with an optional trailing slash and query.
 */
const amiiResearchTalentPlugin = {
  id: "amii-research-talent",
  listUrlPattern: /^https?:\/\/(www\.)?amii\.ca\/research-talent\/?(\?.*)?$/i,
  fetchItems,
};

export default amiiResearchTalentPlugin;
@@ -0,0 +1,155 @@
1
// Anthropic Research plugin: scrapes the entries listed on the research page
// (no enrichment step).

// Host-provided helpers (createHash, parseHtml). Assigned from ctx.deps at the
// start of every fetchItems call; undefined until the first call.
let _deps;

// Base used to resolve relative links when the fetch reports no final URL.
const ANTHROPIC_ORIGIN = "https://www.anthropic.com";

// Lower-case three-letter English month abbreviation -> zero-based month index.
const MONTH_TO_INDEX = Object.fromEntries(
  ["jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"].map(
    (abbr, index) => [abbr, index]
  )
);
22
+
23
+
24
/**
 * Squash whitespace runs into single spaces and strip leading/trailing space.
 *
 * @param {string|null|undefined} text - Text to clean; null/undefined count as "".
 * @returns {string} Cleaned single-line text.
 */
function normalizeText(text) {
  const collapsed = (text ?? "").replace(/\s+/g, " ");
  return collapsed.trim();
}
27
+
28
+
29
/**
 * Hex-encoded SHA-256 digest of `input`, used as the item guid.
 *
 * Uses the module-level `_deps` set by fetchItems, so it cannot be called
 * before fetchItems has run.
 *
 * @param {string} input - Seed text (here: the article link).
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const digest = _deps.createHash("sha256").update(input).digest("hex");
  return digest;
}
32
+
33
+
34
/**
 * Turn an anchor href into an absolute http(s) URL.
 *
 * @param {string|null|undefined} rawHref - Raw `href` attribute value.
 * @param {string|URL} baseUrl - Base for resolving relative hrefs.
 * @returns {string|null} Absolute URL, or null for empty hrefs, in-page
 *   fragments, `javascript:` links, unparseable values and non-http(s) schemes.
 */
function toAbsoluteHttpUrl(rawHref, baseUrl) {
  const href = rawHref ? rawHref.trim() : "";
  if (href === "" || href.startsWith("#") || href.startsWith("javascript:")) {
    return null;
  }

  let resolved;
  try {
    resolved = new URL(href, baseUrl);
  } catch {
    return null;
  }
  return /^https?:$/i.test(resolved.protocol) ? resolved.href : null;
}
46
+
47
+
48
/**
 * Decide whether a URL path points at an individual research entry.
 *
 * @param {string} pathname - URL path, e.g. "/research/some-paper".
 * @returns {boolean} True for paths under "/research/" that are neither the
 *   bare "/research/" index nor under "/research/team/".
 */
function isResearchArticlePath(pathname) {
  const prefix = "/research/";
  return (
    pathname.startsWith(prefix) &&
    !pathname.startsWith("/research/team/") &&
    pathname.length > prefix.length
  );
}
53
+
54
+
55
/**
 * Parse the date label shown on a research card.
 *
 * "Mon D, YYYY" labels (full month names work too, since only the first three
 * letters are looked up) become noon UTC on that day. Anything else is handed
 * to the Date constructor as a fallback.
 *
 * @param {string|null|undefined} dateText - Raw label text.
 * @returns {Date|undefined} Parsed date, or undefined when the text is empty
 *   or cannot be parsed.
 */
function parsePubDate(dateText) {
  const text = normalizeText(dateText);
  if (!text) return undefined;

  const parts = /^([A-Za-z]{3,9})\s+(\d{1,2}),\s*(\d{4})$/.exec(text);
  const monthIndex = parts
    ? MONTH_TO_INDEX[parts[1].slice(0, 3).toLowerCase()]
    : undefined;
  if (monthIndex != null) {
    const utcNoon = new Date(
      Date.UTC(Number(parts[3]), monthIndex, Number(parts[2]), 12, 0, 0)
    );
    if (!Number.isNaN(utcNoon.getTime())) return utcNoon;
  }

  const fallback = new Date(text);
  return Number.isNaN(fallback.getTime()) ? undefined : fallback;
}
74
+
75
+
76
/**
 * Pull a title out of a research-card anchor.
 *
 * Preference order:
 *   1. text of the first heading element found, trying h2, then h3, then h4
 *      (only that one element is consulted, even if its text is empty);
 *   2. the last non-empty `<span>` text;
 *   3. the anchor's whole text.
 *
 * @param {object} anchor - Anchor node exposing `querySelector`,
 *   `querySelectorAll` (must return an array) and `textContent`.
 * @returns {string} Normalised title; may be empty.
 */
function extractTitle(anchor) {
  let heading = null;
  for (const tag of ["h2", "h3", "h4"]) {
    heading = anchor.querySelector(tag);
    if (heading != null) break;
  }
  const headingText = normalizeText(heading?.textContent);
  if (headingText) return headingText;

  const spanTexts = anchor
    .querySelectorAll("span")
    .map((span) => normalizeText(span.textContent))
    .filter((text) => text !== "");
  const lastSpan = spanTexts.at(-1);
  if (lastSpan !== undefined) return lastSpan;

  return normalizeText(anchor.textContent);
}
92
+
93
+
94
/**
 * Read the card's first paragraph as a summary.
 *
 * @param {object} anchor - Anchor node exposing `querySelector`.
 * @param {string} title - Title already chosen for the card.
 * @returns {string|undefined} Paragraph text, or undefined when it is empty
 *   or merely repeats the title.
 */
function extractSummary(anchor, title) {
  const text = normalizeText(anchor.querySelector("p")?.textContent);
  return text && text !== title ? text : undefined;
}
99
+
100
+
101
/**
 * Convert one anchor into a feed item when it links to a research entry.
 *
 * @param {object} anchor - Anchor node from the parsed page.
 * @param {string} finalUrl - Base URL for resolving the anchor's href.
 * @returns {object|null} Feed item, or null when the href is unusable, the
 *   path is not a research entry, or no title could be extracted. `pubDate`
 *   falls back to the current time when the card has no parseable `<time>`.
 */
function parseAnchorItem(anchor, finalUrl) {
  const link = toAbsoluteHttpUrl(anchor.getAttribute("href"), finalUrl);
  if (!link || !isResearchArticlePath(new URL(link).pathname)) return null;

  const title = extractTitle(anchor);
  if (!title) return null;

  const timeLabel = normalizeText(anchor.querySelector("time")?.textContent);
  const published = parsePubDate(timeLabel);
  const pubDate = published ?? new Date();
  const summary = extractSummary(anchor, title);

  return {
    guid: hashGuid(link),
    title,
    link,
    pubDate,
    author: "Anthropic",
    summary,
  };
}
124
+
125
+
126
/**
 * Plugin entry point: load the research page and return one feed item per
 * distinct research link, in document order.
 *
 * @param {string} sourceId - URL of the research list page.
 * @param {object} ctx - Host context providing `deps` (stored in the
 *   module-level `_deps`) and `fetchHtml`.
 * @returns {Promise<object[]>} Extracted items.
 * @throws {Error} When no research item could be extracted.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 4500 });
  const root = _deps.parseHtml(html);
  // Relative hrefs resolve against the final URL, or the site origin if absent.
  const baseUrl = finalUrl || ANTHROPIC_ORIGIN;

  const byLink = new Map();
  for (const anchor of root.querySelectorAll("a[href]")) {
    const item = parseAnchorItem(anchor, baseUrl);
    if (item && !byLink.has(item.link)) {
      byLink.set(item.link, item);
    }
  }

  if (byLink.size === 0) {
    throw new Error("[anthropic-research] 未解析到研究条目,页面结构可能已变化");
  }

  return [...byLink.values()];
}
149
+
150
+
151
/**
 * Plugin descriptor for the Anthropic research page.
 *
 * `listUrlPattern` accepts http(s) URLs on anthropic.com (with or without
 * "www.") whose path is `/research`, with an optional trailing slash and query.
 */
const anthropicResearchPlugin = {
  id: "anthropic-research",
  listUrlPattern: /^https?:\/\/(www\.)?anthropic\.com\/research(?:\/)?(\?.*)?$/i,
  fetchItems,
};

export default anthropicResearchPlugin;