rssany 0.1.2 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -50
- package/app/plugins/builtin/agi-eval-evaluation.rssany.js +188 -0
- package/app/plugins/builtin/amii-research-talent.rssany.js +73 -0
- package/app/plugins/builtin/anthropic-research.rssany.js +155 -0
- package/app/plugins/builtin/appen-resources.rssany.js +155 -0
- package/app/plugins/builtin/baai-wudao-paper-article.rssany.js +185 -0
- package/app/plugins/builtin/baaidata-csdn.rssany.js +242 -0
- package/app/plugins/builtin/baidu-research.rssany.js +222 -0
- package/app/plugins/builtin/brightdata-blog.rssany.js +301 -0
- package/app/plugins/builtin/bytedance-seed-research.rssany.js +231 -0
- package/app/plugins/builtin/five-radar.rssany.js +490 -0
- package/app/plugins/builtin/flageval-news.rssany.js +118 -0
- package/app/plugins/builtin/google-deepmind-research.rssany.js +223 -0
- package/app/plugins/builtin/google-research-datasets.rssany.js +171 -0
- package/app/plugins/builtin/google-research.rssany.js +220 -0
- package/app/plugins/builtin/google.rssany.js +187 -0
- package/app/plugins/builtin/hacker-news-newest.rssany.js +130 -0
- package/app/plugins/builtin/harvard-dataverse.rssany.js +166 -0
- package/app/plugins/builtin/huaweicloud-bbs-blogs.rssany.js +185 -0
- package/app/plugins/builtin/lingowhale.rssany.js +119 -0
- package/app/plugins/builtin/meituan-tech.rssany.js +130 -0
- package/app/plugins/builtin/meta-ai-publications.rssany.js +221 -0
- package/app/plugins/builtin/mila-quebec.rssany.js +199 -0
- package/app/plugins/builtin/mit-csail-research.rssany.js +208 -0
- package/app/plugins/builtin/moonshot.rssany.js +127 -0
- package/app/plugins/builtin/opendatalab-news.rssany.js +174 -0
- package/app/plugins/builtin/opendatalab.rssany.js +109 -0
- package/app/plugins/builtin/opendrivelab-autonomous-driving.rssany.js +114 -0
- package/app/plugins/builtin/opendrivelab-embodiedai.rssany.js +114 -0
- package/app/plugins/builtin/opendrivelab-publications.rssany.js +130 -0
- package/app/plugins/builtin/opendrivelab.rssany.js +333 -0
- package/app/plugins/builtin/paperswithcode.rssany.js +227 -0
- package/app/plugins/builtin/pjlab-adg-publications.rssany.js +202 -0
- package/app/plugins/builtin/rss.rssany.js +11 -1
- package/app/plugins/builtin/selectdataset.rssany.js +206 -0
- package/app/plugins/builtin/sensetime-tech-achievements.rssany.js +154 -0
- package/app/plugins/builtin/supervisely-blog.rssany.js +159 -0
- package/app/plugins/builtin/uci-ml-repository.rssany.js +111 -0
- package/app/plugins/builtin/venturebeat.rssany.js +97 -0
- package/app/plugins/builtin/worldlabs.rssany.js +129 -0
- package/app/plugins/builtin/x.rssany.js +159 -0
- package/app/plugins/builtin/xiaohongshu.rssany.js +283 -0
- package/app/plugins/builtin/zhipu-research.rssany.js +334 -0
- package/dist/index.js +79 -9
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/webui/build/200.html +6 -6
- package/webui/build/_app/immutable/assets/0.BB88QFoe.css +1 -0
- package/webui/build/_app/immutable/assets/{homeFeedPanelStore.BopJZtHu.css → homeFeedPanelStore.iOmfP2qL.css} +1 -1
- package/webui/build/_app/immutable/chunks/CZD-YNDw.js +31 -0
- package/webui/build/_app/immutable/chunks/{DcAshVxe.js → D6VIKef0.js} +1 -1
- package/webui/build/_app/immutable/chunks/{EIZIMsXK.js → Dbqx2mXq.js} +1 -1
- package/webui/build/_app/immutable/chunks/DeX-oq5W.js +41 -0
- package/webui/build/_app/immutable/chunks/{BXCWEhUd.js → dhB8G5Is.js} +1 -1
- package/webui/build/_app/immutable/entry/{app.DdgnooOk.js → app.XPso7q7g.js} +2 -2
- package/webui/build/_app/immutable/entry/start.Db4snNCd.js +1 -0
- package/webui/build/_app/immutable/nodes/0.BKTQePmA.js +11 -0
- package/webui/build/_app/immutable/nodes/{1.5DFDaT4c.js → 1.BS3_Rfxm.js} +1 -1
- package/webui/build/_app/immutable/nodes/{10.OVK4i9XE.js → 10.CyyxDCIS.js} +1 -1
- package/webui/build/_app/immutable/nodes/{11.Dhn_rO4A.js → 11.CtYgIaGj.js} +1 -1
- package/webui/build/_app/immutable/nodes/{14.B_KpJLxn.js → 14.D5OEGPR2.js} +1 -1
- package/webui/build/_app/immutable/nodes/{15.RaWaA-0I.js → 15.B4dFN1Gk.js} +1 -1
- package/webui/build/_app/immutable/nodes/{16.DSUgqolV.js → 16.M7ZII7tl.js} +1 -1
- package/webui/build/_app/immutable/nodes/{3.wQvGs9w-.js → 3.7r8v7qkm.js} +1 -1
- package/webui/build/_app/immutable/nodes/{5.CCtn90c0.js → 5.CHIzoGrb.js} +1 -1
- package/webui/build/_app/immutable/nodes/{6.C2_mjW1u.js → 6.BDBqx-GY.js} +1 -1
- package/webui/build/_app/immutable/nodes/{7.Dwz6W7A1.js → 7.D5czsDmz.js} +1 -1
- package/webui/build/_app/immutable/nodes/{8.DzkEw6rx.js → 8.pjVNsCdV.js} +1 -1
- package/webui/build/_app/immutable/nodes/{9.DtlXEwe1.js → 9.CsARv1BH.js} +1 -1
- package/webui/build/_app/version.json +1 -1
- package/webui/build/_app/immutable/assets/0.C6Q_nuW9.css +0 -1
- package/webui/build/_app/immutable/chunks/CkUAV0m0.js +0 -41
- package/webui/build/_app/immutable/chunks/CtijX1u3.js +0 -31
- package/webui/build/_app/immutable/entry/start.DhJaJZhR.js +0 -1
- package/webui/build/_app/immutable/nodes/0.BE05Cuc4.js +0 -11
package/README.md
CHANGED
|
@@ -14,13 +14,12 @@
|
|
|
14
14
|
## 功能概览
|
|
15
15
|
|
|
16
16
|
- **统一订阅**:在 `.rssany/sources.json` 中配置网站列表、标准 RSS、IMAP 邮件等,由调度器按 `refresh` 策略拉取。
|
|
17
|
-
-
|
|
17
|
+
- **可插拔信源**:**Site / Source** 插件(`.rssany.js` / `.rssany.ts`),见 **[插件配置说明](./docs/plugins.md)**。
|
|
18
18
|
- **正文与解析**:在信源 `fetchItems`(及需要的 `ctx.extractItem` 等)内完成;入库后跑 pipeline。
|
|
19
19
|
- **固定 pipeline**:`app/pipeline/` 中打标签、翻译等,由 `.rssany/config.json` 的 `pipeline.steps` 开关(**不是**用户目录下的 pipeline 插件)。
|
|
20
20
|
- **LLM 辅助**:解析、提取、标签、翻译等可按配置走 OpenAI 兼容接口。
|
|
21
21
|
- **站点登录**:需登录的站点通过 Puppeteer 管理 Cookie(与产品用户账号无关)。
|
|
22
22
|
- **可选远端投递**:若 `config.json` 中 **`deliver.url`** 非空,在写库与 pipeline 完成后将条目以 **`{ sourceRef, items }`** JSON **POST** 到该 URL(由 `app/deliver/post.ts` 发送);留空则仅本地消费。
|
|
23
|
-
- **MCP**:条目检索等能力以 MCP 暴露,供 Cursor、Claude 等使用。
|
|
24
23
|
- **Web 界面**:SvelteKit 构建产物由后端托管;**Feeds** 等需 **邮箱校验**;**`/admin`** 需 **`users.role === 'admin'`**(可从 **`/me`** 进入)。
|
|
25
24
|
|
|
26
25
|
---
|
|
@@ -42,82 +41,62 @@
|
|
|
42
41
|
|
|
43
42
|
## 快速开始
|
|
44
43
|
|
|
45
|
-
|
|
44
|
+
日常使用只需 **Node.js 20.x–23.x**(与 `package.json` 的 `engines` 一致):
|
|
46
45
|
|
|
47
|
-
|
|
48
|
-
- **pnpm**
|
|
49
|
-
|
|
50
|
-
### 安装依赖
|
|
46
|
+
### 全局安装(推荐)
|
|
51
47
|
|
|
52
48
|
```bash
|
|
53
|
-
|
|
54
|
-
|
|
49
|
+
npm install -g rssany # 与 npm i -g rssany 相同
|
|
50
|
+
rssany
|
|
55
51
|
```
|
|
56
52
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
1. 复制环境变量示例并按需填写(JWT、OAuth、SMTP、LLM 等):
|
|
60
|
-
```bash
|
|
61
|
-
cp .env.example .env
|
|
62
|
-
```
|
|
63
|
-
2. 信源与全局配置:首次启动会在 **`~/.rssany/`**(Windows:`%USERPROFILE%\.rssany\`)下自动从包内 **`init/`** 目录中的默认数据复制生成 `sources.json`、`config.json`(若已存在则不会覆盖)。也可手动复制仓库里的 `init/sources.json`、`init/config.json`。
|
|
64
|
-
3. (可选)LLM:在 `.env` 中设置 `OPENAI_API_KEY`、`OPENAI_BASE_URL`、`OPENAI_MODEL` 等。
|
|
53
|
+
安装包内已包含构建好的后端与 Web 界面;启动后用浏览器打开终端里提示的地址(默认 **`http://127.0.0.1:18473/`**,端口可在**运行命令时当前目录**下的 `.env` 里设置 `PORT`)。
|
|
65
54
|
|
|
66
|
-
|
|
55
|
+
- **数据目录**:首次运行会在 **`~/.rssany/`**(Windows:`%USERPROFILE%\.rssany\`)自动从包内 **`init/`** 生成 `sources.json`、`config.json` 等(已存在则不会覆盖)。
|
|
56
|
+
- **可选配置**:在启动 `rssany` 时的**当前目录**放置 `.env`(可参考仓库里的 `.env.example`),用于 JWT、OAuth、SMTP、LLM(如 `OPENAI_API_KEY` / `OPENAI_BASE_URL` / `OPENAI_MODEL`)等。
|
|
57
|
+
- **重置全部本地数据**(结束占用 `PORT` 的进程并删除用户目录,慎用):执行 **`rssany reset`**;在含 `.env` 的目录下运行可读取 `PORT` / `RSSANY_USER_DIR`,或事先在环境里导出这些变量。
|
|
67
58
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
```bash
|
|
71
|
-
# 推荐:API + 前端 watch(修改 Svelte 后自动写入构建目录,刷新浏览器即可)
|
|
72
|
-
pnpm run dev:all
|
|
73
|
-
|
|
74
|
-
# 或分步:先打一次前端再起后端
|
|
75
|
-
pnpm run webui:build
|
|
76
|
-
pnpm dev
|
|
77
|
-
```
|
|
59
|
+
等价于在项目里执行 `node node_modules/rssany/dist/index.js`;CLI 名为 **`rssany`**。
|
|
78
60
|
|
|
79
|
-
|
|
61
|
+
### 从源码运行(开发 / 贡献)
|
|
80
62
|
|
|
81
|
-
|
|
63
|
+
需要 **pnpm**:
|
|
82
64
|
|
|
83
65
|
```bash
|
|
84
|
-
pnpm
|
|
66
|
+
pnpm install
|
|
67
|
+
pnpm run webui:install
|
|
68
|
+
cp .env.example .env # 按需修改
|
|
85
69
|
```
|
|
86
70
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
**生产**:
|
|
71
|
+
**开发**(后端托管 `webui` 构建目录;改前端可 watch):
|
|
90
72
|
|
|
91
73
|
```bash
|
|
92
|
-
pnpm run
|
|
74
|
+
pnpm run dev:all
|
|
93
75
|
```
|
|
94
76
|
|
|
95
|
-
|
|
77
|
+
或分步:`pnpm run webui:build` 后 `pnpm dev`。
|
|
96
78
|
|
|
97
|
-
|
|
79
|
+
**仅调试 WebUI 热更新**(可选):`cd webui && pnpm dev`(Vite 代理到本机后端,见 `webui/vite.config.ts`)。
|
|
98
80
|
|
|
99
|
-
|
|
100
|
-
npm install -g rssany
|
|
101
|
-
rssany
|
|
102
|
-
```
|
|
81
|
+
**生产**(本仓库):`pnpm run webui:build && pnpm start`。
|
|
103
82
|
|
|
104
|
-
|
|
83
|
+
**重置本地数据**(与全局安装的 `rssany reset` 逻辑相同):`pnpm reset`。
|
|
105
84
|
|
|
106
|
-
|
|
85
|
+
发布到 npm 时 `prepublishOnly` 会执行 `build:all`(后端 `vite build` + `webui:build`)。
|
|
107
86
|
|
|
108
87
|
---
|
|
109
88
|
|
|
110
89
|
## 数据流(简图)
|
|
111
90
|
|
|
112
91
|
```
|
|
113
|
-
sources.json /
|
|
92
|
+
sources.json / 信源插件
|
|
114
93
|
→ 调度器触发 fetchItems
|
|
115
94
|
→ upsertItems
|
|
116
95
|
→ pipeline(每条一次)
|
|
117
96
|
→ [可选] deliver.url POST(出站,非入站 API)
|
|
118
97
|
```
|
|
119
98
|
|
|
120
|
-
消费侧:**RSS/XML**、`**/api
|
|
99
|
+
消费侧:**RSS/XML**、**`/api/*`**、Web UI。
|
|
121
100
|
|
|
122
101
|
---
|
|
123
102
|
|
|
@@ -130,11 +109,9 @@ sources.json / Site 插件
|
|
|
130
109
|
|
|
131
110
|
---
|
|
132
111
|
|
|
133
|
-
##
|
|
134
|
-
|
|
135
|
-
### 信源插件(Site)
|
|
112
|
+
## 配置
|
|
136
113
|
|
|
137
|
-
|
|
114
|
+
**信源插件(Site / Source)**:目录约定、`listUrlPattern` / `pattern`、`fetchItems`、与 `sources.json` 的关系等,见 **[docs/plugins.md](./docs/plugins.md)**。
|
|
138
115
|
|
|
139
116
|
### Pipeline(固定代码)
|
|
140
117
|
|
|
@@ -173,8 +150,9 @@ sources.json / Site 插件
|
|
|
173
150
|
## 仓库目录(摘要)
|
|
174
151
|
|
|
175
152
|
```
|
|
176
|
-
├── app/ # 后端:路由、feeder、scraper、pipeline、
|
|
153
|
+
├── app/ # 后端:路由、feeder、scraper、pipeline、db、auth…
|
|
177
154
|
│ └── plugins/builtin/ # 内置信源 *.rssany.js
|
|
155
|
+
├── docs/ # 用户文档(如 plugins.md)
|
|
178
156
|
└── webui/ # SvelteKit 前端
|
|
179
157
|
|
|
180
158
|
~/.rssany/ # 运行时用户数据(首次启动创建;或 RSSANY_USER_DIR)
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
let _deps;
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
const DEFAULT_ORIGIN = "https://agi-eval.org";
|
|
5
|
+
const DEFAULT_SOURCES = ["PUBLIC", "PRIVATE"];
|
|
6
|
+
const VALID_SOURCES = new Set(DEFAULT_SOURCES);
|
|
7
|
+
|
|
8
|
+
/**
 * Collapse every run of whitespace into a single space and trim both ends.
 *
 * The values passed in come from a JSON payload, so a field may arrive as a
 * number rather than a string. Finite numbers and bigints are stringified
 * instead of crashing on a missing `.replace`; any other non-string value
 * (null, undefined, NaN, booleans, objects) yields "".
 *
 * @param {unknown} text - Raw value to normalize.
 * @returns {string} The normalized text, possibly empty.
 */
function normalizeText(text) {
  let raw = "";
  if (typeof text === "string") {
    raw = text;
  } else if (typeof text === "bigint" || (typeof text === "number" && Number.isFinite(text))) {
    raw = String(text);
  }
  return raw.replace(/\s+/g, " ").trim();
}
|
|
11
|
+
|
|
12
|
+
/**
 * Derive a stable GUID for a feed item: the hex-encoded SHA-256 digest of
 * `input`, produced through the `createHash` factory on the module-level
 * `_deps`. `_deps` is assigned inside `fetchItems`, so this helper must not
 * be called before `fetchItems` has started.
 *
 * @param {string} input - Seed text to hash.
 * @returns {string} Hex digest of the seed.
 */
function hashGuid(input) {
  const hexDigest = _deps.createHash("sha256").update(input).digest("hex");
  return hexDigest;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Parse a timestamp-like value into a Date.
 *
 * Accepts strings (whitespace-normalized, then handed to the Date
 * constructor), numbers, and Date instances. Record fields come from a JSON
 * API, so a numeric epoch value must not crash the string normalization.
 *
 * @param {unknown} value - Candidate timestamp.
 * @returns {Date|undefined} A valid Date, or undefined when the value is
 *   missing, of an unsupported type, or not parseable.
 */
function parseDate(value) {
  if (value == null) return undefined;

  let date;
  if (value instanceof Date) {
    // Copy so callers never share a mutable Date with the input record.
    date = new Date(value.getTime());
  } else if (typeof value === "number") {
    // NOTE(review): numbers are read as epoch milliseconds, which is what
    // `new Date(number)` expects — confirm the API does not send seconds.
    date = new Date(value);
  } else if (typeof value === "string") {
    const text = normalizeText(value);
    if (!text) return undefined;
    date = new Date(text);
  } else {
    return undefined;
  }

  return Number.isNaN(date.getTime()) ? undefined : date;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Shorten `text` to at most `maxLen` UTF-16 code units, ending with an
 * ellipsis when anything was cut.
 *
 * @param {string} text - Text to shorten.
 * @param {number} [maxLen=300] - Maximum length of the result, in UTF-16
 *   code units, including the trailing ellipsis.
 * @returns {string} `text` unchanged when it already fits, otherwise the
 *   trimmed head followed by "…".
 */
function clampText(text, maxLen = 300) {
  if (text.length <= maxLen) return text;

  // Reserve one unit for the ellipsis; never pass a negative end to slice,
  // which would count from the end of the string instead.
  let head = text.slice(0, Math.max(maxLen - 1, 0));

  // If the cut landed in the middle of a surrogate pair, drop the dangling
  // high surrogate so the output stays well-formed.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    head = head.slice(0, -1);
  }

  return `${head.trim()}…`;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Choose the origin that API requests are sent to.
 *
 * @param {string} sourceId - The configured source identifier, expected to
 *   be an absolute URL.
 * @returns {string} The origin of `sourceId` when it parses as an http(s)
 *   URL; otherwise `DEFAULT_ORIGIN`.
 */
function pickOrigin(sourceId) {
  let parsed = null;
  try {
    parsed = new URL(sourceId);
  } catch {
    // Not an absolute URL: fall through to the default origin.
  }

  const isHttp = parsed !== null && /^https?:$/i.test(parsed.protocol);
  return isHttp ? parsed.origin : DEFAULT_ORIGIN;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Work out which evaluation sources a source URL asks for.
 *
 * Candidate names are read from the `source` and `sources` query parameters
 * and from the path segment following `/evaluation/home/`. Each value may
 * list several names separated by commas, plus signs, whitespace, pipes or
 * slashes. Names are upper-cased, filtered against `VALID_SOURCES`, and
 * de-duplicated keeping first-seen order.
 *
 * @param {string} sourceId - The configured source URL.
 * @returns {string[]} The requested sources, or a copy of `DEFAULT_SOURCES`
 *   when none of the candidates is valid (or the URL cannot be parsed).
 */
function parseSources(sourceId) {
  const splitNames = (value) => value.split(/[,+\s|/]+/g);
  const candidates = [];

  try {
    const parsed = new URL(sourceId);

    for (const key of ["source", "sources"]) {
      const value = parsed.searchParams.get(key);
      if (value) candidates.push(...splitNames(value));
    }

    const segment = /^\/evaluation\/home\/([^/?#]+)/i.exec(parsed.pathname);
    if (segment) {
      candidates.push(...splitNames(decodeURIComponent(segment[1])));
    }
  } catch {
    // Unparseable URL or malformed percent-encoding: keep whatever was
    // collected before the failure.
  }

  const valid = candidates
    .map((raw) => normalizeText(raw).toUpperCase())
    .filter((name) => VALID_SOURCES.has(name));

  // A Set keeps insertion order, so the first occurrence of each name wins.
  const picked = [...new Set(valid)];
  return picked.length > 0 ? picked : [...DEFAULT_SOURCES];
}
|
|
69
|
+
|
|
70
|
+
/**
 * Request the evaluation list for one source from the site's JSON API.
 *
 * Sends a POST to `${origin}/commWebApi/evaluation/home` with a JSON body of
 * `{ source }`.
 *
 * @param {string} origin - Origin to send the request to.
 * @param {string} source - Source name placed in the request body.
 * @returns {Promise<Array>} The `data.evaluationList` array of the response.
 * @throws {Error} When the HTTP status is not ok, when the body is not JSON
 *   or its `rescode` is not 0, or when `data.evaluationList` is not an array.
 */
async function fetchBySource(origin, source) {
  const endpoint = `${origin}/commWebApi/evaluation/home`;
  const requestInit = {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Accept": "application/json",
    },
    body: JSON.stringify({ source }),
  };

  const response = await fetch(endpoint, requestInit);
  if (!response.ok) {
    throw new Error(`[agi-eval-evaluation] source=${source} 请求失败: HTTP ${response.status}`);
  }

  // A body that fails to parse is treated like an unsuccessful payload.
  const payload = await response.json().catch(() => null);
  const succeeded = Boolean(payload) && payload.rescode === 0;
  if (!succeeded) {
    const reason = normalizeText(payload?.msg) || "接口返回异常";
    throw new Error(`[agi-eval-evaluation] source=${source} 请求失败: ${reason}`);
  }

  const evaluations = payload.data?.evaluationList;
  if (Array.isArray(evaluations)) return evaluations;
  throw new Error(`[agi-eval-evaluation] source=${source} 响应结构异常`);
}
|
|
96
|
+
|
|
97
|
+
/**
 * Build the public page URL for an evaluation record.
 *
 * @param {string} origin - Site origin used as the URL prefix.
 * @param {object} record - API record; its `id` and `name` are read.
 * @returns {string} `/evaluation/<name>?id=<id>` when both are present,
 *   `/evaluation/detail?id=<id>` when only the id is present, and the
 *   `/evaluation/home` listing page otherwise.
 */
function buildLink(origin, record) {
  const id = String(record?.id ?? "").trim();
  const name = normalizeText(record?.name);

  if (id === "") return `${origin}/evaluation/home`;

  const idQuery = `id=${encodeURIComponent(id)}`;
  return name
    ? `${origin}/evaluation/${encodeURIComponent(name)}?${idQuery}`
    : `${origin}/evaluation/detail?${idQuery}`;
}
|
|
108
|
+
|
|
109
|
+
/**
 * Pick the publication date for a record.
 *
 * Tries `publishTime`, then `createdAt`, then `updatedAt`, and uses the first
 * one that `parseDate` accepts.
 *
 * @param {object} record - API record.
 * @returns {Date} The first parseable timestamp, or the current time when
 *   none of the fields parses.
 */
function pickPubDate(record) {
  return (
    parseDate(record?.publishTime) ??
    parseDate(record?.createdAt) ??
    parseDate(record?.updatedAt) ??
    new Date()
  );
}
|
|
117
|
+
|
|
118
|
+
/**
 * Build the short summary shown for an evaluation record.
 *
 * The description is the first truthy value among `description`,
 * `introduction`, `zhData.description` and `enData.description`, falling
 * back to a generic label. When the record carries a usable view count it is
 * appended. The result is passed through `clampText`.
 *
 * @param {object} record - API record.
 * @returns {string} The clamped summary text.
 */
function buildSummary(record) {
  const description = normalizeText(
    record?.description ||
      record?.introduction ||
      record?.zhData?.description ||
      record?.enData?.description ||
      ""
  );
  const detail = description || "AGI-Eval 评测条目";

  // Only numbers and non-blank strings count as a view count. `Number(null)`,
  // `Number("")` and `Number([])` are all 0, which would otherwise report a
  // missing count as zero views.
  const rawViews = record?.views;
  const hasViews =
    typeof rawViews === "number" ||
    (typeof rawViews === "string" && rawViews.trim() !== "");
  const views = hasViews ? Number(rawViews) : Number.NaN;

  if (Number.isFinite(views) && views >= 0) {
    return clampText(`${detail} | 浏览量 ${views}`);
  }
  return clampText(detail);
}
|
|
133
|
+
|
|
134
|
+
/**
 * Convert one API record into a feed item.
 *
 * The GUID is hashed from `agi-eval:<id>` when the record has an id, and
 * from the item link otherwise.
 *
 * @param {object} record - API record.
 * @param {string} origin - Site origin used to build the item link.
 * @param {string} source - Source the record was fetched for (not used by
 *   the conversion itself).
 * @returns {object|null} The feed item, or null when `record` is not an
 *   object or has no usable name.
 */
function toFeedItem(record, origin, source) {
  const isObject = Boolean(record) && typeof record === "object";
  if (!isObject) return null;

  const title = normalizeText(record.name);
  if (title === "") return null;

  const link = buildLink(origin, record);
  const recordId = String(record.id ?? "").trim();
  const guid = hashGuid(recordId === "" ? link : `agi-eval:${recordId}`);
  const pubDate = pickPubDate(record);
  const summary = buildSummary(record);

  return {
    guid,
    title,
    link,
    pubDate,
    author: "AGI-Eval",
    summary,
    sourceId: "agi-eval-evaluation",
  };
}
|
|
153
|
+
|
|
154
|
+
/**
 * Plugin entry point: fetch every requested source and return feed items.
 *
 * Sources are requested one after another. Records are converted with
 * `toFeedItem`, de-duplicated by GUID (first occurrence wins), and sorted
 * newest first.
 *
 * @param {string} sourceId - The configured source URL.
 * @param {object} ctx - Plugin context; `ctx.deps` is stored in the
 *   module-level `_deps` for the helpers to use.
 * @returns {Promise<object[]>} Feed items ordered by descending `pubDate`.
 * @throws {Error} When a request fails or no item could be built.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const origin = pickOrigin(sourceId);
  const sources = parseSources(sourceId);

  const pairs = [];
  for (const source of sources) {
    const records = await fetchBySource(origin, source);
    records.forEach((record) => {
      pairs.push({ source, record });
    });
  }

  // A Map keeps insertion order, so the first record seen for a GUID wins.
  const byGuid = new Map();
  for (const { source, record } of pairs) {
    const item = toFeedItem(record, origin, source);
    if (item && !byGuid.has(item.guid)) {
      byGuid.set(item.guid, item);
    }
  }

  const items = [...byGuid.values()];
  if (items.length === 0) {
    throw new Error("[agi-eval-evaluation] 未解析到条目,接口结构可能已变化");
  }

  items.sort((left, right) => right.pubDate.getTime() - left.pubDate.getTime());
  return items;
}
|
|
183
|
+
|
|
184
|
+
/**
 * Plugin descriptor for the AGI-Eval evaluation list.
 *
 * `listUrlPattern` matches http(s) URLs on agi-eval.org or agi-eval.cn whose
 * path is `/evaluation/home`, optionally followed by one more path segment,
 * a trailing slash, and a query string.
 */
const agiEvalEvaluationPlugin = {
  id: "agi-eval-evaluation",
  listUrlPattern: /^https?:\/\/agi-eval\.(org|cn)\/evaluation\/home(?:\/[^/?#]+)?\/?(?:\?.*)?$/i,
  fetchItems,
};

export default agiEvalEvaluationPlugin;
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
let _deps;
|
|
2
|
+
|
|
3
|
+
// Amii Research & Talent 插件:抓取人物卡片列表(不做正文 enrich)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
const PEOPLE_PATH_RE = /^\/people\/[^/?#]+\/?$/i;
|
|
7
|
+
|
|
8
|
+
/**
 * Collapse every run of whitespace into a single space and trim both ends.
 *
 * @param {string|null|undefined} text - Raw text; null and undefined are
 *   treated as the empty string.
 * @returns {string} The normalized text.
 */
function normalizeText(text) {
  const raw = text ?? "";
  const collapsed = raw.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
11
|
+
|
|
12
|
+
/**
 * Hash `input` with SHA-256 and return the hex digest, used as an item GUID.
 * Relies on the module-level `_deps`, which `fetchItems` assigns from
 * `ctx.deps` before any item is built.
 *
 * @param {string} input - Text to hash.
 * @returns {string} Hex-encoded digest.
 */
function hashGuid(input) {
  const hasher = _deps.createHash("sha256");
  return hasher.update(input).digest("hex");
}
|
|
15
|
+
|
|
16
|
+
/**
 * Resolve an anchor href to an absolute profile URL.
 *
 * The URL fragment is removed before returning: it never identifies a
 * different page, and leaving it in would let `/people/x` and
 * `/people/x#bio` pass the caller's link-based de-duplication as two items
 * with different GUIDs.
 *
 * @param {string|null|undefined} rawHref - The anchor's `href` attribute.
 * @param {string|URL} pageUrl - Base URL used to resolve relative hrefs.
 * @returns {string|null} The absolute http(s) URL without fragment when its
 *   path matches `PEOPLE_PATH_RE`; null for a missing, unparseable,
 *   non-http(s) or non-matching href.
 */
function resolvePeopleLink(rawHref, pageUrl) {
  if (!rawHref) return null;

  let url;
  try {
    url = new URL(rawHref, pageUrl);
  } catch {
    // Unparseable href: treat as "not a profile link".
    return null;
  }

  if (!/^https?:$/i.test(url.protocol)) return null;
  if (!PEOPLE_PATH_RE.test(url.pathname)) return null;

  url.hash = "";
  return url.href;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Collect one feed item per profile card found in the parsed page.
 *
 * Every anchor whose href contains `/people/` is considered. An anchor is
 * skipped when its link does not resolve, was already collected, or when it
 * contains no `<h3>` text to use as the title. The first `<p>` inside the
 * anchor, if any, becomes the summary. The page shows no date, so `pubDate`
 * is the time of extraction.
 *
 * @param {object} root - Parsed document exposing `querySelectorAll`.
 * @param {string|URL} pageUrl - Base URL for resolving relative hrefs.
 * @returns {object[]} Items in document order, unique by link.
 */
function extractPeopleItems(root, pageUrl) {
  // A Map keeps insertion order and doubles as the "already seen" check.
  const byLink = new Map();

  for (const anchor of root.querySelectorAll('a[href*="/people/"]')) {
    const link = resolvePeopleLink(anchor.getAttribute("href"), pageUrl);
    if (link === null || byLink.has(link)) continue;

    const title = normalizeText(anchor.querySelector("h3")?.textContent);
    if (title === "") continue;

    const summary = normalizeText(anchor.querySelector("p")?.textContent);
    byLink.set(link, {
      guid: hashGuid(link),
      title,
      link,
      pubDate: new Date(),
      author: "Amii",
      summary: summary || undefined,
    });
  }

  return Array.from(byLink.values());
}
|
|
54
|
+
|
|
55
|
+
/**
 * Plugin entry point: load the Research & Talent page and extract its
 * profile cards.
 *
 * @param {string} sourceId - URL of the page to fetch.
 * @param {object} ctx - Plugin context. `ctx.deps` is stored in the
 *   module-level `_deps`; `ctx.fetchHtml` is called with `{ waitMs: 3000 }`
 *   and is expected to resolve to `{ html, finalUrl }`.
 * @returns {Promise<object[]>} The extracted items.
 * @throws {Error} When no profile item could be extracted.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;

  const page = await ctx.fetchHtml(sourceId, { waitMs: 3000 });
  const html = page.html;
  const finalUrl = page.finalUrl;

  const root = _deps.parseHtml(html);
  const pageUrl = new URL(finalUrl);
  const items = extractPeopleItems(root, pageUrl);

  if (items.length > 0) return items;
  throw new Error("[amii-research-talent] 未解析到人物条目,页面结构可能已变化");
}
|
|
68
|
+
|
|
69
|
+
/**
 * Plugin descriptor for the Amii Research & Talent page.
 *
 * `listUrlPattern` matches http(s) URLs on amii.ca (with or without `www.`)
 * whose path is `/research-talent`, optionally followed by a trailing slash
 * and a query string.
 */
const amiiResearchTalentPlugin = {
  id: "amii-research-talent",
  listUrlPattern: /^https?:\/\/(www\.)?amii\.ca\/research-talent\/?(\?.*)?$/i,
  fetchItems,
};

export default amiiResearchTalentPlugin;
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
let _deps;
|
|
2
|
+
|
|
3
|
+
// Anthropic Research 插件:抓取研究页列表条目(不含 enrich)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
const ANTHROPIC_ORIGIN = "https://www.anthropic.com";
|
|
8
|
+
const MONTH_TO_INDEX = {
|
|
9
|
+
jan: 0,
|
|
10
|
+
feb: 1,
|
|
11
|
+
mar: 2,
|
|
12
|
+
apr: 3,
|
|
13
|
+
may: 4,
|
|
14
|
+
jun: 5,
|
|
15
|
+
jul: 6,
|
|
16
|
+
aug: 7,
|
|
17
|
+
sep: 8,
|
|
18
|
+
oct: 9,
|
|
19
|
+
nov: 10,
|
|
20
|
+
dec: 11,
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
/**
 * Collapse every run of whitespace into a single space and trim both ends.
 *
 * @param {string|null|undefined} text - Raw text.
 * @returns {string} The normalized text; "" for null or undefined.
 */
function normalizeText(text) {
  if (text == null) return "";
  return text.replace(/\s+/g, " ").trim();
}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
/**
 * Produce an item GUID: the SHA-256 digest of `input` as a hex string.
 * Uses `createHash` from the module-level `_deps`, which is only populated
 * once `fetchItems` has run.
 *
 * @param {string} input - Text to hash.
 * @returns {string} Hex-encoded digest.
 */
function hashGuid(input) {
  const sha256 = _deps.createHash("sha256");
  const guid = sha256.update(input).digest("hex");
  return guid;
}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
/**
 * Resolve an href against a base URL, keeping only http(s) results.
 *
 * @param {string|null|undefined} rawHref - The raw `href` attribute value.
 * @param {string|URL} baseUrl - Base used for relative hrefs.
 * @returns {string|null} The absolute URL, or null when the href is missing,
 *   blank, a pure fragment (`#...`), a `javascript:` link, unparseable, or
 *   resolves to a non-http(s) scheme.
 */
function toAbsoluteHttpUrl(rawHref, baseUrl) {
  const href = rawHref ? rawHref.trim() : "";
  const isUnusable =
    href === "" || href.startsWith("#") || href.startsWith("javascript:");
  if (isUnusable) return null;

  let resolved;
  try {
    resolved = new URL(href, baseUrl);
  } catch {
    return null;
  }

  return /^https?:$/i.test(resolved.protocol) ? resolved.href : null;
}
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
/**
 * Decide whether a URL path points at an individual research article.
 *
 * @param {string} pathname - URL path to classify.
 * @returns {boolean} True when the path lies strictly below `/research/`
 *   and is not under `/research/team/`.
 */
function isResearchArticlePath(pathname) {
  const prefix = "/research/";
  const isBelowResearch =
    pathname.startsWith(prefix) && pathname.length > prefix.length;
  return isBelowResearch && !pathname.startsWith("/research/team/");
}
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
/**
 * Parse the date text shown on a research card.
 *
 * Text shaped like "Mar 5, 2024" or "March 5, 2024" is read explicitly and
 * pinned to 12:00 UTC. Any other text is handed to the Date constructor as
 * a fallback.
 *
 * @param {string|null|undefined} dateText - Raw date text.
 * @returns {Date|undefined} The parsed date, or undefined when the text is
 *   empty, names a day that does not exist, or cannot be parsed.
 */
function parsePubDate(dateText) {
  const normalized = normalizeText(dateText);
  if (!normalized) return undefined;

  const m = normalized.match(/^([A-Za-z]{3,9})\s+(\d{1,2}),\s*(\d{4})$/);
  if (m) {
    const month = MONTH_TO_INDEX[m[1].slice(0, 3).toLowerCase()];
    if (month != null) {
      const day = Number(m[2]);
      const year = Number(m[3]);
      const d = new Date(Date.UTC(year, month, day, 12, 0, 0));

      // Date.UTC never yields NaN for in-range integers; an impossible day
      // such as "Feb 31" silently rolls into the next month. Reading the
      // fields back is the only reliable validity check.
      const isRealDay =
        d.getUTCFullYear() === year &&
        d.getUTCMonth() === month &&
        d.getUTCDate() === day;
      return isRealDay ? d : undefined;
    }
  }

  const direct = new Date(normalized);
  if (!Number.isNaN(direct.getTime())) return direct;
  return undefined;
}
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
/**
 * Extract a title from a research card anchor.
 *
 * Preference order: the text of the first heading found (`h2`, then `h3`,
 * then `h4`); otherwise the last non-empty `<span>` text; otherwise the
 * anchor's whole text.
 *
 * @param {object} anchor - Parsed anchor element exposing `querySelector`,
 *   `querySelectorAll` and `textContent`.
 * @returns {string} The normalized title, possibly empty.
 */
function extractTitle(anchor) {
  let heading = null;
  for (const tag of ["h2", "h3", "h4"]) {
    heading = anchor.querySelector(tag);
    if (heading != null) break;
  }
  const headingText = normalizeText(heading?.textContent);
  if (headingText) return headingText;

  const spanTexts = [];
  for (const span of anchor.querySelectorAll("span")) {
    const text = normalizeText(span.textContent);
    if (text) spanTexts.push(text);
  }
  if (spanTexts.length > 0) return spanTexts[spanTexts.length - 1];

  return normalizeText(anchor.textContent);
}
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
/**
 * Extract a summary from the first `<p>` inside a research card anchor.
 *
 * @param {object} anchor - Parsed anchor element exposing `querySelector`.
 * @param {string} title - Title already chosen for the item.
 * @returns {string|undefined} The paragraph text, or undefined when there is
 *   none or it merely repeats the title.
 */
function extractSummary(anchor, title) {
  const paragraph = anchor.querySelector("p");
  const text = normalizeText(paragraph?.textContent);
  return text !== "" && text !== title ? text : undefined;
}
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
/**
 * Turn one anchor element into a feed item, if it links to a research
 * article.
 *
 * @param {object} anchor - Parsed anchor element.
 * @param {string|URL} finalUrl - Base URL for resolving the anchor's href.
 * @returns {object|null} The feed item, or null when the href is not a
 *   usable http(s) URL, the path is not a research article, or no title
 *   could be extracted. `pubDate` falls back to the current time when the
 *   card's `<time>` text cannot be parsed.
 */
function parseAnchorItem(anchor, finalUrl) {
  const link = toAbsoluteHttpUrl(anchor.getAttribute("href"), finalUrl);
  if (link === null) return null;

  const { pathname } = new URL(link);
  if (!isResearchArticlePath(pathname)) return null;

  const title = extractTitle(anchor);
  if (!title) return null;

  const timeText = normalizeText(anchor.querySelector("time")?.textContent);
  const pubDate = parsePubDate(timeText) ?? new Date();
  const summary = extractSummary(anchor, title);
  const guid = hashGuid(link);

  return { guid, title, link, pubDate, author: "Anthropic", summary };
}
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
/**
 * Plugin entry point: load the research listing page and extract its
 * article cards.
 *
 * Every `a[href]` on the page is examined; items are unique by link, in
 * document order.
 *
 * @param {string} sourceId - URL of the page to fetch.
 * @param {object} ctx - Plugin context. `ctx.deps` is stored in the
 *   module-level `_deps`; `ctx.fetchHtml` is called with `{ waitMs: 4500 }`
 *   and is expected to resolve to `{ html, finalUrl }`.
 * @returns {Promise<object[]>} The extracted items.
 * @throws {Error} When no research item could be extracted.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;

  const page = await ctx.fetchHtml(sourceId, { waitMs: 4500 });
  const html = page.html;
  const finalUrl = page.finalUrl;
  const root = _deps.parseHtml(html);

  // Fall back to the site origin when the fetcher reports no final URL.
  const baseUrl = finalUrl || ANTHROPIC_ORIGIN;

  // A Map keeps insertion order, so the first anchor for a link wins.
  const byLink = new Map();
  for (const anchor of root.querySelectorAll("a[href]")) {
    const item = parseAnchorItem(anchor, baseUrl);
    if (item && !byLink.has(item.link)) {
      byLink.set(item.link, item);
    }
  }

  const items = [...byLink.values()];
  if (items.length === 0) {
    throw new Error("[anthropic-research] 未解析到研究条目,页面结构可能已变化");
  }
  return items;
}
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
/**
 * Plugin descriptor for the Anthropic research listing.
 *
 * `listUrlPattern` matches http(s) URLs on anthropic.com (with or without
 * `www.`) whose path is `/research`, optionally followed by a trailing slash
 * and a query string.
 */
const anthropicResearchPlugin = {
  id: "anthropic-research",
  listUrlPattern: /^https?:\/\/(www\.)?anthropic\.com\/research(?:\/)?(\?.*)?$/i,
  fetchItems,
};

export default anthropicResearchPlugin;
|