rssany 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -22
- package/app/plugins/builtin/agi-eval-evaluation.rssany.js +6 -7
- package/app/plugins/builtin/amii-research-talent.rssany.js +6 -7
- package/app/plugins/builtin/anthropic-research.rssany.js +6 -8
- package/app/plugins/builtin/appen-resources.rssany.js +6 -7
- package/app/plugins/builtin/baai-wudao-paper-article.rssany.js +9 -10
- package/app/plugins/builtin/baaidata-csdn.rssany.js +6 -7
- package/app/plugins/builtin/baidu-research.rssany.js +5 -8
- package/app/plugins/builtin/brightdata-blog.rssany.js +6 -11
- package/app/plugins/builtin/bytedance-seed-research.rssany.js +5 -7
- package/app/plugins/builtin/email.rssany.js +9 -9
- package/app/plugins/builtin/five-radar.rssany.js +9 -11
- package/app/plugins/builtin/flageval-news.rssany.js +5 -7
- package/app/plugins/builtin/google-deepmind-research.rssany.js +6 -8
- package/app/plugins/builtin/google-research-datasets.rssany.js +6 -8
- package/app/plugins/builtin/google-research.rssany.js +6 -8
- package/app/plugins/builtin/hacker-news-newest.rssany.js +7 -9
- package/app/plugins/builtin/harvard-dataverse.rssany.js +6 -8
- package/app/plugins/builtin/huaweicloud-bbs-blogs.rssany.js +7 -9
- package/app/plugins/builtin/lingowhale.rssany.js +7 -9
- package/app/plugins/builtin/meituan-tech.rssany.js +7 -10
- package/app/plugins/builtin/meta-ai-publications.rssany.js +6 -11
- package/app/plugins/builtin/mila-quebec.rssany.js +6 -8
- package/app/plugins/builtin/mit-csail-research.rssany.js +7 -9
- package/app/plugins/builtin/moonshot.rssany.js +6 -8
- package/app/plugins/builtin/opendatalab-news.rssany.js +6 -7
- package/app/plugins/builtin/opendatalab.rssany.js +5 -6
- package/app/plugins/builtin/opendrivelab-autonomous-driving.rssany.js +6 -7
- package/app/plugins/builtin/opendrivelab-embodiedai.rssany.js +7 -8
- package/app/plugins/builtin/opendrivelab-publications.rssany.js +6 -8
- package/app/plugins/builtin/opendrivelab.rssany.js +7 -8
- package/app/plugins/builtin/paperswithcode.rssany.js +6 -8
- package/app/plugins/builtin/pjlab-adg-publications.rssany.js +7 -9
- package/app/plugins/builtin/rss.rssany.js +11 -12
- package/app/plugins/builtin/selectdataset.rssany.js +6 -8
- package/app/plugins/builtin/sensetime-tech-achievements.rssany.js +7 -8
- package/app/plugins/builtin/supervisely-blog.rssany.js +6 -8
- package/app/plugins/builtin/theinformation-briefings.rssany.js +7 -13
- package/app/plugins/builtin/uci-ml-repository.rssany.js +6 -7
- package/app/plugins/builtin/venturebeat.rssany.js +7 -9
- package/app/plugins/builtin/worldlabs.rssany.js +6 -8
- package/app/plugins/builtin/x.rssany.js +7 -9
- package/app/plugins/builtin/xiaohongshu.rssany.js +119 -56
- package/app/plugins/builtin/zhipu-research.rssany.js +5 -8
- package/app/plugins/site.rssany.js +25 -26
- package/{statics → app/statics}/README.md +7 -7
- package/app/webui/build/200.html +51 -0
- package/{webui/build/_app/immutable/assets/0.BB88QFoe.css → app/webui/build/_app/immutable/assets/0.DsKls1SN.css} +1 -1
- package/app/webui/build/_app/immutable/assets/13.Qu_tY6H9.css +1 -0
- package/app/webui/build/_app/immutable/assets/14.DfMfOrS3.css +1 -0
- package/app/webui/build/_app/immutable/assets/16.Cw9oSkcO.css +1 -0
- package/app/webui/build/_app/immutable/assets/4.Di6rvlY-.css +1 -0
- package/{webui/build/_app/immutable/assets/SourcesList.yTBBi3_m.css → app/webui/build/_app/immutable/assets/SourcesList.D5Lso0bo.css} +1 -1
- package/{webui/build/_app/immutable/assets/homeFeedPanelStore.CSvlNcpm.css → app/webui/build/_app/immutable/assets/homeFeedPanelStore.CE6xTfsa.css} +1 -1
- package/app/webui/build/_app/immutable/chunks/6prdYIKP.js +1 -0
- package/{webui/build/_app/immutable/chunks/Xy_fhzQq.js → app/webui/build/_app/immutable/chunks/B-CeeY89.js} +1 -1
- package/app/webui/build/_app/immutable/chunks/B2cyTHdf.js +2 -0
- package/{webui/build/_app/immutable/chunks/DjNLq3TF.js → app/webui/build/_app/immutable/chunks/B6WG2Sd3.js} +1 -1
- package/app/webui/build/_app/immutable/chunks/BA4Gucnq.js +1 -0
- package/{webui/build/_app/immutable/chunks/xtNWTdbD.js → app/webui/build/_app/immutable/chunks/BAJAS8BI.js} +1 -1
- package/{webui/build/_app/immutable/chunks/Dt2CddFe.js → app/webui/build/_app/immutable/chunks/BkD3yAYe.js} +1 -1
- package/{webui/build/_app/immutable/chunks/DFuhmi31.js → app/webui/build/_app/immutable/chunks/C4uF_YIK.js} +1 -1
- package/{webui/build/_app/immutable/chunks/Dw782Tjs.js → app/webui/build/_app/immutable/chunks/C8umpVpB.js} +1 -1
- package/{webui/build/_app/immutable/chunks/BQqoDzLx.js → app/webui/build/_app/immutable/chunks/CFwxUBGi.js} +1 -1
- package/{webui/build/_app/immutable/chunks/tB7QMF3U.js → app/webui/build/_app/immutable/chunks/CGCMIfh3.js} +1 -1
- package/{webui/build/_app/immutable/chunks/BK3WtZwv.js → app/webui/build/_app/immutable/chunks/CS53ooo0.js} +1 -1
- package/app/webui/build/_app/immutable/chunks/CVW0ymE1.js +1 -0
- package/{webui/build/_app/immutable/chunks/B-OsL1Ct.js → app/webui/build/_app/immutable/chunks/ChUctqXA.js} +1 -1
- package/{webui/build/_app/immutable/chunks/D5GvRCv7.js → app/webui/build/_app/immutable/chunks/ClknbeNl.js} +1 -1
- package/{webui/build/_app/immutable/chunks/Bu9HsS-V.js → app/webui/build/_app/immutable/chunks/CqYSO3Dx.js} +1 -1
- package/{webui/build/_app/immutable/chunks/CWNeClHp.js → app/webui/build/_app/immutable/chunks/D6kzEN_P.js} +1 -1
- package/app/webui/build/_app/immutable/chunks/DAdOEnFb.js +1 -0
- package/{webui/build/_app/immutable/chunks/Cihqbfi5.js → app/webui/build/_app/immutable/chunks/DCEayuDt.js} +1 -1
- package/app/webui/build/_app/immutable/chunks/DJ2e04vK.js +36 -0
- package/{webui/build/_app/immutable/chunks/DEDI7Ecm.js → app/webui/build/_app/immutable/chunks/DL3Q5sfb.js} +1 -1
- package/{webui/build/_app/immutable/chunks/CVzlFH44.js → app/webui/build/_app/immutable/chunks/DVa8Y-mQ.js} +1 -1
- package/app/webui/build/_app/immutable/chunks/DkamXS6W.js +36 -0
- package/app/webui/build/_app/immutable/chunks/DoRPmqLn.js +2 -0
- package/app/webui/build/_app/immutable/chunks/DsxvjlCC.js +13 -0
- package/{webui/build/_app/immutable/chunks/Bp63qm3L.js → app/webui/build/_app/immutable/chunks/Dyvi1wBH.js} +1 -1
- package/{webui/build/_app/immutable/chunks/CmjOpds-.js → app/webui/build/_app/immutable/chunks/_qj9U-za.js} +1 -1
- package/app/webui/build/_app/immutable/chunks/vtBo8kBV.js +1 -0
- package/app/webui/build/_app/immutable/entry/app.RFfWi3_i.js +2 -0
- package/app/webui/build/_app/immutable/entry/start.DU_kyeGS.js +1 -0
- package/{webui/build/_app/immutable/nodes/0.I1lQdWMl.js → app/webui/build/_app/immutable/nodes/0.DK_mcVDm.js} +1 -1
- package/app/webui/build/_app/immutable/nodes/1.0PRrU2uQ.js +1 -0
- package/{webui/build/_app/immutable/nodes/10.CvfUsqsw.js → app/webui/build/_app/immutable/nodes/10.CsxzlUER.js} +1 -1
- package/app/webui/build/_app/immutable/nodes/11.D-PkhIRW.js +1 -0
- package/{webui/build/_app/immutable/nodes/12.DVFJuIWI.js → app/webui/build/_app/immutable/nodes/12.GGf-JLUY.js} +1 -1
- package/app/webui/build/_app/immutable/nodes/13.DWWcH27k.js +6 -0
- package/app/webui/build/_app/immutable/nodes/14.COwSLwDN.js +1 -0
- package/app/webui/build/_app/immutable/nodes/15.nDN_AHrs.js +1 -0
- package/app/webui/build/_app/immutable/nodes/16.zfSe93Ab.js +24 -0
- package/app/webui/build/_app/immutable/nodes/2.AJd2163d.js +1 -0
- package/app/webui/build/_app/immutable/nodes/3.CEVEHuaH.js +1 -0
- package/app/webui/build/_app/immutable/nodes/4.BT_N8pCh.js +2 -0
- package/{webui/build/_app/immutable/nodes/5.B6fR3n6J.js → app/webui/build/_app/immutable/nodes/5.BZScQ2CH.js} +1 -1
- package/{webui/build/_app/immutable/nodes/6.j2O5Mwjv.js → app/webui/build/_app/immutable/nodes/6.CkFk8X--.js} +1 -1
- package/app/webui/build/_app/immutable/nodes/7.CuQJk7te.js +1 -0
- package/{webui/build/_app/immutable/nodes/8.Bw_d63B_.js → app/webui/build/_app/immutable/nodes/8.DIavWJnU.js} +1 -1
- package/{webui/build/_app/immutable/nodes/9.pMMi5PP6.js → app/webui/build/_app/immutable/nodes/9.Db30M8x0.js} +1 -1
- package/app/webui/build/_app/version.json +1 -0
- package/app/webui/build/apple-touch-icon.png +0 -0
- package/app/webui/build/favicon.ico +0 -0
- package/app/webui/build/favicon.png +0 -0
- package/bin/rssany.js +226 -6
- package/dist/index.js +209 -152
- package/dist/index.js.map +1 -1
- package/package.json +22 -16
- package/scripts/dev.mjs +114 -0
- package/scripts/reset.mjs +1 -1
- package/init/config.json +0 -17
- package/init/sources.json +0 -353
- package/statics/401.html +0 -56
- package/statics/404.html +0 -12
- package/statics/image.png +0 -0
- package/webui/build/200.html +0 -49
- package/webui/build/_app/immutable/assets/13.BhO9zvFi.css +0 -1
- package/webui/build/_app/immutable/assets/14.CujIhjQK.css +0 -1
- package/webui/build/_app/immutable/assets/16.PP9XLDf7.css +0 -1
- package/webui/build/_app/immutable/assets/4.9wPHhVwv.css +0 -1
- package/webui/build/_app/immutable/chunks/5LVkDJzw.js +0 -1
- package/webui/build/_app/immutable/chunks/B2Q1a1-H.js +0 -2
- package/webui/build/_app/immutable/chunks/BbWUOQ_m.js +0 -1
- package/webui/build/_app/immutable/chunks/Bns1MuyM.js +0 -36
- package/webui/build/_app/immutable/chunks/DMWEh-Ek.js +0 -2
- package/webui/build/_app/immutable/chunks/bvuf_jZd.js +0 -36
- package/webui/build/_app/immutable/chunks/lk5LaiqA.js +0 -1
- package/webui/build/_app/immutable/chunks/mW5RwvnK.js +0 -13
- package/webui/build/_app/immutable/entry/app.BVkrDt5l.js +0 -2
- package/webui/build/_app/immutable/entry/start.D3Q-BMMd.js +0 -1
- package/webui/build/_app/immutable/nodes/1.BiQQfx2j.js +0 -1
- package/webui/build/_app/immutable/nodes/11.B4LHPNL6.js +0 -1
- package/webui/build/_app/immutable/nodes/13.nT3SOzEB.js +0 -1
- package/webui/build/_app/immutable/nodes/14.DfaAf0f8.js +0 -1
- package/webui/build/_app/immutable/nodes/15.CMzkX9OK.js +0 -1
- package/webui/build/_app/immutable/nodes/16.zPgTQNze.js +0 -24
- package/webui/build/_app/immutable/nodes/2.BYWOpaxy.js +0 -1
- package/webui/build/_app/immutable/nodes/3.B8Viux9S.js +0 -1
- package/webui/build/_app/immutable/nodes/4.DTSxpKm7.js +0 -2
- package/webui/build/_app/immutable/nodes/7.Bd2USIrl.js +0 -1
- package/webui/build/_app/version.json +0 -1
- /package/{webui → app/webui}/build/_app/env.js +0 -0
- /package/{webui → app/webui}/build/_app/immutable/assets/10.Dj8_pmut.css +0 -0
- /package/{webui → app/webui}/build/_app/immutable/assets/11.qYZMiTb0.css +0 -0
- /package/{webui → app/webui}/build/_app/immutable/assets/12.DfJcfUWl.css +0 -0
- /package/{webui → app/webui}/build/_app/immutable/assets/15.nNGjXhCQ.css +0 -0
- /package/{webui → app/webui}/build/_app/immutable/assets/5.B-dPiwB7.css +0 -0
- /package/{webui → app/webui}/build/_app/immutable/assets/6.B27N7pdA.css +0 -0
- /package/{webui → app/webui}/build/_app/immutable/assets/7.CrNxmd8B.css +0 -0
- /package/{webui → app/webui}/build/_app/immutable/assets/8.Cgji2b15.css +0 -0
- /package/{webui → app/webui}/build/_app/immutable/assets/9.BsCIAvn3.css +0 -0
- /package/{webui → app/webui}/build/_app/immutable/assets/BackToParentRoute.DGk-X5ow.css +0 -0
- /package/{webui → app/webui}/build/_app/immutable/chunks/BUApaBEI.js +0 -0
- /package/{webui → app/webui}/build/_app/immutable/chunks/Bfc47y5P.js +0 -0
- /package/{webui → app/webui}/build/_app/immutable/chunks/CBY2biv-.js +0 -0
- /package/{webui → app/webui}/build/_app/immutable/chunks/hp4PFHFv.js +0 -0
- /package/{webui → app/webui}/build/_app/immutable/nodes/17.BtYZF6FM.js +0 -0
- /package/{webui → app/webui}/build/_app/immutable/nodes/18.BIzqhTqv.js +0 -0
package/README.md
CHANGED
|
@@ -42,41 +42,41 @@
|
|
|
42
42
|
### 全局安装(推荐)
|
|
43
43
|
|
|
44
44
|
```bash
|
|
45
|
-
npm install -g rssany # 与 npm i -g rssany 相同
|
|
46
|
-
rssany
|
|
47
|
-
```
|
|
48
|
-
|
|
49
|
-
安装包内已包含构建好的后端与 Web
|
|
45
|
+
npm install -g rssany # 与 npm i -g rssany 相同
|
|
46
|
+
rssany start
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
安装包内已包含构建好的后端与 Web 界面;用 **`rssany start`** 后台启动并直接返回访问地址(默认 **`http://127.0.0.1:18473/`**,端口可在**运行命令时当前目录**下的 `.env` 里设置 `PORT`);用 **`rssany stop`** 关闭后台服务并输出执行状态。
|
|
50
50
|
|
|
51
51
|
- **数据目录**:首次运行会在 **`~/.rssany/`**(Windows:`%USERPROFILE%\.rssany\`)自动从包内 **`init/`** 生成 `sources.json`、`config.json` 等(已存在则不会覆盖)。
|
|
52
|
-
- **可选配置**:在启动 `rssany` 时的**当前目录**放置 `.env`(可参考仓库里的 `.env.example`),用于 JWT、OAuth、SMTP、LLM(如 `OPENAI_API_KEY` / `OPENAI_BASE_URL` / `OPENAI_MODEL`)等。
|
|
52
|
+
- **可选配置**:在启动 `rssany start` 时的**当前目录**放置 `.env`(可参考仓库里的 `.env.example`),用于 JWT、OAuth、SMTP、LLM(如 `OPENAI_API_KEY` / `OPENAI_BASE_URL` / `OPENAI_MODEL`)等。
|
|
53
53
|
- **重置全部本地数据**(结束占用 `PORT` 的进程并删除用户目录,慎用):执行 **`rssany reset`**;在含 `.env` 的目录下运行可读取 `PORT` / `RSSANY_USER_DIR`,或事先在环境里导出这些变量。
|
|
54
54
|
|
|
55
|
-
|
|
55
|
+
CLI 名为 **`rssany`**;裸 `rssany` 只显示用法,不再直接进入服务运行状态。
|
|
56
56
|
|
|
57
57
|
### 从源码运行(开发 / 贡献)
|
|
58
58
|
|
|
59
|
-
需要 **
|
|
59
|
+
需要 **npm**:
|
|
60
60
|
|
|
61
61
|
```bash
|
|
62
|
-
|
|
63
|
-
|
|
62
|
+
npm install
|
|
63
|
+
npm run webui:install
|
|
64
64
|
cp .env.example .env # 按需修改
|
|
65
65
|
```
|
|
66
66
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
```bash
|
|
70
|
-
|
|
71
|
-
```
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
67
|
+
**开发**(单一后端地址;前端静态构建自动 watch):
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
npm run dev
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
该命令会先启动 `webui` 静态构建 watch,等首轮 HTML 构建完成后再启动后端服务;浏览器只访问后端地址(默认 `http://127.0.0.1:3999/`),不再单独启动前端开发服务器。
|
|
74
|
+
|
|
75
|
+
或分步:一个终端运行 `npm run webui:watch`,另一个终端运行 `npm run dev:backend`。
|
|
76
76
|
|
|
77
|
-
**生产**(本仓库):`
|
|
77
|
+
**生产**(本仓库):`npm run webui:build && npm start`。
|
|
78
78
|
|
|
79
|
-
**重置本地数据**(与全局安装的 `rssany reset` 逻辑相同):`
|
|
79
|
+
**重置本地数据**(与全局安装的 `rssany reset` 逻辑相同):`npm run reset`。
|
|
80
80
|
|
|
81
81
|
发布到 npm 时 `prepublishOnly` 会执行 `build:all`(后端 `vite build` + `webui:build`)。
|
|
82
82
|
|
|
@@ -166,4 +166,4 @@ sources.json / 信源插件
|
|
|
166
166
|
|
|
167
167
|
## 许可证
|
|
168
168
|
|
|
169
|
-
MIT
|
|
169
|
+
MIT
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
export const id = "agi-eval-evaluation";
|
|
2
|
+
export const name = "Agi Eval Evaluation";
|
|
3
|
+
export const listUrlPattern = /^https:\/\/agi-eval\.(org|cn)\/evaluation\/home(?:\/[^/?#]+)?\/?(?:\?.*)?$/i;
|
|
4
|
+
|
|
1
5
|
let _deps;
|
|
2
6
|
|
|
3
7
|
|
|
@@ -28,7 +32,7 @@ function clampText(text, maxLen = 300) {
|
|
|
28
32
|
function pickOrigin(sourceId) {
|
|
29
33
|
try {
|
|
30
34
|
const url = new URL(sourceId);
|
|
31
|
-
if (/^https
|
|
35
|
+
if (/^https:$/i.test(url.protocol)) return url.origin;
|
|
32
36
|
} catch {
|
|
33
37
|
// ignore
|
|
34
38
|
}
|
|
@@ -151,7 +155,7 @@ function toFeedItem(record, origin, _source) {
|
|
|
151
155
|
};
|
|
152
156
|
}
|
|
153
157
|
|
|
154
|
-
async function fetchItems(sourceId, ctx) {
|
|
158
|
+
export async function fetchItems(sourceId, ctx) {
|
|
155
159
|
_deps = ctx.deps;
|
|
156
160
|
const origin = pickOrigin(sourceId);
|
|
157
161
|
const sources = parseSources(sourceId);
|
|
@@ -181,8 +185,3 @@ async function fetchItems(sourceId, ctx) {
|
|
|
181
185
|
return items;
|
|
182
186
|
}
|
|
183
187
|
|
|
184
|
-
export default {
|
|
185
|
-
id: "agi-eval-evaluation",
|
|
186
|
-
listUrlPattern: /^https?:\/\/agi-eval\.(org|cn)\/evaluation\/home(?:\/[^/?#]+)?\/?(?:\?.*)?$/i,
|
|
187
|
-
fetchItems,
|
|
188
|
-
};
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
export const id = "amii-research-talent";
|
|
2
|
+
export const name = "Amii Research Talent";
|
|
3
|
+
export const listUrlPattern = /^https:\/\/(www\.)?amii\.ca\/research-talent\/?(\?.*)?$/i;
|
|
4
|
+
|
|
1
5
|
let _deps;
|
|
2
6
|
|
|
3
7
|
// Amii Research & Talent 插件:抓取人物卡片列表(不做正文 enrich)
|
|
@@ -17,7 +21,7 @@ function resolvePeopleLink(rawHref, pageUrl) {
|
|
|
17
21
|
if (!rawHref) return null;
|
|
18
22
|
try {
|
|
19
23
|
const url = new URL(rawHref, pageUrl);
|
|
20
|
-
if (!/^https
|
|
24
|
+
if (!/^https:$/i.test(url.protocol)) return null;
|
|
21
25
|
if (!PEOPLE_PATH_RE.test(url.pathname)) return null;
|
|
22
26
|
return url.href;
|
|
23
27
|
} catch {
|
|
@@ -52,7 +56,7 @@ function extractPeopleItems(root, pageUrl) {
|
|
|
52
56
|
return items;
|
|
53
57
|
}
|
|
54
58
|
|
|
55
|
-
async function fetchItems(sourceId, ctx) {
|
|
59
|
+
export async function fetchItems(sourceId, ctx) {
|
|
56
60
|
_deps = ctx.deps;
|
|
57
61
|
const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 3000 });
|
|
58
62
|
const root = _deps.parseHtml(html);
|
|
@@ -66,8 +70,3 @@ async function fetchItems(sourceId, ctx) {
|
|
|
66
70
|
return items;
|
|
67
71
|
}
|
|
68
72
|
|
|
69
|
-
export default {
|
|
70
|
-
id: "amii-research-talent",
|
|
71
|
-
listUrlPattern: /^https?:\/\/(www\.)?amii\.ca\/research-talent\/?(\?.*)?$/i,
|
|
72
|
-
fetchItems,
|
|
73
|
-
};
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
export const id = "anthropic-research";
|
|
2
|
+
export const name = "Anthropic Research";
|
|
3
|
+
export const listUrlPattern = /^https:\/\/(www\.)?anthropic\.com\/research(?:\/)?(\?.*)?$/i;
|
|
4
|
+
|
|
1
5
|
let _deps;
|
|
2
6
|
|
|
3
7
|
// Anthropic Research 插件:抓取研究页列表条目(不含 enrich)
|
|
@@ -37,7 +41,7 @@ function toAbsoluteHttpUrl(rawHref, baseUrl) {
|
|
|
37
41
|
if (!href || href.startsWith("#") || href.startsWith("javascript:")) return null;
|
|
38
42
|
try {
|
|
39
43
|
const url = new URL(href, baseUrl);
|
|
40
|
-
if (!/^https
|
|
44
|
+
if (!/^https:$/i.test(url.protocol)) return null;
|
|
41
45
|
return url.href;
|
|
42
46
|
} catch {
|
|
43
47
|
return null;
|
|
@@ -123,7 +127,7 @@ function parseAnchorItem(anchor, finalUrl) {
|
|
|
123
127
|
}
|
|
124
128
|
|
|
125
129
|
|
|
126
|
-
async function fetchItems(sourceId, ctx) {
|
|
130
|
+
export async function fetchItems(sourceId, ctx) {
|
|
127
131
|
_deps = ctx.deps;
|
|
128
132
|
const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 4500 });
|
|
129
133
|
const root = _deps.parseHtml(html);
|
|
@@ -147,9 +151,3 @@ async function fetchItems(sourceId, ctx) {
|
|
|
147
151
|
return items;
|
|
148
152
|
}
|
|
149
153
|
|
|
150
|
-
|
|
151
|
-
export default {
|
|
152
|
-
id: "anthropic-research",
|
|
153
|
-
listUrlPattern: /^https?:\/\/(www\.)?anthropic\.com\/research(?:\/)?(\?.*)?$/i,
|
|
154
|
-
fetchItems,
|
|
155
|
-
};
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
export const id = "appen-resources";
|
|
2
|
+
export const name = "Appen Resources";
|
|
3
|
+
export const listUrlPattern = /^https:\/\/(www\.)?appen\.com\/resources\/?(\?.*)?$/i;
|
|
4
|
+
|
|
1
5
|
let _deps;
|
|
2
6
|
|
|
3
7
|
|
|
@@ -26,7 +30,7 @@ function resolveHttpUrl(href, baseUrl) {
|
|
|
26
30
|
if (!raw || raw.startsWith("#") || raw.startsWith("javascript:")) return null;
|
|
27
31
|
try {
|
|
28
32
|
const url = new URL(raw, baseUrl);
|
|
29
|
-
if (!/^https
|
|
33
|
+
if (!/^https:$/i.test(url.protocol)) return null;
|
|
30
34
|
return url;
|
|
31
35
|
} catch {
|
|
32
36
|
return null;
|
|
@@ -108,7 +112,7 @@ function upsertItem(itemsByLink, candidate) {
|
|
|
108
112
|
}
|
|
109
113
|
}
|
|
110
114
|
|
|
111
|
-
async function fetchItems(sourceId, ctx) {
|
|
115
|
+
export async function fetchItems(sourceId, ctx) {
|
|
112
116
|
_deps = ctx.deps;
|
|
113
117
|
const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 4500 });
|
|
114
118
|
const root = _deps.parseHtml(html);
|
|
@@ -148,8 +152,3 @@ async function fetchItems(sourceId, ctx) {
|
|
|
148
152
|
return items;
|
|
149
153
|
}
|
|
150
154
|
|
|
151
|
-
export default {
|
|
152
|
-
id: "appen-resources",
|
|
153
|
-
listUrlPattern: /^https?:\/\/(www\.)?appen\.com\/resources\/?(\?.*)?$/i,
|
|
154
|
-
fetchItems,
|
|
155
|
-
};
|
|
@@ -1,7 +1,11 @@
|
|
|
1
|
+
export const id = "baai-wudao-paper-article";
|
|
2
|
+
export const name = "BAAI Wudao Paper Article";
|
|
3
|
+
export const listUrlPattern = /^https:\/\/(www\.)?github\.com\/BAAI-WuDao\/Paper-Article\/?(?:\?.*)?$/i;
|
|
4
|
+
|
|
1
5
|
let _deps;
|
|
2
6
|
|
|
3
7
|
|
|
4
|
-
const SITE_ID =
|
|
8
|
+
const SITE_ID = id;
|
|
5
9
|
const OWNER = "BAAI-WuDao";
|
|
6
10
|
const REPO = "Paper-Article";
|
|
7
11
|
const README_PATH = "README.md";
|
|
@@ -22,7 +26,7 @@ function cleanUrl(raw) {
|
|
|
22
26
|
const text = normalizeText(raw).replace(/[)>.,;!?]+$/g, "");
|
|
23
27
|
try {
|
|
24
28
|
const url = new URL(text);
|
|
25
|
-
if (!/^https
|
|
29
|
+
if (!/^https:$/i.test(url.protocol)) return null;
|
|
26
30
|
return url.href;
|
|
27
31
|
} catch {
|
|
28
32
|
return null;
|
|
@@ -99,9 +103,9 @@ function parseTitleLine(line) {
|
|
|
99
103
|
function parseLinkLine(line) {
|
|
100
104
|
const text = normalizeText(line);
|
|
101
105
|
if (!text) return null;
|
|
102
|
-
const directMatch = text.match(/链接[::]\s*(https
|
|
106
|
+
const directMatch = text.match(/链接[::]\s*(https:\/\/\S+)/i);
|
|
103
107
|
if (directMatch) return cleanUrl(directMatch[1]);
|
|
104
|
-
const urlMatch = text.match(/(https
|
|
108
|
+
const urlMatch = text.match(/(https:\/\/\S+)/i);
|
|
105
109
|
if (urlMatch) return cleanUrl(urlMatch[1]);
|
|
106
110
|
return null;
|
|
107
111
|
}
|
|
@@ -154,7 +158,7 @@ function parseItemsFromReadme(markdown, baseDate) {
|
|
|
154
158
|
return items;
|
|
155
159
|
}
|
|
156
160
|
|
|
157
|
-
async function fetchItems(sourceId, _ctx) {
|
|
161
|
+
export async function fetchItems(sourceId, _ctx) {
|
|
158
162
|
_deps = _ctx.deps;
|
|
159
163
|
let sourceUrl;
|
|
160
164
|
try {
|
|
@@ -178,8 +182,3 @@ async function fetchItems(sourceId, _ctx) {
|
|
|
178
182
|
return items;
|
|
179
183
|
}
|
|
180
184
|
|
|
181
|
-
export default {
|
|
182
|
-
id: SITE_ID,
|
|
183
|
-
listUrlPattern: /^https?:\/\/(www\.)?github\.com\/BAAI-WuDao\/Paper-Article\/?(?:\?.*)?$/i,
|
|
184
|
-
fetchItems,
|
|
185
|
-
};
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
export const id = "baaidata-csdn";
|
|
2
|
+
export const name = "Baaidata CSDN";
|
|
3
|
+
export const listUrlPattern = /^https:\/\/baaidata\.csdn\.net\/?(?:\?.*)?$/i;
|
|
4
|
+
|
|
1
5
|
let _deps;
|
|
2
6
|
|
|
3
7
|
|
|
@@ -16,7 +20,7 @@ function toAbsoluteUrl(rawHref, pageUrl) {
|
|
|
16
20
|
if (!rawHref) return null;
|
|
17
21
|
try {
|
|
18
22
|
const url = new URL(rawHref, pageUrl);
|
|
19
|
-
if (!/^https
|
|
23
|
+
if (!/^https:$/i.test(url.protocol)) return null;
|
|
20
24
|
return url.href;
|
|
21
25
|
} catch {
|
|
22
26
|
return null;
|
|
@@ -222,7 +226,7 @@ function parseStateItems(html, finalUrl) {
|
|
|
222
226
|
return items;
|
|
223
227
|
}
|
|
224
228
|
|
|
225
|
-
async function fetchItems(sourceId, ctx) {
|
|
229
|
+
export async function fetchItems(sourceId, ctx) {
|
|
226
230
|
_deps = ctx.deps;
|
|
227
231
|
const rendered = await ctx.fetchHtml(sourceId, { waitMs: 3500 });
|
|
228
232
|
const fromDom = parseDomItems(rendered.html, rendered.finalUrl);
|
|
@@ -235,8 +239,3 @@ async function fetchItems(sourceId, ctx) {
|
|
|
235
239
|
throw new Error("[baaidata-csdn] 未解析到条目,页面结构可能已变化");
|
|
236
240
|
}
|
|
237
241
|
|
|
238
|
-
export default {
|
|
239
|
-
id: "baaidata-csdn",
|
|
240
|
-
listUrlPattern: /^https?:\/\/baaidata\.csdn\.net\/?(?:\?.*)?$/i,
|
|
241
|
-
fetchItems,
|
|
242
|
-
};
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
export const id = "baidu-research";
|
|
2
|
+
export const name = "Baidu Research";
|
|
3
|
+
export const listUrlPattern = /^https?:\/\/research\.baidu\.com\/(?:(?:Index|Blog)\/?)?(?:\?.*)?$/i;
|
|
4
|
+
|
|
1
5
|
let _deps;
|
|
2
6
|
|
|
3
7
|
// Baidu Research 插件:抓取 Blog 列表条目(不做正文 enrich)
|
|
@@ -193,7 +197,7 @@ function mergeByLink(itemsA, itemsB) {
|
|
|
193
197
|
}
|
|
194
198
|
|
|
195
199
|
|
|
196
|
-
async function fetchItems(sourceId, ctx) {
|
|
200
|
+
export async function fetchItems(sourceId, ctx) {
|
|
197
201
|
_deps = ctx.deps;
|
|
198
202
|
const primary = await ctx.fetchHtml(sourceId, { waitMs: 4500 });
|
|
199
203
|
let items = parseBlogItems(primary.html, primary.finalUrl || sourceId);
|
|
@@ -213,10 +217,3 @@ async function fetchItems(sourceId, ctx) {
|
|
|
213
217
|
|
|
214
218
|
return items;
|
|
215
219
|
}
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
export default {
|
|
219
|
-
id: "baidu-research",
|
|
220
|
-
listUrlPattern: /^https?:\/\/research\.baidu\.com\/(?:(?:Index|Blog)\/?)?(?:\?.*)?$/i,
|
|
221
|
-
fetchItems,
|
|
222
|
-
};
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
export const id = "brightdata-blog";
|
|
2
|
+
export const name = "Brightdata Blog";
|
|
3
|
+
export const listUrlPattern = /^https:\/\/(?:www\.)?brightdata\.com\/blog(?:\/(?:page\/\d+|[a-z0-9-]+(?:\/page\/\d+)?)?)?\/?(?:\?.*)?$/i;
|
|
4
|
+
|
|
1
5
|
let _deps;
|
|
2
6
|
|
|
3
7
|
// Bright Data 博客插件:优先解析站点 RSS feed,失败时回退解析列表页(不做正文 enrich)
|
|
@@ -5,8 +9,6 @@ let _deps;
|
|
|
5
9
|
|
|
6
10
|
|
|
7
11
|
const BRIGHTDATA_ORIGIN = "https://brightdata.com";
|
|
8
|
-
const LIST_URL_RE =
|
|
9
|
-
/^https?:\/\/(?:www\.)?brightdata\.com\/blog(?:\/(?:page\/\d+|[a-z0-9-]+(?:\/page\/\d+)?)?)?\/?(?:\?.*)?$/i;
|
|
10
12
|
const ARTICLE_PATH_RE = /^\/blog\/([^/?#/]+)\/([^/?#/]+)\/?$/i;
|
|
11
13
|
const MIN_READ_RE = /^\d+\s*min\s*read$/i;
|
|
12
14
|
|
|
@@ -27,7 +29,7 @@ function toAbsoluteHttpUrl(rawHref, baseUrl) {
|
|
|
27
29
|
if (!href || href.startsWith("#") || href.startsWith("javascript:")) return null;
|
|
28
30
|
try {
|
|
29
31
|
const url = new URL(href, baseUrl);
|
|
30
|
-
if (!/^https
|
|
32
|
+
if (!/^https:$/i.test(url.protocol)) return null;
|
|
31
33
|
return url.href;
|
|
32
34
|
} catch {
|
|
33
35
|
return null;
|
|
@@ -268,7 +270,7 @@ function parseHtmlItems(root, baseUrl) {
|
|
|
268
270
|
}
|
|
269
271
|
|
|
270
272
|
|
|
271
|
-
async function fetchItems(sourceId, ctx) {
|
|
273
|
+
export async function fetchItems(sourceId, ctx) {
|
|
272
274
|
_deps = ctx.deps;
|
|
273
275
|
const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 3500 });
|
|
274
276
|
const root = _deps.parseHtml(html);
|
|
@@ -292,10 +294,3 @@ async function fetchItems(sourceId, ctx) {
|
|
|
292
294
|
}
|
|
293
295
|
return fromHtml;
|
|
294
296
|
}
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
export default {
|
|
298
|
-
id: "brightdata-blog",
|
|
299
|
-
listUrlPattern: LIST_URL_RE,
|
|
300
|
-
fetchItems,
|
|
301
|
-
};
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
export const id = "bytedance-seed-research";
|
|
2
|
+
export const name = "Bytedance Seed Research";
|
|
3
|
+
export const listUrlPattern = /^https:\/\/seed\.bytedance\.com\/(zh|en)\/research(?:\/)?(\?.*)?$/i;
|
|
4
|
+
|
|
1
5
|
let _deps;
|
|
2
6
|
|
|
3
7
|
// ByteDance Seed 研究页插件:抓取研究论文与动态条目(不含 enrich)
|
|
@@ -189,7 +193,7 @@ function dedupeAndSort(items) {
|
|
|
189
193
|
}
|
|
190
194
|
|
|
191
195
|
|
|
192
|
-
async function fetchItems(sourceId, ctx) {
|
|
196
|
+
export async function fetchItems(sourceId, ctx) {
|
|
193
197
|
_deps = ctx.deps;
|
|
194
198
|
// 该站点条目核心数据在脚本 JSON 中,需关闭 purify 才能读取。
|
|
195
199
|
const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 4500, purify: false });
|
|
@@ -223,9 +227,3 @@ async function fetchItems(sourceId, ctx) {
|
|
|
223
227
|
return items;
|
|
224
228
|
}
|
|
225
229
|
|
|
226
|
-
|
|
227
|
-
export default {
|
|
228
|
-
id: "bytedance-seed-research",
|
|
229
|
-
listUrlPattern: /^https?:\/\/seed\.bytedance\.com\/(zh|en)\/research(?:\/)?(\?.*)?$/i,
|
|
230
|
-
fetchItems,
|
|
231
|
-
};
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
// 内置 IMAP 邮件插件:匹配 imap://、imaps:// 协议 URL
|
|
2
|
-
|
|
2
|
+
export const id = "__email__";
|
|
3
|
+
export const name = "Email";
|
|
4
|
+
export const pattern = /^imaps?:\/\//;
|
|
5
|
+
export const priority = 0;
|
|
6
|
+
export const refreshInterval = "30min";
|
|
7
|
+
|
|
3
8
|
function parseImapUrl(sourceId) {
|
|
4
9
|
const url = new URL(sourceId);
|
|
5
10
|
const host = url.hostname;
|
|
@@ -17,12 +22,7 @@ function makeGuid(messageId, uid, host, createHash) {
|
|
|
17
22
|
return createHash("sha256").update(raw).digest("hex");
|
|
18
23
|
}
|
|
19
24
|
|
|
20
|
-
export
|
|
21
|
-
id: "__email__",
|
|
22
|
-
pattern: /^imaps?:\/\//,
|
|
23
|
-
priority: 0,
|
|
24
|
-
refreshInterval: "30min",
|
|
25
|
-
async fetchItems(sourceId, ctx) {
|
|
25
|
+
export async function fetchItems(sourceId, ctx) {
|
|
26
26
|
const { deps } = ctx;
|
|
27
27
|
const { host, port, secure, user, pass, folder, limit } = parseImapUrl(sourceId);
|
|
28
28
|
const client = new deps.ImapFlow({
|
|
@@ -88,5 +88,5 @@ export default {
|
|
|
88
88
|
}
|
|
89
89
|
}
|
|
90
90
|
return items.sort((a, b) => b.pubDate.getTime() - a.pubDate.getTime());
|
|
91
|
-
}
|
|
92
|
-
|
|
91
|
+
}
|
|
92
|
+
|
|
@@ -1,8 +1,13 @@
|
|
|
1
|
+
export const id = "five-radar";
|
|
2
|
+
export const name = "Five Radar";
|
|
3
|
+
export const listUrlPattern = /^https:\/\/(?:www\.)?5radar\.com(?:\/[A-Za-z0-9_-]+\/news)?\/?(?:[?#].*)?$/i;
|
|
4
|
+
export const refreshInterval = "10min";
|
|
5
|
+
|
|
1
6
|
let _deps;
|
|
2
7
|
|
|
3
8
|
|
|
4
9
|
|
|
5
|
-
const SITE_ID =
|
|
10
|
+
const SITE_ID = id;
|
|
6
11
|
const DEFAULT_PAGE_SIZE = 30;
|
|
7
12
|
const THEME_PAGE_SIZE = 20;
|
|
8
13
|
const THEME_PAGE_SIZE_CANDIDATES = [20, 10, 5];
|
|
@@ -238,7 +243,7 @@ function toAbsoluteLink(rawUrl, origin, row) {
|
|
|
238
243
|
if (text) {
|
|
239
244
|
try {
|
|
240
245
|
const url = new URL(text, origin);
|
|
241
|
-
if (/^https
|
|
246
|
+
if (/^https:$/i.test(url.protocol) && !isHomepageLink(url)) return url.href;
|
|
242
247
|
} catch {
|
|
243
248
|
// ignore malformed url from upstream API
|
|
244
249
|
}
|
|
@@ -439,7 +444,7 @@ async function fetchAllThemeRows(origin, pageSize = THEME_PAGE_SIZE, maxPages =
|
|
|
439
444
|
}
|
|
440
445
|
|
|
441
446
|
|
|
442
|
-
async function fetchItems(sourceId, _ctx) {
|
|
447
|
+
export async function fetchItems(sourceId, _ctx) {
|
|
443
448
|
_deps = _ctx.deps;
|
|
444
449
|
let sourceUrl;
|
|
445
450
|
try {
|
|
@@ -447,7 +452,7 @@ async function fetchItems(sourceId, _ctx) {
|
|
|
447
452
|
} catch {
|
|
448
453
|
throw new Error(`[${SITE_ID}] 无效 URL: ${sourceId}`);
|
|
449
454
|
}
|
|
450
|
-
if (!/^https
|
|
455
|
+
if (!/^https:$/i.test(sourceUrl.protocol)) {
|
|
451
456
|
throw new Error(`[${SITE_ID}] 仅支持 http/https URL`);
|
|
452
457
|
}
|
|
453
458
|
|
|
@@ -481,10 +486,3 @@ async function fetchItems(sourceId, _ctx) {
|
|
|
481
486
|
return items;
|
|
482
487
|
}
|
|
483
488
|
|
|
484
|
-
|
|
485
|
-
export default {
|
|
486
|
-
id: SITE_ID,
|
|
487
|
-
refreshInterval: "10min",
|
|
488
|
-
listUrlPattern: /^https?:\/\/(?:www\.)?5radar\.com(?:\/[A-Za-z0-9_-]+\/news)?\/?(?:[?#].*)?$/i,
|
|
489
|
-
fetchItems,
|
|
490
|
-
};
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
export const id = "flageval-news";
|
|
2
|
+
export const name = "Flageval News";
|
|
3
|
+
export const listUrlPattern = /^https:\/\/flageval\.baai\.ac\.cn\/#\/news(?:[/?].*)?$/i;
|
|
4
|
+
|
|
1
5
|
let _deps;
|
|
2
6
|
|
|
3
7
|
|
|
@@ -72,7 +76,7 @@ async function fetchNewsList() {
|
|
|
72
76
|
}
|
|
73
77
|
|
|
74
78
|
|
|
75
|
-
async function fetchItems(_sourceId, _ctx) {
|
|
79
|
+
export async function fetchItems(_sourceId, _ctx) {
|
|
76
80
|
_deps = _ctx.deps;
|
|
77
81
|
const newsList = await fetchNewsList();
|
|
78
82
|
const seen = new Set();
|
|
@@ -110,9 +114,3 @@ async function fetchItems(_sourceId, _ctx) {
|
|
|
110
114
|
return items;
|
|
111
115
|
}
|
|
112
116
|
|
|
113
|
-
|
|
114
|
-
export default {
|
|
115
|
-
id: "flageval-news",
|
|
116
|
-
listUrlPattern: /^https?:\/\/flageval\.baai\.ac\.cn\/#\/news(?:[/?].*)?$/i,
|
|
117
|
-
fetchItems,
|
|
118
|
-
};
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
export const id = "google-deepmind-research";
|
|
2
|
+
export const name = "Google Deepmind Research";
|
|
3
|
+
export const listUrlPattern = /^https:\/\/deepmind\.google\/research\/?(?:\?.*)?$/i;
|
|
4
|
+
|
|
1
5
|
let _deps;
|
|
2
6
|
|
|
3
7
|
// Google DeepMind Research 插件:抓取 research 页面中的最新研究条目(不做 enrich)
|
|
@@ -38,7 +42,7 @@ function toAbsoluteHttpUrl(rawHref, baseUrl) {
|
|
|
38
42
|
if (!href || href.startsWith("#") || href.startsWith("javascript:")) return null;
|
|
39
43
|
try {
|
|
40
44
|
const url = new URL(href, baseUrl);
|
|
41
|
-
if (!/^https?:$/i.test(url.protocol)) return null;
|
|
45
|
+
if (!/^https:$/i.test(url.protocol)) return null;
|
|
42
46
|
return url.href;
|
|
43
47
|
} catch {
|
|
44
48
|
return null;
|
|
@@ -200,7 +204,7 @@ function parseItemsFromAnchors(root, baseUrl) {
|
|
|
200
204
|
}
|
|
201
205
|
|
|
202
206
|
|
|
203
|
-
async function fetchItems(sourceId, ctx) {
|
|
207
|
+
export async function fetchItems(sourceId, ctx) {
|
|
204
208
|
_deps = ctx.deps;
|
|
205
209
|
const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 4500 });
|
|
206
210
|
const root = _deps.parseHtml(html);
|
|
@@ -215,9 +219,3 @@ async function fetchItems(sourceId, ctx) {
|
|
|
215
219
|
throw new Error("[google-deepmind-research] 未解析到研究条目,页面结构可能已变化");
|
|
216
220
|
}
|
|
217
221
|
|
|
218
|
-
|
|
219
|
-
export default {
|
|
220
|
-
id: "google-deepmind-research",
|
|
221
|
-
listUrlPattern: /^https?:\/\/deepmind\.google\/research\/?(?:\?.*)?$/i,
|
|
222
|
-
fetchItems,
|
|
223
|
-
};
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
export const id = "google-research-datasets";
|
|
2
|
+
export const name = "Google Research Datasets";
|
|
3
|
+
export const listUrlPattern = /^https:\/\/research\.google\/resources\/datasets\/?(\?.*)?$/i;
|
|
4
|
+
|
|
1
5
|
let _deps;
|
|
2
6
|
|
|
3
7
|
|
|
@@ -29,7 +33,7 @@ function resolveHttpUrl(rawHref, baseUrl) {
|
|
|
29
33
|
|
|
30
34
|
try {
|
|
31
35
|
const url = new URL(href, baseUrl);
|
|
32
|
-
if (!/^https?:$/i.test(url.protocol)) return null;
|
|
36
|
+
if (!/^https:$/i.test(url.protocol)) return null;
|
|
33
37
|
return url;
|
|
34
38
|
} catch {
|
|
35
39
|
return null;
|
|
@@ -150,7 +154,7 @@ function parseFromRawHtml(html, finalUrl) {
|
|
|
150
154
|
}
|
|
151
155
|
|
|
152
156
|
|
|
153
|
-
async function fetchItems(sourceId, ctx) {
|
|
157
|
+
export async function fetchItems(sourceId, ctx) {
|
|
154
158
|
_deps = ctx.deps;
|
|
155
159
|
const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 3500 });
|
|
156
160
|
const fromPurified = parseFromPurifiedHtml(html, finalUrl || sourceId || DATASETS_URL);
|
|
@@ -163,9 +167,3 @@ async function fetchItems(sourceId, ctx) {
|
|
|
163
167
|
throw new Error("[google-research-datasets] 未解析到数据集条目,页面结构可能已变化");
|
|
164
168
|
}
|
|
165
169
|
|
|
166
|
-
|
|
167
|
-
export default {
|
|
168
|
-
id: "google-research-datasets",
|
|
169
|
-
listUrlPattern: /^https?:\/\/research\.google\/resources\/datasets\/?(\?.*)?$/i,
|
|
170
|
-
fetchItems,
|
|
171
|
-
};
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
export const id = "google-research";
|
|
2
|
+
export const name = "Google Research";
|
|
3
|
+
export const listUrlPattern = /^https:\/\/research\.google\/?(?:\?.*)?$/i;
|
|
4
|
+
|
|
1
5
|
let _deps;
|
|
2
6
|
|
|
3
7
|
// Google Research 首页插件:抓取 research.google 首页中的最新博客/论文条目(不做 enrich)
|
|
@@ -59,7 +63,7 @@ function toAbsoluteHttpUrl(rawHref, pageUrl) {
|
|
|
59
63
|
if (!href || href.startsWith("#") || href.startsWith("javascript:")) return null;
|
|
60
64
|
try {
|
|
61
65
|
const url = new URL(href, pageUrl);
|
|
62
|
-
if (!/^https?:$/i.test(url.protocol)) return null;
|
|
66
|
+
if (!/^https:$/i.test(url.protocol)) return null;
|
|
63
67
|
return url;
|
|
64
68
|
} catch {
|
|
65
69
|
return null;
|
|
@@ -201,7 +205,7 @@ function parseItemsFromHome(html, pageUrl) {
|
|
|
201
205
|
}
|
|
202
206
|
|
|
203
207
|
|
|
204
|
-
async function fetchItems(sourceId, ctx) {
|
|
208
|
+
export async function fetchItems(sourceId, ctx) {
|
|
205
209
|
_deps = ctx.deps;
|
|
206
210
|
const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 4000 });
|
|
207
211
|
const pageUrl = new URL(finalUrl);
|
|
@@ -212,9 +216,3 @@ async function fetchItems(sourceId, ctx) {
|
|
|
212
216
|
return items;
|
|
213
217
|
}
|
|
214
218
|
|
|
215
|
-
|
|
216
|
-
export default {
|
|
217
|
-
id: "google-research",
|
|
218
|
-
listUrlPattern: /^https?:\/\/research\.google\/?(?:\?.*)?$/i,
|
|
219
|
-
fetchItems,
|
|
220
|
-
};
|
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
export const id = "hacker-news-newest";
|
|
2
|
+
export const name = "Hacker News Newest";
|
|
3
|
+
export const listUrlPattern = /^https:\/\/news\.ycombinator\.com\/newest\/?(\?.*)?$/i;
|
|
4
|
+
export const refreshInterval = "10min";
|
|
5
|
+
|
|
1
6
|
let _deps;
|
|
2
7
|
|
|
3
8
|
// Hacker News newest 插件:解析 newest 列表页为 FeedItem(仅列表,不做正文 enrich)
|
|
@@ -17,7 +22,7 @@ function toAbsoluteUrl(rawHref, baseUrl) {
|
|
|
17
22
|
if (!href || href.startsWith("#") || href.startsWith("javascript:")) return null;
|
|
18
23
|
try {
|
|
19
24
|
const url = new URL(href, baseUrl);
|
|
20
|
-
if (!/^https?:$/i.test(url.protocol)) return null;
|
|
25
|
+
if (!/^https:$/i.test(url.protocol)) return null;
|
|
21
26
|
return url.href;
|
|
22
27
|
} catch {
|
|
23
28
|
return null;
|
|
@@ -88,7 +93,7 @@ function parseMeta(root, row, itemId) {
|
|
|
88
93
|
}
|
|
89
94
|
|
|
90
95
|
|
|
91
|
-
async function fetchItems(sourceId, ctx) {
|
|
96
|
+
export async function fetchItems(sourceId, ctx) {
|
|
92
97
|
_deps = ctx.deps;
|
|
93
98
|
const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 3000 });
|
|
94
99
|
const root = _deps.parseHtml(html);
|
|
@@ -121,10 +126,3 @@ async function fetchItems(sourceId, ctx) {
|
|
|
121
126
|
return items;
|
|
122
127
|
}
|
|
123
128
|
|
|
124
|
-
|
|
125
|
-
export default {
|
|
126
|
-
id: "hacker-news-newest",
|
|
127
|
-
listUrlPattern: /^https?:\/\/news\.ycombinator\.com\/newest\/?(\?.*)?$/i,
|
|
128
|
-
refreshInterval: "10min",
|
|
129
|
-
fetchItems,
|
|
130
|
-
};
|