rssany 0.1.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -27
- package/app/plugins/builtin/agi-eval-evaluation.rssany.js +7 -8
- package/app/plugins/builtin/amii-research-talent.rssany.js +6 -7
- package/app/plugins/builtin/anthropic-research.rssany.js +6 -8
- package/app/plugins/builtin/appen-resources.rssany.js +6 -7
- package/app/plugins/builtin/baai-wudao-paper-article.rssany.js +9 -10
- package/app/plugins/builtin/baaidata-csdn.rssany.js +6 -7
- package/app/plugins/builtin/baidu-research.rssany.js +5 -8
- package/app/plugins/builtin/brightdata-blog.rssany.js +7 -12
- package/app/plugins/builtin/bytedance-seed-research.rssany.js +5 -7
- package/app/plugins/builtin/email.rssany.js +9 -9
- package/app/plugins/builtin/five-radar.rssany.js +10 -12
- package/app/plugins/builtin/flageval-news.rssany.js +5 -7
- package/app/plugins/builtin/google-deepmind-research.rssany.js +7 -9
- package/app/plugins/builtin/google-research-datasets.rssany.js +6 -8
- package/app/plugins/builtin/google-research.rssany.js +6 -8
- package/app/plugins/builtin/hacker-news-newest.rssany.js +7 -9
- package/app/plugins/builtin/harvard-dataverse.rssany.js +6 -8
- package/app/plugins/builtin/huaweicloud-bbs-blogs.rssany.js +7 -9
- package/app/plugins/builtin/lingowhale.rssany.js +7 -9
- package/app/plugins/builtin/meituan-tech.rssany.js +7 -10
- package/app/plugins/builtin/meta-ai-publications.rssany.js +6 -11
- package/app/plugins/builtin/mila-quebec.rssany.js +6 -8
- package/app/plugins/builtin/mit-csail-research.rssany.js +7 -9
- package/app/plugins/builtin/moonshot.rssany.js +6 -8
- package/app/plugins/builtin/opendatalab-news.rssany.js +6 -7
- package/app/plugins/builtin/opendatalab.rssany.js +5 -6
- package/app/plugins/builtin/opendrivelab-autonomous-driving.rssany.js +6 -7
- package/app/plugins/builtin/opendrivelab-embodiedai.rssany.js +7 -8
- package/app/plugins/builtin/opendrivelab-publications.rssany.js +7 -9
- package/app/plugins/builtin/opendrivelab.rssany.js +7 -8
- package/app/plugins/builtin/paperswithcode.rssany.js +6 -8
- package/app/plugins/builtin/pjlab-adg-publications.rssany.js +8 -10
- package/app/plugins/builtin/rss.rssany.js +11 -12
- package/app/plugins/builtin/selectdataset.rssany.js +6 -8
- package/app/plugins/builtin/sensetime-tech-achievements.rssany.js +7 -8
- package/app/plugins/builtin/supervisely-blog.rssany.js +6 -8
- package/app/plugins/builtin/theinformation-briefings.rssany.js +144 -136
- package/app/plugins/builtin/uci-ml-repository.rssany.js +6 -7
- package/app/plugins/builtin/venturebeat.rssany.js +7 -9
- package/app/plugins/builtin/worldlabs.rssany.js +6 -8
- package/app/plugins/builtin/x.rssany.js +7 -9
- package/app/plugins/builtin/xiaohongshu.rssany.js +119 -56
- package/app/plugins/builtin/zhipu-research.rssany.js +7 -10
- package/app/plugins/site.rssany.js +25 -25
- package/{statics → app/statics}/README.md +7 -7
- package/bin/rssany.js +226 -6
- package/dist/index.js +545 -396
- package/dist/index.js.map +1 -1
- package/package.json +20 -13
- package/scripts/dev.mjs +114 -0
- package/scripts/reset.mjs +1 -1
- package/app/plugins/builtin/google.rssany.js +0 -187
- package/init/config.json +0 -17
- package/init/sources.json +0 -353
- package/statics/401.html +0 -56
- package/statics/404.html +0 -12
- package/statics/image.png +0 -0
- package/webui/build/200.html +0 -49
- package/webui/build/_app/env.js +0 -1
- package/webui/build/_app/immutable/assets/0.BB88QFoe.css +0 -1
- package/webui/build/_app/immutable/assets/10.Dj8_pmut.css +0 -1
- package/webui/build/_app/immutable/assets/11.qYZMiTb0.css +0 -1
- package/webui/build/_app/immutable/assets/12.Ct59LCqW.css +0 -1
- package/webui/build/_app/immutable/assets/13.BhO9zvFi.css +0 -1
- package/webui/build/_app/immutable/assets/14.CujIhjQK.css +0 -1
- package/webui/build/_app/immutable/assets/15.nNGjXhCQ.css +0 -1
- package/webui/build/_app/immutable/assets/16.PP9XLDf7.css +0 -1
- package/webui/build/_app/immutable/assets/4.9wPHhVwv.css +0 -1
- package/webui/build/_app/immutable/assets/5.ClehBQ0g.css +0 -1
- package/webui/build/_app/immutable/assets/6.DSJfjJwx.css +0 -1
- package/webui/build/_app/immutable/assets/7.CrNxmd8B.css +0 -1
- package/webui/build/_app/immutable/assets/8.Ba5_jYIY.css +0 -1
- package/webui/build/_app/immutable/assets/9.m-LCx_kl.css +0 -1
- package/webui/build/_app/immutable/assets/BackToParentRoute.DGk-X5ow.css +0 -1
- package/webui/build/_app/immutable/assets/SourcesList.yTBBi3_m.css +0 -1
- package/webui/build/_app/immutable/assets/homeFeedPanelStore.CSvlNcpm.css +0 -1
- package/webui/build/_app/immutable/chunks/B-OsL1Ct.js +0 -1
- package/webui/build/_app/immutable/chunks/B2Q1a1-H.js +0 -2
- package/webui/build/_app/immutable/chunks/BK3WtZwv.js +0 -1
- package/webui/build/_app/immutable/chunks/BQqoDzLx.js +0 -1
- package/webui/build/_app/immutable/chunks/BUApaBEI.js +0 -1
- package/webui/build/_app/immutable/chunks/BbWUOQ_m.js +0 -1
- package/webui/build/_app/immutable/chunks/Bfc47y5P.js +0 -1
- package/webui/build/_app/immutable/chunks/Bp63qm3L.js +0 -1
- package/webui/build/_app/immutable/chunks/BwlaCkNX.js +0 -36
- package/webui/build/_app/immutable/chunks/C0J2-L94.js +0 -1
- package/webui/build/_app/immutable/chunks/CBY2biv-.js +0 -1
- package/webui/build/_app/immutable/chunks/CLOXMsDk.js +0 -36
- package/webui/build/_app/immutable/chunks/CVzlFH44.js +0 -1
- package/webui/build/_app/immutable/chunks/CWNeClHp.js +0 -6
- package/webui/build/_app/immutable/chunks/Cihqbfi5.js +0 -1
- package/webui/build/_app/immutable/chunks/D5GvRCv7.js +0 -1
- package/webui/build/_app/immutable/chunks/DEDI7Ecm.js +0 -1
- package/webui/build/_app/immutable/chunks/DFuhmi31.js +0 -1
- package/webui/build/_app/immutable/chunks/DMWEh-Ek.js +0 -2
- package/webui/build/_app/immutable/chunks/DgceFEv5.js +0 -1
- package/webui/build/_app/immutable/chunks/DjNLq3TF.js +0 -1
- package/webui/build/_app/immutable/chunks/Dt2CddFe.js +0 -1
- package/webui/build/_app/immutable/chunks/Dw782Tjs.js +0 -1
- package/webui/build/_app/immutable/chunks/SqCUd34O.js +0 -1
- package/webui/build/_app/immutable/chunks/Xy_fhzQq.js +0 -1
- package/webui/build/_app/immutable/chunks/hp4PFHFv.js +0 -1
- package/webui/build/_app/immutable/chunks/lk5LaiqA.js +0 -1
- package/webui/build/_app/immutable/chunks/mW5RwvnK.js +0 -13
- package/webui/build/_app/immutable/chunks/tB7QMF3U.js +0 -1
- package/webui/build/_app/immutable/chunks/xtNWTdbD.js +0 -1
- package/webui/build/_app/immutable/entry/app.B8zBPipq.js +0 -2
- package/webui/build/_app/immutable/entry/start.CxRCKeCl.js +0 -1
- package/webui/build/_app/immutable/nodes/0.ChLNE3xy.js +0 -11
- package/webui/build/_app/immutable/nodes/1.1N74-4Io.js +0 -1
- package/webui/build/_app/immutable/nodes/10.DY30t9Ib.js +0 -1
- package/webui/build/_app/immutable/nodes/11.ITuxnukH.js +0 -1
- package/webui/build/_app/immutable/nodes/12.qLzWqB1c.js +0 -1
- package/webui/build/_app/immutable/nodes/13.nT3SOzEB.js +0 -1
- package/webui/build/_app/immutable/nodes/14.BHnIxbVM.js +0 -1
- package/webui/build/_app/immutable/nodes/15.CLjT9il3.js +0 -1
- package/webui/build/_app/immutable/nodes/16.BD-mKCLN.js +0 -24
- package/webui/build/_app/immutable/nodes/17.BtYZF6FM.js +0 -1
- package/webui/build/_app/immutable/nodes/18.Ba_qJjp6.js +0 -1
- package/webui/build/_app/immutable/nodes/2.BYWOpaxy.js +0 -1
- package/webui/build/_app/immutable/nodes/3.Dt5o2Fmz.js +0 -1
- package/webui/build/_app/immutable/nodes/4.DTSxpKm7.js +0 -2
- package/webui/build/_app/immutable/nodes/5.Dy3vSsIP.js +0 -1
- package/webui/build/_app/immutable/nodes/6.DvclsL6H.js +0 -1
- package/webui/build/_app/immutable/nodes/7.D2nJy-Uz.js +0 -1
- package/webui/build/_app/immutable/nodes/8.C75mhrqs.js +0 -1
- package/webui/build/_app/immutable/nodes/9.Bp_QXw3w.js +0 -1
- package/webui/build/_app/version.json +0 -1
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
/** Plugin identifier, exposed as a named export. */
export const id = "xiaohongshu";

/** Display name of the plugin. */
export const name = "Xiaohongshu";

/**
 * Matches an https Xiaohongshu user-profile URL (`/user/profile/<segment>`),
 * with an optional `www.` prefix, optional trailing slash and optional
 * query string or fragment. Case-insensitive.
 */
export const listUrlPattern = /^https:\/\/(www\.)?xiaohongshu\.com\/user\/profile\/[^/?#]+\/?(?:[?#].*)?$/i;
|
|
4
|
+
|
|
1
5
|
let _deps;
|
|
2
6
|
|
|
3
7
|
// 小红书站点插件:用户主页列表抓取、笔记详情提取、认证流程
|
|
@@ -5,6 +9,41 @@ let _deps;
|
|
|
5
9
|
|
|
6
10
|
|
|
7
11
|
const XHS_ORIGIN = "https://www.xiaohongshu.com";

// Pathname of a note page: /explore/<id> or /discovery/item/<id>, optional
// trailing slash. Capture group 1 is the 24-hex-character id.
const XHS_NOTE_PATH_RE = /^\/(?:explore|discovery\/item)\/([0-9a-f]{24})\/?$/i;

// A whole string that is exactly 24 hex characters.
const XHS_NOTE_ID_RE = /^[0-9a-f]{24}$/i;

// A 24-hex-character id embedded in an xhscdn.com URL after a numeric path
// segment. Capture group 1 is the id.
const XHS_NOTE_ID_IN_IMG_RE = /xhscdn\.com\/\d+\/([0-9a-f]{24})/i;

// The 24-hex-character user id inside a /user/profile/<id> URL (group 1).
const XHS_PROFILE_USER_RE = /\/user\/profile\/([0-9a-f]{24})/i;
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
/**
 * Derives a guid for a note from its id.
 *
 * @param {string} noteId - Note id to hash.
 * @returns {string} Hex SHA-256 digest of the string `xhs:note:<noteId>`.
 */
function hashNoteGuid(noteId) {
  // `_deps` is the module-level dependency bag assigned by fetchItems.
  return _deps.createHash("sha256").update(`xhs:note:${noteId}`).digest("hex");
}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
/**
 * Extracts the user id from a profile URL.
 *
 * @param {unknown} url - Value to inspect; it is stringified before matching.
 * @returns {string|null} The lowercase 24-hex-character id found after
 *   `/user/profile/`, or null when the pattern does not match.
 */
function extractProfileUserId(url) {
  const m = String(url).match(XHS_PROFILE_USER_RE);
  return m?.[1]?.toLowerCase() ?? null;
}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
/**
 * Builds the `/explore/<noteId>` link for a note.
 *
 * @param {string} noteId - Note id placed in the path.
 * @param {string} origin - Site origin; trailing slashes are ignored.
 * @returns {string} `<origin>/explore/<noteId>`.
 */
function buildExploreLink(noteId, origin) {
  // Strip every trailing slash so an origin such as "https://host//" cannot
  // produce a double slash before "explore".
  return `${origin.replace(/\/+$/, "")}/explore/${noteId}`;
}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
/**
 * Finds a note id inside one list section.
 *
 * Looks first at `<img>` elements whose `src` contains "xhscdn", then falls
 * back to the first 24-hex-character run in the section's outer HTML. Ids
 * equal to the profile owner's id are skipped in both passes.
 *
 * @param {object} section - Parsed element exposing `querySelectorAll` and `outerHTML`.
 * @param {string|null|undefined} profileUserId - Id of the profile being listed.
 * @returns {string|null} Lowercase note id, or null when none is found.
 */
function extractNoteIdFromSection(section, profileUserId) {
  // Compare in lowercase on both sides: the patterns are case-insensitive,
  // so a raw match may differ from the profile id only by letter case.
  const ownerId = typeof profileUserId === "string" ? profileUserId.toLowerCase() : null;

  for (const img of section.querySelectorAll('img[src*="xhscdn"]')) {
    const src = img.getAttribute("src")?.trim() ?? "";
    const candidate = src.match(XHS_NOTE_ID_IN_IMG_RE)?.[1]?.toLowerCase();
    if (candidate && candidate !== ownerId) return candidate;
  }

  const html = section.outerHTML ?? "";
  for (const match of html.match(/[0-9a-f]{24}/gi) ?? []) {
    const id = match.toLowerCase();
    if (id !== ownerId && XHS_NOTE_ID_RE.test(id)) return id;
  }
  return null;
}
|
|
8
47
|
|
|
9
48
|
|
|
10
49
|
function getOrigin(url) {
|
|
@@ -16,57 +55,99 @@ function getOrigin(url) {
|
|
|
16
55
|
}
|
|
17
56
|
|
|
18
57
|
|
|
19
|
-
function
|
|
58
|
+
/**
 * Resolves an href against an origin and drops its fragment.
 *
 * @param {string} href - Raw href; may still contain HTML-escaped ampersands.
 * @param {string} origin - Base used to resolve relative hrefs.
 * @returns {URL|null} The resolved URL with an empty hash, or null when the
 *   href cannot be processed (not a string, or not a valid URL).
 */
function normalizeXhsUrl(href, origin) {
  try {
    // Decode "&amp;" so query parameters after the first are parsed under
    // their real names instead of "amp;name".
    const url = new URL(href.replace(/&amp;/g, "&"), origin);
    url.hash = "";
    return url;
  } catch {
    return null;
  }
}
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
function normalizeXhsItemLink(href, origin) {
|
|
70
|
+
const url = normalizeXhsUrl(href, origin);
|
|
71
|
+
if (!url) return null;
|
|
72
|
+
|
|
20
73
|
try {
|
|
21
|
-
|
|
22
|
-
const
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
const token = fullUrl.searchParams.get("xsec_token");
|
|
26
|
-
const source = fullUrl.searchParams.get("xsec_source") ?? "pc_user";
|
|
27
|
-
if (!token) return null;
|
|
28
|
-
const explore = new URL(`/explore/${noteId}`, origin);
|
|
29
|
-
explore.searchParams.set("xsec_token", token);
|
|
30
|
-
explore.searchParams.set("xsec_source", source);
|
|
31
|
-
return explore.href;
|
|
74
|
+
if (!/(^|\.)xiaohongshu\.com$/i.test(url.hostname)) return null;
|
|
75
|
+
const m = url.pathname.match(XHS_NOTE_PATH_RE);
|
|
76
|
+
if (!m?.[1]) return null;
|
|
77
|
+
return buildExploreLink(m[1].toLowerCase(), url.origin);
|
|
32
78
|
} catch {
|
|
33
79
|
return null;
|
|
34
80
|
}
|
|
35
81
|
}
|
|
36
82
|
|
|
37
83
|
|
|
84
|
+
/**
 * Recovers a note link from a login-error wrapper URL.
 *
 * Only hrefs whose path ends in `/website-login/error` are considered; the
 * note link is taken from their `redirectPath` query parameter.
 *
 * @param {string} href - Raw href from the page.
 * @param {string} origin - Base used to resolve relative URLs.
 * @returns {string|null} Result of normalizeXhsItemLink for the redirect
 *   target, or null when the href is not such a wrapper or has no target.
 */
function extractRedirectItemLink(href, origin) {
  const wrapper = normalizeXhsUrl(href, origin);
  if (!wrapper) return null;
  if (!/\/website-login\/error\/?$/i.test(wrapper.pathname)) return null;

  const redirectPath = wrapper.searchParams.get("redirectPath");
  if (!redirectPath) return null;
  return normalizeXhsItemLink(redirectPath, origin);
}
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
/**
 * Determines the note link for one list section.
 *
 * Prefers a note id found in the section itself; otherwise walks the
 * section's anchors in document order and returns the first one that is
 * either a direct note link or a login-error wrapper around one.
 *
 * @param {object} section - Parsed element exposing `querySelectorAll`.
 * @param {string} origin - Site origin used to build and resolve links.
 * @param {string|null} profileUserId - Id of the profile being listed.
 * @returns {string|null} The note link, or null when none can be derived.
 */
function extractListItemLink(section, origin, profileUserId) {
  const noteId = extractNoteIdFromSection(section, profileUserId);
  if (noteId) return buildExploreLink(noteId, origin);

  for (const anchor of section.querySelectorAll("a[href]")) {
    const href = anchor.getAttribute("href")?.trim();
    if (!href) continue;

    // Direct link wins over the redirect wrapper for the same anchor; stop
    // at the first hit instead of collecting every candidate.
    const link = normalizeXhsItemLink(href, origin) || extractRedirectItemLink(href, origin);
    if (link) return link;
  }
  return null;
}
|
|
113
|
+
|
|
114
|
+
|
|
38
115
|
function parseListHtml(html, url) {
|
|
39
116
|
const root = _deps.parseHtml(html);
|
|
40
117
|
const origin = getOrigin(url);
|
|
118
|
+
const profileUserId = extractProfileUserId(url);
|
|
41
119
|
const feed = root.querySelector("#userPostedFeeds");
|
|
42
120
|
if (!feed) return [];
|
|
43
|
-
const sections = feed.querySelectorAll("section[data-
|
|
121
|
+
const sections = feed.querySelectorAll("section[data-index]");
|
|
44
122
|
const items = [];
|
|
123
|
+
const seenNoteIds = new Set();
|
|
45
124
|
for (const section of sections) {
|
|
46
|
-
const
|
|
47
|
-
const
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
link = new URL(href, origin).href;
|
|
58
|
-
}
|
|
59
|
-
const titleEl = section.querySelector("span[data-v-51ec0135]");
|
|
60
|
-
const title = (titleEl?.textContent ?? "").trim() || "笔记";
|
|
61
|
-
const authorEl = section.querySelector('a[aria-current="page"] span');
|
|
125
|
+
const noteId = extractNoteIdFromSection(section, profileUserId);
|
|
126
|
+
const link = noteId
|
|
127
|
+
? buildExploreLink(noteId, origin)
|
|
128
|
+
: extractListItemLink(section, origin, profileUserId);
|
|
129
|
+
if (!link) continue;
|
|
130
|
+
const dedupeKey = noteId ?? link;
|
|
131
|
+
if (seenNoteIds.has(dedupeKey)) continue;
|
|
132
|
+
seenNoteIds.add(dedupeKey);
|
|
133
|
+
const titleEl = section.querySelector("span[data-v-51ec0135]") ?? section.querySelector(".title span") ?? section.querySelector("span");
|
|
134
|
+
const title = (titleEl?.textContent ?? "").trim() || "Note";
|
|
135
|
+
const authorEl = section.querySelector('a[aria-current="page"] .name') ?? section.querySelector('a[aria-current="page"] span');
|
|
62
136
|
const author = (authorEl?.textContent ?? "").trim() || undefined;
|
|
137
|
+
const imageEl = section.querySelector("img[data-xhs-img], img");
|
|
138
|
+
const image = imageEl?.getAttribute("src")?.trim() || undefined;
|
|
139
|
+
const summary = image ? undefined : title;
|
|
140
|
+
const guid = noteId ? hashNoteGuid(noteId) : _deps.createHash("sha256").update(link).digest("hex");
|
|
63
141
|
items.push({
|
|
64
|
-
guid
|
|
142
|
+
guid,
|
|
65
143
|
title,
|
|
66
144
|
link,
|
|
67
145
|
pubDate: new Date(),
|
|
68
146
|
author,
|
|
69
|
-
summary
|
|
147
|
+
summary,
|
|
148
|
+
imageUrl: image,
|
|
149
|
+
coverImg: image,
|
|
150
|
+
cover_img: image,
|
|
70
151
|
});
|
|
71
152
|
}
|
|
72
153
|
return items;
|
|
@@ -240,9 +321,14 @@ function extractDetailHtml(html) {
|
|
|
240
321
|
}
|
|
241
322
|
|
|
242
323
|
|
|
243
|
-
async function fetchItems(sourceId, ctx) {
|
|
324
|
+
export async function fetchItems(sourceId, ctx) {
|
|
244
325
|
_deps = ctx.deps;
|
|
245
|
-
const { html, finalUrl } = await ctx.fetchHtml(sourceId
|
|
326
|
+
const { html, finalUrl } = await ctx.fetchHtml(sourceId, {
|
|
327
|
+
waitMs: 3000,
|
|
328
|
+
waitForSelector: "#userPostedFeeds",
|
|
329
|
+
waitForSelectorTimeoutMs: 15000,
|
|
330
|
+
scrollBeforeSnapshot: { selector: "#userPostedFeeds", rounds: 8, pauseMs: 900 },
|
|
331
|
+
});
|
|
246
332
|
return parseListHtml(html, finalUrl);
|
|
247
333
|
}
|
|
248
334
|
|
|
@@ -258,26 +344,3 @@ async function enrichItem(item, ctx) {
|
|
|
258
344
|
pubDate: detail.pubDate ?? item.pubDate,
|
|
259
345
|
};
|
|
260
346
|
}
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
async function checkAuth(page, _url) {
|
|
264
|
-
try {
|
|
265
|
-
const loginButton = await page.$(".reds-button-new.login-btn.large.primary");
|
|
266
|
-
return loginButton == null;
|
|
267
|
-
} catch {
|
|
268
|
-
return false;
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
export default {
|
|
274
|
-
id: "xiaohongshu",
|
|
275
|
-
listUrlPattern: "https://xiaohongshu.com/user/profile/{userId}",
|
|
276
|
-
fetchItems,
|
|
277
|
-
enrichItem,
|
|
278
|
-
checkAuth,
|
|
279
|
-
loginUrl: "https://www.xiaohongshu.com/",
|
|
280
|
-
domain: "xiaohongshu.com",
|
|
281
|
-
loginTimeoutMs: 30 * 1000,
|
|
282
|
-
pollIntervalMs: 2000,
|
|
283
|
-
};
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
/** Plugin identifier, exposed as a named export. */
export const id = "zhipu-research";

/** Display name of the plugin. */
export const name = "Zhipu Research";

/**
 * Matches the https zhipuai.cn `/zh/research` page, with an optional `www.`
 * prefix, optional trailing slash and optional query string or fragment.
 * Case-insensitive.
 */
export const listUrlPattern = /^https:\/\/(www\.)?zhipuai\.cn\/zh\/research\/?(?:[?#].*)?$/i;
|
|
4
|
+
|
|
1
5
|
let _deps;
|
|
2
6
|
|
|
3
7
|
// 智谱研究页插件:仅抓取列表,不做正文 enrich(兼容净化后的 HTML)
|
|
@@ -113,7 +117,7 @@ function buildItemsFromBlogsItems(blogsItems) {
|
|
|
113
117
|
const summary = normalizeText(blog.resume_zh ?? blog.resume_en ?? "");
|
|
114
118
|
const createdAt = String(blog.createAt ?? "").trim();
|
|
115
119
|
const pubDate = createdAt ? new Date(createdAt) : new Date();
|
|
116
|
-
const
|
|
120
|
+
const _category = normalizeText(blog.tag_zh ?? blog.tag_en ?? "");
|
|
117
121
|
items.push({
|
|
118
122
|
guid: hashGuid(link),
|
|
119
123
|
title,
|
|
@@ -263,7 +267,7 @@ function buildItemsFromLeafSequence(html, titleIdMap) {
|
|
|
263
267
|
for (let i = 0; i < leafTexts.length; i += 1) {
|
|
264
268
|
const dateText = leafTexts[i];
|
|
265
269
|
if (!isDateText(dateText)) continue;
|
|
266
|
-
const
|
|
270
|
+
const _category = i > 0 && RESEARCH_TAGS.has(leafTexts[i - 1]) ? leafTexts[i - 1] : undefined;
|
|
267
271
|
|
|
268
272
|
let title = "";
|
|
269
273
|
let summary;
|
|
@@ -300,7 +304,7 @@ function buildItemsFromLeafSequence(html, titleIdMap) {
|
|
|
300
304
|
}
|
|
301
305
|
|
|
302
306
|
|
|
303
|
-
async function fetchItems(sourceId, ctx) {
|
|
307
|
+
export async function fetchItems(sourceId, ctx) {
|
|
304
308
|
_deps = ctx.deps;
|
|
305
309
|
// 需要读取页面脚本里的 blogsItems(包含详情 id),因此这里禁用净化。
|
|
306
310
|
const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 5000, purify: false });
|
|
@@ -325,10 +329,3 @@ async function fetchItems(sourceId, ctx) {
|
|
|
325
329
|
|
|
326
330
|
throw new Error("[zhipu-research] 未解析到研究条目,页面结构可能已变化");
|
|
327
331
|
}
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
export default {
|
|
331
|
-
id: "zhipu-research",
|
|
332
|
-
listUrlPattern: ZHIPU_RESEARCH_URL,
|
|
333
|
-
fetchItems,
|
|
334
|
-
};
|
|
@@ -1,25 +1,25 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Site
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
}
|
|
1
|
+
/**
|
|
2
|
+
* Site plugin template. The admin UI copies this file to .rssany/plugins/{id}.rssany.js.
|
|
3
|
+
* Plugin protocol: named exports. No export default is required.
|
|
4
|
+
*
|
|
5
|
+
* Interface: app/scraper/sources/web/site.ts
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Predefined fields stay together at the top.
|
|
9
|
+
export const id = "__PLUGIN_ID__";
|
|
10
|
+
export const name = "__PLUGIN_ID__";
|
|
11
|
+
// eslint-disable-next-line no-undef
|
|
12
|
+
export const listUrlPattern = __LIST_URL_PATTERN__;
|
|
13
|
+
export const refreshInterval = "1day";
|
|
14
|
+
|
|
15
|
+
/**
 * Template list fetcher: loads the list page and returns no items yet.
 *
 * @param {string} sourceId - Source reference passed through to `ctx.fetchHtml`.
 * @param {object} ctx - Plugin context providing `fetchHtml` and `deps.parseHtml`.
 * @returns {Promise<Array>} Always an empty array until parsing is implemented.
 */
export async function fetchItems(sourceId, ctx) {
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, {
    waitMs: 2000,
    purify: true,
  });
  const root = ctx.deps.parseHtml(html);
  // `void` marks the values as intentionally unused until the TODO below is done.
  void root;
  void finalUrl;
  // TODO: Parse the list page and return FeedItem objects.
  return [];
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
# statics
|
|
2
|
-
|
|
3
|
-
静态 HTML 页面:home(首页)、401、404。
|
|
4
|
-
|
|
5
|
-
- **home.html**:首页,含 Try This 示例链接;下方「需登录的站点」从 `/plugins` 拉取,每个站点可点击「打开登录页」调用 `POST /auth/ensure?siteId=...` 批量做登录。
|
|
6
|
-
- **401.html**:需登录时返回;占位符 `{{listUrl}}` 由 router 注入为失败请求的订阅地址;页内「打开有头登录页」按钮调用 `POST /auth/ensure?url=...` 弹出有头浏览器完成登录。
|
|
7
|
-
- **404.html**:无匹配站点时返回。
|
|
1
|
+
# statics
|
|
2
|
+
|
|
3
|
+
静态 HTML 页面:home(首页)、401、404。
|
|
4
|
+
|
|
5
|
+
- **home.html**:首页,含 Try This 示例链接;下方「需登录的站点」从 `/plugins` 拉取,每个站点可点击「打开登录页」调用 `POST /auth/ensure?siteId=...` 批量做登录。
|
|
6
|
+
- **401.html**:需登录时返回;占位符 `{{listUrl}}` 由 router 注入为失败请求的订阅地址;页内「打开有头登录页」按钮调用 `POST /auth/ensure?url=...` 弹出有头浏览器完成登录。
|
|
7
|
+
- **404.html**:无匹配站点时返回。
|
package/bin/rssany.js
CHANGED
|
@@ -1,6 +1,226 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
}
|
|
5
|
-
|
|
6
|
-
}
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { spawn } from "node:child_process";
|
|
3
|
+
import { closeSync, openSync } from "node:fs";
|
|
4
|
+
import { access, mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
5
|
+
import http from "node:http";
|
|
6
|
+
import { homedir, networkInterfaces } from "node:os";
|
|
7
|
+
import { dirname, join } from "node:path";
|
|
8
|
+
import { fileURLToPath } from "node:url";
|
|
9
|
+
|
|
10
|
+
// Sub-command given on the command line (first argument after the script).
const command = process.argv[2];
// Directory containing this script, and the directory one level above it.
const binDir = dirname(fileURLToPath(import.meta.url));
const packageRoot = join(binDir, "..");
// Per-user data directory: RSSANY_USER_DIR when set and non-blank, else ~/.rssany.
const userDir = process.env.RSSANY_USER_DIR?.trim() || join(homedir(), ".rssany");
const pidPath = join(userDir, "rssany.pid");
const logPath = join(userDir, "rssany.log");
// Only accept PORT when it is an integer in the valid TCP range; anything
// else (unset, non-numeric, fractional, out of range) falls back to 18473.
const envPort = Number(process.env.PORT);
const port = Number.isInteger(envPort) && envPort > 0 && envPort <= 65535 ? envPort : 18473;
const serverOrigin = `http://127.0.0.1:${port}`;
|
|
18
|
+
|
|
19
|
+
/**
 * Reports whether a path is accessible.
 *
 * @param {string} path - File system path to probe.
 * @returns {Promise<boolean>} True when `access` succeeds, false on any error.
 */
async function pathExists(path) {
  try {
    await access(path);
    return true;
  } catch {
    // Any failure (missing, no permission, ...) is treated as "not there".
    return false;
  }
}
|
|
27
|
+
|
|
28
|
+
/**
 * Reads the process id stored in the pid file.
 *
 * @returns {Promise<number|null>} The positive integer stored at `pidPath`,
 *   or null when the file cannot be read or does not hold such a number.
 */
async function readPid() {
  try {
    const raw = await readFile(pidPath, "utf-8");
    const pid = Number(raw.trim());
    return Number.isInteger(pid) && pid > 0 ? pid : null;
  } catch {
    // An unreadable or missing pid file means no recorded process.
    return null;
  }
}
|
|
37
|
+
|
|
38
|
+
/**
 * Checks whether a process with the given id exists.
 *
 * @param {number} pid - Process id to probe.
 * @returns {boolean} True when the process exists, false otherwise.
 */
function isProcessRunning(pid) {
  try {
    // Signal 0 performs the existence/permission check without delivering a signal.
    process.kill(pid, 0);
    return true;
  } catch (err) {
    // EPERM means the process exists but we may not signal it; only other
    // failures (e.g. ESRCH) mean it is gone.
    return err?.code === "EPERM";
  }
}
|
|
46
|
+
|
|
47
|
+
/**
 * Builds a URL for reaching the server from the local network.
 *
 * @returns {string|null} `http://<ip>:<port>/` using the first non-internal
 *   IPv4 address reported by the OS, or null when there is none.
 */
function getLanUrl() {
  const lanIp = Object.values(networkInterfaces())
    .flat()
    // Some Node.js 18 releases report `family` as the number 4 rather than "IPv4".
    .find((iface) => iface && !iface.internal && (iface.family === "IPv4" || iface.family === 4))?.address;
  return lanIp ? `http://${lanIp}:${port}/` : null;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Prints the loopback address of the server and, when available, the LAN address.
 *
 * @param {string} [prefix="RssAny 已启动"] - Text printed before the loopback URL.
 * @returns {void}
 */
function printAddress(prefix = "RssAny 已启动") {
  console.log(`${prefix}: http://127.0.0.1:${port}/`);
  const lanUrl = getLanUrl();
  if (lanUrl) console.log(`局域网访问: ${lanUrl}`);
}
|
|
59
|
+
|
|
60
|
+
/**
 * Prints the command-line usage summary (one line per sub-command).
 *
 * @returns {void}
 */
function printUsage() {
  console.log("用法: rssany <start|stop|reset|crawl>");
  console.log(" rssany start 后台启动服务并输出访问地址");
  console.log(" rssany stop 关闭后台服务并输出执行状态");
  console.log(" rssany reset 重置本地数据");
  console.log(" rssany crawl <ref> 按内部抓取链路拉取指定信源");
}
|
|
67
|
+
|
|
68
|
+
/**
 * Probes the local server with a GET to `/api/server-info`.
 *
 * @returns {Promise<boolean>} True as soon as any HTTP response arrives
 *   (the status code is not inspected); false on a request error or when
 *   the socket stays idle for 500 ms.
 */
async function canConnectToServer() {
  return new Promise((resolve) => {
    const req = http.get(`${serverOrigin}/api/server-info`, (res) => {
      // Drain the body so the socket is released.
      res.resume();
      resolve(true);
    });
    req.setTimeout(500, () => {
      req.destroy();
      resolve(false);
    });
    // Destroying the request can also emit "error"; resolving twice is harmless.
    req.on("error", () => resolve(false));
  });
}
|
|
81
|
+
|
|
82
|
+
/**
 * Polls the server until it answers or the time budget is used up.
 *
 * @param {number} [timeoutMs=5000] - Maximum time to keep polling, in milliseconds.
 * @returns {Promise<boolean>} True once a probe succeeds; false when
 *   `timeoutMs` elapses first (no probe is made when it is 0).
 */
async function waitForServer(timeoutMs = 5000) {
  const startTime = Date.now();
  while (Date.now() - startTime < timeoutMs) {
    if (await canConnectToServer()) return true;
    // Pause 250 ms between probes.
    await new Promise((resolve) => setTimeout(resolve, 250));
  }
  return false;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Starts the server as a detached background process.
 *
 * Does nothing but print the address when the pid file points at a live
 * process. Otherwise spawns `dist/index.js` with stdout/stderr appended to
 * the log file, records the child's pid, and waits for the server to answer.
 * Sets `process.exitCode = 1` when the entry file is missing, the child
 * cannot be spawned, or the server does not answer in time.
 *
 * @returns {Promise<void>}
 */
async function start() {
  await mkdir(userDir, { recursive: true });

  const currentPid = await readPid();
  if (currentPid && isProcessRunning(currentPid)) {
    printAddress(`RssAny 已在运行 (pid ${currentPid})`);
    return;
  }

  const entry = join(packageRoot, "dist", "index.js");
  if (!(await pathExists(entry))) {
    console.error("未找到 dist/index.js,请先构建项目或重新安装 rssany。");
    process.exitCode = 1;
    return;
  }

  const logFd = openSync(logPath, "a");
  let child;
  try {
    child = spawn(process.execPath, [entry], {
      cwd: process.cwd(),
      detached: true,
      env: process.env,
      stdio: ["ignore", logFd, logFd],
    });
  } finally {
    // The parent's copy of the descriptor is no longer needed once spawn
    // has returned; release it even if spawn threw.
    closeSync(logFd);
  }
  // Spawn failures are reported through the "error" event; without a
  // listener Node.js would raise them as an uncaught exception.
  child.on("error", (err) => {
    console.error(`RssAny 启动失败: ${err.message}`);
    process.exitCode = 1;
  });
  // Let this CLI exit without waiting for the background process.
  child.unref();

  if (child.pid === undefined) {
    // No pid means the process never started; do not record a bogus pid file.
    console.error(`RssAny 启动未完成,请查看日志: ${logPath}`);
    process.exitCode = 1;
    return;
  }

  await writeFile(pidPath, `${child.pid}\n`, "utf-8");
  console.log(`日志: ${logPath}`);
  if (await waitForServer()) {
    printAddress(`RssAny 已启动 (pid ${child.pid})`);
    return;
  }

  console.error(`RssAny 启动未完成,请查看日志: ${logPath}`);
  process.exitCode = 1;
}
|
|
128
|
+
|
|
129
|
+
/**
 * Stops the background server recorded in the pid file.
 *
 * Sends SIGTERM to the recorded process and removes the pid file. A missing
 * pid file or an already-dead process is reported and, in the latter case,
 * the stale pid file is removed. Sets `process.exitCode = 1` when the
 * signal cannot be delivered.
 *
 * @returns {Promise<void>}
 */
async function stop() {
  const pid = await readPid();
  if (!pid) {
    console.log("RssAny 未运行:没有找到 pid 文件。");
    return;
  }

  if (!isProcessRunning(pid)) {
    await rm(pidPath, { force: true });
    console.log(`RssAny 未运行:已清理失效 pid ${pid}。`);
    return;
  }

  try {
    process.kill(pid, "SIGTERM");
  } catch (err) {
    if (err?.code === "ESRCH") {
      // The process exited between the liveness check and the signal.
      await rm(pidPath, { force: true });
      console.log(`RssAny 未运行:已清理失效 pid ${pid}。`);
      return;
    }
    // e.g. EPERM: the process exists but cannot be signalled; keep the pid file.
    console.error(`RssAny 停止失败 (pid ${pid}): ${err instanceof Error ? err.message : String(err)}`);
    process.exitCode = 1;
    return;
  }

  await rm(pidPath, { force: true });
  console.log(`RssAny 已发送停止信号 (pid ${pid})。`);
}
|
|
146
|
+
|
|
147
|
+
/**
 * Picks the source reference out of the `crawl` arguments.
 *
 * Accepted forms, in order of precedence: `--ref <value>`, `--ref=<value>`,
 * then the first argument that does not start with "-".
 *
 * @param {string[]} args - Arguments following the sub-command.
 * @returns {string} The trimmed reference, or "" when none is given.
 */
function readCrawlRef(args) {
  const refFlagIndex = args.indexOf("--ref");
  if (refFlagIndex >= 0) return args[refFlagIndex + 1]?.trim() || "";

  const refEquals = args.find((arg) => arg.startsWith("--ref="));
  if (refEquals) return refEquals.slice("--ref=".length).trim();

  return args.find((arg) => !arg.startsWith("-"))?.trim() || "";
}
|
|
154
|
+
|
|
155
|
+
/**
 * Sends a JSON POST request to the local server.
 *
 * @param {string} path - Path appended to `serverOrigin`.
 * @param {unknown} body - Value serialized with `JSON.stringify` as the request body.
 * @returns {Promise<any>} The parsed JSON response, or `{}` when the body is not valid JSON.
 * @throws {Error} On a non-2xx status, with the response's `error` field or `HTTP <status>`.
 */
async function postJson(path, body) {
  const res = await fetch(`${serverOrigin}${path}`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(body),
  });
  // A body that is not JSON is treated as an empty object.
  const data = await res.json().catch(() => ({}));
  if (!res.ok) {
    // `data` may legitimately be null (JSON "null"), hence the optional access.
    throw new Error(data?.error || `HTTP ${res.status}`);
  }
  return data;
}
|
|
167
|
+
|
|
168
|
+
/**
 * Sends a GET request to the local server and parses the JSON response.
 *
 * @param {string} path - Path appended to `serverOrigin`.
 * @returns {Promise<any>} The parsed JSON response, or `{}` when the body is not valid JSON.
 * @throws {Error} On a non-2xx status, with the response's `error` field or `HTTP <status>`.
 */
async function getJson(path) {
  const res = await fetch(`${serverOrigin}${path}`);
  // A body that is not JSON is treated as an empty object.
  const data = await res.json().catch(() => ({}));
  if (!res.ok) {
    // `data` may legitimately be null (JSON "null"), hence the optional access.
    throw new Error(data?.error || `HTTP ${res.status}`);
  }
  return data;
}
|
|
176
|
+
|
|
177
|
+
/**
 * Polls a task until it finishes, fails, or the time budget is used up.
 *
 * @param {string} taskId - Task id; URL-encoded into `/api/tasks/<taskId>`.
 * @param {number} [timeoutMs=120000] - Maximum time to keep polling, in milliseconds.
 * @returns {Promise<object>} The task object once its `status` is "done".
 * @throws {Error} With the task's `error` (or "抓取失败") when `status` is
 *   "error", and with "抓取超时" when `timeoutMs` elapses first.
 */
async function pollTask(taskId, timeoutMs = 120000) {
  const start = Date.now();
  while (Date.now() - start < timeoutMs) {
    const task = await getJson(`/api/tasks/${encodeURIComponent(taskId)}`);
    if (task.status === "done") return task;
    if (task.status === "error") {
      throw new Error(task.error || "抓取失败");
    }
    // Any other status: wait 800 ms and ask again.
    await new Promise((resolve) => setTimeout(resolve, 800));
  }
  throw new Error("抓取超时");
}
|
|
189
|
+
|
|
190
|
+
/**
 * Implements `rssany crawl <ref>`: submits a "source-pull" task to the
 * running server and waits for it to finish.
 *
 * Sets `process.exitCode = 1` when no ref is given, the server is not
 * reachable, or the task fails or times out.
 *
 * @returns {Promise<void>}
 */
async function crawl() {
  const ref = readCrawlRef(process.argv.slice(3));
  if (!ref) {
    console.error("ref 不能为空。用法: rssany crawl <ref>");
    process.exitCode = 1;
    return;
  }
  if (!(await canConnectToServer())) {
    console.error(`RssAny 服务未运行,请先执行 rssany start。目标: ${serverOrigin}`);
    process.exitCode = 1;
    return;
  }
  try {
    const { taskId } = await postJson("/api/tasks", { type: "source-pull", ref });
    if (!taskId) throw new Error("后端未返回 taskId");
    console.log(`crawl 已提交: ${ref}`);
    console.log(`task: ${taskId}`);
    await pollTask(taskId);
    console.log("crawl 完成");
  } catch (err) {
    // Report the failure as a single message rather than a stack trace.
    console.error(err instanceof Error ? err.message : String(err));
    process.exitCode = 1;
  }
}
|
|
214
|
+
|
|
215
|
+
// Dispatch on the sub-command. `reset` runs the reset script by importing
// it; an unknown sub-command prints the usage text and exits with status 1,
// while no sub-command at all only prints the usage text.
if (command === "reset") {
  await import(new URL("../scripts/reset.mjs", import.meta.url));
} else if (command === "start") {
  await start();
} else if (command === "stop") {
  await stop();
} else if (command === "crawl") {
  await crawl();
} else {
  printUsage();
  if (command) process.exitCode = 1;
}
|