rssany 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/app/plugins/builtin/agi-eval-evaluation.rssany.js +188 -0
- package/app/plugins/builtin/amii-research-talent.rssany.js +73 -0
- package/app/plugins/builtin/anthropic-research.rssany.js +155 -0
- package/app/plugins/builtin/appen-resources.rssany.js +155 -0
- package/app/plugins/builtin/baai-wudao-paper-article.rssany.js +185 -0
- package/app/plugins/builtin/baaidata-csdn.rssany.js +242 -0
- package/app/plugins/builtin/baidu-research.rssany.js +222 -0
- package/app/plugins/builtin/brightdata-blog.rssany.js +301 -0
- package/app/plugins/builtin/bytedance-seed-research.rssany.js +231 -0
- package/app/plugins/builtin/five-radar.rssany.js +490 -0
- package/app/plugins/builtin/flageval-news.rssany.js +118 -0
- package/app/plugins/builtin/google-deepmind-research.rssany.js +223 -0
- package/app/plugins/builtin/google-research-datasets.rssany.js +171 -0
- package/app/plugins/builtin/google-research.rssany.js +220 -0
- package/app/plugins/builtin/google.rssany.js +187 -0
- package/app/plugins/builtin/hacker-news-newest.rssany.js +130 -0
- package/app/plugins/builtin/harvard-dataverse.rssany.js +166 -0
- package/app/plugins/builtin/huaweicloud-bbs-blogs.rssany.js +185 -0
- package/app/plugins/builtin/lingowhale.rssany.js +119 -0
- package/app/plugins/builtin/meituan-tech.rssany.js +130 -0
- package/app/plugins/builtin/meta-ai-publications.rssany.js +221 -0
- package/app/plugins/builtin/mila-quebec.rssany.js +199 -0
- package/app/plugins/builtin/mit-csail-research.rssany.js +208 -0
- package/app/plugins/builtin/moonshot.rssany.js +127 -0
- package/app/plugins/builtin/opendatalab-news.rssany.js +174 -0
- package/app/plugins/builtin/opendatalab.rssany.js +109 -0
- package/app/plugins/builtin/opendrivelab-autonomous-driving.rssany.js +114 -0
- package/app/plugins/builtin/opendrivelab-embodiedai.rssany.js +114 -0
- package/app/plugins/builtin/opendrivelab-publications.rssany.js +130 -0
- package/app/plugins/builtin/opendrivelab.rssany.js +333 -0
- package/app/plugins/builtin/paperswithcode.rssany.js +227 -0
- package/app/plugins/builtin/pjlab-adg-publications.rssany.js +202 -0
- package/app/plugins/builtin/rss.rssany.js +11 -1
- package/app/plugins/builtin/selectdataset.rssany.js +206 -0
- package/app/plugins/builtin/sensetime-tech-achievements.rssany.js +154 -0
- package/app/plugins/builtin/supervisely-blog.rssany.js +159 -0
- package/app/plugins/builtin/uci-ml-repository.rssany.js +111 -0
- package/app/plugins/builtin/venturebeat.rssany.js +97 -0
- package/app/plugins/builtin/worldlabs.rssany.js +129 -0
- package/app/plugins/builtin/x.rssany.js +159 -0
- package/app/plugins/builtin/xiaohongshu.rssany.js +283 -0
- package/app/plugins/builtin/zhipu-research.rssany.js +334 -0
- package/dist/index.js +62 -4
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/webui/build/200.html +6 -6
- package/webui/build/_app/immutable/assets/{0.DjU2hdCQ.css → 0.BB88QFoe.css} +1 -1
- package/webui/build/_app/immutable/assets/{homeFeedPanelStore.BopJZtHu.css → homeFeedPanelStore.iOmfP2qL.css} +1 -1
- package/webui/build/_app/immutable/chunks/CZD-YNDw.js +31 -0
- package/webui/build/_app/immutable/chunks/{C85CNwD2.js → D6VIKef0.js} +1 -1
- package/webui/build/_app/immutable/chunks/{CllQAdvt.js → Dbqx2mXq.js} +1 -1
- package/webui/build/_app/immutable/chunks/DeX-oq5W.js +41 -0
- package/webui/build/_app/immutable/chunks/{CdMsRjxJ.js → dhB8G5Is.js} +1 -1
- package/webui/build/_app/immutable/entry/{app.BcD2eSsQ.js → app.XPso7q7g.js} +2 -2
- package/webui/build/_app/immutable/entry/start.Db4snNCd.js +1 -0
- package/webui/build/_app/immutable/nodes/0.BKTQePmA.js +11 -0
- package/webui/build/_app/immutable/nodes/{1.DU9aYGAb.js → 1.BS3_Rfxm.js} +1 -1
- package/webui/build/_app/immutable/nodes/{10.Db6vw7Ih.js → 10.CyyxDCIS.js} +1 -1
- package/webui/build/_app/immutable/nodes/{11.BaAcorz3.js → 11.CtYgIaGj.js} +1 -1
- package/webui/build/_app/immutable/nodes/{14.DqT4pcrQ.js → 14.D5OEGPR2.js} +1 -1
- package/webui/build/_app/immutable/nodes/{15.CCLbjxnH.js → 15.B4dFN1Gk.js} +1 -1
- package/webui/build/_app/immutable/nodes/{16.DiigpVdP.js → 16.M7ZII7tl.js} +1 -1
- package/webui/build/_app/immutable/nodes/{3.DEcYOQc-.js → 3.7r8v7qkm.js} +1 -1
- package/webui/build/_app/immutable/nodes/{5.CvM1TkLG.js → 5.CHIzoGrb.js} +1 -1
- package/webui/build/_app/immutable/nodes/{6.Dscr6LkS.js → 6.BDBqx-GY.js} +1 -1
- package/webui/build/_app/immutable/nodes/{7.Bp60MobD.js → 7.D5czsDmz.js} +1 -1
- package/webui/build/_app/immutable/nodes/{8.DwSg0MHh.js → 8.pjVNsCdV.js} +1 -1
- package/webui/build/_app/immutable/nodes/{9.BeYOUjxR.js → 9.CsARv1BH.js} +1 -1
- package/webui/build/_app/version.json +1 -1
- package/webui/build/_app/immutable/chunks/CtijX1u3.js +0 -31
- package/webui/build/_app/immutable/chunks/Dv1VCsiB.js +0 -41
- package/webui/build/_app/immutable/entry/start.CbkdJdz1.js +0 -1
- package/webui/build/_app/immutable/nodes/0.DSUDmOx2.js +0 -11
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
// Dependencies supplied by the caller; assigned from `ctx.deps` at the start of fetchItems.
// Members used in this file: createHash, parseHtml, NodeType.
let _deps;

// Zhipu research page plugin: scrapes the list only and does not enrich article bodies
// (compatible with purified HTML).


// URL of the research listing page; also the base for fallback "#title" links.
const ZHIPU_RESEARCH_URL = "https://www.zhipuai.cn/zh/research";
// Site origin used to build per-article detail links.
const ZHIPU_ORIGIN = "https://www.zhipuai.cn";
// Matches text that consists solely of a date such as 2024-1-5 or 2024/01/05.
const DATE_RE = /^\d{4}[/-]\d{1,2}[/-]\d{1,2}$/;
// Tag labels that are treated as categories and never as a title or summary.
const RESEARCH_TAGS = new Set(["多模态", "语言模型", "基座模型", "推理模型", "Agent", "代码模型"]);
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
/**
 * Collapses every run of whitespace into a single space and trims both ends.
 *
 * @param {string | null | undefined} text - Text to clean; nullish is treated as "".
 * @returns {string} The cleaned text.
 */
function normalizeText(text) {
  const source = text ?? "";
  const collapsed = source.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
/**
 * Derives a stable item GUID: the hex-encoded SHA-256 digest of `input`.
 *
 * @param {string} input - Seed text (a link, or a "title|date" pair).
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const hasher = _deps.createHash("sha256");
  hasher.update(input);
  return hasher.digest("hex");
}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
/**
 * Extracts the first `YYYY-M-D` / `YYYY/M/D` date found in `dateText` and returns it as a
 * Date at UTC midnight.
 *
 * @param {string | null | undefined} dateText - Raw text that may contain a date.
 * @returns {Date | undefined} The parsed date, or `undefined` when the text is empty, contains
 *   no date pattern, or the matched digits do not form a parseable date (e.g. month 13).
 */
function parseDate(dateText) {
  if (!dateText) return undefined;
  const match = normalizeText(dateText).match(/(\d{4})[/-](\d{1,2})[/-](\d{1,2})/);
  if (!match) return undefined;
  const [, y, mm, dd] = match;
  const parsed = new Date(`${y}-${mm.padStart(2, "0")}-${dd.padStart(2, "0")}T00:00:00.000Z`);
  // The pattern only checks digit counts, so out-of-range parts produce an Invalid Date.
  // Report "no date" instead: callers use `parseDate(...) ?? new Date()` and then call
  // toISOString(), which throws a RangeError on an Invalid Date.
  return Number.isNaN(parsed.getTime()) ? undefined : parsed;
}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
/**
 * Scans `raw` from `startIndex` and returns the index of the `]` that brings the bracket
 * nesting back to zero. Brackets inside double-quoted strings are ignored, and a backslash
 * inside a string escapes the following character.
 *
 * @param {string} raw - Text containing a JSON-like array.
 * @param {number} startIndex - Index to start scanning from (normally the opening `[`).
 * @returns {number} Index of the matching `]`, or -1 when the text ends first.
 */
function findArrayEnd(raw, startIndex) {
  let nesting = 0;
  let insideString = false;
  let skipNext = false;
  let pos = startIndex;
  while (pos < raw.length) {
    const c = raw[pos];
    if (insideString) {
      if (skipNext) {
        // Character following a backslash: consumed without interpretation.
        skipNext = false;
      } else if (c === "\\") {
        skipNext = true;
      } else if (c === "\"") {
        insideString = false;
      }
    } else if (c === "\"") {
      insideString = true;
    } else if (c === "[") {
      nesting += 1;
    } else if (c === "]") {
      nesting -= 1;
      if (nesting === 0) return pos;
    }
    pos += 1;
  }
  return -1;
}
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
/**
 * Locates `"blogsItems":[ ... ]` in `text` and parses the array.
 *
 * @param {string} text - Text to search.
 * @returns {Array} The parsed array, or [] when the marker or a balanced array is missing,
 *   or when the slice is not valid JSON.
 */
function parseBlogsItemsArray(text) {
  const marker = "\"blogsItems\":";
  const markerIndex = text.indexOf(marker);
  if (markerIndex < 0) return [];
  const arrayStart = text.indexOf("[", markerIndex + marker.length);
  if (arrayStart < 0) return [];
  const arrayEnd = findArrayEnd(text, arrayStart);
  if (arrayEnd < 0) return [];
  try {
    const parsed = JSON.parse(text.slice(arrayStart, arrayEnd + 1));
    return Array.isArray(parsed) ? parsed : [];
  } catch {
    // Best effort: an unparsable slice simply means "nothing found".
    return [];
  }
}

/**
 * Extracts the `blogsItems` array embedded in the page HTML.
 *
 * The payload is looked for in two forms:
 *  1. as-is (plain JSON embedded in the page), and
 *  2. after stripping one level of backslash escaping (`\"` -> `"`, `\\` -> `\`), for payloads
 *     embedded inside a quoted string.
 * The as-is form is tried first because unescaping plain JSON would destroy legitimately
 * escaped quotes inside string values and make the array unparsable.
 *
 * @param {string} html - Page HTML.
 * @returns {Array} Parsed blog entries, or [] when none can be extracted (including when
 *   `html` is not a string).
 */
function extractBlogsItems(html) {
  if (typeof html !== "string" || html === "") return [];
  const direct = parseBlogsItemsArray(html);
  if (direct.length > 0) return direct;
  const unescaped = html.replace(/\\"/g, "\"").replace(/\\\\/g, "\\");
  return parseBlogsItemsArray(unescaped);
}
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
/**
 * Downloads `url` with a plain HTTP GET (browser-like User-Agent, Chinese-first
 * Accept-Language) and returns the response body as text.
 *
 * This is a best-effort helper: any failure (network error, timeout, non-2xx status) yields
 * `undefined` instead of throwing.
 *
 * @param {string} url - Address to download.
 * @param {number} [timeoutMs=15000] - Abort the request after this many milliseconds so a
 *   stalled connection cannot block the caller indefinitely.
 * @returns {Promise<string | undefined>} Body text, or `undefined` on any failure.
 */
async function fetchRawHtml(url, timeoutMs = 15000) {
  try {
    const res = await fetch(url, {
      redirect: "follow",
      signal: AbortSignal.timeout(timeoutMs),
      headers: {
        "User-Agent":
          "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
      },
    });
    if (!res.ok) return undefined;
    return await res.text();
  } catch {
    // Deliberately swallowed: the caller treats `undefined` as "no raw HTML available".
    return undefined;
  }
}
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
/**
 * Converts the `blogsItems` entries embedded in the page into feed items.
 *
 * Entries that are not objects, or that lack an id or a title, are skipped. The Chinese
 * fields (`title_zh`, `resume_zh`, `tag_zh`) are preferred over their `_en` counterparts.
 *
 * @param {Array} blogsItems - Entries as parsed from the page payload.
 * @returns {Array<{guid: string, title: string, link: string, pubDate: Date,
 *   summary: (string|undefined), categories: (string[]|undefined)}>} Feed items.
 */
function buildItemsFromBlogsItems(blogsItems) {
  const items = [];
  for (const blog of blogsItems) {
    if (typeof blog !== "object" || blog == null) continue;
    const id = String(blog.id ?? "").trim();
    if (!id) continue;
    const title = normalizeText(blog.title_zh ?? blog.title_en ?? "");
    if (!title) continue;
    const link = `${ZHIPU_ORIGIN}/zh/research/${id}`;
    const summary = normalizeText(blog.resume_zh ?? blog.resume_en ?? "");
    const createdAt = String(blog.createAt ?? "").trim();
    const pubDate = createdAt ? new Date(createdAt) : new Date();
    const category = normalizeText(blog.tag_zh ?? blog.tag_en ?? "");
    items.push({
      guid: hashGuid(link),
      title,
      link,
      // An unparsable `createAt` falls back to the current time.
      pubDate: Number.isNaN(pubDate.getTime()) ? new Date() : pubDate,
      summary: summary || undefined,
      // Previously computed but dropped; emitted in the same shape parseCardItem uses.
      categories: category ? [category] : undefined,
    });
  }
  return items;
}
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
/**
 * Builds a lookup from normalized article title to article id.
 *
 * Entries that are not objects, or that lack an id or a title, are ignored. When two entries
 * share a title, the later id wins.
 *
 * @param {Array} blogsItems - Entries as parsed from the page payload.
 * @returns {Map<string, string>} Title -> id.
 */
function buildTitleIdMap(blogsItems) {
  const idByTitle = new Map();
  for (const entry of blogsItems) {
    const isRecord = entry != null && typeof entry === "object";
    if (!isRecord) continue;
    const entryId = String(entry.id ?? "").trim();
    const entryTitle = normalizeText(entry.title_zh ?? entry.title_en ?? "");
    if (entryId && entryTitle) {
      idByTitle.set(entryTitle, entryId);
    }
  }
  return idByTitle;
}
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
/**
 * Tells whether `text`, once whitespace-normalized, consists solely of a date (see DATE_RE).
 *
 * @param {string | null | undefined} text - Text to inspect.
 * @returns {boolean} True when the whole text is a date.
 */
function isDateText(text) {
  const candidate = normalizeText(text);
  return DATE_RE.test(candidate);
}
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
/**
 * Returns the truthy entries of `texts` without duplicates, keeping first-seen order.
 *
 * @param {Iterable<string>} texts - Candidate texts.
 * @returns {string[]} De-duplicated texts.
 */
function uniqueTexts(texts) {
  // A Set iterates in insertion order, so spreading it preserves first-seen order.
  const distinct = new Set();
  for (const text of texts) {
    if (text) distinct.add(text);
  }
  return [...distinct];
}
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
/**
 * Collects the distinct texts of the "leaf" elements under `node`: div/span/p/h1-h3 elements
 * that do not themselves contain another such element. Empty texts and the pagination hints
 * ("没有更多", "加载更多") are dropped.
 *
 * @param {object} node - Parsed HTML node exposing `querySelectorAll`.
 * @returns {string[]} Leaf texts in document order, without duplicates.
 */
function getLeafTexts(node) {
  const selector = "div, span, p, h1, h2, h3";
  const collected = [];
  for (const el of node.querySelectorAll(selector)) {
    // Skip containers: only the innermost text-bearing elements count.
    if (el.querySelector(selector) != null) continue;
    const text = normalizeText(el.textContent);
    if (!text) continue;
    if (text.includes("没有更多") || text.includes("加载更多")) continue;
    collected.push(text);
  }
  return uniqueTexts(collected);
}
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
/**
 * Counts the `<p>` descendants of `node` whose text is purely a date.
 *
 * @param {object} node - Parsed HTML node exposing `querySelectorAll`.
 * @returns {number} Number of date paragraphs.
 */
function countDateNodes(node) {
  let total = 0;
  for (const paragraph of node.querySelectorAll("p")) {
    const text = normalizeText(paragraph.textContent);
    if (isDateText(text)) total += 1;
  }
  return total;
}
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
/**
 * Walks up from a date paragraph to find the element that wraps exactly one card.
 *
 * At most 8 ancestors are visited (non-element ancestors count towards that limit). An
 * ancestor qualifies while it contains exactly one date paragraph and at least one leaf text
 * that could be a title (not a date, not a tag, 6+ characters). The outermost qualifying
 * ancestor wins; the walk stops as soon as an ancestor contains more than one date, since it
 * then spans several cards.
 *
 * @param {object | null | undefined} dateNode - The `<p>` holding the date.
 * @returns {object | null} The card root element, or null when none qualifies.
 */
function findCardRootFromDateNode(dateNode) {
  const MAX_HOPS = 8;
  const looksLikeTitle = (t) => !isDateText(t) && !RESEARCH_TAGS.has(t) && t.length >= 6;
  let best = null;
  let ancestor = dateNode?.parentNode ?? null;
  let hops = 0;
  while (hops < MAX_HOPS && ancestor) {
    hops += 1;
    if (ancestor.nodeType === _deps.NodeType.ELEMENT_NODE) {
      const datesInside = countDateNodes(ancestor);
      if (datesInside === 1 && getLeafTexts(ancestor).some(looksLikeTitle)) {
        best = ancestor;
      }
      if (datesInside > 1) break;
    }
    ancestor = ancestor.parentNode ?? null;
  }
  return best;
}
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
/**
 * Turns one card element into a feed item.
 *
 * Among the card's leaf texts, dates, tags and the UI labels "时间排序" / "研究" are ignored;
 * the first remaining text is the title and the second (if any) the summary. When the title
 * is known to `titleIdMap` the item links to the article page and its guid is derived from
 * that link; otherwise it links to the list page with a "#title" fragment and the guid is
 * derived from "title|date".
 *
 * @param {object} card - Card root element.
 * @param {string} dateText - Date text found in the card.
 * @param {Map<string, string>} titleIdMap - Title -> article id.
 * @returns {object | null} The feed item, or null when the card has no usable text.
 */
function parseCardItem(card, dateText, titleIdMap) {
  const texts = getLeafTexts(card);
  const isNoise = (t) => isDateText(t) || RESEARCH_TAGS.has(t) || t === "时间排序" || t === "研究";
  const contentTexts = texts.filter((t) => !isNoise(t));
  if (contentTexts.length === 0) return null;

  const [title, summary] = contentTexts;
  const articleId = titleIdMap.get(title);
  let link;
  let guidSeed;
  if (articleId) {
    link = `${ZHIPU_ORIGIN}/zh/research/${articleId}`;
    guidSeed = link;
  } else {
    link = `${ZHIPU_RESEARCH_URL}#${encodeURIComponent(title)}`;
    guidSeed = `${title}|${normalizeText(dateText)}`;
  }

  const tag = texts.find((t) => RESEARCH_TAGS.has(t));
  return {
    guid: hashGuid(guidSeed),
    title,
    link,
    pubDate: parseDate(dateText) ?? new Date(),
    summary: summary || undefined,
    categories: tag ? [tag] : undefined,
  };
}
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
/**
 * DOM-based extraction: every `<p>` whose text is purely a date is treated as the anchor of
 * a card; the card root is located, parsed into an item, and items with the same
 * title + publication date are emitted only once.
 *
 * @param {string} html - Page HTML.
 * @param {Map<string, string>} titleIdMap - Title -> article id.
 * @returns {object[]} Feed items in document order.
 */
function buildItemsFromDom(html, titleIdMap) {
  const document = _deps.parseHtml(html);
  const emitted = new Set();
  const results = [];
  for (const paragraph of document.querySelectorAll("p")) {
    const dateText = normalizeText(paragraph.textContent);
    if (!isDateText(dateText)) continue;

    const card = findCardRootFromDateNode(paragraph);
    if (!card) continue;

    const item = parseCardItem(card, dateText, titleIdMap);
    if (!item) continue;

    const dedupeKey = `${item.title}|${item.pubDate.toISOString()}`;
    if (emitted.has(dedupeKey)) continue;
    emitted.add(dedupeKey);
    results.push(item);
  }
  return results;
}
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
/**
 * Last-resort extraction that ignores DOM nesting: the page is flattened into its distinct
 * leaf texts, and each date text starts a new item. After a date, tags are skipped, the first
 * other text becomes the title and the next one the summary; scanning stops at the following
 * date. Titles shorter than 4 characters are rejected, and items with the same
 * title + publication date are emitted only once.
 *
 * @param {string} html - Page HTML.
 * @param {Map<string, string>} titleIdMap - Title -> article id.
 * @returns {object[]} Feed items in document order.
 */
function buildItemsFromLeafSequence(html, titleIdMap) {
  const root = _deps.parseHtml(html);
  // Same leaf pipeline as the card parser, minus the page-level UI labels.
  // NOTE(review): leaf texts are de-duplicated page-wide, so a date shared by two cards
  // appears once and the second card cannot start its own item — confirm this is intended.
  const leafTexts = getLeafTexts(root).filter((t) => t !== "研究" && t !== "时间排序");

  const items = [];
  const seen = new Set();
  for (let i = 0; i < leafTexts.length; i += 1) {
    const dateText = leafTexts[i];
    if (!isDateText(dateText)) continue;

    let title = "";
    let summary;
    for (let j = i + 1; j < leafTexts.length; j += 1) {
      const t = leafTexts[j];
      if (isDateText(t)) break;
      if (RESEARCH_TAGS.has(t)) continue;
      if (!title) {
        title = t;
        continue;
      }
      summary = t;
      break;
    }
    if (!title || title.length < 4) continue;

    const id = titleIdMap.get(title);
    const link = id
      ? `${ZHIPU_ORIGIN}/zh/research/${id}`
      : `${ZHIPU_RESEARCH_URL}#${encodeURIComponent(title)}`;
    const pubDate = parseDate(dateText) ?? new Date();
    const key = `${title}|${pubDate.toISOString()}`;
    if (seen.has(key)) continue;
    seen.add(key);
    items.push({
      guid: hashGuid(id ? link : `${title}|${normalizeText(dateText)}`),
      title,
      link,
      pubDate,
      summary: summary || undefined,
    });
  }
  return items;
}
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
/**
 * Plugin entry point: fetches the research list page and returns its items.
 *
 * Extraction strategies, in order — the first one yielding items wins:
 *  1. the `blogsItems` payload embedded in the page (taken from the rendered HTML or, if it
 *     is absent there, from a plain re-download of the final URL);
 *  2. card detection in the DOM;
 *  3. the flattened leaf-text sequence.
 *
 * @param {string} sourceId - Source URL to fetch.
 * @param {object} ctx - Caller context; this function reads `ctx.deps` and calls `ctx.fetchHtml`.
 * @returns {Promise<object[]>} Feed items.
 * @throws {Error} When no strategy finds any item.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  // blogsItems (which carries the detail ids) lives in the page's script payload, so
  // HTML purification is disabled for this fetch.
  const fetched = await ctx.fetchHtml(sourceId, { waitMs: 5000, purify: false });
  const { html, finalUrl } = fetched;

  let blogsItems = extractBlogsItems(html);
  if (blogsItems.length === 0) {
    const rawHtml = await fetchRawHtml(finalUrl || sourceId);
    if (rawHtml) blogsItems = extractBlogsItems(rawHtml);
  }

  const fromScript = buildItemsFromBlogsItems(blogsItems);
  if (fromScript.length > 0) return fromScript;

  // HTML-based fallbacks, tried in order.
  const titleIdMap = buildTitleIdMap(blogsItems);
  for (const strategy of [buildItemsFromDom, buildItemsFromLeafSequence]) {
    const parsed = strategy(html, titleIdMap);
    if (parsed.length > 0) return parsed;
  }

  throw new Error("[zhipu-research] 未解析到研究条目,页面结构可能已变化");
}
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
// Default export: the plugin descriptor — its id, the list URL it applies to, and the
// function that fetches the list items.
export default {
  id: "zhipu-research",
  listUrlPattern: ZHIPU_RESEARCH_URL,
  fetchItems,
};
|
package/dist/index.js
CHANGED
|
@@ -10,7 +10,7 @@ import { promisify } from "node:util";
|
|
|
10
10
|
import puppeteerCore from "puppeteer-core";
|
|
11
11
|
import { parse, NodeType } from "node-html-parser";
|
|
12
12
|
import Database from "better-sqlite3";
|
|
13
|
-
import { mkdir, copyFile, access, rename, readFile,
|
|
13
|
+
import { mkdir, writeFile, copyFile, access, rename, readFile, readdir, stat, unlink } from "node:fs/promises";
|
|
14
14
|
import { fileURLToPath, pathToFileURL } from "node:url";
|
|
15
15
|
import { createHash } from "node:crypto";
|
|
16
16
|
import { JSDOM } from "jsdom";
|
|
@@ -188,16 +188,18 @@ function mergeSourceStatsRows(rows) {
|
|
|
188
188
|
for (const row of rows) {
|
|
189
189
|
const k = canonicalHttpSourceRef(row.source_url);
|
|
190
190
|
const prev = map.get(k);
|
|
191
|
+
const count7 = row.count_7d ?? 0;
|
|
191
192
|
if (!prev) {
|
|
192
|
-
map.set(k, { count: row.count, latest_at: row.latest_at });
|
|
193
|
+
map.set(k, { count: row.count, count_7d: count7, latest_at: row.latest_at });
|
|
193
194
|
} else {
|
|
194
195
|
map.set(k, {
|
|
195
196
|
count: prev.count + row.count,
|
|
197
|
+
count_7d: prev.count_7d + count7,
|
|
196
198
|
latest_at: maxIso(prev.latest_at, row.latest_at)
|
|
197
199
|
});
|
|
198
200
|
}
|
|
199
201
|
}
|
|
200
|
-
return [...map.entries()].map(([source_url, v]) => ({ source_url, count: v.count, latest_at: v.latest_at })).sort((a, b) => b.count - a.count);
|
|
202
|
+
return [...map.entries()].map(([source_url, v]) => ({ source_url, count: v.count, count_7d: v.count_7d, latest_at: v.latest_at })).sort((a, b) => b.count - a.count);
|
|
201
203
|
}
|
|
202
204
|
const httpSourceRef = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
|
|
203
205
|
__proto__: null,
|
|
@@ -218,6 +220,9 @@ const CONFIG_PATH = join(USER_DIR, "config.json");
|
|
|
218
220
|
const LEGACY_SUBSCRIPTIONS_PATH = join(USER_DIR, "subscriptions.json");
|
|
219
221
|
const BUILTIN_PLUGINS_DIR = join(PACKAGE_ROOT, "app/plugins/builtin");
|
|
220
222
|
const USER_PLUGINS_DIR = join(USER_DIR, "plugins");
|
|
223
|
+
const USER_DIR_PACKAGE_JSON = join(USER_DIR, "package.json");
|
|
224
|
+
const USER_DIR_PACKAGE_JSON_MINIMAL = `${JSON.stringify({ type: "module", private: true, description: "RssAny user data root; marks plugins as ESM for Node" })}
|
|
225
|
+
`;
|
|
221
226
|
const PLUGIN_SITE_TEMPLATE_PATH = join(PACKAGE_ROOT, "app/plugins/site.rssany.js");
|
|
222
227
|
async function pathExists(p) {
|
|
223
228
|
try {
|
|
@@ -262,11 +267,24 @@ async function seedExampleConfigsIfMissing() {
|
|
|
262
267
|
}
|
|
263
268
|
}
|
|
264
269
|
}
|
|
270
|
+
/**
 * Writes the minimal package.json (USER_DIR_PACKAGE_JSON_MINIMAL) to USER_DIR_PACKAGE_JSON
 * unless a file already exists there. An existing file is never overwritten.
 *
 * Never throws: a failed write is logged as a warning only.
 *
 * @returns {Promise<void>}
 */
async function ensureUserDirPackageJsonForPlugins() {
  try {
    // "wx" creates the file exclusively: the write fails with EEXIST instead of overwriting.
    // This makes "check whether it exists" and "create it" a single step, so two processes
    // starting at the same time cannot race between the check and the write.
    await writeFile(USER_DIR_PACKAGE_JSON, USER_DIR_PACKAGE_JSON_MINIMAL, { encoding: "utf-8", flag: "wx" });
    logger.info("config", "已写入 .rssany/package.json(type: module,消除插件 ESM 歧义)", { path: USER_DIR_PACKAGE_JSON });
  } catch (err) {
    // Already present: nothing to do.
    if (err?.code === "EEXIST") return;
    logger.warn("config", "写入 .rssany/package.json 失败", {
      path: USER_DIR_PACKAGE_JSON,
      err: err instanceof Error ? err.message : String(err)
    });
  }
}
|
|
265
282
|
async function initUserDir() {
|
|
266
283
|
await mkdir(USER_DIR, { recursive: true });
|
|
267
284
|
await mkdir(DATA_DIR, { recursive: true });
|
|
268
285
|
await mkdir(CACHE_DIR, { recursive: true });
|
|
269
286
|
await mkdir(USER_PLUGINS_DIR, { recursive: true });
|
|
287
|
+
await ensureUserDirPackageJsonForPlugins();
|
|
270
288
|
await seedExampleConfigsIfMissing();
|
|
271
289
|
if (!await pathExists(SOURCES_CONFIG_PATH) && await pathExists(LEGACY_SUBSCRIPTIONS_PATH)) {
|
|
272
290
|
await migrateFile(LEGACY_SUBSCRIPTIONS_PATH, SOURCES_CONFIG_PATH);
|
|
@@ -876,7 +894,11 @@ async function getSourceStats() {
|
|
|
876
894
|
const { mergeSourceStatsRows: mergeSourceStatsRows2 } = await Promise.resolve().then(() => httpSourceRef);
|
|
877
895
|
const db = await getDb();
|
|
878
896
|
const rows = db.prepare(
|
|
879
|
-
|
|
897
|
+
`SELECT source_url,
|
|
898
|
+
COUNT(*) as count,
|
|
899
|
+
SUM(CASE WHEN julianday(fetched_at) >= julianday('now', '-7 days') THEN 1 ELSE 0 END) as count_7d,
|
|
900
|
+
MAX(COALESCE(pub_date, fetched_at)) as latest_at
|
|
901
|
+
FROM items GROUP BY source_url ORDER BY count DESC`
|
|
880
902
|
).all();
|
|
881
903
|
return mergeSourceStatsRows2(rows);
|
|
882
904
|
}
|
|
@@ -3381,6 +3403,42 @@ function registerSourcesRoutes(app) {
|
|
|
3381
3403
|
return c.json({});
|
|
3382
3404
|
}
|
|
3383
3405
|
});
|
|
3406
|
+
// POST /api/sources/open-browser (guarded by requireAdmin()): opens the given http(s) URL in a
// visible (headless: false) browser, using the proxy resolved for that URL.
// The response is sent right after the launch is started, without waiting for it to finish.
app.post("/api/sources/open-browser", requireAdmin(), async (c) => {
  try {
    const body = await c.req.json();
    const raw = typeof body?.url === "string" ? body.url.trim() : "";
    if (!raw) return c.json({ ok: false, message: "缺少 url" }, 400);
    // Scheme check is a case-insensitive prefix test only; the URL is not parsed here.
    const lower = raw.toLowerCase();
    if (!lower.startsWith("http://") && !lower.startsWith("https://")) {
      return c.json({ ok: false, message: "仅支持 http(s) URL" }, 400);
    }
    const url = raw;
    const source = getSource(url);
    const merged = await getEffectiveProxyForListUrl(url, source);
    const proxy = resolveProxy({ proxy: merged });
    // Fire-and-forget: the launch promise is deliberately not awaited.
    // NOTE(review): launch/navigation failures are swallowed by the empty catch handlers below,
    // so the client still receives ok: true — consider logging them.
    void launchBrowser({ headless: false, cacheDir: CACHE_DIR, proxy }).then(async (browser) => {
      try {
        const page = await browser.newPage();
        await applyProxyAuthToPage(page, { proxy: merged });
        const realUserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
        await page.setUserAgent(realUserAgent);
        await page.setViewport({ width: 1366, height: 960 });
        await page.goto(url, { waitUntil: "domcontentloaded", timeout: 6e4 });
        // Closing the page closes the whole browser.
        page.once("close", () => {
          void browser.close().catch(() => {
          });
        });
      } catch {
        // Setup or navigation failed: close the browser that was just launched.
        await browser.close().catch(() => {
        });
      }
    }).catch(() => {
    });
    return c.json({ ok: true, message: "已在爬虫浏览器中打开" });
  } catch {
    // NOTE(review): this also catches failures of getSource / getEffectiveProxyForListUrl /
    // resolveProxy, which are then reported as an invalid request body (400).
    return c.json({ ok: false, message: "请求体无效" }, 400);
  }
});
|
|
3384
3442
|
app.get("/api/sources/raw", requireAdmin(), async (c) => {
|
|
3385
3443
|
try {
|
|
3386
3444
|
const raw = await getSourcesRaw();
|