rssany 0.1.1 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +1 -1
- package/README.md +32 -50
- package/app/plugins/builtin/rss.rssany.js +107 -24
- package/app/plugins/site.rssany.js +1 -1
- package/dist/index.js +1003 -271
- package/dist/index.js.map +1 -1
- package/{config.examples.json → init/config.json} +7 -1
- package/init/sources.json +353 -0
- package/package.json +4 -6
- package/statics/401.html +1 -1
- package/statics/README.md +1 -1
- package/webui/build/200.html +16 -18
- package/webui/build/_app/immutable/assets/0.DjU2hdCQ.css +1 -0
- package/webui/build/_app/immutable/assets/10.Dj8_pmut.css +1 -0
- package/webui/build/_app/immutable/assets/11.qYZMiTb0.css +1 -0
- package/webui/build/_app/immutable/assets/12.Ct59LCqW.css +1 -0
- package/webui/build/_app/immutable/assets/13.BhO9zvFi.css +1 -0
- package/webui/build/_app/immutable/assets/14.CujIhjQK.css +1 -0
- package/webui/build/_app/immutable/assets/15.nNGjXhCQ.css +1 -0
- package/webui/build/_app/immutable/assets/16.PP9XLDf7.css +1 -0
- package/webui/build/_app/immutable/assets/4.9wPHhVwv.css +1 -0
- package/webui/build/_app/immutable/assets/6.DSJfjJwx.css +1 -0
- package/webui/build/_app/immutable/assets/7.CrNxmd8B.css +1 -0
- package/webui/build/_app/immutable/assets/8.Ba5_jYIY.css +1 -0
- package/webui/build/_app/immutable/assets/{9.BZheTlzZ.css → 9.m-LCx_kl.css} +1 -1
- package/webui/build/_app/immutable/assets/BackToParentRoute.DGk-X5ow.css +1 -0
- package/webui/build/_app/immutable/assets/SourcesList.yTBBi3_m.css +1 -0
- package/webui/build/_app/immutable/assets/homeFeedPanelStore.BopJZtHu.css +1 -0
- package/webui/build/_app/immutable/chunks/{V2-VOe88.js → B-OsL1Ct.js} +1 -1
- package/webui/build/_app/immutable/chunks/B2Q1a1-H.js +2 -0
- package/webui/build/_app/immutable/chunks/BK3WtZwv.js +1 -0
- package/webui/build/_app/immutable/chunks/BQqoDzLx.js +1 -0
- package/webui/build/_app/immutable/chunks/BbWUOQ_m.js +1 -0
- package/webui/build/_app/immutable/chunks/Bp63qm3L.js +1 -0
- package/webui/build/_app/immutable/chunks/{CZDFXKiF.js → C85CNwD2.js} +1 -1
- package/webui/build/_app/immutable/chunks/CVzlFH44.js +1 -0
- package/webui/build/_app/immutable/chunks/CWNeClHp.js +6 -0
- package/webui/build/_app/immutable/chunks/CdMsRjxJ.js +1 -0
- package/webui/build/_app/immutable/chunks/Cihqbfi5.js +1 -0
- package/webui/build/_app/immutable/chunks/CllQAdvt.js +1 -0
- package/webui/build/_app/immutable/chunks/CtijX1u3.js +31 -0
- package/webui/build/_app/immutable/chunks/D5GvRCv7.js +1 -0
- package/webui/build/_app/immutable/chunks/DEDI7Ecm.js +1 -0
- package/webui/build/_app/immutable/chunks/DFuhmi31.js +1 -0
- package/webui/build/_app/immutable/chunks/DMWEh-Ek.js +2 -0
- package/webui/build/_app/immutable/chunks/DjNLq3TF.js +1 -0
- package/webui/build/_app/immutable/chunks/Dt2CddFe.js +1 -0
- package/webui/build/_app/immutable/chunks/Dv1VCsiB.js +41 -0
- package/webui/build/_app/immutable/chunks/Dw782Tjs.js +1 -0
- package/webui/build/_app/immutable/chunks/Xy_fhzQq.js +1 -0
- package/webui/build/_app/immutable/chunks/lk5LaiqA.js +1 -0
- package/webui/build/_app/immutable/chunks/mW5RwvnK.js +13 -0
- package/webui/build/_app/immutable/chunks/{CtHRh_pJ.js → tB7QMF3U.js} +1 -1
- package/webui/build/_app/immutable/chunks/xtNWTdbD.js +1 -0
- package/webui/build/_app/immutable/entry/app.BcD2eSsQ.js +2 -0
- package/webui/build/_app/immutable/entry/start.CbkdJdz1.js +1 -0
- package/webui/build/_app/immutable/nodes/0.DSUDmOx2.js +11 -0
- package/webui/build/_app/immutable/nodes/1.DU9aYGAb.js +1 -0
- package/webui/build/_app/immutable/nodes/10.Db6vw7Ih.js +1 -0
- package/webui/build/_app/immutable/nodes/11.BaAcorz3.js +1 -0
- package/webui/build/_app/immutable/nodes/12.Cg8AeCSH.js +1 -0
- package/webui/build/_app/immutable/nodes/13.nT3SOzEB.js +1 -0
- package/webui/build/_app/immutable/nodes/14.DqT4pcrQ.js +1 -0
- package/webui/build/_app/immutable/nodes/15.CCLbjxnH.js +1 -0
- package/webui/build/_app/immutable/nodes/{12.CMcby_lY.js → 16.DiigpVdP.js} +15 -15
- package/webui/build/_app/immutable/nodes/2.BYWOpaxy.js +1 -0
- package/webui/build/_app/immutable/nodes/3.DEcYOQc-.js +1 -0
- package/webui/build/_app/immutable/nodes/4.DTSxpKm7.js +2 -0
- package/webui/build/_app/immutable/nodes/5.CvM1TkLG.js +1 -0
- package/webui/build/_app/immutable/nodes/6.Dscr6LkS.js +1 -0
- package/webui/build/_app/immutable/nodes/7.Bp60MobD.js +1 -0
- package/webui/build/_app/immutable/nodes/8.DwSg0MHh.js +1 -0
- package/webui/build/_app/immutable/nodes/9.BeYOUjxR.js +1 -0
- package/webui/build/_app/version.json +1 -1
- package/sources.example.json +0 -562
- package/webui/build/_app/immutable/assets/0.BUAXpTm6.css +0 -1
- package/webui/build/_app/immutable/assets/10.I1OuCLrU.css +0 -1
- package/webui/build/_app/immutable/assets/11.CrO9xaki.css +0 -1
- package/webui/build/_app/immutable/assets/12.BEi6fInA.css +0 -1
- package/webui/build/_app/immutable/assets/14.Ctlgn1LZ.css +0 -1
- package/webui/build/_app/immutable/assets/2.eJ80XOGm.css +0 -1
- package/webui/build/_app/immutable/assets/4.B8-jYAVj.css +0 -1
- package/webui/build/_app/immutable/assets/6.Drn-0DON.css +0 -1
- package/webui/build/_app/immutable/assets/7.ms2diq_q.css +0 -1
- package/webui/build/_app/immutable/assets/8.DKymkjjs.css +0 -1
- package/webui/build/_app/immutable/assets/SourcesList.BhtYlRsQ.css +0 -1
- package/webui/build/_app/immutable/chunks/BUngiKFg.js +0 -1
- package/webui/build/_app/immutable/chunks/Bt0fzibd.js +0 -1
- package/webui/build/_app/immutable/chunks/BxHqDcpw.js +0 -1
- package/webui/build/_app/immutable/chunks/ByQRbEUX.js +0 -1
- package/webui/build/_app/immutable/chunks/C12mHcUp.js +0 -6
- package/webui/build/_app/immutable/chunks/C1kQ4pHy.js +0 -1
- package/webui/build/_app/immutable/chunks/C74gbb4Q.js +0 -1
- package/webui/build/_app/immutable/chunks/CAtemnMo.js +0 -1
- package/webui/build/_app/immutable/chunks/CVjCNJia.js +0 -1
- package/webui/build/_app/immutable/chunks/CjQQ9_Q2.js +0 -2
- package/webui/build/_app/immutable/chunks/D-6mYMI1.js +0 -1
- package/webui/build/_app/immutable/chunks/D1Gs8-g3.js +0 -1
- package/webui/build/_app/immutable/chunks/D9dRVKgL.js +0 -1
- package/webui/build/_app/immutable/chunks/DCEY1XiC.js +0 -1
- package/webui/build/_app/immutable/chunks/DI-t-G_K.js +0 -2
- package/webui/build/_app/immutable/chunks/DTUxjyWL.js +0 -1
- package/webui/build/_app/immutable/chunks/DWJZOHke.js +0 -1
- package/webui/build/_app/immutable/chunks/Dgs6d7X5.js +0 -1
- package/webui/build/_app/immutable/chunks/DjpPK99f.js +0 -71
- package/webui/build/_app/immutable/chunks/DjzVVxpy.js +0 -1
- package/webui/build/_app/immutable/chunks/DvtNA-3X.js +0 -1
- package/webui/build/_app/immutable/chunks/LQVMBmDN.js +0 -1
- package/webui/build/_app/immutable/chunks/Qw0Qgx6J.js +0 -1
- package/webui/build/_app/immutable/chunks/bohabpgg.js +0 -1
- package/webui/build/_app/immutable/chunks/c-YfbAB_.js +0 -8
- package/webui/build/_app/immutable/chunks/tpTQfoNn.js +0 -1
- package/webui/build/_app/immutable/entry/app.Cra5Zsz4.js +0 -2
- package/webui/build/_app/immutable/entry/start.ToY0Qh0_.js +0 -1
- package/webui/build/_app/immutable/nodes/0.D2-xzG_8.js +0 -11
- package/webui/build/_app/immutable/nodes/1.CFixzRR6.js +0 -1
- package/webui/build/_app/immutable/nodes/10.ayxWydPr.js +0 -1
- package/webui/build/_app/immutable/nodes/11.B0JS3E2j.js +0 -1
- package/webui/build/_app/immutable/nodes/13.DRpZV72T.js +0 -1
- package/webui/build/_app/immutable/nodes/14.DVeJW6bd.js +0 -1
- package/webui/build/_app/immutable/nodes/2.DIZ4IPNm.js +0 -1
- package/webui/build/_app/immutable/nodes/3.BFSNf0FK.js +0 -1
- package/webui/build/_app/immutable/nodes/4.BSsIjejE.js +0 -2
- package/webui/build/_app/immutable/nodes/5.COxRT9Oe.js +0 -1
- package/webui/build/_app/immutable/nodes/6.CBgQ4YzB.js +0 -1
- package/webui/build/_app/immutable/nodes/7.BbzWOL0V.js +0 -6
- package/webui/build/_app/immutable/nodes/8.C8120200.js +0 -1
- package/webui/build/_app/immutable/nodes/9.BH_BGQQ4.js +0 -1
- /package/webui/build/_app/immutable/nodes/{15.BtYZF6FM.js → 17.BtYZF6FM.js} +0 -0
- /package/webui/build/_app/immutable/nodes/{16.Ba_qJjp6.js → 18.Ba_qJjp6.js} +0 -0
package/dist/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
|
-
import { existsSync, unlinkSync, openSync, writeSync, closeSync, readFileSync, watch } from "node:fs";
|
|
2
|
+
import { existsSync, unlinkSync, openSync, writeSync, closeSync, readFileSync, statSync, watch } from "node:fs";
|
|
3
3
|
import { platform, homedir, networkInterfaces } from "node:os";
|
|
4
4
|
import { serve } from "@hono/node-server";
|
|
5
5
|
import { Hono } from "hono";
|
|
@@ -10,7 +10,7 @@ import { promisify } from "node:util";
|
|
|
10
10
|
import puppeteerCore from "puppeteer-core";
|
|
11
11
|
import { parse, NodeType } from "node-html-parser";
|
|
12
12
|
import Database from "better-sqlite3";
|
|
13
|
-
import { mkdir, copyFile, access, rename, readFile, writeFile, readdir } from "node:fs/promises";
|
|
13
|
+
import { mkdir, copyFile, access, rename, readFile, writeFile, readdir, stat, unlink } from "node:fs/promises";
|
|
14
14
|
import { fileURLToPath, pathToFileURL } from "node:url";
|
|
15
15
|
import { createHash } from "node:crypto";
|
|
16
16
|
import { JSDOM } from "jsdom";
|
|
@@ -134,6 +134,18 @@ function getEffectiveItemFields(item, lng) {
|
|
|
134
134
|
content: (t?.content != null && t.content !== "" ? t.content : item.content) ?? ""
|
|
135
135
|
};
|
|
136
136
|
}
|
|
137
|
+
function pubDateToIsoOrNull(pubDate) {
|
|
138
|
+
if (pubDate == null) return null;
|
|
139
|
+
if (pubDate instanceof Date) {
|
|
140
|
+
const ms = pubDate.getTime();
|
|
141
|
+
return Number.isNaN(ms) ? null : pubDate.toISOString();
|
|
142
|
+
}
|
|
143
|
+
if (typeof pubDate === "string") {
|
|
144
|
+
const s = pubDate.trim();
|
|
145
|
+
return s || null;
|
|
146
|
+
}
|
|
147
|
+
return null;
|
|
148
|
+
}
|
|
137
149
|
function normalizeAuthor(author) {
|
|
138
150
|
if (author == null) return void 0;
|
|
139
151
|
if (Array.isArray(author)) return author.filter((s2) => typeof s2 === "string" && s2.trim()).map((s2) => s2.trim());
|
|
@@ -148,6 +160,50 @@ function markPipelineDrop(item) {
|
|
|
148
160
|
function isPipelineDroppedItem(item) {
|
|
149
161
|
return item.extra?.[PIPELINE_DROP_EXTRA_KEY] === true;
|
|
150
162
|
}
|
|
163
|
+
function canonicalHttpSourceRef(ref) {
|
|
164
|
+
const t = ref.trim();
|
|
165
|
+
if (!t) return t;
|
|
166
|
+
if (!/^https?:\/\//i.test(t)) return t.toLowerCase();
|
|
167
|
+
try {
|
|
168
|
+
const u = new URL(t);
|
|
169
|
+
const protocol = u.protocol.toLowerCase();
|
|
170
|
+
const host = u.host.toLowerCase();
|
|
171
|
+
let path = u.pathname;
|
|
172
|
+
if (path.length > 1 && path.endsWith("/")) {
|
|
173
|
+
path = path.slice(0, -1);
|
|
174
|
+
}
|
|
175
|
+
path = path.toLowerCase();
|
|
176
|
+
return `${protocol}//${host}${path}${u.search}${u.hash}`;
|
|
177
|
+
} catch {
|
|
178
|
+
return t.toLowerCase();
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
function maxIso(a, b) {
|
|
182
|
+
if (!a) return b;
|
|
183
|
+
if (!b) return a;
|
|
184
|
+
return a >= b ? a : b;
|
|
185
|
+
}
|
|
186
|
+
function mergeSourceStatsRows(rows) {
|
|
187
|
+
const map = /* @__PURE__ */ new Map();
|
|
188
|
+
for (const row of rows) {
|
|
189
|
+
const k = canonicalHttpSourceRef(row.source_url);
|
|
190
|
+
const prev = map.get(k);
|
|
191
|
+
if (!prev) {
|
|
192
|
+
map.set(k, { count: row.count, latest_at: row.latest_at });
|
|
193
|
+
} else {
|
|
194
|
+
map.set(k, {
|
|
195
|
+
count: prev.count + row.count,
|
|
196
|
+
latest_at: maxIso(prev.latest_at, row.latest_at)
|
|
197
|
+
});
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
return [...map.entries()].map(([source_url, v]) => ({ source_url, count: v.count, latest_at: v.latest_at })).sort((a, b) => b.count - a.count);
|
|
201
|
+
}
|
|
202
|
+
const httpSourceRef = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
|
|
203
|
+
__proto__: null,
|
|
204
|
+
canonicalHttpSourceRef,
|
|
205
|
+
mergeSourceStatsRows
|
|
206
|
+
}, Symbol.toStringTag, { value: "Module" }));
|
|
151
207
|
const __dir = dirname(fileURLToPath(import.meta.url));
|
|
152
208
|
const base = basename(__dir);
|
|
153
209
|
const PACKAGE_ROOT = base === "app" || base === "dist" ? join(__dir, "..") : __dir;
|
|
@@ -181,8 +237,9 @@ async function migrateFile(from, to) {
|
|
|
181
237
|
logger.warn("config", "配置迁移失败", { from, to, err: err instanceof Error ? err.message : String(err) });
|
|
182
238
|
}
|
|
183
239
|
}
|
|
184
|
-
const
|
|
185
|
-
const
|
|
240
|
+
const INIT_DATA_DIR = join(PACKAGE_ROOT, "init");
|
|
241
|
+
const EXAMPLE_SOURCES = join(INIT_DATA_DIR, "sources.json");
|
|
242
|
+
const EXAMPLE_CONFIG = join(INIT_DATA_DIR, "config.json");
|
|
186
243
|
async function seedExampleConfigsIfMissing() {
|
|
187
244
|
if (!await pathExists(SOURCES_CONFIG_PATH) && await pathExists(EXAMPLE_SOURCES)) {
|
|
188
245
|
try {
|
|
@@ -519,13 +576,31 @@ function initSchema(db) {
|
|
|
519
576
|
}
|
|
520
577
|
} catch {
|
|
521
578
|
}
|
|
579
|
+
migrateItemsSourceUrlIfNeeded(db);
|
|
580
|
+
}
|
|
581
|
+
function migrateItemsSourceUrlIfNeeded(db) {
|
|
582
|
+
const v = db.pragma("user_version", { simple: true });
|
|
583
|
+
if (v >= 2) return;
|
|
584
|
+
const rows = db.prepare("SELECT rowid, source_url FROM items").all();
|
|
585
|
+
const upd = db.prepare("UPDATE items SET source_url = @next WHERE rowid = @rowid");
|
|
586
|
+
const run = db.transaction(() => {
|
|
587
|
+
for (const r of rows) {
|
|
588
|
+
const next = canonicalHttpSourceRef(r.source_url);
|
|
589
|
+
if (next !== r.source_url) {
|
|
590
|
+
upd.run({ next, rowid: r.rowid });
|
|
591
|
+
}
|
|
592
|
+
}
|
|
593
|
+
db.pragma("user_version = 2");
|
|
594
|
+
});
|
|
595
|
+
run();
|
|
522
596
|
}
|
|
523
597
|
async function upsertItems(items, sourceUrlOverride) {
|
|
524
598
|
if (items.length === 0) return { newCount: 0, newIds: /* @__PURE__ */ new Set() };
|
|
525
|
-
const
|
|
526
|
-
if (!
|
|
599
|
+
const raw = items[0].sourceRef?.trim();
|
|
600
|
+
if (!raw) {
|
|
527
601
|
throw new Error("upsertItems: 每条 item 须有 sourceRef,或传入 sourceUrlOverride");
|
|
528
602
|
}
|
|
603
|
+
const sourceUrl = canonicalHttpSourceRef(raw);
|
|
529
604
|
return withWriteLock(async () => {
|
|
530
605
|
const db = await getDb();
|
|
531
606
|
const stmt = db.prepare(`
|
|
@@ -556,7 +631,7 @@ async function upsertItems(items, sourceUrlOverride) {
|
|
|
556
631
|
const nextSummary = normalizeText(item.summary) || null;
|
|
557
632
|
const nextAuthorArr = normalizeAuthor(item.author);
|
|
558
633
|
const nextAuthor = nextAuthorArr?.length ? JSON.stringify(nextAuthorArr) : null;
|
|
559
|
-
const nextPubDate = item.pubDate
|
|
634
|
+
const nextPubDate = pubDateToIsoOrNull(item.pubDate);
|
|
560
635
|
const nextTags = item.tags?.length ? JSON.stringify(item.tags) : null;
|
|
561
636
|
const nextImageUrl = typeof item.imageUrl === "string" && item.imageUrl.trim() ? item.imageUrl.trim() : null;
|
|
562
637
|
const info = stmt.run({
|
|
@@ -624,7 +699,7 @@ async function updateItemContent(item) {
|
|
|
624
699
|
const arr = normalizeAuthor(item.author);
|
|
625
700
|
return arr?.length ? JSON.stringify(arr) : null;
|
|
626
701
|
})(),
|
|
627
|
-
pubDate: item.pubDate
|
|
702
|
+
pubDate: pubDateToIsoOrNull(item.pubDate),
|
|
628
703
|
tags: item.tags?.length ? JSON.stringify(item.tags) : null,
|
|
629
704
|
translations: item.translations && Object.keys(item.translations).length > 0 ? JSON.stringify(item.translations) : null
|
|
630
705
|
});
|
|
@@ -632,11 +707,13 @@ async function updateItemContent(item) {
|
|
|
632
707
|
}
|
|
633
708
|
async function queryFeedItems(sourceUrls, limit, offset, opts) {
|
|
634
709
|
if (sourceUrls.length === 0) return { items: [], hasMore: false };
|
|
710
|
+
const expanded = [...new Set(sourceUrls.map((u) => canonicalHttpSourceRef(u)).filter(Boolean))];
|
|
711
|
+
if (expanded.length === 0) return { items: [], hasMore: false };
|
|
635
712
|
const db = await getDb();
|
|
636
|
-
const placeholders =
|
|
713
|
+
const placeholders = expanded.map((_, i) => `@u${i}`).join(", ");
|
|
637
714
|
const conditions = [`source_url IN (${placeholders})`];
|
|
638
715
|
const params = { lim: limit + 1, off: offset };
|
|
639
|
-
|
|
716
|
+
expanded.forEach((url, i) => {
|
|
640
717
|
params[`u${i}`] = url;
|
|
641
718
|
});
|
|
642
719
|
if (opts?.since) {
|
|
@@ -670,12 +747,20 @@ async function queryItems(opts) {
|
|
|
670
747
|
const conditions = [];
|
|
671
748
|
const params = { limit, offset };
|
|
672
749
|
if (sourceUrl) {
|
|
750
|
+
const key = canonicalHttpSourceRef(sourceUrl);
|
|
751
|
+
if (!key) {
|
|
752
|
+
return { items: [], total: 0 };
|
|
753
|
+
}
|
|
673
754
|
conditions.push("i.source_url = @sourceUrl");
|
|
674
|
-
params.sourceUrl =
|
|
755
|
+
params.sourceUrl = key;
|
|
675
756
|
} else if (sourceUrls && sourceUrls.length > 0) {
|
|
676
|
-
const
|
|
757
|
+
const expanded = [...new Set(sourceUrls.map((s) => canonicalHttpSourceRef(s)).filter(Boolean))];
|
|
758
|
+
if (expanded.length === 0) {
|
|
759
|
+
return { items: [], total: 0 };
|
|
760
|
+
}
|
|
761
|
+
const placeholders = expanded.map((_, i) => `@src${i}`).join(", ");
|
|
677
762
|
conditions.push(`i.source_url IN (${placeholders})`);
|
|
678
|
-
|
|
763
|
+
expanded.forEach((s, i) => params[`src${i}`] = s);
|
|
679
764
|
}
|
|
680
765
|
if (author && author.trim().length >= 2) {
|
|
681
766
|
conditions.push("instr(i.author, @author) > 0");
|
|
@@ -769,9 +854,11 @@ async function deleteItem(id) {
|
|
|
769
854
|
}
|
|
770
855
|
async function deleteItemsBySourceUrl(sourceUrl) {
|
|
771
856
|
if (!sourceUrl?.trim()) return 0;
|
|
857
|
+
const key = canonicalHttpSourceRef(sourceUrl.trim());
|
|
858
|
+
if (!key) return 0;
|
|
772
859
|
return withWriteLock(async () => {
|
|
773
860
|
const db = await getDb();
|
|
774
|
-
const info = db.prepare("DELETE FROM items WHERE source_url = @sourceUrl").run({ sourceUrl:
|
|
861
|
+
const info = db.prepare("DELETE FROM items WHERE source_url = @sourceUrl").run({ sourceUrl: key });
|
|
775
862
|
return info.changes;
|
|
776
863
|
});
|
|
777
864
|
}
|
|
@@ -786,10 +873,12 @@ async function getPendingPushItems(limit = 100) {
|
|
|
786
873
|
return mapRowsToDbItems(rows);
|
|
787
874
|
}
|
|
788
875
|
async function getSourceStats() {
|
|
876
|
+
const { mergeSourceStatsRows: mergeSourceStatsRows2 } = await Promise.resolve().then(() => httpSourceRef);
|
|
789
877
|
const db = await getDb();
|
|
790
|
-
|
|
878
|
+
const rows = db.prepare(
|
|
791
879
|
"SELECT source_url, COUNT(*) as count, MAX(COALESCE(pub_date, fetched_at)) as latest_at FROM items GROUP BY source_url ORDER BY count DESC"
|
|
792
880
|
).all();
|
|
881
|
+
return mergeSourceStatsRows2(rows);
|
|
793
882
|
}
|
|
794
883
|
async function insertLog(entry) {
|
|
795
884
|
const db = await getLogsDb();
|
|
@@ -969,6 +1058,9 @@ const logger = {
|
|
|
969
1058
|
}
|
|
970
1059
|
};
|
|
971
1060
|
const execAsync = promisify(exec);
|
|
1061
|
+
const VIEWPORT_WIDTH = 1366;
|
|
1062
|
+
const VIEWPORT_HEIGHT_HEADLESS = 5e3;
|
|
1063
|
+
const VIEWPORT_HEIGHT_HEADFUL = 1200;
|
|
972
1064
|
function resolveProxy(config) {
|
|
973
1065
|
return config?.proxy ?? process.env.HTTP_PROXY ?? process.env.HTTPS_PROXY;
|
|
974
1066
|
}
|
|
@@ -979,6 +1071,14 @@ function parseProxy(proxy) {
|
|
|
979
1071
|
const password = u.password || void 0;
|
|
980
1072
|
return { serverUrl, username, password };
|
|
981
1073
|
}
|
|
1074
|
+
async function applyProxyAuthToPage(page, opts) {
|
|
1075
|
+
const proxy = resolveProxy(opts);
|
|
1076
|
+
if (!proxy) return;
|
|
1077
|
+
const { username, password } = parseProxy(proxy);
|
|
1078
|
+
if (username !== void 0 || password !== void 0) {
|
|
1079
|
+
await page.authenticate({ username: username ?? "", password: password ?? "" });
|
|
1080
|
+
}
|
|
1081
|
+
}
|
|
982
1082
|
function launchArgs(config) {
|
|
983
1083
|
const base2 = [
|
|
984
1084
|
"--disable-blink-features=AutomationControlled",
|
|
@@ -990,8 +1090,8 @@ function launchArgs(config) {
|
|
|
990
1090
|
"--disable-site-isolation-trials",
|
|
991
1091
|
"--disable-infobars"
|
|
992
1092
|
];
|
|
993
|
-
const height = config?.headless !== false ?
|
|
994
|
-
base2.push(`--window-size
|
|
1093
|
+
const height = config?.headless !== false ? VIEWPORT_HEIGHT_HEADLESS : VIEWPORT_HEIGHT_HEADFUL;
|
|
1094
|
+
base2.push(`--window-size=${VIEWPORT_WIDTH},${height}`);
|
|
995
1095
|
const proxy = resolveProxy(config);
|
|
996
1096
|
if (proxy) {
|
|
997
1097
|
const { serverUrl } = parseProxy(proxy);
|
|
@@ -1081,150 +1181,130 @@ function headersToRecord(headers) {
|
|
|
1081
1181
|
async function setupPage(page, headless = true) {
|
|
1082
1182
|
const realUserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
|
|
1083
1183
|
await page.setUserAgent(realUserAgent);
|
|
1084
|
-
await page.setViewport({
|
|
1184
|
+
await page.setViewport({
|
|
1185
|
+
width: VIEWPORT_WIDTH,
|
|
1186
|
+
height: headless ? VIEWPORT_HEIGHT_HEADLESS : VIEWPORT_HEIGHT_HEADFUL
|
|
1187
|
+
});
|
|
1085
1188
|
await stealthPage(page);
|
|
1086
1189
|
}
|
|
1087
|
-
let _browser = null;
|
|
1088
|
-
let _browserHeadless = true;
|
|
1089
|
-
let _launchPromise = null;
|
|
1090
1190
|
function isFrameDetachedError(e) {
|
|
1091
1191
|
const msg = e instanceof Error ? e.message : String(e);
|
|
1092
1192
|
return /detached|Navigating frame was detached|Session closed/i.test(msg);
|
|
1093
1193
|
}
|
|
1094
|
-
async function
|
|
1095
|
-
if (!_browser) return false;
|
|
1096
|
-
try {
|
|
1097
|
-
await _browser.version();
|
|
1098
|
-
return true;
|
|
1099
|
-
} catch {
|
|
1100
|
-
_browser = null;
|
|
1101
|
-
return false;
|
|
1102
|
-
}
|
|
1103
|
-
}
|
|
1104
|
-
async function getOrCreateBrowser(config) {
|
|
1194
|
+
async function launchBrowser(config) {
|
|
1105
1195
|
const wantHeadless = config.headless !== false;
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
const executablePath = config.chromeExecutablePath ?? process.env.CHROME_PATH ?? findChromeExecutable();
|
|
1119
|
-
if (!executablePath) {
|
|
1120
|
-
throw new Error("未找到 Chrome 可执行文件,请安装 Google Chrome 或设置 CHROME_PATH 环境变量");
|
|
1196
|
+
const executablePath = config.chromeExecutablePath ?? process.env.CHROME_PATH ?? findChromeExecutable();
|
|
1197
|
+
if (!executablePath) {
|
|
1198
|
+
throw new Error("未找到 Chrome 可执行文件,请安装 Google Chrome 或设置 CHROME_PATH 环境变量");
|
|
1199
|
+
}
|
|
1200
|
+
const userDataDir = getUserDataDir(config.cacheDir);
|
|
1201
|
+
const maxRetries = 2;
|
|
1202
|
+
let lastErr;
|
|
1203
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
1204
|
+
try {
|
|
1205
|
+
if (attempt === 0 && userDataDir) {
|
|
1206
|
+
const absUserDataDir = resolve(userDataDir);
|
|
1207
|
+
await killStaleChromeProcesses(absUserDataDir);
|
|
1121
1208
|
}
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
browser.on("disconnected", () => {
|
|
1145
|
-
_browser = null;
|
|
1146
|
-
_launchPromise = null;
|
|
1147
|
-
});
|
|
1148
|
-
_browser = browser;
|
|
1149
|
-
_browserHeadless = wantHeadless;
|
|
1150
|
-
return browser;
|
|
1151
|
-
} catch (e) {
|
|
1152
|
-
lastErr = e;
|
|
1153
|
-
if (attempt < maxRetries && isAlreadyRunningError(e)) {
|
|
1154
|
-
continue;
|
|
1155
|
-
}
|
|
1156
|
-
if (isAlreadyRunningError(e)) {
|
|
1157
|
-
const dir = userDataDir ?? "browser_data/main";
|
|
1158
|
-
throw new Error(
|
|
1159
|
-
`Chrome 的 profile 目录已被占用(${dir})。通常是因为上次未正常退出或同时运行了多个本服务实例。请关闭占用该目录的 Chrome 进程后重试,或设置环境变量 CACHE_DIR 使用不同缓存目录。`
|
|
1160
|
-
);
|
|
1161
|
-
}
|
|
1162
|
-
throw e;
|
|
1163
|
-
}
|
|
1209
|
+
if (attempt > 0) {
|
|
1210
|
+
const waitMs = attempt * 2e3;
|
|
1211
|
+
logger.info("scraper", "userDataDir 曾被占用,等待后重试", { waitMs, attempt });
|
|
1212
|
+
await new Promise((r) => setTimeout(r, waitMs));
|
|
1213
|
+
}
|
|
1214
|
+
return await puppeteerCore.launch({
|
|
1215
|
+
headless: wantHeadless,
|
|
1216
|
+
args: launchArgs({ proxy: config.proxy, headless: wantHeadless }),
|
|
1217
|
+
userDataDir,
|
|
1218
|
+
executablePath,
|
|
1219
|
+
ignoreDefaultArgs: ["--enable-automation"]
|
|
1220
|
+
});
|
|
1221
|
+
} catch (e) {
|
|
1222
|
+
lastErr = e;
|
|
1223
|
+
if (attempt < maxRetries && isAlreadyRunningError(e)) {
|
|
1224
|
+
continue;
|
|
1225
|
+
}
|
|
1226
|
+
if (isAlreadyRunningError(e)) {
|
|
1227
|
+
const dir = userDataDir ?? "browser_data/main";
|
|
1228
|
+
throw new Error(
|
|
1229
|
+
`Chrome 的 profile 目录已被占用(${dir})。通常是因为上次未正常退出或同时运行了多个本服务实例。请关闭占用该目录的 Chrome 进程后重试,或设置环境变量 CACHE_DIR 使用不同缓存目录。`
|
|
1230
|
+
);
|
|
1164
1231
|
}
|
|
1165
|
-
throw lastErr;
|
|
1166
|
-
})().catch((e) => {
|
|
1167
|
-
_launchPromise = null;
|
|
1168
1232
|
throw e;
|
|
1169
|
-
}
|
|
1233
|
+
}
|
|
1170
1234
|
}
|
|
1171
|
-
|
|
1235
|
+
throw lastErr;
|
|
1172
1236
|
}
|
|
1173
|
-
|
|
1174
|
-
_browser?.close().catch(() => {
|
|
1175
|
-
});
|
|
1176
|
-
});
|
|
1177
|
-
process.once("SIGINT", async () => {
|
|
1178
|
-
await _browser?.close().catch(() => {
|
|
1179
|
-
});
|
|
1180
|
-
process.exit(0);
|
|
1181
|
-
});
|
|
1182
|
-
process.once("SIGTERM", async () => {
|
|
1183
|
-
await _browser?.close().catch(() => {
|
|
1184
|
-
});
|
|
1185
|
-
process.exit(0);
|
|
1186
|
-
});
|
|
1187
|
-
async function preCheckAuth(authFlow, cacheDir) {
|
|
1237
|
+
async function preCheckAuth(authFlow, cacheDir, opts) {
|
|
1188
1238
|
const { checkAuth, loginUrl, domain } = authFlow;
|
|
1189
1239
|
if (domain == null || !cacheDir) return true;
|
|
1190
|
-
const
|
|
1191
|
-
const
|
|
1240
|
+
const isHeadless = opts?.headless !== false;
|
|
1241
|
+
const browser = await launchBrowser({
|
|
1242
|
+
headless: isHeadless,
|
|
1243
|
+
cacheDir,
|
|
1244
|
+
proxy: resolveProxy(opts)
|
|
1245
|
+
});
|
|
1192
1246
|
try {
|
|
1193
|
-
await
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1247
|
+
const page = await browser.newPage();
|
|
1248
|
+
try {
|
|
1249
|
+
await setupPage(page, isHeadless);
|
|
1250
|
+
await applyProxyAuthToPage(page, opts);
|
|
1251
|
+
await page.goto(loginUrl, { waitUntil: "domcontentloaded", timeout: 6e4 });
|
|
1252
|
+
await new Promise((resolve2) => setTimeout(resolve2, 3e3));
|
|
1253
|
+
return await checkAuth(page, page.url());
|
|
1254
|
+
} finally {
|
|
1255
|
+
await page.close().catch(() => {
|
|
1256
|
+
});
|
|
1257
|
+
}
|
|
1197
1258
|
} finally {
|
|
1198
|
-
await
|
|
1259
|
+
await browser.close().catch(() => {
|
|
1199
1260
|
});
|
|
1200
1261
|
}
|
|
1201
1262
|
}
|
|
1202
|
-
async function ensureAuth(authFlow, cacheDir) {
|
|
1263
|
+
async function ensureAuth(authFlow, cacheDir, opts) {
|
|
1203
1264
|
const { checkAuth, loginUrl, loginTimeoutMs = 60 * 1e3, pollIntervalMs = 2e3 } = authFlow;
|
|
1204
|
-
const browser = await
|
|
1205
|
-
const page = await browser.newPage();
|
|
1265
|
+
const browser = await launchBrowser({ headless: false, cacheDir, proxy: resolveProxy(opts) });
|
|
1206
1266
|
try {
|
|
1207
|
-
await
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
const
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1267
|
+
const page = await browser.newPage();
|
|
1268
|
+
try {
|
|
1269
|
+
await setupPage(page, false);
|
|
1270
|
+
await applyProxyAuthToPage(page, opts);
|
|
1271
|
+
await page.goto(loginUrl, { waitUntil: "domcontentloaded", timeout: 6e4 });
|
|
1272
|
+
await new Promise((resolve2) => setTimeout(resolve2, 3e3));
|
|
1273
|
+
const authenticated = await checkAuth(page, page.url());
|
|
1274
|
+
if (authenticated) return;
|
|
1275
|
+
const startTime = Date.now();
|
|
1276
|
+
while (Date.now() - startTime < loginTimeoutMs) {
|
|
1277
|
+
await new Promise((resolve2) => setTimeout(resolve2, pollIntervalMs));
|
|
1278
|
+
const authenticated2 = await checkAuth(page, page.url());
|
|
1279
|
+
if (authenticated2) return;
|
|
1280
|
+
}
|
|
1281
|
+
throw new Error(`登录超时(${loginTimeoutMs}ms)`);
|
|
1282
|
+
} finally {
|
|
1283
|
+
await page.close().catch(() => {
|
|
1284
|
+
});
|
|
1285
|
+
}
|
|
1219
1286
|
} finally {
|
|
1220
|
-
await
|
|
1287
|
+
await browser.close().catch(() => {
|
|
1221
1288
|
});
|
|
1222
1289
|
}
|
|
1223
1290
|
}
|
|
1224
1291
|
async function fetchHtml(url, config = {}) {
|
|
1225
|
-
const {
|
|
1292
|
+
const {
|
|
1293
|
+
timeoutMs,
|
|
1294
|
+
headers,
|
|
1295
|
+
cookies,
|
|
1296
|
+
cacheDir,
|
|
1297
|
+
checkAuth,
|
|
1298
|
+
authFlow,
|
|
1299
|
+
purify,
|
|
1300
|
+
headless,
|
|
1301
|
+
waitAfterLoadMs,
|
|
1302
|
+
waitForSelector,
|
|
1303
|
+
waitForSelectorTimeoutMs,
|
|
1304
|
+
useHttpResponseBody
|
|
1305
|
+
} = config;
|
|
1226
1306
|
const isHeadless = headless !== false;
|
|
1227
|
-
const browser = await
|
|
1307
|
+
const browser = await launchBrowser({
|
|
1228
1308
|
headless: isHeadless,
|
|
1229
1309
|
cacheDir,
|
|
1230
1310
|
proxy: resolveProxy(config),
|
|
@@ -1233,70 +1313,84 @@ async function fetchHtml(url, config = {}) {
|
|
|
1233
1313
|
const navigationTimeout = timeoutMs ?? 6e4;
|
|
1234
1314
|
const maxAttempts = 2;
|
|
1235
1315
|
let lastError;
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
await setupPage(page, isHeadless);
|
|
1246
|
-
const extraHeaders = { "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", ...headers ?? {} };
|
|
1247
|
-
if (cookies != null && cookies !== "") {
|
|
1248
|
-
extraHeaders.cookie = cookies;
|
|
1249
|
-
}
|
|
1250
|
-
await page.setExtraHTTPHeaders(extraHeaders);
|
|
1251
|
-
const proxy = resolveProxy(config);
|
|
1252
|
-
if (proxy) {
|
|
1253
|
-
const { username, password } = parseProxy(proxy);
|
|
1254
|
-
if (username !== void 0 || password !== void 0) {
|
|
1255
|
-
await page.authenticate({ username: username ?? "", password: password ?? "" });
|
|
1316
|
+
try {
|
|
1317
|
+
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
|
1318
|
+
const page = await browser.newPage();
|
|
1319
|
+
const isRetry = attempt === 1;
|
|
1320
|
+
const waitUntil = isRetry ? "domcontentloaded" : "load";
|
|
1321
|
+
const extraWaitMs = isRetry ? Math.min(500, Math.max(0, waitAfterLoadMs ?? 2e3)) : Math.max(0, waitAfterLoadMs ?? 2e3);
|
|
1322
|
+
try {
|
|
1323
|
+
if (config.browserContext) {
|
|
1324
|
+
await config.browserContext(page.browserContext());
|
|
1256
1325
|
}
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
}
|
|
1269
|
-
if (checkAuth != null || authFlow != null) {
|
|
1270
|
-
const authCheck = checkAuth ?? authFlow?.checkAuth;
|
|
1271
|
-
if (authCheck != null) {
|
|
1272
|
-
const ok = await authCheck(page, url);
|
|
1273
|
-
if (!ok) {
|
|
1274
|
-
throw new Error("checkAuth failed: 未通过认证检查,请先调用 ensureAuth 进行预处理登录");
|
|
1326
|
+
await setupPage(page, isHeadless);
|
|
1327
|
+
const extraHeaders = { "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", ...headers ?? {} };
|
|
1328
|
+
if (cookies != null && cookies !== "") {
|
|
1329
|
+
extraHeaders.cookie = cookies;
|
|
1330
|
+
}
|
|
1331
|
+
await page.setExtraHTTPHeaders(extraHeaders);
|
|
1332
|
+
const proxy = resolveProxy(config);
|
|
1333
|
+
if (proxy) {
|
|
1334
|
+
const { username, password } = parseProxy(proxy);
|
|
1335
|
+
if (username !== void 0 || password !== void 0) {
|
|
1336
|
+
await page.authenticate({ username: username ?? "", password: password ?? "" });
|
|
1275
1337
|
}
|
|
1276
1338
|
}
|
|
1339
|
+
if (timeoutMs != null) {
|
|
1340
|
+
await page.setDefaultNavigationTimeout(timeoutMs);
|
|
1341
|
+
}
|
|
1342
|
+
const response = await page.goto(url, { waitUntil, timeout: navigationTimeout });
|
|
1343
|
+
if (extraWaitMs > 0) {
|
|
1344
|
+
await new Promise((resolve2) => setTimeout(resolve2, extraWaitMs));
|
|
1345
|
+
}
|
|
1346
|
+
if (waitForSelector != null && waitForSelector !== "" && !isRetry) {
|
|
1347
|
+
const selectorTimeout = waitForSelectorTimeoutMs ?? 2e4;
|
|
1348
|
+
await page.waitForSelector(waitForSelector, { timeout: selectorTimeout });
|
|
1349
|
+
}
|
|
1350
|
+
if (checkAuth != null || authFlow != null) {
|
|
1351
|
+
const authCheck = checkAuth ?? authFlow?.checkAuth;
|
|
1352
|
+
if (authCheck != null) {
|
|
1353
|
+
const ok = await authCheck(page, url);
|
|
1354
|
+
if (!ok) {
|
|
1355
|
+
throw new Error("checkAuth failed: 未通过认证检查,请先调用 ensureAuth 进行预处理登录");
|
|
1356
|
+
}
|
|
1357
|
+
}
|
|
1358
|
+
}
|
|
1359
|
+
let rawBody;
|
|
1360
|
+
if (useHttpResponseBody === true && response != null) {
|
|
1361
|
+
try {
|
|
1362
|
+
rawBody = await response.text();
|
|
1363
|
+
} catch {
|
|
1364
|
+
rawBody = await page.content();
|
|
1365
|
+
}
|
|
1366
|
+
} else {
|
|
1367
|
+
rawBody = await page.content();
|
|
1368
|
+
}
|
|
1369
|
+
const finalUrl = response?.url() ?? page.url() ?? String(url);
|
|
1370
|
+
const status = response?.status() ?? 0;
|
|
1371
|
+
const statusText = response?.statusText() ?? "";
|
|
1372
|
+
const rawHeaders = response?.headers() ?? {};
|
|
1373
|
+
const normalizedHeaders = headersToRecord(rawHeaders);
|
|
1374
|
+
const body = applyPurify(rawBody, purify);
|
|
1375
|
+
await page.close().catch(() => {
|
|
1376
|
+
});
|
|
1377
|
+
return { finalUrl, status, statusText, headers: normalizedHeaders, body };
|
|
1378
|
+
} catch (e) {
|
|
1379
|
+
lastError = e;
|
|
1380
|
+
await page.close().catch(() => {
|
|
1381
|
+
});
|
|
1382
|
+
if (isRetry || !isFrameDetachedError(e)) {
|
|
1383
|
+
throw e;
|
|
1384
|
+
}
|
|
1385
|
+
logger.warn("scraper", "fetchHtml 因 frame 分离重试", { url, attempt: attempt + 1, err: e instanceof Error ? e.message : String(e) });
|
|
1386
|
+
await new Promise((r) => setTimeout(r, 800));
|
|
1277
1387
|
}
|
|
1278
|
-
const rawBody = await page.content();
|
|
1279
|
-
const finalUrl = response?.url() ?? page.url() ?? String(url);
|
|
1280
|
-
const status = response?.status() ?? 0;
|
|
1281
|
-
const statusText = response?.statusText() ?? "";
|
|
1282
|
-
const rawHeaders = response?.headers() ?? {};
|
|
1283
|
-
const normalizedHeaders = headersToRecord(rawHeaders);
|
|
1284
|
-
const body = applyPurify(rawBody, purify);
|
|
1285
|
-
await page.close().catch(() => {
|
|
1286
|
-
});
|
|
1287
|
-
return { finalUrl, status, statusText, headers: normalizedHeaders, body };
|
|
1288
|
-
} catch (e) {
|
|
1289
|
-
lastError = e;
|
|
1290
|
-
await page.close().catch(() => {
|
|
1291
|
-
});
|
|
1292
|
-
if (isRetry || !isFrameDetachedError(e)) {
|
|
1293
|
-
throw e;
|
|
1294
|
-
}
|
|
1295
|
-
logger.warn("scraper", "fetchHtml 因 frame 分离重试", { url, attempt: attempt + 1, err: e instanceof Error ? e.message : String(e) });
|
|
1296
|
-
await new Promise((r) => setTimeout(r, 800));
|
|
1297
1388
|
}
|
|
1389
|
+
throw lastError;
|
|
1390
|
+
} finally {
|
|
1391
|
+
await browser.close().catch(() => {
|
|
1392
|
+
});
|
|
1298
1393
|
}
|
|
1299
|
-
throw lastError;
|
|
1300
1394
|
}
|
|
1301
1395
|
const VALID_INTERVALS = ["1min", "5min", "10min", "30min", "1h", "6h", "12h", "1day", "3day", "7day"];
|
|
1302
1396
|
function cronToRefreshInterval(cronExpr) {
|
|
@@ -1468,19 +1562,78 @@ async function extractFromLink(link, extractorConfig = {}, fetchConfig = {}) {
|
|
|
1468
1562
|
cacheKey: extractorConfig.cacheKey ?? (cacheDir ? cacheKey(link, "forever") : void 0)
|
|
1469
1563
|
});
|
|
1470
1564
|
}
|
|
1565
|
+
const DEFAULT_BASE_URL = "https://api.openai.com/v1";
|
|
1566
|
+
const DEFAULT_MODEL = "gpt-4o-mini";
|
|
1567
|
+
let fileCache = null;
|
|
1568
|
+
function invalidateLLMConfigCache() {
|
|
1569
|
+
fileCache = null;
|
|
1570
|
+
}
|
|
1571
|
+
function readLlmFromFileSync() {
|
|
1572
|
+
if (!existsSync(CONFIG_PATH)) return {};
|
|
1573
|
+
try {
|
|
1574
|
+
const st = statSync(CONFIG_PATH);
|
|
1575
|
+
if (fileCache && fileCache.mtimeMs === st.mtimeMs) return fileCache.llm;
|
|
1576
|
+
const raw = readFileSync(CONFIG_PATH, "utf-8");
|
|
1577
|
+
const j = JSON.parse(raw);
|
|
1578
|
+
const llmRaw = j?.llm;
|
|
1579
|
+
const llm = {};
|
|
1580
|
+
if (llmRaw && typeof llmRaw === "object") {
|
|
1581
|
+
const o = llmRaw;
|
|
1582
|
+
if (typeof o.apiKey === "string" && o.apiKey.length > 0) llm.apiKey = o.apiKey;
|
|
1583
|
+
if (typeof o.baseUrl === "string" && o.baseUrl.trim()) llm.baseUrl = o.baseUrl.trim();
|
|
1584
|
+
if (typeof o.model === "string" && o.model.trim()) llm.model = o.model.trim();
|
|
1585
|
+
}
|
|
1586
|
+
fileCache = { mtimeMs: st.mtimeMs, llm };
|
|
1587
|
+
return llm;
|
|
1588
|
+
} catch {
|
|
1589
|
+
return {};
|
|
1590
|
+
}
|
|
1591
|
+
}
|
|
1471
1592
|
function getLLMConfig() {
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
};
|
|
1593
|
+
const file = readLlmFromFileSync();
|
|
1594
|
+
const apiKey = file.apiKey ?? process.env.OPENAI_API_KEY;
|
|
1595
|
+
const baseUrl = file.baseUrl ?? process.env.OPENAI_BASE_URL ?? DEFAULT_BASE_URL;
|
|
1596
|
+
const model = file.model ?? process.env.OPENAI_MODEL ?? DEFAULT_MODEL;
|
|
1597
|
+
return { apiKey, baseUrl, model };
|
|
1598
|
+
}
|
|
1599
|
+
function extractAssistantText(completion) {
|
|
1600
|
+
const choice = completion.choices[0];
|
|
1601
|
+
if (!choice) throw new Error("LLM 返回无 choices");
|
|
1602
|
+
const msg = choice.message;
|
|
1603
|
+
const raw = msg.content;
|
|
1604
|
+
if (typeof raw === "string") {
|
|
1605
|
+
const t = raw.trim();
|
|
1606
|
+
if (t.length > 0) return t;
|
|
1607
|
+
}
|
|
1608
|
+
const extra = msg;
|
|
1609
|
+
const rc = extra.reasoning_content;
|
|
1610
|
+
if (typeof rc === "string" && rc.trim().length > 0) {
|
|
1611
|
+
return rc.trim();
|
|
1612
|
+
}
|
|
1613
|
+
const refusal = msg.refusal;
|
|
1614
|
+
if (typeof refusal === "string" && refusal.trim()) {
|
|
1615
|
+
throw new Error(`模型拒绝: ${refusal.trim()}`);
|
|
1616
|
+
}
|
|
1617
|
+
const fr = choice.finish_reason;
|
|
1618
|
+
if (fr === "tool_calls") {
|
|
1619
|
+
throw new Error("LLM 返回了工具调用而非文本,请换一个模型或关闭工具调用");
|
|
1620
|
+
}
|
|
1621
|
+
if (fr === "content_filter") {
|
|
1622
|
+
throw new Error("内容被内容策略过滤");
|
|
1623
|
+
}
|
|
1624
|
+
if (fr === "length") {
|
|
1625
|
+
throw new Error(
|
|
1626
|
+
"LLM 输出在 content / reasoning_content 均为空前已用尽"
|
|
1627
|
+
);
|
|
1628
|
+
}
|
|
1629
|
+
throw new Error(`LLM 返回空内容 (finish_reason=${String(fr)})`);
|
|
1477
1630
|
}
|
|
1478
1631
|
function mergeConfig(override) {
|
|
1479
1632
|
const env = getLLMConfig();
|
|
1480
1633
|
const apiKey = override?.apiKey ?? env.apiKey;
|
|
1481
1634
|
const baseUrl = override?.apiUrl ?? override?.baseUrl ?? env.baseUrl;
|
|
1482
1635
|
const model = override?.model ?? env.model;
|
|
1483
|
-
if (!apiKey) throw new Error("LLM API Key
|
|
1636
|
+
if (!apiKey) throw new Error("LLM API Key 未配置:请在管理后台「设置 → LLM」或环境变量 OPENAI_API_KEY 中设置");
|
|
1484
1637
|
return { apiKey, baseUrl, model };
|
|
1485
1638
|
}
|
|
1486
1639
|
async function chatJson(prompt, config, options) {
|
|
@@ -1492,8 +1645,7 @@ async function chatJson(prompt, config, options) {
|
|
|
1492
1645
|
max_tokens: options?.maxTokens ?? 8192,
|
|
1493
1646
|
response_format: { type: "json_object" }
|
|
1494
1647
|
});
|
|
1495
|
-
const content = completion
|
|
1496
|
-
if (!content) throw new Error("LLM 返回空内容");
|
|
1648
|
+
const content = extractAssistantText(completion);
|
|
1497
1649
|
return JSON.parse(content);
|
|
1498
1650
|
}
|
|
1499
1651
|
async function chatText(prompt, config, options) {
|
|
@@ -1504,9 +1656,7 @@ async function chatText(prompt, config, options) {
|
|
|
1504
1656
|
messages: [{ role: "user", content: prompt }],
|
|
1505
1657
|
max_tokens: options?.maxTokens ?? 8192
|
|
1506
1658
|
});
|
|
1507
|
-
|
|
1508
|
-
if (!content) throw new Error("LLM 返回空内容");
|
|
1509
|
-
return content;
|
|
1659
|
+
return extractAssistantText(completion);
|
|
1510
1660
|
}
|
|
1511
1661
|
function generateGuid(link) {
|
|
1512
1662
|
return createHash("sha256").update(link).digest("hex");
|
|
@@ -1590,7 +1740,7 @@ async function parseHtml(html, config = {}) {
|
|
|
1590
1740
|
const actualMode = mode ?? (llmConfig != null ? "llm" : customParser != null ? "custom" : "llm");
|
|
1591
1741
|
if (actualMode === "llm") {
|
|
1592
1742
|
if (llmConfig == null && !getLLMConfig().apiKey) {
|
|
1593
|
-
throw new Error('mode 为 "llm" 时必须提供 llmConfig
|
|
1743
|
+
throw new Error('mode 为 "llm" 时必须提供 llmConfig,或在后台「设置 → LLM」/ OPENAI_API_KEY 中配置 Key');
|
|
1594
1744
|
}
|
|
1595
1745
|
const htmlForLLM = applyPurify(html, purify !== false);
|
|
1596
1746
|
entries = await parseWithLLM(htmlForLLM, url, llmConfig ?? {});
|
|
@@ -1773,7 +1923,8 @@ function buildSiteContext(site, ctx) {
|
|
|
1773
1923
|
waitAfterLoadMs: opts?.waitMs,
|
|
1774
1924
|
purify: opts?.purify,
|
|
1775
1925
|
waitForSelector: opts?.waitForSelector,
|
|
1776
|
-
waitForSelectorTimeoutMs: opts?.waitForSelectorTimeoutMs
|
|
1926
|
+
waitForSelectorTimeoutMs: opts?.waitForSelectorTimeoutMs,
|
|
1927
|
+
useHttpResponseBody: opts?.useHttpResponseBody
|
|
1777
1928
|
});
|
|
1778
1929
|
return { html: res.body, finalUrl: res.finalUrl ?? url, status: res.status };
|
|
1779
1930
|
},
|
|
@@ -1817,7 +1968,10 @@ function createWebSource(site) {
|
|
|
1817
1968
|
proxy: site.proxy ?? void 0,
|
|
1818
1969
|
preCheck: authFlow ? async (ctx) => {
|
|
1819
1970
|
if (!ctx.cacheDir) return;
|
|
1820
|
-
const passed = await preCheckAuth(authFlow, ctx.cacheDir
|
|
1971
|
+
const passed = await preCheckAuth(authFlow, ctx.cacheDir, {
|
|
1972
|
+
proxy: ctx.proxy,
|
|
1973
|
+
headless: ctx.headless
|
|
1974
|
+
});
|
|
1821
1975
|
if (!passed) throw new AuthRequiredError(`站点 ${site.id} 需要登录,请先执行 ensureAuth`);
|
|
1822
1976
|
} : void 0,
|
|
1823
1977
|
async fetchItems(sourceId, ctx) {
|
|
@@ -1870,7 +2024,25 @@ const PLUGIN_HOST_DEPS = {
|
|
|
1870
2024
|
logger
|
|
1871
2025
|
};
|
|
1872
2026
|
function buildSourceContext(partial) {
|
|
1873
|
-
|
|
2027
|
+
const { cacheDir, headless, proxy } = partial;
|
|
2028
|
+
return {
|
|
2029
|
+
...partial,
|
|
2030
|
+
deps: PLUGIN_HOST_DEPS,
|
|
2031
|
+
async fetchHtml(url, opts) {
|
|
2032
|
+
const res = await fetchHtml(url, {
|
|
2033
|
+
cacheDir,
|
|
2034
|
+
useCache: false,
|
|
2035
|
+
headless,
|
|
2036
|
+
proxy,
|
|
2037
|
+
waitAfterLoadMs: opts?.waitMs,
|
|
2038
|
+
purify: opts?.purify,
|
|
2039
|
+
waitForSelector: opts?.waitForSelector,
|
|
2040
|
+
waitForSelectorTimeoutMs: opts?.waitForSelectorTimeoutMs,
|
|
2041
|
+
useHttpResponseBody: opts?.useHttpResponseBody
|
|
2042
|
+
});
|
|
2043
|
+
return { html: res.body, finalUrl: res.finalUrl ?? url, status: res.status };
|
|
2044
|
+
}
|
|
2045
|
+
};
|
|
1874
2046
|
}
|
|
1875
2047
|
const registeredSources = [];
|
|
1876
2048
|
function sourcePatternToRegex(pattern) {
|
|
@@ -1915,6 +2087,38 @@ async function initSources() {
|
|
|
1915
2087
|
function resolveRef(src) {
|
|
1916
2088
|
return src.ref ?? src.url ?? "";
|
|
1917
2089
|
}
|
|
2090
|
+
async function readGlobalProxyFromConfig() {
|
|
2091
|
+
try {
|
|
2092
|
+
const raw = await readFile(CONFIG_PATH, "utf-8");
|
|
2093
|
+
const j = JSON.parse(raw);
|
|
2094
|
+
if (typeof j.globalProxy === "string") {
|
|
2095
|
+
const t = j.globalProxy.trim();
|
|
2096
|
+
return t.length > 0 ? t : void 0;
|
|
2097
|
+
}
|
|
2098
|
+
} catch {
|
|
2099
|
+
}
|
|
2100
|
+
return void 0;
|
|
2101
|
+
}
|
|
2102
|
+
async function saveGlobalProxyToConfig(proxy) {
|
|
2103
|
+
let root = {};
|
|
2104
|
+
try {
|
|
2105
|
+
const raw = await readFile(CONFIG_PATH, "utf-8");
|
|
2106
|
+
root = JSON.parse(raw);
|
|
2107
|
+
} catch {
|
|
2108
|
+
}
|
|
2109
|
+
const t = proxy.trim();
|
|
2110
|
+
if (t.length === 0) {
|
|
2111
|
+
delete root.globalProxy;
|
|
2112
|
+
} else {
|
|
2113
|
+
root.globalProxy = t;
|
|
2114
|
+
}
|
|
2115
|
+
await writeFile(CONFIG_PATH, JSON.stringify(root, null, 2) + "\n", "utf-8");
|
|
2116
|
+
}
|
|
2117
|
+
async function resolveProxyForSite(site) {
|
|
2118
|
+
const s = site.proxy?.trim();
|
|
2119
|
+
if (s) return s;
|
|
2120
|
+
return readGlobalProxyFromConfig();
|
|
2121
|
+
}
|
|
1918
2122
|
async function loadSourcesFile() {
|
|
1919
2123
|
try {
|
|
1920
2124
|
const raw = await readFile(SOURCES_CONFIG_PATH, "utf-8");
|
|
@@ -1960,6 +2164,15 @@ async function saveSourcesFile(sources) {
|
|
|
1960
2164
|
"utf-8"
|
|
1961
2165
|
);
|
|
1962
2166
|
}
|
|
2167
|
+
async function getEffectiveProxyForListUrl(listUrl, source) {
|
|
2168
|
+
const list = await getAllSources();
|
|
2169
|
+
const sub = list.find((s) => resolveRef(s) === listUrl);
|
|
2170
|
+
const fromSub = sub?.proxy?.trim();
|
|
2171
|
+
if (fromSub) return fromSub;
|
|
2172
|
+
const fromPlugin = source.proxy?.trim();
|
|
2173
|
+
if (fromPlugin) return fromPlugin;
|
|
2174
|
+
return readGlobalProxyFromConfig();
|
|
2175
|
+
}
|
|
1963
2176
|
async function getSourcesRaw() {
|
|
1964
2177
|
try {
|
|
1965
2178
|
const raw = await readFile(SOURCES_CONFIG_PATH, "utf-8");
|
|
@@ -2277,24 +2490,35 @@ function onFeedUpdated(fn) {
|
|
|
2277
2490
|
eventBus.on("feed:updated", fn);
|
|
2278
2491
|
return () => eventBus.off("feed:updated", fn);
|
|
2279
2492
|
}
|
|
2280
|
-
async function
|
|
2493
|
+
async function getDeliverConfig() {
|
|
2281
2494
|
try {
|
|
2282
2495
|
const raw = await readFile(CONFIG_PATH, "utf-8");
|
|
2283
2496
|
const j = JSON.parse(raw);
|
|
2284
2497
|
const u = j?.deliver?.url;
|
|
2285
|
-
|
|
2498
|
+
const t = j?.deliver?.token;
|
|
2499
|
+
return {
|
|
2500
|
+
url: typeof u === "string" ? u.trim() : "",
|
|
2501
|
+
token: typeof t === "string" ? t.trim() : ""
|
|
2502
|
+
};
|
|
2286
2503
|
} catch {
|
|
2287
|
-
return "";
|
|
2504
|
+
return { url: "", token: "" };
|
|
2288
2505
|
}
|
|
2289
2506
|
}
|
|
2290
|
-
async function
|
|
2507
|
+
async function saveDeliverConfig(config) {
|
|
2291
2508
|
let root = {};
|
|
2292
2509
|
try {
|
|
2293
2510
|
const raw = await readFile(CONFIG_PATH, "utf-8");
|
|
2294
2511
|
root = JSON.parse(raw);
|
|
2295
2512
|
} catch {
|
|
2296
2513
|
}
|
|
2297
|
-
|
|
2514
|
+
const prev = root.deliver;
|
|
2515
|
+
const base2 = typeof prev === "object" && prev !== null && !Array.isArray(prev) ? { ...prev } : {};
|
|
2516
|
+
const url = config.url.trim();
|
|
2517
|
+
const token = config.token.trim();
|
|
2518
|
+
const next = { ...base2, url };
|
|
2519
|
+
if (token) next.token = token;
|
|
2520
|
+
else delete next.token;
|
|
2521
|
+
root.deliver = next;
|
|
2298
2522
|
await writeFile(CONFIG_PATH, JSON.stringify(root, null, 2) + "\n", "utf-8");
|
|
2299
2523
|
}
|
|
2300
2524
|
function feedItemsToPayload(items) {
|
|
@@ -2302,7 +2526,7 @@ function feedItemsToPayload(items) {
|
|
|
2302
2526
|
guid: i.guid,
|
|
2303
2527
|
title: i.title,
|
|
2304
2528
|
link: i.link,
|
|
2305
|
-
pubDate: i.pubDate
|
|
2529
|
+
pubDate: pubDateToIsoOrNull(i.pubDate) ?? (/* @__PURE__ */ new Date()).toISOString(),
|
|
2306
2530
|
author: i.author,
|
|
2307
2531
|
summary: i.summary,
|
|
2308
2532
|
content: i.content,
|
|
@@ -2311,12 +2535,15 @@ function feedItemsToPayload(items) {
|
|
|
2311
2535
|
translations: i.translations
|
|
2312
2536
|
}));
|
|
2313
2537
|
}
|
|
2314
|
-
async function postDeliverItems(url, sourceRef, items) {
|
|
2538
|
+
async function postDeliverItems(url, sourceRef, items, options) {
|
|
2315
2539
|
if (!url.trim() || items.length === 0) return;
|
|
2316
2540
|
const body = JSON.stringify({ sourceRef, items: feedItemsToPayload(items) });
|
|
2541
|
+
const headers = { "Content-Type": "application/json" };
|
|
2542
|
+
const t = options?.bearerToken?.trim();
|
|
2543
|
+
if (t) headers.Authorization = `Bearer ${t}`;
|
|
2317
2544
|
const res = await fetch(url.trim(), {
|
|
2318
2545
|
method: "POST",
|
|
2319
|
-
headers
|
|
2546
|
+
headers,
|
|
2320
2547
|
body,
|
|
2321
2548
|
signal: AbortSignal.timeout(12e4)
|
|
2322
2549
|
});
|
|
@@ -2325,9 +2552,9 @@ async function postDeliverItems(url, sourceRef, items) {
|
|
|
2325
2552
|
throw new Error(`HTTP ${res.status}${text ? `: ${text.slice(0, 200)}` : ""}`);
|
|
2326
2553
|
}
|
|
2327
2554
|
}
|
|
2328
|
-
async function postDeliverItemsSafe(url, sourceRef, items) {
|
|
2555
|
+
async function postDeliverItemsSafe(url, sourceRef, items, options) {
|
|
2329
2556
|
try {
|
|
2330
|
-
await postDeliverItems(url, sourceRef, items);
|
|
2557
|
+
await postDeliverItems(url, sourceRef, items, options);
|
|
2331
2558
|
} catch (err) {
|
|
2332
2559
|
logger.warn("deliver", "投递失败", {
|
|
2333
2560
|
sourceRef,
|
|
@@ -2336,6 +2563,12 @@ async function postDeliverItemsSafe(url, sourceRef, items) {
|
|
|
2336
2563
|
});
|
|
2337
2564
|
}
|
|
2338
2565
|
}
|
|
2566
|
+
function resolveHeadlessForFeeder(config) {
|
|
2567
|
+
if (config.force === true) {
|
|
2568
|
+
return config.headless === true ? true : false;
|
|
2569
|
+
}
|
|
2570
|
+
return config.headless;
|
|
2571
|
+
}
|
|
2339
2572
|
function buildChannelFromItems(listUrl, items, lng) {
|
|
2340
2573
|
const channel = {
|
|
2341
2574
|
title: items[0]?.author?.length ? `${items[0].author[0]} 的订阅` : "RSS 订阅",
|
|
@@ -2354,7 +2587,7 @@ function toRssEntry(item, lng) {
|
|
|
2354
2587
|
link: item.link,
|
|
2355
2588
|
description: desc,
|
|
2356
2589
|
guid: item.guid,
|
|
2357
|
-
published: item.pubDate
|
|
2590
|
+
published: pubDateToIsoOrNull(item.pubDate) ?? void 0,
|
|
2358
2591
|
imageUrl: item.imageUrl
|
|
2359
2592
|
};
|
|
2360
2593
|
}
|
|
@@ -2366,10 +2599,11 @@ const pipelineCtx = {
|
|
|
2366
2599
|
async function runPipelineOnItem(item, ctx) {
|
|
2367
2600
|
return runPipeline(item, { ...pipelineCtx, ...ctx });
|
|
2368
2601
|
}
|
|
2369
|
-
async function generateAndCache(listUrl, key, config) {
|
|
2370
|
-
const { cacheDir = "cache"
|
|
2602
|
+
async function generateAndCache(listUrl, key, config, proxy) {
|
|
2603
|
+
const { cacheDir = "cache" } = config;
|
|
2604
|
+
const headless = resolveHeadlessForFeeder(config);
|
|
2371
2605
|
const source = getSource(listUrl);
|
|
2372
|
-
const ctx = buildSourceContext({ cacheDir, headless, proxy
|
|
2606
|
+
const ctx = buildSourceContext({ cacheDir, headless, proxy });
|
|
2373
2607
|
let items;
|
|
2374
2608
|
try {
|
|
2375
2609
|
items = await source.fetchItems(listUrl, ctx);
|
|
@@ -2379,13 +2613,14 @@ async function generateAndCache(listUrl, key, config) {
|
|
|
2379
2613
|
logger.error("scraper", "抓取失败", { source_url: listUrl, err: message });
|
|
2380
2614
|
throw err;
|
|
2381
2615
|
}
|
|
2616
|
+
const sourceRefStored = canonicalHttpSourceRef(listUrl);
|
|
2382
2617
|
items.forEach((i) => {
|
|
2383
|
-
i.sourceRef =
|
|
2618
|
+
i.sourceRef = sourceRefStored;
|
|
2384
2619
|
i.author = normalizeAuthor(i.author);
|
|
2385
2620
|
});
|
|
2386
2621
|
generatingKeys.delete(key);
|
|
2387
2622
|
logger.info("scraper", "抓取成功", { source_url: listUrl, count: items.length });
|
|
2388
|
-
const deliverUrl = await
|
|
2623
|
+
const { url: deliverUrl, token: deliverToken } = await getDeliverConfig();
|
|
2389
2624
|
let newCount = 0;
|
|
2390
2625
|
let newIds = /* @__PURE__ */ new Set();
|
|
2391
2626
|
const upsertResult = await upsertItems(items).catch((err) => {
|
|
@@ -2398,7 +2633,7 @@ async function generateAndCache(listUrl, key, config) {
|
|
|
2398
2633
|
const shouldRunPipelineRow = (guid) => newIds.has(guid);
|
|
2399
2634
|
for (let i = 0; i < items.length; i++) {
|
|
2400
2635
|
if (!shouldRunPipelineRow(items[i].guid)) continue;
|
|
2401
|
-
const processed = await runPipelineOnItem(items[i], { sourceUrl:
|
|
2636
|
+
const processed = await runPipelineOnItem(items[i], { sourceUrl: sourceRefStored });
|
|
2402
2637
|
items[i] = processed;
|
|
2403
2638
|
if (isPipelineDroppedItem(processed)) {
|
|
2404
2639
|
await deleteItem(processed.guid).catch(
|
|
@@ -2412,24 +2647,28 @@ async function generateAndCache(listUrl, key, config) {
|
|
|
2412
2647
|
}
|
|
2413
2648
|
}
|
|
2414
2649
|
if (newCount > 0) {
|
|
2415
|
-
emitFeedUpdated({ sourceUrl:
|
|
2650
|
+
emitFeedUpdated({ sourceUrl: sourceRefStored, newCount: newCount - pipelineDroppedNew });
|
|
2416
2651
|
}
|
|
2417
2652
|
const out = items.filter((i) => !isPipelineDroppedItem(i));
|
|
2418
2653
|
if (deliverUrl && out.length > 0) {
|
|
2419
|
-
await postDeliverItemsSafe(deliverUrl,
|
|
2654
|
+
await postDeliverItemsSafe(deliverUrl, sourceRefStored, out, {
|
|
2655
|
+
bearerToken: deliverToken || void 0
|
|
2656
|
+
});
|
|
2420
2657
|
}
|
|
2421
2658
|
return { items: out };
|
|
2422
2659
|
}
|
|
2423
2660
|
async function getItems(listUrl, config = {}) {
|
|
2424
2661
|
const source = getSource(listUrl);
|
|
2662
|
+
const proxy = await getEffectiveProxyForListUrl(listUrl, source);
|
|
2663
|
+
const headless = resolveHeadlessForFeeder(config);
|
|
2425
2664
|
const key = config.cron ? cacheKeyFromCron(listUrl, config.cron) : cacheKey(listUrl, config.refreshInterval ?? source.refreshInterval ?? "1day");
|
|
2426
2665
|
if (source.preCheck != null) {
|
|
2427
2666
|
try {
|
|
2428
2667
|
await source.preCheck(
|
|
2429
2668
|
buildSourceContext({
|
|
2430
2669
|
cacheDir: config.cacheDir ?? "cache",
|
|
2431
|
-
headless
|
|
2432
|
-
proxy
|
|
2670
|
+
headless,
|
|
2671
|
+
proxy
|
|
2433
2672
|
})
|
|
2434
2673
|
);
|
|
2435
2674
|
} catch (err) {
|
|
@@ -2439,7 +2678,7 @@ async function getItems(listUrl, config = {}) {
|
|
|
2439
2678
|
}
|
|
2440
2679
|
let task = config.force ? void 0 : generatingKeys.get(key);
|
|
2441
2680
|
if (!task) {
|
|
2442
|
-
task = generateAndCache(listUrl, key, config);
|
|
2681
|
+
task = generateAndCache(listUrl, key, config, proxy);
|
|
2443
2682
|
if (!config.force) generatingKeys.set(key, task);
|
|
2444
2683
|
}
|
|
2445
2684
|
const { items } = await task;
|
|
@@ -2635,7 +2874,7 @@ function getGroupStats() {
|
|
|
2635
2874
|
return result;
|
|
2636
2875
|
}
|
|
2637
2876
|
const DEFAULT_REFRESH = "1day";
|
|
2638
|
-
const SOURCES_CONCURRENCY =
|
|
2877
|
+
const SOURCES_CONCURRENCY = 1;
|
|
2639
2878
|
function createPullTask(ref, cacheDir, cronExpr) {
|
|
2640
2879
|
return async () => {
|
|
2641
2880
|
try {
|
|
@@ -2672,7 +2911,7 @@ async function rescheduleSources(cacheDir, runNow2) {
|
|
|
2672
2911
|
}
|
|
2673
2912
|
}
|
|
2674
2913
|
async function initScheduler(cacheDir) {
|
|
2675
|
-
await rescheduleSources(cacheDir,
|
|
2914
|
+
await rescheduleSources(cacheDir, false);
|
|
2676
2915
|
let debounceTimer = null;
|
|
2677
2916
|
try {
|
|
2678
2917
|
const watcher = watch(SOURCES_CONFIG_PATH, () => {
|
|
@@ -2730,7 +2969,7 @@ function registerRssApiRoutes(app) {
|
|
|
2730
2969
|
link: item.link,
|
|
2731
2970
|
summary,
|
|
2732
2971
|
author: item.author,
|
|
2733
|
-
pubDate: item.pubDate
|
|
2972
|
+
pubDate: pubDateToIsoOrNull(item.pubDate)
|
|
2734
2973
|
};
|
|
2735
2974
|
})
|
|
2736
2975
|
});
|
|
@@ -2748,12 +2987,12 @@ function registerSchedulerRoutes(app) {
|
|
|
2748
2987
|
});
|
|
2749
2988
|
}
|
|
2750
2989
|
const SITE_TEMPLATE_FALLBACK = `/**
|
|
2751
|
-
* Site
|
|
2990
|
+
* Site 插件模板(由 /plugins 页添加,位于 .rssany/plugins/)
|
|
2752
2991
|
* HTML DOM 解析请用 ctx.deps.parseHtml,勿在插件内 import node_modules。
|
|
2753
2992
|
*/
|
|
2754
2993
|
export default {
|
|
2755
2994
|
id: "__PLUGIN_ID__",
|
|
2756
|
-
listUrlPattern:
|
|
2995
|
+
listUrlPattern: __LIST_URL_PATTERN__,
|
|
2757
2996
|
refreshInterval: "1day",
|
|
2758
2997
|
|
|
2759
2998
|
async fetchItems(sourceId, ctx) {
|
|
@@ -2770,6 +3009,11 @@ export default {
|
|
|
2770
3009
|
function isValidNewPluginId(id) {
|
|
2771
3010
|
return /^[a-zA-Z][a-zA-Z0-9_-]{0,63}$/.test(id) && id !== "generic" && id !== "new";
|
|
2772
3011
|
}
|
|
3012
|
+
function isValidNewListUrlPattern(pattern) {
|
|
3013
|
+
if (pattern.length === 0 || pattern.length > 2048) return false;
|
|
3014
|
+
if (/[\r\n]/.test(pattern)) return false;
|
|
3015
|
+
return true;
|
|
3016
|
+
}
|
|
2773
3017
|
async function fileExists(p) {
|
|
2774
3018
|
try {
|
|
2775
3019
|
await access(p);
|
|
@@ -2799,6 +3043,13 @@ function registerPluginsRoutes(app) {
|
|
|
2799
3043
|
if (!isValidNewPluginId(id)) {
|
|
2800
3044
|
return c.json({ error: "id 须为字母开头,仅含字母数字、下划线、连字符;不能为 generic 或 new" }, 400);
|
|
2801
3045
|
}
|
|
3046
|
+
const listUrlPatternRaw = typeof body.listUrlPattern === "string" ? body.listUrlPattern.trim() : "";
|
|
3047
|
+
if (!listUrlPatternRaw) {
|
|
3048
|
+
return c.json({ error: "缺少支持的站点(listUrlPattern),例如 https://example.com/*" }, 400);
|
|
3049
|
+
}
|
|
3050
|
+
if (!isValidNewListUrlPattern(listUrlPatternRaw)) {
|
|
3051
|
+
return c.json({ error: "支持的站点须为非空字符串,不超过 2048 字符,且不能含换行" }, 400);
|
|
3052
|
+
}
|
|
2802
3053
|
await mkdir(USER_PLUGINS_DIR, { recursive: true });
|
|
2803
3054
|
const outPath = join(USER_PLUGINS_DIR, `${id}.rssany.js`);
|
|
2804
3055
|
if (await fileExists(outPath)) return c.json({ error: "该 id 已存在同名文件" }, 409);
|
|
@@ -2807,7 +3058,8 @@ function registerPluginsRoutes(app) {
|
|
|
2807
3058
|
tpl = await readFile(PLUGIN_SITE_TEMPLATE_PATH, "utf-8");
|
|
2808
3059
|
} catch {
|
|
2809
3060
|
}
|
|
2810
|
-
const
|
|
3061
|
+
const patternLiteral = JSON.stringify(listUrlPatternRaw);
|
|
3062
|
+
const content = tpl.replace(/__PLUGIN_ID__/g, id).replace(/__LIST_URL_PATTERN__/g, patternLiteral);
|
|
2811
3063
|
if (!isAllowedPluginPath(outPath)) return c.json({ error: "路径不允许" }, 403);
|
|
2812
3064
|
try {
|
|
2813
3065
|
await writeFile(outPath, content, "utf-8");
|
|
@@ -2989,6 +3241,12 @@ function registerItemsRoutes(app) {
|
|
|
2989
3241
|
return c.json({ ok: false, message: err instanceof Error ? err.message : String(err) }, 400);
|
|
2990
3242
|
}
|
|
2991
3243
|
});
|
|
3244
|
+
app.delete("/api/items/by-source", requireAdmin(), async (c) => {
|
|
3245
|
+
const sourceUrl = (c.req.query("source_url") ?? "").trim();
|
|
3246
|
+
if (!sourceUrl) return c.json({ ok: false, message: "source_url 不能为空" }, 400);
|
|
3247
|
+
const deleted = await deleteItemsBySourceUrl(sourceUrl);
|
|
3248
|
+
return c.json({ ok: true, deleted });
|
|
3249
|
+
});
|
|
2992
3250
|
app.delete("/api/items/:id", async (c) => {
|
|
2993
3251
|
const id = decodeURIComponent(c.req.param("id") ?? "").trim();
|
|
2994
3252
|
if (!id) return c.json({ ok: false, message: "id 不能为空" }, 400);
|
|
@@ -2996,12 +3254,6 @@ function registerItemsRoutes(app) {
|
|
|
2996
3254
|
if (!deleted) return c.json({ ok: false, message: "条目不存在或已删除" }, 404);
|
|
2997
3255
|
return c.json({ ok: true });
|
|
2998
3256
|
});
|
|
2999
|
-
app.delete("/api/items/by-source", requireAdmin(), async (c) => {
|
|
3000
|
-
const sourceUrl = (c.req.query("source_url") ?? "").trim();
|
|
3001
|
-
if (!sourceUrl) return c.json({ ok: false, message: "source_url 不能为空" }, 400);
|
|
3002
|
-
const deleted = await deleteItemsBySourceUrl(sourceUrl);
|
|
3003
|
-
return c.json({ ok: true, deleted });
|
|
3004
|
-
});
|
|
3005
3257
|
app.get("/api/items", async (c) => {
|
|
3006
3258
|
const ref = c.req.query("ref") ?? c.req.query("source") ?? void 0;
|
|
3007
3259
|
const subscribed = parseSubscribedFlag$1(c.req.query("subscribed"));
|
|
@@ -3149,7 +3401,7 @@ function registerSourcesRoutes(app) {
|
|
|
3149
3401
|
const w = s.weight;
|
|
3150
3402
|
const weight = typeof w === "number" ? w : void 0;
|
|
3151
3403
|
return {
|
|
3152
|
-
ref: String(s.ref),
|
|
3404
|
+
ref: canonicalHttpSourceRef(String(s.ref)),
|
|
3153
3405
|
type,
|
|
3154
3406
|
label: s.label,
|
|
3155
3407
|
description: s.description,
|
|
@@ -3223,15 +3475,16 @@ function registerTopicsRoutes(app) {
|
|
|
3223
3475
|
}
|
|
3224
3476
|
function registerDeliverRoutes(app) {
|
|
3225
3477
|
app.get("/api/deliver", requireAdmin(), async (c) => {
|
|
3226
|
-
const url = await
|
|
3227
|
-
return c.json({ url });
|
|
3478
|
+
const { url, token } = await getDeliverConfig();
|
|
3479
|
+
return c.json({ url, token });
|
|
3228
3480
|
});
|
|
3229
3481
|
app.put("/api/deliver", requireAdmin(), async (c) => {
|
|
3230
3482
|
try {
|
|
3231
3483
|
const body = await c.req.json();
|
|
3232
3484
|
const url = typeof body?.url === "string" ? body.url.trim() : "";
|
|
3233
|
-
|
|
3234
|
-
|
|
3485
|
+
const token = typeof body?.token === "string" ? body.token.trim() : "";
|
|
3486
|
+
await saveDeliverConfig({ url, token });
|
|
3487
|
+
return c.json({ ok: true, url, token });
|
|
3235
3488
|
} catch (err) {
|
|
3236
3489
|
return c.json({ ok: false, message: err instanceof Error ? err.message : String(err) }, 400);
|
|
3237
3490
|
}
|
|
@@ -3240,6 +3493,7 @@ function registerDeliverRoutes(app) {
|
|
|
3240
3493
|
try {
|
|
3241
3494
|
const body = await c.req.json();
|
|
3242
3495
|
const url = typeof body?.url === "string" ? body.url.trim() : "";
|
|
3496
|
+
const token = typeof body?.token === "string" ? body.token.trim() : "";
|
|
3243
3497
|
if (!url) return c.json({ ok: false, message: "url 不能为空" }, 400);
|
|
3244
3498
|
const sample = {
|
|
3245
3499
|
guid: "deliver-test-" + Date.now(),
|
|
@@ -3248,22 +3502,150 @@ function registerDeliverRoutes(app) {
|
|
|
3248
3502
|
pubDate: (/* @__PURE__ */ new Date()).toISOString(),
|
|
3249
3503
|
summary: "若下游收到此条,说明投递 URL 可用。"
|
|
3250
3504
|
};
|
|
3251
|
-
await postDeliverItems(
|
|
3252
|
-
|
|
3253
|
-
|
|
3254
|
-
|
|
3255
|
-
|
|
3256
|
-
|
|
3257
|
-
|
|
3258
|
-
|
|
3259
|
-
|
|
3260
|
-
|
|
3505
|
+
await postDeliverItems(
|
|
3506
|
+
url,
|
|
3507
|
+
"rssany-deliver-test",
|
|
3508
|
+
[
|
|
3509
|
+
{
|
|
3510
|
+
guid: sample.guid,
|
|
3511
|
+
title: sample.title,
|
|
3512
|
+
link: sample.link,
|
|
3513
|
+
pubDate: new Date(sample.pubDate),
|
|
3514
|
+
summary: sample.summary,
|
|
3515
|
+
sourceRef: "rssany-deliver-test"
|
|
3516
|
+
}
|
|
3517
|
+
],
|
|
3518
|
+
{ bearerToken: token || void 0 }
|
|
3519
|
+
);
|
|
3261
3520
|
return c.json({ ok: true });
|
|
3262
3521
|
} catch (err) {
|
|
3263
3522
|
return c.json({ ok: false, message: err instanceof Error ? err.message : String(err) }, 400);
|
|
3264
3523
|
}
|
|
3265
3524
|
});
|
|
3266
3525
|
}
|
|
3526
|
+
function trimOrUndef(s) {
|
|
3527
|
+
if (typeof s !== "string") return void 0;
|
|
3528
|
+
const t = s.trim();
|
|
3529
|
+
return t.length > 0 ? t : void 0;
|
|
3530
|
+
}
|
|
3531
|
+
async function readLlmFileConfig() {
|
|
3532
|
+
try {
|
|
3533
|
+
const raw = await readFile(CONFIG_PATH, "utf-8");
|
|
3534
|
+
const j = JSON.parse(raw);
|
|
3535
|
+
const llm = j?.llm;
|
|
3536
|
+
if (!llm || typeof llm !== "object") return {};
|
|
3537
|
+
const o = llm;
|
|
3538
|
+
return {
|
|
3539
|
+
apiKey: typeof o.apiKey === "string" ? o.apiKey : void 0,
|
|
3540
|
+
baseUrl: trimOrUndef(o.baseUrl),
|
|
3541
|
+
model: trimOrUndef(o.model)
|
|
3542
|
+
};
|
|
3543
|
+
} catch {
|
|
3544
|
+
return {};
|
|
3545
|
+
}
|
|
3546
|
+
}
|
|
3547
|
+
/**
 * Persist LLM settings into the `llm` section of the config file at
 * CONFIG_PATH, then invalidate the cached LLM config.
 *
 * The rest of the config file is preserved when it parses to a plain JSON
 * object. A missing, unreadable, malformed, or non-object config is treated
 * as empty (best-effort, as before), so saving never fails just because the
 * file held `null`, an array, or a primitive.
 *
 * @param {{baseUrl: string, model: string, apiKey?: string}} input
 *   `baseUrl` and `model` are trimmed before writing. A non-empty `apiKey`
 *   replaces the stored key; an absent or empty one keeps the key already in
 *   the file, if any.
 * @returns {Promise<void>}
 * @throws Propagates errors from writing the config file.
 */
async function saveLlmSettings(input) {
  let root = {};
  try {
    const parsed = JSON.parse(await readFile(CONFIG_PATH, "utf-8"));
    // Only a plain object can carry an `llm` property through JSON.stringify;
    // `null` would throw on assignment and arrays would silently drop it.
    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
      root = parsed;
    }
  } catch {
    // Best-effort: start from an empty config when the file cannot be used.
  }

  // Derive the previously stored key from the config we just parsed instead
  // of reading the file a second time.
  const storedKey = root.llm?.apiKey;
  const prevKey = typeof storedKey === "string" && storedKey.length > 0 ? storedKey : undefined;
  const newKey = typeof input.apiKey === "string" && input.apiKey.length > 0 ? input.apiKey : undefined;

  const next = {
    baseUrl: input.baseUrl.trim(),
    model: input.model.trim(),
  };
  const keyToStore = newKey ?? prevKey;
  if (keyToStore) {
    next.apiKey = keyToStore;
  }

  root.llm = next;
  await writeFile(CONFIG_PATH, JSON.stringify(root, null, 2) + "\n", "utf-8");
  invalidateLLMConfigCache();
}
|
|
3569
|
+
/**
 * Register the admin-only LLM settings endpoints on `app`:
 *
 * - `GET  /api/llm`      — report the resolved base URL / model and whether an
 *                          API key is available (resolved, and in the file).
 * - `PUT  /api/llm`      — save base URL / model / optional API key, then
 *                          report the same status with `ok: true`.
 * - `POST /api/llm/test` — send a one-word prompt through `chatText` to check
 *                          that the configured LLM responds.
 *
 * The API key itself is never returned; only boolean presence flags are.
 *
 * @param app - Router exposing `get` / `put` / `post` registration methods.
 */
function registerLlmRoutes(app) {
  // Status payload shared by GET and PUT.
  const describeSettings = async () => {
    const resolved = getLLMConfig();
    const onDisk = await readLlmFileConfig();
    return {
      baseUrl: resolved.baseUrl,
      model: resolved.model,
      hasApiKey: Boolean(resolved.apiKey),
      apiKeyInFile: Boolean(onDisk.apiKey && onDisk.apiKey.length > 0),
    };
  };
  const errorText = (err) => (err instanceof Error ? err.message : String(err));

  app.get("/api/llm", requireAdmin(), async (c) => {
    return c.json(await describeSettings());
  });

  app.put("/api/llm", requireAdmin(), async (c) => {
    try {
      const body = await c.req.json();
      const settings = {
        baseUrl: typeof body.baseUrl === "string" ? body.baseUrl : "",
        model: typeof body.model === "string" ? body.model : "",
      };
      // Only forward apiKey when the client actually sent a string.
      if (typeof body.apiKey === "string") {
        settings.apiKey = body.apiKey;
      }
      await saveLlmSettings(settings);
      const status = await describeSettings();
      return c.json({ ok: true, ...status });
    } catch (err) {
      return c.json({ ok: false, message: errorText(err) }, 400);
    }
  });

  app.post("/api/llm/test", requireAdmin(), async (c) => {
    const startedAt = Date.now();
    try {
      const { apiKey } = getLLMConfig();
      if (!apiKey) {
        return c.json({ ok: false, message: "未配置 API Key(请在界面或 OPENAI_API_KEY 中设置)" }, 400);
      }
      const reply = await chatText("Reply with exactly the single word: ok", void 0, {
        maxTokens: 32768,
        debugLabel: "llmSettingsTest",
      });
      return c.json({ ok: true, reply });
    } catch (err) {
      const ms = Date.now() - startedAt;
      const message = errorText(err);
      console.error("[llm/test] fail", { ms, message });
      return c.json({ ok: false, message }, 400);
    }
  });
}
|
|
3629
|
+
/**
 * Register the admin-only global proxy endpoints on `app`:
 *
 * - `GET /api/proxy` — return the stored global proxy (empty string if none).
 * - `PUT /api/proxy` — store `body.globalProxy` (non-strings and unparsable
 *                      bodies are saved as an empty string) and echo back the
 *                      value read from config afterwards.
 *
 * @param app - Router exposing `get` / `put` registration methods.
 */
function registerProxySettingsRoutes(app) {
  const currentProxy = async () => (await readGlobalProxyFromConfig()) ?? "";

  app.get("/api/proxy", requireAdmin(), async (c) => {
    const globalProxy = await currentProxy();
    return c.json({ globalProxy });
  });

  app.put("/api/proxy", requireAdmin(), async (c) => {
    try {
      // An unparsable body is treated like an empty object.
      const body = await c.req.json().catch(() => ({}));
      const requested = typeof body.globalProxy === "string" ? body.globalProxy : "";
      await saveGlobalProxyToConfig(requested);
      return c.json({ ok: true, globalProxy: await currentProxy() });
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      return c.json({ ok: false, message }, 400);
    }
  });
}
|
|
3267
3649
|
const tasks = /* @__PURE__ */ new Map();
|
|
3268
3650
|
let idCounter = 0;
|
|
3269
3651
|
function nextId() {
|
|
@@ -3337,8 +3719,310 @@ function registerTasksRoutes(app) {
|
|
|
3337
3719
|
}
|
|
3338
3720
|
});
|
|
3339
3721
|
}
|
|
3722
|
+
// Sub-directory of CACHE_DIR that holds cached favicon files.
const CACHE_SUBDIR = "feed-favicons";
// Prefix mixed into the hash that names each cache file.
const CACHE_KEY_PREFIX = "feed-favicon:v1:";
// Cache lifetime: 3 days, in seconds (Cache-Control) and milliseconds (file mtime check).
const CACHE_MAX_AGE_SEC = 3 * 24 * 60 * 60;
const CACHE_MAX_AGE_MS = CACHE_MAX_AGE_SEC * 1e3;
const CACHE_CONTROL = `public, max-age=${CACHE_MAX_AGE_SEC}`;
// Timeout applied to each upstream fetch (homepage HTML and icon candidates).
const FETCH_TIMEOUT_MS = 6e3;
// Icon candidates larger than this (2 MiB) are rejected.
const MAX_ICON_BYTES = 2 * 1024 * 1024;
// Only the first 512 KiB of a homepage is parsed for <link> icons.
const MAX_HTML_BYTES = 512 * 1024;
// In-flight favicon lookups keyed by domain, so concurrent requests share one fetch.
const inflightByDomain = /* @__PURE__ */ new Map();
|
|
3731
|
+
// Maximum accepted length of the whole hostname.
const MAX_DOMAIN_LEN = 253;
/**
 * Cheap syntactic check that `s` looks like a DNS hostname.
 *
 * The value comes from an unauthenticated query parameter and is later
 * interpolated into upstream URLs, so each dot-separated label is validated
 * on its own: 1–63 characters, ASCII letters / digits / hyphens, and no
 * leading or trailing hyphen. This rejects empty labels such as `a..b`,
 * which the previous whole-string pattern let through.
 *
 * NOTE(review): IP literals and single-label names such as `localhost` still
 * pass; this is a syntax check, not an SSRF filter.
 *
 * @param {string} s - Candidate hostname (already trimmed by the caller).
 * @returns {boolean} `true` when every label is well-formed.
 */
function isPlausibleHostname(s) {
  if (s.length === 0 || s.length > MAX_DOMAIN_LEN) return false;
  const label = /^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?$/i;
  return s.split(".").every((part) => label.test(part));
}
|
|
3736
|
+
/**
 * Map a domain to its favicon cache file path.
 *
 * The file name is the hex SHA-256 of CACHE_KEY_PREFIX plus the lower-cased
 * domain, placed under CACHE_DIR/CACHE_SUBDIR.
 *
 * @param {string} domainKey - Domain name (case-insensitive).
 * @returns {string} Path of the cache file.
 */
function cacheFilePath(domainKey) {
  const hasher = createHash("sha256");
  hasher.update(CACHE_KEY_PREFIX + domainKey.toLowerCase());
  const fileName = hasher.digest("hex");
  return join(CACHE_DIR, CACHE_SUBDIR, fileName);
}
|
|
3740
|
+
/**
 * List conventional favicon URLs on the site's own origin.
 *
 * Both the given host and its `www.` counterpart (added or stripped) are
 * tried over HTTPS, each with `/favicon.ico`, `/favicon.png` and
 * `/apple-touch-icon.png`, host by host in that order.
 *
 * @param {string} domain - Hostname (case-insensitive).
 * @returns {string[]} Candidate icon URLs.
 */
function originFaviconUrls(domain) {
  const host = domain.toLowerCase();
  const counterpart = host.startsWith("www.") ? host.slice(4) : `www.${host}`;
  const origins = new Set([`https://${host}`]);
  // Stripping "www." can leave nothing; skip the empty host in that case.
  if (counterpart) origins.add(`https://${counterpart}`);
  const iconPaths = ["/favicon.ico", "/favicon.png", "/apple-touch-icon.png"];
  return [...origins].flatMap((origin) => iconPaths.map((iconPath) => `${origin}${iconPath}`));
}
|
|
3758
|
+
/**
 * List the HTTPS homepage URLs to probe for `<link rel="icon">` tags: the
 * given host first, then its `www.` counterpart (added or stripped).
 *
 * @param {string} domain - Hostname (case-insensitive).
 * @returns {string[]} Homepage URLs, de-duplicated, in probe order.
 */
function homepageUrlsForDomain(domain) {
  const host = domain.toLowerCase();
  const counterpart = host.startsWith("www.") ? host.slice(4) : `www.${host}`;
  const pages = new Set([`https://${host}/`]);
  // Stripping "www." can leave nothing; skip the empty host in that case.
  if (counterpart) pages.add(`https://${counterpart}/`);
  return Array.from(pages);
}
|
|
3769
|
+
/**
 * Decide whether a `<link rel="…">` value denotes an icon.
 *
 * The value is split on whitespace, case-insensitively; it matches when any
 * token is `icon`, `mask-icon`, `apple-touch-icon` or
 * `apple-touch-icon-precomposed` (so `shortcut icon` matches via `icon`).
 *
 * @param {string} rel - Raw `rel` attribute value.
 * @returns {boolean}
 */
function isIconLinkRel(rel) {
  const iconTokens = new Set(["icon", "mask-icon", "apple-touch-icon", "apple-touch-icon-precomposed"]);
  const relTokens = rel.toLowerCase().trim().split(/\s+/);
  return relTokens.some((token) => iconTokens.has(token));
}
|
|
3776
|
+
/**
 * Extract absolute icon URLs from a page's `<link>` elements.
 *
 * Relative hrefs are resolved against the first `<base href>` when it
 * resolves, otherwise against `pageUrl`. `data:` and `blob:` hrefs,
 * unresolvable hrefs and non-HTTP(S) results are skipped.
 *
 * @param {string} html - Page markup.
 * @param {string} pageUrl - URL the markup was fetched from.
 * @returns {string[]} Unique absolute icon URLs in document order.
 */
function parseLinkIconHrefs(html, pageUrl) {
  const resolveOrNull = (ref, against) => {
    try {
      return new URL(ref, against).href;
    } catch {
      return null;
    }
  };

  const doc = parse(html, { lowerCaseTagName: true });
  const declaredBase = doc.querySelector("base[href]")?.getAttribute("href")?.trim();
  const resolveBase = (declaredBase && resolveOrNull(declaredBase, pageUrl)) || pageUrl;

  // A Set keeps insertion order, giving de-duplication in document order.
  const found = new Set();
  for (const link of doc.querySelectorAll("link[href]")) {
    if (!isIconLinkRel(link.getAttribute("rel") ?? "")) continue;
    const href = link.getAttribute("href")?.trim();
    if (!href || href.startsWith("data:") || href.startsWith("blob:")) continue;
    const absolute = resolveOrNull(href, resolveBase);
    if (absolute === null) continue;
    if (absolute.startsWith("http:") || absolute.startsWith("https:")) {
      found.add(absolute);
    }
  }
  return [...found];
}
|
|
3807
|
+
/**
 * Fetch a page and return at most the first MAX_HTML_BYTES of its body as
 * UTF-8 text.
 *
 * The body is streamed and the download is cancelled once the cap is
 * reached, so an oversized or endless response cannot be buffered in full
 * (previously the whole body was read before truncation).
 *
 * @param {string} url - Page URL.
 * @returns {Promise<string | null>} The (possibly truncated) markup, or
 *   `null` on a network error, timeout, or non-2xx status.
 */
async function fetchHtmlPage(url) {
  try {
    const upstream = await fetch(url, {
      redirect: "follow",
      headers: {
        Accept: "text/html,application/xhtml+xml;q=0.9,*/*;q=0.1",
        "User-Agent": "Mozilla/5.0 (compatible; RssAny/1.0; +https://github.com/rssany/rssany) favicon"
      },
      signal: AbortSignal.timeout(FETCH_TIMEOUT_MS)
    });
    if (!upstream.ok) {
      // Release the connection instead of leaving the body unread.
      await upstream.body?.cancel().catch(() => {});
      return null;
    }
    if (!upstream.body) return "";

    const reader = upstream.body.getReader();
    const chunks = [];
    let received = 0;
    try {
      while (received < MAX_HTML_BYTES) {
        const { done, value } = await reader.read();
        if (done) break;
        chunks.push(value);
        received += value.byteLength;
      }
    } finally {
      // Stops the download when the cap was hit; a no-op once fully read.
      await reader.cancel().catch(() => {});
    }
    // The last chunk may overshoot the cap; trim to exactly MAX_HTML_BYTES.
    return Buffer.concat(chunks).subarray(0, MAX_HTML_BYTES).toString("utf-8");
  } catch {
    return null;
  }
}
|
|
3826
|
+
/**
 * Discover icon URLs declared in the site's homepage markup.
 *
 * Homepage candidates are tried in order; the icons of the first page that
 * declares any are returned. Discovery is disabled when the
 * `FAVICON_SKIP_HTML` environment variable is `"1"` or `"true"`.
 *
 * @param {string} domain - Hostname.
 * @returns {Promise<string[]>} Absolute icon URLs, or an empty array.
 */
async function discoverIconUrlsFromHomepage(domain) {
  const skipFlag = process.env.FAVICON_SKIP_HTML;
  if (skipFlag === "1" || skipFlag === "true") return [];

  for (const candidate of homepageUrlsForDomain(domain)) {
    const markup = await fetchHtmlPage(candidate);
    if (!markup) continue;
    const icons = parseLinkIconHrefs(markup, candidate);
    if (icons.length !== 0) {
      return icons;
    }
  }
  return [];
}
|
|
3838
|
+
/**
 * Build the DuckDuckGo icon-service URL for a domain.
 *
 * The domain is percent-encoded like in the other third-party URL builders,
 * so an unexpected character cannot alter the URL path. Plain hostnames are
 * unaffected.
 *
 * @param {string} domain - Hostname.
 * @returns {string} Icon URL.
 */
function duckduckgoFaviconUrl(domain) {
  return `https://icons.duckduckgo.com/ip3/${encodeURIComponent(domain)}.ico`;
}
|
|
3841
|
+
/**
 * Build the icon.horse lookup URL for a domain.
 *
 * @param {string} domain - Hostname (percent-encoded into the path).
 * @returns {string} Icon URL.
 */
function iconHorseUrl(domain) {
  const encoded = encodeURIComponent(domain);
  return "https://icon.horse/icon/" + encoded;
}
|
|
3844
|
+
/**
 * Build the unavatar.io lookup URL for a domain.
 *
 * @param {string} domain - Hostname (percent-encoded into the path).
 * @returns {string} Icon URL.
 */
function unavatarUrl(domain) {
  const encoded = encodeURIComponent(domain);
  return "https://unavatar.io/" + encoded;
}
|
|
3847
|
+
/**
 * Build the Google S2 favicon URL (64 px) for a domain.
 *
 * @param {string} domain - Hostname (percent-encoded into the query).
 * @returns {string} Icon URL.
 */
function googleFaviconUrl(domain) {
  const encoded = encodeURIComponent(domain);
  return "https://www.google.com/s2/favicons?domain=" + encoded + "&sz=64";
}
|
|
3850
|
+
/**
 * Pick the character shown on a letter avatar: the first ASCII letter or
 * digit of the domain (ignoring a leading `www.`), upper-cased.
 *
 * @param {string} domain - Hostname.
 * @returns {string} A single character, or `"?"` when none qualifies.
 */
function letterCharFromDomain(domain) {
  const host = domain.toLowerCase().replace(/^www\./, "");
  const firstAlnum = /[a-z0-9]/.exec(host);
  if (firstAlnum === null) {
    return "?";
  }
  return firstAlnum[0].toUpperCase();
}
|
|
3855
|
+
/**
 * Derive a stable hue (0–359) from a domain, so each domain always gets the
 * same avatar colour.
 *
 * @param {string} domain - Hostname (case-insensitive).
 * @returns {number} Integer hue in degrees.
 */
function hueFromDomain(domain) {
  const digest = createHash("sha256").update(domain.toLowerCase()).digest();
  // First two digest bytes read as a big-endian 16-bit integer.
  const high = digest[0];
  const low = digest[1];
  return (high * 256 + low) % 360;
}
|
|
3859
|
+
/**
 * Escape text for safe inclusion in XML/SVG content or a double-quoted
 * attribute.
 *
 * `&` is replaced first so the entities produced by the later replacements
 * are not escaped a second time.
 *
 * @param {string} s - Raw text.
 * @returns {string} Text with `&`, `<`, `>` and `"` replaced by entities.
 */
function escapeXmlText(s) {
  return s
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;");
}
|
|
3862
|
+
/**
 * Render a 64×64 SVG letter avatar for a domain: a rounded square filled
 * with a domain-derived colour and the domain's initial centred in white.
 *
 * @param {string} domain - Hostname.
 * @returns {Buffer} UTF-8 encoded SVG document.
 */
function letterAvatarSvg(domain) {
  const initial = letterCharFromDomain(domain);
  const letter = escapeXmlText(initial);
  const bg = `hsl(${hueFromDomain(domain)} 42% 44%)`;
  const markup = `<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" width="64" height="64" viewBox="0 0 64 64">
<rect width="64" height="64" rx="12" fill="${bg}"/>
<text x="32" y="32" dominant-baseline="central" text-anchor="middle" fill="#ffffff" font-family="system-ui,Segoe UI,Helvetica,sans-serif" font-size="28" font-weight="600">${letter}</text>
</svg>`;
  const trimmed = markup.trim();
  return Buffer.from(trimmed, "utf-8");
}
|
|
3873
|
+
/**
 * Build the fallback icon for a domain, in the same `{ buf, mime }` shape
 * used for fetched icons.
 *
 * @param {string} domain - Hostname.
 * @returns {{buf: Buffer, mime: string}} SVG letter avatar and its MIME type.
 */
function letterAvatarForDomain(domain) {
  const buf = letterAvatarSvg(domain);
  return { buf, mime: "image/svg+xml" };
}
|
|
3876
|
+
/**
 * Tell whether a caught value is a "file not found" error.
 *
 * @param {unknown} e - Caught value.
 * @returns {boolean} `true` when `e` is a non-null object whose `code` is
 *   `"ENOENT"`.
 */
function isEnoent(e) {
  if (e === null || typeof e !== "object") {
    return false;
  }
  return e.code === "ENOENT";
}
|
|
3879
|
+
/**
 * Detect an image MIME type from the leading bytes of a buffer.
 *
 * Recognises PNG, GIF, JPEG, WebP, ICO/CUR and SVG. An XML document is only
 * reported as SVG when an `<svg` element appears near the start; previously
 * any `<?xml` prologue (RSS feeds, sitemaps, XHTML pages) counted as SVG.
 *
 * @param {Buffer} buf - Raw bytes.
 * @returns {string | null} MIME type, or `null` when nothing matches.
 */
function sniffImageMime(buf) {
  if (buf.length < 4) return null;
  // PNG: 89 'P' 'N' 'G'
  if (buf[0] === 0x89 && buf[1] === 0x50 && buf[2] === 0x4e && buf[3] === 0x47) return "image/png";
  // GIF: 'G' 'I' 'F'
  if (buf.length >= 6 && buf[0] === 0x47 && buf[1] === 0x49 && buf[2] === 0x46) return "image/gif";
  // JPEG: FF D8 FF
  if (buf[0] === 0xff && buf[1] === 0xd8 && buf[2] === 0xff) return "image/jpeg";
  // WebP: "RIFF" <4-byte size> "WEBP"
  if (buf.length >= 12 && buf.subarray(0, 4).toString("ascii") === "RIFF" && buf.subarray(8, 12).toString("ascii") === "WEBP") {
    return "image/webp";
  }
  // ICO / CUR: reserved word 0, then little-endian type 1 (icon) or 2 (cursor).
  if (buf.length >= 6 && buf.readUInt16LE(0) === 0 && (buf[2] === 1 || buf[2] === 2) && buf[3] === 0) {
    return "image/x-icon";
  }
  // SVG is text: look at the start of the document, ignoring leading whitespace.
  // 4 KiB leaves room for an XML prologue, doctype and generator comments.
  const head = buf.subarray(0, Math.min(4096, buf.length)).toString("utf-8").trimStart();
  if (head.startsWith("<svg")) return "image/svg+xml";
  if (head.startsWith("<?xml") && /<svg[\s>]/.test(head)) return "image/svg+xml";
  return null;
}
|
|
3894
|
+
// Prefix shared by every image MIME type.
const IMAGE_CT_PREFIX = "image/";
/**
 * Extract an image MIME type from a `Content-Type` header value.
 *
 * Parameters after `;` are dropped and the media type is lower-cased.
 *
 * @param {string | null | undefined} ct - Header value.
 * @returns {string | null} The `image/*` media type, or `null` when the
 *   header is missing or names a non-image type.
 */
function mimeFromFetch(ct) {
  if (!ct) return null;
  const [mediaType] = ct.split(";");
  const normalized = mediaType.trim().toLowerCase();
  if (!normalized.startsWith(IMAGE_CT_PREFIX)) {
    return null;
  }
  return normalized;
}
|
|
3900
|
+
/**
 * Determine an image's MIME type, preferring content sniffing over the
 * server-declared `Content-Type`.
 *
 * @param {Buffer} buf - Image bytes.
 * @param {string | null} ct - `Content-Type` header value, if any.
 * @returns {string | null} MIME type, or `null` when neither source yields
 *   an image type.
 */
function resolveImageMime(buf, ct) {
  const sniffed = sniffImageMime(buf);
  if (sniffed !== null && sniffed !== undefined) {
    return sniffed;
  }
  return mimeFromFetch(ct);
}
|
|
3903
|
+
/**
 * Download one icon candidate.
 *
 * The body is streamed and the download is cancelled as soon as it exceeds
 * MAX_ICON_BYTES, so an oversized response is never buffered in full.
 * Failures while reading the body now yield `null`, like connection failures
 * (previously the body read sat outside the `try` and rejected instead).
 *
 * @param {string} url - Candidate icon URL.
 * @returns {Promise<{buf: Buffer, ct: string | null} | null>} The bytes and
 *   declared `Content-Type`, or `null` on network error, timeout, non-2xx
 *   status, empty body, or a body larger than MAX_ICON_BYTES.
 */
async function fetchIconCandidate(url) {
  try {
    const upstream = await fetch(url, {
      redirect: "follow",
      headers: {
        Accept: "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
        "User-Agent": "Mozilla/5.0 (compatible; RssAny/1.0; +https://github.com/rssany/rssany) favicon"
      },
      signal: AbortSignal.timeout(FETCH_TIMEOUT_MS)
    });
    if (!upstream.ok) {
      // Release the connection instead of leaving the body unread.
      await upstream.body?.cancel().catch(() => {});
      return null;
    }
    if (!upstream.body) return null;

    const reader = upstream.body.getReader();
    const chunks = [];
    let received = 0;
    try {
      for (;;) {
        const { done, value } = await reader.read();
        if (done) break;
        received += value.byteLength;
        if (received > MAX_ICON_BYTES) return null;
        chunks.push(value);
      }
    } finally {
      // Stops the download on the early return; a no-op once fully read.
      await reader.cancel().catch(() => {});
    }
    if (received === 0) return null;
    return { buf: Buffer.concat(chunks, received), ct: upstream.headers.get("content-type") };
  } catch {
    return null;
  }
}
|
|
3923
|
+
/**
 * Check that a fetch result is present and resolves to an image MIME type.
 *
 * @param {{buf: Buffer, ct: string | null} | null} got - Fetch result.
 * @returns {boolean}
 */
function isValidIcon(got) {
  if (!got) {
    return false;
  }
  const detected = resolveImageMime(got.buf, got.ct);
  return Boolean(detected) && detected.startsWith(IMAGE_CT_PREFIX);
}
|
|
3928
|
+
/**
 * Assemble every URL to try for a domain's icon: the site's conventional
 * paths, icons declared in its HTML, then third-party icon services.
 *
 * Environment switches:
 * - `FAVICON_THIRD_PARTY` = `"0"` / `"false"` drops DuckDuckGo, icon.horse
 *   and unavatar.
 * - `FAVICON_INCLUDE_GOOGLE` = `"1"` / `"true"` appends Google's service
 *   (independently of the switch above).
 *
 * @param {string} domain - Hostname.
 * @param {string[]} htmlIconUrls - Icon URLs found in the homepage markup.
 * @returns {string[]} Candidate URLs.
 */
function upstreamFaviconUrls(domain, htmlIconUrls) {
  const envIsOneOf = (name, ...values) => values.includes(process.env[name]);

  const candidates = originFaviconUrls(domain).concat(htmlIconUrls);
  if (!envIsOneOf("FAVICON_THIRD_PARTY", "0", "false")) {
    candidates.push(duckduckgoFaviconUrl(domain), iconHorseUrl(domain), unavatarUrl(domain));
  }
  if (envIsOneOf("FAVICON_INCLUDE_GOOGLE", "1", "true")) {
    candidates.push(googleFaviconUrl(domain));
  }
  return candidates;
}
|
|
3938
|
+
/**
 * Resolve a domain's icon from the network.
 *
 * All candidate URLs are fetched concurrently and the first one that yields
 * a valid image wins. When every candidate fails, a generated letter avatar
 * is returned instead.
 *
 * @param {string} domain - Hostname.
 * @returns {Promise<{buf: Buffer, mime: string}>} Icon bytes and MIME type.
 */
async function fetchFaviconFromNetwork(domain) {
  const discovered = await discoverIconUrlsFromHomepage(domain);

  // Rejects for anything that is not a usable image so Promise.any skips it.
  const attempt = async (url) => {
    const candidate = await fetchIconCandidate(url);
    if (!isValidIcon(candidate)) {
      throw new Error("not-an-icon");
    }
    return { buf: candidate.buf, mime: resolveImageMime(candidate.buf, candidate.ct) };
  };

  const racers = upstreamFaviconUrls(domain, discovered).map((url) => attempt(url));
  return Promise.any(racers).catch(() => letterAvatarForDomain(domain));
}
|
|
3955
|
+
/**
 * Resolve a domain's icon, sharing one network lookup between concurrent
 * callers for the same domain.
 *
 * The lookup's promise is stored in `inflightByDomain` and removed again
 * once it settles.
 *
 * @param {string} domain - Hostname, used as the de-duplication key.
 * @returns {Promise<{buf: Buffer, mime: string}>} Icon bytes and MIME type.
 */
function fetchFaviconDeduped(domain) {
  const pending = inflightByDomain.get(domain);
  if (pending) {
    return pending;
  }
  const started = fetchFaviconFromNetwork(domain).finally(() => {
    // Only clear the entry if it is still ours.
    if (inflightByDomain.get(domain) === started) {
      inflightByDomain.delete(domain);
    }
  });
  inflightByDomain.set(domain, started);
  return started;
}
|
|
3964
|
+
/**
 * Register `GET /api/feed-favicon?domain=…`, which serves a site icon for a
 * feed's domain, backed by an on-disk cache.
 *
 * Flow: validate the domain → serve the cached file when it is younger than
 * CACHE_MAX_AGE_MS and still sniffs as an image → otherwise resolve the icon
 * from the network (or a letter avatar), cache it, and serve it.
 *
 * Changes from the previous behaviour:
 * - A failed cache write no longer turns a successful lookup into a 500; the
 *   icon is served and the failure is logged.
 * - Responses carry `X-Content-Type-Options: nosniff` and a restrictive
 *   `Content-Security-Policy`, because the bytes (possibly SVG) come from
 *   arbitrary third-party sites and are served from this origin.
 *
 * Status codes: 400 for a missing or implausible domain, 500 for unexpected
 * cache read errors, 200 otherwise.
 *
 * @param app - Router exposing a `get` registration method.
 */
function registerFeedFaviconRoutes(app) {
  const iconResponse = (bytes, contentType) =>
    new Response(new Uint8Array(bytes), {
      status: 200,
      headers: {
        "Content-Type": contentType,
        "Cache-Control": CACHE_CONTROL,
        "X-Content-Type-Options": "nosniff",
        // Neutralises scripts if an SVG is opened as a document; <img> use is unaffected.
        "Content-Security-Policy": "default-src 'none'; style-src 'unsafe-inline'; sandbox"
      }
    });

  app.get("/api/feed-favicon", async (c) => {
    const raw = (c.req.query("domain") ?? "").trim();
    if (!raw || !isPlausibleHostname(raw)) {
      return new Response(null, { status: 400 });
    }
    const domain = raw.toLowerCase();
    const path = cacheFilePath(domain);

    // Expire the cache entry by file modification time.
    let diskStale = false;
    try {
      const st = await stat(path);
      if (Date.now() - st.mtimeMs >= CACHE_MAX_AGE_MS) {
        diskStale = true;
        await unlink(path).catch(() => {});
      }
    } catch (e) {
      if (!isEnoent(e)) {
        return new Response(null, { status: 500 });
      }
    }

    if (!diskStale) {
      try {
        const cached = await readFile(path);
        const cachedMime = resolveImageMime(cached, null);
        if (cachedMime) {
          return iconResponse(cached, cachedMime);
        }
        // The cached bytes no longer sniff as an image: drop and refetch.
        await unlink(path).catch(() => {});
      } catch (e) {
        if (!isEnoent(e)) {
          return new Response(null, { status: 500 });
        }
      }
    }

    const { buf, mime } = await fetchFaviconDeduped(domain);
    try {
      await mkdir(join(CACHE_DIR, CACHE_SUBDIR), { recursive: true });
      await writeFile(path, buf);
    } catch (err) {
      // Caching is an optimisation; still serve the icon we already have.
      console.warn("[feed-favicon] cache write failed", {
        domain,
        message: err instanceof Error ? err.message : String(err)
      });
    }
    return iconResponse(buf, mime);
  });
}
|
|
3340
4023
|
function registerApiRoutes(app) {
|
|
3341
4024
|
registerServerRoutes(app);
|
|
4025
|
+
registerFeedFaviconRoutes(app);
|
|
3342
4026
|
registerRssApiRoutes(app);
|
|
3343
4027
|
registerSchedulerRoutes(app);
|
|
3344
4028
|
registerPluginsRoutes(app);
|
|
@@ -3350,6 +4034,8 @@ function registerApiRoutes(app) {
|
|
|
3350
4034
|
registerSourcesRoutes(app);
|
|
3351
4035
|
registerTopicsRoutes(app);
|
|
3352
4036
|
registerDeliverRoutes(app);
|
|
4037
|
+
registerLlmRoutes(app);
|
|
4038
|
+
registerProxySettingsRoutes(app);
|
|
3353
4039
|
registerTasksRoutes(app);
|
|
3354
4040
|
}
|
|
3355
4041
|
function registerAuthRoutes(app) {
|
|
@@ -3363,7 +4049,7 @@ function registerAuthRoutes(app) {
|
|
|
3363
4049
|
const authFlow = toAuthFlow(site);
|
|
3364
4050
|
if (!authFlow) return c.json({ ok: false, message: "该站点无需登录" }, 400);
|
|
3365
4051
|
try {
|
|
3366
|
-
const authenticated = await preCheckAuth(authFlow, CACHE_DIR);
|
|
4052
|
+
const authenticated = await preCheckAuth(authFlow, CACHE_DIR, { proxy: await resolveProxyForSite(site) });
|
|
3367
4053
|
return c.json({ ok: true, authenticated });
|
|
3368
4054
|
} catch (err) {
|
|
3369
4055
|
const msg = err instanceof Error ? err.message : String(err);
|
|
@@ -3380,12 +4066,23 @@ function registerAuthRoutes(app) {
|
|
|
3380
4066
|
const authFlow = toAuthFlow(site);
|
|
3381
4067
|
if (!authFlow) return c.json({ ok: false, message: "该站点无需登录" }, 400);
|
|
3382
4068
|
const { loginUrl } = authFlow;
|
|
3383
|
-
|
|
3384
|
-
|
|
3385
|
-
|
|
3386
|
-
|
|
3387
|
-
|
|
3388
|
-
|
|
4069
|
+
const proxy = await resolveProxyForSite(site);
|
|
4070
|
+
void launchBrowser({ headless: false, cacheDir: CACHE_DIR, proxy: resolveProxy({ proxy }) }).then(async (browser) => {
|
|
4071
|
+
try {
|
|
4072
|
+
const page = await browser.newPage();
|
|
4073
|
+
await applyProxyAuthToPage(page, { proxy });
|
|
4074
|
+
const realUserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
|
|
4075
|
+
await page.setUserAgent(realUserAgent);
|
|
4076
|
+
await page.setViewport({ width: 1366, height: 960 });
|
|
4077
|
+
await page.goto(loginUrl, { waitUntil: "domcontentloaded", timeout: 6e4 });
|
|
4078
|
+
page.once("close", () => {
|
|
4079
|
+
void browser.close().catch(() => {
|
|
4080
|
+
});
|
|
4081
|
+
});
|
|
4082
|
+
} catch {
|
|
4083
|
+
await browser.close().catch(() => {
|
|
4084
|
+
});
|
|
4085
|
+
}
|
|
3389
4086
|
}).catch(() => {
|
|
3390
4087
|
});
|
|
3391
4088
|
return c.json({ ok: true, message: "已打开登录页面" });
|
|
@@ -3406,7 +4103,7 @@ function registerAuthRoutes(app) {
|
|
|
3406
4103
|
}
|
|
3407
4104
|
const authFlow = toAuthFlow(site);
|
|
3408
4105
|
if (!authFlow) return c.json({ ok: false, message: "该站点无需登录" }, 400);
|
|
3409
|
-
ensureAuth(authFlow, CACHE_DIR).then(() => {
|
|
4106
|
+
ensureAuth(authFlow, CACHE_DIR, { proxy: await resolveProxyForSite(site) }).then(() => {
|
|
3410
4107
|
}).catch(() => {
|
|
3411
4108
|
});
|
|
3412
4109
|
return c.json({ ok: true, message: "已打开登录窗口,请在弹出的浏览器中完成登录,完成后刷新订阅页面即可。" });
|
|
@@ -3429,6 +4126,24 @@ async function readStaticHtml(name, fallback) {
|
|
|
3429
4126
|
/**
 * Escape text for safe inclusion in HTML content or a quoted attribute.
 *
 * `&` is replaced first so the entities produced by the later replacements
 * are not escaped a second time.
 *
 * @param {string} s - Raw text.
 * @returns {string} Text with `&`, `<`, `>`, `"` and `'` replaced by
 *   entities.
 */
function escapeHtml(s) {
  return s
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}
|
|
4129
|
+
/**
 * Work out which proxy setting takes effect, by precedence: the explicit
 * override, then the proxy merged from the source, then the `HTTP_PROXY` /
 * `HTTPS_PROXY` environment variables. Blank values are skipped.
 *
 * @param {string | undefined} override - Proxy explicitly requested.
 * @param {string | undefined} mergedFromSource - Proxy from the source config.
 * @returns {string | undefined} The trimmed proxy string, or a falsy value
 *   when none is configured.
 */
function effectiveProxyUsed(override, mergedFromSource) {
  for (const candidate of [override, mergedFromSource]) {
    const value = candidate?.trim();
    if (value) {
      return value;
    }
  }
  const { HTTP_PROXY, HTTPS_PROXY } = process.env;
  return HTTP_PROXY?.trim() || HTTPS_PROXY?.trim();
}
|
|
4136
|
+
/**
 * Mask the credentials in a proxy URL so it can be shown or logged.
 *
 * @param {string | null | undefined} p - Proxy URL.
 * @returns {string | null} The URL with any username / password replaced by
 *   `***`, or `null` when `p` is empty or not a parseable URL.
 */
function redactProxyForLog(p) {
  if (!p) return null;
  let parsed;
  try {
    parsed = new URL(p);
  } catch {
    return null;
  }
  if (parsed.username !== "") parsed.username = "***";
  if (parsed.password !== "") parsed.password = "***";
  return parsed.toString();
}
|
|
3432
4147
|
function registerAdminRoutes(app) {
|
|
3433
4148
|
async function render401(listUrl) {
|
|
3434
4149
|
const raw = await readStaticHtml("401", '<!DOCTYPE html><html><head><meta charset="utf-8"><title>401</title></head><body><h1>401 需要登录</h1></body></html>');
|
|
@@ -3439,12 +4154,25 @@ function registerAdminRoutes(app) {
|
|
|
3439
4154
|
if (!url) return c.text("无效 URL,格式: /admin/parse/https://... 或 /admin/parse/example.com/...", 400);
|
|
3440
4155
|
try {
|
|
3441
4156
|
const headlessParam = c.req.query("headless");
|
|
3442
|
-
const headless = headlessParam === "
|
|
4157
|
+
const headless = headlessParam === "true" || headlessParam === "1";
|
|
4158
|
+
const proxyOverride = c.req.query("proxy")?.trim();
|
|
3443
4159
|
const source = getSource(url);
|
|
3444
|
-
const
|
|
4160
|
+
const fromSource = await getEffectiveProxyForListUrl(url, source);
|
|
4161
|
+
const ctx = buildSourceContext({
|
|
4162
|
+
cacheDir: CACHE_DIR,
|
|
4163
|
+
headless,
|
|
4164
|
+
proxy: proxyOverride || fromSource
|
|
4165
|
+
});
|
|
3445
4166
|
const items = await source.fetchItems(url, ctx);
|
|
3446
4167
|
const mode = source.id === "generic" ? "generic" : "plugin";
|
|
3447
|
-
|
|
4168
|
+
const effective = effectiveProxyUsed(proxyOverride, fromSource);
|
|
4169
|
+
return c.json({
|
|
4170
|
+
items,
|
|
4171
|
+
url,
|
|
4172
|
+
mode,
|
|
4173
|
+
pluginId: source.id,
|
|
4174
|
+
effectiveProxy: redactProxyForLog(effective)
|
|
4175
|
+
});
|
|
3448
4176
|
} catch (err) {
|
|
3449
4177
|
if (err instanceof AuthRequiredError) {
|
|
3450
4178
|
const html = await render401(url);
|
|
@@ -3459,16 +4187,20 @@ function registerAdminRoutes(app) {
|
|
|
3459
4187
|
if (!url) return c.text("无效 URL,格式: /admin/extractor/https://... 或 /admin/extractor/example.com/...", 400);
|
|
3460
4188
|
try {
|
|
3461
4189
|
const headlessParam = c.req.query("headless");
|
|
3462
|
-
const headless = headlessParam === "
|
|
3463
|
-
const
|
|
3464
|
-
const
|
|
4190
|
+
const headless = headlessParam === "true" || headlessParam === "1";
|
|
4191
|
+
const proxyOverride = c.req.query("proxy")?.trim();
|
|
4192
|
+
const source = getSource(url);
|
|
4193
|
+
const fromSource = await getEffectiveProxyForListUrl(url, source);
|
|
4194
|
+
const proxy = proxyOverride || fromSource;
|
|
3465
4195
|
const result = await extractFromLink(url, {}, { timeoutMs: 6e4, headless, proxy });
|
|
4196
|
+
const effective = effectiveProxyUsed(proxyOverride, fromSource);
|
|
3466
4197
|
return c.json({
|
|
3467
4198
|
title: result.title ?? null,
|
|
3468
4199
|
author: result.author ?? null,
|
|
3469
4200
|
pubDate: result.pubDate ?? null,
|
|
3470
4201
|
content: result.content ?? null,
|
|
3471
|
-
_extractor: "readability"
|
|
4202
|
+
_extractor: "readability",
|
|
4203
|
+
effectiveProxy: redactProxyForLog(effective)
|
|
3472
4204
|
});
|
|
3473
4205
|
} catch (err) {
|
|
3474
4206
|
const msg = err instanceof Error ? err.message : String(err);
|