rssany 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +1 -1
- package/README.md +5 -1
- package/app/plugins/builtin/rss.rssany.js +107 -24
- package/app/plugins/site.rssany.js +1 -1
- package/dist/index.js +986 -266
- package/dist/index.js.map +1 -1
- package/{config.examples.json → init/config.json} +7 -1
- package/init/sources.json +353 -0
- package/package.json +4 -6
- package/statics/401.html +1 -1
- package/statics/README.md +1 -1
- package/webui/build/200.html +16 -18
- package/webui/build/_app/immutable/assets/0.C6Q_nuW9.css +1 -0
- package/webui/build/_app/immutable/assets/10.Dj8_pmut.css +1 -0
- package/webui/build/_app/immutable/assets/11.qYZMiTb0.css +1 -0
- package/webui/build/_app/immutable/assets/12.Ct59LCqW.css +1 -0
- package/webui/build/_app/immutable/assets/13.BhO9zvFi.css +1 -0
- package/webui/build/_app/immutable/assets/14.CujIhjQK.css +1 -0
- package/webui/build/_app/immutable/assets/15.nNGjXhCQ.css +1 -0
- package/webui/build/_app/immutable/assets/16.PP9XLDf7.css +1 -0
- package/webui/build/_app/immutable/assets/4.9wPHhVwv.css +1 -0
- package/webui/build/_app/immutable/assets/6.DSJfjJwx.css +1 -0
- package/webui/build/_app/immutable/assets/7.CrNxmd8B.css +1 -0
- package/webui/build/_app/immutable/assets/8.Ba5_jYIY.css +1 -0
- package/webui/build/_app/immutable/assets/{9.BZheTlzZ.css → 9.m-LCx_kl.css} +1 -1
- package/webui/build/_app/immutable/assets/BackToParentRoute.DGk-X5ow.css +1 -0
- package/webui/build/_app/immutable/assets/SourcesList.yTBBi3_m.css +1 -0
- package/webui/build/_app/immutable/assets/homeFeedPanelStore.BopJZtHu.css +1 -0
- package/webui/build/_app/immutable/chunks/{V2-VOe88.js → B-OsL1Ct.js} +1 -1
- package/webui/build/_app/immutable/chunks/B2Q1a1-H.js +2 -0
- package/webui/build/_app/immutable/chunks/BK3WtZwv.js +1 -0
- package/webui/build/_app/immutable/chunks/BQqoDzLx.js +1 -0
- package/webui/build/_app/immutable/chunks/BXCWEhUd.js +1 -0
- package/webui/build/_app/immutable/chunks/BbWUOQ_m.js +1 -0
- package/webui/build/_app/immutable/chunks/Bp63qm3L.js +1 -0
- package/webui/build/_app/immutable/chunks/CVzlFH44.js +1 -0
- package/webui/build/_app/immutable/chunks/CWNeClHp.js +6 -0
- package/webui/build/_app/immutable/chunks/Cihqbfi5.js +1 -0
- package/webui/build/_app/immutable/chunks/CkUAV0m0.js +41 -0
- package/webui/build/_app/immutable/chunks/CtijX1u3.js +31 -0
- package/webui/build/_app/immutable/chunks/D5GvRCv7.js +1 -0
- package/webui/build/_app/immutable/chunks/DEDI7Ecm.js +1 -0
- package/webui/build/_app/immutable/chunks/DFuhmi31.js +1 -0
- package/webui/build/_app/immutable/chunks/DMWEh-Ek.js +2 -0
- package/webui/build/_app/immutable/chunks/{CZDFXKiF.js → DcAshVxe.js} +1 -1
- package/webui/build/_app/immutable/chunks/DjNLq3TF.js +1 -0
- package/webui/build/_app/immutable/chunks/Dt2CddFe.js +1 -0
- package/webui/build/_app/immutable/chunks/Dw782Tjs.js +1 -0
- package/webui/build/_app/immutable/chunks/EIZIMsXK.js +1 -0
- package/webui/build/_app/immutable/chunks/Xy_fhzQq.js +1 -0
- package/webui/build/_app/immutable/chunks/lk5LaiqA.js +1 -0
- package/webui/build/_app/immutable/chunks/mW5RwvnK.js +13 -0
- package/webui/build/_app/immutable/chunks/{CtHRh_pJ.js → tB7QMF3U.js} +1 -1
- package/webui/build/_app/immutable/chunks/xtNWTdbD.js +1 -0
- package/webui/build/_app/immutable/entry/app.DdgnooOk.js +2 -0
- package/webui/build/_app/immutable/entry/start.DhJaJZhR.js +1 -0
- package/webui/build/_app/immutable/nodes/0.BE05Cuc4.js +11 -0
- package/webui/build/_app/immutable/nodes/1.5DFDaT4c.js +1 -0
- package/webui/build/_app/immutable/nodes/10.OVK4i9XE.js +1 -0
- package/webui/build/_app/immutable/nodes/11.Dhn_rO4A.js +1 -0
- package/webui/build/_app/immutable/nodes/12.Cg8AeCSH.js +1 -0
- package/webui/build/_app/immutable/nodes/13.nT3SOzEB.js +1 -0
- package/webui/build/_app/immutable/nodes/14.B_KpJLxn.js +1 -0
- package/webui/build/_app/immutable/nodes/15.RaWaA-0I.js +1 -0
- package/webui/build/_app/immutable/nodes/{12.CMcby_lY.js → 16.DSUgqolV.js} +15 -15
- package/webui/build/_app/immutable/nodes/2.BYWOpaxy.js +1 -0
- package/webui/build/_app/immutable/nodes/3.wQvGs9w-.js +1 -0
- package/webui/build/_app/immutable/nodes/4.DTSxpKm7.js +2 -0
- package/webui/build/_app/immutable/nodes/5.CCtn90c0.js +1 -0
- package/webui/build/_app/immutable/nodes/6.C2_mjW1u.js +1 -0
- package/webui/build/_app/immutable/nodes/7.Dwz6W7A1.js +1 -0
- package/webui/build/_app/immutable/nodes/8.DzkEw6rx.js +1 -0
- package/webui/build/_app/immutable/nodes/9.DtlXEwe1.js +1 -0
- package/webui/build/_app/version.json +1 -1
- package/sources.example.json +0 -562
- package/webui/build/_app/immutable/assets/0.BUAXpTm6.css +0 -1
- package/webui/build/_app/immutable/assets/10.I1OuCLrU.css +0 -1
- package/webui/build/_app/immutable/assets/11.CrO9xaki.css +0 -1
- package/webui/build/_app/immutable/assets/12.BEi6fInA.css +0 -1
- package/webui/build/_app/immutable/assets/14.Ctlgn1LZ.css +0 -1
- package/webui/build/_app/immutable/assets/2.eJ80XOGm.css +0 -1
- package/webui/build/_app/immutable/assets/4.B8-jYAVj.css +0 -1
- package/webui/build/_app/immutable/assets/6.Drn-0DON.css +0 -1
- package/webui/build/_app/immutable/assets/7.ms2diq_q.css +0 -1
- package/webui/build/_app/immutable/assets/8.DKymkjjs.css +0 -1
- package/webui/build/_app/immutable/assets/SourcesList.BhtYlRsQ.css +0 -1
- package/webui/build/_app/immutable/chunks/BUngiKFg.js +0 -1
- package/webui/build/_app/immutable/chunks/Bt0fzibd.js +0 -1
- package/webui/build/_app/immutable/chunks/BxHqDcpw.js +0 -1
- package/webui/build/_app/immutable/chunks/ByQRbEUX.js +0 -1
- package/webui/build/_app/immutable/chunks/C12mHcUp.js +0 -6
- package/webui/build/_app/immutable/chunks/C1kQ4pHy.js +0 -1
- package/webui/build/_app/immutable/chunks/C74gbb4Q.js +0 -1
- package/webui/build/_app/immutable/chunks/CAtemnMo.js +0 -1
- package/webui/build/_app/immutable/chunks/CVjCNJia.js +0 -1
- package/webui/build/_app/immutable/chunks/CjQQ9_Q2.js +0 -2
- package/webui/build/_app/immutable/chunks/D-6mYMI1.js +0 -1
- package/webui/build/_app/immutable/chunks/D1Gs8-g3.js +0 -1
- package/webui/build/_app/immutable/chunks/D9dRVKgL.js +0 -1
- package/webui/build/_app/immutable/chunks/DCEY1XiC.js +0 -1
- package/webui/build/_app/immutable/chunks/DI-t-G_K.js +0 -2
- package/webui/build/_app/immutable/chunks/DTUxjyWL.js +0 -1
- package/webui/build/_app/immutable/chunks/DWJZOHke.js +0 -1
- package/webui/build/_app/immutable/chunks/Dgs6d7X5.js +0 -1
- package/webui/build/_app/immutable/chunks/DjpPK99f.js +0 -71
- package/webui/build/_app/immutable/chunks/DjzVVxpy.js +0 -1
- package/webui/build/_app/immutable/chunks/DvtNA-3X.js +0 -1
- package/webui/build/_app/immutable/chunks/LQVMBmDN.js +0 -1
- package/webui/build/_app/immutable/chunks/Qw0Qgx6J.js +0 -1
- package/webui/build/_app/immutable/chunks/bohabpgg.js +0 -1
- package/webui/build/_app/immutable/chunks/c-YfbAB_.js +0 -8
- package/webui/build/_app/immutable/chunks/tpTQfoNn.js +0 -1
- package/webui/build/_app/immutable/entry/app.Cra5Zsz4.js +0 -2
- package/webui/build/_app/immutable/entry/start.ToY0Qh0_.js +0 -1
- package/webui/build/_app/immutable/nodes/0.D2-xzG_8.js +0 -11
- package/webui/build/_app/immutable/nodes/1.CFixzRR6.js +0 -1
- package/webui/build/_app/immutable/nodes/10.ayxWydPr.js +0 -1
- package/webui/build/_app/immutable/nodes/11.B0JS3E2j.js +0 -1
- package/webui/build/_app/immutable/nodes/13.DRpZV72T.js +0 -1
- package/webui/build/_app/immutable/nodes/14.DVeJW6bd.js +0 -1
- package/webui/build/_app/immutable/nodes/2.DIZ4IPNm.js +0 -1
- package/webui/build/_app/immutable/nodes/3.BFSNf0FK.js +0 -1
- package/webui/build/_app/immutable/nodes/4.BSsIjejE.js +0 -2
- package/webui/build/_app/immutable/nodes/5.COxRT9Oe.js +0 -1
- package/webui/build/_app/immutable/nodes/6.CBgQ4YzB.js +0 -1
- package/webui/build/_app/immutable/nodes/7.BbzWOL0V.js +0 -6
- package/webui/build/_app/immutable/nodes/8.C8120200.js +0 -1
- package/webui/build/_app/immutable/nodes/9.BH_BGQQ4.js +0 -1
- /package/webui/build/_app/immutable/nodes/{15.BtYZF6FM.js → 17.BtYZF6FM.js} +0 -0
- /package/webui/build/_app/immutable/nodes/{16.Ba_qJjp6.js → 18.Ba_qJjp6.js} +0 -0
package/dist/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
|
-
import { existsSync, unlinkSync, openSync, writeSync, closeSync, readFileSync, watch } from "node:fs";
|
|
2
|
+
import { existsSync, unlinkSync, openSync, writeSync, closeSync, readFileSync, statSync, watch } from "node:fs";
|
|
3
3
|
import { platform, homedir, networkInterfaces } from "node:os";
|
|
4
4
|
import { serve } from "@hono/node-server";
|
|
5
5
|
import { Hono } from "hono";
|
|
@@ -10,7 +10,7 @@ import { promisify } from "node:util";
|
|
|
10
10
|
import puppeteerCore from "puppeteer-core";
|
|
11
11
|
import { parse, NodeType } from "node-html-parser";
|
|
12
12
|
import Database from "better-sqlite3";
|
|
13
|
-
import { mkdir, copyFile, access, rename, readFile, writeFile, readdir } from "node:fs/promises";
|
|
13
|
+
import { mkdir, copyFile, access, rename, readFile, writeFile, readdir, stat, unlink } from "node:fs/promises";
|
|
14
14
|
import { fileURLToPath, pathToFileURL } from "node:url";
|
|
15
15
|
import { createHash } from "node:crypto";
|
|
16
16
|
import { JSDOM } from "jsdom";
|
|
@@ -148,6 +148,50 @@ function markPipelineDrop(item) {
|
|
|
148
148
|
/**
 * Reports whether an item carries the pipeline-drop marker.
 *
 * @param {object} item - Item whose optional `extra` bag is inspected.
 * @returns {boolean} True only when `extra[PIPELINE_DROP_EXTRA_KEY]` is
 *   strictly `true`; a missing `extra` or any other value yields false.
 */
function isPipelineDroppedItem(item) {
  const dropFlag = item.extra?.[PIPELINE_DROP_EXTRA_KEY];
  return dropFlag === true;
}
|
|
151
|
+
/**
 * Builds the canonical lookup key for a source reference.
 *
 * - Non-string input (e.g. a NULL column value) is returned unchanged instead
 *   of throwing, so callers that feed raw database rows cannot crash here.
 * - Whitespace is trimmed; an empty result is returned as-is.
 * - Refs that do not start with `http://` or `https://` are lower-cased.
 * - HTTP(S) URLs are parsed and rebuilt as protocol + host + path + query +
 *   fragment: protocol, host and path are lower-cased and one trailing slash
 *   is removed from non-root paths. Query and fragment keep their case.
 *   Only those components are emitted, so userinfo in the URL is not part of
 *   the key.
 * - If the URL cannot be parsed, the trimmed text is lower-cased instead.
 *
 * @param {string} ref - Raw source reference (URL or other identifier).
 * @returns {string} The canonical key (non-string input is passed through).
 */
function canonicalHttpSourceRef(ref) {
  // Pass null/undefined/non-strings through untouched rather than throwing on .trim().
  if (typeof ref !== "string") return ref;
  const t = ref.trim();
  if (!t) return t;
  if (!/^https?:\/\//i.test(t)) return t.toLowerCase();
  let u;
  try {
    u = new URL(t);
  } catch {
    // Looks like http(s) but is not a valid URL: fall back to plain lower-casing.
    return t.toLowerCase();
  }
  const protocol = u.protocol.toLowerCase();
  const host = u.host.toLowerCase();
  let path = u.pathname;
  // Drop one trailing slash, but keep the root path "/" intact.
  if (path.length > 1 && path.endsWith("/")) {
    path = path.slice(0, -1);
  }
  path = path.toLowerCase();
  return `${protocol}//${host}${path}${u.search}${u.hash}`;
}
|
|
169
|
+
/**
 * Picks the later of two timestamp strings using plain string comparison.
 *
 * @param {string|null|undefined} a - First timestamp (may be empty/missing).
 * @param {string|null|undefined} b - Second timestamp (may be empty/missing).
 * @returns {string|null|undefined} `b` when `a` is falsy, `a` when `b` is
 *   falsy, otherwise whichever compares greater (`a` wins ties).
 */
function maxIso(a, b) {
  if (a && b) {
    return b > a ? b : a;
  }
  return a ? a : b;
}
|
|
174
|
+
/**
 * Collapses per-source stat rows whose `source_url` values share the same
 * canonical key.
 *
 * @param {Array<{source_url: string, count: number, latest_at: string}>} rows
 *   Rows to merge.
 * @returns {Array<{source_url: string, count: number, latest_at: string}>}
 *   One row per canonical key, with counts summed and `latest_at` set to the
 *   larger value per `maxIso`, sorted by `count` descending.
 */
function mergeSourceStatsRows(rows) {
  const merged = /* @__PURE__ */ new Map();
  for (const { source_url: rawUrl, count, latest_at } of rows) {
    const key = canonicalHttpSourceRef(rawUrl);
    const seen = merged.get(key);
    if (seen) {
      // Accumulate into the entry this function created earlier for the same key.
      seen.count = seen.count + count;
      seen.latest_at = maxIso(seen.latest_at, latest_at);
    } else {
      merged.set(key, { count, latest_at });
    }
  }
  const result = [];
  for (const [source_url, stats] of merged) {
    result.push({ source_url, count: stats.count, latest_at: stats.latest_at });
  }
  result.sort((left, right) => right.count - left.count);
  return result;
}
|
|
190
|
+
// Frozen, prototype-less namespace object exposing the source-ref helpers,
// tagged with Symbol.toStringTag "Module" so it mimics an ES module namespace.
const httpSourceRef = /* @__PURE__ */ Object.freeze(
  /* @__PURE__ */ Object.defineProperty(
    /* @__PURE__ */ Object.assign(/* @__PURE__ */ Object.create(null), {
      canonicalHttpSourceRef,
      mergeSourceStatsRows
    }),
    Symbol.toStringTag,
    { value: "Module" }
  )
);
|
|
151
195
|
const __dir = dirname(fileURLToPath(import.meta.url));
|
|
152
196
|
const base = basename(__dir);
|
|
153
197
|
const PACKAGE_ROOT = base === "app" || base === "dist" ? join(__dir, "..") : __dir;
|
|
@@ -181,8 +225,9 @@ async function migrateFile(from, to) {
|
|
|
181
225
|
logger.warn("config", "配置迁移失败", { from, to, err: err instanceof Error ? err.message : String(err) });
|
|
182
226
|
}
|
|
183
227
|
}
|
|
184
|
-
const
|
|
185
|
-
const
|
|
228
|
+
const INIT_DATA_DIR = join(PACKAGE_ROOT, "init");
|
|
229
|
+
const EXAMPLE_SOURCES = join(INIT_DATA_DIR, "sources.json");
|
|
230
|
+
const EXAMPLE_CONFIG = join(INIT_DATA_DIR, "config.json");
|
|
186
231
|
async function seedExampleConfigsIfMissing() {
|
|
187
232
|
if (!await pathExists(SOURCES_CONFIG_PATH) && await pathExists(EXAMPLE_SOURCES)) {
|
|
188
233
|
try {
|
|
@@ -519,13 +564,31 @@ function initSchema(db) {
|
|
|
519
564
|
}
|
|
520
565
|
} catch {
|
|
521
566
|
}
|
|
567
|
+
migrateItemsSourceUrlIfNeeded(db);
|
|
568
|
+
}
|
|
569
|
+
/**
 * One-time migration that rewrites `items.source_url` to its canonical form.
 *
 * Does nothing when the database `user_version` is already 2 or higher.
 * Otherwise every row whose canonical key differs from the stored value is
 * updated, and `user_version` is set to 2, all inside one transaction.
 *
 * @param {object} db - Database handle exposing `pragma`, `prepare` and
 *   `transaction` (the file imports better-sqlite3).
 * @returns {void}
 */
function migrateItemsSourceUrlIfNeeded(db) {
  const schemaVersion = db.pragma("user_version", { simple: true });
  if (schemaVersion >= 2) return;
  const storedRows = db.prepare("SELECT rowid, source_url FROM items").all();
  const updateStmt = db.prepare("UPDATE items SET source_url = @next WHERE rowid = @rowid");
  const applyMigration = db.transaction(() => {
    storedRows.forEach(({ rowid, source_url: current }) => {
      const next = canonicalHttpSourceRef(current);
      // Skip rows that are already canonical to avoid needless writes.
      if (next === current) return;
      updateStmt.run({ next, rowid });
    });
    // Mark the migration as done in the same transaction as the updates.
    db.pragma("user_version = 2");
  });
  applyMigration();
}
|
|
523
585
|
async function upsertItems(items, sourceUrlOverride) {
|
|
524
586
|
if (items.length === 0) return { newCount: 0, newIds: /* @__PURE__ */ new Set() };
|
|
525
|
-
const
|
|
526
|
-
if (!
|
|
587
|
+
const raw = items[0].sourceRef?.trim();
|
|
588
|
+
if (!raw) {
|
|
527
589
|
throw new Error("upsertItems: 每条 item 须有 sourceRef,或传入 sourceUrlOverride");
|
|
528
590
|
}
|
|
591
|
+
const sourceUrl = canonicalHttpSourceRef(raw);
|
|
529
592
|
return withWriteLock(async () => {
|
|
530
593
|
const db = await getDb();
|
|
531
594
|
const stmt = db.prepare(`
|
|
@@ -632,11 +695,13 @@ async function updateItemContent(item) {
|
|
|
632
695
|
}
|
|
633
696
|
async function queryFeedItems(sourceUrls, limit, offset, opts) {
|
|
634
697
|
if (sourceUrls.length === 0) return { items: [], hasMore: false };
|
|
698
|
+
const expanded = [...new Set(sourceUrls.map((u) => canonicalHttpSourceRef(u)).filter(Boolean))];
|
|
699
|
+
if (expanded.length === 0) return { items: [], hasMore: false };
|
|
635
700
|
const db = await getDb();
|
|
636
|
-
const placeholders =
|
|
701
|
+
const placeholders = expanded.map((_, i) => `@u${i}`).join(", ");
|
|
637
702
|
const conditions = [`source_url IN (${placeholders})`];
|
|
638
703
|
const params = { lim: limit + 1, off: offset };
|
|
639
|
-
|
|
704
|
+
expanded.forEach((url, i) => {
|
|
640
705
|
params[`u${i}`] = url;
|
|
641
706
|
});
|
|
642
707
|
if (opts?.since) {
|
|
@@ -670,12 +735,20 @@ async function queryItems(opts) {
|
|
|
670
735
|
const conditions = [];
|
|
671
736
|
const params = { limit, offset };
|
|
672
737
|
if (sourceUrl) {
|
|
738
|
+
const key = canonicalHttpSourceRef(sourceUrl);
|
|
739
|
+
if (!key) {
|
|
740
|
+
return { items: [], total: 0 };
|
|
741
|
+
}
|
|
673
742
|
conditions.push("i.source_url = @sourceUrl");
|
|
674
|
-
params.sourceUrl =
|
|
743
|
+
params.sourceUrl = key;
|
|
675
744
|
} else if (sourceUrls && sourceUrls.length > 0) {
|
|
676
|
-
const
|
|
745
|
+
const expanded = [...new Set(sourceUrls.map((s) => canonicalHttpSourceRef(s)).filter(Boolean))];
|
|
746
|
+
if (expanded.length === 0) {
|
|
747
|
+
return { items: [], total: 0 };
|
|
748
|
+
}
|
|
749
|
+
const placeholders = expanded.map((_, i) => `@src${i}`).join(", ");
|
|
677
750
|
conditions.push(`i.source_url IN (${placeholders})`);
|
|
678
|
-
|
|
751
|
+
expanded.forEach((s, i) => params[`src${i}`] = s);
|
|
679
752
|
}
|
|
680
753
|
if (author && author.trim().length >= 2) {
|
|
681
754
|
conditions.push("instr(i.author, @author) > 0");
|
|
@@ -769,9 +842,11 @@ async function deleteItem(id) {
|
|
|
769
842
|
}
|
|
770
843
|
/**
 * Deletes every stored item belonging to one source.
 *
 * @param {string} sourceUrl - Source reference; it is trimmed and converted
 *   to its canonical key before matching.
 * @returns {Promise<number>} Number of rows deleted, or 0 when the input is
 *   missing/blank or canonicalises to an empty key.
 */
async function deleteItemsBySourceUrl(sourceUrl) {
  const trimmed = sourceUrl?.trim();
  if (!trimmed) return 0;
  const key = canonicalHttpSourceRef(trimmed);
  if (!key) return 0;
  // The delete runs under the shared write lock.
  return withWriteLock(async () => {
    const db = await getDb();
    const deleteStmt = db.prepare("DELETE FROM items WHERE source_url = @sourceUrl");
    const { changes } = deleteStmt.run({ sourceUrl: key });
    return changes;
  });
}
|
|
@@ -786,10 +861,12 @@ async function getPendingPushItems(limit = 100) {
|
|
|
786
861
|
return mapRowsToDbItems(rows);
|
|
787
862
|
}
|
|
788
863
|
/**
 * Returns per-source item statistics.
 *
 * Groups `items` by `source_url` in SQL (item count plus the latest of
 * `pub_date`/`fetched_at`), then merges rows that share a canonical source
 * key via `mergeSourceStatsRows`.
 *
 * @returns {Promise<Array<{source_url: string, count: number, latest_at: string}>>}
 */
async function getSourceStats() {
  // Deferred lookup of the helper namespace (kept as an async hop, as in the bundle).
  const helpers = await Promise.resolve().then(() => httpSourceRef);
  const { mergeSourceStatsRows: mergeRows } = helpers;
  const db = await getDb();
  const statsQuery = db.prepare(
    "SELECT source_url, COUNT(*) as count, MAX(COALESCE(pub_date, fetched_at)) as latest_at FROM items GROUP BY source_url ORDER BY count DESC"
  );
  const groupedRows = statsQuery.all();
  return mergeRows(groupedRows);
}
|
|
794
871
|
async function insertLog(entry) {
|
|
795
872
|
const db = await getLogsDb();
|
|
@@ -969,6 +1046,9 @@ const logger = {
|
|
|
969
1046
|
}
|
|
970
1047
|
};
|
|
971
1048
|
const execAsync = promisify(exec);
|
|
1049
|
+
const VIEWPORT_WIDTH = 1366;
|
|
1050
|
+
const VIEWPORT_HEIGHT_HEADLESS = 5e3;
|
|
1051
|
+
const VIEWPORT_HEIGHT_HEADFUL = 1200;
|
|
972
1052
|
/**
 * Determines which proxy URL to use.
 *
 * @param {{proxy?: string}|null|undefined} config - Optional settings object.
 * @returns {string|undefined} `config.proxy` when it is not null/undefined,
 *   otherwise the `HTTP_PROXY` environment variable, otherwise `HTTPS_PROXY`.
 */
function resolveProxy(config) {
  const explicit = config?.proxy;
  if (explicit != null) {
    return explicit;
  }
  const { HTTP_PROXY: httpProxy, HTTPS_PROXY: httpsProxy } = process.env;
  return httpProxy ?? httpsProxy;
}
|
|
@@ -979,6 +1059,14 @@ function parseProxy(proxy) {
|
|
|
979
1059
|
const password = u.password || void 0;
|
|
980
1060
|
return { serverUrl, username, password };
|
|
981
1061
|
}
|
|
1062
|
+
/**
 * Supplies proxy credentials to a page when the resolved proxy URL has any.
 *
 * @param {object} page - Page object exposing `authenticate`.
 * @param {object} [opts] - Options forwarded to `resolveProxy`.
 * @returns {Promise<void>} Resolves without action when no proxy is
 *   configured or the proxy carries neither a username nor a password.
 */
async function applyProxyAuthToPage(page, opts) {
  const proxyUrl = resolveProxy(opts);
  if (!proxyUrl) return;
  const credentials = parseProxy(proxyUrl);
  const hasNoCredentials = credentials.username === void 0 && credentials.password === void 0;
  if (hasNoCredentials) return;
  // A missing half of the pair is sent as an empty string.
  await page.authenticate({
    username: credentials.username ?? "",
    password: credentials.password ?? ""
  });
}
|
|
982
1070
|
function launchArgs(config) {
|
|
983
1071
|
const base2 = [
|
|
984
1072
|
"--disable-blink-features=AutomationControlled",
|
|
@@ -990,8 +1078,8 @@ function launchArgs(config) {
|
|
|
990
1078
|
"--disable-site-isolation-trials",
|
|
991
1079
|
"--disable-infobars"
|
|
992
1080
|
];
|
|
993
|
-
const height = config?.headless !== false ?
|
|
994
|
-
base2.push(`--window-size
|
|
1081
|
+
const height = config?.headless !== false ? VIEWPORT_HEIGHT_HEADLESS : VIEWPORT_HEIGHT_HEADFUL;
|
|
1082
|
+
base2.push(`--window-size=${VIEWPORT_WIDTH},${height}`);
|
|
995
1083
|
const proxy = resolveProxy(config);
|
|
996
1084
|
if (proxy) {
|
|
997
1085
|
const { serverUrl } = parseProxy(proxy);
|
|
@@ -1081,150 +1169,130 @@ function headersToRecord(headers) {
|
|
|
1081
1169
|
/**
 * Prepares a freshly opened page: sets a fixed desktop user agent, sizes the
 * viewport (height depends on headless vs. headful), then runs `stealthPage`.
 *
 * @param {object} page - Page to configure.
 * @param {boolean} [headless=true] - Selects the headless or headful viewport height.
 * @returns {Promise<void>}
 */
async function setupPage(page, headless = true) {
  await page.setUserAgent(
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
  );
  const viewport = {
    width: VIEWPORT_WIDTH,
    height: headless ? VIEWPORT_HEIGHT_HEADLESS : VIEWPORT_HEIGHT_HEADFUL
  };
  await page.setViewport(viewport);
  await stealthPage(page);
}
|
|
1087
|
-
let _browser = null;
|
|
1088
|
-
let _browserHeadless = true;
|
|
1089
|
-
let _launchPromise = null;
|
|
1090
1178
|
/**
 * Tells whether an error looks like a detached-frame or closed-session failure.
 *
 * @param {unknown} e - Thrown value; an Error's `message` is inspected,
 *   anything else is stringified first.
 * @returns {boolean} True when the text matches, case-insensitively,
 *   "detached", "Navigating frame was detached" or "Session closed".
 */
function isFrameDetachedError(e) {
  const detachedPattern = /detached|Navigating frame was detached|Session closed/i;
  let text;
  if (e instanceof Error) {
    text = e.message;
  } else {
    text = String(e);
  }
  return detachedPattern.test(text);
}
|
|
1094
|
-
async function
|
|
1095
|
-
if (!_browser) return false;
|
|
1096
|
-
try {
|
|
1097
|
-
await _browser.version();
|
|
1098
|
-
return true;
|
|
1099
|
-
} catch {
|
|
1100
|
-
_browser = null;
|
|
1101
|
-
return false;
|
|
1102
|
-
}
|
|
1103
|
-
}
|
|
1104
|
-
async function getOrCreateBrowser(config) {
|
|
1182
|
+
/**
 * Launches a new Chrome instance through puppeteer-core.
 *
 * The executable is taken from `config.chromeExecutablePath`, then the
 * `CHROME_PATH` environment variable, then `findChromeExecutable()`.
 * Before the first attempt, `killStaleChromeProcesses` is called with the
 * absolute profile directory (when one is configured). If an attempt fails
 * with an error that `isAlreadyRunningError` recognises, the launch is
 * retried up to two more times, waiting 2 s and then 4 s.
 *
 * @param {object} config
 * @param {boolean} [config.headless] - Headless unless explicitly `false`.
 * @param {string} [config.chromeExecutablePath] - Explicit Chrome binary.
 * @param {string} [config.cacheDir] - Passed to `getUserDataDir` to derive the profile dir.
 * @param {string} [config.proxy] - Proxy forwarded to `launchArgs`.
 * @returns {Promise<object>} The launched browser.
 * @throws {Error} When no Chrome executable is found; when the profile
 *   directory is still in use after all retries (the original failure is
 *   attached as `cause`); or whatever non-retryable error the launch raised.
 */
async function launchBrowser(config) {
  const wantHeadless = config.headless !== false;
  const executablePath = config.chromeExecutablePath ?? process.env.CHROME_PATH ?? findChromeExecutable();
  if (!executablePath) {
    throw new Error("未找到 Chrome 可执行文件,请安装 Google Chrome 或设置 CHROME_PATH 环境变量");
  }
  const userDataDir = getUserDataDir(config.cacheDir);
  const maxRetries = 2;
  let lastErr;
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      if (attempt === 0 && userDataDir) {
        // First attempt only: let the helper deal with leftovers holding this profile dir.
        const absUserDataDir = resolve(userDataDir);
        await killStaleChromeProcesses(absUserDataDir);
      }
      if (attempt > 0) {
        // Linear back-off before each retry (2 s, then 4 s).
        const waitMs = attempt * 2e3;
        logger.info("scraper", "userDataDir 曾被占用,等待后重试", { waitMs, attempt });
        await new Promise((r) => setTimeout(r, waitMs));
      }
      return await puppeteerCore.launch({
        headless: wantHeadless,
        args: launchArgs({ proxy: config.proxy, headless: wantHeadless }),
        userDataDir,
        executablePath,
        ignoreDefaultArgs: ["--enable-automation"]
      });
    } catch (e) {
      lastErr = e;
      // Only "already running" failures are retried; anything else propagates untouched.
      if (!isAlreadyRunningError(e)) {
        throw e;
      }
      if (attempt < maxRetries) {
        continue;
      }
      const dir = userDataDir ?? "browser_data/main";
      // Keep the underlying launch failure reachable via `cause` for diagnosis.
      throw new Error(
        `Chrome 的 profile 目录已被占用(${dir})。通常是因为上次未正常退出或同时运行了多个本服务实例。请关闭占用该目录的 Chrome 进程后重试,或设置环境变量 CACHE_DIR 使用不同缓存目录。`,
        { cause: e }
      );
    }
  }
  // Defensive: every loop iteration above returns, continues or throws.
  throw lastErr;
}
|
|
1173
|
-
|
|
1174
|
-
_browser?.close().catch(() => {
|
|
1175
|
-
});
|
|
1176
|
-
});
|
|
1177
|
-
process.once("SIGINT", async () => {
|
|
1178
|
-
await _browser?.close().catch(() => {
|
|
1179
|
-
});
|
|
1180
|
-
process.exit(0);
|
|
1181
|
-
});
|
|
1182
|
-
process.once("SIGTERM", async () => {
|
|
1183
|
-
await _browser?.close().catch(() => {
|
|
1184
|
-
});
|
|
1185
|
-
process.exit(0);
|
|
1186
|
-
});
|
|
1187
|
-
async function preCheckAuth(authFlow, cacheDir) {
|
|
1225
|
+
/**
 * Checks, in a throwaway browser, whether the stored profile is already
 * authenticated for an auth flow.
 *
 * Returns `true` immediately when the flow has no `domain` or no `cacheDir`
 * is given. Otherwise it launches a browser, opens `loginUrl`, waits a fixed
 * 3 s after DOMContentLoaded and returns the result of `checkAuth`. The page
 * and browser are always closed; close failures are ignored.
 *
 * @param {{checkAuth: Function, loginUrl: string, domain?: string}} authFlow
 * @param {string} cacheDir - Cache directory forwarded to `launchBrowser`.
 * @param {object} [opts] - Optional `headless` flag and proxy settings.
 * @returns {Promise<boolean>} Whatever `checkAuth(page, currentUrl)` yields,
 *   or `true` when the check is skipped.
 */
async function preCheckAuth(authFlow, cacheDir, opts) {
  const { checkAuth, loginUrl, domain } = authFlow;
  const nothingToCheck = domain == null || !cacheDir;
  if (nothingToCheck) return true;

  const ignoreCloseError = () => {
  };
  const isHeadless = opts?.headless !== false;
  const launchConfig = { headless: isHeadless, cacheDir, proxy: resolveProxy(opts) };
  const browser = await launchBrowser(launchConfig);
  try {
    const page = await browser.newPage();
    try {
      await setupPage(page, isHeadless);
      await applyProxyAuthToPage(page, opts);
      await page.goto(loginUrl, { waitUntil: "domcontentloaded", timeout: 6e4 });
      // Fixed pause after navigation before the auth check runs.
      await new Promise((done) => setTimeout(done, 3e3));
      const currentUrl = page.url();
      return await checkAuth(page, currentUrl);
    } finally {
      await page.close().catch(ignoreCloseError);
    }
  } finally {
    await browser.close().catch(ignoreCloseError);
  }
}
|
|
1202
|
-
async function ensureAuth(authFlow, cacheDir) {
|
|
1251
|
+
/**
 * Opens a visible (non-headless) browser on the login page and waits until
 * `checkAuth` reports success.
 *
 * After loading `loginUrl` and a fixed 3 s pause, `checkAuth` is called once;
 * if it fails, it is polled every `pollIntervalMs` until `loginTimeoutMs`
 * has elapsed. The page and browser are always closed; close failures are
 * ignored.
 *
 * @param {{checkAuth: Function, loginUrl: string, loginTimeoutMs?: number, pollIntervalMs?: number}} authFlow
 *   `loginTimeoutMs` defaults to 60000, `pollIntervalMs` to 2000.
 * @param {string} cacheDir - Cache directory forwarded to `launchBrowser`.
 * @param {object} [opts] - Proxy settings.
 * @returns {Promise<void>} Resolves once authenticated.
 * @throws {Error} When authentication is not detected within `loginTimeoutMs`.
 */
async function ensureAuth(authFlow, cacheDir, opts) {
  const { checkAuth, loginUrl, loginTimeoutMs = 60 * 1e3, pollIntervalMs = 2e3 } = authFlow;
  const pause = (ms) => new Promise((done) => setTimeout(done, ms));
  const ignoreCloseError = () => {
  };
  const browser = await launchBrowser({ headless: false, cacheDir, proxy: resolveProxy(opts) });
  try {
    const page = await browser.newPage();
    try {
      await setupPage(page, false);
      await applyProxyAuthToPage(page, opts);
      await page.goto(loginUrl, { waitUntil: "domcontentloaded", timeout: 6e4 });
      await pause(3e3);
      // Already logged in? Then there is nothing to wait for.
      if (await checkAuth(page, page.url())) return;
      const startTime = Date.now();
      // Poll until the login is detected or the time budget runs out.
      while (Date.now() - startTime < loginTimeoutMs) {
        await pause(pollIntervalMs);
        if (await checkAuth(page, page.url())) return;
      }
      throw new Error(`登录超时(${loginTimeoutMs}ms)`);
    } finally {
      await page.close().catch(ignoreCloseError);
    }
  } finally {
    await browser.close().catch(ignoreCloseError);
  }
}
|
|
1224
1279
|
async function fetchHtml(url, config = {}) {
|
|
1225
|
-
const {
|
|
1280
|
+
const {
|
|
1281
|
+
timeoutMs,
|
|
1282
|
+
headers,
|
|
1283
|
+
cookies,
|
|
1284
|
+
cacheDir,
|
|
1285
|
+
checkAuth,
|
|
1286
|
+
authFlow,
|
|
1287
|
+
purify,
|
|
1288
|
+
headless,
|
|
1289
|
+
waitAfterLoadMs,
|
|
1290
|
+
waitForSelector,
|
|
1291
|
+
waitForSelectorTimeoutMs,
|
|
1292
|
+
useHttpResponseBody
|
|
1293
|
+
} = config;
|
|
1226
1294
|
const isHeadless = headless !== false;
|
|
1227
|
-
const browser = await
|
|
1295
|
+
const browser = await launchBrowser({
|
|
1228
1296
|
headless: isHeadless,
|
|
1229
1297
|
cacheDir,
|
|
1230
1298
|
proxy: resolveProxy(config),
|
|
@@ -1233,70 +1301,84 @@ async function fetchHtml(url, config = {}) {
|
|
|
1233
1301
|
const navigationTimeout = timeoutMs ?? 6e4;
|
|
1234
1302
|
const maxAttempts = 2;
|
|
1235
1303
|
let lastError;
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
await setupPage(page, isHeadless);
|
|
1246
|
-
const extraHeaders = { "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", ...headers ?? {} };
|
|
1247
|
-
if (cookies != null && cookies !== "") {
|
|
1248
|
-
extraHeaders.cookie = cookies;
|
|
1249
|
-
}
|
|
1250
|
-
await page.setExtraHTTPHeaders(extraHeaders);
|
|
1251
|
-
const proxy = resolveProxy(config);
|
|
1252
|
-
if (proxy) {
|
|
1253
|
-
const { username, password } = parseProxy(proxy);
|
|
1254
|
-
if (username !== void 0 || password !== void 0) {
|
|
1255
|
-
await page.authenticate({ username: username ?? "", password: password ?? "" });
|
|
1304
|
+
try {
|
|
1305
|
+
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
|
1306
|
+
const page = await browser.newPage();
|
|
1307
|
+
const isRetry = attempt === 1;
|
|
1308
|
+
const waitUntil = isRetry ? "domcontentloaded" : "load";
|
|
1309
|
+
const extraWaitMs = isRetry ? Math.min(500, Math.max(0, waitAfterLoadMs ?? 2e3)) : Math.max(0, waitAfterLoadMs ?? 2e3);
|
|
1310
|
+
try {
|
|
1311
|
+
if (config.browserContext) {
|
|
1312
|
+
await config.browserContext(page.browserContext());
|
|
1256
1313
|
}
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
}
|
|
1269
|
-
if (checkAuth != null || authFlow != null) {
|
|
1270
|
-
const authCheck = checkAuth ?? authFlow?.checkAuth;
|
|
1271
|
-
if (authCheck != null) {
|
|
1272
|
-
const ok = await authCheck(page, url);
|
|
1273
|
-
if (!ok) {
|
|
1274
|
-
throw new Error("checkAuth failed: 未通过认证检查,请先调用 ensureAuth 进行预处理登录");
|
|
1314
|
+
await setupPage(page, isHeadless);
|
|
1315
|
+
const extraHeaders = { "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", ...headers ?? {} };
|
|
1316
|
+
if (cookies != null && cookies !== "") {
|
|
1317
|
+
extraHeaders.cookie = cookies;
|
|
1318
|
+
}
|
|
1319
|
+
await page.setExtraHTTPHeaders(extraHeaders);
|
|
1320
|
+
const proxy = resolveProxy(config);
|
|
1321
|
+
if (proxy) {
|
|
1322
|
+
const { username, password } = parseProxy(proxy);
|
|
1323
|
+
if (username !== void 0 || password !== void 0) {
|
|
1324
|
+
await page.authenticate({ username: username ?? "", password: password ?? "" });
|
|
1275
1325
|
}
|
|
1276
1326
|
}
|
|
1327
|
+
if (timeoutMs != null) {
|
|
1328
|
+
await page.setDefaultNavigationTimeout(timeoutMs);
|
|
1329
|
+
}
|
|
1330
|
+
const response = await page.goto(url, { waitUntil, timeout: navigationTimeout });
|
|
1331
|
+
if (extraWaitMs > 0) {
|
|
1332
|
+
await new Promise((resolve2) => setTimeout(resolve2, extraWaitMs));
|
|
1333
|
+
}
|
|
1334
|
+
if (waitForSelector != null && waitForSelector !== "" && !isRetry) {
|
|
1335
|
+
const selectorTimeout = waitForSelectorTimeoutMs ?? 2e4;
|
|
1336
|
+
await page.waitForSelector(waitForSelector, { timeout: selectorTimeout });
|
|
1337
|
+
}
|
|
1338
|
+
if (checkAuth != null || authFlow != null) {
|
|
1339
|
+
const authCheck = checkAuth ?? authFlow?.checkAuth;
|
|
1340
|
+
if (authCheck != null) {
|
|
1341
|
+
const ok = await authCheck(page, url);
|
|
1342
|
+
if (!ok) {
|
|
1343
|
+
throw new Error("checkAuth failed: 未通过认证检查,请先调用 ensureAuth 进行预处理登录");
|
|
1344
|
+
}
|
|
1345
|
+
}
|
|
1346
|
+
}
|
|
1347
|
+
let rawBody;
|
|
1348
|
+
if (useHttpResponseBody === true && response != null) {
|
|
1349
|
+
try {
|
|
1350
|
+
rawBody = await response.text();
|
|
1351
|
+
} catch {
|
|
1352
|
+
rawBody = await page.content();
|
|
1353
|
+
}
|
|
1354
|
+
} else {
|
|
1355
|
+
rawBody = await page.content();
|
|
1356
|
+
}
|
|
1357
|
+
const finalUrl = response?.url() ?? page.url() ?? String(url);
|
|
1358
|
+
const status = response?.status() ?? 0;
|
|
1359
|
+
const statusText = response?.statusText() ?? "";
|
|
1360
|
+
const rawHeaders = response?.headers() ?? {};
|
|
1361
|
+
const normalizedHeaders = headersToRecord(rawHeaders);
|
|
1362
|
+
const body = applyPurify(rawBody, purify);
|
|
1363
|
+
await page.close().catch(() => {
|
|
1364
|
+
});
|
|
1365
|
+
return { finalUrl, status, statusText, headers: normalizedHeaders, body };
|
|
1366
|
+
} catch (e) {
|
|
1367
|
+
lastError = e;
|
|
1368
|
+
await page.close().catch(() => {
|
|
1369
|
+
});
|
|
1370
|
+
if (isRetry || !isFrameDetachedError(e)) {
|
|
1371
|
+
throw e;
|
|
1372
|
+
}
|
|
1373
|
+
logger.warn("scraper", "fetchHtml 因 frame 分离重试", { url, attempt: attempt + 1, err: e instanceof Error ? e.message : String(e) });
|
|
1374
|
+
await new Promise((r) => setTimeout(r, 800));
|
|
1277
1375
|
}
|
|
1278
|
-
const rawBody = await page.content();
|
|
1279
|
-
const finalUrl = response?.url() ?? page.url() ?? String(url);
|
|
1280
|
-
const status = response?.status() ?? 0;
|
|
1281
|
-
const statusText = response?.statusText() ?? "";
|
|
1282
|
-
const rawHeaders = response?.headers() ?? {};
|
|
1283
|
-
const normalizedHeaders = headersToRecord(rawHeaders);
|
|
1284
|
-
const body = applyPurify(rawBody, purify);
|
|
1285
|
-
await page.close().catch(() => {
|
|
1286
|
-
});
|
|
1287
|
-
return { finalUrl, status, statusText, headers: normalizedHeaders, body };
|
|
1288
|
-
} catch (e) {
|
|
1289
|
-
lastError = e;
|
|
1290
|
-
await page.close().catch(() => {
|
|
1291
|
-
});
|
|
1292
|
-
if (isRetry || !isFrameDetachedError(e)) {
|
|
1293
|
-
throw e;
|
|
1294
|
-
}
|
|
1295
|
-
logger.warn("scraper", "fetchHtml 因 frame 分离重试", { url, attempt: attempt + 1, err: e instanceof Error ? e.message : String(e) });
|
|
1296
|
-
await new Promise((r) => setTimeout(r, 800));
|
|
1297
1376
|
}
|
|
1377
|
+
throw lastError;
|
|
1378
|
+
} finally {
|
|
1379
|
+
await browser.close().catch(() => {
|
|
1380
|
+
});
|
|
1298
1381
|
}
|
|
1299
|
-
throw lastError;
|
|
1300
1382
|
}
|
|
1301
1383
|
const VALID_INTERVALS = ["1min", "5min", "10min", "30min", "1h", "6h", "12h", "1day", "3day", "7day"];
|
|
1302
1384
|
function cronToRefreshInterval(cronExpr) {
|
|
@@ -1468,19 +1550,78 @@ async function extractFromLink(link, extractorConfig = {}, fetchConfig = {}) {
|
|
|
1468
1550
|
cacheKey: extractorConfig.cacheKey ?? (cacheDir ? cacheKey(link, "forever") : void 0)
|
|
1469
1551
|
});
|
|
1470
1552
|
}
|
|
1553
|
+
const DEFAULT_BASE_URL = "https://api.openai.com/v1";
|
|
1554
|
+
const DEFAULT_MODEL = "gpt-4o-mini";
|
|
1555
|
+
let fileCache = null;
|
|
1556
|
+
/**
 * Drop the memoized LLM settings so the next lookup re-reads the config file.
 * Called after the settings file has been rewritten.
 */
function invalidateLLMConfigCache() {
  fileCache = null;
}
|
|
1559
|
+
/**
 * Synchronously load the `llm` section of the JSON config at CONFIG_PATH.
 *
 * The parsed section is memoized in `fileCache` together with the file's
 * mtime; the file is only re-read when that mtime changes. A missing file,
 * a failed stat/read, or invalid JSON all produce an empty object.
 *
 * @returns {{apiKey?: string, baseUrl?: string, model?: string}} Only fields
 *   present as non-empty strings; `baseUrl` and `model` are trimmed, `apiKey`
 *   is kept verbatim.
 */
function readLlmFromFileSync() {
  if (!existsSync(CONFIG_PATH)) {
    return {};
  }
  try {
    const { mtimeMs } = statSync(CONFIG_PATH);
    // Unchanged mtime: reuse the previously parsed section.
    if (fileCache && fileCache.mtimeMs === mtimeMs) {
      return fileCache.llm;
    }
    const parsed = JSON.parse(readFileSync(CONFIG_PATH, "utf-8"));
    const section = parsed?.llm;
    const llm = {};
    if (section && typeof section === "object") {
      if (typeof section.apiKey === "string" && section.apiKey.length > 0) {
        llm.apiKey = section.apiKey;
      }
      // Whitespace-only values count as "not set" for baseUrl and model.
      for (const field of ["baseUrl", "model"]) {
        const value = section[field];
        if (typeof value === "string" && value.trim()) {
          llm[field] = value.trim();
        }
      }
    }
    fileCache = { mtimeMs, llm };
    return llm;
  } catch {
    // Best effort: any stat/read/parse failure means "no file settings".
    return {};
  }
}
|
|
1471
1580
|
function getLLMConfig() {
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
};
|
|
1581
|
+
const file = readLlmFromFileSync();
|
|
1582
|
+
const apiKey = file.apiKey ?? process.env.OPENAI_API_KEY;
|
|
1583
|
+
const baseUrl = file.baseUrl ?? process.env.OPENAI_BASE_URL ?? DEFAULT_BASE_URL;
|
|
1584
|
+
const model = file.model ?? process.env.OPENAI_MODEL ?? DEFAULT_MODEL;
|
|
1585
|
+
return { apiKey, baseUrl, model };
|
|
1586
|
+
}
|
|
1587
|
+
/**
 * Extract the assistant's text from a chat-completion style response.
 *
 * Resolution order for the first choice:
 *   1. `message.content`, trimmed, when it is a non-empty string;
 *   2. `message.reasoning_content`, trimmed, when it is a non-empty string;
 *   3. otherwise an Error describing why there is no text (refusal,
 *      tool call, content filter, length cut-off, or a generic fallback
 *      that includes `finish_reason`).
 *
 * A response with no `choices` array, or a choice without a `message`
 * object, is reported through the same descriptive errors instead of
 * surfacing a raw TypeError.
 *
 * @param {object} completion Chat-completion response object.
 * @returns {string} The trimmed assistant text.
 * @throws {Error} When no usable text is present.
 */
function extractAssistantText(completion) {
  const choice = completion?.choices?.[0];
  if (!choice) throw new Error("LLM 返回无 choices");
  // Some backends omit `message` entirely; treat that as an empty message so
  // the finish_reason diagnostics below still apply.
  const msg = choice.message ?? {};

  const content = msg.content;
  if (typeof content === "string") {
    const text = content.trim();
    if (text.length > 0) return text;
  }

  // Fall back to the reasoning channel when the main content is empty.
  const reasoning = msg.reasoning_content;
  if (typeof reasoning === "string" && reasoning.trim().length > 0) {
    return reasoning.trim();
  }

  const refusal = msg.refusal;
  if (typeof refusal === "string" && refusal.trim()) {
    throw new Error(`模型拒绝: ${refusal.trim()}`);
  }

  const fr = choice.finish_reason;
  if (fr === "tool_calls") {
    throw new Error("LLM 返回了工具调用而非文本,请换一个模型或关闭工具调用");
  }
  if (fr === "content_filter") {
    throw new Error("内容被内容策略过滤");
  }
  if (fr === "length") {
    throw new Error(
      "LLM 输出在 content / reasoning_content 均为空前已用尽"
    );
  }
  throw new Error(`LLM 返回空内容 (finish_reason=${String(fr)})`);
}
|
|
1478
1619
|
function mergeConfig(override) {
|
|
1479
1620
|
const env = getLLMConfig();
|
|
1480
1621
|
const apiKey = override?.apiKey ?? env.apiKey;
|
|
1481
1622
|
const baseUrl = override?.apiUrl ?? override?.baseUrl ?? env.baseUrl;
|
|
1482
1623
|
const model = override?.model ?? env.model;
|
|
1483
|
-
if (!apiKey) throw new Error("LLM API Key
|
|
1624
|
+
if (!apiKey) throw new Error("LLM API Key 未配置:请在管理后台「设置 → LLM」或环境变量 OPENAI_API_KEY 中设置");
|
|
1484
1625
|
return { apiKey, baseUrl, model };
|
|
1485
1626
|
}
|
|
1486
1627
|
async function chatJson(prompt, config, options) {
|
|
@@ -1492,8 +1633,7 @@ async function chatJson(prompt, config, options) {
|
|
|
1492
1633
|
max_tokens: options?.maxTokens ?? 8192,
|
|
1493
1634
|
response_format: { type: "json_object" }
|
|
1494
1635
|
});
|
|
1495
|
-
const content = completion
|
|
1496
|
-
if (!content) throw new Error("LLM 返回空内容");
|
|
1636
|
+
const content = extractAssistantText(completion);
|
|
1497
1637
|
return JSON.parse(content);
|
|
1498
1638
|
}
|
|
1499
1639
|
async function chatText(prompt, config, options) {
|
|
@@ -1504,9 +1644,7 @@ async function chatText(prompt, config, options) {
|
|
|
1504
1644
|
messages: [{ role: "user", content: prompt }],
|
|
1505
1645
|
max_tokens: options?.maxTokens ?? 8192
|
|
1506
1646
|
});
|
|
1507
|
-
|
|
1508
|
-
if (!content) throw new Error("LLM 返回空内容");
|
|
1509
|
-
return content;
|
|
1647
|
+
return extractAssistantText(completion);
|
|
1510
1648
|
}
|
|
1511
1649
|
function generateGuid(link) {
|
|
1512
1650
|
return createHash("sha256").update(link).digest("hex");
|
|
@@ -1590,7 +1728,7 @@ async function parseHtml(html, config = {}) {
|
|
|
1590
1728
|
const actualMode = mode ?? (llmConfig != null ? "llm" : customParser != null ? "custom" : "llm");
|
|
1591
1729
|
if (actualMode === "llm") {
|
|
1592
1730
|
if (llmConfig == null && !getLLMConfig().apiKey) {
|
|
1593
|
-
throw new Error('mode 为 "llm" 时必须提供 llmConfig
|
|
1731
|
+
throw new Error('mode 为 "llm" 时必须提供 llmConfig,或在后台「设置 → LLM」/ OPENAI_API_KEY 中配置 Key');
|
|
1594
1732
|
}
|
|
1595
1733
|
const htmlForLLM = applyPurify(html, purify !== false);
|
|
1596
1734
|
entries = await parseWithLLM(htmlForLLM, url, llmConfig ?? {});
|
|
@@ -1773,7 +1911,8 @@ function buildSiteContext(site, ctx) {
|
|
|
1773
1911
|
waitAfterLoadMs: opts?.waitMs,
|
|
1774
1912
|
purify: opts?.purify,
|
|
1775
1913
|
waitForSelector: opts?.waitForSelector,
|
|
1776
|
-
waitForSelectorTimeoutMs: opts?.waitForSelectorTimeoutMs
|
|
1914
|
+
waitForSelectorTimeoutMs: opts?.waitForSelectorTimeoutMs,
|
|
1915
|
+
useHttpResponseBody: opts?.useHttpResponseBody
|
|
1777
1916
|
});
|
|
1778
1917
|
return { html: res.body, finalUrl: res.finalUrl ?? url, status: res.status };
|
|
1779
1918
|
},
|
|
@@ -1817,7 +1956,10 @@ function createWebSource(site) {
|
|
|
1817
1956
|
proxy: site.proxy ?? void 0,
|
|
1818
1957
|
preCheck: authFlow ? async (ctx) => {
|
|
1819
1958
|
if (!ctx.cacheDir) return;
|
|
1820
|
-
const passed = await preCheckAuth(authFlow, ctx.cacheDir
|
|
1959
|
+
const passed = await preCheckAuth(authFlow, ctx.cacheDir, {
|
|
1960
|
+
proxy: ctx.proxy,
|
|
1961
|
+
headless: ctx.headless
|
|
1962
|
+
});
|
|
1821
1963
|
if (!passed) throw new AuthRequiredError(`站点 ${site.id} 需要登录,请先执行 ensureAuth`);
|
|
1822
1964
|
} : void 0,
|
|
1823
1965
|
async fetchItems(sourceId, ctx) {
|
|
@@ -1870,7 +2012,25 @@ const PLUGIN_HOST_DEPS = {
|
|
|
1870
2012
|
logger
|
|
1871
2013
|
};
|
|
1872
2014
|
function buildSourceContext(partial) {
|
|
1873
|
-
|
|
2015
|
+
const { cacheDir, headless, proxy } = partial;
|
|
2016
|
+
return {
|
|
2017
|
+
...partial,
|
|
2018
|
+
deps: PLUGIN_HOST_DEPS,
|
|
2019
|
+
async fetchHtml(url, opts) {
|
|
2020
|
+
const res = await fetchHtml(url, {
|
|
2021
|
+
cacheDir,
|
|
2022
|
+
useCache: false,
|
|
2023
|
+
headless,
|
|
2024
|
+
proxy,
|
|
2025
|
+
waitAfterLoadMs: opts?.waitMs,
|
|
2026
|
+
purify: opts?.purify,
|
|
2027
|
+
waitForSelector: opts?.waitForSelector,
|
|
2028
|
+
waitForSelectorTimeoutMs: opts?.waitForSelectorTimeoutMs,
|
|
2029
|
+
useHttpResponseBody: opts?.useHttpResponseBody
|
|
2030
|
+
});
|
|
2031
|
+
return { html: res.body, finalUrl: res.finalUrl ?? url, status: res.status };
|
|
2032
|
+
}
|
|
2033
|
+
};
|
|
1874
2034
|
}
|
|
1875
2035
|
const registeredSources = [];
|
|
1876
2036
|
function sourcePatternToRegex(pattern) {
|
|
@@ -1915,6 +2075,38 @@ async function initSources() {
|
|
|
1915
2075
|
function resolveRef(src) {
|
|
1916
2076
|
return src.ref ?? src.url ?? "";
|
|
1917
2077
|
}
|
|
2078
|
+
/**
 * Read the `globalProxy` string from the JSON config at CONFIG_PATH.
 *
 * @returns {Promise<string|undefined>} The trimmed proxy value, or
 *   `undefined` when the file cannot be read or parsed, the field is not a
 *   string, or it is blank.
 */
async function readGlobalProxyFromConfig() {
  try {
    const text = await readFile(CONFIG_PATH, "utf-8");
    const { globalProxy } = JSON.parse(text);
    if (typeof globalProxy !== "string") {
      return void 0;
    }
    const trimmed = globalProxy.trim();
    return trimmed.length > 0 ? trimmed : void 0;
  } catch {
    // Unreadable or malformed config is treated as "no global proxy".
    return void 0;
  }
}
|
|
2090
|
+
/**
 * Persist the global proxy setting into the JSON config at CONFIG_PATH.
 *
 * The existing config is read first so other top-level keys are kept. A
 * blank (whitespace-only) value removes the `globalProxy` key; any other
 * value is stored trimmed.
 *
 * @param {string} proxy Proxy URL, or an empty string to clear the setting.
 * @returns {Promise<void>}
 */
async function saveGlobalProxyToConfig(proxy) {
  let root = {};
  try {
    const parsed = JSON.parse(await readFile(CONFIG_PATH, "utf-8"));
    // Only a plain object can carry the setting: `null` would make the
    // property write below throw, and an array would accept the property
    // but lose it again in JSON.stringify.
    if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
      root = parsed;
    }
  } catch {
    // Missing or unparsable config: start from an empty object.
  }
  const trimmed = proxy.trim();
  if (trimmed.length === 0) {
    delete root.globalProxy;
  } else {
    root.globalProxy = trimmed;
  }
  await writeFile(CONFIG_PATH, JSON.stringify(root, null, 2) + "\n", "utf-8");
}
|
|
2105
|
+
/**
 * Pick the proxy for a site: its own non-blank `proxy` wins, otherwise the
 * global proxy from the config file is used.
 *
 * @param {{proxy?: string|null}} site Site definition.
 * @returns {Promise<string|undefined>} Trimmed proxy, or `undefined` if none.
 */
async function resolveProxyForSite(site) {
  const siteProxy = site.proxy?.trim();
  return siteProxy ? siteProxy : readGlobalProxyFromConfig();
}
|
|
1918
2110
|
async function loadSourcesFile() {
|
|
1919
2111
|
try {
|
|
1920
2112
|
const raw = await readFile(SOURCES_CONFIG_PATH, "utf-8");
|
|
@@ -1960,6 +2152,15 @@ async function saveSourcesFile(sources) {
|
|
|
1960
2152
|
"utf-8"
|
|
1961
2153
|
);
|
|
1962
2154
|
}
|
|
2155
|
+
/**
 * Resolve which proxy applies to a list URL.
 *
 * Precedence, first non-blank value wins:
 *   1. the `proxy` of the configured source whose ref equals `listUrl`;
 *   2. the `proxy` declared by the matched source/plugin object;
 *   3. the global proxy from the config file.
 *
 * @param {string} listUrl List URL being fetched.
 * @param {{proxy?: string|null}} source Source object matched for the URL.
 * @returns {Promise<string|undefined>} Trimmed proxy, or `undefined` if none.
 */
async function getEffectiveProxyForListUrl(listUrl, source) {
  const sources = await getAllSources();
  const subscription = sources.find((entry) => resolveRef(entry) === listUrl);
  return (
    subscription?.proxy?.trim() ||
    source.proxy?.trim() ||
    readGlobalProxyFromConfig()
  );
}
|
|
1963
2164
|
async function getSourcesRaw() {
|
|
1964
2165
|
try {
|
|
1965
2166
|
const raw = await readFile(SOURCES_CONFIG_PATH, "utf-8");
|
|
@@ -2277,24 +2478,35 @@ function onFeedUpdated(fn) {
|
|
|
2277
2478
|
eventBus.on("feed:updated", fn);
|
|
2278
2479
|
return () => eventBus.off("feed:updated", fn);
|
|
2279
2480
|
}
|
|
2280
|
-
async function
|
|
2481
|
+
async function getDeliverConfig() {
|
|
2281
2482
|
try {
|
|
2282
2483
|
const raw = await readFile(CONFIG_PATH, "utf-8");
|
|
2283
2484
|
const j = JSON.parse(raw);
|
|
2284
2485
|
const u = j?.deliver?.url;
|
|
2285
|
-
|
|
2486
|
+
const t = j?.deliver?.token;
|
|
2487
|
+
return {
|
|
2488
|
+
url: typeof u === "string" ? u.trim() : "",
|
|
2489
|
+
token: typeof t === "string" ? t.trim() : ""
|
|
2490
|
+
};
|
|
2286
2491
|
} catch {
|
|
2287
|
-
return "";
|
|
2492
|
+
return { url: "", token: "" };
|
|
2288
2493
|
}
|
|
2289
2494
|
}
|
|
2290
|
-
async function
|
|
2495
|
+
async function saveDeliverConfig(config) {
|
|
2291
2496
|
let root = {};
|
|
2292
2497
|
try {
|
|
2293
2498
|
const raw = await readFile(CONFIG_PATH, "utf-8");
|
|
2294
2499
|
root = JSON.parse(raw);
|
|
2295
2500
|
} catch {
|
|
2296
2501
|
}
|
|
2297
|
-
|
|
2502
|
+
const prev = root.deliver;
|
|
2503
|
+
const base2 = typeof prev === "object" && prev !== null && !Array.isArray(prev) ? { ...prev } : {};
|
|
2504
|
+
const url = config.url.trim();
|
|
2505
|
+
const token = config.token.trim();
|
|
2506
|
+
const next = { ...base2, url };
|
|
2507
|
+
if (token) next.token = token;
|
|
2508
|
+
else delete next.token;
|
|
2509
|
+
root.deliver = next;
|
|
2298
2510
|
await writeFile(CONFIG_PATH, JSON.stringify(root, null, 2) + "\n", "utf-8");
|
|
2299
2511
|
}
|
|
2300
2512
|
function feedItemsToPayload(items) {
|
|
@@ -2311,12 +2523,15 @@ function feedItemsToPayload(items) {
|
|
|
2311
2523
|
translations: i.translations
|
|
2312
2524
|
}));
|
|
2313
2525
|
}
|
|
2314
|
-
async function postDeliverItems(url, sourceRef, items) {
|
|
2526
|
+
async function postDeliverItems(url, sourceRef, items, options) {
|
|
2315
2527
|
if (!url.trim() || items.length === 0) return;
|
|
2316
2528
|
const body = JSON.stringify({ sourceRef, items: feedItemsToPayload(items) });
|
|
2529
|
+
const headers = { "Content-Type": "application/json" };
|
|
2530
|
+
const t = options?.bearerToken?.trim();
|
|
2531
|
+
if (t) headers.Authorization = `Bearer ${t}`;
|
|
2317
2532
|
const res = await fetch(url.trim(), {
|
|
2318
2533
|
method: "POST",
|
|
2319
|
-
headers
|
|
2534
|
+
headers,
|
|
2320
2535
|
body,
|
|
2321
2536
|
signal: AbortSignal.timeout(12e4)
|
|
2322
2537
|
});
|
|
@@ -2325,9 +2540,9 @@ async function postDeliverItems(url, sourceRef, items) {
|
|
|
2325
2540
|
throw new Error(`HTTP ${res.status}${text ? `: ${text.slice(0, 200)}` : ""}`);
|
|
2326
2541
|
}
|
|
2327
2542
|
}
|
|
2328
|
-
async function postDeliverItemsSafe(url, sourceRef, items) {
|
|
2543
|
+
async function postDeliverItemsSafe(url, sourceRef, items, options) {
|
|
2329
2544
|
try {
|
|
2330
|
-
await postDeliverItems(url, sourceRef, items);
|
|
2545
|
+
await postDeliverItems(url, sourceRef, items, options);
|
|
2331
2546
|
} catch (err) {
|
|
2332
2547
|
logger.warn("deliver", "投递失败", {
|
|
2333
2548
|
sourceRef,
|
|
@@ -2336,6 +2551,12 @@ async function postDeliverItemsSafe(url, sourceRef, items) {
|
|
|
2336
2551
|
});
|
|
2337
2552
|
}
|
|
2338
2553
|
}
|
|
2554
|
+
/**
 * Decide the `headless` flag for a feed fetch.
 *
 * With `force === true` the result is a strict boolean: `true` only when
 * `headless` is exactly `true`, `false` for anything else (including
 * unset). Without `force`, `headless` is passed through unchanged.
 *
 * @param {{force?: boolean, headless?: boolean}} config Feeder config.
 * @returns {boolean|undefined} Headless flag to use.
 */
function resolveHeadlessForFeeder(config) {
  const { force, headless } = config;
  return force === true ? headless === true : headless;
}
|
|
2339
2560
|
function buildChannelFromItems(listUrl, items, lng) {
|
|
2340
2561
|
const channel = {
|
|
2341
2562
|
title: items[0]?.author?.length ? `${items[0].author[0]} 的订阅` : "RSS 订阅",
|
|
@@ -2366,10 +2587,11 @@ const pipelineCtx = {
|
|
|
2366
2587
|
async function runPipelineOnItem(item, ctx) {
|
|
2367
2588
|
return runPipeline(item, { ...pipelineCtx, ...ctx });
|
|
2368
2589
|
}
|
|
2369
|
-
async function generateAndCache(listUrl, key, config) {
|
|
2370
|
-
const { cacheDir = "cache"
|
|
2590
|
+
async function generateAndCache(listUrl, key, config, proxy) {
|
|
2591
|
+
const { cacheDir = "cache" } = config;
|
|
2592
|
+
const headless = resolveHeadlessForFeeder(config);
|
|
2371
2593
|
const source = getSource(listUrl);
|
|
2372
|
-
const ctx = buildSourceContext({ cacheDir, headless, proxy
|
|
2594
|
+
const ctx = buildSourceContext({ cacheDir, headless, proxy });
|
|
2373
2595
|
let items;
|
|
2374
2596
|
try {
|
|
2375
2597
|
items = await source.fetchItems(listUrl, ctx);
|
|
@@ -2379,13 +2601,14 @@ async function generateAndCache(listUrl, key, config) {
|
|
|
2379
2601
|
logger.error("scraper", "抓取失败", { source_url: listUrl, err: message });
|
|
2380
2602
|
throw err;
|
|
2381
2603
|
}
|
|
2604
|
+
const sourceRefStored = canonicalHttpSourceRef(listUrl);
|
|
2382
2605
|
items.forEach((i) => {
|
|
2383
|
-
i.sourceRef =
|
|
2606
|
+
i.sourceRef = sourceRefStored;
|
|
2384
2607
|
i.author = normalizeAuthor(i.author);
|
|
2385
2608
|
});
|
|
2386
2609
|
generatingKeys.delete(key);
|
|
2387
2610
|
logger.info("scraper", "抓取成功", { source_url: listUrl, count: items.length });
|
|
2388
|
-
const deliverUrl = await
|
|
2611
|
+
const { url: deliverUrl, token: deliverToken } = await getDeliverConfig();
|
|
2389
2612
|
let newCount = 0;
|
|
2390
2613
|
let newIds = /* @__PURE__ */ new Set();
|
|
2391
2614
|
const upsertResult = await upsertItems(items).catch((err) => {
|
|
@@ -2398,7 +2621,7 @@ async function generateAndCache(listUrl, key, config) {
|
|
|
2398
2621
|
const shouldRunPipelineRow = (guid) => newIds.has(guid);
|
|
2399
2622
|
for (let i = 0; i < items.length; i++) {
|
|
2400
2623
|
if (!shouldRunPipelineRow(items[i].guid)) continue;
|
|
2401
|
-
const processed = await runPipelineOnItem(items[i], { sourceUrl:
|
|
2624
|
+
const processed = await runPipelineOnItem(items[i], { sourceUrl: sourceRefStored });
|
|
2402
2625
|
items[i] = processed;
|
|
2403
2626
|
if (isPipelineDroppedItem(processed)) {
|
|
2404
2627
|
await deleteItem(processed.guid).catch(
|
|
@@ -2412,24 +2635,28 @@ async function generateAndCache(listUrl, key, config) {
|
|
|
2412
2635
|
}
|
|
2413
2636
|
}
|
|
2414
2637
|
if (newCount > 0) {
|
|
2415
|
-
emitFeedUpdated({ sourceUrl:
|
|
2638
|
+
emitFeedUpdated({ sourceUrl: sourceRefStored, newCount: newCount - pipelineDroppedNew });
|
|
2416
2639
|
}
|
|
2417
2640
|
const out = items.filter((i) => !isPipelineDroppedItem(i));
|
|
2418
2641
|
if (deliverUrl && out.length > 0) {
|
|
2419
|
-
await postDeliverItemsSafe(deliverUrl,
|
|
2642
|
+
await postDeliverItemsSafe(deliverUrl, sourceRefStored, out, {
|
|
2643
|
+
bearerToken: deliverToken || void 0
|
|
2644
|
+
});
|
|
2420
2645
|
}
|
|
2421
2646
|
return { items: out };
|
|
2422
2647
|
}
|
|
2423
2648
|
async function getItems(listUrl, config = {}) {
|
|
2424
2649
|
const source = getSource(listUrl);
|
|
2650
|
+
const proxy = await getEffectiveProxyForListUrl(listUrl, source);
|
|
2651
|
+
const headless = resolveHeadlessForFeeder(config);
|
|
2425
2652
|
const key = config.cron ? cacheKeyFromCron(listUrl, config.cron) : cacheKey(listUrl, config.refreshInterval ?? source.refreshInterval ?? "1day");
|
|
2426
2653
|
if (source.preCheck != null) {
|
|
2427
2654
|
try {
|
|
2428
2655
|
await source.preCheck(
|
|
2429
2656
|
buildSourceContext({
|
|
2430
2657
|
cacheDir: config.cacheDir ?? "cache",
|
|
2431
|
-
headless
|
|
2432
|
-
proxy
|
|
2658
|
+
headless,
|
|
2659
|
+
proxy
|
|
2433
2660
|
})
|
|
2434
2661
|
);
|
|
2435
2662
|
} catch (err) {
|
|
@@ -2439,7 +2666,7 @@ async function getItems(listUrl, config = {}) {
|
|
|
2439
2666
|
}
|
|
2440
2667
|
let task = config.force ? void 0 : generatingKeys.get(key);
|
|
2441
2668
|
if (!task) {
|
|
2442
|
-
task = generateAndCache(listUrl, key, config);
|
|
2669
|
+
task = generateAndCache(listUrl, key, config, proxy);
|
|
2443
2670
|
if (!config.force) generatingKeys.set(key, task);
|
|
2444
2671
|
}
|
|
2445
2672
|
const { items } = await task;
|
|
@@ -2635,7 +2862,7 @@ function getGroupStats() {
|
|
|
2635
2862
|
return result;
|
|
2636
2863
|
}
|
|
2637
2864
|
const DEFAULT_REFRESH = "1day";
|
|
2638
|
-
const SOURCES_CONCURRENCY =
|
|
2865
|
+
const SOURCES_CONCURRENCY = 1;
|
|
2639
2866
|
function createPullTask(ref, cacheDir, cronExpr) {
|
|
2640
2867
|
return async () => {
|
|
2641
2868
|
try {
|
|
@@ -2672,7 +2899,7 @@ async function rescheduleSources(cacheDir, runNow2) {
|
|
|
2672
2899
|
}
|
|
2673
2900
|
}
|
|
2674
2901
|
async function initScheduler(cacheDir) {
|
|
2675
|
-
await rescheduleSources(cacheDir,
|
|
2902
|
+
await rescheduleSources(cacheDir, false);
|
|
2676
2903
|
let debounceTimer = null;
|
|
2677
2904
|
try {
|
|
2678
2905
|
const watcher = watch(SOURCES_CONFIG_PATH, () => {
|
|
@@ -2748,12 +2975,12 @@ function registerSchedulerRoutes(app) {
|
|
|
2748
2975
|
});
|
|
2749
2976
|
}
|
|
2750
2977
|
const SITE_TEMPLATE_FALLBACK = `/**
|
|
2751
|
-
* Site
|
|
2978
|
+
* Site 插件模板(由 /plugins 页添加,位于 .rssany/plugins/)
|
|
2752
2979
|
* HTML DOM 解析请用 ctx.deps.parseHtml,勿在插件内 import node_modules。
|
|
2753
2980
|
*/
|
|
2754
2981
|
export default {
|
|
2755
2982
|
id: "__PLUGIN_ID__",
|
|
2756
|
-
listUrlPattern:
|
|
2983
|
+
listUrlPattern: __LIST_URL_PATTERN__,
|
|
2757
2984
|
refreshInterval: "1day",
|
|
2758
2985
|
|
|
2759
2986
|
async fetchItems(sourceId, ctx) {
|
|
@@ -2770,6 +2997,11 @@ export default {
|
|
|
2770
2997
|
function isValidNewPluginId(id) {
|
|
2771
2998
|
return /^[a-zA-Z][a-zA-Z0-9_-]{0,63}$/.test(id) && id !== "generic" && id !== "new";
|
|
2772
2999
|
}
|
|
3000
|
+
/**
 * Validate a list-URL pattern submitted for a new plugin.
 *
 * @param {string} pattern Candidate pattern.
 * @returns {boolean} `true` when the pattern is 1–2048 characters long and
 *   contains no carriage return or line feed.
 */
function isValidNewListUrlPattern(pattern) {
  const { length } = pattern;
  const badLength = length === 0 || length > 2048;
  return !badLength && !/[\r\n]/.test(pattern);
}
|
|
2773
3005
|
async function fileExists(p) {
|
|
2774
3006
|
try {
|
|
2775
3007
|
await access(p);
|
|
@@ -2799,6 +3031,13 @@ function registerPluginsRoutes(app) {
|
|
|
2799
3031
|
if (!isValidNewPluginId(id)) {
|
|
2800
3032
|
return c.json({ error: "id 须为字母开头,仅含字母数字、下划线、连字符;不能为 generic 或 new" }, 400);
|
|
2801
3033
|
}
|
|
3034
|
+
const listUrlPatternRaw = typeof body.listUrlPattern === "string" ? body.listUrlPattern.trim() : "";
|
|
3035
|
+
if (!listUrlPatternRaw) {
|
|
3036
|
+
return c.json({ error: "缺少支持的站点(listUrlPattern),例如 https://example.com/*" }, 400);
|
|
3037
|
+
}
|
|
3038
|
+
if (!isValidNewListUrlPattern(listUrlPatternRaw)) {
|
|
3039
|
+
return c.json({ error: "支持的站点须为非空字符串,不超过 2048 字符,且不能含换行" }, 400);
|
|
3040
|
+
}
|
|
2802
3041
|
await mkdir(USER_PLUGINS_DIR, { recursive: true });
|
|
2803
3042
|
const outPath = join(USER_PLUGINS_DIR, `${id}.rssany.js`);
|
|
2804
3043
|
if (await fileExists(outPath)) return c.json({ error: "该 id 已存在同名文件" }, 409);
|
|
@@ -2807,7 +3046,8 @@ function registerPluginsRoutes(app) {
|
|
|
2807
3046
|
tpl = await readFile(PLUGIN_SITE_TEMPLATE_PATH, "utf-8");
|
|
2808
3047
|
} catch {
|
|
2809
3048
|
}
|
|
2810
|
-
const
|
|
3049
|
+
const patternLiteral = JSON.stringify(listUrlPatternRaw);
|
|
3050
|
+
const content = tpl.replace(/__PLUGIN_ID__/g, id).replace(/__LIST_URL_PATTERN__/g, patternLiteral);
|
|
2811
3051
|
if (!isAllowedPluginPath(outPath)) return c.json({ error: "路径不允许" }, 403);
|
|
2812
3052
|
try {
|
|
2813
3053
|
await writeFile(outPath, content, "utf-8");
|
|
@@ -2989,6 +3229,12 @@ function registerItemsRoutes(app) {
|
|
|
2989
3229
|
return c.json({ ok: false, message: err instanceof Error ? err.message : String(err) }, 400);
|
|
2990
3230
|
}
|
|
2991
3231
|
});
|
|
3232
|
+
app.delete("/api/items/by-source", requireAdmin(), async (c) => {
|
|
3233
|
+
const sourceUrl = (c.req.query("source_url") ?? "").trim();
|
|
3234
|
+
if (!sourceUrl) return c.json({ ok: false, message: "source_url 不能为空" }, 400);
|
|
3235
|
+
const deleted = await deleteItemsBySourceUrl(sourceUrl);
|
|
3236
|
+
return c.json({ ok: true, deleted });
|
|
3237
|
+
});
|
|
2992
3238
|
app.delete("/api/items/:id", async (c) => {
|
|
2993
3239
|
const id = decodeURIComponent(c.req.param("id") ?? "").trim();
|
|
2994
3240
|
if (!id) return c.json({ ok: false, message: "id 不能为空" }, 400);
|
|
@@ -2996,12 +3242,6 @@ function registerItemsRoutes(app) {
|
|
|
2996
3242
|
if (!deleted) return c.json({ ok: false, message: "条目不存在或已删除" }, 404);
|
|
2997
3243
|
return c.json({ ok: true });
|
|
2998
3244
|
});
|
|
2999
|
-
app.delete("/api/items/by-source", requireAdmin(), async (c) => {
|
|
3000
|
-
const sourceUrl = (c.req.query("source_url") ?? "").trim();
|
|
3001
|
-
if (!sourceUrl) return c.json({ ok: false, message: "source_url 不能为空" }, 400);
|
|
3002
|
-
const deleted = await deleteItemsBySourceUrl(sourceUrl);
|
|
3003
|
-
return c.json({ ok: true, deleted });
|
|
3004
|
-
});
|
|
3005
3245
|
app.get("/api/items", async (c) => {
|
|
3006
3246
|
const ref = c.req.query("ref") ?? c.req.query("source") ?? void 0;
|
|
3007
3247
|
const subscribed = parseSubscribedFlag$1(c.req.query("subscribed"));
|
|
@@ -3149,7 +3389,7 @@ function registerSourcesRoutes(app) {
|
|
|
3149
3389
|
const w = s.weight;
|
|
3150
3390
|
const weight = typeof w === "number" ? w : void 0;
|
|
3151
3391
|
return {
|
|
3152
|
-
ref: String(s.ref),
|
|
3392
|
+
ref: canonicalHttpSourceRef(String(s.ref)),
|
|
3153
3393
|
type,
|
|
3154
3394
|
label: s.label,
|
|
3155
3395
|
description: s.description,
|
|
@@ -3223,15 +3463,16 @@ function registerTopicsRoutes(app) {
|
|
|
3223
3463
|
}
|
|
3224
3464
|
function registerDeliverRoutes(app) {
|
|
3225
3465
|
app.get("/api/deliver", requireAdmin(), async (c) => {
|
|
3226
|
-
const url = await
|
|
3227
|
-
return c.json({ url });
|
|
3466
|
+
const { url, token } = await getDeliverConfig();
|
|
3467
|
+
return c.json({ url, token });
|
|
3228
3468
|
});
|
|
3229
3469
|
app.put("/api/deliver", requireAdmin(), async (c) => {
|
|
3230
3470
|
try {
|
|
3231
3471
|
const body = await c.req.json();
|
|
3232
3472
|
const url = typeof body?.url === "string" ? body.url.trim() : "";
|
|
3233
|
-
|
|
3234
|
-
|
|
3473
|
+
const token = typeof body?.token === "string" ? body.token.trim() : "";
|
|
3474
|
+
await saveDeliverConfig({ url, token });
|
|
3475
|
+
return c.json({ ok: true, url, token });
|
|
3235
3476
|
} catch (err) {
|
|
3236
3477
|
return c.json({ ok: false, message: err instanceof Error ? err.message : String(err) }, 400);
|
|
3237
3478
|
}
|
|
@@ -3240,6 +3481,7 @@ function registerDeliverRoutes(app) {
|
|
|
3240
3481
|
try {
|
|
3241
3482
|
const body = await c.req.json();
|
|
3242
3483
|
const url = typeof body?.url === "string" ? body.url.trim() : "";
|
|
3484
|
+
const token = typeof body?.token === "string" ? body.token.trim() : "";
|
|
3243
3485
|
if (!url) return c.json({ ok: false, message: "url 不能为空" }, 400);
|
|
3244
3486
|
const sample = {
|
|
3245
3487
|
guid: "deliver-test-" + Date.now(),
|
|
@@ -3248,22 +3490,150 @@ function registerDeliverRoutes(app) {
|
|
|
3248
3490
|
pubDate: (/* @__PURE__ */ new Date()).toISOString(),
|
|
3249
3491
|
summary: "若下游收到此条,说明投递 URL 可用。"
|
|
3250
3492
|
};
|
|
3251
|
-
await postDeliverItems(
|
|
3252
|
-
|
|
3253
|
-
|
|
3254
|
-
|
|
3255
|
-
|
|
3256
|
-
|
|
3257
|
-
|
|
3258
|
-
|
|
3259
|
-
|
|
3260
|
-
|
|
3493
|
+
await postDeliverItems(
|
|
3494
|
+
url,
|
|
3495
|
+
"rssany-deliver-test",
|
|
3496
|
+
[
|
|
3497
|
+
{
|
|
3498
|
+
guid: sample.guid,
|
|
3499
|
+
title: sample.title,
|
|
3500
|
+
link: sample.link,
|
|
3501
|
+
pubDate: new Date(sample.pubDate),
|
|
3502
|
+
summary: sample.summary,
|
|
3503
|
+
sourceRef: "rssany-deliver-test"
|
|
3504
|
+
}
|
|
3505
|
+
],
|
|
3506
|
+
{ bearerToken: token || void 0 }
|
|
3507
|
+
);
|
|
3261
3508
|
return c.json({ ok: true });
|
|
3262
3509
|
} catch (err) {
|
|
3263
3510
|
return c.json({ ok: false, message: err instanceof Error ? err.message : String(err) }, 400);
|
|
3264
3511
|
}
|
|
3265
3512
|
});
|
|
3266
3513
|
}
|
|
3514
|
+
/**
 * Normalize an optional string setting.
 *
 * @param {unknown} s Raw value.
 * @returns {string|undefined} The trimmed string, or `undefined` when `s`
 *   is not a string or is blank after trimming.
 */
function trimOrUndef(s) {
  const trimmed = typeof s === "string" ? s.trim() : "";
  return trimmed === "" ? void 0 : trimmed;
}
|
|
3519
|
+
/**
 * Read the `llm` section straight from the JSON config at CONFIG_PATH.
 *
 * @returns {Promise<{apiKey?: string, baseUrl?: string, model?: string}>}
 *   `apiKey` verbatim when it is a string; `baseUrl`/`model` trimmed, or
 *   `undefined` when blank or not strings. An empty object when the file
 *   cannot be read or parsed, or has no object-valued `llm` section.
 */
async function readLlmFileConfig() {
  try {
    const parsed = JSON.parse(await readFile(CONFIG_PATH, "utf-8"));
    const section = parsed?.llm;
    if (section && typeof section === "object") {
      return {
        apiKey: typeof section.apiKey === "string" ? section.apiKey : void 0,
        baseUrl: trimOrUndef(section.baseUrl),
        model: trimOrUndef(section.model)
      };
    }
    return {};
  } catch {
    // Unreadable or malformed config is reported as "nothing configured".
    return {};
  }
}
|
|
3535
|
+
/**
 * Persist LLM settings into the `llm` section of the JSON config at
 * CONFIG_PATH, then invalidate the in-memory LLM config cache.
 *
 * `baseUrl` and `model` are stored trimmed. The API key is replaced only
 * when a non-empty `input.apiKey` is supplied; otherwise a key already in
 * the file is carried over, so saving other fields does not wipe it.
 *
 * The file is read once, and the previous key is taken from that same
 * snapshot, so the preservation decision and the written document cannot
 * disagree.
 *
 * @param {{baseUrl: string, model: string, apiKey?: string}} input
 * @returns {Promise<void>}
 */
async function saveLlmSettings(input) {
  let root = {};
  try {
    const parsed = JSON.parse(await readFile(CONFIG_PATH, "utf-8"));
    // A non-object root (null, array, scalar) cannot hold settings; writing
    // to it would throw or be dropped on serialization.
    if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
      root = parsed;
    }
  } catch {
    // Missing or unparsable config: start from an empty object.
  }

  const prevLlm = root.llm;
  const prevKey =
    prevLlm && typeof prevLlm === "object" && typeof prevLlm.apiKey === "string"
      ? prevLlm.apiKey
      : void 0;

  const next = {
    baseUrl: input.baseUrl.trim(),
    model: input.model.trim()
  };
  const newKey = typeof input.apiKey === "string" && input.apiKey.length > 0 ? input.apiKey : void 0;
  // A new key wins; otherwise keep a non-empty key already stored in the file.
  const apiKey = newKey ?? prevKey;
  if (apiKey) {
    next.apiKey = apiKey;
  }

  root.llm = next;
  await writeFile(CONFIG_PATH, JSON.stringify(root, null, 2) + "\n", "utf-8");
  invalidateLLMConfigCache();
}
|
|
3557
|
+
/**
 * Register the admin-only LLM settings endpoints on `app`:
 *
 *   GET  /api/llm       – effective baseUrl/model plus key-presence flags
 *   PUT  /api/llm       – save baseUrl/model/(optional) apiKey, return the same view
 *   POST /api/llm/test  – send a one-word prompt through chatText
 *
 * The API key itself is never returned; only `hasApiKey` (resolved from
 * any source) and `apiKeyInFile` (stored in the config file) are exposed.
 *
 * @param {object} app Router instance exposing `get`/`put`/`post`.
 */
function registerLlmRoutes(app) {
  // Shared response view for GET and PUT.
  const describeLlmState = async () => {
    const resolved = getLLMConfig();
    const file = await readLlmFileConfig();
    return {
      baseUrl: resolved.baseUrl,
      model: resolved.model,
      hasApiKey: !!resolved.apiKey,
      apiKeyInFile: !!(file.apiKey && file.apiKey.length > 0)
    };
  };
  const messageOf = (err) => (err instanceof Error ? err.message : String(err));

  app.get("/api/llm", requireAdmin(), async (c) => c.json(await describeLlmState()));

  app.put("/api/llm", requireAdmin(), async (c) => {
    try {
      const body = await c.req.json();
      const settings = {
        baseUrl: typeof body.baseUrl === "string" ? body.baseUrl : "",
        model: typeof body.model === "string" ? body.model : ""
      };
      // Only forward apiKey when the client actually sent a string, so an
      // omitted key leaves the stored one untouched.
      if (typeof body.apiKey === "string") {
        settings.apiKey = body.apiKey;
      }
      await saveLlmSettings(settings);
      return c.json({ ok: true, ...(await describeLlmState()) });
    } catch (err) {
      return c.json({ ok: false, message: messageOf(err) }, 400);
    }
  });

  app.post("/api/llm/test", requireAdmin(), async (c) => {
    const startedAt = Date.now();
    try {
      if (!getLLMConfig().apiKey) {
        return c.json({ ok: false, message: "未配置 API Key(请在界面或 OPENAI_API_KEY 中设置)" }, 400);
      }
      // NOTE(review): 32768 is far more than a one-word reply needs;
      // presumably headroom for models that spend tokens on reasoning.
      // Confirm the configured model accepts a limit this high.
      const reply = await chatText("Reply with exactly the single word: ok", void 0, {
        maxTokens: 32768,
        debugLabel: "llmSettingsTest"
      });
      return c.json({ ok: true, reply });
    } catch (err) {
      const ms = Date.now() - startedAt;
      const message = messageOf(err);
      console.error("[llm/test] fail", { ms, message });
      return c.json({ ok: false, message }, 400);
    }
  });
}
|
|
3617
|
+
/**
 * Register the admin-only global proxy endpoints on `app`:
 *
 *   GET /api/proxy – current global proxy ("" when none is configured)
 *   PUT /api/proxy – store `body.globalProxy` (non-strings and a missing or
 *                    invalid JSON body are treated as ""), then echo back
 *                    the value re-read from the config file
 *
 * @param {object} app Router instance exposing `get`/`put`.
 */
function registerProxySettingsRoutes(app) {
  const currentGlobalProxy = async () => (await readGlobalProxyFromConfig()) ?? "";

  app.get("/api/proxy", requireAdmin(), async (c) => {
    return c.json({ globalProxy: await currentGlobalProxy() });
  });

  app.put("/api/proxy", requireAdmin(), async (c) => {
    try {
      // A body that fails to parse is handled like an empty object.
      const body = await c.req.json().catch(() => ({}));
      const requested = typeof body.globalProxy === "string" ? body.globalProxy : "";
      await saveGlobalProxyToConfig(requested);
      return c.json({ ok: true, globalProxy: await currentGlobalProxy() });
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      return c.json({ ok: false, message }, 400);
    }
  });
}
|
|
3267
3637
|
const tasks = /* @__PURE__ */ new Map();
|
|
3268
3638
|
let idCounter = 0;
|
|
3269
3639
|
function nextId() {
|
|
@@ -3337,8 +3707,310 @@ function registerTasksRoutes(app) {
|
|
|
3337
3707
|
}
|
|
3338
3708
|
});
|
|
3339
3709
|
}
|
|
3710
|
+
// --- Feed favicon cache configuration ---------------------------------------
// Sub-directory (joined under the cache dir) that holds cached icon files.
const CACHE_SUBDIR = "feed-favicons";
// Prefix mixed into the hashed cache file name.
const CACHE_KEY_PREFIX = "feed-favicon:v1:";
// Cached icons are considered fresh for three days (86400 seconds per day).
const CACHE_MAX_AGE_SEC = 3 * 86400;
const CACHE_MAX_AGE_MS = 1000 * CACHE_MAX_AGE_SEC;
// Cache-Control header value sent with icon responses.
const CACHE_CONTROL = `public, max-age=${CACHE_MAX_AGE_SEC}`;
// Timeout applied to each upstream request, in milliseconds.
const FETCH_TIMEOUT_MS = 6000;
// Upper bound for an icon payload (2 MiB).
const MAX_ICON_BYTES = 2 * 1024 ** 2;
// Upper bound for the part of a homepage that is inspected (512 KiB).
const MAX_HTML_BYTES = 1024 * 512;
// In-flight favicon lookups, keyed by domain.
const inflightByDomain = /* @__PURE__ */ new Map();
|
|
3719
|
+
// Maximum length of a full host name.
const MAX_DOMAIN_LEN = 253;
/**
 * Checks whether `s` is syntactically a plausible DNS host name.
 *
 * The value must be 1..MAX_DOMAIN_LEN characters long and consist of
 * dot-separated labels, each 1..63 characters of ASCII letters, digits or
 * hyphens, not starting or ending with a hyphen. Empty labels ("a..b") and
 * leading/trailing dots are rejected.
 *
 * NOTE(review): IPv4-looking names ("10.0.0.1") and single-label names
 * ("localhost") still pass. If the caller fetches from the validated host on
 * behalf of unauthenticated users, consider an additional private-address check.
 *
 * @param {string} s Candidate host name.
 * @returns {boolean} `true` when `s` passes the syntactic check.
 */
function isPlausibleHostname(s) {
  if (typeof s !== "string" || s.length === 0 || s.length > MAX_DOMAIN_LEN) return false;
  const labelPattern = /^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?$/i;
  return s.split(".").every((label) => labelPattern.test(label));
}
|
|
3724
|
+
/**
 * Maps a domain to its cache file location.
 *
 * The file name is the hex SHA-256 of the cache key prefix followed by the
 * lower-cased domain, so the result is case-insensitive with respect to the
 * domain.
 *
 * @param {string} domainKey Domain the icon belongs to.
 * @returns {string} Path below `CACHE_DIR/CACHE_SUBDIR`.
 */
function cacheFilePath(domainKey) {
  const hasher = createHash("sha256");
  hasher.update(CACHE_KEY_PREFIX + domainKey.toLowerCase());
  const fileName = hasher.digest("hex");
  return join(CACHE_DIR, CACHE_SUBDIR, fileName);
}
|
|
3728
|
+
/**
 * Lists the conventional icon locations on the site itself.
 *
 * Both the given host and its "www." counterpart (added or stripped) are
 * probed over HTTPS, each with the same three well-known paths. URLs for the
 * given host come first.
 *
 * @param {string} domain Host name (case-insensitive).
 * @returns {string[]} Candidate icon URLs.
 */
function originFaviconUrls(domain) {
  const host = domain.toLowerCase();
  const counterpart = host.startsWith("www.") ? host.slice(4) : `www.${host}`;
  const origins = new Set([`https://${host}`]);
  // A bare "www." has no counterpart to probe.
  if (counterpart) origins.add(`https://${counterpart}`);
  const iconPaths = ["/favicon.ico", "/favicon.png", "/apple-touch-icon.png"];
  return [...origins].flatMap((origin) => iconPaths.map((iconPath) => `${origin}${iconPath}`));
}
|
|
3746
|
+
/**
 * Lists the homepage URLs to inspect for `<link rel="icon">` tags: the host
 * itself first, then its "www." counterpart (added or stripped).
 *
 * @param {string} domain Host name (case-insensitive).
 * @returns {string[]} De-duplicated HTTPS homepage URLs.
 */
function homepageUrlsForDomain(domain) {
  const host = domain.toLowerCase();
  const counterpart = host.startsWith("www.") ? host.slice(4) : `www.${host}`;
  const pages = new Set([`https://${host}/`]);
  // A bare "www." has no counterpart to probe.
  if (counterpart) pages.add(`https://${counterpart}/`);
  return Array.from(pages);
}
|
|
3757
|
+
/**
 * Tells whether a `<link rel="…">` value designates an icon.
 *
 * The value is split on whitespace and compared case-insensitively. Any of
 * `icon`, `mask-icon`, `apple-touch-icon` or `apple-touch-icon-precomposed`
 * qualifies; the legacy "shortcut icon" pair is covered by its `icon` token.
 *
 * @param {string} rel Raw `rel` attribute value.
 * @returns {boolean}
 */
function isIconLinkRel(rel) {
  const relTokens = new Set(rel.toLowerCase().trim().split(/\s+/).filter(Boolean));
  const iconKinds = ["mask-icon", "apple-touch-icon", "apple-touch-icon-precomposed", "icon"];
  return iconKinds.some((kind) => relTokens.has(kind));
}
|
|
3764
|
+
/**
 * Extracts absolute icon URLs from the `<link>` elements of an HTML document.
 *
 * Relative hrefs are resolved against the document's `<base href>` when one is
 * present and parseable, otherwise against `pageUrl`. `data:` and `blob:`
 * hrefs are skipped, as is anything that does not resolve to http(s).
 *
 * @param {string} html Markup of the page.
 * @param {string} pageUrl URL the markup was fetched from.
 * @returns {string[]} Unique absolute URLs in document order.
 */
function parseLinkIconHrefs(html, pageUrl) {
  const doc = parse(html, { lowerCaseTagName: true });
  let resolveAgainst = pageUrl;
  const baseHref = doc.querySelector("base[href]")?.getAttribute("href")?.trim();
  if (baseHref) {
    try {
      resolveAgainst = new URL(baseHref, pageUrl).href;
    } catch {
      // Unparseable <base>: keep resolving against the page URL.
    }
  }
  // A Set keeps insertion order, giving de-duplication in document order.
  const found = new Set();
  for (const link of doc.querySelectorAll("link[href]")) {
    if (!isIconLinkRel(link.getAttribute("rel") ?? "")) continue;
    const target = link.getAttribute("href")?.trim();
    if (!target) continue;
    if (target.startsWith("data:") || target.startsWith("blob:")) continue;
    let absolute;
    try {
      absolute = new URL(target, resolveAgainst).href;
    } catch {
      // Hrefs that cannot be resolved are ignored.
      continue;
    }
    if (absolute.startsWith("http:") || absolute.startsWith("https:")) {
      found.add(absolute);
    }
  }
  return [...found];
}
|
|
3795
|
+
/**
 * Downloads at most `MAX_HTML_BYTES` of a page and returns it decoded as UTF-8.
 *
 * The response body is read as a stream and the transfer is cancelled once
 * the cap is reached, so an arbitrarily large (or endless) remote response
 * is never buffered in full.
 *
 * @param {string} url Page URL.
 * @returns {Promise<string|null>} The (possibly truncated) markup, or `null`
 *   when the request fails, times out, or returns a non-2xx status.
 */
async function fetchHtmlPage(url) {
  try {
    const upstream = await fetch(url, {
      redirect: "follow",
      headers: {
        Accept: "text/html,application/xhtml+xml;q=0.9,*/*;q=0.1",
        "User-Agent": "Mozilla/5.0 (compatible; RssAny/1.0; +https://github.com/rssany/rssany) favicon"
      },
      signal: AbortSignal.timeout(FETCH_TIMEOUT_MS)
    });
    if (!upstream.ok) {
      // Release the connection instead of leaving the body unread.
      await upstream.body?.cancel();
      return null;
    }
    if (!upstream.body) return "";
    const reader = upstream.body.getReader();
    const chunks = [];
    let received = 0;
    while (received < MAX_HTML_BYTES) {
      const { done, value } = await reader.read();
      if (done) break;
      chunks.push(value);
      received += value.byteLength;
    }
    if (received >= MAX_HTML_BYTES) {
      // Enough data for <head> inspection; stop downloading the rest.
      await reader.cancel();
    }
    return Buffer.concat(chunks).subarray(0, MAX_HTML_BYTES).toString("utf-8");
  } catch {
    // Network errors, timeouts and stream failures all mean "no page".
    return null;
  }
}
|
|
3814
|
+
/**
 * Looks for icon `<link>` tags on the site's homepage(s).
 *
 * Homepage candidates are tried one after another; the icon URLs of the first
 * page that yields any are returned. Setting the `FAVICON_SKIP_HTML`
 * environment variable to "1" or "true" disables the lookup.
 *
 * @param {string} domain Host name.
 * @returns {Promise<string[]>} Icon URLs, or an empty array.
 */
async function discoverIconUrlsFromHomepage(domain) {
  const skipFlag = process.env.FAVICON_SKIP_HTML;
  if (skipFlag === "1" || skipFlag === "true") return [];
  for (const pageUrl of homepageUrlsForDomain(domain)) {
    const markup = await fetchHtmlPage(pageUrl);
    if (markup) {
      const iconUrls = parseLinkIconHrefs(markup, pageUrl);
      if (iconUrls.length > 0) return iconUrls;
    }
  }
  return [];
}
|
|
3826
|
+
/**
 * Builds the DuckDuckGo icon-service URL for a domain.
 *
 * The domain is percent-encoded before being placed in the path, matching the
 * other provider URL builders, so unexpected characters cannot alter the
 * structure of the URL. Plain host names are unaffected.
 *
 * @param {string} domain Host name.
 * @returns {string} Icon URL.
 */
function duckduckgoFaviconUrl(domain) {
  return `https://icons.duckduckgo.com/ip3/${encodeURIComponent(domain)}.ico`;
}
|
|
3829
|
+
/**
 * Builds the icon.horse lookup URL for a domain (percent-encoded).
 *
 * @param {string} domain Host name.
 * @returns {string} Icon URL.
 */
function iconHorseUrl(domain) {
  const encodedDomain = encodeURIComponent(domain);
  return "https://icon.horse/icon/" + encodedDomain;
}
|
|
3832
|
+
/**
 * Builds the unavatar.io lookup URL for a domain (percent-encoded).
 *
 * @param {string} domain Host name.
 * @returns {string} Icon URL.
 */
function unavatarUrl(domain) {
  const encodedDomain = encodeURIComponent(domain);
  return "https://unavatar.io/" + encodedDomain;
}
|
|
3835
|
+
/**
 * Builds the Google S2 favicon URL for a domain, requesting a 64px icon.
 *
 * @param {string} domain Host name (percent-encoded into the query string).
 * @returns {string} Icon URL.
 */
function googleFaviconUrl(domain) {
  const query = `domain=${encodeURIComponent(domain)}&sz=64`;
  return `https://www.google.com/s2/favicons?${query}`;
}
|
|
3838
|
+
/**
 * Picks the character shown on a generated letter avatar.
 *
 * A leading "www." is ignored; the first ASCII letter or digit of the rest is
 * returned upper-cased. "?" is returned when there is none.
 *
 * @param {string} domain Host name.
 * @returns {string} A single character.
 */
function letterCharFromDomain(domain) {
  let host = domain.toLowerCase();
  if (host.startsWith("www.")) host = host.slice(4);
  for (const ch of host) {
    if (/[a-z0-9]/.test(ch)) return ch.toUpperCase();
  }
  return "?";
}
|
|
3843
|
+
/**
 * Derives a stable hue (0-359) from a domain.
 *
 * The first two bytes of the SHA-256 digest of the lower-cased domain are
 * read as a big-endian 16-bit number and reduced modulo 360.
 *
 * @param {string} domain Host name (case-insensitive).
 * @returns {number} Integer hue in degrees.
 */
function hueFromDomain(domain) {
  const digest = createHash("sha256").update(domain.toLowerCase()).digest();
  return digest.readUInt16BE(0) % 360;
}
|
|
3847
|
+
/**
 * Escapes the characters that are significant in XML text and
 * double-quoted attribute values: `&`, `<`, `>` and `"`.
 *
 * The replacement strings must be the entity references themselves; with the
 * entities decoded the function would map each character to itself (and the
 * quote case would not even parse).
 *
 * @param {string} s Raw text.
 * @returns {string} Text safe to embed in XML.
 */
function escapeXmlText(s) {
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;" };
  // Single pass, so an ampersand produced by one replacement is never re-escaped.
  return s.replace(/[&<>"]/g, (ch) => entities[ch]);
}
|
|
3850
|
+
/**
 * Renders a 64x64 SVG placeholder icon: a rounded square whose colour is
 * derived from the domain, with the domain's letter centred in white.
 *
 * @param {string} domain Host name.
 * @returns {Buffer} UTF-8 encoded SVG document.
 */
function letterAvatarSvg(domain) {
  const glyph = escapeXmlText(letterCharFromDomain(domain));
  const fill = `hsl(${hueFromDomain(domain)} 42% 44%)`;
  const markup = [
    `<?xml version="1.0" encoding="UTF-8"?>`,
    `<svg xmlns="http://www.w3.org/2000/svg" width="64" height="64" viewBox="0 0 64 64">`,
    `<rect width="64" height="64" rx="12" fill="${fill}"/>`,
    `<text x="32" y="32" dominant-baseline="central" text-anchor="middle" fill="#ffffff" font-family="system-ui,Segoe UI,Helvetica,sans-serif" font-size="28" font-weight="600">${glyph}</text>`,
    `</svg>`
  ].join("\n");
  return Buffer.from(markup, "utf-8");
}
|
|
3861
|
+
/**
 * Produces the generated letter avatar for a domain together with its MIME type.
 *
 * @param {string} domain Host name.
 * @returns {{ buf: Buffer, mime: string }} SVG bytes and "image/svg+xml".
 */
function letterAvatarForDomain(domain) {
  const buf = letterAvatarSvg(domain);
  return { buf, mime: "image/svg+xml" };
}
|
|
3864
|
+
/**
 * Tells whether a caught value is an object whose `code` is "ENOENT".
 *
 * @param {unknown} e Value caught from a failed operation.
 * @returns {boolean}
 */
function isEnoent(e) {
  if (e === null || typeof e !== "object") return false;
  return e.code === "ENOENT";
}
|
|
3867
|
+
/**
 * Detects an image type from the leading bytes of a payload.
 *
 * Recognised: PNG, GIF, JPEG, WebP (RIFF container), ICO/CUR, and SVG.
 * For SVG, a document that starts with an XML declaration is only accepted
 * when an `<svg` tag or an SVG doctype appears within the first 1 KiB;
 * otherwise any XML document (RSS, Atom, XHTML error pages, …) would be
 * mistaken for an image.
 *
 * @param {Buffer} buf Payload bytes.
 * @returns {string|null} MIME type, or `null` when nothing matches.
 */
function sniffImageMime(buf) {
  if (buf.length < 4) return null;
  if (buf[0] === 137 && buf[1] === 80 && buf[2] === 78 && buf[3] === 71) return "image/png";
  if (buf.length >= 6 && buf[0] === 71 && buf[1] === 73 && buf[2] === 70) return "image/gif";
  if (buf[0] === 255 && buf[1] === 216 && buf[2] === 255) return "image/jpeg";
  if (buf.length >= 12 && buf.subarray(0, 4).toString("ascii") === "RIFF" && buf.subarray(8, 12).toString("ascii") === "WEBP") {
    return "image/webp";
  }
  // ICO (type 1) and CUR (type 2): reserved word 0, then little-endian type.
  if (buf.length >= 6 && buf.readUInt16LE(0) === 0 && (buf[2] === 1 || buf[2] === 2) && buf[3] === 0) {
    return "image/x-icon";
  }
  // trimStart() also drops a leading byte-order mark.
  const head = buf.subarray(0, Math.min(1024, buf.length)).toString("utf-8").trimStart();
  if (head.startsWith("<svg")) return "image/svg+xml";
  if (head.startsWith("<?xml") && /<svg[\s>]|<!DOCTYPE\s+svg/i.test(head)) return "image/svg+xml";
  return null;
}
|
|
3882
|
+
// Media types accepted as icons all start with this prefix.
const IMAGE_CT_PREFIX = "image/";
/**
 * Extracts an image media type from a Content-Type header value.
 *
 * Parameters after ";" are dropped and the type is lower-cased.
 *
 * @param {string|null|undefined} ct Header value.
 * @returns {string|null} The media type when it is an image type, else `null`.
 */
function mimeFromFetch(ct) {
  if (!ct) return null;
  const [mediaType] = ct.split(";");
  const normalized = mediaType.trim().toLowerCase();
  if (!normalized.startsWith(IMAGE_CT_PREFIX)) return null;
  return normalized;
}
|
|
3888
|
+
/**
 * Determines the MIME type of an icon payload: content sniffing wins, and the
 * Content-Type header is consulted only when sniffing finds nothing.
 *
 * @param {Buffer} buf Payload bytes.
 * @param {string|null} ct Content-Type header value, if any.
 * @returns {string|null} Image MIME type, or `null`.
 */
function resolveImageMime(buf, ct) {
  const sniffed = sniffImageMime(buf);
  if (sniffed !== null && sniffed !== undefined) return sniffed;
  return mimeFromFetch(ct);
}
|
|
3891
|
+
/**
 * Downloads one icon candidate.
 *
 * The body is read as a stream and the transfer is abandoned as soon as more
 * than `MAX_ICON_BYTES` have arrived, so an oversized response is never
 * buffered in full. Every failure mode — network error, timeout (including a
 * timeout or stream error while the body is being read), non-2xx status,
 * empty or oversized body — yields `null` rather than an exception.
 *
 * @param {string} url Candidate URL.
 * @returns {Promise<{ buf: Buffer, ct: string|null }|null>} Payload and its
 *   Content-Type header, or `null` when the candidate is unusable.
 */
async function fetchIconCandidate(url) {
  try {
    const upstream = await fetch(url, {
      redirect: "follow",
      headers: {
        Accept: "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
        "User-Agent": "Mozilla/5.0 (compatible; RssAny/1.0; +https://github.com/rssany/rssany) favicon"
      },
      signal: AbortSignal.timeout(FETCH_TIMEOUT_MS)
    });
    if (!upstream.ok || !upstream.body) {
      // Release the connection instead of leaving the body unread.
      await upstream.body?.cancel();
      return null;
    }
    const reader = upstream.body.getReader();
    const chunks = [];
    let received = 0;
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      received += value.byteLength;
      if (received > MAX_ICON_BYTES) {
        await reader.cancel();
        return null;
      }
      chunks.push(value);
    }
    if (received === 0) return null;
    return { buf: Buffer.concat(chunks, received), ct: upstream.headers.get("content-type") };
  } catch {
    return null;
  }
}
|
|
3911
|
+
/**
 * Tells whether a fetched candidate is an image.
 *
 * @param {{ buf: Buffer, ct: string|null }|null} got Result of a candidate fetch.
 * @returns {boolean} `true` when a MIME type starting with "image/" was resolved.
 */
function isValidIcon(got) {
  if (!got) return false;
  const detected = resolveImageMime(got.buf, got.ct);
  if (!detected) return false;
  return detected.startsWith(IMAGE_CT_PREFIX);
}
|
|
3916
|
+
/**
 * Assembles every URL that may serve an icon for a domain.
 *
 * Order: the site's own well-known paths, icons advertised by its homepage,
 * then third-party icon services. Environment switches:
 * - `FAVICON_THIRD_PARTY` = "0" / "false" leaves out DuckDuckGo, icon.horse
 *   and unavatar.
 * - `FAVICON_INCLUDE_GOOGLE` = "1" / "true" appends the Google service.
 *
 * @param {string} domain Host name.
 * @param {string[]} htmlIconUrls Icon URLs discovered in the homepage markup.
 * @returns {string[]} Candidate URLs.
 */
function upstreamFaviconUrls(domain, htmlIconUrls) {
  const envIs = (name, ...accepted) => accepted.includes(process.env[name]);
  const candidates = originFaviconUrls(domain);
  candidates.push(...htmlIconUrls);
  if (!envIs("FAVICON_THIRD_PARTY", "0", "false")) {
    candidates.push(duckduckgoFaviconUrl(domain));
    candidates.push(iconHorseUrl(domain));
    candidates.push(unavatarUrl(domain));
  }
  if (envIs("FAVICON_INCLUDE_GOOGLE", "1", "true")) {
    candidates.push(googleFaviconUrl(domain));
  }
  return candidates;
}
|
|
3926
|
+
/**
 * Resolves an icon for a domain from the network.
 *
 * All candidate URLs are requested concurrently and the first one that
 * yields a recognisable image wins. Candidate URLs are de-duplicated first
 * (icons advertised by the homepage frequently repeat a well-known path),
 * and the MIME type of each payload is resolved once. When no candidate
 * succeeds, the generated letter avatar is returned instead.
 *
 * @param {string} domain Host name.
 * @returns {Promise<{ buf: Buffer, mime: string }>} Icon bytes and MIME type.
 */
async function fetchFaviconFromNetwork(domain) {
  const htmlIconUrls = await discoverIconUrlsFromHomepage(domain);
  const candidates = [...new Set(upstreamFaviconUrls(domain, htmlIconUrls))];
  const attempts = candidates.map(async (url) => {
    const got = await fetchIconCandidate(url);
    const mime = got ? resolveImageMime(got.buf, got.ct) : null;
    if (!mime) {
      // Rejecting lets Promise.any skip this candidate.
      throw new Error("not-an-icon");
    }
    return { buf: got.buf, mime };
  });
  try {
    return await Promise.any(attempts);
  } catch {
    // Every candidate failed (or there were none).
    return letterAvatarForDomain(domain);
  }
}
|
|
3943
|
+
/**
 * Starts a network lookup for a domain unless one is already in flight, in
 * which case the pending promise is shared. The in-flight entry is removed
 * once the lookup settles.
 *
 * @param {string} domain Host name used as the de-duplication key.
 * @returns {Promise<{ buf: Buffer, mime: string }>} Result of the lookup.
 */
function fetchFaviconDeduped(domain) {
  const pending = inflightByDomain.get(domain);
  if (pending) return pending;
  const started = fetchFaviconFromNetwork(domain).finally(() => {
    // Only clear the slot if it still belongs to this lookup.
    if (inflightByDomain.get(domain) === started) {
      inflightByDomain.delete(domain);
    }
  });
  inflightByDomain.set(domain, started);
  return started;
}
|
|
3952
|
+
/**
 * Registers `GET /api/feed-favicon?domain=<host>`.
 *
 * Flow: validate the host name (400 otherwise) → serve the cached file when it
 * exists, is younger than `CACHE_MAX_AGE_MS` and is still recognised as an
 * image → otherwise resolve the icon from the network, store it, and serve it.
 * Stale or unrecognisable cache files are deleted. Unexpected file-system
 * errors while reading the cache answer 500.
 *
 * Icon bytes originate from arbitrary remote sites and may be SVG, which can
 * carry script. Every icon response therefore carries
 * `X-Content-Type-Options: nosniff` and a sandboxing Content-Security-Policy
 * so that opening the URL directly cannot run script in this origin.
 *
 * A failure to write the cache no longer turns a successfully resolved icon
 * into a 500: the icon is served and the failure is logged.
 *
 * @param app Router exposing a `get` registration method.
 */
function registerFeedFaviconRoutes(app) {
  // Builds the 200 response shared by the cached and freshly-fetched paths.
  const iconResponse = (bytes, contentType) => new Response(new Uint8Array(bytes), {
    status: 200,
    headers: {
      "Content-Type": contentType,
      "Cache-Control": CACHE_CONTROL,
      "X-Content-Type-Options": "nosniff",
      "Content-Security-Policy": "default-src 'none'; style-src 'unsafe-inline'; sandbox"
    }
  });
  app.get("/api/feed-favicon", async (c) => {
    const raw = (c.req.query("domain") ?? "").trim();
    if (!raw || !isPlausibleHostname(raw)) {
      return new Response(null, { status: 400 });
    }
    const domain = raw.toLowerCase();
    const path = cacheFilePath(domain);
    let diskStale = false;
    try {
      const st = await stat(path);
      if (Date.now() - st.mtimeMs >= CACHE_MAX_AGE_MS) {
        diskStale = true;
        // Best effort: the file is overwritten below anyway.
        await unlink(path).catch(() => {
        });
      }
    } catch (e) {
      // A missing cache file is the normal "not cached yet" case.
      if (!isEnoent(e)) {
        return new Response(null, { status: 500 });
      }
    }
    if (!diskStale) {
      try {
        const cached = await readFile(path);
        const cachedMime = resolveImageMime(cached, null);
        if (cachedMime) {
          return iconResponse(cached, cachedMime);
        }
        // Cached bytes are not a recognisable image: drop them and refetch.
        await unlink(path).catch(() => {
        });
      } catch (e) {
        if (!isEnoent(e)) {
          return new Response(null, { status: 500 });
        }
      }
    }
    const { buf, mime } = await fetchFaviconDeduped(domain);
    try {
      await mkdir(join(CACHE_DIR, CACHE_SUBDIR), { recursive: true });
      await writeFile(path, buf);
    } catch (err) {
      // Caching is an optimisation; the icon itself is still valid.
      console.warn("[feed-favicon] cache write failed", {
        domain,
        message: err instanceof Error ? err.message : String(err)
      });
    }
    return iconResponse(buf, mime);
  });
}
|
|
3340
4011
|
function registerApiRoutes(app) {
|
|
3341
4012
|
registerServerRoutes(app);
|
|
4013
|
+
registerFeedFaviconRoutes(app);
|
|
3342
4014
|
registerRssApiRoutes(app);
|
|
3343
4015
|
registerSchedulerRoutes(app);
|
|
3344
4016
|
registerPluginsRoutes(app);
|
|
@@ -3350,6 +4022,8 @@ function registerApiRoutes(app) {
|
|
|
3350
4022
|
registerSourcesRoutes(app);
|
|
3351
4023
|
registerTopicsRoutes(app);
|
|
3352
4024
|
registerDeliverRoutes(app);
|
|
4025
|
+
registerLlmRoutes(app);
|
|
4026
|
+
registerProxySettingsRoutes(app);
|
|
3353
4027
|
registerTasksRoutes(app);
|
|
3354
4028
|
}
|
|
3355
4029
|
function registerAuthRoutes(app) {
|
|
@@ -3363,7 +4037,7 @@ function registerAuthRoutes(app) {
|
|
|
3363
4037
|
const authFlow = toAuthFlow(site);
|
|
3364
4038
|
if (!authFlow) return c.json({ ok: false, message: "该站点无需登录" }, 400);
|
|
3365
4039
|
try {
|
|
3366
|
-
const authenticated = await preCheckAuth(authFlow, CACHE_DIR);
|
|
4040
|
+
const authenticated = await preCheckAuth(authFlow, CACHE_DIR, { proxy: await resolveProxyForSite(site) });
|
|
3367
4041
|
return c.json({ ok: true, authenticated });
|
|
3368
4042
|
} catch (err) {
|
|
3369
4043
|
const msg = err instanceof Error ? err.message : String(err);
|
|
@@ -3380,12 +4054,23 @@ function registerAuthRoutes(app) {
|
|
|
3380
4054
|
const authFlow = toAuthFlow(site);
|
|
3381
4055
|
if (!authFlow) return c.json({ ok: false, message: "该站点无需登录" }, 400);
|
|
3382
4056
|
const { loginUrl } = authFlow;
|
|
3383
|
-
|
|
3384
|
-
|
|
3385
|
-
|
|
3386
|
-
|
|
3387
|
-
|
|
3388
|
-
|
|
4057
|
+
const proxy = await resolveProxyForSite(site);
|
|
4058
|
+
void launchBrowser({ headless: false, cacheDir: CACHE_DIR, proxy: resolveProxy({ proxy }) }).then(async (browser) => {
|
|
4059
|
+
try {
|
|
4060
|
+
const page = await browser.newPage();
|
|
4061
|
+
await applyProxyAuthToPage(page, { proxy });
|
|
4062
|
+
const realUserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
|
|
4063
|
+
await page.setUserAgent(realUserAgent);
|
|
4064
|
+
await page.setViewport({ width: 1366, height: 960 });
|
|
4065
|
+
await page.goto(loginUrl, { waitUntil: "domcontentloaded", timeout: 6e4 });
|
|
4066
|
+
page.once("close", () => {
|
|
4067
|
+
void browser.close().catch(() => {
|
|
4068
|
+
});
|
|
4069
|
+
});
|
|
4070
|
+
} catch {
|
|
4071
|
+
await browser.close().catch(() => {
|
|
4072
|
+
});
|
|
4073
|
+
}
|
|
3389
4074
|
}).catch(() => {
|
|
3390
4075
|
});
|
|
3391
4076
|
return c.json({ ok: true, message: "已打开登录页面" });
|
|
@@ -3406,7 +4091,7 @@ function registerAuthRoutes(app) {
|
|
|
3406
4091
|
}
|
|
3407
4092
|
const authFlow = toAuthFlow(site);
|
|
3408
4093
|
if (!authFlow) return c.json({ ok: false, message: "该站点无需登录" }, 400);
|
|
3409
|
-
ensureAuth(authFlow, CACHE_DIR).then(() => {
|
|
4094
|
+
ensureAuth(authFlow, CACHE_DIR, { proxy: await resolveProxyForSite(site) }).then(() => {
|
|
3410
4095
|
}).catch(() => {
|
|
3411
4096
|
});
|
|
3412
4097
|
return c.json({ ok: true, message: "已打开登录窗口,请在弹出的浏览器中完成登录,完成后刷新订阅页面即可。" });
|
|
@@ -3429,6 +4114,24 @@ async function readStaticHtml(name, fallback) {
|
|
|
3429
4114
|
/**
 * Escapes the characters that are significant in HTML text and attribute
 * values: `&`, `<`, `>`, `"` and `'`.
 *
 * The replacement strings must be the entity references themselves; with the
 * entities decoded the function would map each character to itself (and the
 * double-quote case would not even parse).
 *
 * @param {string} s Raw text.
 * @returns {string} Text safe to embed in HTML.
 */
function escapeHtml(s) {
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  // Single pass, so an ampersand produced by one replacement is never re-escaped.
  return s.replace(/[&<>"']/g, (ch) => entities[ch]);
}
|
|
4117
|
+
/**
 * Determines which proxy setting takes effect, in priority order: the explicit
 * override, the value resolved from the source, then the `HTTP_PROXY` and
 * `HTTPS_PROXY` environment variables. Values are trimmed; blank values are
 * skipped.
 *
 * @param {string|null|undefined} override Proxy supplied explicitly by the caller.
 * @param {string|null|undefined} mergedFromSource Proxy resolved from the source.
 * @returns {string|undefined} The effective proxy, or a falsy value when none is set.
 */
function effectiveProxyUsed(override, mergedFromSource) {
  for (const candidate of [override, mergedFromSource]) {
    const trimmed = candidate?.trim();
    if (trimmed) return trimmed;
  }
  const { HTTP_PROXY, HTTPS_PROXY } = process.env;
  return HTTP_PROXY?.trim() || HTTPS_PROXY?.trim();
}
|
|
4124
|
+
/**
 * Returns a proxy URL with its credentials masked, suitable for logs and API
 * responses. A present user name and/or password is replaced by "***".
 *
 * @param {string|null|undefined} p Proxy URL.
 * @returns {string|null} The masked URL, or `null` when `p` is empty or not
 *   parseable as a URL.
 */
function redactProxyForLog(p) {
  if (!p) return null;
  let parsed;
  try {
    parsed = new URL(p);
  } catch {
    return null;
  }
  for (const field of ["username", "password"]) {
    if (parsed[field]) parsed[field] = "***";
  }
  return parsed.toString();
}
|
|
3432
4135
|
function registerAdminRoutes(app) {
|
|
3433
4136
|
async function render401(listUrl) {
|
|
3434
4137
|
const raw = await readStaticHtml("401", '<!DOCTYPE html><html><head><meta charset="utf-8"><title>401</title></head><body><h1>401 需要登录</h1></body></html>');
|
|
@@ -3439,12 +4142,25 @@ function registerAdminRoutes(app) {
|
|
|
3439
4142
|
if (!url) return c.text("无效 URL,格式: /admin/parse/https://... 或 /admin/parse/example.com/...", 400);
|
|
3440
4143
|
try {
|
|
3441
4144
|
const headlessParam = c.req.query("headless");
|
|
3442
|
-
const headless = headlessParam === "
|
|
4145
|
+
const headless = headlessParam === "true" || headlessParam === "1";
|
|
4146
|
+
const proxyOverride = c.req.query("proxy")?.trim();
|
|
3443
4147
|
const source = getSource(url);
|
|
3444
|
-
const
|
|
4148
|
+
const fromSource = await getEffectiveProxyForListUrl(url, source);
|
|
4149
|
+
const ctx = buildSourceContext({
|
|
4150
|
+
cacheDir: CACHE_DIR,
|
|
4151
|
+
headless,
|
|
4152
|
+
proxy: proxyOverride || fromSource
|
|
4153
|
+
});
|
|
3445
4154
|
const items = await source.fetchItems(url, ctx);
|
|
3446
4155
|
const mode = source.id === "generic" ? "generic" : "plugin";
|
|
3447
|
-
|
|
4156
|
+
const effective = effectiveProxyUsed(proxyOverride, fromSource);
|
|
4157
|
+
return c.json({
|
|
4158
|
+
items,
|
|
4159
|
+
url,
|
|
4160
|
+
mode,
|
|
4161
|
+
pluginId: source.id,
|
|
4162
|
+
effectiveProxy: redactProxyForLog(effective)
|
|
4163
|
+
});
|
|
3448
4164
|
} catch (err) {
|
|
3449
4165
|
if (err instanceof AuthRequiredError) {
|
|
3450
4166
|
const html = await render401(url);
|
|
@@ -3459,16 +4175,20 @@ function registerAdminRoutes(app) {
|
|
|
3459
4175
|
if (!url) return c.text("无效 URL,格式: /admin/extractor/https://... 或 /admin/extractor/example.com/...", 400);
|
|
3460
4176
|
try {
|
|
3461
4177
|
const headlessParam = c.req.query("headless");
|
|
3462
|
-
const headless = headlessParam === "
|
|
3463
|
-
const
|
|
3464
|
-
const
|
|
4178
|
+
const headless = headlessParam === "true" || headlessParam === "1";
|
|
4179
|
+
const proxyOverride = c.req.query("proxy")?.trim();
|
|
4180
|
+
const source = getSource(url);
|
|
4181
|
+
const fromSource = await getEffectiveProxyForListUrl(url, source);
|
|
4182
|
+
const proxy = proxyOverride || fromSource;
|
|
3465
4183
|
const result = await extractFromLink(url, {}, { timeoutMs: 6e4, headless, proxy });
|
|
4184
|
+
const effective = effectiveProxyUsed(proxyOverride, fromSource);
|
|
3466
4185
|
return c.json({
|
|
3467
4186
|
title: result.title ?? null,
|
|
3468
4187
|
author: result.author ?? null,
|
|
3469
4188
|
pubDate: result.pubDate ?? null,
|
|
3470
4189
|
content: result.content ?? null,
|
|
3471
|
-
_extractor: "readability"
|
|
4190
|
+
_extractor: "readability",
|
|
4191
|
+
effectiveProxy: redactProxyForLog(effective)
|
|
3472
4192
|
});
|
|
3473
4193
|
} catch (err) {
|
|
3474
4194
|
const msg = err instanceof Error ? err.message : String(err);
|