rssany 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. package/.env.example +1 -1
  2. package/README.md +10 -11
  3. package/{plugins/sources → app/plugins/builtin}/email.rssany.js +92 -96
  4. package/app/plugins/builtin/rss.rssany.js +164 -0
  5. package/{plugins/templates → app/plugins}/site.rssany.js +6 -7
  6. package/dist/index.js +1200 -807
  7. package/dist/index.js.map +1 -1
  8. package/{config.examples.json → init/config.json} +7 -1
  9. package/init/sources.json +353 -0
  10. package/package.json +6 -7
  11. package/statics/401.html +1 -1
  12. package/statics/README.md +1 -1
  13. package/webui/build/200.html +16 -18
  14. package/webui/build/_app/immutable/assets/0.C6Q_nuW9.css +1 -0
  15. package/webui/build/_app/immutable/assets/10.Dj8_pmut.css +1 -0
  16. package/webui/build/_app/immutable/assets/11.qYZMiTb0.css +1 -0
  17. package/webui/build/_app/immutable/assets/12.Ct59LCqW.css +1 -0
  18. package/webui/build/_app/immutable/assets/13.BhO9zvFi.css +1 -0
  19. package/webui/build/_app/immutable/assets/14.CujIhjQK.css +1 -0
  20. package/webui/build/_app/immutable/assets/15.nNGjXhCQ.css +1 -0
  21. package/webui/build/_app/immutable/assets/16.PP9XLDf7.css +1 -0
  22. package/webui/build/_app/immutable/assets/4.9wPHhVwv.css +1 -0
  23. package/webui/build/_app/immutable/assets/6.DSJfjJwx.css +1 -0
  24. package/webui/build/_app/immutable/assets/7.CrNxmd8B.css +1 -0
  25. package/webui/build/_app/immutable/assets/8.Ba5_jYIY.css +1 -0
  26. package/webui/build/_app/immutable/assets/{9.BZheTlzZ.css → 9.m-LCx_kl.css} +1 -1
  27. package/webui/build/_app/immutable/assets/BackToParentRoute.DGk-X5ow.css +1 -0
  28. package/webui/build/_app/immutable/assets/SourcesList.yTBBi3_m.css +1 -0
  29. package/webui/build/_app/immutable/assets/homeFeedPanelStore.BopJZtHu.css +1 -0
  30. package/webui/build/_app/immutable/chunks/{V2-VOe88.js → B-OsL1Ct.js} +1 -1
  31. package/webui/build/_app/immutable/chunks/B2Q1a1-H.js +2 -0
  32. package/webui/build/_app/immutable/chunks/BK3WtZwv.js +1 -0
  33. package/webui/build/_app/immutable/chunks/BQqoDzLx.js +1 -0
  34. package/webui/build/_app/immutable/chunks/BXCWEhUd.js +1 -0
  35. package/webui/build/_app/immutable/chunks/BbWUOQ_m.js +1 -0
  36. package/webui/build/_app/immutable/chunks/Bp63qm3L.js +1 -0
  37. package/webui/build/_app/immutable/chunks/CVzlFH44.js +1 -0
  38. package/webui/build/_app/immutable/chunks/CWNeClHp.js +6 -0
  39. package/webui/build/_app/immutable/chunks/Cihqbfi5.js +1 -0
  40. package/webui/build/_app/immutable/chunks/CkUAV0m0.js +41 -0
  41. package/webui/build/_app/immutable/chunks/CtijX1u3.js +31 -0
  42. package/webui/build/_app/immutable/chunks/D5GvRCv7.js +1 -0
  43. package/webui/build/_app/immutable/chunks/DEDI7Ecm.js +1 -0
  44. package/webui/build/_app/immutable/chunks/DFuhmi31.js +1 -0
  45. package/webui/build/_app/immutable/chunks/DMWEh-Ek.js +2 -0
  46. package/webui/build/_app/immutable/chunks/{Cg3zih_x.js → DcAshVxe.js} +1 -1
  47. package/webui/build/_app/immutable/chunks/DjNLq3TF.js +1 -0
  48. package/webui/build/_app/immutable/chunks/Dt2CddFe.js +1 -0
  49. package/webui/build/_app/immutable/chunks/Dw782Tjs.js +1 -0
  50. package/webui/build/_app/immutable/chunks/EIZIMsXK.js +1 -0
  51. package/webui/build/_app/immutable/chunks/Xy_fhzQq.js +1 -0
  52. package/webui/build/_app/immutable/chunks/lk5LaiqA.js +1 -0
  53. package/webui/build/_app/immutable/chunks/mW5RwvnK.js +13 -0
  54. package/webui/build/_app/immutable/chunks/{CtHRh_pJ.js → tB7QMF3U.js} +1 -1
  55. package/webui/build/_app/immutable/chunks/xtNWTdbD.js +1 -0
  56. package/webui/build/_app/immutable/entry/app.DdgnooOk.js +2 -0
  57. package/webui/build/_app/immutable/entry/start.DhJaJZhR.js +1 -0
  58. package/webui/build/_app/immutable/nodes/0.BE05Cuc4.js +11 -0
  59. package/webui/build/_app/immutable/nodes/1.5DFDaT4c.js +1 -0
  60. package/webui/build/_app/immutable/nodes/10.OVK4i9XE.js +1 -0
  61. package/webui/build/_app/immutable/nodes/11.Dhn_rO4A.js +1 -0
  62. package/webui/build/_app/immutable/nodes/12.Cg8AeCSH.js +1 -0
  63. package/webui/build/_app/immutable/nodes/13.nT3SOzEB.js +1 -0
  64. package/webui/build/_app/immutable/nodes/14.B_KpJLxn.js +1 -0
  65. package/webui/build/_app/immutable/nodes/15.RaWaA-0I.js +1 -0
  66. package/webui/build/_app/immutable/nodes/{12.D9g8GCjm.js → 16.DSUgqolV.js} +15 -15
  67. package/webui/build/_app/immutable/nodes/2.BYWOpaxy.js +1 -0
  68. package/webui/build/_app/immutable/nodes/3.wQvGs9w-.js +1 -0
  69. package/webui/build/_app/immutable/nodes/4.DTSxpKm7.js +2 -0
  70. package/webui/build/_app/immutable/nodes/5.CCtn90c0.js +1 -0
  71. package/webui/build/_app/immutable/nodes/6.C2_mjW1u.js +1 -0
  72. package/webui/build/_app/immutable/nodes/7.Dwz6W7A1.js +1 -0
  73. package/webui/build/_app/immutable/nodes/8.DzkEw6rx.js +1 -0
  74. package/webui/build/_app/immutable/nodes/9.DtlXEwe1.js +1 -0
  75. package/webui/build/_app/version.json +1 -1
  76. package/plugins/sources/rss.rssany.js +0 -83
  77. package/sources.example.json +0 -562
  78. package/webui/build/_app/immutable/assets/0.BUAXpTm6.css +0 -1
  79. package/webui/build/_app/immutable/assets/10.I1OuCLrU.css +0 -1
  80. package/webui/build/_app/immutable/assets/11.CrO9xaki.css +0 -1
  81. package/webui/build/_app/immutable/assets/12.BEi6fInA.css +0 -1
  82. package/webui/build/_app/immutable/assets/14.Ctlgn1LZ.css +0 -1
  83. package/webui/build/_app/immutable/assets/2.eJ80XOGm.css +0 -1
  84. package/webui/build/_app/immutable/assets/4.B8-jYAVj.css +0 -1
  85. package/webui/build/_app/immutable/assets/6.Drn-0DON.css +0 -1
  86. package/webui/build/_app/immutable/assets/7.ms2diq_q.css +0 -1
  87. package/webui/build/_app/immutable/assets/8.DKymkjjs.css +0 -1
  88. package/webui/build/_app/immutable/assets/SourcesList.BhtYlRsQ.css +0 -1
  89. package/webui/build/_app/immutable/chunks/BUngiKFg.js +0 -1
  90. package/webui/build/_app/immutable/chunks/Bt0fzibd.js +0 -1
  91. package/webui/build/_app/immutable/chunks/BxHqDcpw.js +0 -1
  92. package/webui/build/_app/immutable/chunks/ByQRbEUX.js +0 -1
  93. package/webui/build/_app/immutable/chunks/C12mHcUp.js +0 -6
  94. package/webui/build/_app/immutable/chunks/C1kQ4pHy.js +0 -1
  95. package/webui/build/_app/immutable/chunks/C74gbb4Q.js +0 -1
  96. package/webui/build/_app/immutable/chunks/CAtemnMo.js +0 -1
  97. package/webui/build/_app/immutable/chunks/CVjCNJia.js +0 -1
  98. package/webui/build/_app/immutable/chunks/CjQQ9_Q2.js +0 -2
  99. package/webui/build/_app/immutable/chunks/CkS2JMkE.js +0 -1
  100. package/webui/build/_app/immutable/chunks/D-6mYMI1.js +0 -1
  101. package/webui/build/_app/immutable/chunks/D1Gs8-g3.js +0 -1
  102. package/webui/build/_app/immutable/chunks/D9dRVKgL.js +0 -1
  103. package/webui/build/_app/immutable/chunks/DCEY1XiC.js +0 -1
  104. package/webui/build/_app/immutable/chunks/DI-t-G_K.js +0 -2
  105. package/webui/build/_app/immutable/chunks/DTUxjyWL.js +0 -1
  106. package/webui/build/_app/immutable/chunks/DWJZOHke.js +0 -1
  107. package/webui/build/_app/immutable/chunks/Dgs6d7X5.js +0 -1
  108. package/webui/build/_app/immutable/chunks/DjpPK99f.js +0 -71
  109. package/webui/build/_app/immutable/chunks/DjzVVxpy.js +0 -1
  110. package/webui/build/_app/immutable/chunks/LQVMBmDN.js +0 -1
  111. package/webui/build/_app/immutable/chunks/Qw0Qgx6J.js +0 -1
  112. package/webui/build/_app/immutable/chunks/bohabpgg.js +0 -1
  113. package/webui/build/_app/immutable/chunks/c-YfbAB_.js +0 -8
  114. package/webui/build/_app/immutable/chunks/tpTQfoNn.js +0 -1
  115. package/webui/build/_app/immutable/entry/app.4I2fqDIL.js +0 -2
  116. package/webui/build/_app/immutable/entry/start.CrgdT2Qb.js +0 -1
  117. package/webui/build/_app/immutable/nodes/0.gA9sQtoM.js +0 -11
  118. package/webui/build/_app/immutable/nodes/1.Bybh7btp.js +0 -1
  119. package/webui/build/_app/immutable/nodes/10.DEkJCZ6X.js +0 -1
  120. package/webui/build/_app/immutable/nodes/11.CDNNJqlQ.js +0 -1
  121. package/webui/build/_app/immutable/nodes/13.DRpZV72T.js +0 -1
  122. package/webui/build/_app/immutable/nodes/14.DVeJW6bd.js +0 -1
  123. package/webui/build/_app/immutable/nodes/2.DIZ4IPNm.js +0 -1
  124. package/webui/build/_app/immutable/nodes/3.BFSNf0FK.js +0 -1
  125. package/webui/build/_app/immutable/nodes/4.BSsIjejE.js +0 -2
  126. package/webui/build/_app/immutable/nodes/5.COxRT9Oe.js +0 -1
  127. package/webui/build/_app/immutable/nodes/6.CBgQ4YzB.js +0 -1
  128. package/webui/build/_app/immutable/nodes/7.BbzWOL0V.js +0 -6
  129. package/webui/build/_app/immutable/nodes/8.C8120200.js +0 -1
  130. package/webui/build/_app/immutable/nodes/9.BH_BGQQ4.js +0 -1
  131. package/webui/build/_app/immutable/nodes/{15.BtYZF6FM.js → 17.BtYZF6FM.js} +0 -0
  132. package/webui/build/_app/immutable/nodes/{16.Ba_qJjp6.js → 18.Ba_qJjp6.js} +0 -0
package/dist/index.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import "dotenv/config";
2
- import { existsSync, unlinkSync, openSync, writeSync, closeSync, readFileSync, watch } from "node:fs";
2
+ import { existsSync, unlinkSync, openSync, writeSync, closeSync, readFileSync, statSync, watch } from "node:fs";
3
3
  import { platform, homedir, networkInterfaces } from "node:os";
4
4
  import { serve } from "@hono/node-server";
5
5
  import { Hono } from "hono";
@@ -10,12 +10,16 @@ import { promisify } from "node:util";
10
10
  import puppeteerCore from "puppeteer-core";
11
11
  import { parse, NodeType } from "node-html-parser";
12
12
  import Database from "better-sqlite3";
13
- import { mkdir, copyFile, access, rename, readFile, writeFile, readdir } from "node:fs/promises";
13
+ import { mkdir, copyFile, access, rename, readFile, writeFile, readdir, stat, unlink } from "node:fs/promises";
14
14
  import { fileURLToPath, pathToFileURL } from "node:url";
15
- import { createHash, randomUUID } from "node:crypto";
15
+ import { createHash } from "node:crypto";
16
16
  import { JSDOM } from "jsdom";
17
17
  import { Readability } from "@mozilla/readability";
18
18
  import OpenAI from "openai";
19
+ import RssParser from "rss-parser";
20
+ import { HttpsProxyAgent } from "https-proxy-agent";
21
+ import { ImapFlow } from "imapflow";
22
+ import { simpleParser } from "mailparser";
19
23
  import { EventEmitter } from "node:events";
20
24
  import { CronExpressionParser } from "cron-parser";
21
25
  import { validate, schedule as schedule$1 } from "node-cron";
@@ -144,6 +148,50 @@ function markPipelineDrop(item) {
144
148
  function isPipelineDroppedItem(item) {
145
149
  return item.extra?.[PIPELINE_DROP_EXTRA_KEY] === true;
146
150
  }
151
+ function canonicalHttpSourceRef(ref) {
152
+ const t = ref.trim();
153
+ if (!t) return t;
154
+ if (!/^https?:\/\//i.test(t)) return t.toLowerCase();
155
+ try {
156
+ const u = new URL(t);
157
+ const protocol = u.protocol.toLowerCase();
158
+ const host = u.host.toLowerCase();
159
+ let path = u.pathname;
160
+ if (path.length > 1 && path.endsWith("/")) {
161
+ path = path.slice(0, -1);
162
+ }
163
+ path = path.toLowerCase();
164
+ return `${protocol}//${host}${path}${u.search}${u.hash}`;
165
+ } catch {
166
+ return t.toLowerCase();
167
+ }
168
+ }
169
+ function maxIso(a, b) {
170
+ if (!a) return b;
171
+ if (!b) return a;
172
+ return a >= b ? a : b;
173
+ }
174
+ function mergeSourceStatsRows(rows) {
175
+ const map = /* @__PURE__ */ new Map();
176
+ for (const row of rows) {
177
+ const k = canonicalHttpSourceRef(row.source_url);
178
+ const prev = map.get(k);
179
+ if (!prev) {
180
+ map.set(k, { count: row.count, latest_at: row.latest_at });
181
+ } else {
182
+ map.set(k, {
183
+ count: prev.count + row.count,
184
+ latest_at: maxIso(prev.latest_at, row.latest_at)
185
+ });
186
+ }
187
+ }
188
+ return [...map.entries()].map(([source_url, v]) => ({ source_url, count: v.count, latest_at: v.latest_at })).sort((a, b) => b.count - a.count);
189
+ }
190
+ const httpSourceRef = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
191
+ __proto__: null,
192
+ canonicalHttpSourceRef,
193
+ mergeSourceStatsRows
194
+ }, Symbol.toStringTag, { value: "Module" }));
147
195
  const __dir = dirname(fileURLToPath(import.meta.url));
148
196
  const base = basename(__dir);
149
197
  const PACKAGE_ROOT = base === "app" || base === "dist" ? join(__dir, "..") : __dir;
@@ -156,12 +204,9 @@ const SOURCES_CONFIG_PATH = join(USER_DIR, "sources.json");
156
204
  const TAGS_CONFIG_PATH = join(USER_DIR, "tags.json");
157
205
  const CONFIG_PATH = join(USER_DIR, "config.json");
158
206
  const LEGACY_SUBSCRIPTIONS_PATH = join(USER_DIR, "subscriptions.json");
159
- const BUILTIN_PLUGINS_DIR = join(PACKAGE_ROOT, "plugins");
207
+ const BUILTIN_PLUGINS_DIR = join(PACKAGE_ROOT, "app/plugins/builtin");
160
208
  const USER_PLUGINS_DIR = join(USER_DIR, "plugins");
161
- const BUILTIN_SOURCES_DIR = join(BUILTIN_PLUGINS_DIR, "sources");
162
- const USER_SOURCES_DIR = join(USER_PLUGINS_DIR, "sources");
163
- const BUILTIN_ENRICH_DIR = join(BUILTIN_PLUGINS_DIR, "enrich");
164
- const USER_ENRICH_DIR = join(USER_PLUGINS_DIR, "enrich");
209
+ const PLUGIN_SITE_TEMPLATE_PATH = join(PACKAGE_ROOT, "app/plugins/site.rssany.js");
165
210
  async function pathExists(p) {
166
211
  try {
167
212
  await access(p);
@@ -180,8 +225,9 @@ async function migrateFile(from, to) {
180
225
  logger.warn("config", "配置迁移失败", { from, to, err: err instanceof Error ? err.message : String(err) });
181
226
  }
182
227
  }
183
- const EXAMPLE_SOURCES = join(PACKAGE_ROOT, "sources.example.json");
184
- const EXAMPLE_CONFIG = join(PACKAGE_ROOT, "config.examples.json");
228
+ const INIT_DATA_DIR = join(PACKAGE_ROOT, "init");
229
+ const EXAMPLE_SOURCES = join(INIT_DATA_DIR, "sources.json");
230
+ const EXAMPLE_CONFIG = join(INIT_DATA_DIR, "config.json");
185
231
  async function seedExampleConfigsIfMissing() {
186
232
  if (!await pathExists(SOURCES_CONFIG_PATH) && await pathExists(EXAMPLE_SOURCES)) {
187
233
  try {
@@ -209,8 +255,6 @@ async function initUserDir() {
209
255
  await mkdir(DATA_DIR, { recursive: true });
210
256
  await mkdir(CACHE_DIR, { recursive: true });
211
257
  await mkdir(USER_PLUGINS_DIR, { recursive: true });
212
- await mkdir(USER_SOURCES_DIR, { recursive: true });
213
- await mkdir(USER_ENRICH_DIR, { recursive: true });
214
258
  await seedExampleConfigsIfMissing();
215
259
  if (!await pathExists(SOURCES_CONFIG_PATH) && await pathExists(LEGACY_SUBSCRIPTIONS_PATH)) {
216
260
  await migrateFile(LEGACY_SUBSCRIPTIONS_PATH, SOURCES_CONFIG_PATH);
@@ -225,17 +269,17 @@ function logCorruptDiagnostic(operation, err) {
225
269
  const code = err?.code;
226
270
  const msg = err instanceof Error ? err.message : String(err);
227
271
  const lines = [
228
- "[rssany db] ??????????????",
229
- ` ?????: ${operation}`,
230
- ` ????: ${code ?? "unknown"} - ${msg}`,
231
- " ????????:",
232
- " 1. ??????/????????????????????????? tsx --watch ?????????????????????????????????????????????",
233
- " 2. ???????????????????WAL ????? checkpoint",
234
- " 3. ????/?????/?????????????????????????",
235
- " ??:",
236
- " - ??????????????? --watch???????????????????????????????",
237
- " - ???????????? RSSANY_DB_JOURNAL=delete ?????? DELETE ???????????????????",
238
- " - ?????? .rssany/data/rssany.db ????? -wal???-shm???rssany.db.lock ??????"
272
+ "[rssany db] 数据库可能损坏或并发冲突",
273
+ ` 操作: ${operation}`,
274
+ ` 错误: ${code ?? "unknown"} - ${msg}`,
275
+ " 常见原因:",
276
+ " 1. 多进程同时打开同一库(例如 tsx --watch 与另一实例同时写)",
277
+ " 2. 异常退出后 WAL 未正常 checkpoint",
278
+ " 3. 磁盘/杀毒/同步盘导致文件不完整",
279
+ " 建议:",
280
+ " - 避免多实例同时写库;开发时慎用 --watch 与后台任务并行",
281
+ " - 可尝试 RSSANY_DB_JOURNAL=delete 使用 DELETE 模式降低多文件依赖",
282
+ " - 备份后删除 .rssany/data/rssany.db 及同目录 -wal、-shmrssany.db.lock 再启动"
239
283
  ];
240
284
  process.stderr.write(lines.join("\n") + "\n");
241
285
  }
@@ -274,7 +318,7 @@ function acquireDbLock(dbDir) {
274
318
  })();
275
319
  if (stillRunning) {
276
320
  throw new Error(
277
- `???????????????????????????PID ${oldPid}??????????? tsx --watch ?????????????????????????????????? ${lockPath} ??????????`
321
+ `数据库已被其他进程占用(PID ${oldPid})。请勿多开实例;若确认无其他进程,可删除 ${lockPath} 后重试(常见于 tsx --watch 未退出)`
278
322
  );
279
323
  }
280
324
  }
@@ -307,7 +351,7 @@ function withWriteLock(fn) {
307
351
  },
308
352
  (e) => {
309
353
  if (isCorruptError(e)) {
310
- logCorruptDiagnostic("???????????? updateItemContent/upsertItems??", e);
354
+ logCorruptDiagnostic("withWriteLock updateItemContent/upsertItems", e);
311
355
  }
312
356
  rejectOut(e);
313
357
  throw e;
@@ -315,7 +359,7 @@ function withWriteLock(fn) {
315
359
  );
316
360
  return out;
317
361
  }
318
- const DATE_ONLY_TITLE_RE = /^(?:jan|feb|mar|apr|may|jun|jul|aug|sep|sept|oct|nov|dec)\b[\s\d,??./-]*(?:st|nd|rd|th)?[\s\d,??./-]*$/i;
362
+ const DATE_ONLY_TITLE_RE = /^(?:jan|feb|mar|apr|may|jun|jul|aug|sep|sept|oct|nov|dec)\b[\s\d,./-]*(?:st|nd|rd|th)?[\s\d,./-]*$/i;
319
363
  function normalizeText(text) {
320
364
  return (text ?? "").replace(/\s+/g, " ").trim();
321
365
  }
@@ -393,7 +437,7 @@ async function getDb() {
393
437
  db = null;
394
438
  }
395
439
  if (isCorruptError(err)) {
396
- logCorruptDiagnostic("?????/??????????? (getDb)", err);
440
+ logCorruptDiagnostic("打开/初始化主库 (getDb)", err);
397
441
  }
398
442
  throw err;
399
443
  }
@@ -407,7 +451,7 @@ async function runIntegrityCheck() {
407
451
  return row?.integrity_check ?? "unknown";
408
452
  } catch (err) {
409
453
  const msg = err instanceof Error ? err.message : String(err);
410
- return `integrity_check ???????: ${msg}`;
454
+ return `integrity_check 执行失败: ${msg}`;
411
455
  }
412
456
  }
413
457
  const LOGS_DB_PATH = join(DATA_DIR, "logs.db");
@@ -520,13 +564,31 @@ function initSchema(db) {
520
564
  }
521
565
  } catch {
522
566
  }
567
+ migrateItemsSourceUrlIfNeeded(db);
568
+ }
569
+ function migrateItemsSourceUrlIfNeeded(db) {
570
+ const v = db.pragma("user_version", { simple: true });
571
+ if (v >= 2) return;
572
+ const rows = db.prepare("SELECT rowid, source_url FROM items").all();
573
+ const upd = db.prepare("UPDATE items SET source_url = @next WHERE rowid = @rowid");
574
+ const run = db.transaction(() => {
575
+ for (const r of rows) {
576
+ const next = canonicalHttpSourceRef(r.source_url);
577
+ if (next !== r.source_url) {
578
+ upd.run({ next, rowid: r.rowid });
579
+ }
580
+ }
581
+ db.pragma("user_version = 2");
582
+ });
583
+ run();
523
584
  }
524
585
  async function upsertItems(items, sourceUrlOverride) {
525
586
  if (items.length === 0) return { newCount: 0, newIds: /* @__PURE__ */ new Set() };
526
- const sourceUrl = items[0].sourceRef;
527
- if (!sourceUrl) {
528
- throw new Error("upsertItems: ???????? item ???? sourceRef????????? sourceUrlOverride");
587
+ const raw = items[0].sourceRef?.trim();
588
+ if (!raw) {
589
+ throw new Error("upsertItems: 每条 item 须有 sourceRef,或传入 sourceUrlOverride");
529
590
  }
591
+ const sourceUrl = canonicalHttpSourceRef(raw);
530
592
  return withWriteLock(async () => {
531
593
  const db = await getDb();
532
594
  const stmt = db.prepare(`
@@ -633,11 +695,13 @@ async function updateItemContent(item) {
633
695
  }
634
696
  async function queryFeedItems(sourceUrls, limit, offset, opts) {
635
697
  if (sourceUrls.length === 0) return { items: [], hasMore: false };
698
+ const expanded = [...new Set(sourceUrls.map((u) => canonicalHttpSourceRef(u)).filter(Boolean))];
699
+ if (expanded.length === 0) return { items: [], hasMore: false };
636
700
  const db = await getDb();
637
- const placeholders = sourceUrls.map((_, i) => `@u${i}`).join(", ");
701
+ const placeholders = expanded.map((_, i) => `@u${i}`).join(", ");
638
702
  const conditions = [`source_url IN (${placeholders})`];
639
703
  const params = { lim: limit + 1, off: offset };
640
- sourceUrls.forEach((url, i) => {
704
+ expanded.forEach((url, i) => {
641
705
  params[`u${i}`] = url;
642
706
  });
643
707
  if (opts?.since) {
@@ -647,7 +711,7 @@ async function queryFeedItems(sourceUrls, limit, offset, opts) {
647
711
  if (opts?.until) {
648
712
  conditions.push("COALESCE(pub_date, fetched_at) < @until");
649
713
  if (opts.until.length === 10) {
650
- const d = /* @__PURE__ */ new Date(opts.until + "T12:00:00Z");
714
+ const d = /* @__PURE__ */ new Date(`${opts.until}T12:00:00Z`);
651
715
  d.setUTCDate(d.getUTCDate() + 1);
652
716
  params.until = d.toISOString();
653
717
  } else {
@@ -671,12 +735,20 @@ async function queryItems(opts) {
671
735
  const conditions = [];
672
736
  const params = { limit, offset };
673
737
  if (sourceUrl) {
738
+ const key = canonicalHttpSourceRef(sourceUrl);
739
+ if (!key) {
740
+ return { items: [], total: 0 };
741
+ }
674
742
  conditions.push("i.source_url = @sourceUrl");
675
- params.sourceUrl = sourceUrl;
743
+ params.sourceUrl = key;
676
744
  } else if (sourceUrls && sourceUrls.length > 0) {
677
- const placeholders = sourceUrls.map((_, i) => `@src${i}`).join(", ");
745
+ const expanded = [...new Set(sourceUrls.map((s) => canonicalHttpSourceRef(s)).filter(Boolean))];
746
+ if (expanded.length === 0) {
747
+ return { items: [], total: 0 };
748
+ }
749
+ const placeholders = expanded.map((_, i) => `@src${i}`).join(", ");
678
750
  conditions.push(`i.source_url IN (${placeholders})`);
679
- sourceUrls.forEach((s, i) => params[`src${i}`] = s);
751
+ expanded.forEach((s, i) => params[`src${i}`] = s);
680
752
  }
681
753
  if (author && author.trim().length >= 2) {
682
754
  conditions.push("instr(i.author, @author) > 0");
@@ -690,9 +762,7 @@ async function queryItems(opts) {
690
762
  const trimmed = tagsFilter.filter((t) => typeof t === "string" && t.trim()).map((t) => t.trim());
691
763
  if (trimmed.length > 0) {
692
764
  const tagConds = trimmed.map((_, i) => `LOWER(TRIM(json_each.value)) = LOWER(@tag${i})`).join(" OR ");
693
- conditions.push(
694
- `i.tags IS NOT NULL AND EXISTS (SELECT 1 FROM json_each(i.tags) WHERE ${tagConds})`
695
- );
765
+ conditions.push(`i.tags IS NOT NULL AND EXISTS (SELECT 1 FROM json_each(i.tags) WHERE ${tagConds})`);
696
766
  trimmed.forEach((t, i) => {
697
767
  params[`tag${i}`] = t;
698
768
  });
@@ -772,9 +842,11 @@ async function deleteItem(id) {
772
842
  }
773
843
  async function deleteItemsBySourceUrl(sourceUrl) {
774
844
  if (!sourceUrl?.trim()) return 0;
845
+ const key = canonicalHttpSourceRef(sourceUrl.trim());
846
+ if (!key) return 0;
775
847
  return withWriteLock(async () => {
776
848
  const db = await getDb();
777
- const info = db.prepare("DELETE FROM items WHERE source_url = @sourceUrl").run({ sourceUrl: sourceUrl.trim() });
849
+ const info = db.prepare("DELETE FROM items WHERE source_url = @sourceUrl").run({ sourceUrl: key });
778
850
  return info.changes;
779
851
  });
780
852
  }
@@ -789,10 +861,12 @@ async function getPendingPushItems(limit = 100) {
789
861
  return mapRowsToDbItems(rows);
790
862
  }
791
863
  async function getSourceStats() {
864
+ const { mergeSourceStatsRows: mergeSourceStatsRows2 } = await Promise.resolve().then(() => httpSourceRef);
792
865
  const db = await getDb();
793
- return db.prepare(
866
+ const rows = db.prepare(
794
867
  "SELECT source_url, COUNT(*) as count, MAX(COALESCE(pub_date, fetched_at)) as latest_at FROM items GROUP BY source_url ORDER BY count DESC"
795
868
  ).all();
869
+ return mergeSourceStatsRows2(rows);
796
870
  }
797
871
  async function insertLog(entry) {
798
872
  const db = await getLogsDb();
@@ -972,6 +1046,9 @@ const logger = {
972
1046
  }
973
1047
  };
974
1048
  const execAsync = promisify(exec);
1049
+ const VIEWPORT_WIDTH = 1366;
1050
+ const VIEWPORT_HEIGHT_HEADLESS = 5e3;
1051
+ const VIEWPORT_HEIGHT_HEADFUL = 1200;
975
1052
  function resolveProxy(config) {
976
1053
  return config?.proxy ?? process.env.HTTP_PROXY ?? process.env.HTTPS_PROXY;
977
1054
  }
@@ -982,6 +1059,14 @@ function parseProxy(proxy) {
982
1059
  const password = u.password || void 0;
983
1060
  return { serverUrl, username, password };
984
1061
  }
1062
+ async function applyProxyAuthToPage(page, opts) {
1063
+ const proxy = resolveProxy(opts);
1064
+ if (!proxy) return;
1065
+ const { username, password } = parseProxy(proxy);
1066
+ if (username !== void 0 || password !== void 0) {
1067
+ await page.authenticate({ username: username ?? "", password: password ?? "" });
1068
+ }
1069
+ }
985
1070
  function launchArgs(config) {
986
1071
  const base2 = [
987
1072
  "--disable-blink-features=AutomationControlled",
@@ -993,8 +1078,8 @@ function launchArgs(config) {
993
1078
  "--disable-site-isolation-trials",
994
1079
  "--disable-infobars"
995
1080
  ];
996
- const height = config?.headless !== false ? 5e3 : 960;
997
- base2.push(`--window-size=1366,${height}`);
1081
+ const height = config?.headless !== false ? VIEWPORT_HEIGHT_HEADLESS : VIEWPORT_HEIGHT_HEADFUL;
1082
+ base2.push(`--window-size=${VIEWPORT_WIDTH},${height}`);
998
1083
  const proxy = resolveProxy(config);
999
1084
  if (proxy) {
1000
1085
  const { serverUrl } = parseProxy(proxy);
@@ -1084,150 +1169,130 @@ function headersToRecord(headers) {
1084
1169
  async function setupPage(page, headless = true) {
1085
1170
  const realUserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
1086
1171
  await page.setUserAgent(realUserAgent);
1087
- await page.setViewport({ width: 1366, height: headless ? 5e3 : 960 });
1172
+ await page.setViewport({
1173
+ width: VIEWPORT_WIDTH,
1174
+ height: headless ? VIEWPORT_HEIGHT_HEADLESS : VIEWPORT_HEIGHT_HEADFUL
1175
+ });
1088
1176
  await stealthPage(page);
1089
1177
  }
1090
- let _browser = null;
1091
- let _browserHeadless = true;
1092
- let _launchPromise = null;
1093
1178
  function isFrameDetachedError(e) {
1094
1179
  const msg = e instanceof Error ? e.message : String(e);
1095
1180
  return /detached|Navigating frame was detached|Session closed/i.test(msg);
1096
1181
  }
1097
- async function isBrowserAlive() {
1098
- if (!_browser) return false;
1099
- try {
1100
- await _browser.version();
1101
- return true;
1102
- } catch {
1103
- _browser = null;
1104
- return false;
1105
- }
1106
- }
1107
- async function getOrCreateBrowser(config) {
1182
+ async function launchBrowser(config) {
1108
1183
  const wantHeadless = config.headless !== false;
1109
- if (await isBrowserAlive()) {
1110
- if (_browserHeadless === wantHeadless) {
1111
- return _browser;
1112
- }
1113
- logger.info("scraper", "浏览器切换模式", { from: _browserHeadless ? "无头" : "有头", to: wantHeadless ? "无头" : "有头" });
1114
- await _browser.close().catch(() => {
1115
- });
1116
- _browser = null;
1117
- _launchPromise = null;
1118
- }
1119
- if (!_launchPromise) {
1120
- _launchPromise = (async () => {
1121
- const executablePath = config.chromeExecutablePath ?? process.env.CHROME_PATH ?? findChromeExecutable();
1122
- if (!executablePath) {
1123
- throw new Error("未找到 Chrome 可执行文件,请安装 Google Chrome 或设置 CHROME_PATH 环境变量");
1184
+ const executablePath = config.chromeExecutablePath ?? process.env.CHROME_PATH ?? findChromeExecutable();
1185
+ if (!executablePath) {
1186
+ throw new Error("未找到 Chrome 可执行文件,请安装 Google Chrome 或设置 CHROME_PATH 环境变量");
1187
+ }
1188
+ const userDataDir = getUserDataDir(config.cacheDir);
1189
+ const maxRetries = 2;
1190
+ let lastErr;
1191
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
1192
+ try {
1193
+ if (attempt === 0 && userDataDir) {
1194
+ const absUserDataDir = resolve(userDataDir);
1195
+ await killStaleChromeProcesses(absUserDataDir);
1124
1196
  }
1125
- const userDataDir = getUserDataDir(config.cacheDir);
1126
- const maxRetries = 2;
1127
- let lastErr;
1128
- for (let attempt = 0; attempt <= maxRetries; attempt++) {
1129
- try {
1130
- if (attempt === 0 && userDataDir) {
1131
- const absUserDataDir = resolve(userDataDir);
1132
- await killStaleChromeProcesses(absUserDataDir);
1133
- }
1134
- if (attempt > 0) {
1135
- const waitMs = attempt * 2e3;
1136
- logger.info("scraper", "userDataDir 曾被占用,等待后重试", { waitMs, attempt });
1137
- await new Promise((r) => setTimeout(r, waitMs));
1138
- }
1139
- logger.info("scraper", "启动 Chrome", { headless: wantHeadless, executablePath });
1140
- const browser = await puppeteerCore.launch({
1141
- headless: wantHeadless,
1142
- args: launchArgs({ proxy: config.proxy, headless: wantHeadless }),
1143
- userDataDir,
1144
- executablePath,
1145
- ignoreDefaultArgs: ["--enable-automation"]
1146
- });
1147
- browser.on("disconnected", () => {
1148
- _browser = null;
1149
- _launchPromise = null;
1150
- });
1151
- _browser = browser;
1152
- _browserHeadless = wantHeadless;
1153
- return browser;
1154
- } catch (e) {
1155
- lastErr = e;
1156
- if (attempt < maxRetries && isAlreadyRunningError(e)) {
1157
- continue;
1158
- }
1159
- if (isAlreadyRunningError(e)) {
1160
- const dir = userDataDir ?? "browser_data/main";
1161
- throw new Error(
1162
- `Chrome 的 profile 目录已被占用(${dir})。通常是因为上次未正常退出或同时运行了多个本服务实例。请关闭占用该目录的 Chrome 进程后重试,或设置环境变量 CACHE_DIR 使用不同缓存目录。`
1163
- );
1164
- }
1165
- throw e;
1166
- }
1197
+ if (attempt > 0) {
1198
+ const waitMs = attempt * 2e3;
1199
+ logger.info("scraper", "userDataDir 曾被占用,等待后重试", { waitMs, attempt });
1200
+ await new Promise((r) => setTimeout(r, waitMs));
1201
+ }
1202
+ return await puppeteerCore.launch({
1203
+ headless: wantHeadless,
1204
+ args: launchArgs({ proxy: config.proxy, headless: wantHeadless }),
1205
+ userDataDir,
1206
+ executablePath,
1207
+ ignoreDefaultArgs: ["--enable-automation"]
1208
+ });
1209
+ } catch (e) {
1210
+ lastErr = e;
1211
+ if (attempt < maxRetries && isAlreadyRunningError(e)) {
1212
+ continue;
1213
+ }
1214
+ if (isAlreadyRunningError(e)) {
1215
+ const dir = userDataDir ?? "browser_data/main";
1216
+ throw new Error(
1217
+ `Chrome 的 profile 目录已被占用(${dir})。通常是因为上次未正常退出或同时运行了多个本服务实例。请关闭占用该目录的 Chrome 进程后重试,或设置环境变量 CACHE_DIR 使用不同缓存目录。`
1218
+ );
1167
1219
  }
1168
- throw lastErr;
1169
- })().catch((e) => {
1170
- _launchPromise = null;
1171
1220
  throw e;
1172
- });
1221
+ }
1173
1222
  }
1174
- return _launchPromise;
1223
+ throw lastErr;
1175
1224
  }
1176
- process.once("exit", () => {
1177
- _browser?.close().catch(() => {
1178
- });
1179
- });
1180
- process.once("SIGINT", async () => {
1181
- await _browser?.close().catch(() => {
1182
- });
1183
- process.exit(0);
1184
- });
1185
- process.once("SIGTERM", async () => {
1186
- await _browser?.close().catch(() => {
1187
- });
1188
- process.exit(0);
1189
- });
1190
- async function preCheckAuth(authFlow, cacheDir) {
1225
+ async function preCheckAuth(authFlow, cacheDir, opts) {
1191
1226
  const { checkAuth, loginUrl, domain } = authFlow;
1192
1227
  if (domain == null || !cacheDir) return true;
1193
- const browser = await getOrCreateBrowser({ headless: true, cacheDir });
1194
- const page = await browser.newPage();
1228
+ const isHeadless = opts?.headless !== false;
1229
+ const browser = await launchBrowser({
1230
+ headless: isHeadless,
1231
+ cacheDir,
1232
+ proxy: resolveProxy(opts)
1233
+ });
1195
1234
  try {
1196
- await setupPage(page, true);
1197
- await page.goto(loginUrl, { waitUntil: "domcontentloaded", timeout: 6e4 });
1198
- await new Promise((resolve2) => setTimeout(resolve2, 3e3));
1199
- return await checkAuth(page, page.url());
1235
+ const page = await browser.newPage();
1236
+ try {
1237
+ await setupPage(page, isHeadless);
1238
+ await applyProxyAuthToPage(page, opts);
1239
+ await page.goto(loginUrl, { waitUntil: "domcontentloaded", timeout: 6e4 });
1240
+ await new Promise((resolve2) => setTimeout(resolve2, 3e3));
1241
+ return await checkAuth(page, page.url());
1242
+ } finally {
1243
+ await page.close().catch(() => {
1244
+ });
1245
+ }
1200
1246
  } finally {
1201
- await page.close().catch(() => {
1247
+ await browser.close().catch(() => {
1202
1248
  });
1203
1249
  }
1204
1250
  }
1205
- async function ensureAuth(authFlow, cacheDir) {
1251
+ async function ensureAuth(authFlow, cacheDir, opts) {
1206
1252
  const { checkAuth, loginUrl, loginTimeoutMs = 60 * 1e3, pollIntervalMs = 2e3 } = authFlow;
1207
- const browser = await getOrCreateBrowser({ headless: false, cacheDir });
1208
- const page = await browser.newPage();
1253
+ const browser = await launchBrowser({ headless: false, cacheDir, proxy: resolveProxy(opts) });
1209
1254
  try {
1210
- await setupPage(page, false);
1211
- await page.goto(loginUrl, { waitUntil: "domcontentloaded", timeout: 6e4 });
1212
- await new Promise((resolve2) => setTimeout(resolve2, 3e3));
1213
- const authenticated = await checkAuth(page, page.url());
1214
- if (authenticated) return;
1215
- const startTime = Date.now();
1216
- while (Date.now() - startTime < loginTimeoutMs) {
1217
- await new Promise((resolve2) => setTimeout(resolve2, pollIntervalMs));
1218
- const authenticated2 = await checkAuth(page, page.url());
1219
- if (authenticated2) return;
1220
- }
1221
- throw new Error(`登录超时(${loginTimeoutMs}ms)`);
1255
+ const page = await browser.newPage();
1256
+ try {
1257
+ await setupPage(page, false);
1258
+ await applyProxyAuthToPage(page, opts);
1259
+ await page.goto(loginUrl, { waitUntil: "domcontentloaded", timeout: 6e4 });
1260
+ await new Promise((resolve2) => setTimeout(resolve2, 3e3));
1261
+ const authenticated = await checkAuth(page, page.url());
1262
+ if (authenticated) return;
1263
+ const startTime = Date.now();
1264
+ while (Date.now() - startTime < loginTimeoutMs) {
1265
+ await new Promise((resolve2) => setTimeout(resolve2, pollIntervalMs));
1266
+ const authenticated2 = await checkAuth(page, page.url());
1267
+ if (authenticated2) return;
1268
+ }
1269
+ throw new Error(`登录超时(${loginTimeoutMs}ms)`);
1270
+ } finally {
1271
+ await page.close().catch(() => {
1272
+ });
1273
+ }
1222
1274
  } finally {
1223
- await page.close().catch(() => {
1275
+ await browser.close().catch(() => {
1224
1276
  });
1225
1277
  }
1226
1278
  }
1227
1279
  async function fetchHtml(url, config = {}) {
1228
- const { timeoutMs, headers, cookies, cacheDir, checkAuth, authFlow, purify, headless, waitAfterLoadMs, waitForSelector, waitForSelectorTimeoutMs } = config;
1280
+ const {
1281
+ timeoutMs,
1282
+ headers,
1283
+ cookies,
1284
+ cacheDir,
1285
+ checkAuth,
1286
+ authFlow,
1287
+ purify,
1288
+ headless,
1289
+ waitAfterLoadMs,
1290
+ waitForSelector,
1291
+ waitForSelectorTimeoutMs,
1292
+ useHttpResponseBody
1293
+ } = config;
1229
1294
  const isHeadless = headless !== false;
1230
- const browser = await getOrCreateBrowser({
1295
+ const browser = await launchBrowser({
1231
1296
  headless: isHeadless,
1232
1297
  cacheDir,
1233
1298
  proxy: resolveProxy(config),
@@ -1236,70 +1301,84 @@ async function fetchHtml(url, config = {}) {
1236
1301
  const navigationTimeout = timeoutMs ?? 6e4;
1237
1302
  const maxAttempts = 2;
1238
1303
  let lastError;
1239
- for (let attempt = 0; attempt < maxAttempts; attempt++) {
1240
- const page = await browser.newPage();
1241
- const isRetry = attempt === 1;
1242
- const waitUntil = isRetry ? "domcontentloaded" : "load";
1243
- const extraWaitMs = isRetry ? Math.min(500, Math.max(0, waitAfterLoadMs ?? 2e3)) : Math.max(0, waitAfterLoadMs ?? 2e3);
1244
- try {
1245
- if (config.browserContext) {
1246
- await config.browserContext(page.browserContext());
1247
- }
1248
- await setupPage(page, isHeadless);
1249
- const extraHeaders = { "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", ...headers ?? {} };
1250
- if (cookies != null && cookies !== "") {
1251
- extraHeaders.cookie = cookies;
1252
- }
1253
- await page.setExtraHTTPHeaders(extraHeaders);
1254
- const proxy = resolveProxy(config);
1255
- if (proxy) {
1256
- const { username, password } = parseProxy(proxy);
1257
- if (username !== void 0 || password !== void 0) {
1258
- await page.authenticate({ username: username ?? "", password: password ?? "" });
1304
+ try {
1305
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
1306
+ const page = await browser.newPage();
1307
+ const isRetry = attempt === 1;
1308
+ const waitUntil = isRetry ? "domcontentloaded" : "load";
1309
+ const extraWaitMs = isRetry ? Math.min(500, Math.max(0, waitAfterLoadMs ?? 2e3)) : Math.max(0, waitAfterLoadMs ?? 2e3);
1310
+ try {
1311
+ if (config.browserContext) {
1312
+ await config.browserContext(page.browserContext());
1259
1313
  }
1260
- }
1261
- if (timeoutMs != null) {
1262
- await page.setDefaultNavigationTimeout(timeoutMs);
1263
- }
1264
- const response = await page.goto(url, { waitUntil, timeout: navigationTimeout });
1265
- if (extraWaitMs > 0) {
1266
- await new Promise((resolve2) => setTimeout(resolve2, extraWaitMs));
1267
- }
1268
- if (waitForSelector != null && waitForSelector !== "" && !isRetry) {
1269
- const selectorTimeout = waitForSelectorTimeoutMs ?? 2e4;
1270
- await page.waitForSelector(waitForSelector, { timeout: selectorTimeout });
1271
- }
1272
- if (checkAuth != null || authFlow != null) {
1273
- const authCheck = checkAuth ?? authFlow?.checkAuth;
1274
- if (authCheck != null) {
1275
- const ok = await authCheck(page, url);
1276
- if (!ok) {
1277
- throw new Error("checkAuth failed: 未通过认证检查,请先调用 ensureAuth 进行预处理登录");
1314
+ await setupPage(page, isHeadless);
1315
+ const extraHeaders = { "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", ...headers ?? {} };
1316
+ if (cookies != null && cookies !== "") {
1317
+ extraHeaders.cookie = cookies;
1318
+ }
1319
+ await page.setExtraHTTPHeaders(extraHeaders);
1320
+ const proxy = resolveProxy(config);
1321
+ if (proxy) {
1322
+ const { username, password } = parseProxy(proxy);
1323
+ if (username !== void 0 || password !== void 0) {
1324
+ await page.authenticate({ username: username ?? "", password: password ?? "" });
1278
1325
  }
1279
1326
  }
1327
+ if (timeoutMs != null) {
1328
+ await page.setDefaultNavigationTimeout(timeoutMs);
1329
+ }
1330
+ const response = await page.goto(url, { waitUntil, timeout: navigationTimeout });
1331
+ if (extraWaitMs > 0) {
1332
+ await new Promise((resolve2) => setTimeout(resolve2, extraWaitMs));
1333
+ }
1334
+ if (waitForSelector != null && waitForSelector !== "" && !isRetry) {
1335
+ const selectorTimeout = waitForSelectorTimeoutMs ?? 2e4;
1336
+ await page.waitForSelector(waitForSelector, { timeout: selectorTimeout });
1337
+ }
1338
+ if (checkAuth != null || authFlow != null) {
1339
+ const authCheck = checkAuth ?? authFlow?.checkAuth;
1340
+ if (authCheck != null) {
1341
+ const ok = await authCheck(page, url);
1342
+ if (!ok) {
1343
+ throw new Error("checkAuth failed: 未通过认证检查,请先调用 ensureAuth 进行预处理登录");
1344
+ }
1345
+ }
1346
+ }
1347
+ let rawBody;
1348
+ if (useHttpResponseBody === true && response != null) {
1349
+ try {
1350
+ rawBody = await response.text();
1351
+ } catch {
1352
+ rawBody = await page.content();
1353
+ }
1354
+ } else {
1355
+ rawBody = await page.content();
1356
+ }
1357
+ const finalUrl = response?.url() ?? page.url() ?? String(url);
1358
+ const status = response?.status() ?? 0;
1359
+ const statusText = response?.statusText() ?? "";
1360
+ const rawHeaders = response?.headers() ?? {};
1361
+ const normalizedHeaders = headersToRecord(rawHeaders);
1362
+ const body = applyPurify(rawBody, purify);
1363
+ await page.close().catch(() => {
1364
+ });
1365
+ return { finalUrl, status, statusText, headers: normalizedHeaders, body };
1366
+ } catch (e) {
1367
+ lastError = e;
1368
+ await page.close().catch(() => {
1369
+ });
1370
+ if (isRetry || !isFrameDetachedError(e)) {
1371
+ throw e;
1372
+ }
1373
+ logger.warn("scraper", "fetchHtml 因 frame 分离重试", { url, attempt: attempt + 1, err: e instanceof Error ? e.message : String(e) });
1374
+ await new Promise((r) => setTimeout(r, 800));
1280
1375
  }
1281
- const rawBody = await page.content();
1282
- const finalUrl = response?.url() ?? page.url() ?? String(url);
1283
- const status = response?.status() ?? 0;
1284
- const statusText = response?.statusText() ?? "";
1285
- const rawHeaders = response?.headers() ?? {};
1286
- const normalizedHeaders = headersToRecord(rawHeaders);
1287
- const body = applyPurify(rawBody, purify);
1288
- await page.close().catch(() => {
1289
- });
1290
- return { finalUrl, status, statusText, headers: normalizedHeaders, body };
1291
- } catch (e) {
1292
- lastError = e;
1293
- await page.close().catch(() => {
1294
- });
1295
- if (isRetry || !isFrameDetachedError(e)) {
1296
- throw e;
1297
- }
1298
- logger.warn("scraper", "fetchHtml 因 frame 分离重试", { url, attempt: attempt + 1, err: e instanceof Error ? e.message : String(e) });
1299
- await new Promise((r) => setTimeout(r, 800));
1300
1376
  }
1377
+ throw lastError;
1378
+ } finally {
1379
+ await browser.close().catch(() => {
1380
+ });
1301
1381
  }
1302
- throw lastError;
1303
1382
  }
1304
1383
  const VALID_INTERVALS = ["1min", "5min", "10min", "30min", "1h", "6h", "12h", "1day", "3day", "7day"];
1305
1384
  function cronToRefreshInterval(cronExpr) {
@@ -1471,19 +1550,78 @@ async function extractFromLink(link, extractorConfig = {}, fetchConfig = {}) {
1471
1550
  cacheKey: extractorConfig.cacheKey ?? (cacheDir ? cacheKey(link, "forever") : void 0)
1472
1551
  });
1473
1552
  }
1553
+ const DEFAULT_BASE_URL = "https://api.openai.com/v1";
1554
+ const DEFAULT_MODEL = "gpt-4o-mini";
1555
+ let fileCache = null;
1556
+ function invalidateLLMConfigCache() {
1557
+ fileCache = null;
1558
+ }
1559
+ function readLlmFromFileSync() {
1560
+ if (!existsSync(CONFIG_PATH)) return {};
1561
+ try {
1562
+ const st = statSync(CONFIG_PATH);
1563
+ if (fileCache && fileCache.mtimeMs === st.mtimeMs) return fileCache.llm;
1564
+ const raw = readFileSync(CONFIG_PATH, "utf-8");
1565
+ const j = JSON.parse(raw);
1566
+ const llmRaw = j?.llm;
1567
+ const llm = {};
1568
+ if (llmRaw && typeof llmRaw === "object") {
1569
+ const o = llmRaw;
1570
+ if (typeof o.apiKey === "string" && o.apiKey.length > 0) llm.apiKey = o.apiKey;
1571
+ if (typeof o.baseUrl === "string" && o.baseUrl.trim()) llm.baseUrl = o.baseUrl.trim();
1572
+ if (typeof o.model === "string" && o.model.trim()) llm.model = o.model.trim();
1573
+ }
1574
+ fileCache = { mtimeMs: st.mtimeMs, llm };
1575
+ return llm;
1576
+ } catch {
1577
+ return {};
1578
+ }
1579
+ }
1474
1580
  function getLLMConfig() {
1475
- return {
1476
- apiKey: process.env.OPENAI_API_KEY,
1477
- baseUrl: process.env.OPENAI_BASE_URL || "https://api.openai.com/v1",
1478
- model: process.env.OPENAI_MODEL || "gpt-4o-mini"
1479
- };
1581
+ const file = readLlmFromFileSync();
1582
+ const apiKey = file.apiKey ?? process.env.OPENAI_API_KEY;
1583
+ const baseUrl = file.baseUrl ?? process.env.OPENAI_BASE_URL ?? DEFAULT_BASE_URL;
1584
+ const model = file.model ?? process.env.OPENAI_MODEL ?? DEFAULT_MODEL;
1585
+ return { apiKey, baseUrl, model };
1586
+ }
1587
+ function extractAssistantText(completion) {
1588
+ const choice = completion.choices[0];
1589
+ if (!choice) throw new Error("LLM 返回无 choices");
1590
+ const msg = choice.message;
1591
+ const raw = msg.content;
1592
+ if (typeof raw === "string") {
1593
+ const t = raw.trim();
1594
+ if (t.length > 0) return t;
1595
+ }
1596
+ const extra = msg;
1597
+ const rc = extra.reasoning_content;
1598
+ if (typeof rc === "string" && rc.trim().length > 0) {
1599
+ return rc.trim();
1600
+ }
1601
+ const refusal = msg.refusal;
1602
+ if (typeof refusal === "string" && refusal.trim()) {
1603
+ throw new Error(`模型拒绝: ${refusal.trim()}`);
1604
+ }
1605
+ const fr = choice.finish_reason;
1606
+ if (fr === "tool_calls") {
1607
+ throw new Error("LLM 返回了工具调用而非文本,请换一个模型或关闭工具调用");
1608
+ }
1609
+ if (fr === "content_filter") {
1610
+ throw new Error("内容被内容策略过滤");
1611
+ }
1612
+ if (fr === "length") {
1613
+ throw new Error(
1614
+ "LLM 输出在 content / reasoning_content 均为空前已用尽"
1615
+ );
1616
+ }
1617
+ throw new Error(`LLM 返回空内容 (finish_reason=${String(fr)})`);
1480
1618
  }
1481
1619
  function mergeConfig(override) {
1482
1620
  const env = getLLMConfig();
1483
1621
  const apiKey = override?.apiKey ?? env.apiKey;
1484
1622
  const baseUrl = override?.apiUrl ?? override?.baseUrl ?? env.baseUrl;
1485
1623
  const model = override?.model ?? env.model;
1486
- if (!apiKey) throw new Error("LLM API Key 未配置,请设置 OPENAI_API_KEY 或传入 apiKey");
1624
+ if (!apiKey) throw new Error("LLM API Key 未配置:请在管理后台「设置 → LLM」或环境变量 OPENAI_API_KEY 中设置");
1487
1625
  return { apiKey, baseUrl, model };
1488
1626
  }
1489
1627
  async function chatJson(prompt, config, options) {
@@ -1495,8 +1633,7 @@ async function chatJson(prompt, config, options) {
1495
1633
  max_tokens: options?.maxTokens ?? 8192,
1496
1634
  response_format: { type: "json_object" }
1497
1635
  });
1498
- const content = completion.choices[0]?.message?.content;
1499
- if (!content) throw new Error("LLM 返回空内容");
1636
+ const content = extractAssistantText(completion);
1500
1637
  return JSON.parse(content);
1501
1638
  }
1502
1639
  async function chatText(prompt, config, options) {
@@ -1507,9 +1644,7 @@ async function chatText(prompt, config, options) {
1507
1644
  messages: [{ role: "user", content: prompt }],
1508
1645
  max_tokens: options?.maxTokens ?? 8192
1509
1646
  });
1510
- const content = completion.choices[0]?.message?.content;
1511
- if (!content) throw new Error("LLM 返回空内容");
1512
- return content;
1647
+ return extractAssistantText(completion);
1513
1648
  }
1514
1649
  function generateGuid(link) {
1515
1650
  return createHash("sha256").update(link).digest("hex");
@@ -1593,7 +1728,7 @@ async function parseHtml(html, config = {}) {
1593
1728
  const actualMode = mode ?? (llmConfig != null ? "llm" : customParser != null ? "custom" : "llm");
1594
1729
  if (actualMode === "llm") {
1595
1730
  if (llmConfig == null && !getLLMConfig().apiKey) {
1596
- throw new Error('mode 为 "llm" 时必须提供 llmConfig 或设置 OPENAI_API_KEY 环境变量');
1731
+ throw new Error('mode 为 "llm" 时必须提供 llmConfig,或在后台「设置 LLM」/ OPENAI_API_KEY 中配置 Key');
1597
1732
  }
1598
1733
  const htmlForLLM = applyPurify(html, purify !== false);
1599
1734
  entries = await parseWithLLM(htmlForLLM, url, llmConfig ?? {});
@@ -1675,11 +1810,6 @@ function isValidSource(obj) {
1675
1810
  const s = obj;
1676
1811
  return typeof s.id === "string" && (typeof s.pattern === "string" || s.pattern instanceof RegExp) && typeof s.fetchItems === "function" && s.listUrlPattern === void 0;
1677
1812
  }
1678
- function isValidEnrichPlugin(obj) {
1679
- if (obj == null || typeof obj !== "object") return false;
1680
- const p = obj;
1681
- return typeof p.id === "string" && typeof p.match === "function" && typeof p.enrichItem === "function";
1682
- }
1683
1813
  async function loadSourcePluginsFromDir(dir, label) {
1684
1814
  const siteEntries = [];
1685
1815
  const sources = [];
@@ -1711,46 +1841,12 @@ async function loadSourcePluginsFromDir(dir, label) {
1711
1841
  }
1712
1842
  return { siteEntries, sources };
1713
1843
  }
1714
- async function loadPluginsFromDir(dir, label, validator) {
1715
- const result = [];
1716
- let entries;
1717
- try {
1718
- const raw = await readdir(dir, { withFileTypes: true, encoding: "utf-8" });
1719
- entries = raw;
1720
- } catch {
1721
- return result;
1722
- }
1723
- for (const e of entries) {
1724
- const name = String(e.name);
1725
- if (!e.isFile()) continue;
1726
- if (!PLUGIN_EXTENSIONS.some((ext) => name.endsWith(ext))) continue;
1727
- const filePath = join(dir, name);
1728
- try {
1729
- const mod = await import(pathToFileURL(filePath).href);
1730
- const plugin = mod.default ?? mod;
1731
- if (validator(plugin)) {
1732
- result.push(plugin);
1733
- } else {
1734
- logger.warn("plugin", "插件接口不匹配,已跳过", { label, name });
1735
- }
1736
- } catch (err) {
1737
- logger.warn("plugin", "插件加载失败", { label, name, err: err instanceof Error ? err.message : String(err) });
1738
- }
1739
- }
1740
- return result;
1741
- }
1742
- async function loadFromSourcesOrRoot() {
1743
- const [builtinFromSources, userFromSources] = await Promise.all([
1744
- loadSourcePluginsFromDir(BUILTIN_SOURCES_DIR, "builtin:sources"),
1745
- loadSourcePluginsFromDir(USER_SOURCES_DIR, "user:sources")
1746
- ]);
1747
- const hasAny = builtinFromSources.siteEntries.length + builtinFromSources.sources.length + userFromSources.siteEntries.length + userFromSources.sources.length > 0;
1748
- if (hasAny) return { builtin: builtinFromSources, user: userFromSources };
1749
- const [builtinRoot, userRoot] = await Promise.all([
1844
+ async function loadBuiltinAndUser() {
1845
+ const [builtin, user] = await Promise.all([
1750
1846
  loadSourcePluginsFromDir(BUILTIN_PLUGINS_DIR, "builtin"),
1751
1847
  loadSourcePluginsFromDir(USER_PLUGINS_DIR, "user")
1752
1848
  ]);
1753
- return { builtin: builtinRoot, user: userRoot };
1849
+ return { builtin, user };
1754
1850
  }
1755
1851
  const pluginSitePaths = /* @__PURE__ */ new Map();
1756
1852
  function mergeSourcePluginPaths(siteIds, pathMap, builtinSources, userSources) {
@@ -1774,7 +1870,7 @@ function getPluginFilePath(id) {
1774
1870
  return pluginSitePaths.get(id);
1775
1871
  }
1776
1872
  async function loadSiteAndSourcePlugins() {
1777
- const { builtin, user } = await loadFromSourcesOrRoot();
1873
+ const { builtin, user } = await loadBuiltinAndUser();
1778
1874
  const siteMap = /* @__PURE__ */ new Map();
1779
1875
  const pathMap = /* @__PURE__ */ new Map();
1780
1876
  for (const { site: s, filePath } of builtin.siteEntries) {
@@ -1797,73 +1893,6 @@ async function loadSiteAndSourcePlugins() {
1797
1893
  pathMap.forEach((path, id) => pluginSitePaths.set(id, path));
1798
1894
  return { sites: Array.from(siteMap.values()), sources: Array.from(sourceMap.values()) };
1799
1895
  }
1800
- let registeredEnrichPlugins = [];
1801
- async function loadEnrichPlugins() {
1802
- const [builtin, user] = await Promise.all([
1803
- loadPluginsFromDir(BUILTIN_ENRICH_DIR, "builtin:enrich", isValidEnrichPlugin),
1804
- loadPluginsFromDir(USER_ENRICH_DIR, "user:enrich", isValidEnrichPlugin)
1805
- ]);
1806
- const merged = /* @__PURE__ */ new Map();
1807
- for (const p of builtin) merged.set(p.id, p);
1808
- for (const p of user) {
1809
- if (merged.has(p.id)) logger.info("plugin", "用户 Enrich 插件覆盖同名内置", { pluginId: p.id });
1810
- merged.set(p.id, p);
1811
- }
1812
- const list = Array.from(merged.values());
1813
- list.sort((a, b) => (a.priority ?? 100) - (b.priority ?? 100));
1814
- registeredEnrichPlugins = list;
1815
- return list;
1816
- }
1817
- function getMatchedEnrichPlugin(item, ctx) {
1818
- return registeredEnrichPlugins.find((p) => p.match(item, ctx));
1819
- }
1820
- function buildEnrichContext(ctx) {
1821
- return {
1822
- cacheDir: ctx.cacheDir,
1823
- headless: ctx.headless,
1824
- proxy: ctx.proxy,
1825
- async fetchHtml(url, opts) {
1826
- const res = await fetchHtml(url, {
1827
- cacheDir: ctx.cacheDir,
1828
- useCache: false,
1829
- authFlow: void 0,
1830
- headless: ctx.headless,
1831
- proxy: ctx.proxy,
1832
- waitAfterLoadMs: opts?.waitMs,
1833
- purify: opts?.purify
1834
- });
1835
- return { html: res.body, finalUrl: res.finalUrl ?? url, status: res.status };
1836
- },
1837
- async extractItem(item, opts) {
1838
- const res = await fetchHtml(item.link, {
1839
- cacheDir: ctx.cacheDir,
1840
- useCache: false,
1841
- authFlow: void 0,
1842
- headless: ctx.headless,
1843
- proxy: ctx.proxy
1844
- });
1845
- if (res.status !== 200 && res.status !== 304) {
1846
- throw new Error(`默认正文提取失败: HTTP ${res.status} ${res.statusText} for ${item.link}`);
1847
- }
1848
- const extracted = await extractHtml(res.body, {
1849
- url: res.finalUrl ?? item.link,
1850
- cacheDir: ctx.cacheDir ?? void 0,
1851
- mode: "readability",
1852
- useCache: true,
1853
- cacheKey: opts?.cacheKey
1854
- });
1855
- const pubDate = extracted.pubDate != null ? typeof extracted.pubDate === "string" ? new Date(extracted.pubDate) : extracted.pubDate : item.pubDate;
1856
- return {
1857
- ...item,
1858
- author: normalizeAuthor(extracted.author ?? item.author),
1859
- title: extracted.title ?? item.title,
1860
- summary: extracted.summary ?? item.summary,
1861
- content: extracted.content ?? item.content,
1862
- pubDate
1863
- };
1864
- }
1865
- };
1866
- }
1867
1896
  function buildSiteContext(site, ctx) {
1868
1897
  const proxy = ctx.proxy ?? site.proxy;
1869
1898
  const authFlow = toAuthFlow(site);
@@ -1871,6 +1900,7 @@ function buildSiteContext(site, ctx) {
1871
1900
  cacheDir: ctx.cacheDir,
1872
1901
  headless: ctx.headless,
1873
1902
  proxy,
1903
+ deps: ctx.deps,
1874
1904
  async fetchHtml(url, opts) {
1875
1905
  const res = await fetchHtml(url, {
1876
1906
  cacheDir: ctx.cacheDir,
@@ -1881,7 +1911,8 @@ function buildSiteContext(site, ctx) {
1881
1911
  waitAfterLoadMs: opts?.waitMs,
1882
1912
  purify: opts?.purify,
1883
1913
  waitForSelector: opts?.waitForSelector,
1884
- waitForSelectorTimeoutMs: opts?.waitForSelectorTimeoutMs
1914
+ waitForSelectorTimeoutMs: opts?.waitForSelectorTimeoutMs,
1915
+ useHttpResponseBody: opts?.useHttpResponseBody
1885
1916
  });
1886
1917
  return { html: res.body, finalUrl: res.finalUrl ?? url, status: res.status };
1887
1918
  },
@@ -1925,15 +1956,15 @@ function createWebSource(site) {
1925
1956
  proxy: site.proxy ?? void 0,
1926
1957
  preCheck: authFlow ? async (ctx) => {
1927
1958
  if (!ctx.cacheDir) return;
1928
- const passed = await preCheckAuth(authFlow, ctx.cacheDir);
1959
+ const passed = await preCheckAuth(authFlow, ctx.cacheDir, {
1960
+ proxy: ctx.proxy,
1961
+ headless: ctx.headless
1962
+ });
1929
1963
  if (!passed) throw new AuthRequiredError(`站点 ${site.id} 需要登录,请先执行 ensureAuth`);
1930
1964
  } : void 0,
1931
1965
  async fetchItems(sourceId, ctx) {
1932
1966
  return site.fetchItems(sourceId, buildSiteContext(site, ctx));
1933
- },
1934
- enrichItem: site.enrichItem ? async (item, ctx) => {
1935
- return site.enrichItem(item, buildSiteContext(site, ctx));
1936
- } : void 0
1967
+ }
1937
1968
  };
1938
1969
  }
1939
1970
  const genericWebSource = {
@@ -1970,6 +2001,37 @@ function getPluginSites() {
1970
2001
  function getBestSite(url) {
1971
2002
  return getSiteByUrl(url, loadedSites);
1972
2003
  }
2004
+ const PLUGIN_HOST_DEPS = {
2005
+ parseHtml: parse,
2006
+ NodeType,
2007
+ createHash,
2008
+ RssParser,
2009
+ HttpsProxyAgent,
2010
+ ImapFlow,
2011
+ simpleParser,
2012
+ logger
2013
+ };
2014
+ function buildSourceContext(partial) {
2015
+ const { cacheDir, headless, proxy } = partial;
2016
+ return {
2017
+ ...partial,
2018
+ deps: PLUGIN_HOST_DEPS,
2019
+ async fetchHtml(url, opts) {
2020
+ const res = await fetchHtml(url, {
2021
+ cacheDir,
2022
+ useCache: false,
2023
+ headless,
2024
+ proxy,
2025
+ waitAfterLoadMs: opts?.waitMs,
2026
+ purify: opts?.purify,
2027
+ waitForSelector: opts?.waitForSelector,
2028
+ waitForSelectorTimeoutMs: opts?.waitForSelectorTimeoutMs,
2029
+ useHttpResponseBody: opts?.useHttpResponseBody
2030
+ });
2031
+ return { html: res.body, finalUrl: res.finalUrl ?? url, status: res.status };
2032
+ }
2033
+ };
2034
+ }
1973
2035
  const registeredSources = [];
1974
2036
  function sourcePatternToRegex(pattern) {
1975
2037
  if (pattern instanceof RegExp) return pattern;
@@ -1992,10 +2054,7 @@ function getSource(sourceId) {
1992
2054
  return genericWebSource;
1993
2055
  }
1994
2056
  async function initSources() {
1995
- const [siteResult] = await Promise.all([
1996
- loadSiteAndSourcePlugins(),
1997
- loadEnrichPlugins()
1998
- ]);
2057
+ const siteResult = await loadSiteAndSourcePlugins();
1999
2058
  const { sites, sources: sourcePlugins } = siteResult;
2000
2059
  setLoadedSites(sites);
2001
2060
  registeredSources.length = 0;
@@ -2016,6 +2075,38 @@ async function initSources() {
2016
2075
  function resolveRef(src) {
2017
2076
  return src.ref ?? src.url ?? "";
2018
2077
  }
2078
+ async function readGlobalProxyFromConfig() {
2079
+ try {
2080
+ const raw = await readFile(CONFIG_PATH, "utf-8");
2081
+ const j = JSON.parse(raw);
2082
+ if (typeof j.globalProxy === "string") {
2083
+ const t = j.globalProxy.trim();
2084
+ return t.length > 0 ? t : void 0;
2085
+ }
2086
+ } catch {
2087
+ }
2088
+ return void 0;
2089
+ }
2090
+ async function saveGlobalProxyToConfig(proxy) {
2091
+ let root = {};
2092
+ try {
2093
+ const raw = await readFile(CONFIG_PATH, "utf-8");
2094
+ root = JSON.parse(raw);
2095
+ } catch {
2096
+ }
2097
+ const t = proxy.trim();
2098
+ if (t.length === 0) {
2099
+ delete root.globalProxy;
2100
+ } else {
2101
+ root.globalProxy = t;
2102
+ }
2103
+ await writeFile(CONFIG_PATH, JSON.stringify(root, null, 2) + "\n", "utf-8");
2104
+ }
2105
+ async function resolveProxyForSite(site) {
2106
+ const s = site.proxy?.trim();
2107
+ if (s) return s;
2108
+ return readGlobalProxyFromConfig();
2109
+ }
2019
2110
  async function loadSourcesFile() {
2020
2111
  try {
2021
2112
  const raw = await readFile(SOURCES_CONFIG_PATH, "utf-8");
@@ -2061,6 +2152,15 @@ async function saveSourcesFile(sources) {
2061
2152
  "utf-8"
2062
2153
  );
2063
2154
  }
2155
+ async function getEffectiveProxyForListUrl(listUrl, source) {
2156
+ const list = await getAllSources();
2157
+ const sub = list.find((s) => resolveRef(s) === listUrl);
2158
+ const fromSub = sub?.proxy?.trim();
2159
+ if (fromSub) return fromSub;
2160
+ const fromPlugin = source.proxy?.trim();
2161
+ if (fromPlugin) return fromPlugin;
2162
+ return readGlobalProxyFromConfig();
2163
+ }
2064
2164
  async function getSourcesRaw() {
2065
2165
  try {
2066
2166
  const raw = await readFile(SOURCES_CONFIG_PATH, "utf-8");
@@ -2378,40 +2478,221 @@ function onFeedUpdated(fn) {
2378
2478
  eventBus.on("feed:updated", fn);
2379
2479
  return () => eventBus.off("feed:updated", fn);
2380
2480
  }
2381
- const DEFAULTS = {
2382
- concurrency: 2,
2383
- maxRetries: 2
2384
- };
2385
- async function loadEnrichConfig() {
2386
- let fileEnrich = {};
2481
+ async function getDeliverConfig() {
2387
2482
  try {
2388
- const raw = await readFile(join(USER_DIR, "config.json"), "utf-8");
2389
- const parsed = JSON.parse(raw);
2390
- if (parsed.enrich && typeof parsed.enrich === "object") {
2391
- fileEnrich = parsed.enrich;
2392
- }
2483
+ const raw = await readFile(CONFIG_PATH, "utf-8");
2484
+ const j = JSON.parse(raw);
2485
+ const u = j?.deliver?.url;
2486
+ const t = j?.deliver?.token;
2487
+ return {
2488
+ url: typeof u === "string" ? u.trim() : "",
2489
+ token: typeof t === "string" ? t.trim() : ""
2490
+ };
2393
2491
  } catch {
2492
+ return { url: "", token: "" };
2394
2493
  }
2395
- return {
2396
- concurrency: Number(fileEnrich["concurrency"] ?? process.env.ENRICH_CONCURRENCY ?? DEFAULTS.concurrency),
2397
- maxRetries: Number(fileEnrich["maxRetries"] ?? process.env.ENRICH_MAX_RETRIES ?? DEFAULTS.maxRetries)
2398
- };
2399
2494
  }
2400
- const validateCron = validate;
2401
- const tasks$1 = /* @__PURE__ */ new Map();
2402
- const groups = /* @__PURE__ */ new Map();
2403
- const DEFAULT_RETRY_DELAY_MS = 5e3;
2404
- const DEFAULT_GROUP_CONCURRENCY = 10;
2405
- async function runWithRetry(task, options) {
2406
- const retries = options.retries ?? 0;
2407
- const retryDelayMs = options.retryDelayMs ?? DEFAULT_RETRY_DELAY_MS;
2408
- let lastErr;
2409
- for (let attempt = 0; attempt <= retries; attempt++) {
2410
- try {
2411
- await task();
2412
- return;
2413
- } catch (err) {
2414
- lastErr = err;
2495
+ async function saveDeliverConfig(config) {
2496
+ let root = {};
2497
+ try {
2498
+ const raw = await readFile(CONFIG_PATH, "utf-8");
2499
+ root = JSON.parse(raw);
2500
+ } catch {
2501
+ }
2502
+ const prev = root.deliver;
2503
+ const base2 = typeof prev === "object" && prev !== null && !Array.isArray(prev) ? { ...prev } : {};
2504
+ const url = config.url.trim();
2505
+ const token = config.token.trim();
2506
+ const next = { ...base2, url };
2507
+ if (token) next.token = token;
2508
+ else delete next.token;
2509
+ root.deliver = next;
2510
+ await writeFile(CONFIG_PATH, JSON.stringify(root, null, 2) + "\n", "utf-8");
2511
+ }
2512
+ function feedItemsToPayload(items) {
2513
+ return items.map((i) => ({
2514
+ guid: i.guid,
2515
+ title: i.title,
2516
+ link: i.link,
2517
+ pubDate: i.pubDate instanceof Date ? i.pubDate.toISOString() : (/* @__PURE__ */ new Date()).toISOString(),
2518
+ author: i.author,
2519
+ summary: i.summary,
2520
+ content: i.content,
2521
+ tags: i.tags,
2522
+ sourceRef: i.sourceRef,
2523
+ translations: i.translations
2524
+ }));
2525
+ }
2526
+ async function postDeliverItems(url, sourceRef, items, options) {
2527
+ if (!url.trim() || items.length === 0) return;
2528
+ const body = JSON.stringify({ sourceRef, items: feedItemsToPayload(items) });
2529
+ const headers = { "Content-Type": "application/json" };
2530
+ const t = options?.bearerToken?.trim();
2531
+ if (t) headers.Authorization = `Bearer ${t}`;
2532
+ const res = await fetch(url.trim(), {
2533
+ method: "POST",
2534
+ headers,
2535
+ body,
2536
+ signal: AbortSignal.timeout(12e4)
2537
+ });
2538
+ if (!res.ok) {
2539
+ const text = await res.text().catch(() => "");
2540
+ throw new Error(`HTTP ${res.status}${text ? `: ${text.slice(0, 200)}` : ""}`);
2541
+ }
2542
+ }
2543
+ async function postDeliverItemsSafe(url, sourceRef, items, options) {
2544
+ try {
2545
+ await postDeliverItems(url, sourceRef, items, options);
2546
+ } catch (err) {
2547
+ logger.warn("deliver", "投递失败", {
2548
+ sourceRef,
2549
+ count: items.length,
2550
+ err: err instanceof Error ? err.message : String(err)
2551
+ });
2552
+ }
2553
+ }
2554
+ function resolveHeadlessForFeeder(config) {
2555
+ if (config.force === true) {
2556
+ return config.headless === true ? true : false;
2557
+ }
2558
+ return config.headless;
2559
+ }
2560
+ function buildChannelFromItems(listUrl, items, lng) {
2561
+ const channel = {
2562
+ title: items[0]?.author?.length ? `${items[0].author[0]} 的订阅` : "RSS 订阅",
2563
+ link: listUrl,
2564
+ description: `来自 ${listUrl} 的订阅`
2565
+ };
2566
+ if (lng) channel.language = lng;
2567
+ return channel;
2568
+ }
2569
+ function toRssEntry(item, lng) {
2570
+ const eff = getEffectiveItemFields(item, lng);
2571
+ const hasContent = eff.content != null && eff.content !== "";
2572
+ const desc = hasContent ? eff.content : eff.summary;
2573
+ return {
2574
+ title: eff.title,
2575
+ link: item.link,
2576
+ description: desc,
2577
+ guid: item.guid,
2578
+ published: item.pubDate?.toISOString?.() ?? void 0,
2579
+ imageUrl: item.imageUrl
2580
+ };
2581
+ }
2582
+ const generatingKeys = /* @__PURE__ */ new Map();
2583
+ const pipelineCtx = {
2584
+ llm: { chatJson, chatText },
2585
+ db: { getSystemTags }
2586
+ };
2587
+ async function runPipelineOnItem(item, ctx) {
2588
+ return runPipeline(item, { ...pipelineCtx, ...ctx });
2589
+ }
2590
+ async function generateAndCache(listUrl, key, config, proxy) {
2591
+ const { cacheDir = "cache" } = config;
2592
+ const headless = resolveHeadlessForFeeder(config);
2593
+ const source = getSource(listUrl);
2594
+ const ctx = buildSourceContext({ cacheDir, headless, proxy });
2595
+ let items;
2596
+ try {
2597
+ items = await source.fetchItems(listUrl, ctx);
2598
+ } catch (err) {
2599
+ generatingKeys.delete(key);
2600
+ const message = err instanceof Error ? err.message : String(err);
2601
+ logger.error("scraper", "抓取失败", { source_url: listUrl, err: message });
2602
+ throw err;
2603
+ }
2604
+ const sourceRefStored = canonicalHttpSourceRef(listUrl);
2605
+ items.forEach((i) => {
2606
+ i.sourceRef = sourceRefStored;
2607
+ i.author = normalizeAuthor(i.author);
2608
+ });
2609
+ generatingKeys.delete(key);
2610
+ logger.info("scraper", "抓取成功", { source_url: listUrl, count: items.length });
2611
+ const { url: deliverUrl, token: deliverToken } = await getDeliverConfig();
2612
+ let newCount = 0;
2613
+ let newIds = /* @__PURE__ */ new Set();
2614
+ const upsertResult = await upsertItems(items).catch((err) => {
2615
+ logger.warn("db", "upsertItems 失败", { source_url: listUrl, err: err instanceof Error ? err.message : String(err) });
2616
+ return { newCount: 0, newIds: /* @__PURE__ */ new Set() };
2617
+ });
2618
+ newCount = upsertResult.newCount;
2619
+ newIds = upsertResult.newIds;
2620
+ let pipelineDroppedNew = 0;
2621
+ const shouldRunPipelineRow = (guid) => newIds.has(guid);
2622
+ for (let i = 0; i < items.length; i++) {
2623
+ if (!shouldRunPipelineRow(items[i].guid)) continue;
2624
+ const processed = await runPipelineOnItem(items[i], { sourceUrl: sourceRefStored });
2625
+ items[i] = processed;
2626
+ if (isPipelineDroppedItem(processed)) {
2627
+ await deleteItem(processed.guid).catch(
2628
+ (err) => logger.warn("db", "质量过滤后删除条目失败", { source_url: listUrl, err: err instanceof Error ? err.message : String(err) })
2629
+ );
2630
+ pipelineDroppedNew++;
2631
+ } else {
2632
+ updateItemContent(processed).catch(
2633
+ (err) => logger.warn("db", "updateItemContent 失败", { source_url: listUrl, err: err instanceof Error ? err.message : String(err) })
2634
+ );
2635
+ }
2636
+ }
2637
+ if (newCount > 0) {
2638
+ emitFeedUpdated({ sourceUrl: sourceRefStored, newCount: newCount - pipelineDroppedNew });
2639
+ }
2640
+ const out = items.filter((i) => !isPipelineDroppedItem(i));
2641
+ if (deliverUrl && out.length > 0) {
2642
+ await postDeliverItemsSafe(deliverUrl, sourceRefStored, out, {
2643
+ bearerToken: deliverToken || void 0
2644
+ });
2645
+ }
2646
+ return { items: out };
2647
+ }
2648
+ async function getItems(listUrl, config = {}) {
2649
+ const source = getSource(listUrl);
2650
+ const proxy = await getEffectiveProxyForListUrl(listUrl, source);
2651
+ const headless = resolveHeadlessForFeeder(config);
2652
+ const key = config.cron ? cacheKeyFromCron(listUrl, config.cron) : cacheKey(listUrl, config.refreshInterval ?? source.refreshInterval ?? "1day");
2653
+ if (source.preCheck != null) {
2654
+ try {
2655
+ await source.preCheck(
2656
+ buildSourceContext({
2657
+ cacheDir: config.cacheDir ?? "cache",
2658
+ headless,
2659
+ proxy
2660
+ })
2661
+ );
2662
+ } catch (err) {
2663
+ if (err instanceof AuthRequiredError) throw err;
2664
+ throw err;
2665
+ }
2666
+ }
2667
+ let task = config.force ? void 0 : generatingKeys.get(key);
2668
+ if (!task) {
2669
+ task = generateAndCache(listUrl, key, config, proxy);
2670
+ if (!config.force) generatingKeys.set(key, task);
2671
+ }
2672
+ const { items } = await task;
2673
+ return { items, fromCache: false };
2674
+ }
2675
+ function feedItemsToRssXml(items, listUrl, lng, opts) {
2676
+ const channel = buildChannelFromItems(listUrl, items, lng);
2677
+ if (opts?.channelTitle) channel.title = opts.channelTitle;
2678
+ if (opts?.channelDesc) channel.description = opts.channelDesc;
2679
+ return buildRssXml(channel, items.map((it) => toRssEntry(it, lng)));
2680
+ }
2681
+ const validateCron = validate;
2682
+ const tasks$1 = /* @__PURE__ */ new Map();
2683
+ const groups = /* @__PURE__ */ new Map();
2684
+ const DEFAULT_RETRY_DELAY_MS = 5e3;
2685
+ const DEFAULT_GROUP_CONCURRENCY = 10;
2686
+ async function runWithRetry(task, options) {
2687
+ const retries = options.retries ?? 0;
2688
+ const retryDelayMs = options.retryDelayMs ?? DEFAULT_RETRY_DELAY_MS;
2689
+ let lastErr;
2690
+ for (let attempt = 0; attempt <= retries; attempt++) {
2691
+ try {
2692
+ await task();
2693
+ return;
2694
+ } catch (err) {
2695
+ lastErr = err;
2415
2696
  if (attempt < retries) {
2416
2697
  await new Promise((r) => setTimeout(r, retryDelayMs));
2417
2698
  }
@@ -2580,365 +2861,8 @@ function getGroupStats() {
2580
2861
  }
2581
2862
  return result;
2582
2863
  }
2583
- const ENRICH_GROUP = "enrich";
2584
- const MAX_STORED_TASKS = 200;
2585
- const RETRY_DELAY_MS = 3e3;
2586
- class EnrichQueue {
2587
- tasks = /* @__PURE__ */ new Map();
2588
- taskItems = /* @__PURE__ */ new Map();
2589
- taskCallbacks = /* @__PURE__ */ new Map();
2590
- configLoaded = false;
2591
- async ensureConfig() {
2592
- if (this.configLoaded) return { concurrency: 2, maxRetries: 2 };
2593
- const config = await loadEnrichConfig();
2594
- this.configLoaded = true;
2595
- logger.info("scraper", "配置加载完成", { concurrency: config.concurrency, maxRetries: config.maxRetries });
2596
- return config;
2597
- }
2598
- evictIfNeeded() {
2599
- if (this.tasks.size <= MAX_STORED_TASKS) return;
2600
- const ids = [...this.tasks.keys()];
2601
- for (const id of ids) {
2602
- if (this.tasks.get(id)?.status === "done") {
2603
- this.removeTask(id);
2604
- if (this.tasks.size <= MAX_STORED_TASKS) return;
2605
- }
2606
- }
2607
- if (this.tasks.size > MAX_STORED_TASKS) this.removeTask(ids[0]);
2608
- }
2609
- removeTask(id) {
2610
- this.tasks.delete(id);
2611
- this.taskItems.delete(id);
2612
- this.taskCallbacks.delete(id);
2613
- }
2614
- checkTaskComplete(taskId) {
2615
- const task = this.tasks.get(taskId);
2616
- const items = this.taskItems.get(taskId);
2617
- const callbacks = this.taskCallbacks.get(taskId);
2618
- if (!task || !items) return;
2619
- const allSettled = task.itemResults.every((r) => r.status === "done" || r.status === "failed");
2620
- if (!allSettled) return;
2621
- task.status = "done";
2622
- task.completedAt = (/* @__PURE__ */ new Date()).toISOString();
2623
- logger.info("scraper", "任务完成", {
2624
- source_url: task.sourceUrl,
2625
- taskId,
2626
- done: task.progress.done,
2627
- failed: task.progress.failed
2628
- });
2629
- Promise.resolve(callbacks?.onAllDone?.(items)).catch((err) => {
2630
- logger.warn("scraper", "onAllDone 回调异常", { taskId, err: err instanceof Error ? err.message : String(err) });
2631
- });
2632
- }
2633
- async submit(items, enrichFn, ctx, opts) {
2634
- const config = await this.ensureConfig();
2635
- const id = randomUUID();
2636
- const itemResults = items.map((_, i) => ({
2637
- index: i,
2638
- status: "pending",
2639
- retries: 0
2640
- }));
2641
- const task = {
2642
- id,
2643
- sourceUrl: opts.sourceUrl,
2644
- status: items.length === 0 ? "done" : "pending",
2645
- progress: { total: items.length, done: 0, failed: 0 },
2646
- itemResults,
2647
- createdAt: (/* @__PURE__ */ new Date()).toISOString(),
2648
- completedAt: items.length === 0 ? (/* @__PURE__ */ new Date()).toISOString() : void 0
2649
- };
2650
- const itemsCopy = [...items];
2651
- this.tasks.set(id, task);
2652
- this.taskItems.set(id, itemsCopy);
2653
- this.taskCallbacks.set(id, opts);
2654
- this.evictIfNeeded();
2655
- for (let i = 0; i < items.length; i++) {
2656
- const itemIndex = i;
2657
- const workId = `${id}-${i}`;
2658
- const taskFn = async () => {
2659
- const t = this.tasks.get(id);
2660
- const its = this.taskItems.get(id);
2661
- const cbs = this.taskCallbacks.get(id);
2662
- if (!t || !its || !cbs) return;
2663
- const itemResult = t.itemResults[itemIndex];
2664
- if (!itemResult) return;
2665
- itemResult.status = "running";
2666
- if (t.status === "pending") t.status = "running";
2667
- for (let r = 0; r <= config.maxRetries; r++) {
2668
- try {
2669
- const enriched = await enrichFn(its[itemIndex], ctx);
2670
- its[itemIndex] = enriched;
2671
- itemResult.item = enriched;
2672
- itemResult.status = "done";
2673
- t.progress.done++;
2674
- await Promise.resolve(cbs.onItemDone?.(enriched, itemIndex));
2675
- this.checkTaskComplete(id);
2676
- return;
2677
- } catch (err) {
2678
- const msg = err instanceof Error ? err.message : String(err);
2679
- if (r < config.maxRetries) {
2680
- logger.warn("scraper", "提取失败,重试中", {
2681
- source_url: t.sourceUrl,
2682
- item_url: its[itemIndex]?.link,
2683
- retries: r + 1,
2684
- maxRetries: config.maxRetries,
2685
- err: msg
2686
- });
2687
- await new Promise((resolve2) => setTimeout(resolve2, RETRY_DELAY_MS));
2688
- } else {
2689
- itemResult.status = "failed";
2690
- itemResult.error = msg;
2691
- t.progress.failed++;
2692
- logger.warn("scraper", "提取最终失败", {
2693
- source_url: t.sourceUrl,
2694
- item_url: its[itemIndex]?.link,
2695
- err: msg
2696
- });
2697
- const failedItem = { ...its[itemIndex], enrichFailed: true };
2698
- its[itemIndex] = failedItem;
2699
- await Promise.resolve(cbs.onItemDone?.(failedItem, itemIndex));
2700
- this.checkTaskComplete(id);
2701
- }
2702
- }
2703
- }
2704
- };
2705
- schedule(ENRICH_GROUP, workId, taskFn, { concurrency: config.concurrency }).catch(() => {
2706
- });
2707
- }
2708
- return id;
2709
- }
2710
- getTask(id) {
2711
- return this.tasks.get(id);
2712
- }
2713
- getTaskItems(id) {
2714
- return this.taskItems.get(id);
2715
- }
2716
- }
2717
- const enrichQueue = new EnrichQueue();
2718
- async function getDeliverUrl() {
2719
- try {
2720
- const raw = await readFile(CONFIG_PATH, "utf-8");
2721
- const j = JSON.parse(raw);
2722
- const u = j?.deliver?.url;
2723
- return typeof u === "string" ? u.trim() : "";
2724
- } catch {
2725
- return "";
2726
- }
2727
- }
2728
- async function saveDeliverUrl(url) {
2729
- let root = {};
2730
- try {
2731
- const raw = await readFile(CONFIG_PATH, "utf-8");
2732
- root = JSON.parse(raw);
2733
- } catch {
2734
- }
2735
- root.deliver = { url: url.trim() };
2736
- await writeFile(CONFIG_PATH, JSON.stringify(root, null, 2) + "\n", "utf-8");
2737
- }
2738
- function feedItemsToPayload(items) {
2739
- return items.map((i) => ({
2740
- guid: i.guid,
2741
- title: i.title,
2742
- link: i.link,
2743
- pubDate: i.pubDate instanceof Date ? i.pubDate.toISOString() : (/* @__PURE__ */ new Date()).toISOString(),
2744
- author: i.author,
2745
- summary: i.summary,
2746
- content: i.content,
2747
- tags: i.tags,
2748
- sourceRef: i.sourceRef,
2749
- translations: i.translations
2750
- }));
2751
- }
2752
- async function postDeliverItems(url, sourceRef, items) {
2753
- if (!url.trim() || items.length === 0) return;
2754
- const body = JSON.stringify({ sourceRef, items: feedItemsToPayload(items) });
2755
- const res = await fetch(url.trim(), {
2756
- method: "POST",
2757
- headers: { "Content-Type": "application/json" },
2758
- body,
2759
- signal: AbortSignal.timeout(12e4)
2760
- });
2761
- if (!res.ok) {
2762
- const text = await res.text().catch(() => "");
2763
- throw new Error(`HTTP ${res.status}${text ? `: ${text.slice(0, 200)}` : ""}`);
2764
- }
2765
- }
2766
- async function postDeliverItemsSafe(url, sourceRef, items) {
2767
- try {
2768
- await postDeliverItems(url, sourceRef, items);
2769
- } catch (err) {
2770
- logger.warn("deliver", "投递失败", {
2771
- sourceRef,
2772
- count: items.length,
2773
- err: err instanceof Error ? err.message : String(err)
2774
- });
2775
- }
2776
- }
2777
- function buildChannelFromItems(listUrl, items, lng) {
2778
- const channel = {
2779
- title: items[0]?.author?.length ? `${items[0].author[0]} 的订阅` : "RSS 订阅",
2780
- link: listUrl,
2781
- description: `来自 ${listUrl} 的订阅`
2782
- };
2783
- if (lng) channel.language = lng;
2784
- return channel;
2785
- }
2786
- function toRssEntry(item, lng) {
2787
- const eff = getEffectiveItemFields(item, lng);
2788
- const hasContent = eff.content != null && eff.content !== "";
2789
- const desc = hasContent ? eff.content : eff.summary;
2790
- return {
2791
- title: eff.title,
2792
- link: item.link,
2793
- description: desc,
2794
- guid: item.guid,
2795
- published: item.pubDate?.toISOString?.() ?? void 0,
2796
- imageUrl: item.imageUrl
2797
- };
2798
- }
2799
- const generatingKeys = /* @__PURE__ */ new Map();
2800
- const pipelineCtx = {
2801
- llm: { chatJson, chatText },
2802
- db: { getSystemTags }
2803
- };
2804
- async function runPipelineOnItem(item, ctx) {
2805
- return runPipeline(item, { ...pipelineCtx, ...ctx });
2806
- }
2807
- function buildEnrichFn(source, listUrl, ctx) {
2808
- const enrichCtx = buildEnrichContext(ctx);
2809
- enrichCtx.sourceUrl = listUrl;
2810
- return async (item) => {
2811
- let result = item;
2812
- if (source.enrichItem) {
2813
- result = await source.enrichItem(item, ctx);
2814
- }
2815
- const plugin = getMatchedEnrichPlugin(result, { sourceUrl: listUrl });
2816
- if (plugin) {
2817
- result = await plugin.enrichItem(result, enrichCtx);
2818
- }
2819
- return result;
2820
- };
2821
- }
2822
- async function generateAndCache(listUrl, key, config) {
2823
- const { cacheDir = "cache", includeContent = true, headless } = config;
2824
- const source = getSource(listUrl);
2825
- const ctx = { cacheDir, headless, proxy: config.proxy ?? source.proxy };
2826
- let items;
2827
- try {
2828
- items = await source.fetchItems(listUrl, ctx);
2829
- } catch (err) {
2830
- generatingKeys.delete(key);
2831
- const message = err instanceof Error ? err.message : String(err);
2832
- logger.error("scraper", "抓取失败", { source_url: listUrl, err: message });
2833
- throw err;
2834
- }
2835
- items.forEach((i) => {
2836
- i.sourceRef = listUrl;
2837
- i.author = normalizeAuthor(i.author);
2838
- });
2839
- generatingKeys.delete(key);
2840
- logger.info("scraper", "抓取成功", { source_url: listUrl, count: items.length });
2841
- const deliverUrl = await getDeliverUrl();
2842
- let newCount = 0;
2843
- let newIds = /* @__PURE__ */ new Set();
2844
- const upsertResult = await upsertItems(items).catch((err) => {
2845
- logger.warn("db", "upsertItems 失败", { source_url: listUrl, err: err instanceof Error ? err.message : String(err) });
2846
- return { newCount: 0, newIds: /* @__PURE__ */ new Set() };
2847
- });
2848
- newCount = upsertResult.newCount;
2849
- newIds = upsertResult.newIds;
2850
- let pipelineDroppedNew = 0;
2851
- const shouldRunPipelineRow = (guid) => newIds.has(guid);
2852
- const hasEnrich = source.enrichItem != null || items.some((i) => getMatchedEnrichPlugin(i, { sourceUrl: listUrl }));
2853
- if (!includeContent || items.length === 0 || !hasEnrich) {
2854
- for (let i = 0; i < items.length; i++) {
2855
- if (!shouldRunPipelineRow(items[i].guid)) continue;
2856
- const processed = await runPipelineOnItem(items[i], { sourceUrl: listUrl, isEnriched: false });
2857
- items[i] = processed;
2858
- if (isPipelineDroppedItem(processed)) {
2859
- await deleteItem(processed.guid).catch(
2860
- (err) => logger.warn("db", "质量过滤后删除条目失败", { source_url: listUrl, err: err instanceof Error ? err.message : String(err) })
2861
- );
2862
- pipelineDroppedNew++;
2863
- } else {
2864
- updateItemContent(processed).catch(
2865
- (err) => logger.warn("db", "updateItemContent 失败", { source_url: listUrl, err: err instanceof Error ? err.message : String(err) })
2866
- );
2867
- }
2868
- }
2869
- if (newCount > 0) {
2870
- emitFeedUpdated({ sourceUrl: listUrl, newCount: newCount - pipelineDroppedNew });
2871
- }
2872
- const out = items.filter((i) => !isPipelineDroppedItem(i));
2873
- if (deliverUrl && out.length > 0) {
2874
- await postDeliverItemsSafe(deliverUrl, listUrl, out);
2875
- }
2876
- return { items: out };
2877
- }
2878
- const enrichFn = (item, _ctx) => buildEnrichFn(source, listUrl, ctx)(item);
2879
- await enrichQueue.submit(
2880
- items,
2881
- enrichFn,
2882
- ctx,
2883
- {
2884
- sourceUrl: listUrl,
2885
- onItemDone: async (enrichedItem, index) => {
2886
- enrichedItem.sourceRef = listUrl;
2887
- const processed = shouldRunPipelineRow(enrichedItem.guid) ? await runPipelineOnItem(enrichedItem, { sourceUrl: listUrl, isEnriched: true }) : enrichedItem;
2888
- items[index] = processed;
2889
- if (isPipelineDroppedItem(processed)) {
2890
- await deleteItem(processed.guid).catch(
2891
- (err) => logger.warn("db", "质量过滤后删除条目失败", { source_url: listUrl, err: err instanceof Error ? err.message : String(err) })
2892
- );
2893
- pipelineDroppedNew++;
2894
- } else {
2895
- updateItemContent(processed).catch(
2896
- (err) => logger.warn("db", "updateItemContent 失败", { source_url: listUrl, err: err instanceof Error ? err.message : String(err) })
2897
- );
2898
- }
2899
- },
2900
- onAllDone: async () => {
2901
- for (let i = items.length - 1; i >= 0; i--) {
2902
- if (isPipelineDroppedItem(items[i])) items.splice(i, 1);
2903
- }
2904
- if (newCount > 0) {
2905
- emitFeedUpdated({ sourceUrl: listUrl, newCount: newCount - pipelineDroppedNew });
2906
- }
2907
- if (deliverUrl && items.length > 0) {
2908
- await postDeliverItemsSafe(deliverUrl, listUrl, items);
2909
- }
2910
- }
2911
- }
2912
- );
2913
- return { items };
2914
- }
2915
- async function getItems(listUrl, config = {}) {
2916
- const source = getSource(listUrl);
2917
- const key = config.cron ? cacheKeyFromCron(listUrl, config.cron) : cacheKey(listUrl, config.refreshInterval ?? source.refreshInterval ?? "1day");
2918
- if (source.preCheck != null) {
2919
- try {
2920
- await source.preCheck({ cacheDir: config.cacheDir ?? "cache", headless: config.headless, proxy: config.proxy ?? source.proxy });
2921
- } catch (err) {
2922
- if (err instanceof AuthRequiredError) throw err;
2923
- throw err;
2924
- }
2925
- }
2926
- let task = config.force ? void 0 : generatingKeys.get(key);
2927
- if (!task) {
2928
- task = generateAndCache(listUrl, key, config);
2929
- if (!config.force) generatingKeys.set(key, task);
2930
- }
2931
- const { items } = await task;
2932
- return { items, fromCache: false };
2933
- }
2934
- function feedItemsToRssXml(items, listUrl, lng, opts) {
2935
- const channel = buildChannelFromItems(listUrl, items, lng);
2936
- if (opts?.channelTitle) channel.title = opts.channelTitle;
2937
- if (opts?.channelDesc) channel.description = opts.channelDesc;
2938
- return buildRssXml(channel, items.map((it) => toRssEntry(it, lng)));
2939
- }
2940
2864
  const DEFAULT_REFRESH = "1day";
2941
- const SOURCES_CONCURRENCY = 5;
2865
+ const SOURCES_CONCURRENCY = 1;
2942
2866
  function createPullTask(ref, cacheDir, cronExpr) {
2943
2867
  return async () => {
2944
2868
  try {
@@ -2975,7 +2899,7 @@ async function rescheduleSources(cacheDir, runNow2) {
2975
2899
  }
2976
2900
  }
2977
2901
  async function initScheduler(cacheDir) {
2978
- await rescheduleSources(cacheDir, true);
2902
+ await rescheduleSources(cacheDir, false);
2979
2903
  let debounceTimer = null;
2980
2904
  try {
2981
2905
  const watcher = watch(SOURCES_CONFIG_PATH, () => {
@@ -3044,27 +2968,19 @@ function registerRssApiRoutes(app) {
3044
2968
  }
3045
2969
  });
3046
2970
  }
3047
- function registerEnrichRoutes(app) {
3048
- app.get("/api/enrich/:taskId", (c) => {
3049
- const taskId = c.req.param("taskId");
3050
- const task = enrichQueue.getTask(taskId);
3051
- if (!task) return c.json({ error: "任务不存在或已过期" }, 404);
3052
- return c.json(task);
3053
- });
3054
- }
3055
2971
  function registerSchedulerRoutes(app) {
3056
2972
  app.get("/api/scheduler/stats", requireAdmin(), (c) => {
3057
2973
  const stats = getGroupStats();
3058
2974
  return c.json(stats);
3059
2975
  });
3060
2976
  }
3061
- const USER_SITE_TEMPLATE = join(BUILTIN_PLUGINS_DIR, "templates", "site.rssany.js");
3062
2977
  const SITE_TEMPLATE_FALLBACK = `/**
3063
- * Site 插件模板(由管理页添加,位于 .rssany/plugins/sources/)
2978
+ * Site 插件模板(由 /plugins 页添加,位于 .rssany/plugins/)
2979
+ * HTML DOM 解析请用 ctx.deps.parseHtml,勿在插件内 import node_modules。
3064
2980
  */
3065
2981
  export default {
3066
2982
  id: "__PLUGIN_ID__",
3067
- listUrlPattern: "https://example.com/{segment}",
2983
+ listUrlPattern: __LIST_URL_PATTERN__,
3068
2984
  refreshInterval: "1day",
3069
2985
 
3070
2986
  async fetchItems(sourceId, ctx) {
@@ -3072,7 +2988,7 @@ export default {
3072
2988
  waitMs: 2000,
3073
2989
  purify: true,
3074
2990
  });
3075
- void html;
2991
+ void ctx.deps.parseHtml(html);
3076
2992
  void finalUrl;
3077
2993
  return [];
3078
2994
  },
@@ -3081,6 +2997,11 @@ export default {
3081
2997
  function isValidNewPluginId(id) {
3082
2998
  return /^[a-zA-Z][a-zA-Z0-9_-]{0,63}$/.test(id) && id !== "generic" && id !== "new";
3083
2999
  }
3000
/**
 * Check whether `pattern` is acceptable as the `listUrlPattern` of a new
 * plugin: a non-empty string of at most 2048 characters with no CR or LF.
 *
 * @param {unknown} pattern - Candidate pattern.
 * @returns {boolean} `true` when the pattern is acceptable.
 */
function isValidNewListUrlPattern(pattern) {
  // Without this guard `null`/`undefined` throw on `.length`, and numbers or
  // plain objects (whose `.length` is undefined) slip through as valid.
  if (typeof pattern !== "string") return false;
  if (pattern.length === 0 || pattern.length > 2048) return false;
  return !/[\r\n]/.test(pattern);
}
3084
3005
  async function fileExists(p) {
3085
3006
  try {
3086
3007
  await access(p);
@@ -3110,16 +3031,23 @@ function registerPluginsRoutes(app) {
3110
3031
  if (!isValidNewPluginId(id)) {
3111
3032
  return c.json({ error: "id 须为字母开头,仅含字母数字、下划线、连字符;不能为 generic 或 new" }, 400);
3112
3033
  }
3034
+ const listUrlPatternRaw = typeof body.listUrlPattern === "string" ? body.listUrlPattern.trim() : "";
3035
+ if (!listUrlPatternRaw) {
3036
+ return c.json({ error: "缺少支持的站点(listUrlPattern),例如 https://example.com/*" }, 400);
3037
+ }
3038
+ if (!isValidNewListUrlPattern(listUrlPatternRaw)) {
3039
+ return c.json({ error: "支持的站点须为非空字符串,不超过 2048 字符,且不能含换行" }, 400);
3040
+ }
3113
3041
  await mkdir(USER_PLUGINS_DIR, { recursive: true });
3114
- await mkdir(USER_SOURCES_DIR, { recursive: true });
3115
- const outPath = join(USER_SOURCES_DIR, `${id}.rssany.ts`);
3042
+ const outPath = join(USER_PLUGINS_DIR, `${id}.rssany.js`);
3116
3043
  if (await fileExists(outPath)) return c.json({ error: "该 id 已存在同名文件" }, 409);
3117
3044
  let tpl = SITE_TEMPLATE_FALLBACK;
3118
3045
  try {
3119
- tpl = await readFile(USER_SITE_TEMPLATE, "utf-8");
3046
+ tpl = await readFile(PLUGIN_SITE_TEMPLATE_PATH, "utf-8");
3120
3047
  } catch {
3121
3048
  }
3122
- const content = tpl.replace(/__PLUGIN_ID__/g, id);
3049
+ const patternLiteral = JSON.stringify(listUrlPatternRaw);
3050
+ const content = tpl.replace(/__PLUGIN_ID__/g, id).replace(/__LIST_URL_PATTERN__/g, patternLiteral);
3123
3051
  if (!isAllowedPluginPath(outPath)) return c.json({ error: "路径不允许" }, 403);
3124
3052
  try {
3125
3053
  await writeFile(outPath, content, "utf-8");
@@ -3134,7 +3062,6 @@ function registerPluginsRoutes(app) {
3134
3062
  kind: "site",
3135
3063
  id: s.id,
3136
3064
  listUrlPattern: typeof s.listUrlPattern === "string" ? s.listUrlPattern : String(s.listUrlPattern),
3137
- hasEnrich: !!s.enrichItem,
3138
3065
  hasAuth: !!(s.checkAuth && s.loginUrl)
3139
3066
  }));
3140
3067
  const siteIds = new Set(sites.map((p) => p.id));
@@ -3142,7 +3069,6 @@ function registerPluginsRoutes(app) {
3142
3069
  kind: "source",
3143
3070
  id: src.id,
3144
3071
  listUrlPattern: typeof src.pattern === "string" ? src.pattern : String(src.pattern),
3145
- hasEnrich: !!src.enrichItem,
3146
3072
  hasAuth: false
3147
3073
  }));
3148
3074
  return c.json([...sites, ...sources]);
@@ -3303,6 +3229,12 @@ function registerItemsRoutes(app) {
3303
3229
  return c.json({ ok: false, message: err instanceof Error ? err.message : String(err) }, 400);
3304
3230
  }
3305
3231
  });
3232
+ app.delete("/api/items/by-source", requireAdmin(), async (c) => {
3233
+ const sourceUrl = (c.req.query("source_url") ?? "").trim();
3234
+ if (!sourceUrl) return c.json({ ok: false, message: "source_url 不能为空" }, 400);
3235
+ const deleted = await deleteItemsBySourceUrl(sourceUrl);
3236
+ return c.json({ ok: true, deleted });
3237
+ });
3306
3238
  app.delete("/api/items/:id", async (c) => {
3307
3239
  const id = decodeURIComponent(c.req.param("id") ?? "").trim();
3308
3240
  if (!id) return c.json({ ok: false, message: "id 不能为空" }, 400);
@@ -3310,12 +3242,6 @@ function registerItemsRoutes(app) {
3310
3242
  if (!deleted) return c.json({ ok: false, message: "条目不存在或已删除" }, 404);
3311
3243
  return c.json({ ok: true });
3312
3244
  });
3313
- app.delete("/api/items/by-source", requireAdmin(), async (c) => {
3314
- const sourceUrl = (c.req.query("source_url") ?? "").trim();
3315
- if (!sourceUrl) return c.json({ ok: false, message: "source_url 不能为空" }, 400);
3316
- const deleted = await deleteItemsBySourceUrl(sourceUrl);
3317
- return c.json({ ok: true, deleted });
3318
- });
3319
3245
  app.get("/api/items", async (c) => {
3320
3246
  const ref = c.req.query("ref") ?? c.req.query("source") ?? void 0;
3321
3247
  const subscribed = parseSubscribedFlag$1(c.req.query("subscribed"));
@@ -3463,7 +3389,7 @@ function registerSourcesRoutes(app) {
3463
3389
  const w = s.weight;
3464
3390
  const weight = typeof w === "number" ? w : void 0;
3465
3391
  return {
3466
- ref: String(s.ref),
3392
+ ref: canonicalHttpSourceRef(String(s.ref)),
3467
3393
  type,
3468
3394
  label: s.label,
3469
3395
  description: s.description,
@@ -3537,15 +3463,16 @@ function registerTopicsRoutes(app) {
3537
3463
  }
3538
3464
  function registerDeliverRoutes(app) {
3539
3465
  app.get("/api/deliver", requireAdmin(), async (c) => {
3540
- const url = await getDeliverUrl();
3541
- return c.json({ url });
3466
+ const { url, token } = await getDeliverConfig();
3467
+ return c.json({ url, token });
3542
3468
  });
3543
3469
  app.put("/api/deliver", requireAdmin(), async (c) => {
3544
3470
  try {
3545
3471
  const body = await c.req.json();
3546
3472
  const url = typeof body?.url === "string" ? body.url.trim() : "";
3547
- await saveDeliverUrl(url);
3548
- return c.json({ ok: true, url });
3473
+ const token = typeof body?.token === "string" ? body.token.trim() : "";
3474
+ await saveDeliverConfig({ url, token });
3475
+ return c.json({ ok: true, url, token });
3549
3476
  } catch (err) {
3550
3477
  return c.json({ ok: false, message: err instanceof Error ? err.message : String(err) }, 400);
3551
3478
  }
@@ -3554,6 +3481,7 @@ function registerDeliverRoutes(app) {
3554
3481
  try {
3555
3482
  const body = await c.req.json();
3556
3483
  const url = typeof body?.url === "string" ? body.url.trim() : "";
3484
+ const token = typeof body?.token === "string" ? body.token.trim() : "";
3557
3485
  if (!url) return c.json({ ok: false, message: "url 不能为空" }, 400);
3558
3486
  const sample = {
3559
3487
  guid: "deliver-test-" + Date.now(),
@@ -3562,22 +3490,150 @@ function registerDeliverRoutes(app) {
3562
3490
  pubDate: (/* @__PURE__ */ new Date()).toISOString(),
3563
3491
  summary: "若下游收到此条,说明投递 URL 可用。"
3564
3492
  };
3565
- await postDeliverItems(url, "rssany-deliver-test", [
3566
- {
3567
- guid: sample.guid,
3568
- title: sample.title,
3569
- link: sample.link,
3570
- pubDate: new Date(sample.pubDate),
3571
- summary: sample.summary,
3572
- sourceRef: "rssany-deliver-test"
3573
- }
3574
- ]);
3493
+ await postDeliverItems(
3494
+ url,
3495
+ "rssany-deliver-test",
3496
+ [
3497
+ {
3498
+ guid: sample.guid,
3499
+ title: sample.title,
3500
+ link: sample.link,
3501
+ pubDate: new Date(sample.pubDate),
3502
+ summary: sample.summary,
3503
+ sourceRef: "rssany-deliver-test"
3504
+ }
3505
+ ],
3506
+ { bearerToken: token || void 0 }
3507
+ );
3575
3508
  return c.json({ ok: true });
3576
3509
  } catch (err) {
3577
3510
  return c.json({ ok: false, message: err instanceof Error ? err.message : String(err) }, 400);
3578
3511
  }
3579
3512
  });
3580
3513
  }
3514
/**
 * Normalise an optional string setting.
 *
 * @param {unknown} s - Raw value.
 * @returns {string|undefined} The trimmed string, or `undefined` when `s` is
 *   not a string or is empty after trimming.
 */
function trimOrUndef(s) {
  const trimmed = typeof s === "string" ? s.trim() : "";
  return trimmed === "" ? undefined : trimmed;
}
3519
/**
 * Read the `llm` section of the JSON config file at `CONFIG_PATH`.
 *
 * Never rejects: a missing/unreadable file, invalid JSON, or a missing or
 * non-object `llm` entry all yield an empty object.
 *
 * @returns {Promise<{apiKey?: string, baseUrl?: string, model?: string}>}
 *   `apiKey` is returned verbatim when it is a string; `baseUrl` and `model`
 *   are trimmed and become `undefined` when blank or not strings.
 */
async function readLlmFileConfig() {
  let document;
  try {
    const text = await readFile(CONFIG_PATH, "utf-8");
    document = JSON.parse(text);
  } catch {
    return {};
  }
  const section = document?.llm;
  if (!section || typeof section !== "object") {
    return {};
  }
  const storedKey = typeof section.apiKey === "string" ? section.apiKey : undefined;
  return {
    apiKey: storedKey,
    baseUrl: trimOrUndef(section.baseUrl),
    model: trimOrUndef(section.model)
  };
}
3535
/**
 * Persist LLM settings into the `llm` section of the JSON config file at
 * `CONFIG_PATH`, keeping every other top-level section, then invalidate the
 * cached LLM configuration.
 *
 * @param {{baseUrl: string, model: string, apiKey?: string}} input - New
 *   settings. `baseUrl` and `model` are trimmed. A non-empty `apiKey`
 *   replaces the stored key; an absent or empty one keeps the key already in
 *   the file, if any.
 * @returns {Promise<void>}
 * @throws If the config file cannot be written.
 */
async function saveLlmSettings(input) {
  let root = {};
  try {
    const parsed = JSON.parse(await readFile(CONFIG_PATH, "utf-8"));
    // Only a plain JSON object can carry sections. `null` or a primitive
    // would make `root.llm = ...` throw, and an array would silently drop the
    // property when serialised, so those fall back to an empty document.
    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
      root = parsed;
    }
  } catch {
    // Best-effort: a missing or unparsable config starts from an empty document.
  }
  // Take the previous key from the document already in memory rather than
  // reading and parsing the file a second time.
  const previous = root.llm;
  const previousKey =
    previous && typeof previous === "object" && typeof previous.apiKey === "string"
      ? previous.apiKey
      : undefined;
  const next = {
    baseUrl: input.baseUrl.trim(),
    model: input.model.trim()
  };
  const newKey = typeof input.apiKey === "string" && input.apiKey.length > 0 ? input.apiKey : undefined;
  const effectiveKey = newKey ?? previousKey;
  // An empty stored key is treated the same as no key at all.
  if (effectiveKey) {
    next.apiKey = effectiveKey;
  }
  root.llm = next;
  await writeFile(CONFIG_PATH, JSON.stringify(root, null, 2) + "\n", "utf-8");
  invalidateLLMConfigCache();
}
3557
/**
 * Register the admin-only LLM settings endpoints:
 *
 * - `GET  /api/llm`      — current base URL/model plus two flags telling
 *                          whether an API key is resolved and whether one is
 *                          stored in the config file (the key is not returned).
 * - `PUT  /api/llm`      — save `baseUrl`, `model` and optionally `apiKey`,
 *                          then respond with the same snapshot and `ok: true`.
 * - `POST /api/llm/test` — send a one-word prompt through `chatText` and
 *                          return the reply.
 *
 * Failures in PUT and POST are reported as `{ ok: false, message }` with
 * HTTP status 400.
 *
 * @param app - Router/app object exposing `get`, `put` and `post`.
 */
function registerLlmRoutes(app) {
  // Snapshot shared by GET and PUT.
  const describeLlmState = async () => {
    const resolved = getLLMConfig();
    const file = await readLlmFileConfig();
    return {
      baseUrl: resolved.baseUrl,
      model: resolved.model,
      hasApiKey: !!resolved.apiKey,
      apiKeyInFile: !!(file.apiKey && file.apiKey.length > 0)
    };
  };
  const messageOf = (err) => (err instanceof Error ? err.message : String(err));

  app.get("/api/llm", requireAdmin(), async (c) => {
    const state = await describeLlmState();
    return c.json(state);
  });

  app.put("/api/llm", requireAdmin(), async (c) => {
    try {
      const body = await c.req.json();
      const settings = {
        baseUrl: typeof body.baseUrl === "string" ? body.baseUrl : "",
        model: typeof body.model === "string" ? body.model : ""
      };
      // Only forward `apiKey` when the client actually sent a string.
      if (typeof body.apiKey === "string") {
        settings.apiKey = body.apiKey;
      }
      await saveLlmSettings(settings);
      const state = await describeLlmState();
      return c.json({ ok: true, ...state });
    } catch (err) {
      return c.json({ ok: false, message: messageOf(err) }, 400);
    }
  });

  app.post("/api/llm/test", requireAdmin(), async (c) => {
    const startedAt = Date.now();
    try {
      if (!getLLMConfig().apiKey) {
        return c.json({ ok: false, message: "未配置 API Key(请在界面或 OPENAI_API_KEY 中设置)" }, 400);
      }
      const reply = await chatText("Reply with exactly the single word: ok", undefined, {
        maxTokens: 32768,
        debugLabel: "llmSettingsTest"
      });
      return c.json({ ok: true, reply });
    } catch (err) {
      const ms = Date.now() - startedAt;
      const message = messageOf(err);
      console.error("[llm/test] fail", { ms, message });
      return c.json({ ok: false, message }, 400);
    }
  });
}
3617
/**
 * Register the admin-only global proxy endpoints:
 *
 * - `GET /api/proxy` — respond with `{ globalProxy }`, using an empty string
 *                      when the stored value is null/undefined.
 * - `PUT /api/proxy` — save `body.globalProxy` (non-strings and an unparsable
 *                      body are saved as an empty string) and echo back the
 *                      value read from config afterwards.
 *
 * @param app - Router/app object exposing `get` and `put`.
 */
function registerProxySettingsRoutes(app) {
  const readStoredProxy = async () => {
    const stored = await readGlobalProxyFromConfig();
    return stored ?? "";
  };

  app.get("/api/proxy", requireAdmin(), async (c) => {
    const globalProxy = await readStoredProxy();
    return c.json({ globalProxy });
  });

  app.put("/api/proxy", requireAdmin(), async (c) => {
    try {
      // A body that fails to parse is treated as an empty object.
      const body = await c.req.json().catch(() => ({}));
      let requested = "";
      if (typeof body.globalProxy === "string") {
        requested = body.globalProxy;
      }
      await saveGlobalProxyToConfig(requested);
      const saved = await readStoredProxy();
      return c.json({ ok: true, globalProxy: saved });
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      return c.json({ ok: false, message }, 400);
    }
  });
}
3581
3637
  const tasks = /* @__PURE__ */ new Map();
3582
3638
  let idCounter = 0;
3583
3639
  function nextId() {
@@ -3651,10 +3707,311 @@ function registerTasksRoutes(app) {
3651
3707
  }
3652
3708
  });
3653
3709
  }
3710
// ---- Feed favicon settings ----
// Directory (joined under CACHE_DIR) that holds cached favicon files.
const CACHE_SUBDIR = "feed-favicons";
// Prefix hashed together with the domain to derive the cache file name.
const CACHE_KEY_PREFIX = "feed-favicon:v1:";
// Cache lifetime: three days, in seconds and in milliseconds.
const CACHE_MAX_AGE_SEC = 72 * 3600;
const CACHE_MAX_AGE_MS = 1000 * CACHE_MAX_AGE_SEC;
// Cache-Control header value sent with favicon responses.
const CACHE_CONTROL = "public, max-age=" + String(CACHE_MAX_AGE_SEC);
// Timeout applied to each upstream fetch.
const FETCH_TIMEOUT_MS = 6000;
// Icon bodies larger than this (2 MiB) are rejected.
const MAX_ICON_BYTES = 2 << 20;
// Homepage HTML is cut to this many bytes (512 KiB) before parsing.
const MAX_HTML_BYTES = 1 << 19;
// domain -> pending favicon lookup promise, used to share concurrent lookups.
const inflightByDomain = new Map();
// Longest `domain` value accepted by the hostname check.
const MAX_DOMAIN_LEN = 253;
3720
/**
 * Syntactic check that `s` looks like a DNS hostname.
 *
 * The name must be 1..MAX_DOMAIN_LEN characters long and consist of
 * dot-separated labels. Each label is 1-63 characters of ASCII letters,
 * digits or "-", and must start and end with a letter or digit. This rejects
 * values such as "a..b", "a.-b.com" or over-long labels that the previous
 * whole-string regex let through.
 *
 * NOTE: this is a shape check only. It does not resolve the name, so it does
 * not by itself prevent requests to hosts that resolve to internal addresses.
 *
 * @param {string} s - Candidate hostname (already trimmed by the caller).
 * @returns {boolean} True when every label is well-formed.
 */
function isPlausibleHostname(s) {
  if (s.length === 0 || s.length > MAX_DOMAIN_LEN) return false;
  const labelPattern = /^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?$/i;
  return s.split(".").every((label) => labelPattern.test(label));
}
3724
/**
 * Maps a domain to the path of its cached favicon file.
 *
 * The file name is the hex SHA-256 of CACHE_KEY_PREFIX followed by the
 * lower-cased domain, placed in CACHE_SUBDIR under CACHE_DIR.
 *
 * @param {string} domainKey - Domain used as the cache key.
 * @returns {string} Path of the cache file.
 */
function cacheFilePath(domainKey) {
  const hasher = createHash("sha256");
  hasher.update(`${CACHE_KEY_PREFIX}${domainKey.toLowerCase()}`);
  const fileName = hasher.digest("hex");
  return join(CACHE_DIR, CACHE_SUBDIR, fileName);
}
3728
/**
 * Lists the conventional favicon locations on the site itself.
 *
 * Both the given host and its "www." counterpart (added or stripped) are
 * probed over https, each with /favicon.ico, /favicon.png and
 * /apple-touch-icon.png, in that order.
 *
 * @param {string} domain - Hostname of the feed's site.
 * @returns {string[]} Candidate icon URLs, given host first.
 */
function originFaviconUrls(domain) {
  const host = domain.toLowerCase();
  const counterpart = host.startsWith("www.") ? host.slice(4) : `www.${host}`;

  const origins = new Set([`https://${host}`]);
  // A bare "www." has no counterpart once the prefix is stripped.
  if (counterpart) origins.add(`https://${counterpart}`);

  const iconPaths = ["/favicon.ico", "/favicon.png", "/apple-touch-icon.png"];
  return [...origins].flatMap((origin) => iconPaths.map((iconPath) => `${origin}${iconPath}`));
}
3746
/**
 * Lists the https homepage URLs to inspect for `<link rel="icon">` tags:
 * the given host first, then its "www." counterpart (added or stripped).
 *
 * @param {string} domain - Hostname of the feed's site.
 * @returns {string[]} Homepage URLs without duplicates.
 */
function homepageUrlsForDomain(domain) {
  const host = domain.toLowerCase();
  const counterpart = host.startsWith("www.") ? host.slice(4) : `www.${host}`;

  const pages = new Set([`https://${host}/`]);
  // A bare "www." has no counterpart once the prefix is stripped.
  if (counterpart) pages.add(`https://${counterpart}/`);
  return Array.from(pages);
}
3757
/**
 * Tells whether a `<link rel="...">` value designates an icon.
 *
 * The value is split on whitespace and compared case-insensitively; it
 * matches when any token is "icon", "mask-icon", "apple-touch-icon" or
 * "apple-touch-icon-precomposed" (so "shortcut icon" matches via "icon").
 *
 * @param {string} rel - Raw `rel` attribute value.
 * @returns {boolean} True when the link points at an icon.
 */
function isIconLinkRel(rel) {
  const iconTokens = new Set([
    "icon",
    "mask-icon",
    "apple-touch-icon",
    "apple-touch-icon-precomposed",
  ]);
  for (const token of rel.toLowerCase().split(/\s+/)) {
    if (iconTokens.has(token)) return true;
  }
  return false;
}
3764
/**
 * Extracts absolute icon URLs from a page's `<link>` elements.
 *
 * Relative hrefs are resolved against the page's `<base href>` when one is
 * present and parseable, otherwise against `pageUrl`. `data:` and `blob:`
 * hrefs, hrefs that fail to resolve, and results that are not http(s) are
 * skipped. Duplicates are dropped, keeping document order.
 *
 * @param {string} html - Page markup.
 * @param {string} pageUrl - URL the markup was fetched from.
 * @returns {string[]} Absolute http(s) icon URLs.
 */
function parseLinkIconHrefs(html, pageUrl) {
  const doc = parse(html, { lowerCaseTagName: true });

  // Work out the URL that relative hrefs resolve against.
  let resolveAgainst = pageUrl;
  const declaredBase = doc.querySelector("base[href]")?.getAttribute("href")?.trim();
  if (declaredBase) {
    try {
      resolveAgainst = new URL(declaredBase, pageUrl).href;
    } catch {
      // Unusable <base href>: keep resolving against the page URL.
    }
  }

  const found = new Set();
  for (const link of doc.querySelectorAll("link[href]")) {
    if (!isIconLinkRel(link.getAttribute("rel") ?? "")) continue;

    const target = link.getAttribute("href")?.trim();
    if (!target) continue;
    if (target.startsWith("data:") || target.startsWith("blob:")) continue;

    let absolute;
    try {
      absolute = new URL(target, resolveAgainst).href;
    } catch {
      continue;
    }
    const isHttp = absolute.startsWith("http:") || absolute.startsWith("https:");
    if (isHttp) found.add(absolute);
  }
  return [...found];
}
3795
/**
 * Downloads at most MAX_HTML_BYTES of a page and returns it as UTF-8 text.
 *
 * The body is read as a stream and the download is cancelled as soon as the
 * cap is reached, so a very large (or endless) response cannot be buffered in
 * full. The previous implementation read the entire body before truncating.
 *
 * @param {string} url - Page URL to fetch.
 * @returns {Promise<string|null>} The (possibly truncated) markup, or null on
 *   a network error, timeout or non-2xx status.
 */
async function fetchHtmlPage(url) {
  try {
    const upstream = await fetch(url, {
      redirect: "follow",
      headers: {
        Accept: "text/html,application/xhtml+xml;q=0.9,*/*;q=0.1",
        "User-Agent": "Mozilla/5.0 (compatible; RssAny/1.0; +https://github.com/rssany/rssany) favicon"
      },
      signal: AbortSignal.timeout(FETCH_TIMEOUT_MS)
    });
    if (!upstream.ok) return null;
    // A response without a body yields empty text, as before.
    if (!upstream.body) return "";

    const reader = upstream.body.getReader();
    const chunks = [];
    let received = 0;
    try {
      while (received < MAX_HTML_BYTES) {
        const { done, value } = await reader.read();
        if (done) break;
        chunks.push(value);
        received += value.byteLength;
      }
    } finally {
      // Stop the download once enough was read (or reading failed).
      await reader.cancel().catch(() => {
      });
    }
    return Buffer.concat(chunks).subarray(0, MAX_HTML_BYTES).toString("utf-8");
  } catch {
    return null;
  }
}
3814
/**
 * Looks for icon links declared in the site's homepage markup.
 *
 * Homepage candidates are tried one after another; the icon URLs of the first
 * page that declares any are returned. Setting FAVICON_SKIP_HTML to "1" or
 * "true" disables the lookup.
 *
 * @param {string} domain - Hostname of the feed's site.
 * @returns {Promise<string[]>} Icon URLs found, or an empty array.
 */
async function discoverIconUrlsFromHomepage(domain) {
  const skipSetting = process.env.FAVICON_SKIP_HTML;
  if (skipSetting === "1" || skipSetting === "true") return [];

  for (const pageUrl of homepageUrlsForDomain(domain)) {
    const markup = await fetchHtmlPage(pageUrl);
    const iconUrls = markup ? parseLinkIconHrefs(markup, pageUrl) : [];
    if (iconUrls.length > 0) return iconUrls;
  }
  return [];
}
3826
/**
 * Builds the DuckDuckGo icon-service URL for a domain.
 *
 * The domain is percent-encoded before being placed in the path, matching
 * the other third-party URL builders; for ordinary hostnames the result is
 * unchanged.
 *
 * @param {string} domain - Hostname of the feed's site.
 * @returns {string} URL of the `.ico` resource on icons.duckduckgo.com.
 */
function duckduckgoFaviconUrl(domain) {
  return `https://icons.duckduckgo.com/ip3/${encodeURIComponent(domain)}.ico`;
}
3829
/**
 * Builds the icon.horse lookup URL for a domain.
 *
 * @param {string} domain - Hostname of the feed's site.
 * @returns {string} URL with the percent-encoded domain as the last path segment.
 */
function iconHorseUrl(domain) {
  const encodedDomain = encodeURIComponent(domain);
  return "https://icon.horse/icon/" + encodedDomain;
}
3832
/**
 * Builds the unavatar.io lookup URL for a domain.
 *
 * @param {string} domain - Hostname of the feed's site.
 * @returns {string} URL with the percent-encoded domain as its path.
 */
function unavatarUrl(domain) {
  const encodedDomain = encodeURIComponent(domain);
  return "https://unavatar.io/" + encodedDomain;
}
3835
/**
 * Builds the Google s2 favicon lookup URL for a domain, requesting size 64.
 *
 * @param {string} domain - Hostname of the feed's site.
 * @returns {string} URL carrying the percent-encoded domain as a query value.
 */
function googleFaviconUrl(domain) {
  const query = ["domain=" + encodeURIComponent(domain), "sz=64"].join("&");
  return "https://www.google.com/s2/favicons?" + query;
}
3838
/**
 * Picks the character shown on the fallback letter avatar.
 *
 * A leading "www." is ignored; the first ASCII letter or digit of the rest is
 * returned upper-cased, or "?" when there is none.
 *
 * @param {string} domain - Hostname of the feed's site.
 * @returns {string} A single character.
 */
function letterCharFromDomain(domain) {
  let host = domain.toLowerCase();
  if (host.startsWith("www.")) {
    host = host.slice(4);
  }
  const firstAlnum = /[a-z0-9]/.exec(host);
  if (firstAlnum === null) return "?";
  return firstAlnum[0].toUpperCase();
}
3843
/**
 * Derives a stable hue (0-359) from a domain.
 *
 * The first two bytes of the SHA-256 of the lower-cased domain are read as a
 * big-endian 16-bit number and reduced modulo 360.
 *
 * @param {string} domain - Hostname of the feed's site.
 * @returns {number} Hue in degrees.
 */
function hueFromDomain(domain) {
  const digest = createHash("sha256").update(domain.toLowerCase()).digest();
  return digest.readUInt16BE(0) % 360;
}
3847
/**
 * Escapes `&`, `<`, `>` and `"` so the text can be embedded in XML.
 *
 * @param {string} s - Raw text.
 * @returns {string} Text with those four characters replaced by entities.
 */
function escapeXmlText(s) {
  const entityFor = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;" };
  return s.replace(/[&<>"]/g, (ch) => entityFor[ch]);
}
3850
// Builds the fallback "letter avatar" for a domain: a 64x64 SVG with a rounded
// rectangle background and one centred character, returned as a UTF-8 Buffer.
// The character comes from letterCharFromDomain() (XML-escaped) and the
// background hue from hueFromDomain().
+ function letterAvatarSvg(domain) {
3851
+ const letter = escapeXmlText(letterCharFromDomain(domain));
3852
+ const hue = hueFromDomain(domain);
3853
+ const bg = `hsl(${hue} 42% 44%)`;
3854
// The markup is a template literal; only `bg` and `letter` are interpolated.
+ const svg = `<?xml version="1.0" encoding="UTF-8"?>
3855
+ <svg xmlns="http://www.w3.org/2000/svg" width="64" height="64" viewBox="0 0 64 64">
3856
+ <rect width="64" height="64" rx="12" fill="${bg}"/>
3857
+ <text x="32" y="32" dominant-baseline="central" text-anchor="middle" fill="#ffffff" font-family="system-ui,Segoe UI,Helvetica,sans-serif" font-size="28" font-weight="600">${letter}</text>
3858
+ </svg>`;
3859
// trim() removes whitespace around the document before it is encoded.
+ return Buffer.from(svg.trim(), "utf-8");
3860
+ }
3861
/**
 * Wraps the generated letter avatar in the `{ buf, mime }` shape used for
 * resolved favicons.
 *
 * @param {string} domain - Hostname of the feed's site.
 * @returns {{ buf: Buffer, mime: string }} SVG bytes tagged as image/svg+xml.
 */
function letterAvatarForDomain(domain) {
  const buf = letterAvatarSvg(domain);
  return { buf, mime: "image/svg+xml" };
}
3864
/**
 * Tells whether a caught value is a "file not found" error, i.e. an object
 * whose `code` property is "ENOENT".
 *
 * @param {unknown} e - Value caught from a failed file-system call.
 * @returns {boolean} True for ENOENT errors.
 */
function isEnoent(e) {
  if (e === null || typeof e !== "object") return false;
  return e.code === "ENOENT";
}
3867
/**
 * Guesses an image MIME type from the leading bytes of a buffer.
 *
 * Recognises PNG, GIF, JPEG, WebP and ICO/CUR signatures, plus SVG text.
 * A document that starts with an XML declaration only counts as SVG when its
 * root element is `<svg`; previously every `<?xml ...` document (RSS feeds,
 * XHTML pages, sitemaps) was reported as image/svg+xml.
 *
 * @param {Buffer} buf - Bytes to inspect.
 * @returns {string|null} Detected MIME type, or null when nothing matches.
 */
function sniffImageMime(buf) {
  if (buf.length < 4) return null;
  if (buf[0] === 137 && buf[1] === 80 && buf[2] === 78 && buf[3] === 71) return "image/png";
  if (buf.length >= 6 && buf[0] === 71 && buf[1] === 73 && buf[2] === 70) return "image/gif";
  if (buf.length >= 3 && buf[0] === 255 && buf[1] === 216 && buf[2] === 255) return "image/jpeg";
  if (buf.length >= 12 && buf.subarray(0, 4).toString("ascii") === "RIFF" && buf.subarray(8, 12).toString("ascii") === "WEBP") {
    return "image/webp";
  }
  // ICO (type 1) / CUR (type 2): reserved word 0 followed by the little-endian type.
  if (buf.length >= 6 && buf.readUInt16LE(0) === 0 && (buf[2] === 1 || buf[2] === 2) && buf[3] === 0) {
    return "image/x-icon";
  }

  // Text formats: look at a bounded prefix only.
  const SVG_SNIFF_BYTES = 4096;
  const head = buf.subarray(0, Math.min(SVG_SNIFF_BYTES, buf.length)).toString("utf-8").trimStart();
  if (head.startsWith("<svg")) return "image/svg+xml";
  if (!head.startsWith("<?xml")) return null;

  // Skip the XML declaration, processing instructions, comments and a DOCTYPE
  // (optionally with an internal subset) to reach the root element.
  const prologueItem = /^(?:<\?[\s\S]*?\?>|<!--[\s\S]*?-->|<!DOCTYPE[^>\[]*(?:\[[^\]]*\])?[^>]*>)\s*/i;
  let rest = head;
  while (prologueItem.test(rest)) {
    rest = rest.replace(prologueItem, "");
  }
  return rest.startsWith("<svg") ? "image/svg+xml" : null;
}
3882
// Prefix shared by every image media type.
const IMAGE_CT_PREFIX = "image/";

/**
 * Extracts an image media type from a Content-Type header value.
 *
 * Parameters after ";" are dropped and the type is lower-cased.
 *
 * @param {string|null|undefined} ct - Raw Content-Type header value.
 * @returns {string|null} The media type when it starts with "image/", else null.
 */
function mimeFromFetch(ct) {
  if (!ct) return null;
  const [mediaType] = ct.split(";");
  const normalized = mediaType.trim().toLowerCase();
  if (!normalized.startsWith(IMAGE_CT_PREFIX)) return null;
  return normalized;
}
3888
/**
 * Determines the MIME type of fetched bytes: the sniffed type when the
 * content is recognised, otherwise the image type from the Content-Type header.
 *
 * @param {Buffer} buf - Response body.
 * @param {string|null} ct - Content-Type header value, if any.
 * @returns {string|null} Image MIME type, or null when neither source gives one.
 */
function resolveImageMime(buf, ct) {
  const sniffed = sniffImageMime(buf);
  if (sniffed !== null && sniffed !== undefined) return sniffed;
  return mimeFromFetch(ct);
}
3891
/**
 * Downloads one candidate icon URL.
 *
 * The body is streamed and the download is cancelled as soon as it exceeds
 * MAX_ICON_BYTES, so an oversized response is never buffered in full. Errors
 * while reading the body now resolve to null like every other failure;
 * previously they escaped as a rejection because the read sat outside the
 * try block.
 *
 * @param {string} url - Candidate icon URL.
 * @returns {Promise<{ buf: Buffer, ct: string|null }|null>} Body bytes and
 *   Content-Type header, or null on network error, timeout, non-2xx status,
 *   empty body or a body larger than MAX_ICON_BYTES.
 */
async function fetchIconCandidate(url) {
  try {
    const upstream = await fetch(url, {
      redirect: "follow",
      headers: {
        Accept: "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
        "User-Agent": "Mozilla/5.0 (compatible; RssAny/1.0; +https://github.com/rssany/rssany) favicon"
      },
      signal: AbortSignal.timeout(FETCH_TIMEOUT_MS)
    });
    if (!upstream.ok || !upstream.body) return null;

    const reader = upstream.body.getReader();
    const chunks = [];
    let received = 0;
    try {
      for (;;) {
        const { done, value } = await reader.read();
        if (done) break;
        received += value.byteLength;
        if (received > MAX_ICON_BYTES) return null;
        chunks.push(value);
      }
    } finally {
      // Releases the connection when bailing out early; a no-op after a full read.
      await reader.cancel().catch(() => {
      });
    }
    if (received === 0) return null;
    return { buf: Buffer.concat(chunks), ct: upstream.headers.get("content-type") };
  } catch {
    return null;
  }
}
3911
/**
 * Tells whether a fetch result holds something usable as an icon, i.e. a
 * MIME type can be resolved for it and that type is an image type.
 *
 * @param {{ buf: Buffer, ct: string|null }|null} got - Result of a candidate fetch.
 * @returns {boolean} True when the result is an image.
 */
function isValidIcon(got) {
  if (!got) return false;
  const detected = resolveImageMime(got.buf, got.ct);
  if (!detected) return false;
  return detected.startsWith(IMAGE_CT_PREFIX);
}
3916
/**
 * Assembles every URL that may serve the favicon for a domain.
 *
 * Order: the site's own conventional paths, icon links found in its HTML,
 * then third-party services (DuckDuckGo, icon.horse, unavatar) unless
 * FAVICON_THIRD_PARTY is "0"/"false", and finally Google's service when
 * FAVICON_INCLUDE_GOOGLE is "1"/"true".
 *
 * @param {string} domain - Hostname of the feed's site.
 * @param {string[]} htmlIconUrls - Icon URLs discovered in the homepage markup.
 * @returns {string[]} Candidate URLs in the order above.
 */
function upstreamFaviconUrls(domain, htmlIconUrls) {
  const envIsOneOf = (name, first, second) => {
    const value = process.env[name];
    return value === first || value === second;
  };

  const candidates = Array.from(originFaviconUrls(domain));
  candidates.push(...htmlIconUrls);

  const thirdPartyDisabled = envIsOneOf("FAVICON_THIRD_PARTY", "0", "false");
  if (!thirdPartyDisabled) {
    candidates.push(duckduckgoFaviconUrl(domain));
    candidates.push(iconHorseUrl(domain));
    candidates.push(unavatarUrl(domain));
  }
  if (envIsOneOf("FAVICON_INCLUDE_GOOGLE", "1", "true")) {
    candidates.push(googleFaviconUrl(domain));
  }
  return candidates;
}
3926
/**
 * Resolves a favicon for a domain from the network.
 *
 * Icon links are first discovered from the homepage; all candidate URLs are
 * then requested concurrently and the first one that yields a valid image
 * wins. When every candidate fails, a generated letter avatar is returned.
 *
 * @param {string} domain - Hostname of the feed's site.
 * @returns {Promise<{ buf: Buffer, mime: string }>} Icon bytes and MIME type.
 */
async function fetchFaviconFromNetwork(domain) {
  // Fetches one candidate; rejects unless it turns out to be an image.
  const probe = async (url) => {
    const got = await fetchIconCandidate(url);
    if (!isValidIcon(got)) {
      throw new Error("not-an-icon");
    }
    return { buf: got.buf, mime: resolveImageMime(got.buf, got.ct) };
  };

  const discovered = await discoverIconUrlsFromHomepage(domain);
  const attempts = upstreamFaviconUrls(domain, discovered).map((url) => probe(url));
  // Promise.any rejects only when every attempt failed (or there were none).
  return Promise.any(attempts).catch(() => letterAvatarForDomain(domain));
}
3943
/**
 * Resolves a favicon while sharing one network lookup between concurrent
 * requests for the same domain.
 *
 * The pending promise is kept in `inflightByDomain` and removed once it
 * settles, provided the map still points at that same promise.
 *
 * @param {string} domain - Hostname of the feed's site.
 * @returns {Promise<{ buf: Buffer, mime: string }>} The shared lookup promise.
 */
function fetchFaviconDeduped(domain) {
  const pending = inflightByDomain.get(domain);
  if (pending) return pending;

  const tracked = fetchFaviconFromNetwork(domain).finally(() => {
    if (inflightByDomain.get(domain) === tracked) {
      inflightByDomain.delete(domain);
    }
  });
  inflightByDomain.set(domain, tracked);
  return tracked;
}
3952
/**
 * Registers GET /api/feed-favicon?domain=<host>, which serves a site's
 * favicon from an on-disk cache, fetching it when missing or expired.
 *
 * Responses:
 *   400 - `domain` missing or not a plausible hostname
 *   500 - unexpected file-system error while reading the cache
 *   200 - icon bytes with Content-Type and Cache-Control headers
 *
 * Changes from the previous version:
 *   - A failure to write the cache no longer turns a resolved icon into a
 *     500; the icon is still served and the error is logged.
 *   - Icons may be SVG supplied by third parties, so responses carry
 *     `X-Content-Type-Options: nosniff` and a sandboxing
 *     Content-Security-Policy to keep such documents from running script
 *     when opened directly on this origin.
 *
 * @param {object} app - Router exposing a `get` registration method.
 */
function registerFeedFaviconRoutes(app) {
  // Builds the 200 response shared by the cache-hit and fresh-fetch paths.
  const iconResponse = (bytes, contentType) => new Response(new Uint8Array(bytes), {
    status: 200,
    headers: {
      "Content-Type": contentType,
      "Cache-Control": CACHE_CONTROL,
      "X-Content-Type-Options": "nosniff",
      "Content-Security-Policy": "default-src 'none'; style-src 'unsafe-inline'; sandbox"
    }
  });

  app.get("/api/feed-favicon", async (c) => {
    const raw = (c.req.query("domain") ?? "").trim();
    if (!raw || !isPlausibleHostname(raw)) {
      return new Response(null, { status: 400 });
    }
    const domain = raw.toLowerCase();
    const path = cacheFilePath(domain);

    // Drop the cached file once it is older than the cache lifetime.
    let diskStale = false;
    try {
      const st = await stat(path);
      if (Date.now() - st.mtimeMs >= CACHE_MAX_AGE_MS) {
        diskStale = true;
        await unlink(path).catch(() => {
        });
      }
    } catch (e) {
      if (!isEnoent(e)) {
        return new Response(null, { status: 500 });
      }
    }

    if (!diskStale) {
      try {
        const cached = await readFile(path);
        const cachedMime = resolveImageMime(cached, null);
        if (cachedMime) {
          return iconResponse(cached, cachedMime);
        }
        // Cached bytes are not recognisable as an image: discard and refetch.
        await unlink(path).catch(() => {
        });
      } catch (e) {
        if (!isEnoent(e)) {
          return new Response(null, { status: 500 });
        }
      }
    }

    const { buf, mime } = await fetchFaviconDeduped(domain);
    try {
      await mkdir(join(CACHE_DIR, CACHE_SUBDIR), { recursive: true });
      await writeFile(path, buf);
    } catch (err) {
      // Caching is best-effort: still serve the icon that was resolved.
      const message = err instanceof Error ? err.message : String(err);
      console.error("[feed-favicon] cache write failed", { domain, message });
    }
    return iconResponse(buf, mime);
  });
}
3654
4011
  function registerApiRoutes(app) {
3655
4012
  registerServerRoutes(app);
4013
+ registerFeedFaviconRoutes(app);
3656
4014
  registerRssApiRoutes(app);
3657
- registerEnrichRoutes(app);
3658
4015
  registerSchedulerRoutes(app);
3659
4016
  registerPluginsRoutes(app);
3660
4017
  registerPipelineRoutes(app);
@@ -3665,6 +4022,8 @@ function registerApiRoutes(app) {
3665
4022
  registerSourcesRoutes(app);
3666
4023
  registerTopicsRoutes(app);
3667
4024
  registerDeliverRoutes(app);
4025
+ registerLlmRoutes(app);
4026
+ registerProxySettingsRoutes(app);
3668
4027
  registerTasksRoutes(app);
3669
4028
  }
3670
4029
  function registerAuthRoutes(app) {
@@ -3678,7 +4037,7 @@ function registerAuthRoutes(app) {
3678
4037
  const authFlow = toAuthFlow(site);
3679
4038
  if (!authFlow) return c.json({ ok: false, message: "该站点无需登录" }, 400);
3680
4039
  try {
3681
- const authenticated = await preCheckAuth(authFlow, CACHE_DIR);
4040
+ const authenticated = await preCheckAuth(authFlow, CACHE_DIR, { proxy: await resolveProxyForSite(site) });
3682
4041
  return c.json({ ok: true, authenticated });
3683
4042
  } catch (err) {
3684
4043
  const msg = err instanceof Error ? err.message : String(err);
@@ -3695,12 +4054,23 @@ function registerAuthRoutes(app) {
3695
4054
  const authFlow = toAuthFlow(site);
3696
4055
  if (!authFlow) return c.json({ ok: false, message: "该站点无需登录" }, 400);
3697
4056
  const { loginUrl } = authFlow;
3698
- getOrCreateBrowser({ headless: false, cacheDir: CACHE_DIR }).then(async (browser) => {
3699
- const page = await browser.newPage();
3700
- const realUserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
3701
- await page.setUserAgent(realUserAgent);
3702
- await page.setViewport({ width: 1366, height: 960 });
3703
- await page.goto(loginUrl, { waitUntil: "domcontentloaded", timeout: 6e4 });
4057
+ const proxy = await resolveProxyForSite(site);
4058
+ void launchBrowser({ headless: false, cacheDir: CACHE_DIR, proxy: resolveProxy({ proxy }) }).then(async (browser) => {
4059
+ try {
4060
+ const page = await browser.newPage();
4061
+ await applyProxyAuthToPage(page, { proxy });
4062
+ const realUserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
4063
+ await page.setUserAgent(realUserAgent);
4064
+ await page.setViewport({ width: 1366, height: 960 });
4065
+ await page.goto(loginUrl, { waitUntil: "domcontentloaded", timeout: 6e4 });
4066
+ page.once("close", () => {
4067
+ void browser.close().catch(() => {
4068
+ });
4069
+ });
4070
+ } catch {
4071
+ await browser.close().catch(() => {
4072
+ });
4073
+ }
3704
4074
  }).catch(() => {
3705
4075
  });
3706
4076
  return c.json({ ok: true, message: "已打开登录页面" });
@@ -3721,7 +4091,7 @@ function registerAuthRoutes(app) {
3721
4091
  }
3722
4092
  const authFlow = toAuthFlow(site);
3723
4093
  if (!authFlow) return c.json({ ok: false, message: "该站点无需登录" }, 400);
3724
- ensureAuth(authFlow, CACHE_DIR).then(() => {
4094
+ ensureAuth(authFlow, CACHE_DIR, { proxy: await resolveProxyForSite(site) }).then(() => {
3725
4095
  }).catch(() => {
3726
4096
  });
3727
4097
  return c.json({ ok: true, message: "已打开登录窗口,请在弹出的浏览器中完成登录,完成后刷新订阅页面即可。" });
@@ -3744,6 +4114,24 @@ async function readStaticHtml(name, fallback) {
3744
4114
  function escapeHtml(s) {
3745
4115
  return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&#39;");
3746
4116
  }
4117
/**
 * Works out which proxy setting applies, in order of precedence: the explicit
 * override, the value merged from the source, then the HTTP_PROXY and
 * HTTPS_PROXY environment variables. Blank values are skipped and the chosen
 * value is trimmed.
 *
 * @param {string|undefined} override - Proxy given explicitly for this request.
 * @param {string|undefined} mergedFromSource - Proxy derived from the source's settings.
 * @returns {string|undefined} The proxy in effect, if any.
 */
function effectiveProxyUsed(override, mergedFromSource) {
  for (const candidate of [override, mergedFromSource]) {
    const trimmed = candidate?.trim();
    if (trimmed) return trimmed;
  }
  const { HTTP_PROXY: httpProxy, HTTPS_PROXY: httpsProxy } = process.env;
  return httpProxy?.trim() || httpsProxy?.trim();
}
4124
/**
 * Produces a display-safe form of a proxy URL by replacing any user name and
 * password with "***".
 *
 * @param {string|undefined|null} p - Proxy URL.
 * @returns {string|null} The masked URL, or null when `p` is empty or cannot
 *   be parsed as a URL.
 */
function redactProxyForLog(p) {
  if (!p) return null;

  let parsed;
  try {
    parsed = new URL(p);
  } catch {
    return null;
  }
  for (const field of ["username", "password"]) {
    if (parsed[field]) parsed[field] = "***";
  }
  return parsed.toString();
}
3747
4135
  function registerAdminRoutes(app) {
3748
4136
  async function render401(listUrl) {
3749
4137
  const raw = await readStaticHtml("401", '<!DOCTYPE html><html><head><meta charset="utf-8"><title>401</title></head><body><h1>401 需要登录</h1></body></html>');
@@ -3754,12 +4142,25 @@ function registerAdminRoutes(app) {
3754
4142
  if (!url) return c.text("无效 URL,格式: /admin/parse/https://... 或 /admin/parse/example.com/...", 400);
3755
4143
  try {
3756
4144
  const headlessParam = c.req.query("headless");
3757
- const headless = headlessParam === "false" || headlessParam === "0" ? false : void 0;
4145
+ const headless = headlessParam === "true" || headlessParam === "1";
4146
+ const proxyOverride = c.req.query("proxy")?.trim();
3758
4147
  const source = getSource(url);
3759
- const ctx = { cacheDir: CACHE_DIR, headless, proxy: source.proxy };
4148
+ const fromSource = await getEffectiveProxyForListUrl(url, source);
4149
+ const ctx = buildSourceContext({
4150
+ cacheDir: CACHE_DIR,
4151
+ headless,
4152
+ proxy: proxyOverride || fromSource
4153
+ });
3760
4154
  const items = await source.fetchItems(url, ctx);
3761
4155
  const mode = source.id === "generic" ? "generic" : "plugin";
3762
- return c.json({ items, url, mode, pluginId: source.id });
4156
+ const effective = effectiveProxyUsed(proxyOverride, fromSource);
4157
+ return c.json({
4158
+ items,
4159
+ url,
4160
+ mode,
4161
+ pluginId: source.id,
4162
+ effectiveProxy: redactProxyForLog(effective)
4163
+ });
3763
4164
  } catch (err) {
3764
4165
  if (err instanceof AuthRequiredError) {
3765
4166
  const html = await render401(url);
@@ -3774,28 +4175,20 @@ function registerAdminRoutes(app) {
3774
4175
  if (!url) return c.text("无效 URL,格式: /admin/extractor/https://... 或 /admin/extractor/example.com/...", 400);
3775
4176
  try {
3776
4177
  const headlessParam = c.req.query("headless");
3777
- const headless = headlessParam === "false" || headlessParam === "0" ? false : void 0;
3778
- const site = getBestSite(url);
3779
- if (site?.enrichItem) {
3780
- const siteCtx = buildSiteContext(site, { cacheDir: CACHE_DIR, headless });
3781
- const stub = { guid: url, title: "", link: url, pubDate: /* @__PURE__ */ new Date() };
3782
- const enriched = await site.enrichItem(stub, siteCtx);
3783
- return c.json({
3784
- title: enriched.title ?? null,
3785
- author: enriched.author ?? null,
3786
- pubDate: enriched.pubDate instanceof Date ? enriched.pubDate.toISOString() : enriched.pubDate ?? null,
3787
- content: enriched.content ?? null,
3788
- _extractor: site.id
3789
- });
3790
- }
3791
- const proxy = site?.proxy;
4178
+ const headless = headlessParam === "true" || headlessParam === "1";
4179
+ const proxyOverride = c.req.query("proxy")?.trim();
4180
+ const source = getSource(url);
4181
+ const fromSource = await getEffectiveProxyForListUrl(url, source);
4182
+ const proxy = proxyOverride || fromSource;
3792
4183
  const result = await extractFromLink(url, {}, { timeoutMs: 6e4, headless, proxy });
4184
+ const effective = effectiveProxyUsed(proxyOverride, fromSource);
3793
4185
  return c.json({
3794
4186
  title: result.title ?? null,
3795
4187
  author: result.author ?? null,
3796
4188
  pubDate: result.pubDate ?? null,
3797
4189
  content: result.content ?? null,
3798
- _extractor: "readability"
4190
+ _extractor: "readability",
4191
+ effectiveProxy: redactProxyForLog(effective)
3799
4192
  });
3800
4193
  } catch (err) {
3801
4194
  const msg = err instanceof Error ? err.message : String(err);