rssany 0.1.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -27
- package/app/plugins/builtin/agi-eval-evaluation.rssany.js +7 -8
- package/app/plugins/builtin/amii-research-talent.rssany.js +6 -7
- package/app/plugins/builtin/anthropic-research.rssany.js +6 -8
- package/app/plugins/builtin/appen-resources.rssany.js +6 -7
- package/app/plugins/builtin/baai-wudao-paper-article.rssany.js +9 -10
- package/app/plugins/builtin/baaidata-csdn.rssany.js +6 -7
- package/app/plugins/builtin/baidu-research.rssany.js +5 -8
- package/app/plugins/builtin/brightdata-blog.rssany.js +7 -12
- package/app/plugins/builtin/bytedance-seed-research.rssany.js +5 -7
- package/app/plugins/builtin/email.rssany.js +9 -9
- package/app/plugins/builtin/five-radar.rssany.js +10 -12
- package/app/plugins/builtin/flageval-news.rssany.js +5 -7
- package/app/plugins/builtin/google-deepmind-research.rssany.js +7 -9
- package/app/plugins/builtin/google-research-datasets.rssany.js +6 -8
- package/app/plugins/builtin/google-research.rssany.js +6 -8
- package/app/plugins/builtin/hacker-news-newest.rssany.js +7 -9
- package/app/plugins/builtin/harvard-dataverse.rssany.js +6 -8
- package/app/plugins/builtin/huaweicloud-bbs-blogs.rssany.js +7 -9
- package/app/plugins/builtin/lingowhale.rssany.js +7 -9
- package/app/plugins/builtin/meituan-tech.rssany.js +7 -10
- package/app/plugins/builtin/meta-ai-publications.rssany.js +6 -11
- package/app/plugins/builtin/mila-quebec.rssany.js +6 -8
- package/app/plugins/builtin/mit-csail-research.rssany.js +7 -9
- package/app/plugins/builtin/moonshot.rssany.js +6 -8
- package/app/plugins/builtin/opendatalab-news.rssany.js +6 -7
- package/app/plugins/builtin/opendatalab.rssany.js +5 -6
- package/app/plugins/builtin/opendrivelab-autonomous-driving.rssany.js +6 -7
- package/app/plugins/builtin/opendrivelab-embodiedai.rssany.js +7 -8
- package/app/plugins/builtin/opendrivelab-publications.rssany.js +7 -9
- package/app/plugins/builtin/opendrivelab.rssany.js +7 -8
- package/app/plugins/builtin/paperswithcode.rssany.js +6 -8
- package/app/plugins/builtin/pjlab-adg-publications.rssany.js +8 -10
- package/app/plugins/builtin/rss.rssany.js +11 -12
- package/app/plugins/builtin/selectdataset.rssany.js +6 -8
- package/app/plugins/builtin/sensetime-tech-achievements.rssany.js +7 -8
- package/app/plugins/builtin/supervisely-blog.rssany.js +6 -8
- package/app/plugins/builtin/theinformation-briefings.rssany.js +144 -136
- package/app/plugins/builtin/uci-ml-repository.rssany.js +6 -7
- package/app/plugins/builtin/venturebeat.rssany.js +7 -9
- package/app/plugins/builtin/worldlabs.rssany.js +6 -8
- package/app/plugins/builtin/x.rssany.js +7 -9
- package/app/plugins/builtin/xiaohongshu.rssany.js +119 -56
- package/app/plugins/builtin/zhipu-research.rssany.js +7 -10
- package/app/plugins/site.rssany.js +25 -25
- package/{statics → app/statics}/README.md +7 -7
- package/bin/rssany.js +226 -6
- package/dist/index.js +545 -396
- package/dist/index.js.map +1 -1
- package/package.json +20 -13
- package/scripts/dev.mjs +114 -0
- package/scripts/reset.mjs +1 -1
- package/app/plugins/builtin/google.rssany.js +0 -187
- package/init/config.json +0 -17
- package/init/sources.json +0 -353
- package/statics/401.html +0 -56
- package/statics/404.html +0 -12
- package/statics/image.png +0 -0
- package/webui/build/200.html +0 -49
- package/webui/build/_app/env.js +0 -1
- package/webui/build/_app/immutable/assets/0.BB88QFoe.css +0 -1
- package/webui/build/_app/immutable/assets/10.Dj8_pmut.css +0 -1
- package/webui/build/_app/immutable/assets/11.qYZMiTb0.css +0 -1
- package/webui/build/_app/immutable/assets/12.Ct59LCqW.css +0 -1
- package/webui/build/_app/immutable/assets/13.BhO9zvFi.css +0 -1
- package/webui/build/_app/immutable/assets/14.CujIhjQK.css +0 -1
- package/webui/build/_app/immutable/assets/15.nNGjXhCQ.css +0 -1
- package/webui/build/_app/immutable/assets/16.PP9XLDf7.css +0 -1
- package/webui/build/_app/immutable/assets/4.9wPHhVwv.css +0 -1
- package/webui/build/_app/immutable/assets/5.ClehBQ0g.css +0 -1
- package/webui/build/_app/immutable/assets/6.DSJfjJwx.css +0 -1
- package/webui/build/_app/immutable/assets/7.CrNxmd8B.css +0 -1
- package/webui/build/_app/immutable/assets/8.Ba5_jYIY.css +0 -1
- package/webui/build/_app/immutable/assets/9.m-LCx_kl.css +0 -1
- package/webui/build/_app/immutable/assets/BackToParentRoute.DGk-X5ow.css +0 -1
- package/webui/build/_app/immutable/assets/SourcesList.yTBBi3_m.css +0 -1
- package/webui/build/_app/immutable/assets/homeFeedPanelStore.CSvlNcpm.css +0 -1
- package/webui/build/_app/immutable/chunks/B-OsL1Ct.js +0 -1
- package/webui/build/_app/immutable/chunks/B2Q1a1-H.js +0 -2
- package/webui/build/_app/immutable/chunks/BK3WtZwv.js +0 -1
- package/webui/build/_app/immutable/chunks/BQqoDzLx.js +0 -1
- package/webui/build/_app/immutable/chunks/BUApaBEI.js +0 -1
- package/webui/build/_app/immutable/chunks/BbWUOQ_m.js +0 -1
- package/webui/build/_app/immutable/chunks/Bfc47y5P.js +0 -1
- package/webui/build/_app/immutable/chunks/Bp63qm3L.js +0 -1
- package/webui/build/_app/immutable/chunks/BwlaCkNX.js +0 -36
- package/webui/build/_app/immutable/chunks/C0J2-L94.js +0 -1
- package/webui/build/_app/immutable/chunks/CBY2biv-.js +0 -1
- package/webui/build/_app/immutable/chunks/CLOXMsDk.js +0 -36
- package/webui/build/_app/immutable/chunks/CVzlFH44.js +0 -1
- package/webui/build/_app/immutable/chunks/CWNeClHp.js +0 -6
- package/webui/build/_app/immutable/chunks/Cihqbfi5.js +0 -1
- package/webui/build/_app/immutable/chunks/D5GvRCv7.js +0 -1
- package/webui/build/_app/immutable/chunks/DEDI7Ecm.js +0 -1
- package/webui/build/_app/immutable/chunks/DFuhmi31.js +0 -1
- package/webui/build/_app/immutable/chunks/DMWEh-Ek.js +0 -2
- package/webui/build/_app/immutable/chunks/DgceFEv5.js +0 -1
- package/webui/build/_app/immutable/chunks/DjNLq3TF.js +0 -1
- package/webui/build/_app/immutable/chunks/Dt2CddFe.js +0 -1
- package/webui/build/_app/immutable/chunks/Dw782Tjs.js +0 -1
- package/webui/build/_app/immutable/chunks/SqCUd34O.js +0 -1
- package/webui/build/_app/immutable/chunks/Xy_fhzQq.js +0 -1
- package/webui/build/_app/immutable/chunks/hp4PFHFv.js +0 -1
- package/webui/build/_app/immutable/chunks/lk5LaiqA.js +0 -1
- package/webui/build/_app/immutable/chunks/mW5RwvnK.js +0 -13
- package/webui/build/_app/immutable/chunks/tB7QMF3U.js +0 -1
- package/webui/build/_app/immutable/chunks/xtNWTdbD.js +0 -1
- package/webui/build/_app/immutable/entry/app.B8zBPipq.js +0 -2
- package/webui/build/_app/immutable/entry/start.CxRCKeCl.js +0 -1
- package/webui/build/_app/immutable/nodes/0.ChLNE3xy.js +0 -11
- package/webui/build/_app/immutable/nodes/1.1N74-4Io.js +0 -1
- package/webui/build/_app/immutable/nodes/10.DY30t9Ib.js +0 -1
- package/webui/build/_app/immutable/nodes/11.ITuxnukH.js +0 -1
- package/webui/build/_app/immutable/nodes/12.qLzWqB1c.js +0 -1
- package/webui/build/_app/immutable/nodes/13.nT3SOzEB.js +0 -1
- package/webui/build/_app/immutable/nodes/14.BHnIxbVM.js +0 -1
- package/webui/build/_app/immutable/nodes/15.CLjT9il3.js +0 -1
- package/webui/build/_app/immutable/nodes/16.BD-mKCLN.js +0 -24
- package/webui/build/_app/immutable/nodes/17.BtYZF6FM.js +0 -1
- package/webui/build/_app/immutable/nodes/18.Ba_qJjp6.js +0 -1
- package/webui/build/_app/immutable/nodes/2.BYWOpaxy.js +0 -1
- package/webui/build/_app/immutable/nodes/3.Dt5o2Fmz.js +0 -1
- package/webui/build/_app/immutable/nodes/4.DTSxpKm7.js +0 -2
- package/webui/build/_app/immutable/nodes/5.Dy3vSsIP.js +0 -1
- package/webui/build/_app/immutable/nodes/6.DvclsL6H.js +0 -1
- package/webui/build/_app/immutable/nodes/7.D2nJy-Uz.js +0 -1
- package/webui/build/_app/immutable/nodes/8.C75mhrqs.js +0 -1
- package/webui/build/_app/immutable/nodes/9.Bp_QXw3w.js +0 -1
- package/webui/build/_app/version.json +0 -1
package/dist/index.js
CHANGED
|
@@ -9,7 +9,7 @@ import { join, dirname, basename, resolve, sep, relative } from "node:path";
|
|
|
9
9
|
import { promisify } from "node:util";
|
|
10
10
|
import puppeteerCore from "puppeteer-core";
|
|
11
11
|
import { parse, NodeType } from "node-html-parser";
|
|
12
|
-
import
|
|
12
|
+
import { DatabaseSync } from "node:sqlite";
|
|
13
13
|
import { mkdir, writeFile, copyFile, access, rename, readFile, readdir, stat, unlink } from "node:fs/promises";
|
|
14
14
|
import { fileURLToPath, pathToFileURL } from "node:url";
|
|
15
15
|
import { createHash } from "node:crypto";
|
|
@@ -242,7 +242,7 @@ async function migrateFile(from, to) {
|
|
|
242
242
|
logger.warn("config", "配置迁移失败", { from, to, err: err instanceof Error ? err.message : String(err) });
|
|
243
243
|
}
|
|
244
244
|
}
|
|
245
|
-
const INIT_DATA_DIR = join(PACKAGE_ROOT, "init");
|
|
245
|
+
const INIT_DATA_DIR = join(PACKAGE_ROOT, "app/init");
|
|
246
246
|
const EXAMPLE_SOURCES = join(INIT_DATA_DIR, "sources.json");
|
|
247
247
|
const EXAMPLE_CONFIG = join(INIT_DATA_DIR, "config.json");
|
|
248
248
|
async function seedExampleConfigsIfMissing() {
|
|
@@ -292,16 +292,14 @@ async function initUserDir() {
|
|
|
292
292
|
}
|
|
293
293
|
const MAIN_DB_JOURNAL = (process.env.RSSANY_DB_JOURNAL ?? "wal").toLowerCase() === "delete" ? "DELETE" : "WAL";
|
|
294
294
|
let _db = null;
|
|
295
|
-
let _dbInit = null;
|
|
296
295
|
let _writeLock = Promise.resolve();
|
|
297
296
|
const MAIN_DB_LOCK_PATH = join(DATA_DIR, "rssany.db.lock");
|
|
298
297
|
function logCorruptDiagnostic(operation, err) {
|
|
299
|
-
const code = err?.code;
|
|
300
298
|
const msg = err instanceof Error ? err.message : String(err);
|
|
301
299
|
const lines = [
|
|
302
300
|
"[rssany db] 数据库可能损坏或并发冲突",
|
|
303
301
|
` 操作: ${operation}`,
|
|
304
|
-
` 错误: ${
|
|
302
|
+
` 错误: ${msg}`,
|
|
305
303
|
" 常见原因:",
|
|
306
304
|
" 1. 多进程同时打开同一库(例如 tsx --watch 与另一实例同时写)",
|
|
307
305
|
" 2. 异常退出后 WAL 未正常 checkpoint",
|
|
@@ -437,48 +435,40 @@ function mapRowsToDbItems(rows) {
|
|
|
437
435
|
return rows.map(toDbItem);
|
|
438
436
|
}
|
|
439
437
|
function isCorruptError(err) {
|
|
440
|
-
const code = err?.code;
|
|
441
438
|
const msg = err instanceof Error ? err.message : String(err);
|
|
442
|
-
return
|
|
439
|
+
return msg.includes("SQLITE_CORRUPT") || msg.includes("database disk image is malformed");
|
|
443
440
|
}
|
|
444
441
|
async function getDb() {
|
|
445
442
|
if (_db) return _db;
|
|
446
|
-
if (_dbInit) return _dbInit;
|
|
447
443
|
const dbPath = join(DATA_DIR, "rssany.db");
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
if (db) {
|
|
463
|
-
try {
|
|
464
|
-
db.close();
|
|
465
|
-
} catch {
|
|
466
|
-
}
|
|
467
|
-
db = null;
|
|
468
|
-
}
|
|
469
|
-
if (isCorruptError(err)) {
|
|
470
|
-
logCorruptDiagnostic("打开/初始化主库 (getDb)", err);
|
|
444
|
+
await mkdir(DATA_DIR, { recursive: true });
|
|
445
|
+
acquireDbLock(DATA_DIR);
|
|
446
|
+
try {
|
|
447
|
+
_db = new DatabaseSync(dbPath);
|
|
448
|
+
_db.exec(`PRAGMA journal_mode = ${MAIN_DB_JOURNAL}`);
|
|
449
|
+
_db.exec("PRAGMA synchronous = NORMAL");
|
|
450
|
+
initSchema(_db);
|
|
451
|
+
return _db;
|
|
452
|
+
} catch (err) {
|
|
453
|
+
releaseDbLock();
|
|
454
|
+
if (_db) {
|
|
455
|
+
try {
|
|
456
|
+
_db.close();
|
|
457
|
+
} catch {
|
|
471
458
|
}
|
|
472
|
-
|
|
459
|
+
_db = null;
|
|
460
|
+
}
|
|
461
|
+
if (isCorruptError(err)) {
|
|
462
|
+
logCorruptDiagnostic("打开/初始化主库 (getDb)", err);
|
|
473
463
|
}
|
|
474
|
-
|
|
475
|
-
|
|
464
|
+
throw err;
|
|
465
|
+
}
|
|
476
466
|
}
|
|
477
467
|
async function runIntegrityCheck() {
|
|
478
468
|
const db = await getDb();
|
|
479
469
|
try {
|
|
480
|
-
const
|
|
481
|
-
return
|
|
470
|
+
const result = db.prepare("PRAGMA integrity_check").get();
|
|
471
|
+
return result?.integrity_check ?? "unknown";
|
|
482
472
|
} catch (err) {
|
|
483
473
|
const msg = err instanceof Error ? err.message : String(err);
|
|
484
474
|
return `integrity_check 执行失败: ${msg}`;
|
|
@@ -486,7 +476,6 @@ async function runIntegrityCheck() {
|
|
|
486
476
|
}
|
|
487
477
|
const LOGS_DB_PATH = join(DATA_DIR, "logs.db");
|
|
488
478
|
let _logsDb = null;
|
|
489
|
-
let _logsDbInit = null;
|
|
490
479
|
function initLogsSchema(db) {
|
|
491
480
|
db.exec(`
|
|
492
481
|
CREATE TABLE IF NOT EXISTS logs (
|
|
@@ -504,17 +493,12 @@ function initLogsSchema(db) {
|
|
|
504
493
|
}
|
|
505
494
|
async function getLogsDb() {
|
|
506
495
|
if (_logsDb) return _logsDb;
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
initLogsSchema(db);
|
|
514
|
-
_logsDb = db;
|
|
515
|
-
return db;
|
|
516
|
-
})();
|
|
517
|
-
return _logsDbInit;
|
|
496
|
+
await mkdir(DATA_DIR, { recursive: true });
|
|
497
|
+
_logsDb = new DatabaseSync(LOGS_DB_PATH);
|
|
498
|
+
_logsDb.exec("PRAGMA journal_mode = WAL");
|
|
499
|
+
_logsDb.exec("PRAGMA synchronous = NORMAL");
|
|
500
|
+
initLogsSchema(_logsDb);
|
|
501
|
+
return _logsDb;
|
|
518
502
|
}
|
|
519
503
|
function initSchema(db) {
|
|
520
504
|
db.exec(`
|
|
@@ -588,8 +572,8 @@ function initSchema(db) {
|
|
|
588
572
|
END;
|
|
589
573
|
`);
|
|
590
574
|
try {
|
|
591
|
-
const
|
|
592
|
-
if (
|
|
575
|
+
const cols = db.prepare("PRAGMA table_info(items)").all().map((r) => r.name);
|
|
576
|
+
if (!cols.includes("image_url")) {
|
|
593
577
|
db.exec("ALTER TABLE items ADD COLUMN image_url TEXT");
|
|
594
578
|
}
|
|
595
579
|
} catch {
|
|
@@ -597,20 +581,25 @@ function initSchema(db) {
|
|
|
597
581
|
migrateItemsSourceUrlIfNeeded(db);
|
|
598
582
|
}
|
|
599
583
|
function migrateItemsSourceUrlIfNeeded(db) {
|
|
600
|
-
const
|
|
584
|
+
const pragmaResult = db.exec("PRAGMA user_version");
|
|
585
|
+
const v = pragmaResult?.values?.[0]?.[0] ?? 0;
|
|
601
586
|
if (v >= 2) return;
|
|
602
587
|
const rows = db.prepare("SELECT rowid, source_url FROM items").all();
|
|
603
|
-
const
|
|
604
|
-
|
|
588
|
+
const updateStmt = db.prepare("UPDATE items SET source_url = @next WHERE rowid = @rowid");
|
|
589
|
+
db.exec("BEGIN TRANSACTION");
|
|
590
|
+
try {
|
|
605
591
|
for (const r of rows) {
|
|
606
592
|
const next = canonicalHttpSourceRef(r.source_url);
|
|
607
593
|
if (next !== r.source_url) {
|
|
608
|
-
|
|
594
|
+
updateStmt.run({ next, rowid: r.rowid });
|
|
609
595
|
}
|
|
610
596
|
}
|
|
611
|
-
db.
|
|
612
|
-
|
|
613
|
-
|
|
597
|
+
db.exec("PRAGMA user_version = 2");
|
|
598
|
+
db.exec("COMMIT");
|
|
599
|
+
} catch (err) {
|
|
600
|
+
db.exec("ROLLBACK");
|
|
601
|
+
throw err;
|
|
602
|
+
}
|
|
614
603
|
}
|
|
615
604
|
async function upsertItems(items, sourceUrlOverride) {
|
|
616
605
|
if (items.length === 0) return { newCount: 0, newIds: /* @__PURE__ */ new Set() };
|
|
@@ -621,98 +610,96 @@ async function upsertItems(items, sourceUrlOverride) {
|
|
|
621
610
|
const sourceUrl = canonicalHttpSourceRef(raw);
|
|
622
611
|
return withWriteLock(async () => {
|
|
623
612
|
const db = await getDb();
|
|
624
|
-
const stmt = db.prepare(`
|
|
625
|
-
INSERT OR IGNORE INTO items (id, url, source_url, title, author, summary, image_url, tags, pub_date, fetched_at)
|
|
626
|
-
VALUES (@id, @url, @sourceUrl, @title, @author, @summary, @imageUrl, @tags, @pubDate, @fetchedAt)
|
|
627
|
-
`);
|
|
628
|
-
const selectExistingStmt = db.prepare(`
|
|
629
|
-
SELECT id, title, author, summary, image_url, pub_date, fetched_at
|
|
630
|
-
FROM items
|
|
631
|
-
WHERE id = @id
|
|
632
|
-
`);
|
|
633
|
-
const repairExistingStmt = db.prepare(`
|
|
634
|
-
UPDATE items
|
|
635
|
-
SET title = @title,
|
|
636
|
-
author = @author,
|
|
637
|
-
summary = @summary,
|
|
638
|
-
image_url = @imageUrl,
|
|
639
|
-
pub_date = @pubDate,
|
|
640
|
-
fetched_at = @fetchedAt
|
|
641
|
-
WHERE id = @id
|
|
642
|
-
`);
|
|
643
613
|
const now2 = (/* @__PURE__ */ new Date()).toISOString();
|
|
644
614
|
let newCount = 0;
|
|
645
615
|
const newIds = /* @__PURE__ */ new Set();
|
|
646
|
-
const
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
}
|
|
685
|
-
repairExistingStmt.run({
|
|
686
|
-
id: item.guid,
|
|
687
|
-
title: shouldRepairTitle ? nextTitle : existing.title,
|
|
688
|
-
author: shouldRepairAuthor ? nextAuthor : existing.author ?? null,
|
|
689
|
-
summary: shouldRepairSummary ? nextSummary : existing.summary,
|
|
690
|
-
imageUrl: shouldRepairImageUrl ? nextImageUrl : existing.image_url ?? null,
|
|
691
|
-
pubDate: shouldRepairPubDate ? nextPubDate : existing.pub_date,
|
|
692
|
-
fetchedAt: now2
|
|
693
|
-
});
|
|
616
|
+
const insertStmt = db.prepare(`
|
|
617
|
+
INSERT OR IGNORE INTO items (id, url, source_url, title, author, summary, image_url, tags, pub_date, fetched_at)
|
|
618
|
+
VALUES (@id, @url, @sourceUrl, @title, @author, @summary, @imageUrl, @tags, @pubDate, @fetchedAt)
|
|
619
|
+
`);
|
|
620
|
+
const selectExistingStmt = db.prepare(`
|
|
621
|
+
SELECT title, author, summary, image_url, pub_date, fetched_at
|
|
622
|
+
FROM items WHERE id = @id
|
|
623
|
+
`);
|
|
624
|
+
const updateStmt = db.prepare(`
|
|
625
|
+
UPDATE items SET title = @title, author = @author, summary = @summary,
|
|
626
|
+
image_url = @imageUrl, pub_date = @pubDate, fetched_at = @fetchedAt
|
|
627
|
+
WHERE id = @id
|
|
628
|
+
`);
|
|
629
|
+
for (const item of items) {
|
|
630
|
+
const nextTitle = normalizeText(item.title) || null;
|
|
631
|
+
const nextSummary = normalizeText(item.summary) || null;
|
|
632
|
+
const nextAuthorArr = normalizeAuthor(item.author);
|
|
633
|
+
const nextAuthor = nextAuthorArr?.length ? JSON.stringify(nextAuthorArr) : null;
|
|
634
|
+
const nextPubDate = pubDateToIsoOrNull(item.pubDate);
|
|
635
|
+
const nextTags = item.tags?.length ? JSON.stringify(item.tags) : null;
|
|
636
|
+
const rawImageUrl = item.imageUrl ?? item.coverImg ?? item.cover_img;
|
|
637
|
+
const nextImageUrl = typeof rawImageUrl === "string" && rawImageUrl.trim() ? rawImageUrl.trim() : null;
|
|
638
|
+
const info = insertStmt.run({
|
|
639
|
+
id: item.guid,
|
|
640
|
+
url: item.link,
|
|
641
|
+
sourceUrl,
|
|
642
|
+
title: nextTitle,
|
|
643
|
+
author: nextAuthor,
|
|
644
|
+
summary: nextSummary,
|
|
645
|
+
imageUrl: nextImageUrl,
|
|
646
|
+
tags: nextTags,
|
|
647
|
+
pubDate: nextPubDate,
|
|
648
|
+
fetchedAt: now2
|
|
649
|
+
});
|
|
650
|
+
newCount += Number(info.changes);
|
|
651
|
+
if (info.changes > 0) {
|
|
652
|
+
newIds.add(item.guid);
|
|
653
|
+
continue;
|
|
694
654
|
}
|
|
695
|
-
|
|
696
|
-
|
|
655
|
+
const existing = selectExistingStmt.get({ id: item.guid });
|
|
656
|
+
if (!existing) continue;
|
|
657
|
+
const shouldRepairTitle = !!nextTitle && !isDateOnlyTitle(nextTitle) && (isDateOnlyTitle(existing.title) || !normalizeText(existing.title));
|
|
658
|
+
const existingSummaryText = normalizeText(existing.summary ?? "");
|
|
659
|
+
const shouldClearDuplicatedSummary = nextSummary == null && !!nextTitle && existingSummaryText === nextTitle;
|
|
660
|
+
const shouldRepairSummary = !!nextSummary && (existingSummaryText.length < nextSummary.length || /!\[[^\]]*\]\([^)]*\)/.test(existingSummaryText)) || shouldClearDuplicatedSummary;
|
|
661
|
+
const shouldRepairImageUrl = !!nextImageUrl && !existing.image_url?.trim();
|
|
662
|
+
const existingAuthorArr = parseAuthorFromDb(existing.author);
|
|
663
|
+
const shouldRepairAuthor = !!nextAuthorArr?.length && !existingAuthorArr?.length;
|
|
664
|
+
const existingPubDateMs = toMs(existing.pub_date);
|
|
665
|
+
const existingFetchedAtMs = toMs(existing.fetched_at);
|
|
666
|
+
const nextPubDateMs = toMs(nextPubDate);
|
|
667
|
+
const existingPubDateLooksFallback = existingPubDateMs != null && existingFetchedAtMs != null && Math.abs(existingPubDateMs - existingFetchedAtMs) <= 5 * 60 * 1e3;
|
|
668
|
+
const shouldRepairPubDate = nextPubDateMs != null && (existingPubDateMs == null || existingPubDateLooksFallback && nextPubDateMs < existingPubDateMs - 24 * 60 * 60 * 1e3);
|
|
669
|
+
if (!(shouldRepairTitle || shouldRepairSummary || shouldRepairImageUrl || shouldRepairAuthor || shouldRepairPubDate)) {
|
|
670
|
+
continue;
|
|
671
|
+
}
|
|
672
|
+
updateStmt.run({
|
|
673
|
+
id: item.guid,
|
|
674
|
+
title: shouldRepairTitle ? nextTitle : existing.title,
|
|
675
|
+
author: shouldRepairAuthor ? nextAuthor : existing.author ?? null,
|
|
676
|
+
summary: shouldClearDuplicatedSummary ? null : shouldRepairSummary ? nextSummary : existing.summary,
|
|
677
|
+
imageUrl: shouldRepairImageUrl ? nextImageUrl : existing.image_url ?? null,
|
|
678
|
+
pubDate: shouldRepairPubDate ? nextPubDate : existing.pub_date,
|
|
679
|
+
fetchedAt: now2
|
|
680
|
+
});
|
|
681
|
+
}
|
|
697
682
|
return { newCount, newIds };
|
|
698
683
|
});
|
|
699
684
|
}
|
|
700
685
|
async function updateItemContent(item) {
|
|
701
686
|
return withWriteLock(async () => {
|
|
702
687
|
const db = await getDb();
|
|
688
|
+
const rawImageUrl = item.imageUrl ?? item.coverImg ?? item.cover_img;
|
|
689
|
+
const nextImageUrl = typeof rawImageUrl === "string" && rawImageUrl.trim() ? rawImageUrl.trim() : null;
|
|
703
690
|
db.prepare(`
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
image_url
|
|
707
|
-
author
|
|
708
|
-
pub_date
|
|
709
|
-
tags
|
|
691
|
+
UPDATE items SET
|
|
692
|
+
content = COALESCE(content, @content),
|
|
693
|
+
image_url = COALESCE(@imageUrl, image_url),
|
|
694
|
+
author = COALESCE(@author, author),
|
|
695
|
+
pub_date = COALESCE(@pubDate, pub_date),
|
|
696
|
+
tags = @tags,
|
|
710
697
|
translations = COALESCE(@translations, translations)
|
|
711
|
-
|
|
712
|
-
|
|
698
|
+
WHERE id = @id
|
|
699
|
+
`).run({
|
|
713
700
|
id: item.guid,
|
|
714
701
|
content: item.content ?? null,
|
|
715
|
-
imageUrl:
|
|
702
|
+
imageUrl: nextImageUrl,
|
|
716
703
|
author: (() => {
|
|
717
704
|
const arr = normalizeAuthor(item.author);
|
|
718
705
|
return arr?.length ? JSON.stringify(arr) : null;
|
|
@@ -723,62 +710,24 @@ async function updateItemContent(item) {
|
|
|
723
710
|
});
|
|
724
711
|
});
|
|
725
712
|
}
|
|
726
|
-
async function queryFeedItems(sourceUrls, limit, offset, opts) {
|
|
727
|
-
if (sourceUrls.length === 0) return { items: [], hasMore: false };
|
|
728
|
-
const expanded = [...new Set(sourceUrls.map((u) => canonicalHttpSourceRef(u)).filter(Boolean))];
|
|
729
|
-
if (expanded.length === 0) return { items: [], hasMore: false };
|
|
730
|
-
const db = await getDb();
|
|
731
|
-
const placeholders = expanded.map((_, i) => `@u${i}`).join(", ");
|
|
732
|
-
const conditions = [`source_url IN (${placeholders})`];
|
|
733
|
-
const params = { lim: limit + 1, off: offset };
|
|
734
|
-
expanded.forEach((url, i) => {
|
|
735
|
-
params[`u${i}`] = url;
|
|
736
|
-
});
|
|
737
|
-
if (opts?.since) {
|
|
738
|
-
conditions.push("COALESCE(pub_date, fetched_at) >= @since");
|
|
739
|
-
params.since = opts.since.length === 10 ? `${opts.since}T00:00:00.000Z` : opts.since;
|
|
740
|
-
}
|
|
741
|
-
if (opts?.until) {
|
|
742
|
-
conditions.push("COALESCE(pub_date, fetched_at) < @until");
|
|
743
|
-
if (opts.until.length === 10) {
|
|
744
|
-
const d = /* @__PURE__ */ new Date(`${opts.until}T12:00:00Z`);
|
|
745
|
-
d.setUTCDate(d.getUTCDate() + 1);
|
|
746
|
-
params.until = d.toISOString();
|
|
747
|
-
} else {
|
|
748
|
-
params.until = opts.until;
|
|
749
|
-
}
|
|
750
|
-
}
|
|
751
|
-
const where = conditions.length ? `WHERE ${conditions.join(" AND ")}` : "";
|
|
752
|
-
const rows = db.prepare(`
|
|
753
|
-
SELECT * FROM items
|
|
754
|
-
${where}
|
|
755
|
-
ORDER BY COALESCE(pub_date, fetched_at) DESC
|
|
756
|
-
LIMIT @lim OFFSET @off
|
|
757
|
-
`).all(params);
|
|
758
|
-
const hasMore = rows.length > limit;
|
|
759
|
-
const items = mapRowsToDbItems(hasMore ? rows.slice(0, limit) : rows);
|
|
760
|
-
return { items, hasMore };
|
|
761
|
-
}
|
|
762
713
|
async function queryItems(opts) {
|
|
763
714
|
const db = await getDb();
|
|
764
715
|
const { sourceUrl, sourceUrls, author, q, tags: tagsFilter, limit = 20, offset = 0, since, until } = opts;
|
|
765
716
|
const conditions = [];
|
|
766
|
-
const params = {
|
|
717
|
+
const params = {};
|
|
767
718
|
if (sourceUrl) {
|
|
768
719
|
const key = canonicalHttpSourceRef(sourceUrl);
|
|
769
|
-
if (!key) {
|
|
770
|
-
return { items: [], total: 0 };
|
|
771
|
-
}
|
|
720
|
+
if (!key) return { items: [], total: 0 };
|
|
772
721
|
conditions.push("i.source_url = @sourceUrl");
|
|
773
722
|
params.sourceUrl = key;
|
|
774
723
|
} else if (sourceUrls && sourceUrls.length > 0) {
|
|
775
724
|
const expanded = [...new Set(sourceUrls.map((s) => canonicalHttpSourceRef(s)).filter(Boolean))];
|
|
776
|
-
if (expanded.length === 0) {
|
|
777
|
-
return { items: [], total: 0 };
|
|
778
|
-
}
|
|
725
|
+
if (expanded.length === 0) return { items: [], total: 0 };
|
|
779
726
|
const placeholders = expanded.map((_, i) => `@src${i}`).join(", ");
|
|
780
727
|
conditions.push(`i.source_url IN (${placeholders})`);
|
|
781
|
-
expanded.forEach((s, i) =>
|
|
728
|
+
expanded.forEach((s, i) => {
|
|
729
|
+
params[`src${i}`] = s;
|
|
730
|
+
});
|
|
782
731
|
}
|
|
783
732
|
if (author && author.trim().length >= 2) {
|
|
784
733
|
conditions.push("instr(i.author, @author) > 0");
|
|
@@ -789,9 +738,9 @@ async function queryItems(opts) {
|
|
|
789
738
|
params.q = q;
|
|
790
739
|
}
|
|
791
740
|
if (tagsFilter && tagsFilter.length > 0) {
|
|
792
|
-
const trimmed = tagsFilter.filter((t) => typeof t === "string" && t.trim()).map((t) => t.trim());
|
|
741
|
+
const trimmed = tagsFilter.filter((t) => typeof t === "string" && t.trim().length > 0).map((t) => t.trim());
|
|
793
742
|
if (trimmed.length > 0) {
|
|
794
|
-
const tagConds = trimmed.map((_,
|
|
743
|
+
const tagConds = trimmed.map((_, idx) => `LOWER(TRIM(json_each.value)) = LOWER(@tag${idx})`).join(" OR ");
|
|
795
744
|
conditions.push(`i.tags IS NOT NULL AND EXISTS (SELECT 1 FROM json_each(i.tags) WHERE ${tagConds})`);
|
|
796
745
|
trimmed.forEach((t, i) => {
|
|
797
746
|
params[`tag${i}`] = t;
|
|
@@ -807,14 +756,19 @@ async function queryItems(opts) {
|
|
|
807
756
|
params.until = until.toISOString();
|
|
808
757
|
}
|
|
809
758
|
const where = conditions.length ? `WHERE ${conditions.join(" AND ")}` : "";
|
|
759
|
+
const sqlParams = params;
|
|
810
760
|
const rows = db.prepare(`
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
const { count } = db.prepare(`SELECT COUNT(*) as count FROM items i ${where}`).get(
|
|
817
|
-
return { items: mapRowsToDbItems(rows)
|
|
761
|
+
SELECT i.id, i.url, i.source_url, i.title, i.author, i.summary, i.content, i.image_url, i.tags, i.translations, i.pub_date, i.fetched_at, i.pushed_at
|
|
762
|
+
FROM items i ${where}
|
|
763
|
+
ORDER BY COALESCE(i.pub_date, i.fetched_at) DESC
|
|
764
|
+
LIMIT ${limit} OFFSET ${offset}
|
|
765
|
+
`).all(sqlParams);
|
|
766
|
+
const { count } = db.prepare(`SELECT COUNT(*) as count FROM items i ${where}`).get(sqlParams);
|
|
767
|
+
return { items: mapRowsToDbItems(rows.map((r) => {
|
|
768
|
+
const obj = {};
|
|
769
|
+
for (const [k, v] of Object.entries(r)) obj[k] = v;
|
|
770
|
+
return obj;
|
|
771
|
+
})), total: count };
|
|
818
772
|
}
|
|
819
773
|
async function removeTagFromAllItems(tag) {
|
|
820
774
|
const trimmed = String(tag ?? "").trim();
|
|
@@ -825,22 +779,19 @@ async function removeTagFromAllItems(tag) {
|
|
|
825
779
|
const rows = db.prepare("SELECT id, tags FROM items WHERE tags IS NOT NULL AND tags != ''").all();
|
|
826
780
|
const updateStmt = db.prepare("UPDATE items SET tags = @tags WHERE id = @id");
|
|
827
781
|
let count = 0;
|
|
828
|
-
const
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
continue;
|
|
835
|
-
}
|
|
836
|
-
const filtered = itemTags.filter((t) => String(t).trim().toLowerCase() !== targetLower);
|
|
837
|
-
if (filtered.length === itemTags.length) continue;
|
|
838
|
-
const nextTags = filtered.length > 0 ? JSON.stringify(filtered) : null;
|
|
839
|
-
updateStmt.run({ id: row.id, tags: nextTags });
|
|
840
|
-
count += 1;
|
|
782
|
+
for (const row of rows) {
|
|
783
|
+
let itemTags;
|
|
784
|
+
try {
|
|
785
|
+
itemTags = JSON.parse(row.tags);
|
|
786
|
+
} catch {
|
|
787
|
+
continue;
|
|
841
788
|
}
|
|
842
|
-
|
|
843
|
-
|
|
789
|
+
const filtered = itemTags.filter((t) => String(t).trim().toLowerCase() !== targetLower);
|
|
790
|
+
if (filtered.length === itemTags.length) continue;
|
|
791
|
+
const nextTags = filtered.length > 0 ? JSON.stringify(filtered) : null;
|
|
792
|
+
updateStmt.run({ id: row.id, tags: nextTags });
|
|
793
|
+
count += 1;
|
|
794
|
+
}
|
|
844
795
|
return count;
|
|
845
796
|
});
|
|
846
797
|
}
|
|
@@ -849,25 +800,19 @@ async function markPushed(ids) {
|
|
|
849
800
|
return withWriteLock(async () => {
|
|
850
801
|
const db = await getDb();
|
|
851
802
|
const now2 = (/* @__PURE__ */ new Date()).toISOString();
|
|
852
|
-
const
|
|
853
|
-
|
|
854
|
-
for (const id of list) stmt.run({ now: now2, id });
|
|
855
|
-
});
|
|
856
|
-
run(ids);
|
|
803
|
+
const placeholders = ids.map(() => "?").join(",");
|
|
804
|
+
db.prepare(`UPDATE items SET pushed_at = ? WHERE id IN (${placeholders})`).run(now2, ...ids);
|
|
857
805
|
});
|
|
858
806
|
}
|
|
859
807
|
async function deleteItem(id) {
|
|
860
808
|
if (!id?.trim()) return false;
|
|
861
809
|
return withWriteLock(async () => {
|
|
862
810
|
const db = await getDb();
|
|
863
|
-
const
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
return info.changes;
|
|
869
|
-
});
|
|
870
|
-
return run() > 0;
|
|
811
|
+
const row = db.prepare("SELECT rowid FROM items WHERE id = @id").get({ id: id.trim() });
|
|
812
|
+
if (!row) return false;
|
|
813
|
+
db.prepare("DELETE FROM items_fts WHERE rowid = @rowid").run({ rowid: row.rowid });
|
|
814
|
+
const info = db.prepare("DELETE FROM items WHERE id = @id").run({ id: id.trim() });
|
|
815
|
+
return Number(info.changes) > 0;
|
|
871
816
|
});
|
|
872
817
|
}
|
|
873
818
|
async function deleteItemsBySourceUrl(sourceUrl) {
|
|
@@ -877,29 +822,33 @@ async function deleteItemsBySourceUrl(sourceUrl) {
|
|
|
877
822
|
return withWriteLock(async () => {
|
|
878
823
|
const db = await getDb();
|
|
879
824
|
const info = db.prepare("DELETE FROM items WHERE source_url = @sourceUrl").run({ sourceUrl: key });
|
|
880
|
-
return info.changes;
|
|
825
|
+
return Number(info.changes);
|
|
881
826
|
});
|
|
882
827
|
}
|
|
883
828
|
async function getPendingPushItems(limit = 100) {
|
|
884
829
|
const db = await getDb();
|
|
885
830
|
const rows = db.prepare(`
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
return mapRowsToDbItems(rows)
|
|
831
|
+
SELECT * FROM items
|
|
832
|
+
WHERE pushed_at IS NULL AND content IS NOT NULL
|
|
833
|
+
ORDER BY fetched_at ASC
|
|
834
|
+
LIMIT ${limit}
|
|
835
|
+
`).all();
|
|
836
|
+
return mapRowsToDbItems(rows.map((r) => {
|
|
837
|
+
const obj = {};
|
|
838
|
+
for (const [k, v] of Object.entries(r)) obj[k] = v;
|
|
839
|
+
return obj;
|
|
840
|
+
}));
|
|
892
841
|
}
|
|
893
842
|
async function getSourceStats() {
|
|
894
843
|
const { mergeSourceStatsRows: mergeSourceStatsRows2 } = await Promise.resolve().then(() => httpSourceRef);
|
|
895
844
|
const db = await getDb();
|
|
896
|
-
const rows = db.prepare(
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
845
|
+
const rows = db.prepare(`
|
|
846
|
+
SELECT source_url,
|
|
847
|
+
COUNT(*) as count,
|
|
848
|
+
SUM(CASE WHEN julianday(fetched_at) >= julianday('now', '-7 days') THEN 1 ELSE 0 END) as count_7d,
|
|
849
|
+
MAX(COALESCE(pub_date, fetched_at)) as latest_at
|
|
850
|
+
FROM items GROUP BY source_url ORDER BY count DESC
|
|
851
|
+
`).all();
|
|
903
852
|
return mergeSourceStatsRows2(rows);
|
|
904
853
|
}
|
|
905
854
|
async function insertLog(entry) {
|
|
@@ -919,7 +868,7 @@ async function queryLogs(opts) {
|
|
|
919
868
|
const db = await getLogsDb();
|
|
920
869
|
const { level, category, limit = 50, offset = 0, since } = opts;
|
|
921
870
|
const conditions = [];
|
|
922
|
-
const params = {
|
|
871
|
+
const params = {};
|
|
923
872
|
if (level) {
|
|
924
873
|
conditions.push("level = @level");
|
|
925
874
|
params.level = level;
|
|
@@ -933,19 +882,30 @@ async function queryLogs(opts) {
|
|
|
933
882
|
params.since = since.toISOString();
|
|
934
883
|
}
|
|
935
884
|
const where = conditions.length ? `WHERE ${conditions.join(" AND ")}` : "";
|
|
885
|
+
const sqlParams = params;
|
|
936
886
|
const rows = db.prepare(`
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
const { count } = db.prepare(`SELECT COUNT(*) as count FROM logs ${where}`).get(
|
|
943
|
-
return {
|
|
887
|
+
SELECT id, level, category, message, payload, created_at
|
|
888
|
+
FROM logs ${where}
|
|
889
|
+
ORDER BY created_at DESC
|
|
890
|
+
LIMIT ${limit} OFFSET ${offset}
|
|
891
|
+
`).all(sqlParams);
|
|
892
|
+
const { count } = db.prepare(`SELECT COUNT(*) as count FROM logs ${where}`).get(sqlParams);
|
|
893
|
+
return {
|
|
894
|
+
items: rows.map((r) => ({
|
|
895
|
+
id: Number(r.id),
|
|
896
|
+
level: String(r.level),
|
|
897
|
+
category: String(r.category),
|
|
898
|
+
message: String(r.message),
|
|
899
|
+
payload: r.payload,
|
|
900
|
+
created_at: String(r.created_at)
|
|
901
|
+
})),
|
|
902
|
+
total: Number(count)
|
|
903
|
+
};
|
|
944
904
|
}
|
|
945
905
|
async function clearAllLogs() {
|
|
946
906
|
const db = await getLogsDb();
|
|
947
907
|
const r = db.prepare("DELETE FROM logs").run();
|
|
948
|
-
return r.changes;
|
|
908
|
+
return Number(r.changes);
|
|
949
909
|
}
|
|
950
910
|
async function getSystemTags() {
|
|
951
911
|
try {
|
|
@@ -1213,6 +1173,22 @@ function isFrameDetachedError(e) {
|
|
|
1213
1173
|
const msg = e instanceof Error ? e.message : String(e);
|
|
1214
1174
|
return /detached|Navigating frame was detached|Session closed/i.test(msg);
|
|
1215
1175
|
}
|
|
1176
|
+
const sharedBrowsers = /* @__PURE__ */ new Map();
|
|
1177
|
+
function browserKey(config) {
|
|
1178
|
+
const wantHeadless = config.headless !== false;
|
|
1179
|
+
const executablePath = config.chromeExecutablePath ?? process.env.CHROME_PATH ?? findChromeExecutable() ?? "";
|
|
1180
|
+
const userDataDir = getUserDataDir(config.cacheDir);
|
|
1181
|
+
const proxy = resolveProxy(config) ?? "";
|
|
1182
|
+
return JSON.stringify({
|
|
1183
|
+
headless: wantHeadless,
|
|
1184
|
+
userDataDir: userDataDir ? resolve(userDataDir) : "",
|
|
1185
|
+
proxy,
|
|
1186
|
+
executablePath
|
|
1187
|
+
});
|
|
1188
|
+
}
|
|
1189
|
+
function isBrowserConnected(browser) {
|
|
1190
|
+
return !!browser && browser.connected !== false;
|
|
1191
|
+
}
|
|
1216
1192
|
async function launchBrowser(config) {
|
|
1217
1193
|
const wantHeadless = config.headless !== false;
|
|
1218
1194
|
const executablePath = config.chromeExecutablePath ?? process.env.CHROME_PATH ?? findChromeExecutable();
|
|
@@ -1256,29 +1232,53 @@ async function launchBrowser(config) {
|
|
|
1256
1232
|
}
|
|
1257
1233
|
throw lastErr;
|
|
1258
1234
|
}
|
|
1235
|
+
async function getOrCreateBrowser(config) {
|
|
1236
|
+
const key = browserKey(config);
|
|
1237
|
+
const current = sharedBrowsers.get(key);
|
|
1238
|
+
if (isBrowserConnected(current?.browser)) {
|
|
1239
|
+
return current.browser;
|
|
1240
|
+
}
|
|
1241
|
+
if (current?.promise) {
|
|
1242
|
+
return current.promise;
|
|
1243
|
+
}
|
|
1244
|
+
const slot = {};
|
|
1245
|
+
const promise = launchBrowser({ ...config, proxy: resolveProxy(config) }).then((browser) => {
|
|
1246
|
+
slot.browser = browser;
|
|
1247
|
+
slot.promise = void 0;
|
|
1248
|
+
browser.once("disconnected", () => {
|
|
1249
|
+
if (sharedBrowsers.get(key)?.browser === browser) {
|
|
1250
|
+
sharedBrowsers.delete(key);
|
|
1251
|
+
}
|
|
1252
|
+
});
|
|
1253
|
+
return browser;
|
|
1254
|
+
}).catch((err) => {
|
|
1255
|
+
if (sharedBrowsers.get(key) === slot) {
|
|
1256
|
+
sharedBrowsers.delete(key);
|
|
1257
|
+
}
|
|
1258
|
+
throw err;
|
|
1259
|
+
});
|
|
1260
|
+
slot.promise = promise;
|
|
1261
|
+
sharedBrowsers.set(key, slot);
|
|
1262
|
+
return promise;
|
|
1263
|
+
}
|
|
1259
1264
|
async function preCheckAuth(authFlow, cacheDir, opts) {
|
|
1260
1265
|
const { checkAuth, loginUrl, domain } = authFlow;
|
|
1261
1266
|
if (domain == null || !cacheDir) return true;
|
|
1262
1267
|
const isHeadless = opts?.headless !== false;
|
|
1263
|
-
const browser = await
|
|
1268
|
+
const browser = await getOrCreateBrowser({
|
|
1264
1269
|
headless: isHeadless,
|
|
1265
1270
|
cacheDir,
|
|
1266
1271
|
proxy: resolveProxy(opts)
|
|
1267
1272
|
});
|
|
1273
|
+
const page = await browser.newPage();
|
|
1268
1274
|
try {
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
await new Promise((resolve2) => setTimeout(resolve2, 3e3));
|
|
1275
|
-
return await checkAuth(page, page.url());
|
|
1276
|
-
} finally {
|
|
1277
|
-
await page.close().catch(() => {
|
|
1278
|
-
});
|
|
1279
|
-
}
|
|
1275
|
+
await setupPage(page, isHeadless);
|
|
1276
|
+
await applyProxyAuthToPage(page, opts);
|
|
1277
|
+
await page.goto(loginUrl, { waitUntil: "domcontentloaded", timeout: 6e4 });
|
|
1278
|
+
await new Promise((resolve2) => setTimeout(resolve2, 3e3));
|
|
1279
|
+
return await checkAuth(page, page.url());
|
|
1280
1280
|
} finally {
|
|
1281
|
-
await
|
|
1281
|
+
await page.close().catch(() => {
|
|
1282
1282
|
});
|
|
1283
1283
|
}
|
|
1284
1284
|
}
|
|
@@ -1323,10 +1323,11 @@ async function fetchHtml(url, config = {}) {
|
|
|
1323
1323
|
waitAfterLoadMs,
|
|
1324
1324
|
waitForSelector,
|
|
1325
1325
|
waitForSelectorTimeoutMs,
|
|
1326
|
+
scrollBeforeSnapshot,
|
|
1326
1327
|
useHttpResponseBody
|
|
1327
1328
|
} = config;
|
|
1328
1329
|
const isHeadless = headless !== false;
|
|
1329
|
-
const browser = await
|
|
1330
|
+
const browser = await getOrCreateBrowser({
|
|
1330
1331
|
headless: isHeadless,
|
|
1331
1332
|
cacheDir,
|
|
1332
1333
|
proxy: resolveProxy(config),
|
|
@@ -1335,84 +1336,105 @@ async function fetchHtml(url, config = {}) {
|
|
|
1335
1336
|
const navigationTimeout = timeoutMs ?? 6e4;
|
|
1336
1337
|
const maxAttempts = 2;
|
|
1337
1338
|
let lastError;
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
await page.authenticate({ username: username ?? "", password: password ?? "" });
|
|
1359
|
-
}
|
|
1360
|
-
}
|
|
1361
|
-
if (timeoutMs != null) {
|
|
1362
|
-
await page.setDefaultNavigationTimeout(timeoutMs);
|
|
1363
|
-
}
|
|
1364
|
-
const response = await page.goto(url, { waitUntil, timeout: navigationTimeout });
|
|
1365
|
-
if (extraWaitMs > 0) {
|
|
1366
|
-
await new Promise((resolve2) => setTimeout(resolve2, extraWaitMs));
|
|
1339
|
+
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
|
1340
|
+
const page = await browser.newPage();
|
|
1341
|
+
const isRetry = attempt === 1;
|
|
1342
|
+
const waitUntil = isRetry ? "domcontentloaded" : "load";
|
|
1343
|
+
const extraWaitMs = isRetry ? Math.min(500, Math.max(0, waitAfterLoadMs ?? 2e3)) : Math.max(0, waitAfterLoadMs ?? 2e3);
|
|
1344
|
+
try {
|
|
1345
|
+
if (config.browserContext) {
|
|
1346
|
+
await config.browserContext(page.browserContext());
|
|
1347
|
+
}
|
|
1348
|
+
await setupPage(page, isHeadless);
|
|
1349
|
+
const extraHeaders = { "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", ...headers ?? {} };
|
|
1350
|
+
if (cookies != null && cookies !== "") {
|
|
1351
|
+
extraHeaders.cookie = cookies;
|
|
1352
|
+
}
|
|
1353
|
+
await page.setExtraHTTPHeaders(extraHeaders);
|
|
1354
|
+
const proxy = resolveProxy(config);
|
|
1355
|
+
if (proxy) {
|
|
1356
|
+
const { username, password } = parseProxy(proxy);
|
|
1357
|
+
if (username !== void 0 || password !== void 0) {
|
|
1358
|
+
await page.authenticate({ username: username ?? "", password: password ?? "" });
|
|
1367
1359
|
}
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1360
|
+
}
|
|
1361
|
+
if (timeoutMs != null) {
|
|
1362
|
+
await page.setDefaultNavigationTimeout(timeoutMs);
|
|
1363
|
+
}
|
|
1364
|
+
const response = await page.goto(url, { waitUntil, timeout: navigationTimeout });
|
|
1365
|
+
if (extraWaitMs > 0) {
|
|
1366
|
+
await new Promise((resolve2) => setTimeout(resolve2, extraWaitMs));
|
|
1367
|
+
}
|
|
1368
|
+
if (waitForSelector != null && waitForSelector !== "" && !isRetry) {
|
|
1369
|
+
const selectorTimeout = waitForSelectorTimeoutMs ?? 2e4;
|
|
1370
|
+
await page.waitForSelector(waitForSelector, { timeout: selectorTimeout });
|
|
1371
|
+
}
|
|
1372
|
+
if (scrollBeforeSnapshot && !isRetry) {
|
|
1373
|
+
const scrollSelector = scrollBeforeSnapshot.selector ?? null;
|
|
1374
|
+
const rounds = scrollBeforeSnapshot.rounds ?? 6;
|
|
1375
|
+
const pauseMs = scrollBeforeSnapshot.pauseMs ?? 800;
|
|
1376
|
+
for (let i = 0; i < rounds; i++) {
|
|
1377
|
+
const before = await page.evaluate((sel) => {
|
|
1378
|
+
const target = sel ? document.querySelector(sel) : null;
|
|
1379
|
+
const el = target ?? document.scrollingElement ?? document.documentElement;
|
|
1380
|
+
return el?.scrollHeight ?? 0;
|
|
1381
|
+
}, scrollSelector);
|
|
1382
|
+
await page.evaluate((sel) => {
|
|
1383
|
+
const target = sel ? document.querySelector(sel) : null;
|
|
1384
|
+
const el = target ?? document.scrollingElement ?? document.documentElement;
|
|
1385
|
+
if (!el) return;
|
|
1386
|
+
el.scrollTop = el.scrollHeight;
|
|
1387
|
+
window.scrollBy(0, window.innerHeight);
|
|
1388
|
+
}, scrollSelector);
|
|
1389
|
+
await new Promise((resolve2) => setTimeout(resolve2, pauseMs));
|
|
1390
|
+
const after = await page.evaluate((sel) => {
|
|
1391
|
+
const target = sel ? document.querySelector(sel) : null;
|
|
1392
|
+
const el = target ?? document.scrollingElement ?? document.documentElement;
|
|
1393
|
+
return el?.scrollHeight ?? 0;
|
|
1394
|
+
}, scrollSelector);
|
|
1395
|
+
if (after <= before && i >= 2) break;
|
|
1371
1396
|
}
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1397
|
+
}
|
|
1398
|
+
if (checkAuth != null || authFlow != null) {
|
|
1399
|
+
const authCheck = checkAuth ?? authFlow?.checkAuth;
|
|
1400
|
+
if (authCheck != null) {
|
|
1401
|
+
const ok = await authCheck(page, url);
|
|
1402
|
+
if (!ok) {
|
|
1403
|
+
throw new Error("checkAuth failed: 未通过认证检查,请先调用 ensureAuth 进行预处理登录");
|
|
1379
1404
|
}
|
|
1380
1405
|
}
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
}
|
|
1388
|
-
} else {
|
|
1406
|
+
}
|
|
1407
|
+
let rawBody;
|
|
1408
|
+
if (useHttpResponseBody === true && response != null) {
|
|
1409
|
+
try {
|
|
1410
|
+
rawBody = await response.text();
|
|
1411
|
+
} catch {
|
|
1389
1412
|
rawBody = await page.content();
|
|
1390
1413
|
}
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1414
|
+
} else {
|
|
1415
|
+
rawBody = await page.content();
|
|
1416
|
+
}
|
|
1417
|
+
const finalUrl = response?.url() ?? page.url() ?? String(url);
|
|
1418
|
+
const status = response?.status() ?? 0;
|
|
1419
|
+
const statusText = response?.statusText() ?? "";
|
|
1420
|
+
const rawHeaders = response?.headers() ?? {};
|
|
1421
|
+
const normalizedHeaders = headersToRecord(rawHeaders);
|
|
1422
|
+
const body = applyPurify(rawBody, purify);
|
|
1423
|
+
await page.close().catch(() => {
|
|
1424
|
+
});
|
|
1425
|
+
return { finalUrl, status, statusText, headers: normalizedHeaders, body };
|
|
1426
|
+
} catch (e) {
|
|
1427
|
+
lastError = e;
|
|
1428
|
+
await page.close().catch(() => {
|
|
1429
|
+
});
|
|
1430
|
+
if (isRetry || !isFrameDetachedError(e)) {
|
|
1431
|
+
throw e;
|
|
1409
1432
|
}
|
|
1433
|
+
logger.warn("scraper", "fetchHtml 因 frame 分离重试", { url, attempt: attempt + 1, err: e instanceof Error ? e.message : String(e) });
|
|
1434
|
+
await new Promise((r) => setTimeout(r, 800));
|
|
1410
1435
|
}
|
|
1411
|
-
throw lastError;
|
|
1412
|
-
} finally {
|
|
1413
|
-
await browser.close().catch(() => {
|
|
1414
|
-
});
|
|
1415
1436
|
}
|
|
1437
|
+
throw lastError;
|
|
1416
1438
|
}
|
|
1417
1439
|
const VALID_INTERVALS = ["1min", "5min", "10min", "30min", "1h", "6h", "12h", "1day", "3day", "7day"];
|
|
1418
1440
|
function cronToRefreshInterval(cronExpr) {
|
|
@@ -1946,6 +1968,7 @@ function buildSiteContext(site, ctx) {
|
|
|
1946
1968
|
purify: opts?.purify,
|
|
1947
1969
|
waitForSelector: opts?.waitForSelector,
|
|
1948
1970
|
waitForSelectorTimeoutMs: opts?.waitForSelectorTimeoutMs,
|
|
1971
|
+
scrollBeforeSnapshot: opts?.scrollBeforeSnapshot,
|
|
1949
1972
|
useHttpResponseBody: opts?.useHttpResponseBody
|
|
1950
1973
|
});
|
|
1951
1974
|
return { html: res.body, finalUrl: res.finalUrl ?? url, status: res.status };
|
|
@@ -1984,6 +2007,7 @@ function createWebSource(site) {
|
|
|
1984
2007
|
const authFlow = toAuthFlow(site);
|
|
1985
2008
|
return {
|
|
1986
2009
|
id: site.id,
|
|
2010
|
+
name: site.name,
|
|
1987
2011
|
pattern: site.listUrlPattern,
|
|
1988
2012
|
priority: 50,
|
|
1989
2013
|
refreshInterval: site.refreshInterval ?? void 0,
|
|
@@ -2512,18 +2536,30 @@ function onFeedUpdated(fn) {
|
|
|
2512
2536
|
eventBus.on("feed:updated", fn);
|
|
2513
2537
|
return () => eventBus.off("feed:updated", fn);
|
|
2514
2538
|
}
|
|
2539
|
+
function migrateGatewayFromFile(j) {
|
|
2540
|
+
const g = j?.deliver?.gateway?.trim();
|
|
2541
|
+
if (g) return g;
|
|
2542
|
+
const u = j?.deliver?.url?.trim() ?? "";
|
|
2543
|
+
if (u) {
|
|
2544
|
+
return u.replace(/\/items\/?$/i, "").replace(/\/+$/, "").trim();
|
|
2545
|
+
}
|
|
2546
|
+
const s = j?.deliver?.sourcesUrl?.trim() ?? "";
|
|
2547
|
+
if (s) {
|
|
2548
|
+
return s.replace(/\/sources\/?$/i, "").replace(/\/+$/, "").trim();
|
|
2549
|
+
}
|
|
2550
|
+
return "";
|
|
2551
|
+
}
|
|
2515
2552
|
async function getDeliverConfig() {
|
|
2516
2553
|
try {
|
|
2517
2554
|
const raw = await readFile(CONFIG_PATH, "utf-8");
|
|
2518
2555
|
const j = JSON.parse(raw);
|
|
2519
|
-
const u = j?.deliver?.url;
|
|
2520
2556
|
const t = j?.deliver?.token;
|
|
2521
2557
|
return {
|
|
2522
|
-
|
|
2558
|
+
gateway: migrateGatewayFromFile(j),
|
|
2523
2559
|
token: typeof t === "string" ? t.trim() : ""
|
|
2524
2560
|
};
|
|
2525
2561
|
} catch {
|
|
2526
|
-
return {
|
|
2562
|
+
return { gateway: "", token: "" };
|
|
2527
2563
|
}
|
|
2528
2564
|
}
|
|
2529
2565
|
async function saveDeliverConfig(config) {
|
|
@@ -2533,13 +2569,11 @@ async function saveDeliverConfig(config) {
|
|
|
2533
2569
|
root = JSON.parse(raw);
|
|
2534
2570
|
} catch {
|
|
2535
2571
|
}
|
|
2536
|
-
const
|
|
2537
|
-
const base2 = typeof prev === "object" && prev !== null && !Array.isArray(prev) ? { ...prev } : {};
|
|
2538
|
-
const url = config.url.trim();
|
|
2572
|
+
const gateway = config.gateway.trim();
|
|
2539
2573
|
const token = config.token.trim();
|
|
2540
|
-
const next = {
|
|
2574
|
+
const next = {};
|
|
2575
|
+
if (gateway) next.gateway = gateway;
|
|
2541
2576
|
if (token) next.token = token;
|
|
2542
|
-
else delete next.token;
|
|
2543
2577
|
root.deliver = next;
|
|
2544
2578
|
await writeFile(CONFIG_PATH, JSON.stringify(root, null, 2) + "\n", "utf-8");
|
|
2545
2579
|
}
|
|
@@ -2557,6 +2591,11 @@ function feedItemsToPayload(items) {
|
|
|
2557
2591
|
translations: i.translations
|
|
2558
2592
|
}));
|
|
2559
2593
|
}
|
|
2594
|
+
function joinGatewayPath(gatewayBase, segment) {
|
|
2595
|
+
const base2 = gatewayBase.trim().replace(/\/+$/, "");
|
|
2596
|
+
if (!base2) return "";
|
|
2597
|
+
return `${base2}/${segment}`;
|
|
2598
|
+
}
|
|
2560
2599
|
async function postDeliverItems(url, sourceRef, items, options) {
|
|
2561
2600
|
if (!url.trim() || items.length === 0) return;
|
|
2562
2601
|
const body = JSON.stringify({ sourceRef, items: feedItemsToPayload(items) });
|
|
@@ -2585,6 +2624,52 @@ async function postDeliverItemsSafe(url, sourceRef, items, options) {
|
|
|
2585
2624
|
});
|
|
2586
2625
|
}
|
|
2587
2626
|
}
|
|
2627
|
+
async function postDeliverSources(url, sourcesJson, options) {
|
|
2628
|
+
if (!url.trim() || !sourcesJson.trim()) return;
|
|
2629
|
+
const headers = {
|
|
2630
|
+
"Content-Type": "application/json; charset=utf-8"
|
|
2631
|
+
};
|
|
2632
|
+
const t = options?.bearerToken?.trim();
|
|
2633
|
+
if (t) headers.Authorization = `Bearer ${t}`;
|
|
2634
|
+
const res = await fetch(url.trim(), {
|
|
2635
|
+
method: "POST",
|
|
2636
|
+
headers,
|
|
2637
|
+
body: sourcesJson,
|
|
2638
|
+
signal: AbortSignal.timeout(12e4)
|
|
2639
|
+
});
|
|
2640
|
+
if (!res.ok) {
|
|
2641
|
+
const text = await res.text().catch(() => "");
|
|
2642
|
+
throw new Error(`HTTP ${res.status}${text ? `: ${text.slice(0, 200)}` : ""}`);
|
|
2643
|
+
}
|
|
2644
|
+
}
|
|
2645
|
+
async function postDeliverSourcesSafe(url, sourcesJson, options) {
|
|
2646
|
+
try {
|
|
2647
|
+
await postDeliverSources(url, sourcesJson, options);
|
|
2648
|
+
} catch (err) {
|
|
2649
|
+
logger.warn("deliver", "信源配置投递失败", {
|
|
2650
|
+
err: err instanceof Error ? err.message : String(err)
|
|
2651
|
+
});
|
|
2652
|
+
}
|
|
2653
|
+
}
|
|
2654
|
+
async function postDeliverGatewayTest(gateway, body, options) {
|
|
2655
|
+
const url = joinGatewayPath(gateway, "test");
|
|
2656
|
+
if (!url) throw new Error("gateway 不能为空");
|
|
2657
|
+
const headers = {
|
|
2658
|
+
"Content-Type": "application/json; charset=utf-8"
|
|
2659
|
+
};
|
|
2660
|
+
const t = options?.bearerToken?.trim();
|
|
2661
|
+
if (t) headers.Authorization = `Bearer ${t}`;
|
|
2662
|
+
const res = await fetch(url, {
|
|
2663
|
+
method: "POST",
|
|
2664
|
+
headers,
|
|
2665
|
+
body: JSON.stringify(body),
|
|
2666
|
+
signal: AbortSignal.timeout(12e4)
|
|
2667
|
+
});
|
|
2668
|
+
if (!res.ok) {
|
|
2669
|
+
const text = await res.text().catch(() => "");
|
|
2670
|
+
throw new Error(`HTTP ${res.status}${text ? `: ${text.slice(0, 200)}` : ""}`);
|
|
2671
|
+
}
|
|
2672
|
+
}
|
|
2588
2673
|
function resolveHeadlessForFeeder(config) {
|
|
2589
2674
|
if (config.force === true) {
|
|
2590
2675
|
return config.headless === true ? true : false;
|
|
@@ -2642,7 +2727,7 @@ async function generateAndCache(listUrl, key, config, proxy) {
|
|
|
2642
2727
|
});
|
|
2643
2728
|
generatingKeys.delete(key);
|
|
2644
2729
|
logger.info("scraper", "抓取成功", { source_url: listUrl, count: items.length });
|
|
2645
|
-
const {
|
|
2730
|
+
const { gateway: deliverGateway, token: deliverToken } = await getDeliverConfig();
|
|
2646
2731
|
let newCount = 0;
|
|
2647
2732
|
let newIds = /* @__PURE__ */ new Set();
|
|
2648
2733
|
const upsertResult = await upsertItems(items).catch((err) => {
|
|
@@ -2672,14 +2757,14 @@ async function generateAndCache(listUrl, key, config, proxy) {
|
|
|
2672
2757
|
emitFeedUpdated({ sourceUrl: sourceRefStored, newCount: newCount - pipelineDroppedNew });
|
|
2673
2758
|
}
|
|
2674
2759
|
const out = items.filter((i) => !isPipelineDroppedItem(i));
|
|
2675
|
-
if (
|
|
2676
|
-
await postDeliverItemsSafe(
|
|
2760
|
+
if (deliverGateway.trim() && out.length > 0) {
|
|
2761
|
+
await postDeliverItemsSafe(joinGatewayPath(deliverGateway, "items"), sourceRefStored, out, {
|
|
2677
2762
|
bearerToken: deliverToken || void 0
|
|
2678
2763
|
});
|
|
2679
2764
|
}
|
|
2680
2765
|
return { items: out };
|
|
2681
2766
|
}
|
|
2682
|
-
async function
|
|
2767
|
+
async function crawlSource(listUrl, config = {}) {
|
|
2683
2768
|
const source = getSource(listUrl);
|
|
2684
2769
|
const proxy = await getEffectiveProxyForListUrl(listUrl, source);
|
|
2685
2770
|
const headless = resolveHeadlessForFeeder(config);
|
|
@@ -2704,6 +2789,10 @@ async function getItems(listUrl, config = {}) {
|
|
|
2704
2789
|
if (!config.force) generatingKeys.set(key, task);
|
|
2705
2790
|
}
|
|
2706
2791
|
const { items } = await task;
|
|
2792
|
+
return { items };
|
|
2793
|
+
}
|
|
2794
|
+
async function getItems(listUrl, config = {}) {
|
|
2795
|
+
const { items } = await crawlSource(listUrl, config);
|
|
2707
2796
|
return { items, fromCache: false };
|
|
2708
2797
|
}
|
|
2709
2798
|
function feedItemsToRssXml(items, listUrl, lng, opts) {
|
|
@@ -2899,17 +2988,24 @@ const DEFAULT_REFRESH = "1day";
|
|
|
2899
2988
|
const SOURCES_CONCURRENCY = 1;
|
|
2900
2989
|
function createPullTask(ref, cacheDir, cronExpr) {
|
|
2901
2990
|
return async () => {
|
|
2902
|
-
|
|
2903
|
-
|
|
2904
|
-
|
|
2905
|
-
|
|
2906
|
-
});
|
|
2907
|
-
} catch (err) {
|
|
2908
|
-
throw err;
|
|
2909
|
-
}
|
|
2991
|
+
await crawlSource(ref, {
|
|
2992
|
+
cacheDir,
|
|
2993
|
+
cron: cronExpr
|
|
2994
|
+
});
|
|
2910
2995
|
};
|
|
2911
2996
|
}
|
|
2912
2997
|
const SOURCES_GROUP = "sources";
|
|
2998
|
+
async function deliverSourcesConfigIfConfigured() {
|
|
2999
|
+
const { gateway, token } = await getDeliverConfig();
|
|
3000
|
+
if (!gateway.trim()) return;
|
|
3001
|
+
let raw;
|
|
3002
|
+
try {
|
|
3003
|
+
raw = await getSourcesRaw();
|
|
3004
|
+
} catch {
|
|
3005
|
+
return;
|
|
3006
|
+
}
|
|
3007
|
+
await postDeliverSourcesSafe(joinGatewayPath(gateway, "sources"), raw, { bearerToken: token || void 0 });
|
|
3008
|
+
}
|
|
2913
3009
|
async function rescheduleSources(cacheDir, runNow2) {
|
|
2914
3010
|
unscheduleGroup(SOURCES_GROUP);
|
|
2915
3011
|
let sources;
|
|
@@ -2939,7 +3035,7 @@ async function initScheduler(cacheDir) {
|
|
|
2939
3035
|
const watcher = watch(SOURCES_CONFIG_PATH, () => {
|
|
2940
3036
|
if (debounceTimer) clearTimeout(debounceTimer);
|
|
2941
3037
|
debounceTimer = setTimeout(() => {
|
|
2942
|
-
rescheduleSources(cacheDir, false).catch(() => {
|
|
3038
|
+
void rescheduleSources(cacheDir, false).then(() => deliverSourcesConfigIfConfigured()).catch(() => {
|
|
2943
3039
|
});
|
|
2944
3040
|
}, 500);
|
|
2945
3041
|
});
|
|
@@ -3009,24 +3105,26 @@ function registerSchedulerRoutes(app) {
|
|
|
3009
3105
|
});
|
|
3010
3106
|
}
|
|
3011
3107
|
const SITE_TEMPLATE_FALLBACK = `/**
|
|
3012
|
-
* Site
|
|
3013
|
-
*
|
|
3108
|
+
* Site plugin template created from the /plugins page.
|
|
3109
|
+
* Plugin protocol: named exports. No export default is required.
|
|
3110
|
+
* Parse HTML with ctx.deps.parseHtml; do not import app dependencies directly.
|
|
3014
3111
|
*/
|
|
3015
|
-
export default {
|
|
3016
|
-
id: "__PLUGIN_ID__",
|
|
3017
|
-
listUrlPattern: __LIST_URL_PATTERN__,
|
|
3018
|
-
refreshInterval: "1day",
|
|
3019
3112
|
|
|
3020
|
-
|
|
3021
|
-
|
|
3022
|
-
|
|
3023
|
-
|
|
3024
|
-
|
|
3025
|
-
|
|
3026
|
-
|
|
3027
|
-
|
|
3028
|
-
|
|
3029
|
-
|
|
3113
|
+
// Predefined fields stay together at the top.
|
|
3114
|
+
export const id = "__PLUGIN_ID__";
|
|
3115
|
+
export const name = "__PLUGIN_ID__";
|
|
3116
|
+
export const listUrlPattern = __LIST_URL_PATTERN__;
|
|
3117
|
+
export const refreshInterval = "1day";
|
|
3118
|
+
|
|
3119
|
+
export async function fetchItems(sourceId, ctx) {
|
|
3120
|
+
const { html, finalUrl } = await ctx.fetchHtml(sourceId, {
|
|
3121
|
+
waitMs: 2000,
|
|
3122
|
+
purify: true,
|
|
3123
|
+
});
|
|
3124
|
+
void ctx.deps.parseHtml(html);
|
|
3125
|
+
void finalUrl;
|
|
3126
|
+
return [];
|
|
3127
|
+
}
|
|
3030
3128
|
`;
|
|
3031
3129
|
function isValidNewPluginId(id) {
|
|
3032
3130
|
return /^[a-zA-Z][a-zA-Z0-9_-]{0,63}$/.test(id) && id !== "generic" && id !== "new";
|
|
@@ -3095,6 +3193,7 @@ function registerPluginsRoutes(app) {
|
|
|
3095
3193
|
const sites = getPluginSites().map((s) => ({
|
|
3096
3194
|
kind: "site",
|
|
3097
3195
|
id: s.id,
|
|
3196
|
+
name: s.name ?? s.id,
|
|
3098
3197
|
listUrlPattern: typeof s.listUrlPattern === "string" ? s.listUrlPattern : String(s.listUrlPattern),
|
|
3099
3198
|
hasAuth: !!(s.checkAuth && s.loginUrl)
|
|
3100
3199
|
}));
|
|
@@ -3102,6 +3201,7 @@ function registerPluginsRoutes(app) {
|
|
|
3102
3201
|
const sources = registeredSources.filter((src) => src.id !== "generic" && !siteIds.has(src.id)).map((src) => ({
|
|
3103
3202
|
kind: "source",
|
|
3104
3203
|
id: src.id,
|
|
3204
|
+
name: src.name ?? src.id,
|
|
3105
3205
|
listUrlPattern: typeof src.pattern === "string" ? src.pattern : String(src.pattern),
|
|
3106
3206
|
hasAuth: false
|
|
3107
3207
|
}));
|
|
@@ -3204,8 +3304,25 @@ function registerFeedRoutes(app) {
|
|
|
3204
3304
|
ref: resolveRef(s),
|
|
3205
3305
|
label: s.label ?? resolveRef(s)
|
|
3206
3306
|
}));
|
|
3207
|
-
const
|
|
3208
|
-
|
|
3307
|
+
const parseDateBound = (value, endExclusive) => {
|
|
3308
|
+
if (!value) return void 0;
|
|
3309
|
+
if (value.length === 10) {
|
|
3310
|
+
const d2 = /* @__PURE__ */ new Date(endExclusive ? `${value}T12:00:00Z` : `${value}T00:00:00.000Z`);
|
|
3311
|
+
if (endExclusive) d2.setUTCDate(d2.getUTCDate() + 1);
|
|
3312
|
+
return d2;
|
|
3313
|
+
}
|
|
3314
|
+
const d = new Date(value);
|
|
3315
|
+
return Number.isNaN(d.getTime()) ? void 0 : d;
|
|
3316
|
+
};
|
|
3317
|
+
const result = sourceRefs.length > 0 ? await queryItems({
|
|
3318
|
+
sourceUrls: sourceRefs,
|
|
3319
|
+
limit: limit + 1,
|
|
3320
|
+
offset,
|
|
3321
|
+
since: parseDateBound(since ?? void 0, false),
|
|
3322
|
+
until: parseDateBound(until ?? void 0, true)
|
|
3323
|
+
}) : { items: [] };
|
|
3324
|
+
const hasMore = result.items.length > limit;
|
|
3325
|
+
const dbItems = hasMore ? result.items.slice(0, limit) : result.items;
|
|
3209
3326
|
const items = dbItems.map((item) => {
|
|
3210
3327
|
const refKey = item.source_url ?? "";
|
|
3211
3328
|
const base2 = {
|
|
@@ -3326,7 +3443,7 @@ function registerItemsRoutes(app) {
|
|
|
3326
3443
|
return c.json({ items: [], total: 0, hasMore: false });
|
|
3327
3444
|
}
|
|
3328
3445
|
const result = await queryItems({
|
|
3329
|
-
sourceUrl:
|
|
3446
|
+
sourceUrl: sourceUrls ? void 0 : effectiveSourceUrl ? canonicalHttpSourceRef(effectiveSourceUrl) : void 0,
|
|
3330
3447
|
sourceUrls,
|
|
3331
3448
|
author,
|
|
3332
3449
|
q,
|
|
@@ -3533,16 +3650,29 @@ function registerTopicsRoutes(app) {
|
|
|
3533
3650
|
}
|
|
3534
3651
|
function registerDeliverRoutes(app) {
|
|
3535
3652
|
app.get("/api/deliver", requireAdmin(), async (c) => {
|
|
3536
|
-
const {
|
|
3537
|
-
return c.json({
|
|
3653
|
+
const { gateway, token } = await getDeliverConfig();
|
|
3654
|
+
return c.json({ gateway, token });
|
|
3538
3655
|
});
|
|
3539
3656
|
app.put("/api/deliver", requireAdmin(), async (c) => {
|
|
3540
3657
|
try {
|
|
3541
3658
|
const body = await c.req.json();
|
|
3542
|
-
const
|
|
3543
|
-
const
|
|
3544
|
-
|
|
3545
|
-
|
|
3659
|
+
const prev = await getDeliverConfig();
|
|
3660
|
+
const explicitGateway = body != null && "gateway" in body;
|
|
3661
|
+
const explicitUrl = body != null && "url" in body;
|
|
3662
|
+
const explicitToken = body != null && "token" in body;
|
|
3663
|
+
let gateway = typeof body?.gateway === "string" ? body.gateway.trim() : "";
|
|
3664
|
+
if (!gateway && typeof body?.url === "string") {
|
|
3665
|
+
gateway = body.url.trim().replace(/\/items\/?$/i, "").replace(/\/+$/, "");
|
|
3666
|
+
}
|
|
3667
|
+
if (!explicitGateway && !explicitUrl) {
|
|
3668
|
+
gateway = prev.gateway;
|
|
3669
|
+
}
|
|
3670
|
+
let token = typeof body?.token === "string" ? body.token.trim() : "";
|
|
3671
|
+
if (!explicitToken) {
|
|
3672
|
+
token = prev.token;
|
|
3673
|
+
}
|
|
3674
|
+
await saveDeliverConfig({ gateway, token });
|
|
3675
|
+
return c.json({ ok: true, gateway, token });
|
|
3546
3676
|
} catch (err) {
|
|
3547
3677
|
return c.json({ ok: false, message: err instanceof Error ? err.message : String(err) }, 400);
|
|
3548
3678
|
}
|
|
@@ -3550,31 +3680,39 @@ function registerDeliverRoutes(app) {
|
|
|
3550
3680
|
app.post("/api/deliver/test", requireAdmin(), async (c) => {
|
|
3551
3681
|
try {
|
|
3552
3682
|
const body = await c.req.json();
|
|
3553
|
-
const
|
|
3554
|
-
|
|
3555
|
-
if (!
|
|
3683
|
+
const prev = await getDeliverConfig();
|
|
3684
|
+
let gateway = typeof body?.gateway === "string" ? body.gateway.trim() : "";
|
|
3685
|
+
if (!gateway && typeof body?.url === "string") {
|
|
3686
|
+
gateway = body.url.trim().replace(/\/items\/?$/i, "").replace(/\/+$/, "");
|
|
3687
|
+
}
|
|
3688
|
+
if (!gateway) gateway = prev.gateway;
|
|
3689
|
+
const token = typeof body?.token === "string" ? body.token.trim() : prev.token;
|
|
3690
|
+
if (!gateway.trim()) return c.json({ ok: false, message: "gateway 不能为空" }, 400);
|
|
3691
|
+
const now2 = Date.now();
|
|
3556
3692
|
const sample = {
|
|
3557
|
-
guid: "deliver-test-" +
|
|
3693
|
+
guid: "deliver-test-" + now2,
|
|
3558
3694
|
title: "投递连通性测试",
|
|
3559
3695
|
link: "https://example.com/rssany-deliver-test",
|
|
3560
|
-
pubDate:
|
|
3561
|
-
summary: "
|
|
3696
|
+
pubDate: /* @__PURE__ */ new Date(),
|
|
3697
|
+
summary: "若下游 /test 收到此条,说明 Gateway 可用。",
|
|
3698
|
+
sourceRef: "rssany-deliver-test"
|
|
3562
3699
|
};
|
|
3563
|
-
await
|
|
3564
|
-
|
|
3565
|
-
|
|
3566
|
-
|
|
3567
|
-
|
|
3568
|
-
|
|
3569
|
-
|
|
3570
|
-
|
|
3571
|
-
|
|
3572
|
-
|
|
3573
|
-
|
|
3574
|
-
|
|
3575
|
-
|
|
3576
|
-
|
|
3577
|
-
|
|
3700
|
+
const raw = await getSourcesRaw();
|
|
3701
|
+
let sourcesDoc;
|
|
3702
|
+
try {
|
|
3703
|
+
sourcesDoc = JSON.parse(raw);
|
|
3704
|
+
} catch {
|
|
3705
|
+
sourcesDoc = { sources: [] };
|
|
3706
|
+
}
|
|
3707
|
+
const payload = {
|
|
3708
|
+
rssanyConnectivityTest: true,
|
|
3709
|
+
items: {
|
|
3710
|
+
sourceRef: "rssany-deliver-test",
|
|
3711
|
+
items: feedItemsToPayload([sample])
|
|
3712
|
+
},
|
|
3713
|
+
sources: sourcesDoc
|
|
3714
|
+
};
|
|
3715
|
+
await postDeliverGatewayTest(gateway.trim(), payload, { bearerToken: token || void 0 });
|
|
3578
3716
|
return c.json({ ok: true });
|
|
3579
3717
|
} catch (err) {
|
|
3580
3718
|
return c.json({ ok: false, message: err instanceof Error ? err.message : String(err) }, 400);
|
|
@@ -3760,7 +3898,7 @@ function registerTasksRoutes(app) {
|
|
|
3760
3898
|
schedule(SOURCES_GROUP, taskId, async () => {
|
|
3761
3899
|
setTaskRunning(taskId);
|
|
3762
3900
|
try {
|
|
3763
|
-
await
|
|
3901
|
+
await crawlSource(ref, { cacheDir: CACHE_DIR, force: true });
|
|
3764
3902
|
setTaskDone(taskId, { ok: true });
|
|
3765
3903
|
} catch (err) {
|
|
3766
3904
|
const msg = err instanceof Error ? err.message : String(err);
|
|
@@ -4167,7 +4305,7 @@ function registerAuthRoutes(app) {
|
|
|
4167
4305
|
return c.json({ ok: true, message: "已打开登录窗口,请在弹出的浏览器中完成登录,完成后刷新订阅页面即可。" });
|
|
4168
4306
|
});
|
|
4169
4307
|
}
|
|
4170
|
-
const STATICS_DIR = join(PACKAGE_ROOT, "statics");
|
|
4308
|
+
const STATICS_DIR = join(PACKAGE_ROOT, "app/statics");
|
|
4171
4309
|
function parseUrlFromPath(path, prefix) {
|
|
4172
4310
|
const raw = path.slice(prefix.length) || "";
|
|
4173
4311
|
const decoded = decodeURIComponent(raw.startsWith("/") ? raw.slice(1) : raw);
|
|
@@ -4395,7 +4533,7 @@ function getWebUiBuildDir() {
|
|
|
4395
4533
|
if (w.startsWith("/") || /^[A-Za-z]:[\\/]/.test(w)) return w;
|
|
4396
4534
|
return join(process.cwd(), w);
|
|
4397
4535
|
}
|
|
4398
|
-
return join(PACKAGE_ROOT, "webui/build");
|
|
4536
|
+
return join(PACKAGE_ROOT, "app/webui/build");
|
|
4399
4537
|
}
|
|
4400
4538
|
function isBackendOnlyPath(pathname) {
|
|
4401
4539
|
if (pathname.startsWith("/api")) return true;
|
|
@@ -4411,11 +4549,10 @@ function registerWebUiRoutes(app) {
|
|
|
4411
4549
|
const absRoot = getWebUiBuildDir();
|
|
4412
4550
|
if (!existsSync(absRoot)) {
|
|
4413
4551
|
console.warn(
|
|
4414
|
-
"未找到 WebUI
|
|
4552
|
+
"未找到 WebUI 构建目录,静态路由已注册,等待前端 watch 构建:",
|
|
4415
4553
|
absRoot,
|
|
4416
|
-
"
|
|
4554
|
+
"(开发模式:npm run dev;单独构建:npm run webui:build)"
|
|
4417
4555
|
);
|
|
4418
|
-
return;
|
|
4419
4556
|
}
|
|
4420
4557
|
const relRoot = relative(process.cwd(), absRoot).replace(/\\/g, "/");
|
|
4421
4558
|
const staticRoot = relRoot === "" || relRoot === "." ? "." : relRoot.startsWith(".") || relRoot.startsWith("/") || /^[A-Za-z]:/.test(relRoot) ? relRoot : `./${relRoot}`;
|
|
@@ -4440,6 +4577,16 @@ function registerWebUiRoutes(app) {
|
|
|
4440
4577
|
};
|
|
4441
4578
|
app.get("*", spaFallback);
|
|
4442
4579
|
}
|
|
4580
|
+
const here = dirname(fileURLToPath(import.meta.url));
|
|
4581
|
+
function getAppVersion() {
|
|
4582
|
+
try {
|
|
4583
|
+
const pkgPath = join(here, "../package.json");
|
|
4584
|
+
const pkg = JSON.parse(readFileSync(pkgPath, "utf8"));
|
|
4585
|
+
return pkg.version ?? "unknown";
|
|
4586
|
+
} catch {
|
|
4587
|
+
return "unknown";
|
|
4588
|
+
}
|
|
4589
|
+
}
|
|
4443
4590
|
const PORT = Number(process.env.PORT) || 18473;
|
|
4444
4591
|
const IS_DEV = process.env.NODE_ENV === "development" || process.argv.includes("--watch");
|
|
4445
4592
|
const PLUGIN_WATCH_EXTS = [".rssany.js", ".rssany.ts"];
|
|
@@ -4489,7 +4636,9 @@ async function main() {
|
|
|
4489
4636
|
const app = createApp();
|
|
4490
4637
|
const server = serve({ fetch: app.fetch, port: PORT, hostname: "0.0.0.0" });
|
|
4491
4638
|
server.setMaxListeners(32);
|
|
4492
|
-
console.log(
|
|
4639
|
+
console.log(
|
|
4640
|
+
`RssAny ${getAppVersion()} 服务已启动 http://127.0.0.1:${PORT}/(API + 静态前端单地址)`
|
|
4641
|
+
);
|
|
4493
4642
|
const lanIp = Object.values(networkInterfaces()).flat().find((iface) => iface?.family === "IPv4" && !iface.internal)?.address;
|
|
4494
4643
|
if (lanIp) console.log(`局域网访问 http://${lanIp}:${PORT}/`);
|
|
4495
4644
|
if (IS_DEV) {
|