rssany 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -10
- package/{plugins/sources → app/plugins/builtin}/email.rssany.js +92 -96
- package/{plugins/sources → app/plugins/builtin}/rss.rssany.js +30 -32
- package/{plugins/templates → app/plugins}/site.rssany.js +5 -6
- package/dist/index.js +220 -547
- package/dist/index.js.map +1 -1
- package/package.json +3 -2
- package/webui/build/200.html +6 -6
- package/webui/build/_app/immutable/chunks/{Cg3zih_x.js → CZDFXKiF.js} +1 -1
- package/webui/build/_app/immutable/chunks/DvtNA-3X.js +1 -0
- package/webui/build/_app/immutable/entry/{app.4I2fqDIL.js → app.Cra5Zsz4.js} +2 -2
- package/webui/build/_app/immutable/entry/start.ToY0Qh0_.js +1 -0
- package/webui/build/_app/immutable/nodes/{0.gA9sQtoM.js → 0.D2-xzG_8.js} +1 -1
- package/webui/build/_app/immutable/nodes/{1.Bybh7btp.js → 1.CFixzRR6.js} +1 -1
- package/webui/build/_app/immutable/nodes/10.ayxWydPr.js +1 -0
- package/webui/build/_app/immutable/nodes/{11.CDNNJqlQ.js → 11.B0JS3E2j.js} +1 -1
- package/webui/build/_app/immutable/nodes/{12.D9g8GCjm.js → 12.CMcby_lY.js} +1 -1
- package/webui/build/_app/version.json +1 -1
- package/webui/build/_app/immutable/chunks/CkS2JMkE.js +0 -1
- package/webui/build/_app/immutable/entry/start.CrgdT2Qb.js +0 -1
- package/webui/build/_app/immutable/nodes/10.DEkJCZ6X.js +0 -1
package/dist/index.js
CHANGED
|
@@ -12,10 +12,14 @@ import { parse, NodeType } from "node-html-parser";
|
|
|
12
12
|
import Database from "better-sqlite3";
|
|
13
13
|
import { mkdir, copyFile, access, rename, readFile, writeFile, readdir } from "node:fs/promises";
|
|
14
14
|
import { fileURLToPath, pathToFileURL } from "node:url";
|
|
15
|
-
import { createHash
|
|
15
|
+
import { createHash } from "node:crypto";
|
|
16
16
|
import { JSDOM } from "jsdom";
|
|
17
17
|
import { Readability } from "@mozilla/readability";
|
|
18
18
|
import OpenAI from "openai";
|
|
19
|
+
import RssParser from "rss-parser";
|
|
20
|
+
import { HttpsProxyAgent } from "https-proxy-agent";
|
|
21
|
+
import { ImapFlow } from "imapflow";
|
|
22
|
+
import { simpleParser } from "mailparser";
|
|
19
23
|
import { EventEmitter } from "node:events";
|
|
20
24
|
import { CronExpressionParser } from "cron-parser";
|
|
21
25
|
import { validate, schedule as schedule$1 } from "node-cron";
|
|
@@ -156,12 +160,9 @@ const SOURCES_CONFIG_PATH = join(USER_DIR, "sources.json");
|
|
|
156
160
|
const TAGS_CONFIG_PATH = join(USER_DIR, "tags.json");
|
|
157
161
|
const CONFIG_PATH = join(USER_DIR, "config.json");
|
|
158
162
|
const LEGACY_SUBSCRIPTIONS_PATH = join(USER_DIR, "subscriptions.json");
|
|
159
|
-
const BUILTIN_PLUGINS_DIR = join(PACKAGE_ROOT, "plugins");
|
|
163
|
+
const BUILTIN_PLUGINS_DIR = join(PACKAGE_ROOT, "app/plugins/builtin");
|
|
160
164
|
const USER_PLUGINS_DIR = join(USER_DIR, "plugins");
|
|
161
|
-
const
|
|
162
|
-
const USER_SOURCES_DIR = join(USER_PLUGINS_DIR, "sources");
|
|
163
|
-
const BUILTIN_ENRICH_DIR = join(BUILTIN_PLUGINS_DIR, "enrich");
|
|
164
|
-
const USER_ENRICH_DIR = join(USER_PLUGINS_DIR, "enrich");
|
|
165
|
+
const PLUGIN_SITE_TEMPLATE_PATH = join(PACKAGE_ROOT, "app/plugins/site.rssany.js");
|
|
165
166
|
async function pathExists(p) {
|
|
166
167
|
try {
|
|
167
168
|
await access(p);
|
|
@@ -209,8 +210,6 @@ async function initUserDir() {
|
|
|
209
210
|
await mkdir(DATA_DIR, { recursive: true });
|
|
210
211
|
await mkdir(CACHE_DIR, { recursive: true });
|
|
211
212
|
await mkdir(USER_PLUGINS_DIR, { recursive: true });
|
|
212
|
-
await mkdir(USER_SOURCES_DIR, { recursive: true });
|
|
213
|
-
await mkdir(USER_ENRICH_DIR, { recursive: true });
|
|
214
213
|
await seedExampleConfigsIfMissing();
|
|
215
214
|
if (!await pathExists(SOURCES_CONFIG_PATH) && await pathExists(LEGACY_SUBSCRIPTIONS_PATH)) {
|
|
216
215
|
await migrateFile(LEGACY_SUBSCRIPTIONS_PATH, SOURCES_CONFIG_PATH);
|
|
@@ -225,17 +224,17 @@ function logCorruptDiagnostic(operation, err) {
|
|
|
225
224
|
const code = err?.code;
|
|
226
225
|
const msg = err instanceof Error ? err.message : String(err);
|
|
227
226
|
const lines = [
|
|
228
|
-
"[rssany db]
|
|
229
|
-
`
|
|
230
|
-
`
|
|
231
|
-
"
|
|
232
|
-
" 1.
|
|
233
|
-
" 2.
|
|
234
|
-
" 3.
|
|
235
|
-
"
|
|
236
|
-
" -
|
|
237
|
-
" -
|
|
238
|
-
" -
|
|
227
|
+
"[rssany db] 数据库可能损坏或并发冲突",
|
|
228
|
+
` 操作: ${operation}`,
|
|
229
|
+
` 错误: ${code ?? "unknown"} - ${msg}`,
|
|
230
|
+
" 常见原因:",
|
|
231
|
+
" 1. 多进程同时打开同一库(例如 tsx --watch 与另一实例同时写)",
|
|
232
|
+
" 2. 异常退出后 WAL 未正常 checkpoint",
|
|
233
|
+
" 3. 磁盘/杀毒/同步盘导致文件不完整",
|
|
234
|
+
" 建议:",
|
|
235
|
+
" - 避免多实例同时写库;开发时慎用 --watch 与后台任务并行",
|
|
236
|
+
" - 可尝试 RSSANY_DB_JOURNAL=delete 使用 DELETE 模式降低多文件依赖",
|
|
237
|
+
" - 备份后删除 .rssany/data/rssany.db 及同目录 -wal、-shm、rssany.db.lock 再启动"
|
|
239
238
|
];
|
|
240
239
|
process.stderr.write(lines.join("\n") + "\n");
|
|
241
240
|
}
|
|
@@ -274,7 +273,7 @@ function acquireDbLock(dbDir) {
|
|
|
274
273
|
})();
|
|
275
274
|
if (stillRunning) {
|
|
276
275
|
throw new Error(
|
|
277
|
-
|
|
276
|
+
`数据库已被其他进程占用(PID ${oldPid})。请勿多开实例;若确认无其他进程,可删除 ${lockPath} 后重试(常见于 tsx --watch 未退出)`
|
|
278
277
|
);
|
|
279
278
|
}
|
|
280
279
|
}
|
|
@@ -307,7 +306,7 @@ function withWriteLock(fn) {
|
|
|
307
306
|
},
|
|
308
307
|
(e) => {
|
|
309
308
|
if (isCorruptError(e)) {
|
|
310
|
-
logCorruptDiagnostic("
|
|
309
|
+
logCorruptDiagnostic("withWriteLock 内 updateItemContent/upsertItems 等", e);
|
|
311
310
|
}
|
|
312
311
|
rejectOut(e);
|
|
313
312
|
throw e;
|
|
@@ -315,7 +314,7 @@ function withWriteLock(fn) {
|
|
|
315
314
|
);
|
|
316
315
|
return out;
|
|
317
316
|
}
|
|
318
|
-
const DATE_ONLY_TITLE_RE = /^(?:jan|feb|mar|apr|may|jun|jul|aug|sep|sept|oct|nov|dec)\b[\s\d
|
|
317
|
+
const DATE_ONLY_TITLE_RE = /^(?:jan|feb|mar|apr|may|jun|jul|aug|sep|sept|oct|nov|dec)\b[\s\d,./-]*(?:st|nd|rd|th)?[\s\d,./-]*$/i;
|
|
319
318
|
function normalizeText(text) {
|
|
320
319
|
return (text ?? "").replace(/\s+/g, " ").trim();
|
|
321
320
|
}
|
|
@@ -393,7 +392,7 @@ async function getDb() {
|
|
|
393
392
|
db = null;
|
|
394
393
|
}
|
|
395
394
|
if (isCorruptError(err)) {
|
|
396
|
-
logCorruptDiagnostic("
|
|
395
|
+
logCorruptDiagnostic("打开/初始化主库 (getDb)", err);
|
|
397
396
|
}
|
|
398
397
|
throw err;
|
|
399
398
|
}
|
|
@@ -407,7 +406,7 @@ async function runIntegrityCheck() {
|
|
|
407
406
|
return row?.integrity_check ?? "unknown";
|
|
408
407
|
} catch (err) {
|
|
409
408
|
const msg = err instanceof Error ? err.message : String(err);
|
|
410
|
-
return `integrity_check
|
|
409
|
+
return `integrity_check 执行失败: ${msg}`;
|
|
411
410
|
}
|
|
412
411
|
}
|
|
413
412
|
const LOGS_DB_PATH = join(DATA_DIR, "logs.db");
|
|
@@ -525,7 +524,7 @@ async function upsertItems(items, sourceUrlOverride) {
|
|
|
525
524
|
if (items.length === 0) return { newCount: 0, newIds: /* @__PURE__ */ new Set() };
|
|
526
525
|
const sourceUrl = items[0].sourceRef;
|
|
527
526
|
if (!sourceUrl) {
|
|
528
|
-
throw new Error("upsertItems:
|
|
527
|
+
throw new Error("upsertItems: 每条 item 须有 sourceRef,或传入 sourceUrlOverride");
|
|
529
528
|
}
|
|
530
529
|
return withWriteLock(async () => {
|
|
531
530
|
const db = await getDb();
|
|
@@ -647,7 +646,7 @@ async function queryFeedItems(sourceUrls, limit, offset, opts) {
|
|
|
647
646
|
if (opts?.until) {
|
|
648
647
|
conditions.push("COALESCE(pub_date, fetched_at) < @until");
|
|
649
648
|
if (opts.until.length === 10) {
|
|
650
|
-
const d = /* @__PURE__ */ new Date(opts.until
|
|
649
|
+
const d = /* @__PURE__ */ new Date(`${opts.until}T12:00:00Z`);
|
|
651
650
|
d.setUTCDate(d.getUTCDate() + 1);
|
|
652
651
|
params.until = d.toISOString();
|
|
653
652
|
} else {
|
|
@@ -690,9 +689,7 @@ async function queryItems(opts) {
|
|
|
690
689
|
const trimmed = tagsFilter.filter((t) => typeof t === "string" && t.trim()).map((t) => t.trim());
|
|
691
690
|
if (trimmed.length > 0) {
|
|
692
691
|
const tagConds = trimmed.map((_, i) => `LOWER(TRIM(json_each.value)) = LOWER(@tag${i})`).join(" OR ");
|
|
693
|
-
conditions.push(
|
|
694
|
-
`i.tags IS NOT NULL AND EXISTS (SELECT 1 FROM json_each(i.tags) WHERE ${tagConds})`
|
|
695
|
-
);
|
|
692
|
+
conditions.push(`i.tags IS NOT NULL AND EXISTS (SELECT 1 FROM json_each(i.tags) WHERE ${tagConds})`);
|
|
696
693
|
trimmed.forEach((t, i) => {
|
|
697
694
|
params[`tag${i}`] = t;
|
|
698
695
|
});
|
|
@@ -1675,11 +1672,6 @@ function isValidSource(obj) {
|
|
|
1675
1672
|
const s = obj;
|
|
1676
1673
|
return typeof s.id === "string" && (typeof s.pattern === "string" || s.pattern instanceof RegExp) && typeof s.fetchItems === "function" && s.listUrlPattern === void 0;
|
|
1677
1674
|
}
|
|
1678
|
-
function isValidEnrichPlugin(obj) {
|
|
1679
|
-
if (obj == null || typeof obj !== "object") return false;
|
|
1680
|
-
const p = obj;
|
|
1681
|
-
return typeof p.id === "string" && typeof p.match === "function" && typeof p.enrichItem === "function";
|
|
1682
|
-
}
|
|
1683
1675
|
async function loadSourcePluginsFromDir(dir, label) {
|
|
1684
1676
|
const siteEntries = [];
|
|
1685
1677
|
const sources = [];
|
|
@@ -1711,46 +1703,12 @@ async function loadSourcePluginsFromDir(dir, label) {
|
|
|
1711
1703
|
}
|
|
1712
1704
|
return { siteEntries, sources };
|
|
1713
1705
|
}
|
|
1714
|
-
async function
|
|
1715
|
-
const
|
|
1716
|
-
let entries;
|
|
1717
|
-
try {
|
|
1718
|
-
const raw = await readdir(dir, { withFileTypes: true, encoding: "utf-8" });
|
|
1719
|
-
entries = raw;
|
|
1720
|
-
} catch {
|
|
1721
|
-
return result;
|
|
1722
|
-
}
|
|
1723
|
-
for (const e of entries) {
|
|
1724
|
-
const name = String(e.name);
|
|
1725
|
-
if (!e.isFile()) continue;
|
|
1726
|
-
if (!PLUGIN_EXTENSIONS.some((ext) => name.endsWith(ext))) continue;
|
|
1727
|
-
const filePath = join(dir, name);
|
|
1728
|
-
try {
|
|
1729
|
-
const mod = await import(pathToFileURL(filePath).href);
|
|
1730
|
-
const plugin = mod.default ?? mod;
|
|
1731
|
-
if (validator(plugin)) {
|
|
1732
|
-
result.push(plugin);
|
|
1733
|
-
} else {
|
|
1734
|
-
logger.warn("plugin", "插件接口不匹配,已跳过", { label, name });
|
|
1735
|
-
}
|
|
1736
|
-
} catch (err) {
|
|
1737
|
-
logger.warn("plugin", "插件加载失败", { label, name, err: err instanceof Error ? err.message : String(err) });
|
|
1738
|
-
}
|
|
1739
|
-
}
|
|
1740
|
-
return result;
|
|
1741
|
-
}
|
|
1742
|
-
async function loadFromSourcesOrRoot() {
|
|
1743
|
-
const [builtinFromSources, userFromSources] = await Promise.all([
|
|
1744
|
-
loadSourcePluginsFromDir(BUILTIN_SOURCES_DIR, "builtin:sources"),
|
|
1745
|
-
loadSourcePluginsFromDir(USER_SOURCES_DIR, "user:sources")
|
|
1746
|
-
]);
|
|
1747
|
-
const hasAny = builtinFromSources.siteEntries.length + builtinFromSources.sources.length + userFromSources.siteEntries.length + userFromSources.sources.length > 0;
|
|
1748
|
-
if (hasAny) return { builtin: builtinFromSources, user: userFromSources };
|
|
1749
|
-
const [builtinRoot, userRoot] = await Promise.all([
|
|
1706
|
+
async function loadBuiltinAndUser() {
|
|
1707
|
+
const [builtin, user] = await Promise.all([
|
|
1750
1708
|
loadSourcePluginsFromDir(BUILTIN_PLUGINS_DIR, "builtin"),
|
|
1751
1709
|
loadSourcePluginsFromDir(USER_PLUGINS_DIR, "user")
|
|
1752
1710
|
]);
|
|
1753
|
-
return { builtin
|
|
1711
|
+
return { builtin, user };
|
|
1754
1712
|
}
|
|
1755
1713
|
const pluginSitePaths = /* @__PURE__ */ new Map();
|
|
1756
1714
|
function mergeSourcePluginPaths(siteIds, pathMap, builtinSources, userSources) {
|
|
@@ -1774,7 +1732,7 @@ function getPluginFilePath(id) {
|
|
|
1774
1732
|
return pluginSitePaths.get(id);
|
|
1775
1733
|
}
|
|
1776
1734
|
async function loadSiteAndSourcePlugins() {
|
|
1777
|
-
const { builtin, user } = await
|
|
1735
|
+
const { builtin, user } = await loadBuiltinAndUser();
|
|
1778
1736
|
const siteMap = /* @__PURE__ */ new Map();
|
|
1779
1737
|
const pathMap = /* @__PURE__ */ new Map();
|
|
1780
1738
|
for (const { site: s, filePath } of builtin.siteEntries) {
|
|
@@ -1797,73 +1755,6 @@ async function loadSiteAndSourcePlugins() {
|
|
|
1797
1755
|
pathMap.forEach((path, id) => pluginSitePaths.set(id, path));
|
|
1798
1756
|
return { sites: Array.from(siteMap.values()), sources: Array.from(sourceMap.values()) };
|
|
1799
1757
|
}
|
|
1800
|
-
let registeredEnrichPlugins = [];
|
|
1801
|
-
async function loadEnrichPlugins() {
|
|
1802
|
-
const [builtin, user] = await Promise.all([
|
|
1803
|
-
loadPluginsFromDir(BUILTIN_ENRICH_DIR, "builtin:enrich", isValidEnrichPlugin),
|
|
1804
|
-
loadPluginsFromDir(USER_ENRICH_DIR, "user:enrich", isValidEnrichPlugin)
|
|
1805
|
-
]);
|
|
1806
|
-
const merged = /* @__PURE__ */ new Map();
|
|
1807
|
-
for (const p of builtin) merged.set(p.id, p);
|
|
1808
|
-
for (const p of user) {
|
|
1809
|
-
if (merged.has(p.id)) logger.info("plugin", "用户 Enrich 插件覆盖同名内置", { pluginId: p.id });
|
|
1810
|
-
merged.set(p.id, p);
|
|
1811
|
-
}
|
|
1812
|
-
const list = Array.from(merged.values());
|
|
1813
|
-
list.sort((a, b) => (a.priority ?? 100) - (b.priority ?? 100));
|
|
1814
|
-
registeredEnrichPlugins = list;
|
|
1815
|
-
return list;
|
|
1816
|
-
}
|
|
1817
|
-
function getMatchedEnrichPlugin(item, ctx) {
|
|
1818
|
-
return registeredEnrichPlugins.find((p) => p.match(item, ctx));
|
|
1819
|
-
}
|
|
1820
|
-
function buildEnrichContext(ctx) {
|
|
1821
|
-
return {
|
|
1822
|
-
cacheDir: ctx.cacheDir,
|
|
1823
|
-
headless: ctx.headless,
|
|
1824
|
-
proxy: ctx.proxy,
|
|
1825
|
-
async fetchHtml(url, opts) {
|
|
1826
|
-
const res = await fetchHtml(url, {
|
|
1827
|
-
cacheDir: ctx.cacheDir,
|
|
1828
|
-
useCache: false,
|
|
1829
|
-
authFlow: void 0,
|
|
1830
|
-
headless: ctx.headless,
|
|
1831
|
-
proxy: ctx.proxy,
|
|
1832
|
-
waitAfterLoadMs: opts?.waitMs,
|
|
1833
|
-
purify: opts?.purify
|
|
1834
|
-
});
|
|
1835
|
-
return { html: res.body, finalUrl: res.finalUrl ?? url, status: res.status };
|
|
1836
|
-
},
|
|
1837
|
-
async extractItem(item, opts) {
|
|
1838
|
-
const res = await fetchHtml(item.link, {
|
|
1839
|
-
cacheDir: ctx.cacheDir,
|
|
1840
|
-
useCache: false,
|
|
1841
|
-
authFlow: void 0,
|
|
1842
|
-
headless: ctx.headless,
|
|
1843
|
-
proxy: ctx.proxy
|
|
1844
|
-
});
|
|
1845
|
-
if (res.status !== 200 && res.status !== 304) {
|
|
1846
|
-
throw new Error(`默认正文提取失败: HTTP ${res.status} ${res.statusText} for ${item.link}`);
|
|
1847
|
-
}
|
|
1848
|
-
const extracted = await extractHtml(res.body, {
|
|
1849
|
-
url: res.finalUrl ?? item.link,
|
|
1850
|
-
cacheDir: ctx.cacheDir ?? void 0,
|
|
1851
|
-
mode: "readability",
|
|
1852
|
-
useCache: true,
|
|
1853
|
-
cacheKey: opts?.cacheKey
|
|
1854
|
-
});
|
|
1855
|
-
const pubDate = extracted.pubDate != null ? typeof extracted.pubDate === "string" ? new Date(extracted.pubDate) : extracted.pubDate : item.pubDate;
|
|
1856
|
-
return {
|
|
1857
|
-
...item,
|
|
1858
|
-
author: normalizeAuthor(extracted.author ?? item.author),
|
|
1859
|
-
title: extracted.title ?? item.title,
|
|
1860
|
-
summary: extracted.summary ?? item.summary,
|
|
1861
|
-
content: extracted.content ?? item.content,
|
|
1862
|
-
pubDate
|
|
1863
|
-
};
|
|
1864
|
-
}
|
|
1865
|
-
};
|
|
1866
|
-
}
|
|
1867
1758
|
function buildSiteContext(site, ctx) {
|
|
1868
1759
|
const proxy = ctx.proxy ?? site.proxy;
|
|
1869
1760
|
const authFlow = toAuthFlow(site);
|
|
@@ -1871,6 +1762,7 @@ function buildSiteContext(site, ctx) {
|
|
|
1871
1762
|
cacheDir: ctx.cacheDir,
|
|
1872
1763
|
headless: ctx.headless,
|
|
1873
1764
|
proxy,
|
|
1765
|
+
deps: ctx.deps,
|
|
1874
1766
|
async fetchHtml(url, opts) {
|
|
1875
1767
|
const res = await fetchHtml(url, {
|
|
1876
1768
|
cacheDir: ctx.cacheDir,
|
|
@@ -1930,10 +1822,7 @@ function createWebSource(site) {
|
|
|
1930
1822
|
} : void 0,
|
|
1931
1823
|
async fetchItems(sourceId, ctx) {
|
|
1932
1824
|
return site.fetchItems(sourceId, buildSiteContext(site, ctx));
|
|
1933
|
-
}
|
|
1934
|
-
enrichItem: site.enrichItem ? async (item, ctx) => {
|
|
1935
|
-
return site.enrichItem(item, buildSiteContext(site, ctx));
|
|
1936
|
-
} : void 0
|
|
1825
|
+
}
|
|
1937
1826
|
};
|
|
1938
1827
|
}
|
|
1939
1828
|
const genericWebSource = {
|
|
@@ -1970,6 +1859,19 @@ function getPluginSites() {
|
|
|
1970
1859
|
function getBestSite(url) {
|
|
1971
1860
|
return getSiteByUrl(url, loadedSites);
|
|
1972
1861
|
}
|
|
1862
|
+
const PLUGIN_HOST_DEPS = {
|
|
1863
|
+
parseHtml: parse,
|
|
1864
|
+
NodeType,
|
|
1865
|
+
createHash,
|
|
1866
|
+
RssParser,
|
|
1867
|
+
HttpsProxyAgent,
|
|
1868
|
+
ImapFlow,
|
|
1869
|
+
simpleParser,
|
|
1870
|
+
logger
|
|
1871
|
+
};
|
|
1872
|
+
function buildSourceContext(partial) {
|
|
1873
|
+
return { ...partial, deps: PLUGIN_HOST_DEPS };
|
|
1874
|
+
}
|
|
1973
1875
|
const registeredSources = [];
|
|
1974
1876
|
function sourcePatternToRegex(pattern) {
|
|
1975
1877
|
if (pattern instanceof RegExp) return pattern;
|
|
@@ -1992,10 +1894,7 @@ function getSource(sourceId) {
|
|
|
1992
1894
|
return genericWebSource;
|
|
1993
1895
|
}
|
|
1994
1896
|
async function initSources() {
|
|
1995
|
-
const
|
|
1996
|
-
loadSiteAndSourcePlugins(),
|
|
1997
|
-
loadEnrichPlugins()
|
|
1998
|
-
]);
|
|
1897
|
+
const siteResult = await loadSiteAndSourcePlugins();
|
|
1999
1898
|
const { sites, sources: sourcePlugins } = siteResult;
|
|
2000
1899
|
setLoadedSites(sites);
|
|
2001
1900
|
registeredSources.length = 0;
|
|
@@ -2378,25 +2277,180 @@ function onFeedUpdated(fn) {
|
|
|
2378
2277
|
eventBus.on("feed:updated", fn);
|
|
2379
2278
|
return () => eventBus.off("feed:updated", fn);
|
|
2380
2279
|
}
|
|
2381
|
-
|
|
2382
|
-
concurrency: 2,
|
|
2383
|
-
maxRetries: 2
|
|
2384
|
-
};
|
|
2385
|
-
async function loadEnrichConfig() {
|
|
2386
|
-
let fileEnrich = {};
|
|
2280
|
+
async function getDeliverUrl() {
|
|
2387
2281
|
try {
|
|
2388
|
-
const raw = await readFile(
|
|
2389
|
-
const
|
|
2390
|
-
|
|
2391
|
-
|
|
2392
|
-
|
|
2282
|
+
const raw = await readFile(CONFIG_PATH, "utf-8");
|
|
2283
|
+
const j = JSON.parse(raw);
|
|
2284
|
+
const u = j?.deliver?.url;
|
|
2285
|
+
return typeof u === "string" ? u.trim() : "";
|
|
2286
|
+
} catch {
|
|
2287
|
+
return "";
|
|
2288
|
+
}
|
|
2289
|
+
}
|
|
2290
|
+
async function saveDeliverUrl(url) {
|
|
2291
|
+
let root = {};
|
|
2292
|
+
try {
|
|
2293
|
+
const raw = await readFile(CONFIG_PATH, "utf-8");
|
|
2294
|
+
root = JSON.parse(raw);
|
|
2393
2295
|
} catch {
|
|
2394
2296
|
}
|
|
2297
|
+
root.deliver = { url: url.trim() };
|
|
2298
|
+
await writeFile(CONFIG_PATH, JSON.stringify(root, null, 2) + "\n", "utf-8");
|
|
2299
|
+
}
|
|
2300
|
+
function feedItemsToPayload(items) {
|
|
2301
|
+
return items.map((i) => ({
|
|
2302
|
+
guid: i.guid,
|
|
2303
|
+
title: i.title,
|
|
2304
|
+
link: i.link,
|
|
2305
|
+
pubDate: i.pubDate instanceof Date ? i.pubDate.toISOString() : (/* @__PURE__ */ new Date()).toISOString(),
|
|
2306
|
+
author: i.author,
|
|
2307
|
+
summary: i.summary,
|
|
2308
|
+
content: i.content,
|
|
2309
|
+
tags: i.tags,
|
|
2310
|
+
sourceRef: i.sourceRef,
|
|
2311
|
+
translations: i.translations
|
|
2312
|
+
}));
|
|
2313
|
+
}
|
|
2314
|
+
async function postDeliverItems(url, sourceRef, items) {
|
|
2315
|
+
if (!url.trim() || items.length === 0) return;
|
|
2316
|
+
const body = JSON.stringify({ sourceRef, items: feedItemsToPayload(items) });
|
|
2317
|
+
const res = await fetch(url.trim(), {
|
|
2318
|
+
method: "POST",
|
|
2319
|
+
headers: { "Content-Type": "application/json" },
|
|
2320
|
+
body,
|
|
2321
|
+
signal: AbortSignal.timeout(12e4)
|
|
2322
|
+
});
|
|
2323
|
+
if (!res.ok) {
|
|
2324
|
+
const text = await res.text().catch(() => "");
|
|
2325
|
+
throw new Error(`HTTP ${res.status}${text ? `: ${text.slice(0, 200)}` : ""}`);
|
|
2326
|
+
}
|
|
2327
|
+
}
|
|
2328
|
+
async function postDeliverItemsSafe(url, sourceRef, items) {
|
|
2329
|
+
try {
|
|
2330
|
+
await postDeliverItems(url, sourceRef, items);
|
|
2331
|
+
} catch (err) {
|
|
2332
|
+
logger.warn("deliver", "投递失败", {
|
|
2333
|
+
sourceRef,
|
|
2334
|
+
count: items.length,
|
|
2335
|
+
err: err instanceof Error ? err.message : String(err)
|
|
2336
|
+
});
|
|
2337
|
+
}
|
|
2338
|
+
}
|
|
2339
|
+
function buildChannelFromItems(listUrl, items, lng) {
|
|
2340
|
+
const channel = {
|
|
2341
|
+
title: items[0]?.author?.length ? `${items[0].author[0]} 的订阅` : "RSS 订阅",
|
|
2342
|
+
link: listUrl,
|
|
2343
|
+
description: `来自 ${listUrl} 的订阅`
|
|
2344
|
+
};
|
|
2345
|
+
if (lng) channel.language = lng;
|
|
2346
|
+
return channel;
|
|
2347
|
+
}
|
|
2348
|
+
function toRssEntry(item, lng) {
|
|
2349
|
+
const eff = getEffectiveItemFields(item, lng);
|
|
2350
|
+
const hasContent = eff.content != null && eff.content !== "";
|
|
2351
|
+
const desc = hasContent ? eff.content : eff.summary;
|
|
2395
2352
|
return {
|
|
2396
|
-
|
|
2397
|
-
|
|
2353
|
+
title: eff.title,
|
|
2354
|
+
link: item.link,
|
|
2355
|
+
description: desc,
|
|
2356
|
+
guid: item.guid,
|
|
2357
|
+
published: item.pubDate?.toISOString?.() ?? void 0,
|
|
2358
|
+
imageUrl: item.imageUrl
|
|
2398
2359
|
};
|
|
2399
2360
|
}
|
|
2361
|
+
const generatingKeys = /* @__PURE__ */ new Map();
|
|
2362
|
+
const pipelineCtx = {
|
|
2363
|
+
llm: { chatJson, chatText },
|
|
2364
|
+
db: { getSystemTags }
|
|
2365
|
+
};
|
|
2366
|
+
async function runPipelineOnItem(item, ctx) {
|
|
2367
|
+
return runPipeline(item, { ...pipelineCtx, ...ctx });
|
|
2368
|
+
}
|
|
2369
|
+
async function generateAndCache(listUrl, key, config) {
|
|
2370
|
+
const { cacheDir = "cache", headless } = config;
|
|
2371
|
+
const source = getSource(listUrl);
|
|
2372
|
+
const ctx = buildSourceContext({ cacheDir, headless, proxy: config.proxy ?? source.proxy });
|
|
2373
|
+
let items;
|
|
2374
|
+
try {
|
|
2375
|
+
items = await source.fetchItems(listUrl, ctx);
|
|
2376
|
+
} catch (err) {
|
|
2377
|
+
generatingKeys.delete(key);
|
|
2378
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
2379
|
+
logger.error("scraper", "抓取失败", { source_url: listUrl, err: message });
|
|
2380
|
+
throw err;
|
|
2381
|
+
}
|
|
2382
|
+
items.forEach((i) => {
|
|
2383
|
+
i.sourceRef = listUrl;
|
|
2384
|
+
i.author = normalizeAuthor(i.author);
|
|
2385
|
+
});
|
|
2386
|
+
generatingKeys.delete(key);
|
|
2387
|
+
logger.info("scraper", "抓取成功", { source_url: listUrl, count: items.length });
|
|
2388
|
+
const deliverUrl = await getDeliverUrl();
|
|
2389
|
+
let newCount = 0;
|
|
2390
|
+
let newIds = /* @__PURE__ */ new Set();
|
|
2391
|
+
const upsertResult = await upsertItems(items).catch((err) => {
|
|
2392
|
+
logger.warn("db", "upsertItems 失败", { source_url: listUrl, err: err instanceof Error ? err.message : String(err) });
|
|
2393
|
+
return { newCount: 0, newIds: /* @__PURE__ */ new Set() };
|
|
2394
|
+
});
|
|
2395
|
+
newCount = upsertResult.newCount;
|
|
2396
|
+
newIds = upsertResult.newIds;
|
|
2397
|
+
let pipelineDroppedNew = 0;
|
|
2398
|
+
const shouldRunPipelineRow = (guid) => newIds.has(guid);
|
|
2399
|
+
for (let i = 0; i < items.length; i++) {
|
|
2400
|
+
if (!shouldRunPipelineRow(items[i].guid)) continue;
|
|
2401
|
+
const processed = await runPipelineOnItem(items[i], { sourceUrl: listUrl });
|
|
2402
|
+
items[i] = processed;
|
|
2403
|
+
if (isPipelineDroppedItem(processed)) {
|
|
2404
|
+
await deleteItem(processed.guid).catch(
|
|
2405
|
+
(err) => logger.warn("db", "质量过滤后删除条目失败", { source_url: listUrl, err: err instanceof Error ? err.message : String(err) })
|
|
2406
|
+
);
|
|
2407
|
+
pipelineDroppedNew++;
|
|
2408
|
+
} else {
|
|
2409
|
+
updateItemContent(processed).catch(
|
|
2410
|
+
(err) => logger.warn("db", "updateItemContent 失败", { source_url: listUrl, err: err instanceof Error ? err.message : String(err) })
|
|
2411
|
+
);
|
|
2412
|
+
}
|
|
2413
|
+
}
|
|
2414
|
+
if (newCount > 0) {
|
|
2415
|
+
emitFeedUpdated({ sourceUrl: listUrl, newCount: newCount - pipelineDroppedNew });
|
|
2416
|
+
}
|
|
2417
|
+
const out = items.filter((i) => !isPipelineDroppedItem(i));
|
|
2418
|
+
if (deliverUrl && out.length > 0) {
|
|
2419
|
+
await postDeliverItemsSafe(deliverUrl, listUrl, out);
|
|
2420
|
+
}
|
|
2421
|
+
return { items: out };
|
|
2422
|
+
}
|
|
2423
|
+
async function getItems(listUrl, config = {}) {
|
|
2424
|
+
const source = getSource(listUrl);
|
|
2425
|
+
const key = config.cron ? cacheKeyFromCron(listUrl, config.cron) : cacheKey(listUrl, config.refreshInterval ?? source.refreshInterval ?? "1day");
|
|
2426
|
+
if (source.preCheck != null) {
|
|
2427
|
+
try {
|
|
2428
|
+
await source.preCheck(
|
|
2429
|
+
buildSourceContext({
|
|
2430
|
+
cacheDir: config.cacheDir ?? "cache",
|
|
2431
|
+
headless: config.headless,
|
|
2432
|
+
proxy: config.proxy ?? source.proxy
|
|
2433
|
+
})
|
|
2434
|
+
);
|
|
2435
|
+
} catch (err) {
|
|
2436
|
+
if (err instanceof AuthRequiredError) throw err;
|
|
2437
|
+
throw err;
|
|
2438
|
+
}
|
|
2439
|
+
}
|
|
2440
|
+
let task = config.force ? void 0 : generatingKeys.get(key);
|
|
2441
|
+
if (!task) {
|
|
2442
|
+
task = generateAndCache(listUrl, key, config);
|
|
2443
|
+
if (!config.force) generatingKeys.set(key, task);
|
|
2444
|
+
}
|
|
2445
|
+
const { items } = await task;
|
|
2446
|
+
return { items, fromCache: false };
|
|
2447
|
+
}
|
|
2448
|
+
function feedItemsToRssXml(items, listUrl, lng, opts) {
|
|
2449
|
+
const channel = buildChannelFromItems(listUrl, items, lng);
|
|
2450
|
+
if (opts?.channelTitle) channel.title = opts.channelTitle;
|
|
2451
|
+
if (opts?.channelDesc) channel.description = opts.channelDesc;
|
|
2452
|
+
return buildRssXml(channel, items.map((it) => toRssEntry(it, lng)));
|
|
2453
|
+
}
|
|
2400
2454
|
const validateCron = validate;
|
|
2401
2455
|
const tasks$1 = /* @__PURE__ */ new Map();
|
|
2402
2456
|
const groups = /* @__PURE__ */ new Map();
|
|
@@ -2580,363 +2634,6 @@ function getGroupStats() {
|
|
|
2580
2634
|
}
|
|
2581
2635
|
return result;
|
|
2582
2636
|
}
|
|
2583
|
-
const ENRICH_GROUP = "enrich";
|
|
2584
|
-
const MAX_STORED_TASKS = 200;
|
|
2585
|
-
const RETRY_DELAY_MS = 3e3;
|
|
2586
|
-
class EnrichQueue {
|
|
2587
|
-
tasks = /* @__PURE__ */ new Map();
|
|
2588
|
-
taskItems = /* @__PURE__ */ new Map();
|
|
2589
|
-
taskCallbacks = /* @__PURE__ */ new Map();
|
|
2590
|
-
configLoaded = false;
|
|
2591
|
-
async ensureConfig() {
|
|
2592
|
-
if (this.configLoaded) return { concurrency: 2, maxRetries: 2 };
|
|
2593
|
-
const config = await loadEnrichConfig();
|
|
2594
|
-
this.configLoaded = true;
|
|
2595
|
-
logger.info("scraper", "配置加载完成", { concurrency: config.concurrency, maxRetries: config.maxRetries });
|
|
2596
|
-
return config;
|
|
2597
|
-
}
|
|
2598
|
-
evictIfNeeded() {
|
|
2599
|
-
if (this.tasks.size <= MAX_STORED_TASKS) return;
|
|
2600
|
-
const ids = [...this.tasks.keys()];
|
|
2601
|
-
for (const id of ids) {
|
|
2602
|
-
if (this.tasks.get(id)?.status === "done") {
|
|
2603
|
-
this.removeTask(id);
|
|
2604
|
-
if (this.tasks.size <= MAX_STORED_TASKS) return;
|
|
2605
|
-
}
|
|
2606
|
-
}
|
|
2607
|
-
if (this.tasks.size > MAX_STORED_TASKS) this.removeTask(ids[0]);
|
|
2608
|
-
}
|
|
2609
|
-
removeTask(id) {
|
|
2610
|
-
this.tasks.delete(id);
|
|
2611
|
-
this.taskItems.delete(id);
|
|
2612
|
-
this.taskCallbacks.delete(id);
|
|
2613
|
-
}
|
|
2614
|
-
checkTaskComplete(taskId) {
|
|
2615
|
-
const task = this.tasks.get(taskId);
|
|
2616
|
-
const items = this.taskItems.get(taskId);
|
|
2617
|
-
const callbacks = this.taskCallbacks.get(taskId);
|
|
2618
|
-
if (!task || !items) return;
|
|
2619
|
-
const allSettled = task.itemResults.every((r) => r.status === "done" || r.status === "failed");
|
|
2620
|
-
if (!allSettled) return;
|
|
2621
|
-
task.status = "done";
|
|
2622
|
-
task.completedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
2623
|
-
logger.info("scraper", "任务完成", {
|
|
2624
|
-
source_url: task.sourceUrl,
|
|
2625
|
-
taskId,
|
|
2626
|
-
done: task.progress.done,
|
|
2627
|
-
failed: task.progress.failed
|
|
2628
|
-
});
|
|
2629
|
-
Promise.resolve(callbacks?.onAllDone?.(items)).catch((err) => {
|
|
2630
|
-
logger.warn("scraper", "onAllDone 回调异常", { taskId, err: err instanceof Error ? err.message : String(err) });
|
|
2631
|
-
});
|
|
2632
|
-
}
|
|
2633
|
-
async submit(items, enrichFn, ctx, opts) {
|
|
2634
|
-
const config = await this.ensureConfig();
|
|
2635
|
-
const id = randomUUID();
|
|
2636
|
-
const itemResults = items.map((_, i) => ({
|
|
2637
|
-
index: i,
|
|
2638
|
-
status: "pending",
|
|
2639
|
-
retries: 0
|
|
2640
|
-
}));
|
|
2641
|
-
const task = {
|
|
2642
|
-
id,
|
|
2643
|
-
sourceUrl: opts.sourceUrl,
|
|
2644
|
-
status: items.length === 0 ? "done" : "pending",
|
|
2645
|
-
progress: { total: items.length, done: 0, failed: 0 },
|
|
2646
|
-
itemResults,
|
|
2647
|
-
createdAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2648
|
-
completedAt: items.length === 0 ? (/* @__PURE__ */ new Date()).toISOString() : void 0
|
|
2649
|
-
};
|
|
2650
|
-
const itemsCopy = [...items];
|
|
2651
|
-
this.tasks.set(id, task);
|
|
2652
|
-
this.taskItems.set(id, itemsCopy);
|
|
2653
|
-
this.taskCallbacks.set(id, opts);
|
|
2654
|
-
this.evictIfNeeded();
|
|
2655
|
-
for (let i = 0; i < items.length; i++) {
|
|
2656
|
-
const itemIndex = i;
|
|
2657
|
-
const workId = `${id}-${i}`;
|
|
2658
|
-
const taskFn = async () => {
|
|
2659
|
-
const t = this.tasks.get(id);
|
|
2660
|
-
const its = this.taskItems.get(id);
|
|
2661
|
-
const cbs = this.taskCallbacks.get(id);
|
|
2662
|
-
if (!t || !its || !cbs) return;
|
|
2663
|
-
const itemResult = t.itemResults[itemIndex];
|
|
2664
|
-
if (!itemResult) return;
|
|
2665
|
-
itemResult.status = "running";
|
|
2666
|
-
if (t.status === "pending") t.status = "running";
|
|
2667
|
-
for (let r = 0; r <= config.maxRetries; r++) {
|
|
2668
|
-
try {
|
|
2669
|
-
const enriched = await enrichFn(its[itemIndex], ctx);
|
|
2670
|
-
its[itemIndex] = enriched;
|
|
2671
|
-
itemResult.item = enriched;
|
|
2672
|
-
itemResult.status = "done";
|
|
2673
|
-
t.progress.done++;
|
|
2674
|
-
await Promise.resolve(cbs.onItemDone?.(enriched, itemIndex));
|
|
2675
|
-
this.checkTaskComplete(id);
|
|
2676
|
-
return;
|
|
2677
|
-
} catch (err) {
|
|
2678
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
2679
|
-
if (r < config.maxRetries) {
|
|
2680
|
-
logger.warn("scraper", "提取失败,重试中", {
|
|
2681
|
-
source_url: t.sourceUrl,
|
|
2682
|
-
item_url: its[itemIndex]?.link,
|
|
2683
|
-
retries: r + 1,
|
|
2684
|
-
maxRetries: config.maxRetries,
|
|
2685
|
-
err: msg
|
|
2686
|
-
});
|
|
2687
|
-
await new Promise((resolve2) => setTimeout(resolve2, RETRY_DELAY_MS));
|
|
2688
|
-
} else {
|
|
2689
|
-
itemResult.status = "failed";
|
|
2690
|
-
itemResult.error = msg;
|
|
2691
|
-
t.progress.failed++;
|
|
2692
|
-
logger.warn("scraper", "提取最终失败", {
|
|
2693
|
-
source_url: t.sourceUrl,
|
|
2694
|
-
item_url: its[itemIndex]?.link,
|
|
2695
|
-
err: msg
|
|
2696
|
-
});
|
|
2697
|
-
const failedItem = { ...its[itemIndex], enrichFailed: true };
|
|
2698
|
-
its[itemIndex] = failedItem;
|
|
2699
|
-
await Promise.resolve(cbs.onItemDone?.(failedItem, itemIndex));
|
|
2700
|
-
this.checkTaskComplete(id);
|
|
2701
|
-
}
|
|
2702
|
-
}
|
|
2703
|
-
}
|
|
2704
|
-
};
|
|
2705
|
-
schedule(ENRICH_GROUP, workId, taskFn, { concurrency: config.concurrency }).catch(() => {
|
|
2706
|
-
});
|
|
2707
|
-
}
|
|
2708
|
-
return id;
|
|
2709
|
-
}
|
|
2710
|
-
getTask(id) {
|
|
2711
|
-
return this.tasks.get(id);
|
|
2712
|
-
}
|
|
2713
|
-
getTaskItems(id) {
|
|
2714
|
-
return this.taskItems.get(id);
|
|
2715
|
-
}
|
|
2716
|
-
}
|
|
2717
|
-
const enrichQueue = new EnrichQueue();
|
|
2718
|
-
async function getDeliverUrl() {
|
|
2719
|
-
try {
|
|
2720
|
-
const raw = await readFile(CONFIG_PATH, "utf-8");
|
|
2721
|
-
const j = JSON.parse(raw);
|
|
2722
|
-
const u = j?.deliver?.url;
|
|
2723
|
-
return typeof u === "string" ? u.trim() : "";
|
|
2724
|
-
} catch {
|
|
2725
|
-
return "";
|
|
2726
|
-
}
|
|
2727
|
-
}
|
|
2728
|
-
async function saveDeliverUrl(url) {
|
|
2729
|
-
let root = {};
|
|
2730
|
-
try {
|
|
2731
|
-
const raw = await readFile(CONFIG_PATH, "utf-8");
|
|
2732
|
-
root = JSON.parse(raw);
|
|
2733
|
-
} catch {
|
|
2734
|
-
}
|
|
2735
|
-
root.deliver = { url: url.trim() };
|
|
2736
|
-
await writeFile(CONFIG_PATH, JSON.stringify(root, null, 2) + "\n", "utf-8");
|
|
2737
|
-
}
|
|
2738
|
-
function feedItemsToPayload(items) {
|
|
2739
|
-
return items.map((i) => ({
|
|
2740
|
-
guid: i.guid,
|
|
2741
|
-
title: i.title,
|
|
2742
|
-
link: i.link,
|
|
2743
|
-
pubDate: i.pubDate instanceof Date ? i.pubDate.toISOString() : (/* @__PURE__ */ new Date()).toISOString(),
|
|
2744
|
-
author: i.author,
|
|
2745
|
-
summary: i.summary,
|
|
2746
|
-
content: i.content,
|
|
2747
|
-
tags: i.tags,
|
|
2748
|
-
sourceRef: i.sourceRef,
|
|
2749
|
-
translations: i.translations
|
|
2750
|
-
}));
|
|
2751
|
-
}
|
|
2752
|
-
async function postDeliverItems(url, sourceRef, items) {
|
|
2753
|
-
if (!url.trim() || items.length === 0) return;
|
|
2754
|
-
const body = JSON.stringify({ sourceRef, items: feedItemsToPayload(items) });
|
|
2755
|
-
const res = await fetch(url.trim(), {
|
|
2756
|
-
method: "POST",
|
|
2757
|
-
headers: { "Content-Type": "application/json" },
|
|
2758
|
-
body,
|
|
2759
|
-
signal: AbortSignal.timeout(12e4)
|
|
2760
|
-
});
|
|
2761
|
-
if (!res.ok) {
|
|
2762
|
-
const text = await res.text().catch(() => "");
|
|
2763
|
-
throw new Error(`HTTP ${res.status}${text ? `: ${text.slice(0, 200)}` : ""}`);
|
|
2764
|
-
}
|
|
2765
|
-
}
|
|
2766
|
-
async function postDeliverItemsSafe(url, sourceRef, items) {
|
|
2767
|
-
try {
|
|
2768
|
-
await postDeliverItems(url, sourceRef, items);
|
|
2769
|
-
} catch (err) {
|
|
2770
|
-
logger.warn("deliver", "投递失败", {
|
|
2771
|
-
sourceRef,
|
|
2772
|
-
count: items.length,
|
|
2773
|
-
err: err instanceof Error ? err.message : String(err)
|
|
2774
|
-
});
|
|
2775
|
-
}
|
|
2776
|
-
}
|
|
2777
|
-
function buildChannelFromItems(listUrl, items, lng) {
|
|
2778
|
-
const channel = {
|
|
2779
|
-
title: items[0]?.author?.length ? `${items[0].author[0]} 的订阅` : "RSS 订阅",
|
|
2780
|
-
link: listUrl,
|
|
2781
|
-
description: `来自 ${listUrl} 的订阅`
|
|
2782
|
-
};
|
|
2783
|
-
if (lng) channel.language = lng;
|
|
2784
|
-
return channel;
|
|
2785
|
-
}
|
|
2786
|
-
function toRssEntry(item, lng) {
|
|
2787
|
-
const eff = getEffectiveItemFields(item, lng);
|
|
2788
|
-
const hasContent = eff.content != null && eff.content !== "";
|
|
2789
|
-
const desc = hasContent ? eff.content : eff.summary;
|
|
2790
|
-
return {
|
|
2791
|
-
title: eff.title,
|
|
2792
|
-
link: item.link,
|
|
2793
|
-
description: desc,
|
|
2794
|
-
guid: item.guid,
|
|
2795
|
-
published: item.pubDate?.toISOString?.() ?? void 0,
|
|
2796
|
-
imageUrl: item.imageUrl
|
|
2797
|
-
};
|
|
2798
|
-
}
|
|
2799
|
-
const generatingKeys = /* @__PURE__ */ new Map();
|
|
2800
|
-
const pipelineCtx = {
|
|
2801
|
-
llm: { chatJson, chatText },
|
|
2802
|
-
db: { getSystemTags }
|
|
2803
|
-
};
|
|
2804
|
-
async function runPipelineOnItem(item, ctx) {
|
|
2805
|
-
return runPipeline(item, { ...pipelineCtx, ...ctx });
|
|
2806
|
-
}
|
|
2807
|
-
function buildEnrichFn(source, listUrl, ctx) {
|
|
2808
|
-
const enrichCtx = buildEnrichContext(ctx);
|
|
2809
|
-
enrichCtx.sourceUrl = listUrl;
|
|
2810
|
-
return async (item) => {
|
|
2811
|
-
let result = item;
|
|
2812
|
-
if (source.enrichItem) {
|
|
2813
|
-
result = await source.enrichItem(item, ctx);
|
|
2814
|
-
}
|
|
2815
|
-
const plugin = getMatchedEnrichPlugin(result, { sourceUrl: listUrl });
|
|
2816
|
-
if (plugin) {
|
|
2817
|
-
result = await plugin.enrichItem(result, enrichCtx);
|
|
2818
|
-
}
|
|
2819
|
-
return result;
|
|
2820
|
-
};
|
|
2821
|
-
}
|
|
2822
|
-
async function generateAndCache(listUrl, key, config) {
|
|
2823
|
-
const { cacheDir = "cache", includeContent = true, headless } = config;
|
|
2824
|
-
const source = getSource(listUrl);
|
|
2825
|
-
const ctx = { cacheDir, headless, proxy: config.proxy ?? source.proxy };
|
|
2826
|
-
let items;
|
|
2827
|
-
try {
|
|
2828
|
-
items = await source.fetchItems(listUrl, ctx);
|
|
2829
|
-
} catch (err) {
|
|
2830
|
-
generatingKeys.delete(key);
|
|
2831
|
-
const message = err instanceof Error ? err.message : String(err);
|
|
2832
|
-
logger.error("scraper", "抓取失败", { source_url: listUrl, err: message });
|
|
2833
|
-
throw err;
|
|
2834
|
-
}
|
|
2835
|
-
items.forEach((i) => {
|
|
2836
|
-
i.sourceRef = listUrl;
|
|
2837
|
-
i.author = normalizeAuthor(i.author);
|
|
2838
|
-
});
|
|
2839
|
-
generatingKeys.delete(key);
|
|
2840
|
-
logger.info("scraper", "抓取成功", { source_url: listUrl, count: items.length });
|
|
2841
|
-
const deliverUrl = await getDeliverUrl();
|
|
2842
|
-
let newCount = 0;
|
|
2843
|
-
let newIds = /* @__PURE__ */ new Set();
|
|
2844
|
-
const upsertResult = await upsertItems(items).catch((err) => {
|
|
2845
|
-
logger.warn("db", "upsertItems 失败", { source_url: listUrl, err: err instanceof Error ? err.message : String(err) });
|
|
2846
|
-
return { newCount: 0, newIds: /* @__PURE__ */ new Set() };
|
|
2847
|
-
});
|
|
2848
|
-
newCount = upsertResult.newCount;
|
|
2849
|
-
newIds = upsertResult.newIds;
|
|
2850
|
-
let pipelineDroppedNew = 0;
|
|
2851
|
-
const shouldRunPipelineRow = (guid) => newIds.has(guid);
|
|
2852
|
-
const hasEnrich = source.enrichItem != null || items.some((i) => getMatchedEnrichPlugin(i, { sourceUrl: listUrl }));
|
|
2853
|
-
if (!includeContent || items.length === 0 || !hasEnrich) {
|
|
2854
|
-
for (let i = 0; i < items.length; i++) {
|
|
2855
|
-
if (!shouldRunPipelineRow(items[i].guid)) continue;
|
|
2856
|
-
const processed = await runPipelineOnItem(items[i], { sourceUrl: listUrl, isEnriched: false });
|
|
2857
|
-
items[i] = processed;
|
|
2858
|
-
if (isPipelineDroppedItem(processed)) {
|
|
2859
|
-
await deleteItem(processed.guid).catch(
|
|
2860
|
-
(err) => logger.warn("db", "质量过滤后删除条目失败", { source_url: listUrl, err: err instanceof Error ? err.message : String(err) })
|
|
2861
|
-
);
|
|
2862
|
-
pipelineDroppedNew++;
|
|
2863
|
-
} else {
|
|
2864
|
-
updateItemContent(processed).catch(
|
|
2865
|
-
(err) => logger.warn("db", "updateItemContent 失败", { source_url: listUrl, err: err instanceof Error ? err.message : String(err) })
|
|
2866
|
-
);
|
|
2867
|
-
}
|
|
2868
|
-
}
|
|
2869
|
-
if (newCount > 0) {
|
|
2870
|
-
emitFeedUpdated({ sourceUrl: listUrl, newCount: newCount - pipelineDroppedNew });
|
|
2871
|
-
}
|
|
2872
|
-
const out = items.filter((i) => !isPipelineDroppedItem(i));
|
|
2873
|
-
if (deliverUrl && out.length > 0) {
|
|
2874
|
-
await postDeliverItemsSafe(deliverUrl, listUrl, out);
|
|
2875
|
-
}
|
|
2876
|
-
return { items: out };
|
|
2877
|
-
}
|
|
2878
|
-
const enrichFn = (item, _ctx) => buildEnrichFn(source, listUrl, ctx)(item);
|
|
2879
|
-
await enrichQueue.submit(
|
|
2880
|
-
items,
|
|
2881
|
-
enrichFn,
|
|
2882
|
-
ctx,
|
|
2883
|
-
{
|
|
2884
|
-
sourceUrl: listUrl,
|
|
2885
|
-
onItemDone: async (enrichedItem, index) => {
|
|
2886
|
-
enrichedItem.sourceRef = listUrl;
|
|
2887
|
-
const processed = shouldRunPipelineRow(enrichedItem.guid) ? await runPipelineOnItem(enrichedItem, { sourceUrl: listUrl, isEnriched: true }) : enrichedItem;
|
|
2888
|
-
items[index] = processed;
|
|
2889
|
-
if (isPipelineDroppedItem(processed)) {
|
|
2890
|
-
await deleteItem(processed.guid).catch(
|
|
2891
|
-
(err) => logger.warn("db", "质量过滤后删除条目失败", { source_url: listUrl, err: err instanceof Error ? err.message : String(err) })
|
|
2892
|
-
);
|
|
2893
|
-
pipelineDroppedNew++;
|
|
2894
|
-
} else {
|
|
2895
|
-
updateItemContent(processed).catch(
|
|
2896
|
-
(err) => logger.warn("db", "updateItemContent 失败", { source_url: listUrl, err: err instanceof Error ? err.message : String(err) })
|
|
2897
|
-
);
|
|
2898
|
-
}
|
|
2899
|
-
},
|
|
2900
|
-
onAllDone: async () => {
|
|
2901
|
-
for (let i = items.length - 1; i >= 0; i--) {
|
|
2902
|
-
if (isPipelineDroppedItem(items[i])) items.splice(i, 1);
|
|
2903
|
-
}
|
|
2904
|
-
if (newCount > 0) {
|
|
2905
|
-
emitFeedUpdated({ sourceUrl: listUrl, newCount: newCount - pipelineDroppedNew });
|
|
2906
|
-
}
|
|
2907
|
-
if (deliverUrl && items.length > 0) {
|
|
2908
|
-
await postDeliverItemsSafe(deliverUrl, listUrl, items);
|
|
2909
|
-
}
|
|
2910
|
-
}
|
|
2911
|
-
}
|
|
2912
|
-
);
|
|
2913
|
-
return { items };
|
|
2914
|
-
}
|
|
2915
|
-
async function getItems(listUrl, config = {}) {
|
|
2916
|
-
const source = getSource(listUrl);
|
|
2917
|
-
const key = config.cron ? cacheKeyFromCron(listUrl, config.cron) : cacheKey(listUrl, config.refreshInterval ?? source.refreshInterval ?? "1day");
|
|
2918
|
-
if (source.preCheck != null) {
|
|
2919
|
-
try {
|
|
2920
|
-
await source.preCheck({ cacheDir: config.cacheDir ?? "cache", headless: config.headless, proxy: config.proxy ?? source.proxy });
|
|
2921
|
-
} catch (err) {
|
|
2922
|
-
if (err instanceof AuthRequiredError) throw err;
|
|
2923
|
-
throw err;
|
|
2924
|
-
}
|
|
2925
|
-
}
|
|
2926
|
-
let task = config.force ? void 0 : generatingKeys.get(key);
|
|
2927
|
-
if (!task) {
|
|
2928
|
-
task = generateAndCache(listUrl, key, config);
|
|
2929
|
-
if (!config.force) generatingKeys.set(key, task);
|
|
2930
|
-
}
|
|
2931
|
-
const { items } = await task;
|
|
2932
|
-
return { items, fromCache: false };
|
|
2933
|
-
}
|
|
2934
|
-
function feedItemsToRssXml(items, listUrl, lng, opts) {
|
|
2935
|
-
const channel = buildChannelFromItems(listUrl, items, lng);
|
|
2936
|
-
if (opts?.channelTitle) channel.title = opts.channelTitle;
|
|
2937
|
-
if (opts?.channelDesc) channel.description = opts.channelDesc;
|
|
2938
|
-
return buildRssXml(channel, items.map((it) => toRssEntry(it, lng)));
|
|
2939
|
-
}
|
|
2940
2637
|
const DEFAULT_REFRESH = "1day";
|
|
2941
2638
|
const SOURCES_CONCURRENCY = 5;
|
|
2942
2639
|
function createPullTask(ref, cacheDir, cronExpr) {
|
|
@@ -3044,23 +2741,15 @@ function registerRssApiRoutes(app) {
|
|
|
3044
2741
|
}
|
|
3045
2742
|
});
|
|
3046
2743
|
}
|
|
3047
|
-
function registerEnrichRoutes(app) {
|
|
3048
|
-
app.get("/api/enrich/:taskId", (c) => {
|
|
3049
|
-
const taskId = c.req.param("taskId");
|
|
3050
|
-
const task = enrichQueue.getTask(taskId);
|
|
3051
|
-
if (!task) return c.json({ error: "任务不存在或已过期" }, 404);
|
|
3052
|
-
return c.json(task);
|
|
3053
|
-
});
|
|
3054
|
-
}
|
|
3055
2744
|
function registerSchedulerRoutes(app) {
|
|
3056
2745
|
app.get("/api/scheduler/stats", requireAdmin(), (c) => {
|
|
3057
2746
|
const stats = getGroupStats();
|
|
3058
2747
|
return c.json(stats);
|
|
3059
2748
|
});
|
|
3060
2749
|
}
|
|
3061
|
-
const USER_SITE_TEMPLATE = join(BUILTIN_PLUGINS_DIR, "templates", "site.rssany.js");
|
|
3062
2750
|
const SITE_TEMPLATE_FALLBACK = `/**
|
|
3063
|
-
* Site 插件模板(由管理页添加,位于 .rssany/plugins
|
|
2751
|
+
* Site 插件模板(由管理页添加,位于 .rssany/plugins/)
|
|
2752
|
+
* HTML DOM 解析请用 ctx.deps.parseHtml,勿在插件内 import node_modules。
|
|
3064
2753
|
*/
|
|
3065
2754
|
export default {
|
|
3066
2755
|
id: "__PLUGIN_ID__",
|
|
@@ -3072,7 +2761,7 @@ export default {
|
|
|
3072
2761
|
waitMs: 2000,
|
|
3073
2762
|
purify: true,
|
|
3074
2763
|
});
|
|
3075
|
-
void html;
|
|
2764
|
+
void ctx.deps.parseHtml(html);
|
|
3076
2765
|
void finalUrl;
|
|
3077
2766
|
return [];
|
|
3078
2767
|
},
|
|
@@ -3111,12 +2800,11 @@ function registerPluginsRoutes(app) {
|
|
|
3111
2800
|
return c.json({ error: "id 须为字母开头,仅含字母数字、下划线、连字符;不能为 generic 或 new" }, 400);
|
|
3112
2801
|
}
|
|
3113
2802
|
await mkdir(USER_PLUGINS_DIR, { recursive: true });
|
|
3114
|
-
|
|
3115
|
-
const outPath = join(USER_SOURCES_DIR, `${id}.rssany.ts`);
|
|
2803
|
+
const outPath = join(USER_PLUGINS_DIR, `${id}.rssany.js`);
|
|
3116
2804
|
if (await fileExists(outPath)) return c.json({ error: "该 id 已存在同名文件" }, 409);
|
|
3117
2805
|
let tpl = SITE_TEMPLATE_FALLBACK;
|
|
3118
2806
|
try {
|
|
3119
|
-
tpl = await readFile(
|
|
2807
|
+
tpl = await readFile(PLUGIN_SITE_TEMPLATE_PATH, "utf-8");
|
|
3120
2808
|
} catch {
|
|
3121
2809
|
}
|
|
3122
2810
|
const content = tpl.replace(/__PLUGIN_ID__/g, id);
|
|
@@ -3134,7 +2822,6 @@ function registerPluginsRoutes(app) {
|
|
|
3134
2822
|
kind: "site",
|
|
3135
2823
|
id: s.id,
|
|
3136
2824
|
listUrlPattern: typeof s.listUrlPattern === "string" ? s.listUrlPattern : String(s.listUrlPattern),
|
|
3137
|
-
hasEnrich: !!s.enrichItem,
|
|
3138
2825
|
hasAuth: !!(s.checkAuth && s.loginUrl)
|
|
3139
2826
|
}));
|
|
3140
2827
|
const siteIds = new Set(sites.map((p) => p.id));
|
|
@@ -3142,7 +2829,6 @@ function registerPluginsRoutes(app) {
|
|
|
3142
2829
|
kind: "source",
|
|
3143
2830
|
id: src.id,
|
|
3144
2831
|
listUrlPattern: typeof src.pattern === "string" ? src.pattern : String(src.pattern),
|
|
3145
|
-
hasEnrich: !!src.enrichItem,
|
|
3146
2832
|
hasAuth: false
|
|
3147
2833
|
}));
|
|
3148
2834
|
return c.json([...sites, ...sources]);
|
|
@@ -3654,7 +3340,6 @@ function registerTasksRoutes(app) {
|
|
|
3654
3340
|
function registerApiRoutes(app) {
|
|
3655
3341
|
registerServerRoutes(app);
|
|
3656
3342
|
registerRssApiRoutes(app);
|
|
3657
|
-
registerEnrichRoutes(app);
|
|
3658
3343
|
registerSchedulerRoutes(app);
|
|
3659
3344
|
registerPluginsRoutes(app);
|
|
3660
3345
|
registerPipelineRoutes(app);
|
|
@@ -3756,7 +3441,7 @@ function registerAdminRoutes(app) {
|
|
|
3756
3441
|
const headlessParam = c.req.query("headless");
|
|
3757
3442
|
const headless = headlessParam === "false" || headlessParam === "0" ? false : void 0;
|
|
3758
3443
|
const source = getSource(url);
|
|
3759
|
-
const ctx = { cacheDir: CACHE_DIR, headless, proxy: source.proxy };
|
|
3444
|
+
const ctx = buildSourceContext({ cacheDir: CACHE_DIR, headless, proxy: source.proxy });
|
|
3760
3445
|
const items = await source.fetchItems(url, ctx);
|
|
3761
3446
|
const mode = source.id === "generic" ? "generic" : "plugin";
|
|
3762
3447
|
return c.json({ items, url, mode, pluginId: source.id });
|
|
@@ -3776,18 +3461,6 @@ function registerAdminRoutes(app) {
|
|
|
3776
3461
|
const headlessParam = c.req.query("headless");
|
|
3777
3462
|
const headless = headlessParam === "false" || headlessParam === "0" ? false : void 0;
|
|
3778
3463
|
const site = getBestSite(url);
|
|
3779
|
-
if (site?.enrichItem) {
|
|
3780
|
-
const siteCtx = buildSiteContext(site, { cacheDir: CACHE_DIR, headless });
|
|
3781
|
-
const stub = { guid: url, title: "", link: url, pubDate: /* @__PURE__ */ new Date() };
|
|
3782
|
-
const enriched = await site.enrichItem(stub, siteCtx);
|
|
3783
|
-
return c.json({
|
|
3784
|
-
title: enriched.title ?? null,
|
|
3785
|
-
author: enriched.author ?? null,
|
|
3786
|
-
pubDate: enriched.pubDate instanceof Date ? enriched.pubDate.toISOString() : enriched.pubDate ?? null,
|
|
3787
|
-
content: enriched.content ?? null,
|
|
3788
|
-
_extractor: site.id
|
|
3789
|
-
});
|
|
3790
|
-
}
|
|
3791
3464
|
const proxy = site?.proxy;
|
|
3792
3465
|
const result = await extractFromLink(url, {}, { timeoutMs: 6e4, headless, proxy });
|
|
3793
3466
|
return c.json({
|