rssany 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,8 +10,8 @@
10
10
  ## 功能概览
11
11
 
12
12
  - **统一订阅**:在 `.rssany/sources.json` 中配置网站列表、标准 RSS、IMAP 邮件等,由调度器按 `refresh` 策略拉取。
13
- - **可插拔信源**:`plugins/sources/` 与 `.rssany/plugins/sources/` 中的 **Site** 插件(`.rssany.js` / `.rssany.ts`),自定义列表解析与详情规则。
14
- - **正文补全**:可选 **enrich** 插件拉全文;无 enrich 时入库后仍会跑 pipeline。
13
+ - **可插拔信源**:`app/plugins/builtin/` 与 `.rssany/plugins/` 中的 **Site** 插件(`.rssany.js` / `.rssany.ts`),自定义列表解析与详情规则。
14
+ - **正文与解析**:在信源 `fetchItems`(及需要的 `ctx.extractItem` 等)内完成;入库后跑 pipeline。
15
15
  - **固定 pipeline**:`app/pipeline/` 中打标签、翻译等,由 `.rssany/config.json` 的 `pipeline.steps` 开关(**不是**用户目录下的 pipeline 插件)。
16
16
  - **LLM 辅助**:解析、提取、标签、翻译等可按配置走 OpenAI 兼容接口。
17
17
  - **站点登录**:需登录的站点通过 Puppeteer 管理 Cookie(与产品用户账号无关)。
@@ -99,7 +99,7 @@ rssany
99
99
 
100
100
  重置数据(结束 `PORT` 监听进程并删除用户目录):**`rssany reset`**(与仓库内 **`pnpm reset`** 相同逻辑;可在含 `.env` 的目录下执行以读取 `PORT` / `RSSANY_USER_DIR`)。
101
101
 
102
- 用户数据在 **`~/.rssany/`**(Windows:`%USERPROFILE%\.rssany`),与工作目录无关。可选环境变量 **`RSSANY_USER_DIR`** 可指定其它路径。等价于 `node node_modules/rssany/dist/index.js`;CLI 名称为 `rssany`。内置 `plugins/`、`statics/`、`webui/build` 随包安装路径解析。
102
+ 用户数据在 **`~/.rssany/`**(Windows:`%USERPROFILE%\.rssany`),与工作目录无关。可选环境变量 **`RSSANY_USER_DIR`** 可指定其它路径。等价于 `node node_modules/rssany/dist/index.js`;CLI 名称为 `rssany`。内置 `app/plugins/builtin/`、`statics/`、`webui/build` 随包安装路径解析。
103
103
 
104
104
  ---
105
105
 
@@ -109,7 +109,6 @@ rssany
109
109
  sources.json / Site 插件
110
110
  → 调度器触发 fetchItems
111
111
  → upsertItems
112
- → [可选] enrich 队列
113
112
  → pipeline(每条一次)
114
113
  → [可选] deliver.url POST(出站,非入站 API)
115
114
  ```
@@ -131,11 +130,7 @@ sources.json / Site 插件
131
130
 
132
131
  ### 信源插件(Site)
133
132
 
134
- 放置于 `**plugins/sources/`** 或 `**.rssany/plugins/sources/**`,用户插件可与内置插件同 `id` 覆盖。最小约定包括 `id`、`listUrlPattern` 等(详见 `app/scraper/sources/web/site.ts`)。
135
-
136
- ### Enrich 插件
137
-
138
- `**plugins/enrich/**`、`**.rssany/plugins/enrich/**`,按 enrich 管线加载。
133
+ 放置于 **`app/plugins/builtin/`** 或 **`.rssany/plugins/`**(扁平),用户插件可与内置插件同 `id` 覆盖。最小约定包括 `id`、`listUrlPattern` 等(详见 `app/scraper/sources/web/site.ts`)。
139
134
 
140
135
  ### Pipeline(固定代码)
141
136
 
@@ -175,7 +170,7 @@ sources.json / Site 插件
175
170
 
176
171
  ```
177
172
  ├── app/ # 后端:路由、feeder、scraper、pipeline、mcp、db、auth…
178
- ├── plugins/ # 内置信源 / enrich 等插件
173
+ │ └── plugins/builtin/ # 内置信源 *.rssany.js
179
174
  └── webui/ # SvelteKit 前端
180
175
 
181
176
  ~/.rssany/ # 运行时用户数据(首次启动创建;或 RSSANY_USER_DIR)
@@ -1,96 +1,92 @@
1
- // 内置 IMAP 邮件插件:匹配 imap://、imaps:// 协议 URL
2
-
3
- import { ImapFlow } from "imapflow";
4
- import { logger } from "../../app/core/logger/index.js";
5
- import { simpleParser } from "mailparser";
6
- import { createHash } from "node:crypto";
7
-
8
- function parseImapUrl(sourceId) {
9
- const url = new URL(sourceId);
10
- const host = url.hostname;
11
- const port = url.port ? parseInt(url.port, 10) : 993;
12
- const secure = url.protocol === "imaps:" || port === 993;
13
- const user = decodeURIComponent(url.username);
14
- const pass = decodeURIComponent(url.password);
15
- const folder = decodeURIComponent(url.pathname.slice(1)) || "INBOX";
16
- const limit = Math.max(1, parseInt(url.searchParams.get("limit") ?? "30", 10));
17
- return { host, port, secure, user, pass, folder, limit };
18
- }
19
-
20
- function makeGuid(messageId, uid, host) {
21
- const raw = messageId ?? `${uid}@${host}`;
22
- return createHash("sha256").update(raw).digest("hex");
23
- }
24
-
25
- export default {
26
- id: "__email__",
27
- pattern: /^imaps?:\/\//,
28
- priority: 0,
29
- refreshInterval: "30min",
30
- async fetchItems(sourceId, _ctx) {
31
- const { host, port, secure, user, pass, folder, limit } = parseImapUrl(sourceId);
32
- const client = new ImapFlow({
33
- host,
34
- port,
35
- secure,
36
- auth: { user, pass },
37
- logger: false,
38
- });
39
-
40
- client.on("error", (err) => {
41
- logger.error("source", "IMAP 连接异常", { err: err?.message, host, folder });
42
- });
43
-
44
- const items = [];
45
- let connected = false;
46
- try {
47
- await client.connect();
48
- connected = true;
49
- const lock = await client.getMailboxLock(folder);
50
- try {
51
- const mailbox = client.mailbox;
52
- if (mailbox === false) return [];
53
- const total = mailbox.exists ?? 0;
54
- if (total === 0) return [];
55
- const start = Math.max(1, total - limit + 1);
56
- for await (const msg of client.fetch(`${start}:*`, { source: true, envelope: true })) {
57
- try {
58
- if (msg.source === undefined || msg.envelope === undefined) continue;
59
- const parsed = await simpleParser(msg.source);
60
- const envelope = msg.envelope;
61
- const guid = makeGuid(envelope.messageId, msg.uid, host);
62
- const title = parsed.subject ?? envelope.subject ?? "(无主题)";
63
- const fromAddr = envelope.from?.[0];
64
- const authorRaw = fromAddr?.name || fromAddr?.address || undefined;
65
- const author = authorRaw ? [authorRaw] : undefined;
66
- const pubDate = parsed.date ?? envelope.date ?? new Date();
67
- const link = `imap://${host}/${encodeURIComponent(folder)}#${msg.uid}`;
68
- const htmlBody = typeof parsed.html === "string" ? parsed.html : undefined;
69
- const textBody = typeof parsed.text === "string" ? parsed.text : undefined;
70
- const content = htmlBody ?? (textBody ? `<pre>${textBody}</pre>` : undefined);
71
- const summary = textBody?.slice(0, 300) || undefined;
72
- items.push({ guid, title, link, pubDate, author, summary, content });
73
- } catch (err) {
74
- logger.warn("source", "解析单封邮件失败", { err: err?.message });
75
- }
76
- }
77
- } finally {
78
- lock.release();
79
- }
80
- } catch (err) {
81
- logger.warn("source", "拉取 IMAP 邮件失败", { err: err?.message, host, folder });
82
- return [];
83
- } finally {
84
- if (connected && client.usable) {
85
- try {
86
- await client.logout();
87
- } catch (err) {
88
- logger.warn("source", "IMAP 退出连接失败", { err: err?.message, host, folder });
89
- }
90
- } else {
91
- client.close();
92
- }
93
- }
94
- return items.sort((a, b) => b.pubDate.getTime() - a.pubDate.getTime());
95
- },
96
- };
1
+ // 内置 IMAP 邮件插件:匹配 imap://、imaps:// 协议 URL
2
+
3
+ function parseImapUrl(sourceId) {
4
+ const url = new URL(sourceId);
5
+ const host = url.hostname;
6
+ const port = url.port ? parseInt(url.port, 10) : 993;
7
+ const secure = url.protocol === "imaps:" || port === 993;
8
+ const user = decodeURIComponent(url.username);
9
+ const pass = decodeURIComponent(url.password);
10
+ const folder = decodeURIComponent(url.pathname.slice(1)) || "INBOX";
11
+ const limit = Math.max(1, parseInt(url.searchParams.get("limit") ?? "30", 10));
12
+ return { host, port, secure, user, pass, folder, limit };
13
+ }
14
+
15
+ function makeGuid(messageId, uid, host, createHash) {
16
+ const raw = messageId ?? `${uid}@${host}`;
17
+ return createHash("sha256").update(raw).digest("hex");
18
+ }
19
+
20
+ export default {
21
+ id: "__email__",
22
+ pattern: /^imaps?:\/\//,
23
+ priority: 0,
24
+ refreshInterval: "30min",
25
+ async fetchItems(sourceId, ctx) {
26
+ const { deps } = ctx;
27
+ const { host, port, secure, user, pass, folder, limit } = parseImapUrl(sourceId);
28
+ const client = new deps.ImapFlow({
29
+ host,
30
+ port,
31
+ secure,
32
+ auth: { user, pass },
33
+ logger: false,
34
+ });
35
+
36
+ client.on("error", (err) => {
37
+ deps.logger.error("source", "IMAP 连接异常", { err: err?.message, host, folder });
38
+ });
39
+
40
+ const items = [];
41
+ let connected = false;
42
+ try {
43
+ await client.connect();
44
+ connected = true;
45
+ const lock = await client.getMailboxLock(folder);
46
+ try {
47
+ const mailbox = client.mailbox;
48
+ if (mailbox === false) return [];
49
+ const total = mailbox.exists ?? 0;
50
+ if (total === 0) return [];
51
+ const start = Math.max(1, total - limit + 1);
52
+ for await (const msg of client.fetch(`${start}:*`, { source: true, envelope: true })) {
53
+ try {
54
+ if (msg.source === undefined || msg.envelope === undefined) continue;
55
+ const parsed = await deps.simpleParser(msg.source);
56
+ const envelope = msg.envelope;
57
+ const guid = makeGuid(envelope.messageId, msg.uid, host, deps.createHash);
58
+ const title = parsed.subject ?? envelope.subject ?? "(无主题)";
59
+ const fromAddr = envelope.from?.[0];
60
+ const authorRaw = fromAddr?.name || fromAddr?.address || undefined;
61
+ const author = authorRaw ? [authorRaw] : undefined;
62
+ const pubDate = parsed.date ?? envelope.date ?? new Date();
63
+ const link = `imap://${host}/${encodeURIComponent(folder)}#${msg.uid}`;
64
+ const htmlBody = typeof parsed.html === "string" ? parsed.html : undefined;
65
+ const textBody = typeof parsed.text === "string" ? parsed.text : undefined;
66
+ const content = htmlBody ?? (textBody ? `<pre>${textBody}</pre>` : undefined);
67
+ const summary = textBody?.slice(0, 300) || undefined;
68
+ items.push({ guid, title, link, pubDate, author, summary, content });
69
+ } catch (err) {
70
+ deps.logger.warn("source", "解析单封邮件失败", { err: err?.message });
71
+ }
72
+ }
73
+ } finally {
74
+ lock.release();
75
+ }
76
+ } catch (err) {
77
+ deps.logger.warn("source", "拉取 IMAP 邮件失败", { err: err?.message, host, folder });
78
+ return [];
79
+ } finally {
80
+ if (connected && client.usable) {
81
+ try {
82
+ await client.logout();
83
+ } catch (err) {
84
+ deps.logger.warn("source", "IMAP 退出连接失败", { err: err?.message, host, folder });
85
+ }
86
+ } else {
87
+ client.close();
88
+ }
89
+ }
90
+ return items.sort((a, b) => b.pubDate.getTime() - a.pubDate.getTime());
91
+ },
92
+ };
@@ -1,38 +1,13 @@
1
1
  // 内置 RSS/Atom/JSON Feed 插件:匹配 *rss*、*atom*、*.xml 等标准 Feed URL
2
2
 
3
- import Parser from "rss-parser";
4
- import { createHash } from "node:crypto";
5
-
6
3
  const UA = "RssAny/1.0 (+https://github.com/joohw/rssany)";
7
- const parser = new Parser({
8
- timeout: 15_000,
9
- headers: {
10
- "User-Agent": UA,
11
- Accept: "application/rss+xml,application/atom+xml,application/json,application/xml,text/xml,*/*",
12
- },
13
- });
14
-
15
- function looksLikeFeed(url) {
16
- const lower = url.toLowerCase();
17
- return (
18
- lower.includes("/feed") ||
19
- lower.includes("/rss") ||
20
- lower.includes("/atom") ||
21
- lower.endsWith(".xml") ||
22
- lower.endsWith(".rss") ||
23
- lower.endsWith(".atom") ||
24
- lower.includes("format=rss") ||
25
- lower.includes("format=atom") ||
26
- lower.includes("output=rss")
27
- );
28
- }
29
4
 
30
- async function fetchFeed(url, proxy) {
31
- const proxyToUse = proxy ?? process.env.HTTP_PROXY ?? process.env.HTTPS_PROXY;
5
+ async function fetchFeed(url, ctx) {
6
+ const { deps } = ctx;
7
+ const proxyToUse = ctx.proxy ?? process.env.HTTP_PROXY ?? process.env.HTTPS_PROXY;
32
8
  if (proxyToUse) {
33
- const { HttpsProxyAgent } = await import("https-proxy-agent");
34
- const agent = new HttpsProxyAgent(proxyToUse);
35
- const parserWithProxy = new Parser({
9
+ const agent = new deps.HttpsProxyAgent(proxyToUse);
10
+ const parserWithProxy = new deps.RssParser({
36
11
  timeout: 15_000,
37
12
  headers: {
38
13
  "User-Agent": UA,
@@ -42,6 +17,13 @@ async function fetchFeed(url, proxy) {
42
17
  });
43
18
  return parserWithProxy.parseURL(url);
44
19
  }
20
+ const parser = new deps.RssParser({
21
+ timeout: 15_000,
22
+ headers: {
23
+ "User-Agent": UA,
24
+ Accept: "application/rss+xml,application/atom+xml,application/json,application/xml,text/xml,*/*",
25
+ },
26
+ });
45
27
  return parser.parseURL(url);
46
28
  }
47
29
 
@@ -52,10 +34,11 @@ export default {
52
34
  priority: 20,
53
35
  refreshInterval: "1h",
54
36
  async fetchItems(sourceId, ctx) {
55
- const feed = await fetchFeed(sourceId, ctx.proxy);
37
+ const { deps } = ctx;
38
+ const feed = await fetchFeed(sourceId, ctx);
56
39
  return (feed.items ?? []).map((item) => {
57
40
  const link = item.link ?? item.guid ?? sourceId;
58
- const guid = item.guid ?? createHash("sha256").update(link).digest("hex");
41
+ const guid = item.guid ?? deps.createHash("sha256").update(link).digest("hex");
59
42
  const pubDate =
60
43
  item.pubDate != null
61
44
  ? new Date(item.pubDate)
@@ -81,3 +64,18 @@ export default {
81
64
  });
82
65
  },
83
66
  };
67
+
68
+ function looksLikeFeed(url) {
69
+ const lower = url.toLowerCase();
70
+ return (
71
+ lower.includes("/feed") ||
72
+ lower.includes("/rss") ||
73
+ lower.includes("/atom") ||
74
+ lower.endsWith(".xml") ||
75
+ lower.endsWith(".rss") ||
76
+ lower.endsWith(".atom") ||
77
+ lower.includes("format=rss") ||
78
+ lower.includes("format=atom") ||
79
+ lower.includes("output=rss")
80
+ );
81
+ }
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Site 插件模板(管理页「添加插件」会复制到 `.rssany/plugins/sources/{id}.rssany.ts`)
2
+ * Site 插件模板(管理页「添加插件」会复制到 `.rssany/plugins/{id}.rssany.js`)
3
3
  * 修改 `id` 后请与文件名保持一致。
4
4
  *
5
5
  * 接口说明:app/scraper/sources/web/site.ts
@@ -10,17 +10,16 @@ export default {
10
10
  listUrlPattern: "https://example.com/{segment}",
11
11
  refreshInterval: "1day",
12
12
 
13
- /** sourceId 与订阅里 ref 一致;ctx 含 fetchHtml、extractItem */
13
+ /** sourceId 与订阅里 ref 一致;ctx 含 fetchHtml、extractItem、deps(parseHtml 等) */
14
14
  async fetchItems(sourceId, ctx) {
15
15
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, {
16
16
  waitMs: 2000,
17
17
  purify: true,
18
18
  });
19
- void html;
19
+ const root = ctx.deps.parseHtml(html);
20
+ void root;
20
21
  void finalUrl;
21
- // TODO: 解析列表页 HTML,产出 { title, link, summary?, pubDate? } 等 FeedItem
22
+ // TODO: ctx.deps.parseHtml 解析列表页,产出 { title, link, summary?, pubDate? } 等 FeedItem
22
23
  return [];
23
24
  },
24
-
25
- // enrichItem: async (item, ctx) => ctx.extractItem(item),
26
25
  };