rssany 0.1.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/README.md +23 -27
  2. package/app/plugins/builtin/agi-eval-evaluation.rssany.js +7 -8
  3. package/app/plugins/builtin/amii-research-talent.rssany.js +6 -7
  4. package/app/plugins/builtin/anthropic-research.rssany.js +6 -8
  5. package/app/plugins/builtin/appen-resources.rssany.js +6 -7
  6. package/app/plugins/builtin/baai-wudao-paper-article.rssany.js +9 -10
  7. package/app/plugins/builtin/baaidata-csdn.rssany.js +6 -7
  8. package/app/plugins/builtin/baidu-research.rssany.js +5 -8
  9. package/app/plugins/builtin/brightdata-blog.rssany.js +7 -12
  10. package/app/plugins/builtin/bytedance-seed-research.rssany.js +5 -7
  11. package/app/plugins/builtin/email.rssany.js +9 -9
  12. package/app/plugins/builtin/five-radar.rssany.js +10 -12
  13. package/app/plugins/builtin/flageval-news.rssany.js +5 -7
  14. package/app/plugins/builtin/google-deepmind-research.rssany.js +7 -9
  15. package/app/plugins/builtin/google-research-datasets.rssany.js +6 -8
  16. package/app/plugins/builtin/google-research.rssany.js +6 -8
  17. package/app/plugins/builtin/hacker-news-newest.rssany.js +7 -9
  18. package/app/plugins/builtin/harvard-dataverse.rssany.js +6 -8
  19. package/app/plugins/builtin/huaweicloud-bbs-blogs.rssany.js +7 -9
  20. package/app/plugins/builtin/lingowhale.rssany.js +7 -9
  21. package/app/plugins/builtin/meituan-tech.rssany.js +7 -10
  22. package/app/plugins/builtin/meta-ai-publications.rssany.js +6 -11
  23. package/app/plugins/builtin/mila-quebec.rssany.js +6 -8
  24. package/app/plugins/builtin/mit-csail-research.rssany.js +7 -9
  25. package/app/plugins/builtin/moonshot.rssany.js +6 -8
  26. package/app/plugins/builtin/opendatalab-news.rssany.js +6 -7
  27. package/app/plugins/builtin/opendatalab.rssany.js +5 -6
  28. package/app/plugins/builtin/opendrivelab-autonomous-driving.rssany.js +6 -7
  29. package/app/plugins/builtin/opendrivelab-embodiedai.rssany.js +7 -8
  30. package/app/plugins/builtin/opendrivelab-publications.rssany.js +7 -9
  31. package/app/plugins/builtin/opendrivelab.rssany.js +7 -8
  32. package/app/plugins/builtin/paperswithcode.rssany.js +6 -8
  33. package/app/plugins/builtin/pjlab-adg-publications.rssany.js +8 -10
  34. package/app/plugins/builtin/rss.rssany.js +11 -12
  35. package/app/plugins/builtin/selectdataset.rssany.js +6 -8
  36. package/app/plugins/builtin/sensetime-tech-achievements.rssany.js +7 -8
  37. package/app/plugins/builtin/supervisely-blog.rssany.js +6 -8
  38. package/app/plugins/builtin/theinformation-briefings.rssany.js +144 -136
  39. package/app/plugins/builtin/uci-ml-repository.rssany.js +6 -7
  40. package/app/plugins/builtin/venturebeat.rssany.js +7 -9
  41. package/app/plugins/builtin/worldlabs.rssany.js +6 -8
  42. package/app/plugins/builtin/x.rssany.js +7 -9
  43. package/app/plugins/builtin/xiaohongshu.rssany.js +119 -56
  44. package/app/plugins/builtin/zhipu-research.rssany.js +7 -10
  45. package/app/plugins/site.rssany.js +25 -25
  46. package/{statics → app/statics}/README.md +7 -7
  47. package/bin/rssany.js +226 -6
  48. package/dist/index.js +545 -396
  49. package/dist/index.js.map +1 -1
  50. package/package.json +20 -13
  51. package/scripts/dev.mjs +114 -0
  52. package/scripts/reset.mjs +1 -1
  53. package/app/plugins/builtin/google.rssany.js +0 -187
  54. package/init/config.json +0 -17
  55. package/init/sources.json +0 -353
  56. package/statics/401.html +0 -56
  57. package/statics/404.html +0 -12
  58. package/statics/image.png +0 -0
  59. package/webui/build/200.html +0 -49
  60. package/webui/build/_app/env.js +0 -1
  61. package/webui/build/_app/immutable/assets/0.BB88QFoe.css +0 -1
  62. package/webui/build/_app/immutable/assets/10.Dj8_pmut.css +0 -1
  63. package/webui/build/_app/immutable/assets/11.qYZMiTb0.css +0 -1
  64. package/webui/build/_app/immutable/assets/12.Ct59LCqW.css +0 -1
  65. package/webui/build/_app/immutable/assets/13.BhO9zvFi.css +0 -1
  66. package/webui/build/_app/immutable/assets/14.CujIhjQK.css +0 -1
  67. package/webui/build/_app/immutable/assets/15.nNGjXhCQ.css +0 -1
  68. package/webui/build/_app/immutable/assets/16.PP9XLDf7.css +0 -1
  69. package/webui/build/_app/immutable/assets/4.9wPHhVwv.css +0 -1
  70. package/webui/build/_app/immutable/assets/5.ClehBQ0g.css +0 -1
  71. package/webui/build/_app/immutable/assets/6.DSJfjJwx.css +0 -1
  72. package/webui/build/_app/immutable/assets/7.CrNxmd8B.css +0 -1
  73. package/webui/build/_app/immutable/assets/8.Ba5_jYIY.css +0 -1
  74. package/webui/build/_app/immutable/assets/9.m-LCx_kl.css +0 -1
  75. package/webui/build/_app/immutable/assets/BackToParentRoute.DGk-X5ow.css +0 -1
  76. package/webui/build/_app/immutable/assets/SourcesList.yTBBi3_m.css +0 -1
  77. package/webui/build/_app/immutable/assets/homeFeedPanelStore.CSvlNcpm.css +0 -1
  78. package/webui/build/_app/immutable/chunks/B-OsL1Ct.js +0 -1
  79. package/webui/build/_app/immutable/chunks/B2Q1a1-H.js +0 -2
  80. package/webui/build/_app/immutable/chunks/BK3WtZwv.js +0 -1
  81. package/webui/build/_app/immutable/chunks/BQqoDzLx.js +0 -1
  82. package/webui/build/_app/immutable/chunks/BUApaBEI.js +0 -1
  83. package/webui/build/_app/immutable/chunks/BbWUOQ_m.js +0 -1
  84. package/webui/build/_app/immutable/chunks/Bfc47y5P.js +0 -1
  85. package/webui/build/_app/immutable/chunks/Bp63qm3L.js +0 -1
  86. package/webui/build/_app/immutable/chunks/BwlaCkNX.js +0 -36
  87. package/webui/build/_app/immutable/chunks/C0J2-L94.js +0 -1
  88. package/webui/build/_app/immutable/chunks/CBY2biv-.js +0 -1
  89. package/webui/build/_app/immutable/chunks/CLOXMsDk.js +0 -36
  90. package/webui/build/_app/immutable/chunks/CVzlFH44.js +0 -1
  91. package/webui/build/_app/immutable/chunks/CWNeClHp.js +0 -6
  92. package/webui/build/_app/immutable/chunks/Cihqbfi5.js +0 -1
  93. package/webui/build/_app/immutable/chunks/D5GvRCv7.js +0 -1
  94. package/webui/build/_app/immutable/chunks/DEDI7Ecm.js +0 -1
  95. package/webui/build/_app/immutable/chunks/DFuhmi31.js +0 -1
  96. package/webui/build/_app/immutable/chunks/DMWEh-Ek.js +0 -2
  97. package/webui/build/_app/immutable/chunks/DgceFEv5.js +0 -1
  98. package/webui/build/_app/immutable/chunks/DjNLq3TF.js +0 -1
  99. package/webui/build/_app/immutable/chunks/Dt2CddFe.js +0 -1
  100. package/webui/build/_app/immutable/chunks/Dw782Tjs.js +0 -1
  101. package/webui/build/_app/immutable/chunks/SqCUd34O.js +0 -1
  102. package/webui/build/_app/immutable/chunks/Xy_fhzQq.js +0 -1
  103. package/webui/build/_app/immutable/chunks/hp4PFHFv.js +0 -1
  104. package/webui/build/_app/immutable/chunks/lk5LaiqA.js +0 -1
  105. package/webui/build/_app/immutable/chunks/mW5RwvnK.js +0 -13
  106. package/webui/build/_app/immutable/chunks/tB7QMF3U.js +0 -1
  107. package/webui/build/_app/immutable/chunks/xtNWTdbD.js +0 -1
  108. package/webui/build/_app/immutable/entry/app.B8zBPipq.js +0 -2
  109. package/webui/build/_app/immutable/entry/start.CxRCKeCl.js +0 -1
  110. package/webui/build/_app/immutable/nodes/0.ChLNE3xy.js +0 -11
  111. package/webui/build/_app/immutable/nodes/1.1N74-4Io.js +0 -1
  112. package/webui/build/_app/immutable/nodes/10.DY30t9Ib.js +0 -1
  113. package/webui/build/_app/immutable/nodes/11.ITuxnukH.js +0 -1
  114. package/webui/build/_app/immutable/nodes/12.qLzWqB1c.js +0 -1
  115. package/webui/build/_app/immutable/nodes/13.nT3SOzEB.js +0 -1
  116. package/webui/build/_app/immutable/nodes/14.BHnIxbVM.js +0 -1
  117. package/webui/build/_app/immutable/nodes/15.CLjT9il3.js +0 -1
  118. package/webui/build/_app/immutable/nodes/16.BD-mKCLN.js +0 -24
  119. package/webui/build/_app/immutable/nodes/17.BtYZF6FM.js +0 -1
  120. package/webui/build/_app/immutable/nodes/18.Ba_qJjp6.js +0 -1
  121. package/webui/build/_app/immutable/nodes/2.BYWOpaxy.js +0 -1
  122. package/webui/build/_app/immutable/nodes/3.Dt5o2Fmz.js +0 -1
  123. package/webui/build/_app/immutable/nodes/4.DTSxpKm7.js +0 -2
  124. package/webui/build/_app/immutable/nodes/5.Dy3vSsIP.js +0 -1
  125. package/webui/build/_app/immutable/nodes/6.DvclsL6H.js +0 -1
  126. package/webui/build/_app/immutable/nodes/7.D2nJy-Uz.js +0 -1
  127. package/webui/build/_app/immutable/nodes/8.C75mhrqs.js +0 -1
  128. package/webui/build/_app/immutable/nodes/9.Bp_QXw3w.js +0 -1
  129. package/webui/build/_app/version.json +0 -1
@@ -1,3 +1,7 @@
1
+ export const id = "xiaohongshu";
2
+ export const name = "Xiaohongshu";
3
+ export const listUrlPattern = /^https:\/\/(www\.)?xiaohongshu\.com\/user\/profile\/[^/?#]+\/?(?:[?#].*)?$/i;
4
+
1
5
  let _deps;
2
6
 
3
7
  // 小红书站点插件:用户主页列表抓取、笔记详情提取、认证流程
@@ -5,6 +9,41 @@ let _deps;
5
9
 
6
10
 
7
11
  const XHS_ORIGIN = "https://www.xiaohongshu.com";
12
+ const XHS_NOTE_PATH_RE = /^\/(?:explore|discovery\/item)\/([0-9a-f]{24})\/?$/i;
13
+ const XHS_NOTE_ID_RE = /^[0-9a-f]{24}$/i;
14
+ const XHS_NOTE_ID_IN_IMG_RE = /xhscdn\.com\/\d+\/([0-9a-f]{24})/i;
15
+ const XHS_PROFILE_USER_RE = /\/user\/profile\/([0-9a-f]{24})/i;
16
+
17
+
18
+ function hashNoteGuid(noteId) {
19
+ return _deps.createHash("sha256").update(`xhs:note:${noteId}`).digest("hex");
20
+ }
21
+
22
+
23
+ function extractProfileUserId(url) {
24
+ const m = String(url).match(XHS_PROFILE_USER_RE);
25
+ return m?.[1]?.toLowerCase() ?? null;
26
+ }
27
+
28
+
29
+ function buildExploreLink(noteId, origin) {
30
+ return `${origin.replace(/\/$/, "")}/explore/${noteId}`;
31
+ }
32
+
33
+
34
+ function extractNoteIdFromSection(section, profileUserId) {
35
+ for (const img of section.querySelectorAll('img[src*="xhscdn"]')) {
36
+ const src = img.getAttribute("src")?.trim() ?? "";
37
+ const fromImg = src.match(XHS_NOTE_ID_IN_IMG_RE);
38
+ if (fromImg?.[1] && fromImg[1] !== profileUserId) return fromImg[1].toLowerCase();
39
+ }
40
+ const html = section.outerHTML ?? "";
41
+ for (const match of html.match(/[0-9a-f]{24}/gi) ?? []) {
42
+ const id = match.toLowerCase();
43
+ if (id !== profileUserId && XHS_NOTE_ID_RE.test(id)) return id;
44
+ }
45
+ return null;
46
+ }
8
47
 
9
48
 
10
49
  function getOrigin(url) {
@@ -16,57 +55,99 @@ function getOrigin(url) {
16
55
  }
17
56
 
18
57
 
19
- function buildExploreLinkWithXsec(profileHref, origin) {
58
+ function normalizeXhsUrl(href, origin) {
59
+ try {
60
+ const url = new URL(href.replace(/&amp;/g, "&"), origin);
61
+ url.hash = "";
62
+ return url;
63
+ } catch {
64
+ return null;
65
+ }
66
+ }
67
+
68
+
69
+ function normalizeXhsItemLink(href, origin) {
70
+ const url = normalizeXhsUrl(href, origin);
71
+ if (!url) return null;
72
+
20
73
  try {
21
- const fullUrl = new URL(profileHref.replace(/&amp;/g, "&"), origin);
22
- const pathSegs = fullUrl.pathname.split("/").filter(Boolean);
23
- const noteId = pathSegs[pathSegs.length - 1];
24
- if (!noteId || !/^[0-9a-f]+$/i.test(noteId)) return null;
25
- const token = fullUrl.searchParams.get("xsec_token");
26
- const source = fullUrl.searchParams.get("xsec_source") ?? "pc_user";
27
- if (!token) return null;
28
- const explore = new URL(`/explore/${noteId}`, origin);
29
- explore.searchParams.set("xsec_token", token);
30
- explore.searchParams.set("xsec_source", source);
31
- return explore.href;
74
+ if (!/(^|\.)xiaohongshu\.com$/i.test(url.hostname)) return null;
75
+ const m = url.pathname.match(XHS_NOTE_PATH_RE);
76
+ if (!m?.[1]) return null;
77
+ return buildExploreLink(m[1].toLowerCase(), url.origin);
32
78
  } catch {
33
79
  return null;
34
80
  }
35
81
  }
36
82
 
37
83
 
84
+ function extractRedirectItemLink(href, origin) {
85
+ const wrapper = normalizeXhsUrl(href, origin);
86
+ if (!wrapper) return null;
87
+ if (!/\/website-login\/error\/?$/i.test(wrapper.pathname)) return null;
88
+
89
+ const redirectPath = wrapper.searchParams.get("redirectPath");
90
+ if (!redirectPath) return null;
91
+ return normalizeXhsItemLink(redirectPath, origin);
92
+ }
93
+
94
+
95
+ function extractListItemLink(section, origin, profileUserId) {
96
+ const noteId = extractNoteIdFromSection(section, profileUserId);
97
+ if (noteId) return buildExploreLink(noteId, origin);
98
+
99
+ const anchors = section.querySelectorAll("a[href]");
100
+ const candidates = [];
101
+ for (const anchor of anchors) {
102
+ const href = anchor.getAttribute("href")?.trim();
103
+ if (!href) continue;
104
+
105
+ const direct = normalizeXhsItemLink(href, origin);
106
+ if (direct) candidates.push(direct);
107
+
108
+ const redirected = extractRedirectItemLink(href, origin);
109
+ if (redirected) candidates.push(redirected);
110
+ }
111
+ return candidates[0] ?? null;
112
+ }
113
+
114
+
38
115
  function parseListHtml(html, url) {
39
116
  const root = _deps.parseHtml(html);
40
117
  const origin = getOrigin(url);
118
+ const profileUserId = extractProfileUserId(url);
41
119
  const feed = root.querySelector("#userPostedFeeds");
42
120
  if (!feed) return [];
43
- const sections = feed.querySelectorAll("section[data-v-79abd645][data-index]");
121
+ const sections = feed.querySelectorAll("section[data-index]");
44
122
  const items = [];
123
+ const seenNoteIds = new Set();
45
124
  for (const section of sections) {
46
- const profileWithToken = section.querySelector('a[href*="xsec_token="]');
47
- const profileHref = profileWithToken?.getAttribute("href")?.trim();
48
- let link;
49
- if (profileHref && profileHref.includes("/user/profile/")) {
50
- const withXsec = buildExploreLinkWithXsec(profileHref, origin);
51
- if (withXsec) link = withXsec;
52
- else link = new URL(profileHref.replace(/&amp;/g, "&"), origin).href;
53
- } else {
54
- const linkEl = section.querySelector('a[href^="/explore/"]');
55
- const href = linkEl?.getAttribute("href")?.trim();
56
- if (!href) continue;
57
- link = new URL(href, origin).href;
58
- }
59
- const titleEl = section.querySelector("span[data-v-51ec0135]");
60
- const title = (titleEl?.textContent ?? "").trim() || "笔记";
61
- const authorEl = section.querySelector('a[aria-current="page"] span');
125
+ const noteId = extractNoteIdFromSection(section, profileUserId);
126
+ const link = noteId
127
+ ? buildExploreLink(noteId, origin)
128
+ : extractListItemLink(section, origin, profileUserId);
129
+ if (!link) continue;
130
+ const dedupeKey = noteId ?? link;
131
+ if (seenNoteIds.has(dedupeKey)) continue;
132
+ seenNoteIds.add(dedupeKey);
133
+ const titleEl = section.querySelector("span[data-v-51ec0135]") ?? section.querySelector(".title span") ?? section.querySelector("span");
134
+ const title = (titleEl?.textContent ?? "").trim() || "Note";
135
+ const authorEl = section.querySelector('a[aria-current="page"] .name') ?? section.querySelector('a[aria-current="page"] span');
62
136
  const author = (authorEl?.textContent ?? "").trim() || undefined;
137
+ const imageEl = section.querySelector("img[data-xhs-img], img");
138
+ const image = imageEl?.getAttribute("src")?.trim() || undefined;
139
+ const summary = image ? undefined : title;
140
+ const guid = noteId ? hashNoteGuid(noteId) : _deps.createHash("sha256").update(link).digest("hex");
63
141
  items.push({
64
- guid: _deps.createHash("sha256").update(link).digest("hex"),
142
+ guid,
65
143
  title,
66
144
  link,
67
145
  pubDate: new Date(),
68
146
  author,
69
- summary: title,
147
+ summary,
148
+ imageUrl: image,
149
+ coverImg: image,
150
+ cover_img: image,
70
151
  });
71
152
  }
72
153
  return items;
@@ -240,9 +321,14 @@ function extractDetailHtml(html) {
240
321
  }
241
322
 
242
323
 
243
- async function fetchItems(sourceId, ctx) {
324
+ export async function fetchItems(sourceId, ctx) {
244
325
  _deps = ctx.deps;
245
- const { html, finalUrl } = await ctx.fetchHtml(sourceId);
326
+ const { html, finalUrl } = await ctx.fetchHtml(sourceId, {
327
+ waitMs: 3000,
328
+ waitForSelector: "#userPostedFeeds",
329
+ waitForSelectorTimeoutMs: 15000,
330
+ scrollBeforeSnapshot: { selector: "#userPostedFeeds", rounds: 8, pauseMs: 900 },
331
+ });
246
332
  return parseListHtml(html, finalUrl);
247
333
  }
248
334
 
@@ -258,26 +344,3 @@ async function enrichItem(item, ctx) {
258
344
  pubDate: detail.pubDate ?? item.pubDate,
259
345
  };
260
346
  }
261
-
262
-
263
- async function checkAuth(page, _url) {
264
- try {
265
- const loginButton = await page.$(".reds-button-new.login-btn.large.primary");
266
- return loginButton == null;
267
- } catch {
268
- return false;
269
- }
270
- }
271
-
272
-
273
- export default {
274
- id: "xiaohongshu",
275
- listUrlPattern: "https://xiaohongshu.com/user/profile/{userId}",
276
- fetchItems,
277
- enrichItem,
278
- checkAuth,
279
- loginUrl: "https://www.xiaohongshu.com/",
280
- domain: "xiaohongshu.com",
281
- loginTimeoutMs: 30 * 1000,
282
- pollIntervalMs: 2000,
283
- };
@@ -1,3 +1,7 @@
1
+ export const id = "zhipu-research";
2
+ export const name = "Zhipu Research";
3
+ export const listUrlPattern = /^https:\/\/(www\.)?zhipuai\.cn\/zh\/research\/?(?:[?#].*)?$/i;
4
+
1
5
  let _deps;
2
6
 
3
7
  // 智谱研究页插件:仅抓取列表,不做正文 enrich(兼容净化后的 HTML)
@@ -113,7 +117,7 @@ function buildItemsFromBlogsItems(blogsItems) {
113
117
  const summary = normalizeText(blog.resume_zh ?? blog.resume_en ?? "");
114
118
  const createdAt = String(blog.createAt ?? "").trim();
115
119
  const pubDate = createdAt ? new Date(createdAt) : new Date();
116
- const category = normalizeText(blog.tag_zh ?? blog.tag_en ?? "");
120
+ const _category = normalizeText(blog.tag_zh ?? blog.tag_en ?? "");
117
121
  items.push({
118
122
  guid: hashGuid(link),
119
123
  title,
@@ -263,7 +267,7 @@ function buildItemsFromLeafSequence(html, titleIdMap) {
263
267
  for (let i = 0; i < leafTexts.length; i += 1) {
264
268
  const dateText = leafTexts[i];
265
269
  if (!isDateText(dateText)) continue;
266
- const category = i > 0 && RESEARCH_TAGS.has(leafTexts[i - 1]) ? leafTexts[i - 1] : undefined;
270
+ const _category = i > 0 && RESEARCH_TAGS.has(leafTexts[i - 1]) ? leafTexts[i - 1] : undefined;
267
271
 
268
272
  let title = "";
269
273
  let summary;
@@ -300,7 +304,7 @@ function buildItemsFromLeafSequence(html, titleIdMap) {
300
304
  }
301
305
 
302
306
 
303
- async function fetchItems(sourceId, ctx) {
307
+ export async function fetchItems(sourceId, ctx) {
304
308
  _deps = ctx.deps;
305
309
  // 需要读取页面脚本里的 blogsItems(包含详情 id),因此这里禁用净化。
306
310
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 5000, purify: false });
@@ -325,10 +329,3 @@ async function fetchItems(sourceId, ctx) {
325
329
 
326
330
  throw new Error("[zhipu-research] 未解析到研究条目,页面结构可能已变化");
327
331
  }
328
-
329
-
330
- export default {
331
- id: "zhipu-research",
332
- listUrlPattern: ZHIPU_RESEARCH_URL,
333
- fetchItems,
334
- };
@@ -1,25 +1,25 @@
1
- /**
2
- * Site 插件模板(管理页「添加插件」会复制到 `.rssany/plugins/{id}.rssany.js`)
3
- * 修改 `id` 后请与文件名保持一致。
4
- *
5
- * 接口说明:app/scraper/sources/web/site.ts
6
- */
7
-
8
- export default {
9
- id: "__PLUGIN_ID__",
10
- listUrlPattern: __LIST_URL_PATTERN__,
11
- refreshInterval: "1day",
12
-
13
- /** sourceId 与订阅里 ref 一致;ctx 含 fetchHtml、extractItem、deps(parseHtml 等) */
14
- async fetchItems(sourceId, ctx) {
15
- const { html, finalUrl } = await ctx.fetchHtml(sourceId, {
16
- waitMs: 2000,
17
- purify: true,
18
- });
19
- const root = ctx.deps.parseHtml(html);
20
- void root;
21
- void finalUrl;
22
- // TODO: 用 ctx.deps.parseHtml 解析列表页,产出 { title, link, summary?, pubDate? } 等 FeedItem
23
- return [];
24
- },
25
- };
1
+ /**
2
+ * Site plugin template. The admin UI copies this file to .rssany/plugins/{id}.rssany.js.
3
+ * Plugin protocol: named exports. No export default is required.
4
+ *
5
+ * Interface: app/scraper/sources/web/site.ts
6
+ */
7
+
8
+ // Predefined fields stay together at the top.
9
+ export const id = "__PLUGIN_ID__";
10
+ export const name = "__PLUGIN_ID__";
11
+ // eslint-disable-next-line no-undef
12
+ export const listUrlPattern = __LIST_URL_PATTERN__;
13
+ export const refreshInterval = "1day";
14
+
15
+ export async function fetchItems(sourceId, ctx) {
16
+ const { html, finalUrl } = await ctx.fetchHtml(sourceId, {
17
+ waitMs: 2000,
18
+ purify: true,
19
+ });
20
+ const root = ctx.deps.parseHtml(html);
21
+ void root;
22
+ void finalUrl;
23
+ // TODO: Parse the list page and return FeedItem objects.
24
+ return [];
25
+ }
@@ -1,7 +1,7 @@
1
- # statics
2
-
3
- 静态 HTML 页面:home(首页)、401、404。
4
-
5
- - **home.html**:首页,含 Try This 示例链接;下方「需登录的站点」从 `/plugins` 拉取,每个站点可点击「打开登录页」调用 `POST /auth/ensure?siteId=...` 批量做登录。
6
- - **401.html**:需登录时返回;占位符 `{{listUrl}}` 由 router 注入为失败请求的订阅地址;页内「打开有头登录页」按钮调用 `POST /auth/ensure?url=...` 弹出有头浏览器完成登录。
7
- - **404.html**:无匹配站点时返回。
1
+ # statics
2
+
3
+ 静态 HTML 页面:home(首页)、401、404。
4
+
5
+ - **home.html**:首页,含 Try This 示例链接;下方「需登录的站点」从 `/plugins` 拉取,每个站点可点击「打开登录页」调用 `POST /auth/ensure?siteId=...` 批量做登录。
6
+ - **401.html**:需登录时返回;占位符 `{{listUrl}}` 由 router 注入为失败请求的订阅地址;页内「打开有头登录页」按钮调用 `POST /auth/ensure?url=...` 弹出有头浏览器完成登录。
7
+ - **404.html**:无匹配站点时返回。
package/bin/rssany.js CHANGED
@@ -1,6 +1,226 @@
1
- #!/usr/bin/env node
2
- if (process.argv[2] === "reset") {
3
- await import(new URL("../scripts/reset.mjs", import.meta.url));
4
- } else {
5
- await import(new URL("../dist/index.js", import.meta.url));
6
- }
1
+ #!/usr/bin/env node
2
+ import { spawn } from "node:child_process";
3
+ import { closeSync, openSync } from "node:fs";
4
+ import { access, mkdir, readFile, rm, writeFile } from "node:fs/promises";
5
+ import http from "node:http";
6
+ import { homedir, networkInterfaces } from "node:os";
7
+ import { dirname, join } from "node:path";
8
+ import { fileURLToPath } from "node:url";
9
+
10
+ const command = process.argv[2];
11
+ const binDir = dirname(fileURLToPath(import.meta.url));
12
+ const packageRoot = join(binDir, "..");
13
+ const userDir = process.env.RSSANY_USER_DIR?.trim() || join(homedir(), ".rssany");
14
+ const pidPath = join(userDir, "rssany.pid");
15
+ const logPath = join(userDir, "rssany.log");
16
+ const port = Number(process.env.PORT) || 18473;
17
+ const serverOrigin = `http://127.0.0.1:${port}`;
18
+
19
+ async function pathExists(path) {
20
+ try {
21
+ await access(path);
22
+ return true;
23
+ } catch {
24
+ return false;
25
+ }
26
+ }
27
+
28
+ async function readPid() {
29
+ try {
30
+ const raw = await readFile(pidPath, "utf-8");
31
+ const pid = Number(raw.trim());
32
+ return Number.isInteger(pid) && pid > 0 ? pid : null;
33
+ } catch {
34
+ return null;
35
+ }
36
+ }
37
+
38
+ function isProcessRunning(pid) {
39
+ try {
40
+ process.kill(pid, 0);
41
+ return true;
42
+ } catch {
43
+ return false;
44
+ }
45
+ }
46
+
47
+ function getLanUrl() {
48
+ const lanIp = Object.values(networkInterfaces())
49
+ .flat()
50
+ .find((iface) => iface?.family === "IPv4" && !iface.internal)?.address;
51
+ return lanIp ? `http://${lanIp}:${port}/` : null;
52
+ }
53
+
54
+ function printAddress(prefix = "RssAny 已启动") {
55
+ console.log(`${prefix}: http://127.0.0.1:${port}/`);
56
+ const lanUrl = getLanUrl();
57
+ if (lanUrl) console.log(`局域网访问: ${lanUrl}`);
58
+ }
59
+
60
+ function printUsage() {
61
+ console.log("用法: rssany <start|stop|reset|crawl>");
62
+ console.log(" rssany start 后台启动服务并输出访问地址");
63
+ console.log(" rssany stop 关闭后台服务并输出执行状态");
64
+ console.log(" rssany reset 重置本地数据");
65
+ console.log(" rssany crawl <ref> 按内部抓取链路拉取指定信源");
66
+ }
67
+
68
+ async function canConnectToServer() {
69
+ return new Promise((resolve) => {
70
+ const req = http.get(`${serverOrigin}/api/server-info`, (res) => {
71
+ res.resume();
72
+ resolve(true);
73
+ });
74
+ req.setTimeout(500, () => {
75
+ req.destroy();
76
+ resolve(false);
77
+ });
78
+ req.on("error", () => resolve(false));
79
+ });
80
+ }
81
+
82
+ async function waitForServer(timeoutMs = 5000) {
83
+ const startTime = Date.now();
84
+ while (Date.now() - startTime < timeoutMs) {
85
+ if (await canConnectToServer()) return true;
86
+ await new Promise((resolve) => setTimeout(resolve, 250));
87
+ }
88
+ return false;
89
+ }
90
+
91
+ async function start() {
92
+ await mkdir(userDir, { recursive: true });
93
+
94
+ const currentPid = await readPid();
95
+ if (currentPid && isProcessRunning(currentPid)) {
96
+ printAddress(`RssAny 已在运行 (pid ${currentPid})`);
97
+ return;
98
+ }
99
+
100
+ const entry = join(packageRoot, "dist", "index.js");
101
+ if (!(await pathExists(entry))) {
102
+ console.error("未找到 dist/index.js,请先构建项目或重新安装 rssany。");
103
+ process.exitCode = 1;
104
+ return;
105
+ }
106
+
107
+ const logFd = openSync(logPath, "a");
108
+ const child = spawn(process.execPath, [entry], {
109
+ cwd: process.cwd(),
110
+ detached: true,
111
+ env: process.env,
112
+ stdio: ["ignore", logFd, logFd],
113
+ });
114
+ closeSync(logFd);
115
+
116
+ await writeFile(pidPath, `${child.pid}\n`, "utf-8");
117
+ console.log(`日志: ${logPath}`);
118
+ if (await waitForServer()) {
119
+ child.unref();
120
+ printAddress(`RssAny 已启动 (pid ${child.pid})`);
121
+ return;
122
+ }
123
+
124
+ child.unref();
125
+ console.error(`RssAny 启动未完成,请查看日志: ${logPath}`);
126
+ process.exitCode = 1;
127
+ }
128
+
129
+ async function stop() {
130
+ const pid = await readPid();
131
+ if (!pid) {
132
+ console.log("RssAny 未运行:没有找到 pid 文件。");
133
+ return;
134
+ }
135
+
136
+ if (!isProcessRunning(pid)) {
137
+ await rm(pidPath, { force: true });
138
+ console.log(`RssAny 未运行:已清理失效 pid ${pid}。`);
139
+ return;
140
+ }
141
+
142
+ process.kill(pid, "SIGTERM");
143
+ await rm(pidPath, { force: true });
144
+ console.log(`RssAny 已发送停止信号 (pid ${pid})。`);
145
+ }
146
+
147
+ function readCrawlRef(args) {
148
+ const refFlagIndex = args.findIndex((arg) => arg === "--ref");
149
+ if (refFlagIndex >= 0) return args[refFlagIndex + 1]?.trim() || "";
150
+ const refEquals = args.find((arg) => arg.startsWith("--ref="));
151
+ if (refEquals) return refEquals.slice("--ref=".length).trim();
152
+ return args.find((arg) => !arg.startsWith("-"))?.trim() || "";
153
+ }
154
+
155
+ async function postJson(path, body) {
156
+ const res = await fetch(`${serverOrigin}${path}`, {
157
+ method: "POST",
158
+ headers: { "Content-Type": "application/json" },
159
+ body: JSON.stringify(body),
160
+ });
161
+ const data = await res.json().catch(() => ({}));
162
+ if (!res.ok) {
163
+ throw new Error(data.error || `HTTP ${res.status}`);
164
+ }
165
+ return data;
166
+ }
167
+
168
+ async function getJson(path) {
169
+ const res = await fetch(`${serverOrigin}${path}`);
170
+ const data = await res.json().catch(() => ({}));
171
+ if (!res.ok) {
172
+ throw new Error(data.error || `HTTP ${res.status}`);
173
+ }
174
+ return data;
175
+ }
176
+
177
+ async function pollTask(taskId, timeoutMs = 120000) {
178
+ const start = Date.now();
179
+ while (Date.now() - start < timeoutMs) {
180
+ const task = await getJson(`/api/tasks/${encodeURIComponent(taskId)}`);
181
+ if (task.status === "done") return task;
182
+ if (task.status === "error") {
183
+ throw new Error(task.error || "抓取失败");
184
+ }
185
+ await new Promise((resolve) => setTimeout(resolve, 800));
186
+ }
187
+ throw new Error("抓取超时");
188
+ }
189
+
190
+ async function crawl() {
191
+ const ref = readCrawlRef(process.argv.slice(3));
192
+ if (!ref) {
193
+ console.error("ref 不能为空。用法: rssany crawl <ref>");
194
+ process.exitCode = 1;
195
+ return;
196
+ }
197
+ if (!(await canConnectToServer())) {
198
+ console.error(`RssAny 服务未运行,请先执行 rssany start。目标: ${serverOrigin}`);
199
+ process.exitCode = 1;
200
+ return;
201
+ }
202
+ try {
203
+ const { taskId } = await postJson("/api/tasks", { type: "source-pull", ref });
204
+ if (!taskId) throw new Error("后端未返回 taskId");
205
+ console.log(`crawl 已提交: ${ref}`);
206
+ console.log(`task: ${taskId}`);
207
+ await pollTask(taskId);
208
+ console.log("crawl 完成");
209
+ } catch (err) {
210
+ console.error(err instanceof Error ? err.message : String(err));
211
+ process.exitCode = 1;
212
+ }
213
+ }
214
+
215
+ if (command === "reset") {
216
+ await import(new URL("../scripts/reset.mjs", import.meta.url));
217
+ } else if (command === "start") {
218
+ await start();
219
+ } else if (command === "stop") {
220
+ await stop();
221
+ } else if (command === "crawl") {
222
+ await crawl();
223
+ } else {
224
+ printUsage();
225
+ if (command) process.exitCode = 1;
226
+ }