rssany 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. package/README.md +22 -22
  2. package/app/plugins/builtin/agi-eval-evaluation.rssany.js +6 -7
  3. package/app/plugins/builtin/amii-research-talent.rssany.js +6 -7
  4. package/app/plugins/builtin/anthropic-research.rssany.js +6 -8
  5. package/app/plugins/builtin/appen-resources.rssany.js +6 -7
  6. package/app/plugins/builtin/baai-wudao-paper-article.rssany.js +9 -10
  7. package/app/plugins/builtin/baaidata-csdn.rssany.js +6 -7
  8. package/app/plugins/builtin/baidu-research.rssany.js +5 -8
  9. package/app/plugins/builtin/brightdata-blog.rssany.js +6 -11
  10. package/app/plugins/builtin/bytedance-seed-research.rssany.js +5 -7
  11. package/app/plugins/builtin/email.rssany.js +9 -9
  12. package/app/plugins/builtin/five-radar.rssany.js +9 -11
  13. package/app/plugins/builtin/flageval-news.rssany.js +5 -7
  14. package/app/plugins/builtin/google-deepmind-research.rssany.js +6 -8
  15. package/app/plugins/builtin/google-research-datasets.rssany.js +6 -8
  16. package/app/plugins/builtin/google-research.rssany.js +6 -8
  17. package/app/plugins/builtin/hacker-news-newest.rssany.js +7 -9
  18. package/app/plugins/builtin/harvard-dataverse.rssany.js +6 -8
  19. package/app/plugins/builtin/huaweicloud-bbs-blogs.rssany.js +7 -9
  20. package/app/plugins/builtin/lingowhale.rssany.js +7 -9
  21. package/app/plugins/builtin/meituan-tech.rssany.js +7 -10
  22. package/app/plugins/builtin/meta-ai-publications.rssany.js +6 -11
  23. package/app/plugins/builtin/mila-quebec.rssany.js +6 -8
  24. package/app/plugins/builtin/mit-csail-research.rssany.js +7 -9
  25. package/app/plugins/builtin/moonshot.rssany.js +6 -8
  26. package/app/plugins/builtin/opendatalab-news.rssany.js +6 -7
  27. package/app/plugins/builtin/opendatalab.rssany.js +5 -6
  28. package/app/plugins/builtin/opendrivelab-autonomous-driving.rssany.js +6 -7
  29. package/app/plugins/builtin/opendrivelab-embodiedai.rssany.js +7 -8
  30. package/app/plugins/builtin/opendrivelab-publications.rssany.js +6 -8
  31. package/app/plugins/builtin/opendrivelab.rssany.js +7 -8
  32. package/app/plugins/builtin/paperswithcode.rssany.js +6 -8
  33. package/app/plugins/builtin/pjlab-adg-publications.rssany.js +7 -9
  34. package/app/plugins/builtin/rss.rssany.js +11 -12
  35. package/app/plugins/builtin/selectdataset.rssany.js +6 -8
  36. package/app/plugins/builtin/sensetime-tech-achievements.rssany.js +7 -8
  37. package/app/plugins/builtin/supervisely-blog.rssany.js +6 -8
  38. package/app/plugins/builtin/theinformation-briefings.rssany.js +7 -13
  39. package/app/plugins/builtin/uci-ml-repository.rssany.js +6 -7
  40. package/app/plugins/builtin/venturebeat.rssany.js +7 -9
  41. package/app/plugins/builtin/worldlabs.rssany.js +6 -8
  42. package/app/plugins/builtin/x.rssany.js +7 -9
  43. package/app/plugins/builtin/xiaohongshu.rssany.js +119 -56
  44. package/app/plugins/builtin/zhipu-research.rssany.js +5 -8
  45. package/app/plugins/site.rssany.js +25 -26
  46. package/{statics → app/statics}/README.md +7 -7
  47. package/bin/rssany.js +226 -6
  48. package/dist/index.js +209 -152
  49. package/dist/index.js.map +1 -1
  50. package/package.json +16 -9
  51. package/scripts/dev.mjs +114 -0
  52. package/scripts/reset.mjs +1 -1
  53. package/init/config.json +0 -17
  54. package/init/sources.json +0 -353
  55. package/statics/401.html +0 -56
  56. package/statics/404.html +0 -12
  57. package/statics/image.png +0 -0
  58. package/webui/build/200.html +0 -49
  59. package/webui/build/_app/env.js +0 -1
  60. package/webui/build/_app/immutable/assets/0.BB88QFoe.css +0 -1
  61. package/webui/build/_app/immutable/assets/10.Dj8_pmut.css +0 -1
  62. package/webui/build/_app/immutable/assets/11.qYZMiTb0.css +0 -1
  63. package/webui/build/_app/immutable/assets/12.DfJcfUWl.css +0 -1
  64. package/webui/build/_app/immutable/assets/13.BhO9zvFi.css +0 -1
  65. package/webui/build/_app/immutable/assets/14.CujIhjQK.css +0 -1
  66. package/webui/build/_app/immutable/assets/15.nNGjXhCQ.css +0 -1
  67. package/webui/build/_app/immutable/assets/16.PP9XLDf7.css +0 -1
  68. package/webui/build/_app/immutable/assets/4.9wPHhVwv.css +0 -1
  69. package/webui/build/_app/immutable/assets/5.B-dPiwB7.css +0 -1
  70. package/webui/build/_app/immutable/assets/6.B27N7pdA.css +0 -1
  71. package/webui/build/_app/immutable/assets/7.CrNxmd8B.css +0 -1
  72. package/webui/build/_app/immutable/assets/8.Cgji2b15.css +0 -1
  73. package/webui/build/_app/immutable/assets/9.BsCIAvn3.css +0 -1
  74. package/webui/build/_app/immutable/assets/BackToParentRoute.DGk-X5ow.css +0 -1
  75. package/webui/build/_app/immutable/assets/SourcesList.yTBBi3_m.css +0 -1
  76. package/webui/build/_app/immutable/assets/homeFeedPanelStore.CSvlNcpm.css +0 -1
  77. package/webui/build/_app/immutable/chunks/5LVkDJzw.js +0 -1
  78. package/webui/build/_app/immutable/chunks/B-OsL1Ct.js +0 -1
  79. package/webui/build/_app/immutable/chunks/B2Q1a1-H.js +0 -2
  80. package/webui/build/_app/immutable/chunks/BK3WtZwv.js +0 -1
  81. package/webui/build/_app/immutable/chunks/BQqoDzLx.js +0 -1
  82. package/webui/build/_app/immutable/chunks/BUApaBEI.js +0 -1
  83. package/webui/build/_app/immutable/chunks/BbWUOQ_m.js +0 -1
  84. package/webui/build/_app/immutable/chunks/Bfc47y5P.js +0 -1
  85. package/webui/build/_app/immutable/chunks/Bns1MuyM.js +0 -36
  86. package/webui/build/_app/immutable/chunks/Bp63qm3L.js +0 -1
  87. package/webui/build/_app/immutable/chunks/Bu9HsS-V.js +0 -1
  88. package/webui/build/_app/immutable/chunks/CBY2biv-.js +0 -1
  89. package/webui/build/_app/immutable/chunks/CVzlFH44.js +0 -1
  90. package/webui/build/_app/immutable/chunks/CWNeClHp.js +0 -6
  91. package/webui/build/_app/immutable/chunks/Cihqbfi5.js +0 -1
  92. package/webui/build/_app/immutable/chunks/CmjOpds-.js +0 -1
  93. package/webui/build/_app/immutable/chunks/D5GvRCv7.js +0 -1
  94. package/webui/build/_app/immutable/chunks/DEDI7Ecm.js +0 -1
  95. package/webui/build/_app/immutable/chunks/DFuhmi31.js +0 -1
  96. package/webui/build/_app/immutable/chunks/DMWEh-Ek.js +0 -2
  97. package/webui/build/_app/immutable/chunks/DjNLq3TF.js +0 -1
  98. package/webui/build/_app/immutable/chunks/Dt2CddFe.js +0 -1
  99. package/webui/build/_app/immutable/chunks/Dw782Tjs.js +0 -1
  100. package/webui/build/_app/immutable/chunks/Xy_fhzQq.js +0 -1
  101. package/webui/build/_app/immutable/chunks/bvuf_jZd.js +0 -36
  102. package/webui/build/_app/immutable/chunks/hp4PFHFv.js +0 -1
  103. package/webui/build/_app/immutable/chunks/lk5LaiqA.js +0 -1
  104. package/webui/build/_app/immutable/chunks/mW5RwvnK.js +0 -13
  105. package/webui/build/_app/immutable/chunks/tB7QMF3U.js +0 -1
  106. package/webui/build/_app/immutable/chunks/xtNWTdbD.js +0 -1
  107. package/webui/build/_app/immutable/entry/app.BVkrDt5l.js +0 -2
  108. package/webui/build/_app/immutable/entry/start.D3Q-BMMd.js +0 -1
  109. package/webui/build/_app/immutable/nodes/0.I1lQdWMl.js +0 -11
  110. package/webui/build/_app/immutable/nodes/1.BiQQfx2j.js +0 -1
  111. package/webui/build/_app/immutable/nodes/10.CvfUsqsw.js +0 -1
  112. package/webui/build/_app/immutable/nodes/11.B4LHPNL6.js +0 -1
  113. package/webui/build/_app/immutable/nodes/12.DVFJuIWI.js +0 -1
  114. package/webui/build/_app/immutable/nodes/13.nT3SOzEB.js +0 -1
  115. package/webui/build/_app/immutable/nodes/14.DfaAf0f8.js +0 -1
  116. package/webui/build/_app/immutable/nodes/15.CMzkX9OK.js +0 -1
  117. package/webui/build/_app/immutable/nodes/16.zPgTQNze.js +0 -24
  118. package/webui/build/_app/immutable/nodes/17.BtYZF6FM.js +0 -1
  119. package/webui/build/_app/immutable/nodes/18.BIzqhTqv.js +0 -1
  120. package/webui/build/_app/immutable/nodes/2.BYWOpaxy.js +0 -1
  121. package/webui/build/_app/immutable/nodes/3.B8Viux9S.js +0 -1
  122. package/webui/build/_app/immutable/nodes/4.DTSxpKm7.js +0 -2
  123. package/webui/build/_app/immutable/nodes/5.B6fR3n6J.js +0 -2
  124. package/webui/build/_app/immutable/nodes/6.j2O5Mwjv.js +0 -1
  125. package/webui/build/_app/immutable/nodes/7.Bd2USIrl.js +0 -1
  126. package/webui/build/_app/immutable/nodes/8.Bw_d63B_.js +0 -1
  127. package/webui/build/_app/immutable/nodes/9.pMMi5PP6.js +0 -1
  128. package/webui/build/_app/version.json +0 -1
@@ -1,7 +1,11 @@
1
+ export const id = "sensetime-tech-achievements";
2
+ export const name = "Sensetime Tech Achievements";
3
+ export const listUrlPattern = /^https:\/\/(www\.)?sensetime\.com\/cn\/technology-achievements(\?.*)?$/i;
4
+
1
5
  let _deps;
2
6
 
3
7
 
4
- const SITE_ID = "sensetime-tech-achievements";
8
+ const SITE_ID = id;
5
9
  const DATE_RE = /\b(20\d{2})-(\d{1,2})-(\d{1,2})\b/;
6
10
 
7
11
  function normalizeText(text) {
@@ -18,7 +22,7 @@ function toAbsoluteUrl(rawHref, baseUrl) {
18
22
  if (!href || href.startsWith("#") || href.startsWith("javascript:")) return null;
19
23
  try {
20
24
  const url = new URL(href, baseUrl);
21
- if (!/^https?:$/i.test(url.protocol)) return null;
25
+ if (!/^https:$/i.test(url.protocol)) return null;
22
26
  return url.href;
23
27
  } catch {
24
28
  return null;
@@ -135,7 +139,7 @@ async function fetchItemsFromApi(finalUrl) {
135
139
  return items;
136
140
  }
137
141
 
138
- async function fetchItems(sourceId, ctx) {
142
+ export async function fetchItems(sourceId, ctx) {
139
143
  _deps = ctx.deps;
140
144
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 3500 });
141
145
  const items = parseItemsFromHtml(html, finalUrl);
@@ -147,8 +151,3 @@ async function fetchItems(sourceId, ctx) {
147
151
  throw new Error(`[${SITE_ID}] 未解析到学术成果条目,页面结构或接口可能已变化`);
148
152
  }
149
153
 
150
- export default {
151
- id: SITE_ID,
152
- listUrlPattern: /^https?:\/\/(www\.)?sensetime\.com\/cn\/technology-achievements(\?.*)?$/i,
153
- fetchItems,
154
- };
@@ -1,3 +1,7 @@
1
+ export const id = "supervisely-blog";
2
+ export const name = "Supervisely Blog";
3
+ export const listUrlPattern = /^https:\/\/(www\.)?supervisely\.com\/blog\/?(?:\?.*)?$/i;
4
+
1
5
  let _deps;
2
6
 
3
7
  // Supervisely Blog 插件:抓取列表页并解析为 FeedItem(不做正文 enrich)
@@ -35,7 +39,7 @@ function toAbsoluteUrl(href, baseUrl) {
35
39
  if (!href) return null;
36
40
  try {
37
41
  const url = new URL(href, baseUrl);
38
- if (!/^https?:$/i.test(url.protocol)) return null;
42
+ if (!/^https:$/i.test(url.protocol)) return null;
39
43
  return url.href;
40
44
  } catch {
41
45
  return null;
@@ -136,7 +140,7 @@ function parseFromHeadingFallback(root, baseUrl) {
136
140
  }
137
141
 
138
142
 
139
- async function fetchItems(sourceId, ctx) {
143
+ export async function fetchItems(sourceId, ctx) {
140
144
  _deps = ctx.deps;
141
145
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 3500 });
142
146
  const root = _deps.parseHtml(html);
@@ -151,9 +155,3 @@ async function fetchItems(sourceId, ctx) {
151
155
  return items;
152
156
  }
153
157
 
154
-
155
- export default {
156
- id: "supervisely-blog",
157
- listUrlPattern: /^https?:\/\/(www\.)?supervisely\.com\/blog\/?(?:\?.*)?$/i,
158
- fetchItems,
159
- };
@@ -1,12 +1,14 @@
1
+ export const id = "theinformation";
2
+ export const name = "Theinformation";
3
+ export const listUrlPattern = /^https:\/\/(www\.)?theinformation\.com\/(briefings|features\/[^/]+)\/?(\?.*)?$/i;
4
+ export const refreshInterval = "1h";
5
+
1
6
  let _deps;
2
7
 
3
8
  // The Information — AI Agenda 和 Briefings 列表页
4
9
  // 当前结构:.article.feed-item,标题 h3.title a,分类 .category-content a,作者 .authors,摘要 .recent-excerpt .long-excerpt
5
10
 
6
11
  const ORIGIN = "https://www.theinformation.com";
7
- const LIST_URL_RE =
8
- /^https?:\/\/(www\.)?theinformation\.com\/(briefings|features\/[^/]+)\/?(\?.*)?$/i;
9
-
10
12
 
11
13
  function normalizeText(text) {
12
14
  return (text ?? "").replace(/\s+/g, " ").trim();
@@ -24,7 +26,7 @@ function toAbsoluteHttpUrl(rawHref, baseUrl) {
24
26
  if (!href || href.startsWith("#") || href.startsWith("javascript:")) return null;
25
27
  try {
26
28
  const url = new URL(href, baseUrl);
27
- if (!/^https?:$/i.test(url.protocol)) return null;
29
+ if (!/^https:$/i.test(url.protocol)) return null;
28
30
  return url.href;
29
31
  } catch {
30
32
  return null;
@@ -119,7 +121,7 @@ function parseFeedItems(html, pageUrl) {
119
121
  }
120
122
 
121
123
 
122
- async function fetchItems(sourceId, ctx) {
124
+ export async function fetchItems(sourceId, ctx) {
123
125
  _deps = ctx.deps;
124
126
  const { html, finalUrl, status } = await ctx.fetchHtml(sourceId, {
125
127
  waitMs: 5000,
@@ -140,11 +142,3 @@ async function fetchItems(sourceId, ctx) {
140
142
  items.sort((a, b) => b.pubDate.getTime() - a.pubDate.getTime());
141
143
  return items;
142
144
  }
143
-
144
-
145
- export default {
146
- id: "theinformation",
147
- listUrlPattern: LIST_URL_RE,
148
- refreshInterval: "1h",
149
- fetchItems,
150
- };
@@ -1,3 +1,7 @@
1
+ export const id = "uci-ml-repository";
2
+ export const name = "UCI Ml Repository";
3
+ export const listUrlPattern = /^https:\/\/archive\.ics\.uci\.edu(?:\/(?:datasets\/?)?)?(?:\?.*)?$/i;
4
+
1
5
  let _deps;
2
6
 
3
7
 
@@ -16,7 +20,7 @@ function resolveDatasetLink(rawHref, baseUrl) {
16
20
  if (!href || href.startsWith("#") || href.startsWith("javascript:")) return null;
17
21
  try {
18
22
  const url = new URL(href, baseUrl);
19
- if (!/^https?:$/i.test(url.protocol)) return null;
23
+ if (!/^https:$/i.test(url.protocol)) return null;
20
24
  if (url.hostname !== "archive.ics.uci.edu") return null;
21
25
  if (!/^\/dataset\/\d+\/[^/?#]+$/i.test(url.pathname)) return null;
22
26
  url.search = "";
@@ -89,7 +93,7 @@ function parseFromGenericAnchors(root, baseUrl) {
89
93
  return items;
90
94
  }
91
95
 
92
- async function fetchItems(sourceId, ctx) {
96
+ export async function fetchItems(sourceId, ctx) {
93
97
  _deps = ctx.deps;
94
98
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 4000 });
95
99
  const baseUrl = finalUrl || sourceId || UCI_ORIGIN;
@@ -104,8 +108,3 @@ async function fetchItems(sourceId, ctx) {
104
108
  throw new Error("[uci-ml-repository] 未解析到数据集条目,页面结构可能已变化");
105
109
  }
106
110
 
107
- export default {
108
- id: "uci-ml-repository",
109
- listUrlPattern: /^https?:\/\/archive\.ics\.uci\.edu(?:\/(?:datasets\/?)?)?(?:\?.*)?$/i,
110
- fetchItems,
111
- };
@@ -1,3 +1,8 @@
1
+ export const id = "venturebeat";
2
+ export const name = "Venturebeat";
3
+ export const listUrlPattern = /^https:\/\/(www\.)?venturebeat\.com\/?(\?.*)?$/i;
4
+ export const refreshInterval = "1h";
5
+
1
6
  let _deps;
2
7
 
3
8
  // VentureBeat 插件:通过官方 RSS Feed 拉取列表,规避首页安全检查页
@@ -35,7 +40,7 @@ function toFeedUrl(sourceId) {
35
40
 
36
41
  function mapFeedItem(item) {
37
42
  const link = normalizeText(item.link ?? "");
38
- if (!/^https?:\/\//i.test(link)) return null;
43
+ if (!/^https:\/\//i.test(link)) return null;
39
44
 
40
45
  const title = normalizeText(item.title ?? "");
41
46
  const pubDate = toValidDate(item.isoDate ?? item.pubDate);
@@ -53,7 +58,7 @@ function mapFeedItem(item) {
53
58
  }
54
59
 
55
60
 
56
- async function fetchItems(sourceId, _ctx) {
61
+ export async function fetchItems(sourceId, _ctx) {
57
62
  _deps = _ctx.deps;
58
63
  const parser = new _deps.RssParser({
59
64
  timeout: 15_000,
@@ -88,10 +93,3 @@ async function fetchItems(sourceId, _ctx) {
88
93
  return items;
89
94
  }
90
95
 
91
-
92
- export default {
93
- id: "venturebeat",
94
- listUrlPattern: /^https?:\/\/(www\.)?venturebeat\.com\/?(\?.*)?$/i,
95
- refreshInterval: "1h",
96
- fetchItems,
97
- };
@@ -1,3 +1,7 @@
1
+ export const id = "worldlabs";
2
+ export const name = "Worldlabs";
3
+ export const listUrlPattern = /^https:\/\/(www\.)?worldlabs\.ai\/blog(\?.*)?$/i;
4
+
1
5
  let _deps;
2
6
 
3
7
  // World Labs 博客插件:抓取 Research & Insights 列表页,输出 FeedItem(不含 enrich)
@@ -39,7 +43,7 @@ function toAbsoluteHttpUrl(rawHref, baseUrl) {
39
43
  if (!href || href.startsWith("#") || href.startsWith("javascript:")) return null;
40
44
  try {
41
45
  const url = new URL(href, baseUrl);
42
- if (!/^https?:$/i.test(url.protocol)) return null;
46
+ if (!/^https:$/i.test(url.protocol)) return null;
43
47
  return url.href;
44
48
  } catch {
45
49
  return null;
@@ -99,7 +103,7 @@ function parseCard(anchor, finalUrl) {
99
103
  }
100
104
 
101
105
 
102
- async function fetchItems(sourceId, ctx) {
106
+ export async function fetchItems(sourceId, ctx) {
103
107
  _deps = ctx.deps;
104
108
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 3500 });
105
109
  const root = _deps.parseHtml(html);
@@ -121,9 +125,3 @@ async function fetchItems(sourceId, ctx) {
121
125
  return items;
122
126
  }
123
127
 
124
-
125
- export default {
126
- id: "worldlabs",
127
- listUrlPattern: /^https?:\/\/(www\.)?worldlabs\.ai\/blog(\?.*)?$/i,
128
- fetchItems,
129
- };
@@ -1,3 +1,7 @@
1
+ export const id = "x";
2
+ export const name = "X";
3
+ export const listUrlPattern = /^https:\/\/(www\.)?x\.com\/[^/?#]+\/?(?:[?#].*)?$/i;
4
+
1
5
  let _deps;
2
6
 
3
7
  // X (Twitter) 站点插件:用户主页列表抓取与解析
@@ -161,13 +165,13 @@ function extractMediaUrl(article) {
161
165
  const video = article.querySelector("video[poster]");
162
166
  if (video) {
163
167
  const poster = video.getAttribute("poster");
164
- if (poster && /^https?:\/\//i.test(poster)) return poster;
168
+ if (poster && /^https:\/\//i.test(poster)) return poster;
165
169
  }
166
170
  for (const img of article.querySelectorAll(
167
171
  '[data-testid="card.wrapper"] img[src*="twimg.com/card_img"], [data-testid="card.wrapper"] img[src*="pbs.twimg.com/card_img"]',
168
172
  )) {
169
173
  const src = img.getAttribute("src");
170
- if (src && /^https?:\/\//i.test(src) && !/profile_images/i.test(src)) {
174
+ if (src && /^https:\/\//i.test(src) && !/profile_images/i.test(src)) {
171
175
  return normalizeCardImgUrl(src);
172
176
  }
173
177
  }
@@ -288,7 +292,7 @@ function entriesToFeedItems(entries) {
288
292
  }
289
293
 
290
294
 
291
- async function fetchItems(sourceId, ctx) {
295
+ export async function fetchItems(sourceId, ctx) {
292
296
  _deps = ctx.deps;
293
297
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 6000 });
294
298
  const root = _deps.parseHtml(html);
@@ -320,9 +324,3 @@ async function fetchItems(sourceId, ctx) {
320
324
  throw new Error(`[X] ${message}`);
321
325
  }
322
326
 
323
-
324
- export default {
325
- id: "x",
326
- listUrlPattern: "https://x.com/{username}",
327
- fetchItems,
328
- };
@@ -1,3 +1,7 @@
1
+ export const id = "xiaohongshu";
2
+ export const name = "Xiaohongshu";
3
+ export const listUrlPattern = /^https:\/\/(www\.)?xiaohongshu\.com\/user\/profile\/[^/?#]+\/?(?:[?#].*)?$/i;
4
+
1
5
  let _deps;
2
6
 
3
7
  // 小红书站点插件:用户主页列表抓取、笔记详情提取、认证流程
@@ -5,6 +9,41 @@ let _deps;
5
9
 
6
10
 
7
11
  const XHS_ORIGIN = "https://www.xiaohongshu.com";
12
+ const XHS_NOTE_PATH_RE = /^\/(?:explore|discovery\/item)\/([0-9a-f]{24})\/?$/i;
13
+ const XHS_NOTE_ID_RE = /^[0-9a-f]{24}$/i;
14
+ const XHS_NOTE_ID_IN_IMG_RE = /xhscdn\.com\/\d+\/([0-9a-f]{24})/i;
15
+ const XHS_PROFILE_USER_RE = /\/user\/profile\/([0-9a-f]{24})/i;
16
+
17
+
18
+ function hashNoteGuid(noteId) {
19
+ return _deps.createHash("sha256").update(`xhs:note:${noteId}`).digest("hex");
20
+ }
21
+
22
+
23
+ function extractProfileUserId(url) {
24
+ const m = String(url).match(XHS_PROFILE_USER_RE);
25
+ return m?.[1]?.toLowerCase() ?? null;
26
+ }
27
+
28
+
29
+ function buildExploreLink(noteId, origin) {
30
+ return `${origin.replace(/\/$/, "")}/explore/${noteId}`;
31
+ }
32
+
33
+
34
+ function extractNoteIdFromSection(section, profileUserId) {
35
+ for (const img of section.querySelectorAll('img[src*="xhscdn"]')) {
36
+ const src = img.getAttribute("src")?.trim() ?? "";
37
+ const fromImg = src.match(XHS_NOTE_ID_IN_IMG_RE);
38
+ if (fromImg?.[1] && fromImg[1] !== profileUserId) return fromImg[1].toLowerCase();
39
+ }
40
+ const html = section.outerHTML ?? "";
41
+ for (const match of html.match(/[0-9a-f]{24}/gi) ?? []) {
42
+ const id = match.toLowerCase();
43
+ if (id !== profileUserId && XHS_NOTE_ID_RE.test(id)) return id;
44
+ }
45
+ return null;
46
+ }
8
47
 
9
48
 
10
49
  function getOrigin(url) {
@@ -16,57 +55,99 @@ function getOrigin(url) {
16
55
  }
17
56
 
18
57
 
19
- function buildExploreLinkWithXsec(profileHref, origin) {
58
+ function normalizeXhsUrl(href, origin) {
59
+ try {
60
+ const url = new URL(href.replace(/&amp;/g, "&"), origin);
61
+ url.hash = "";
62
+ return url;
63
+ } catch {
64
+ return null;
65
+ }
66
+ }
67
+
68
+
69
+ function normalizeXhsItemLink(href, origin) {
70
+ const url = normalizeXhsUrl(href, origin);
71
+ if (!url) return null;
72
+
20
73
  try {
21
- const fullUrl = new URL(profileHref.replace(/&amp;/g, "&"), origin);
22
- const pathSegs = fullUrl.pathname.split("/").filter(Boolean);
23
- const noteId = pathSegs[pathSegs.length - 1];
24
- if (!noteId || !/^[0-9a-f]+$/i.test(noteId)) return null;
25
- const token = fullUrl.searchParams.get("xsec_token");
26
- const source = fullUrl.searchParams.get("xsec_source") ?? "pc_user";
27
- if (!token) return null;
28
- const explore = new URL(`/explore/${noteId}`, origin);
29
- explore.searchParams.set("xsec_token", token);
30
- explore.searchParams.set("xsec_source", source);
31
- return explore.href;
74
+ if (!/(^|\.)xiaohongshu\.com$/i.test(url.hostname)) return null;
75
+ const m = url.pathname.match(XHS_NOTE_PATH_RE);
76
+ if (!m?.[1]) return null;
77
+ return buildExploreLink(m[1].toLowerCase(), url.origin);
32
78
  } catch {
33
79
  return null;
34
80
  }
35
81
  }
36
82
 
37
83
 
84
+ function extractRedirectItemLink(href, origin) {
85
+ const wrapper = normalizeXhsUrl(href, origin);
86
+ if (!wrapper) return null;
87
+ if (!/\/website-login\/error\/?$/i.test(wrapper.pathname)) return null;
88
+
89
+ const redirectPath = wrapper.searchParams.get("redirectPath");
90
+ if (!redirectPath) return null;
91
+ return normalizeXhsItemLink(redirectPath, origin);
92
+ }
93
+
94
+
95
+ function extractListItemLink(section, origin, profileUserId) {
96
+ const noteId = extractNoteIdFromSection(section, profileUserId);
97
+ if (noteId) return buildExploreLink(noteId, origin);
98
+
99
+ const anchors = section.querySelectorAll("a[href]");
100
+ const candidates = [];
101
+ for (const anchor of anchors) {
102
+ const href = anchor.getAttribute("href")?.trim();
103
+ if (!href) continue;
104
+
105
+ const direct = normalizeXhsItemLink(href, origin);
106
+ if (direct) candidates.push(direct);
107
+
108
+ const redirected = extractRedirectItemLink(href, origin);
109
+ if (redirected) candidates.push(redirected);
110
+ }
111
+ return candidates[0] ?? null;
112
+ }
113
+
114
+
38
115
  function parseListHtml(html, url) {
39
116
  const root = _deps.parseHtml(html);
40
117
  const origin = getOrigin(url);
118
+ const profileUserId = extractProfileUserId(url);
41
119
  const feed = root.querySelector("#userPostedFeeds");
42
120
  if (!feed) return [];
43
- const sections = feed.querySelectorAll("section[data-v-79abd645][data-index]");
121
+ const sections = feed.querySelectorAll("section[data-index]");
44
122
  const items = [];
123
+ const seenNoteIds = new Set();
45
124
  for (const section of sections) {
46
- const profileWithToken = section.querySelector('a[href*="xsec_token="]');
47
- const profileHref = profileWithToken?.getAttribute("href")?.trim();
48
- let link;
49
- if (profileHref && profileHref.includes("/user/profile/")) {
50
- const withXsec = buildExploreLinkWithXsec(profileHref, origin);
51
- if (withXsec) link = withXsec;
52
- else link = new URL(profileHref.replace(/&amp;/g, "&"), origin).href;
53
- } else {
54
- const linkEl = section.querySelector('a[href^="/explore/"]');
55
- const href = linkEl?.getAttribute("href")?.trim();
56
- if (!href) continue;
57
- link = new URL(href, origin).href;
58
- }
59
- const titleEl = section.querySelector("span[data-v-51ec0135]");
60
- const title = (titleEl?.textContent ?? "").trim() || "笔记";
61
- const authorEl = section.querySelector('a[aria-current="page"] span');
125
+ const noteId = extractNoteIdFromSection(section, profileUserId);
126
+ const link = noteId
127
+ ? buildExploreLink(noteId, origin)
128
+ : extractListItemLink(section, origin, profileUserId);
129
+ if (!link) continue;
130
+ const dedupeKey = noteId ?? link;
131
+ if (seenNoteIds.has(dedupeKey)) continue;
132
+ seenNoteIds.add(dedupeKey);
133
+ const titleEl = section.querySelector("span[data-v-51ec0135]") ?? section.querySelector(".title span") ?? section.querySelector("span");
134
+ const title = (titleEl?.textContent ?? "").trim() || "Note";
135
+ const authorEl = section.querySelector('a[aria-current="page"] .name') ?? section.querySelector('a[aria-current="page"] span');
62
136
  const author = (authorEl?.textContent ?? "").trim() || undefined;
137
+ const imageEl = section.querySelector("img[data-xhs-img], img");
138
+ const image = imageEl?.getAttribute("src")?.trim() || undefined;
139
+ const summary = image ? undefined : title;
140
+ const guid = noteId ? hashNoteGuid(noteId) : _deps.createHash("sha256").update(link).digest("hex");
63
141
  items.push({
64
- guid: _deps.createHash("sha256").update(link).digest("hex"),
142
+ guid,
65
143
  title,
66
144
  link,
67
145
  pubDate: new Date(),
68
146
  author,
69
- summary: title,
147
+ summary,
148
+ imageUrl: image,
149
+ coverImg: image,
150
+ cover_img: image,
70
151
  });
71
152
  }
72
153
  return items;
@@ -240,9 +321,14 @@ function extractDetailHtml(html) {
240
321
  }
241
322
 
242
323
 
243
- async function fetchItems(sourceId, ctx) {
324
+ export async function fetchItems(sourceId, ctx) {
244
325
  _deps = ctx.deps;
245
- const { html, finalUrl } = await ctx.fetchHtml(sourceId);
326
+ const { html, finalUrl } = await ctx.fetchHtml(sourceId, {
327
+ waitMs: 3000,
328
+ waitForSelector: "#userPostedFeeds",
329
+ waitForSelectorTimeoutMs: 15000,
330
+ scrollBeforeSnapshot: { selector: "#userPostedFeeds", rounds: 8, pauseMs: 900 },
331
+ });
246
332
  return parseListHtml(html, finalUrl);
247
333
  }
248
334
 
@@ -258,26 +344,3 @@ async function enrichItem(item, ctx) {
258
344
  pubDate: detail.pubDate ?? item.pubDate,
259
345
  };
260
346
  }
261
-
262
-
263
- async function checkAuth(page, _url) {
264
- try {
265
- const loginButton = await page.$(".reds-button-new.login-btn.large.primary");
266
- return loginButton == null;
267
- } catch {
268
- return false;
269
- }
270
- }
271
-
272
-
273
- export default {
274
- id: "xiaohongshu",
275
- listUrlPattern: "https://xiaohongshu.com/user/profile/{userId}",
276
- fetchItems,
277
- enrichItem,
278
- checkAuth,
279
- loginUrl: "https://www.xiaohongshu.com/",
280
- domain: "xiaohongshu.com",
281
- loginTimeoutMs: 30 * 1000,
282
- pollIntervalMs: 2000,
283
- };
@@ -1,3 +1,7 @@
1
+ export const id = "zhipu-research";
2
+ export const name = "Zhipu Research";
3
+ export const listUrlPattern = /^https:\/\/(www\.)?zhipuai\.cn\/zh\/research\/?(?:[?#].*)?$/i;
4
+
1
5
  let _deps;
2
6
 
3
7
  // 智谱研究页插件:仅抓取列表,不做正文 enrich(兼容净化后的 HTML)
@@ -300,7 +304,7 @@ function buildItemsFromLeafSequence(html, titleIdMap) {
300
304
  }
301
305
 
302
306
 
303
- async function fetchItems(sourceId, ctx) {
307
+ export async function fetchItems(sourceId, ctx) {
304
308
  _deps = ctx.deps;
305
309
  // 需要读取页面脚本里的 blogsItems(包含详情 id),因此这里禁用净化。
306
310
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 5000, purify: false });
@@ -325,10 +329,3 @@ async function fetchItems(sourceId, ctx) {
325
329
 
326
330
  throw new Error("[zhipu-research] 未解析到研究条目,页面结构可能已变化");
327
331
  }
328
-
329
-
330
- export default {
331
- id: "zhipu-research",
332
- listUrlPattern: ZHIPU_RESEARCH_URL,
333
- fetchItems,
334
- };
@@ -1,26 +1,25 @@
1
- /**
2
- * Site 插件模板(管理页「添加插件」会复制到 `.rssany/plugins/{id}.rssany.js`)
3
- * 修改 `id` 后请与文件名保持一致。
4
- *
5
- * 接口说明:app/scraper/sources/web/site.ts
6
- */
7
-
8
- export default {
9
- id: "__PLUGIN_ID__",
10
- // eslint-disable-next-line no-undef
11
- listUrlPattern: __LIST_URL_PATTERN__,
12
- refreshInterval: "1day",
13
-
14
- /** sourceId 与订阅里 ref 一致;ctx 含 fetchHtml、extractItem、deps(parseHtml 等) */
15
- async fetchItems(sourceId, ctx) {
16
- const { html, finalUrl } = await ctx.fetchHtml(sourceId, {
17
- waitMs: 2000,
18
- purify: true,
19
- });
20
- const root = ctx.deps.parseHtml(html);
21
- void root;
22
- void finalUrl;
23
- // TODO: ctx.deps.parseHtml 解析列表页,产出 { title, link, summary?, pubDate? } 等 FeedItem
24
- return [];
25
- },
26
- };
1
+ /**
2
+ * Site plugin template. The admin UI copies this file to .rssany/plugins/{id}.rssany.js.
3
+ * Plugin protocol: named exports. No export default is required.
4
+ *
5
+ * Interface: app/scraper/sources/web/site.ts
6
+ */
7
+
8
+ // Predefined fields stay together at the top.
9
+ export const id = "__PLUGIN_ID__";
10
+ export const name = "__PLUGIN_ID__";
11
+ // eslint-disable-next-line no-undef
12
+ export const listUrlPattern = __LIST_URL_PATTERN__;
13
+ export const refreshInterval = "1day";
14
+
15
+ export async function fetchItems(sourceId, ctx) {
16
+ const { html, finalUrl } = await ctx.fetchHtml(sourceId, {
17
+ waitMs: 2000,
18
+ purify: true,
19
+ });
20
+ const root = ctx.deps.parseHtml(html);
21
+ void root;
22
+ void finalUrl;
23
+ // TODO: Parse the list page and return FeedItem objects.
24
+ return [];
25
+ }
@@ -1,7 +1,7 @@
1
- # statics
2
-
3
- 静态 HTML 页面:home(首页)、401、404。
4
-
5
- - **home.html**:首页,含 Try This 示例链接;下方「需登录的站点」从 `/plugins` 拉取,每个站点可点击「打开登录页」调用 `POST /auth/ensure?siteId=...` 批量做登录。
6
- - **401.html**:需登录时返回;占位符 `{{listUrl}}` 由 router 注入为失败请求的订阅地址;页内「打开有头登录页」按钮调用 `POST /auth/ensure?url=...` 弹出有头浏览器完成登录。
7
- - **404.html**:无匹配站点时返回。
1
+ # statics
2
+
3
+ 静态 HTML 页面:home(首页)、401、404。
4
+
5
+ - **home.html**:首页,含 Try This 示例链接;下方「需登录的站点」从 `/plugins` 拉取,每个站点可点击「打开登录页」调用 `POST /auth/ensure?siteId=...` 批量做登录。
6
+ - **401.html**:需登录时返回;占位符 `{{listUrl}}` 由 router 注入为失败请求的订阅地址;页内「打开有头登录页」按钮调用 `POST /auth/ensure?url=...` 弹出有头浏览器完成登录。
7
+ - **404.html**:无匹配站点时返回。