rssany 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/app/plugins/builtin/agi-eval-evaluation.rssany.js +188 -0
  2. package/app/plugins/builtin/amii-research-talent.rssany.js +73 -0
  3. package/app/plugins/builtin/anthropic-research.rssany.js +155 -0
  4. package/app/plugins/builtin/appen-resources.rssany.js +155 -0
  5. package/app/plugins/builtin/baai-wudao-paper-article.rssany.js +185 -0
  6. package/app/plugins/builtin/baaidata-csdn.rssany.js +242 -0
  7. package/app/plugins/builtin/baidu-research.rssany.js +222 -0
  8. package/app/plugins/builtin/brightdata-blog.rssany.js +301 -0
  9. package/app/plugins/builtin/bytedance-seed-research.rssany.js +231 -0
  10. package/app/plugins/builtin/five-radar.rssany.js +490 -0
  11. package/app/plugins/builtin/flageval-news.rssany.js +118 -0
  12. package/app/plugins/builtin/google-deepmind-research.rssany.js +223 -0
  13. package/app/plugins/builtin/google-research-datasets.rssany.js +171 -0
  14. package/app/plugins/builtin/google-research.rssany.js +220 -0
  15. package/app/plugins/builtin/google.rssany.js +187 -0
  16. package/app/plugins/builtin/hacker-news-newest.rssany.js +130 -0
  17. package/app/plugins/builtin/harvard-dataverse.rssany.js +166 -0
  18. package/app/plugins/builtin/huaweicloud-bbs-blogs.rssany.js +185 -0
  19. package/app/plugins/builtin/lingowhale.rssany.js +119 -0
  20. package/app/plugins/builtin/meituan-tech.rssany.js +130 -0
  21. package/app/plugins/builtin/meta-ai-publications.rssany.js +221 -0
  22. package/app/plugins/builtin/mila-quebec.rssany.js +199 -0
  23. package/app/plugins/builtin/mit-csail-research.rssany.js +208 -0
  24. package/app/plugins/builtin/moonshot.rssany.js +127 -0
  25. package/app/plugins/builtin/opendatalab-news.rssany.js +174 -0
  26. package/app/plugins/builtin/opendatalab.rssany.js +109 -0
  27. package/app/plugins/builtin/opendrivelab-autonomous-driving.rssany.js +114 -0
  28. package/app/plugins/builtin/opendrivelab-embodiedai.rssany.js +114 -0
  29. package/app/plugins/builtin/opendrivelab-publications.rssany.js +130 -0
  30. package/app/plugins/builtin/opendrivelab.rssany.js +333 -0
  31. package/app/plugins/builtin/paperswithcode.rssany.js +227 -0
  32. package/app/plugins/builtin/pjlab-adg-publications.rssany.js +202 -0
  33. package/app/plugins/builtin/rss.rssany.js +11 -1
  34. package/app/plugins/builtin/selectdataset.rssany.js +206 -0
  35. package/app/plugins/builtin/sensetime-tech-achievements.rssany.js +154 -0
  36. package/app/plugins/builtin/supervisely-blog.rssany.js +159 -0
  37. package/app/plugins/builtin/uci-ml-repository.rssany.js +111 -0
  38. package/app/plugins/builtin/venturebeat.rssany.js +97 -0
  39. package/app/plugins/builtin/worldlabs.rssany.js +129 -0
  40. package/app/plugins/builtin/x.rssany.js +159 -0
  41. package/app/plugins/builtin/xiaohongshu.rssany.js +283 -0
  42. package/app/plugins/builtin/zhipu-research.rssany.js +334 -0
  43. package/dist/index.js +62 -4
  44. package/dist/index.js.map +1 -1
  45. package/package.json +1 -1
  46. package/webui/build/200.html +6 -6
  47. package/webui/build/_app/immutable/assets/{0.DjU2hdCQ.css → 0.BB88QFoe.css} +1 -1
  48. package/webui/build/_app/immutable/assets/{homeFeedPanelStore.BopJZtHu.css → homeFeedPanelStore.iOmfP2qL.css} +1 -1
  49. package/webui/build/_app/immutable/chunks/CZD-YNDw.js +31 -0
  50. package/webui/build/_app/immutable/chunks/{C85CNwD2.js → D6VIKef0.js} +1 -1
  51. package/webui/build/_app/immutable/chunks/{CllQAdvt.js → Dbqx2mXq.js} +1 -1
  52. package/webui/build/_app/immutable/chunks/DeX-oq5W.js +41 -0
  53. package/webui/build/_app/immutable/chunks/{CdMsRjxJ.js → dhB8G5Is.js} +1 -1
  54. package/webui/build/_app/immutable/entry/{app.BcD2eSsQ.js → app.XPso7q7g.js} +2 -2
  55. package/webui/build/_app/immutable/entry/start.Db4snNCd.js +1 -0
  56. package/webui/build/_app/immutable/nodes/0.BKTQePmA.js +11 -0
  57. package/webui/build/_app/immutable/nodes/{1.DU9aYGAb.js → 1.BS3_Rfxm.js} +1 -1
  58. package/webui/build/_app/immutable/nodes/{10.Db6vw7Ih.js → 10.CyyxDCIS.js} +1 -1
  59. package/webui/build/_app/immutable/nodes/{11.BaAcorz3.js → 11.CtYgIaGj.js} +1 -1
  60. package/webui/build/_app/immutable/nodes/{14.DqT4pcrQ.js → 14.D5OEGPR2.js} +1 -1
  61. package/webui/build/_app/immutable/nodes/{15.CCLbjxnH.js → 15.B4dFN1Gk.js} +1 -1
  62. package/webui/build/_app/immutable/nodes/{16.DiigpVdP.js → 16.M7ZII7tl.js} +1 -1
  63. package/webui/build/_app/immutable/nodes/{3.DEcYOQc-.js → 3.7r8v7qkm.js} +1 -1
  64. package/webui/build/_app/immutable/nodes/{5.CvM1TkLG.js → 5.CHIzoGrb.js} +1 -1
  65. package/webui/build/_app/immutable/nodes/{6.Dscr6LkS.js → 6.BDBqx-GY.js} +1 -1
  66. package/webui/build/_app/immutable/nodes/{7.Bp60MobD.js → 7.D5czsDmz.js} +1 -1
  67. package/webui/build/_app/immutable/nodes/{8.DwSg0MHh.js → 8.pjVNsCdV.js} +1 -1
  68. package/webui/build/_app/immutable/nodes/{9.BeYOUjxR.js → 9.CsARv1BH.js} +1 -1
  69. package/webui/build/_app/version.json +1 -1
  70. package/webui/build/_app/immutable/chunks/CtijX1u3.js +0 -31
  71. package/webui/build/_app/immutable/chunks/Dv1VCsiB.js +0 -41
  72. package/webui/build/_app/immutable/entry/start.CbkdJdz1.js +0 -1
  73. package/webui/build/_app/immutable/nodes/0.DSUDmOx2.js +0 -11
@@ -0,0 +1,301 @@
1
// Bright Data blog plugin: prefer the site's RSS feed and fall back to parsing
// the listing page when the feed cannot be used (no article-body enrichment).

// Host-provided helpers (this file uses createHash and parseHtml); reassigned
// from ctx.deps at the start of every fetchItems call.
let _deps;

// Base used to resolve a relative source id when deriving a feed URL.
const BRIGHTDATA_ORIGIN = "https://brightdata.com";

// Listing pages handled by this plugin: /blog, /blog/page/N, /blog/<section>
// and /blog/<section>/page/N, with optional trailing slash and query string.
const LIST_URL_RE = /^https?:\/\/(?:www\.)?brightdata\.com\/blog(?:\/(?:page\/\d+|[a-z0-9-]+(?:\/page\/\d+)?)?)?\/?(?:\?.*)?$/i;

// Article paths have exactly two segments under /blog; group 1 is the first
// segment (used as the category), group 2 is the second (the slug).
const ARTICLE_PATH_RE = /^\/blog\/([^/?#/]+)\/([^/?#/]+)\/?$/i;

// Matches card metadata such as "5 min read".
const MIN_READ_RE = /^\d+\s*min\s*read$/i;
12
+
13
+
14
/**
 * Collapses every whitespace run to a single space and trims both ends.
 *
 * @param {string | null | undefined} text - Raw text; null/undefined count as "".
 * @returns {string} The normalized text.
 */
function normalizeText(text) {
  const source = text ?? "";
  const collapsed = source.replace(/\s+/g, " ");
  return collapsed.trim();
}
17
+
18
+
19
/**
 * Builds an item guid as the hex SHA-256 digest of the input, using the
 * host-provided createHash helper.
 *
 * @param {string} input - Value to hash (callers pass the article link).
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const hasher = _deps.createHash("sha256");
  return hasher.update(input).digest("hex");
}
22
+
23
+
24
/**
 * Resolves an href against a base URL and keeps it only if it is http(s).
 *
 * @param {string | null | undefined} rawHref - Attribute value as found in the page.
 * @param {string} baseUrl - URL used to resolve relative hrefs.
 * @returns {string | null} Absolute URL, or null for empty, fragment-only,
 *   `javascript:`, unparsable or non-http(s) hrefs.
 */
function toAbsoluteHttpUrl(rawHref, baseUrl) {
  const href = rawHref ? rawHref.trim() : "";
  const isUnusable = href === "" || href.startsWith("#") || href.startsWith("javascript:");
  if (isUnusable) return null;

  let resolved;
  try {
    resolved = new URL(href, baseUrl);
  } catch {
    return null;
  }
  return /^https?:$/i.test(resolved.protocol) ? resolved.href : null;
}
36
+
37
+
38
/**
 * Parses a feed date string with the Date constructor.
 *
 * @param {string | null | undefined} raw - Date text from the feed entry.
 * @returns {Date | undefined} The parsed date, or undefined when the input is
 *   empty or not parsable.
 */
function parsePubDate(raw) {
  if (raw) {
    const parsed = new Date(raw);
    if (!Number.isNaN(parsed.getTime())) return parsed;
  }
  return undefined;
}
43
+
44
+
45
/**
 * Tells whether a URL points at a Bright Data blog article: a brightdata.com
 * host (or subdomain) with a two-segment /blog path whose first segment is
 * neither "page" nor "feed".
 *
 * @param {string} urlText - Absolute URL to test.
 * @returns {boolean} True for article URLs; false otherwise, including
 *   unparsable input.
 */
function isBrightDataArticleUrl(urlText) {
  let parsed;
  try {
    parsed = new URL(urlText);
  } catch {
    return false;
  }
  if (!/(^|\.)brightdata\.com$/i.test(parsed.hostname)) return false;

  const match = ARTICLE_PATH_RE.exec(parsed.pathname);
  if (match === null) return false;

  // /blog/page/N is pagination and /blog/feed/... is the RSS feed, not articles.
  const firstSegment = match[1].toLowerCase();
  return !(firstSegment === "page" || firstSegment === "feed");
}
56
+
57
+
58
/**
 * Turns a hyphenated slug into a space-separated label with the first
 * character of each word upper-cased (e.g. "web-data" -> "Web Data").
 *
 * @param {string} raw - Category slug.
 * @returns {string} Display label; empty when the slug has no non-empty parts.
 */
function formatCategory(raw) {
  const words = [];
  for (const piece of raw.split("-")) {
    // Consecutive or leading/trailing hyphens yield empty pieces; drop them.
    if (piece) words.push(`${piece[0].toUpperCase()}${piece.slice(1)}`);
  }
  return words.join(" ");
}
65
+
66
+
67
/**
 * Derives a display category from the first path segment of an article link.
 *
 * @param {string} link - Absolute article URL.
 * @returns {string | undefined} Formatted category, or undefined when the link
 *   is unparsable, is not an article path, or yields an empty label.
 */
function extractCategoryFromLink(link) {
  let pathname;
  try {
    ({ pathname } = new URL(link));
  } catch {
    return undefined;
  }
  const match = ARTICLE_PATH_RE.exec(pathname);
  if (!match) return undefined;
  return formatCategory(match[1]) || undefined;
}
78
+
79
+
80
/**
 * Removes falsy entries and duplicates, keeping first-occurrence order.
 *
 * @param {Iterable<string | null | undefined>} values - Candidate values.
 * @returns {string[]} Distinct truthy values in their original order.
 */
function uniqueTexts(values) {
  // A Set iterates in insertion order, so the first occurrence wins.
  const kept = new Set();
  for (const value of values) {
    if (value) kept.add(value);
  }
  return [...kept];
}
90
+
91
+
92
/**
 * Returns the normalized text of the first selector that matches an element
 * with non-empty text.
 *
 * @param {object} node - Parsed element exposing querySelector.
 * @param {string[]} selectors - Selectors tried in order.
 * @returns {string} First non-empty text found, or "" when none match.
 */
function queryText(node, selectors) {
  for (const selector of selectors) {
    let text = "";
    try {
      text = normalizeText(node.querySelector(selector)?.textContent);
    } catch {
      // The parser may reject a selector it does not support; try the next one.
    }
    if (text) return text;
  }
  return "";
}
103
+
104
+
105
/**
 * Guesses the RSS feed URL for a listing page: /blog/<section>/feed/ when the
 * path has a section after /blog (other than "page"), otherwise /blog/feed/.
 *
 * @param {string} sourceId - Listing URL; relative values resolve against
 *   BRIGHTDATA_ORIGIN.
 * @returns {string | null} Feed URL, or null when a URL cannot be built.
 */
function deriveFeedUrlFromListUrl(sourceId) {
  // One try covers both URL constructions: the second can also throw when the
  // first URL has an opaque origin.
  try {
    const url = new URL(sourceId, BRIGHTDATA_ORIGIN);
    const [root, section] = url.pathname.split("/").filter(Boolean);
    const isSectionListing =
      root === "blog" && section !== undefined && section.toLowerCase() !== "page";
    const feedPath = isSectionListing ? `/blog/${section}/feed/` : "/blog/feed/";
    return new URL(feedPath, url.origin).href;
  } catch {
    return null;
  }
}
119
+
120
+
121
/**
 * Looks up the RSS feed advertised by the page through a <link> element.
 *
 * @param {object} root - Parsed document exposing querySelector.
 * @param {string} pageUrl - URL used to resolve a relative href.
 * @returns {string | null} Absolute http(s) feed URL, or null when the page
 *   advertises none.
 */
function resolveFeedUrl(root, pageUrl) {
  const feedLinkSelector =
    'link[rel="alternate"][type="application/rss+xml"][href], link[href*="/feed/"][type="application/rss+xml"]';
  const linkNode = root.querySelector(feedLinkSelector);
  const feedHref = linkNode?.getAttribute("href");
  return toAbsoluteHttpUrl(feedHref, pageUrl);
}
127
+
128
+
129
/**
 * Downloads an RSS feed and converts its <item> entries into feed items.
 *
 * Entries without a title, without a Bright Data article link, or whose link
 * was already seen are skipped. Entries with no parsable date get the current
 * time as pubDate.
 *
 * @param {string} feedUrl - Absolute feed URL.
 * @returns {Promise<object[]>} Items shaped as
 *   { guid, title, link, pubDate, author?, summary? }.
 * @throws {Error} When the HTTP response status is not OK.
 */
async function fetchFeedItems(feedUrl) {
  const res = await fetch(feedUrl, {
    redirect: "follow",
    headers: {
      "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
      "Accept-Language": "en-US,en;q=0.9",
    },
  });
  if (!res.ok) {
    throw new Error(`[brightdata-blog] 获取 RSS feed 失败: ${res.status}`);
  }

  // NOTE(review): the XML goes through the host's parseHtml helper; confirm it
  // keeps the text of RSS <link> elements, which HTML parsers may treat as void.
  const xml = await res.text();
  const root = _deps.parseHtml(xml);
  const items = [];
  const seen = new Set();

  for (const entry of root.querySelectorAll("item")) {
    const title = queryText(entry, ["title"]);
    const link = toAbsoluteHttpUrl(queryText(entry, ["link"]), feedUrl);
    if (!title || !link || !isBrightDataArticleUrl(link) || seen.has(link)) continue;
    seen.add(link);

    const summary = queryText(entry, ["description"]);
    const author = queryText(entry, ["dc\\:creator", "creator", "author"]);
    const pubDateRaw = queryText(entry, ["pubDate", "published", "updated", "dc\\:date"]);
    const pubDate = parsePubDate(pubDateRaw) ?? new Date();

    // The previous version also collected <category> values here but never
    // attached them to the item; that dead computation was removed. Re-add it
    // together with the output field if categories should be emitted.
    items.push({
      guid: hashGuid(link),
      title,
      link,
      pubDate,
      author: author || undefined,
      summary: summary || undefined,
    });
  }
  return items;
}
178
+
179
+
180
/**
 * Collects the distinct texts of the innermost text-bearing elements
 * (headings, p, span, div) inside an anchor.
 *
 * @param {object} anchor - Parsed <a> element.
 * @returns {string[]} Distinct non-empty texts in document order.
 */
function collectLeafTexts(anchor) {
  const textTags = "h1, h2, h3, h4, h5, h6, p, span, div";
  const leafTexts = [];
  for (const node of anchor.querySelectorAll(textTags)) {
    // Only leaves: a container's text would repeat what its children provide.
    const hasNestedTextTag = Boolean(node.querySelector(textTags));
    if (hasNestedTextTag) continue;

    const text = normalizeText(node.textContent);
    // Drop text that still contains markup-like fragments.
    if (text && !/<[^>]+>/.test(text)) leafTexts.push(text);
  }
  return uniqueTexts(leafTexts);
}
191
+
192
+
193
/**
 * Tells whether a text is card boilerplate rather than content: empty text,
 * reading-time labels, or one of the known section captions.
 *
 * @param {string | null | undefined} text - Candidate text.
 * @returns {boolean} True when the text should be ignored.
 */
function isMetaText(text) {
  const normalized = normalizeText(text).toLowerCase();
  if (normalized === "" || MIN_READ_RE.test(normalized)) return true;

  const sectionCaptions = ["editor's pick", "latest articles", "all categories"];
  return sectionCaptions.includes(normalized) || normalized.includes("min read");
}
204
+
205
+
206
/**
 * Picks an article title from a listing card: the first heading when usable,
 * otherwise the first leaf text of at least 12 characters that is neither
 * boilerplate nor the category label.
 *
 * @param {object} anchor - Parsed <a> element of the card.
 * @param {string[]} texts - Leaf texts collected from the card.
 * @param {string | undefined} category - Category label to exclude.
 * @returns {string} The title, or "" when none qualifies.
 */
function extractTitle(anchor, texts, category) {
  const categoryKey = normalizeText(category).toLowerCase();

  const headingNode = anchor.querySelector("h1, h2, h3, h4, h5, h6");
  const heading = normalizeText(headingNode?.textContent);
  if (heading && !isMetaText(heading) && heading.toLowerCase() !== categoryKey) {
    return heading;
  }

  const candidate = texts.find(
    (text) =>
      !isMetaText(text) &&
      !(categoryKey && text.toLowerCase() === categoryKey) &&
      text.length >= 12,
  );
  return candidate ?? "";
}
219
+
220
+
221
/**
 * Picks a summary from a card's leaf texts: the first text of at least 20
 * characters that is neither the title nor boilerplate.
 *
 * @param {string} title - Title already chosen for the card.
 * @param {string[]} texts - Leaf texts collected from the card.
 * @returns {string} The summary, or "" when none qualifies.
 */
function extractSummary(title, texts) {
  const summary = texts.find(
    (text) => Boolean(text) && text !== title && !isMetaText(text) && text.length >= 20,
  );
  return summary ?? "";
}
229
+
230
+
231
/**
 * Guesses the author from image alt texts inside a card: the first alt that
 * looks like a two- or three-word capitalized name.
 *
 * @param {object} anchor - Parsed <a> element of the card.
 * @returns {string} The author name, or "" when no alt text looks like one.
 */
function extractAuthor(anchor) {
  const personNameRe = /^[A-Z][A-Za-z.'-]+(?:\s+[A-Z][A-Za-z.'-]+){1,2}$/;
  for (const image of anchor.querySelectorAll("img[alt]")) {
    const alt = normalizeText(image.getAttribute("alt"));
    if (alt && personNameRe.test(alt)) return alt;
  }
  return "";
}
240
+
241
+
242
/**
 * Fallback scraper: builds feed items from the article links on a listing page.
 *
 * Each distinct article link yields at most one item; anchors with no usable
 * title are skipped, so a later anchor for the same link may still provide one.
 * The listing exposes no dates, so pubDate is the time of parsing.
 *
 * @param {object} root - Parsed listing document.
 * @param {string} baseUrl - URL used to resolve relative hrefs.
 * @returns {object[]} Items shaped as
 *   { guid, title, link, pubDate, author?, summary? } in document order.
 */
function parseHtmlItems(root, baseUrl) {
  // Map keeps insertion order, serving as both the seen-set and the output list.
  const itemsByLink = new Map();

  for (const anchor of root.querySelectorAll("a[href]")) {
    const link = toAbsoluteHttpUrl(anchor.getAttribute("href"), baseUrl);
    if (!link || itemsByLink.has(link) || !isBrightDataArticleUrl(link)) continue;

    const texts = collectLeafTexts(anchor);
    const title = extractTitle(anchor, texts, extractCategoryFromLink(link));
    if (!title) continue;

    itemsByLink.set(link, {
      guid: hashGuid(link),
      title,
      link,
      pubDate: new Date(),
      author: extractAuthor(anchor) || undefined,
      summary: extractSummary(title, texts) || undefined,
    });
  }
  return [...itemsByLink.values()];
}
269
+
270
+
271
/**
 * Plugin entry point: loads a Bright Data blog listing and returns its items.
 *
 * Tries the feed advertised by the page, then the feed URL derived from the
 * listing path; the first feed yielding items wins. If neither does, the
 * listing HTML itself is scraped.
 *
 * @param {string} sourceId - Listing URL to fetch.
 * @param {object} ctx - Host context providing `deps` and `fetchHtml`.
 * @returns {Promise<object[]>} Parsed feed items.
 * @throws {Error} When neither the feeds nor the HTML yield any item.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const page = await ctx.fetchHtml(sourceId, { waitMs: 3500 });
  const root = _deps.parseHtml(page.html);
  const pageUrl = page.finalUrl || sourceId || BRIGHTDATA_ORIGIN;

  const candidateFeeds = uniqueTexts([
    resolveFeedUrl(root, pageUrl),
    deriveFeedUrlFromListUrl(pageUrl),
  ]);
  for (const feedUrl of candidateFeeds) {
    let feedItems = [];
    try {
      feedItems = await fetchFeedItems(feedUrl);
    } catch {
      // Deliberate best effort: a failing feed falls through to the next
      // candidate and finally to HTML parsing.
    }
    if (feedItems.length > 0) return feedItems;
  }

  const htmlItems = parseHtmlItems(root, pageUrl);
  if (htmlItems.length > 0) return htmlItems;
  throw new Error("[brightdata-blog] 未解析到文章条目,页面结构可能已变化");
}
295
+
296
+
297
// Plugin descriptor: `listUrlPattern` declares which source URLs this plugin
// handles and `fetchItems` produces the items for one of them.
const brightDataBlogPlugin = {
  id: "brightdata-blog",
  listUrlPattern: LIST_URL_RE,
  fetchItems,
};

export default brightDataBlogPlugin;
@@ -0,0 +1,231 @@
1
// ByteDance Seed research-page plugin: extracts research papers and news
// entries from the page's embedded data (no enrichment).

// Host-provided helpers (this file uses createHash); reassigned from ctx.deps
// at the start of every fetchItems call.
let _deps;

// Origin used when the page URL cannot be parsed.
const DEFAULT_ORIGIN = "https://seed.bytedance.com";

// Text that precedes the router-state JSON object in the page's inline script.
const ROUTER_DATA_MARKER = "window._ROUTER_DATA = ";

// Key under `loaderData` that holds the research page payload.
const RESEARCH_PAGE_KEY = "(locale$)/research/page";
10
+
11
+
12
/**
 * Collapses every whitespace run to a single space and trims both ends.
 *
 * This plugin feeds raw JSON fields into this helper, so non-string values are
 * tolerated instead of throwing on `.replace`: numbers are stringified, and
 * anything else (null, undefined, objects, arrays, booleans) yields "".
 *
 * @param {unknown} text - Raw value, normally a string.
 * @returns {string} The normalized text.
 */
function normalizeText(text) {
  let source = "";
  if (typeof text === "string") {
    source = text;
  } else if (typeof text === "number" && Number.isFinite(text)) {
    source = String(text);
  }
  return source.replace(/\s+/g, " ").trim();
}
15
+
16
+
17
/**
 * Builds an item guid as the hex SHA-256 digest of the input, using the
 * host-provided createHash helper.
 *
 * @param {string} input - Value to hash.
 * @returns {string} Hex digest.
 */
function hashGuid(input) {
  const hasher = _deps.createHash("sha256");
  return hasher.update(input).digest("hex");
}
20
+
21
+
22
/**
 * Returns the origin (scheme + host + port) of a URL.
 *
 * @param {string} url - Absolute URL.
 * @returns {string} The URL's origin, or DEFAULT_ORIGIN when it cannot be parsed.
 */
function toOrigin(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return DEFAULT_ORIGIN;
  }
  return parsed.origin;
}
29
+
30
+
31
/**
 * Infers the page locale from the URL path: "en" when the path starts with
 * "/en/" (case-insensitive), otherwise "zh".
 *
 * @param {string} url - Absolute page URL.
 * @returns {"en" | "zh"} Detected locale; "zh" also for unparsable URLs.
 */
function detectLocale(url) {
  let pathname = "";
  try {
    pathname = new URL(url).pathname;
  } catch {
    // Unparsable URL: keep the empty path, which maps to the default locale.
  }
  return pathname.toLowerCase().startsWith("/en/") ? "en" : "zh";
}
40
+
41
+
42
/**
 * Chooses between the Chinese and English variants of a field: the variant
 * for the requested locale when non-empty, otherwise the other one.
 *
 * @param {string | null | undefined} zhValue - Chinese text.
 * @param {string | null | undefined} enValue - English text.
 * @param {string} locale - "zh" prefers Chinese; any other value prefers English.
 * @returns {string} Normalized text, or "" when both variants are empty.
 */
function pickLocalized(zhValue, enValue, locale) {
  const zh = normalizeText(zhValue);
  const en = normalizeText(enValue);
  const [preferred, alternate] = locale === "zh" ? [zh, en] : [en, zh];
  return preferred || alternate;
}
48
+
49
+
50
/**
 * Converts a publish date from the page payload into a Date.
 *
 * Positive numeric values are epoch timestamps. Values below 1e11 are read as
 * seconds and larger ones as milliseconds; 1e11 ms is March 1973, so real
 * millisecond timestamps are never below the cutoff. Previously every number
 * was read as milliseconds, which placed seconds-based values in January 1970.
 * NOTE(review): which unit the site actually sends is not visible here.
 * Anything else is passed to the Date constructor as a string.
 *
 * @param {unknown} raw - Timestamp (number or numeric string) or date string.
 * @returns {Date} Parsed date, or the current time when nothing parses.
 */
function parsePublishDate(raw) {
  const secondsCutoff = 1e11;
  const num = Number(raw);
  if (Number.isFinite(num) && num > 0) {
    const millis = num < secondsCutoff ? num * 1000 : num;
    const date = new Date(millis);
    if (!Number.isNaN(date.getTime())) return date;
  }

  const direct = new Date(String(raw ?? ""));
  if (!Number.isNaN(direct.getTime())) return direct;
  return new Date();
}
61
+
62
+
63
/**
 * Extracts the first brace-balanced `{...}` object that follows a marker.
 *
 * Braces inside double-quoted strings are ignored, and backslash escapes
 * inside strings are honoured so an escaped quote does not end the string.
 *
 * @param {string} raw - Text to scan (e.g. a whole HTML document).
 * @param {string} marker - Literal text preceding the object.
 * @returns {string | undefined} The object's source text, or undefined when the
 *   marker or a complete balanced object is not found.
 */
function extractBalancedJson(raw, marker) {
  const markerAt = raw.indexOf(marker);
  if (markerAt === -1) return undefined;

  const open = raw.indexOf("{", markerAt + marker.length);
  if (open === -1) return undefined;

  let nesting = 0;
  let insideString = false;
  let skipNext = false;

  for (let pos = open; pos < raw.length; pos += 1) {
    const ch = raw[pos];

    if (insideString) {
      if (skipNext) {
        // Character following a backslash: consumed without interpretation.
        skipNext = false;
      } else if (ch === "\\") {
        skipNext = true;
      } else if (ch === "\"") {
        insideString = false;
      }
      continue;
    }

    switch (ch) {
      case "\"":
        insideString = true;
        break;
      case "{":
        nesting += 1;
        break;
      case "}":
        nesting -= 1;
        if (nesting === 0) return raw.slice(open, pos + 1);
        break;
      default:
        break;
    }
  }

  return undefined;
}
104
+
105
+
106
/**
 * Reads the router state embedded in the page after ROUTER_DATA_MARKER.
 *
 * @param {string} html - Raw page HTML.
 * @returns {unknown} Parsed JSON value, or undefined when the block is missing
 *   or is not valid JSON.
 */
function parseRouterData(html) {
  const jsonText = extractBalancedJson(html, ROUTER_DATA_MARKER);
  if (!jsonText) return undefined;

  let parsed;
  try {
    parsed = JSON.parse(jsonText);
  } catch {
    parsed = undefined;
  }
  return parsed;
}
115
+
116
+
117
/**
 * Reads the JSON stored in the page's `<script id="__MODERN_DATA__">` element.
 *
 * @param {string} html - Raw page HTML.
 * @returns {unknown} Parsed JSON value, or undefined when the script is missing
 *   or its content is not valid JSON.
 */
function parseModernData(html) {
  const scriptRe = /<script[^>]*id="__MODERN_DATA__"[^>]*>([\s\S]*?)<\/script>/i;
  const found = scriptRe.exec(html);
  if (found === null) return undefined;

  let parsed;
  try {
    parsed = JSON.parse(found[1]);
  } catch {
    parsed = undefined;
  }
  return parsed;
}
126
+
127
+
128
/**
 * Locates the research page payload in the HTML. Sources are tried in order:
 * the research entry of the router state's loaderData, then the `data` field
 * of the __MODERN_DATA__ script, then that script's whole JSON value.
 *
 * @param {string} html - Raw page HTML.
 * @returns {object | undefined} First object found, or undefined.
 */
function getResearchPayload(html) {
  const isObject = (value) => Boolean(value) && typeof value === "object";

  const routerPayload = parseRouterData(html)?.loaderData?.[RESEARCH_PAGE_KEY];
  if (isObject(routerPayload)) return routerPayload;

  const modernData = parseModernData(html);
  if (isObject(modernData?.data)) return modernData.data;
  return isObject(modernData) ? modernData : undefined;
}
139
+
140
+
141
/**
 * Converts one payload entry into a feed item.
 *
 * The title comes from Title, falling back to ShortTitle, in the requested
 * locale and then in the other one. The link points at the entry's detail
 * page when a TitleKey exists, otherwise at a fragment of the research page.
 *
 * @param {object} entry - Payload entry with ArticleMeta, ArticleSubContentZh
 *   and ArticleSubContentEn sections (any of them may be missing).
 * @param {string} kind - "public_papers" for papers; any other value links
 *   under /blog. Also part of the guid input.
 * @param {string} locale - "zh" or "en".
 * @param {string} origin - Site origin used to build absolute links.
 * @returns {object | null} Item shaped as
 *   { guid, title, link, pubDate, author?, summary? }, or null without a title.
 */
function buildItem(entry, kind, locale, origin) {
  const meta = entry?.ArticleMeta ?? {};
  const zh = entry?.ArticleSubContentZh ?? {};
  const en = entry?.ArticleSubContentEn ?? {};

  // `||` on the normalized Title (rather than `??` on the raw value) so that
  // an empty or whitespace-only Title also falls back to ShortTitle.
  const title = pickLocalized(
    normalizeText(zh.Title) || zh.ShortTitle,
    normalizeText(en.Title) || en.ShortTitle,
    locale,
  );
  if (!title) return null;

  const abstract = pickLocalized(zh.Abstract, en.Abstract, locale);
  const titleKey = pickLocalized(zh.TitleKey, en.TitleKey, locale);
  const articleId = String(meta.ArticleID ?? meta.ID ?? "").trim();

  let link;
  if (titleKey) {
    const path = kind === "public_papers"
      ? `/${locale}/public_papers/${encodeURIComponent(titleKey)}`
      : `/${locale}/blog/${encodeURIComponent(titleKey)}`;
    link = new URL(path, origin).href;
  } else {
    // No detail page is known: use a distinct fragment on the research page so
    // items do not all share one link (dedupeAndSort keys on the link).
    const fallback = articleId || title;
    link = `${origin}/${locale}/research#${encodeURIComponent(fallback)}`;
  }

  const author = normalizeText(meta.Author);
  const pubDate = parsePublishDate(meta.PublishDate);

  return {
    guid: hashGuid(`${kind}|${articleId || titleKey || title}`),
    title,
    link,
    pubDate,
    author: author || undefined,
    summary: abstract || undefined,
  };
}
176
+
177
+
178
/**
 * Drops items whose key (link, or guid when the link is empty) was already
 * seen and orders the rest newest first.
 *
 * @param {Array<{link?: string, guid: string, pubDate: Date}>} items - Items to
 *   clean up; the input array is not modified.
 * @returns {object[]} New array of distinct items sorted by descending pubDate.
 */
function dedupeAndSort(items) {
  const seenKeys = new Set();
  const distinct = items.filter((item) => {
    const key = item.link || item.guid;
    if (seenKeys.has(key)) return false;
    seenKeys.add(key);
    return true;
  });
  // `filter` returned a fresh array, so sorting in place leaves `items` alone.
  return distinct.sort((a, b) => b.pubDate.getTime() - a.pubDate.getTime());
}
190
+
191
+
192
/**
 * Plugin entry point: loads the ByteDance Seed research page and returns its
 * papers (`article_list`) and news entries (`feedList`) as feed items.
 *
 * @param {string} sourceId - Research page URL.
 * @param {object} ctx - Host context providing `deps` and `fetchHtml`.
 * @returns {Promise<object[]>} Distinct items, newest first.
 * @throws {Error} When the embedded data block is missing or yields no items.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  // The entries live in inline-script JSON, so purify must be turned off for
  // the script content to be present in the returned HTML.
  const page = await ctx.fetchHtml(sourceId, { waitMs: 4500, purify: false });
  const pageUrl = page.finalUrl || sourceId;
  const locale = detectLocale(pageUrl);
  const origin = toOrigin(pageUrl);

  const payload = getResearchPayload(page.html);
  if (!payload) {
    throw new Error("[bytedance-seed-research] 未找到研究页数据块(window._ROUTER_DATA / __MODERN_DATA__)");
  }

  // Each payload list paired with the item kind it produces.
  const sections = [
    ["public_papers", payload.article_list],
    ["blog", payload.feedList],
  ];
  const parsed = [];
  for (const [kind, list] of sections) {
    if (!Array.isArray(list)) continue;
    for (const entry of list) {
      const item = buildItem(entry, kind, locale, origin);
      if (item) parsed.push(item);
    }
  }

  const items = dedupeAndSort(parsed);
  if (items.length === 0) {
    throw new Error("[bytedance-seed-research] 研究页数据存在,但未解析到有效条目");
  }
  return items;
}
225
+
226
+
227
// Plugin descriptor: `listUrlPattern` accepts the /zh/research and /en/research
// pages (optional trailing slash and query string) and `fetchItems` produces
// the items for one of them.
const bytedanceSeedResearchPlugin = {
  id: "bytedance-seed-research",
  listUrlPattern: /^https?:\/\/seed\.bytedance\.com\/(zh|en)\/research(?:\/)?(\?.*)?$/i,
  fetchItems,
};

export default bytedanceSeedResearchPlugin;