rssany 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/app/plugins/builtin/agi-eval-evaluation.rssany.js +188 -0
- package/app/plugins/builtin/amii-research-talent.rssany.js +73 -0
- package/app/plugins/builtin/anthropic-research.rssany.js +155 -0
- package/app/plugins/builtin/appen-resources.rssany.js +155 -0
- package/app/plugins/builtin/baai-wudao-paper-article.rssany.js +185 -0
- package/app/plugins/builtin/baaidata-csdn.rssany.js +242 -0
- package/app/plugins/builtin/baidu-research.rssany.js +222 -0
- package/app/plugins/builtin/brightdata-blog.rssany.js +301 -0
- package/app/plugins/builtin/bytedance-seed-research.rssany.js +231 -0
- package/app/plugins/builtin/five-radar.rssany.js +490 -0
- package/app/plugins/builtin/flageval-news.rssany.js +118 -0
- package/app/plugins/builtin/google-deepmind-research.rssany.js +223 -0
- package/app/plugins/builtin/google-research-datasets.rssany.js +171 -0
- package/app/plugins/builtin/google-research.rssany.js +220 -0
- package/app/plugins/builtin/google.rssany.js +187 -0
- package/app/plugins/builtin/hacker-news-newest.rssany.js +130 -0
- package/app/plugins/builtin/harvard-dataverse.rssany.js +166 -0
- package/app/plugins/builtin/huaweicloud-bbs-blogs.rssany.js +185 -0
- package/app/plugins/builtin/lingowhale.rssany.js +119 -0
- package/app/plugins/builtin/meituan-tech.rssany.js +130 -0
- package/app/plugins/builtin/meta-ai-publications.rssany.js +221 -0
- package/app/plugins/builtin/mila-quebec.rssany.js +199 -0
- package/app/plugins/builtin/mit-csail-research.rssany.js +208 -0
- package/app/plugins/builtin/moonshot.rssany.js +127 -0
- package/app/plugins/builtin/opendatalab-news.rssany.js +174 -0
- package/app/plugins/builtin/opendatalab.rssany.js +109 -0
- package/app/plugins/builtin/opendrivelab-autonomous-driving.rssany.js +114 -0
- package/app/plugins/builtin/opendrivelab-embodiedai.rssany.js +114 -0
- package/app/plugins/builtin/opendrivelab-publications.rssany.js +130 -0
- package/app/plugins/builtin/opendrivelab.rssany.js +333 -0
- package/app/plugins/builtin/paperswithcode.rssany.js +227 -0
- package/app/plugins/builtin/pjlab-adg-publications.rssany.js +202 -0
- package/app/plugins/builtin/rss.rssany.js +11 -1
- package/app/plugins/builtin/selectdataset.rssany.js +206 -0
- package/app/plugins/builtin/sensetime-tech-achievements.rssany.js +154 -0
- package/app/plugins/builtin/supervisely-blog.rssany.js +159 -0
- package/app/plugins/builtin/theinformation-briefings.rssany.js +136 -0
- package/app/plugins/builtin/uci-ml-repository.rssany.js +111 -0
- package/app/plugins/builtin/venturebeat.rssany.js +97 -0
- package/app/plugins/builtin/worldlabs.rssany.js +129 -0
- package/app/plugins/builtin/x.rssany.js +328 -0
- package/app/plugins/builtin/xiaohongshu.rssany.js +283 -0
- package/app/plugins/builtin/zhipu-research.rssany.js +334 -0
- package/dist/index.js +62 -4
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/webui/build/200.html +6 -6
- package/webui/build/_app/immutable/assets/{0.DjU2hdCQ.css → 0.BB88QFoe.css} +1 -1
- package/webui/build/_app/immutable/assets/homeFeedPanelStore.CSvlNcpm.css +1 -0
- package/webui/build/_app/immutable/chunks/BwlaCkNX.js +36 -0
- package/webui/build/_app/immutable/chunks/C0J2-L94.js +1 -0
- package/webui/build/_app/immutable/chunks/CLOXMsDk.js +36 -0
- package/webui/build/_app/immutable/chunks/{C85CNwD2.js → DgceFEv5.js} +1 -1
- package/webui/build/_app/immutable/chunks/{CllQAdvt.js → SqCUd34O.js} +1 -1
- package/webui/build/_app/immutable/entry/{app.BcD2eSsQ.js → app.B8zBPipq.js} +2 -2
- package/webui/build/_app/immutable/entry/start.CxRCKeCl.js +1 -0
- package/webui/build/_app/immutable/nodes/0.ChLNE3xy.js +11 -0
- package/webui/build/_app/immutable/nodes/{1.DU9aYGAb.js → 1.1N74-4Io.js} +1 -1
- package/webui/build/_app/immutable/nodes/{10.Db6vw7Ih.js → 10.DY30t9Ib.js} +1 -1
- package/webui/build/_app/immutable/nodes/{11.BaAcorz3.js → 11.ITuxnukH.js} +1 -1
- package/webui/build/_app/immutable/nodes/12.qLzWqB1c.js +1 -0
- package/webui/build/_app/immutable/nodes/{14.DqT4pcrQ.js → 14.BHnIxbVM.js} +1 -1
- package/webui/build/_app/immutable/nodes/{15.CCLbjxnH.js → 15.CLjT9il3.js} +1 -1
- package/webui/build/_app/immutable/nodes/{16.DiigpVdP.js → 16.BD-mKCLN.js} +1 -1
- package/webui/build/_app/immutable/nodes/{3.DEcYOQc-.js → 3.Dt5o2Fmz.js} +1 -1
- package/webui/build/_app/immutable/nodes/{5.CvM1TkLG.js → 5.Dy3vSsIP.js} +1 -1
- package/webui/build/_app/immutable/nodes/{6.Dscr6LkS.js → 6.DvclsL6H.js} +1 -1
- package/webui/build/_app/immutable/nodes/{7.Bp60MobD.js → 7.D2nJy-Uz.js} +1 -1
- package/webui/build/_app/immutable/nodes/{8.DwSg0MHh.js → 8.C75mhrqs.js} +1 -1
- package/webui/build/_app/immutable/nodes/{9.BeYOUjxR.js → 9.Bp_QXw3w.js} +1 -1
- package/webui/build/_app/version.json +1 -1
- package/webui/build/_app/immutable/assets/homeFeedPanelStore.BopJZtHu.css +0 -1
- package/webui/build/_app/immutable/chunks/CdMsRjxJ.js +0 -1
- package/webui/build/_app/immutable/chunks/CtijX1u3.js +0 -31
- package/webui/build/_app/immutable/chunks/Dv1VCsiB.js +0 -41
- package/webui/build/_app/immutable/entry/start.CbkdJdz1.js +0 -1
- package/webui/build/_app/immutable/nodes/0.DSUDmOx2.js +0 -11
- package/webui/build/_app/immutable/nodes/12.Cg8AeCSH.js +0 -1
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
// Host-injected dependencies; assigned from ctx.deps at the start of fetchItems.
let _deps;
|
|
2
|
+
|
|
3
|
+
// World Labs blog plugin: scrapes the Research & Insights list page and emits FeedItem objects (no enrich step).
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
// Full English month names, pipe-separated so the string can be used as a regex alternation.
const MONTH_NAME = [
  "January",
  "February",
  "March",
  "April",
  "May",
  "June",
  "July",
  "August",
  "September",
  "October",
  "November",
  "December",
].join("|");

// Matches dates written like "March 5, 2024" (case-insensitive); group 1 is the month name.
const DATE_RE = new RegExp(`\\b(${MONTH_NAME})\\s+\\d{1,2},\\s+\\d{4}\\b`, "i");

// Lower-cased month name -> zero-based month index, the form Date.UTC expects.
const MONTH_INDEX = Object.fromEntries(
  MONTH_NAME.split("|").map((name, index) => [name.toLowerCase(), index]),
);
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
/**
 * Collapse every run of whitespace to a single space and trim both ends.
 * null and undefined are treated as the empty string.
 */
function normalizeText(text) {
  const raw = text ?? "";
  const collapsed = raw.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
/** Return the hex-encoded SHA-256 digest of `input`, computed with the injected createHash. */
function hashGuid(input) {
  const hasher = _deps.createHash("sha256");
  const fed = hasher.update(input);
  return fed.digest("hex");
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
/**
 * Resolve `rawHref` against `baseUrl` and return the absolute URL string.
 * Returns null for empty values, fragment-only links, `javascript:` links,
 * anything that fails to parse, and any non-http(s) scheme.
 */
function toAbsoluteHttpUrl(rawHref, baseUrl) {
  const href = rawHref ? rawHref.trim() : "";
  if (href === "" || href.startsWith("#") || href.startsWith("javascript:")) {
    return null;
  }

  let resolved;
  try {
    resolved = new URL(href, baseUrl);
  } catch {
    return null;
  }
  return /^https?:$/i.test(resolved.protocol) ? resolved.href : null;
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
/**
 * Split a card's meta line (e.g. "March 5, 2024 | Jane Doe") into a date and an author.
 *
 * @param {string|null|undefined} metaText Raw text of the meta paragraph.
 * @returns {{pubDate: Date, author: (string|undefined)}} `pubDate` is noon UTC of the
 *   matched day, or the current time when no usable date is found. `author` is the text
 *   following the date with leading separators removed, or undefined when nothing is left.
 */
function parseDateAndAuthor(metaText) {
  const text = normalizeText(metaText);
  const m = text.match(DATE_RE);
  if (!m) return { pubDate: new Date(), author: undefined };

  const dateText = m[0];
  const parts = dateText.match(/^(?<month>[A-Za-z]+)\s+(?<day>\d{1,2}),\s*(?<year>\d{4})$/);
  let date = new Date();
  if (parts?.groups) {
    const month = MONTH_INDEX[parts.groups.month.toLowerCase()];
    const day = Number(parts.groups.day);
    const year = Number(parts.groups.year);
    if (month != null && Number.isFinite(day) && Number.isFinite(year)) {
      // Always use noon UTC so a date-only value cannot slip a day across time zones.
      date = new Date(Date.UTC(year, month, day, 12, 0, 0));
    }
  }

  // Whitespace is part of the stripped prefix so that separator runs such as "| - "
  // are removed completely instead of stopping at the first space; en/em dashes are
  // accepted alongside the ASCII separators.
  const authorText = text
    .slice(m.index + dateText.length)
    .replace(/^[\s|/\-–—•·,:]+/, "")
    .trim();

  return {
    pubDate: Number.isNaN(date.getTime()) ? new Date() : date,
    author: authorText || undefined,
  };
}
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
/**
 * Turn one anchor element from the list page into a feed item.
 *
 * @param {object} anchor Element exposing querySelector / querySelectorAll / getAttribute.
 * @param {string} finalUrl Page URL used as the base for resolving the anchor's href.
 * @returns {object|null} `{ guid, title, link, pubDate, author, summary }`, or null when
 *   the anchor has no h2/h3 title or no usable http(s) link.
 */
function parseCard(anchor, finalUrl) {
  const title = normalizeText(anchor.querySelector("h2, h3")?.textContent);
  if (!title) return null;

  const link = toAbsoluteHttpUrl(anchor.getAttribute("href"), finalUrl);
  if (!link) return null;

  // Array.from accepts both plain arrays and NodeList-style iterables, so this does not
  // depend on the parser returning a real Array from querySelectorAll.
  const paragraphTexts = Array.from(anchor.querySelectorAll("p"), (p) =>
    normalizeText(p.textContent),
  ).filter(Boolean);

  // Only a paragraph that actually contains a date is treated as the meta line.
  // Falling back to the first paragraph would consume it as "meta" and drop it from
  // the summary on cards that carry no date at all.
  const metaText = paragraphTexts.find((t) => DATE_RE.test(t)) ?? "";
  const { pubDate, author } = parseDateAndAuthor(metaText);
  const summary = paragraphTexts.find((t) => !DATE_RE.test(t));

  return {
    guid: hashGuid(link),
    title,
    link,
    pubDate,
    author,
    summary: summary || undefined,
  };
}
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
/**
 * Fetch the list page for `sourceId`, parse every anchor as a card, and return the
 * resulting items de-duplicated by link (first occurrence wins).
 * Throws when no anchor yields an item.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const { html: pageHtml, finalUrl: pageUrl } = await ctx.fetchHtml(sourceId, { waitMs: 3500 });
  const root = _deps.parseHtml(pageHtml);

  // A Map keeps insertion order, so the output order matches document order.
  const itemsByLink = new Map();
  for (const anchor of root.querySelectorAll("a[href]")) {
    const item = parseCard(anchor, pageUrl);
    if (item && !itemsByLink.has(item.link)) {
      itemsByLink.set(item.link, item);
    }
  }

  if (itemsByLink.size === 0) {
    throw new Error("[worldlabs] 未解析到条目,页面结构可能已变化");
  }
  return [...itemsByLink.values()];
}
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
// Plugin descriptor: id, the list-page URL pattern (worldlabs.ai/blog, optional "www."
// and query string), and the fetchItems entry point.
const worldLabsPlugin = {
  id: "worldlabs",
  listUrlPattern: /^https?:\/\/(www\.)?worldlabs\.ai\/blog(\?.*)?$/i,
  fetchItems,
};

export default worldLabsPlugin;
|
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
// Host-injected dependencies; assigned from ctx.deps at the start of fetchItems.
let _deps;
|
|
2
|
+
|
|
3
|
+
// X (Twitter) site plugin: fetches and parses the post list on a user's profile page.
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
// Default origin: getOrigin's fallback and the base for resolving relative image URLs.
const X_ORIGIN = "https://x.com";
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
/** Return the origin of `url`, or X_ORIGIN when `url` cannot be parsed as an absolute URL. */
function getOrigin(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return X_ORIGIN;
  }
  return parsed.origin;
}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
/**
 * Collapse whitespace runs to single spaces and trim the result.
 * null and undefined yield the empty string.
 */
function normalizeText(text) {
  if (text == null) return "";
  const singleSpaced = text.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
/**
 * Reduce an href to its canonical "/<handle>/status/<id>" path.
 * Hrefs starting with "http" are parsed as absolute URLs; anything else has its query
 * string cut off. Returns null when the href is empty, fails to parse, or does not
 * start with a status path.
 */
function statusPathFromHref(href) {
  if (!href) return null;
  try {
    const [pathOnly] = href.startsWith("http") ? [new URL(href).pathname] : href.split("?");
    const hit = pathOnly.match(/^\/([A-Za-z0-9_]{1,32})\/status\/(\d+)/);
    return hit ? `/${hit[1]}/status/${hit[2]}` : null;
  } catch {
    return null;
  }
}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
/**
 * True for status sub-links that are not the post permalink itself (single photo/video,
 * analytics, likes, retweets, quotes), so they are not mistaken for the main post id.
 */
function isAuxStatusSubpath(href) {
  const candidate = href || "";
  return /\/status\/\d+\/(photo|video|analytics|likes|retweets|quotes)\b/i.test(candidate);
}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
/**
 * Find the post's own status path inside an article element.
 * The permalink wrapping the timestamp is preferred; otherwise the first status link
 * that is not an auxiliary sub-link (photo, analytics, ...) is used, so a quoted post
 * or gallery link is not picked by accident. Returns null when nothing qualifies.
 */
function extractPrimaryStatusPath(article) {
  // Shared rule for both passes: skip auxiliary sub-links, then canonicalise.
  const pathOf = (anchor) => {
    const href = anchor.getAttribute("href") || "";
    return isAuxStatusSubpath(href) ? null : statusPathFromHref(href);
  };

  const stampAnchor = article.querySelector("time[datetime]")?.closest("a[href*='/status/']");
  if (stampAnchor) {
    const viaTimestamp = pathOf(stampAnchor);
    if (viaTimestamp) return viaTimestamp;
  }

  for (const anchor of article.querySelectorAll('a[href*="/status/"]')) {
    const found = pathOf(anchor);
    if (found) return found;
  }
  return null;
}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
/**
 * True when `article` sits inside another tweet article (quote/repost card).
 * Parsing such inner articles again would produce duplicate guids, so callers only
 * handle the outermost article on the timeline.
 *
 * The ancestor walk uses `parentElement` when present and `parentNode` otherwise, and
 * tests each ancestor with `matches()` when available, falling back to a tag-name plus
 * `data-testid` comparison. Without the fallbacks, a node implementation lacking
 * `parentElement` or `matches` made this check silently return false for every article.
 * NOTE(review): which DOM API the injected parseHtml returns is not visible here —
 * confirm against the host.
 *
 * @param {object} article Element-like node.
 * @returns {boolean}
 */
function isNestedTweetArticle(article) {
  const isTweetArticle = (node) => {
    if (typeof node.matches === "function") {
      return node.matches("article[data-testid='tweet']");
    }
    const tag = String(node.tagName ?? node.rawTagName ?? "").toLowerCase();
    return tag === "article" && node.getAttribute?.("data-testid") === "tweet";
  };

  let ancestor = article.parentElement ?? article.parentNode;
  while (ancestor) {
    if (isTweetArticle(ancestor)) return true;
    ancestor = ancestor.parentElement ?? ancestor.parentNode;
  }
  return false;
}
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
/** Return the normalized text of the article's socialContext element ("" when absent). */
function extractSocialContext(article) {
  const banner = article.querySelector('[data-testid="socialContext"]');
  const rawLabel = banner?.textContent;
  return normalizeText(rawLabel);
}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
/**
 * True when the article is a repost/retweet on the timeline, i.e. its socialContext
 * text mentions Repost/Retweet or the Chinese equivalents.
 */
function isRepostArticle(article) {
  const label = extractSocialContext(article);
  if (!label) return false;
  return /reposted?|retweet/i.test(label) || /转推|转发/.test(label);
}
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
/**
 * Resolve `src` against X_ORIGIN and, for twimg.com "/media/" images, force the
 * `format=jpg` and `name=small` query parameters.
 * Empty or non-string input, and input that fails to parse, is returned unchanged.
 */
function normalizeTwimgUrl(src) {
  if (typeof src !== "string" || src === "") return src;

  let parsed;
  try {
    parsed = new URL(src, X_ORIGIN);
  } catch {
    return src;
  }

  const isTweetMedia =
    parsed.hostname.includes("twimg.com") && parsed.pathname.includes("/media/");
  if (isTweetMedia) {
    parsed.searchParams.set("format", "jpg");
    parsed.searchParams.set("name", "small");
  }
  return parsed.href;
}
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
/**
 * Normalize a link-preview card image URL (twimg.com "/card_img/", which is distinct
 * from tweet media under "/media/"): resolve against X_ORIGIN and add `format=jpg` /
 * `name=small` only where those parameters are missing.
 * Empty or non-string input, and input that fails to parse, is returned unchanged.
 */
function normalizeCardImgUrl(src) {
  if (typeof src !== "string" || src === "") return src;

  let parsed;
  try {
    parsed = new URL(src, X_ORIGIN);
  } catch {
    return src;
  }

  const isCardImage =
    parsed.hostname.includes("twimg.com") && /\/card_img\//.test(parsed.pathname);
  if (isCardImage) {
    const defaults = [
      ["format", "jpg"],
      ["name", "small"],
    ];
    for (const [key, value] of defaults) {
      if (!parsed.searchParams.has(key)) parsed.searchParams.set(key, value);
    }
  }
  return parsed.href;
}
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
/**
 * One-line summary for posts without tweetText: taken from the link card's aria-label
 * (shaped like "domain.com Title…", everything after the first space), or else from
 * the first text row of a small card that is longer than 8 characters and is not just
 * a bare domain name. Returns "" when neither source yields text.
 */
function extractLinkCardSummary(article) {
  const labelled = article.querySelector('[data-testid="card.wrapper"] a[aria-label]');
  if (labelled) {
    const label = labelled.getAttribute("aria-label") || "";
    const firstSpace = label.indexOf(" ");
    const afterDomain = firstSpace > 0 ? normalizeText(label.slice(firstSpace + 1)) : "";
    if (afterDomain) return afterDomain;
  }

  const detail = article.querySelector('[data-testid="card.layoutSmall.detail"]');
  const rows = detail ? detail.querySelectorAll(':scope > div[dir="auto"]') : [];
  for (const row of rows) {
    const line = normalizeText(row.textContent);
    if (!line || line.length <= 8) continue;
    const looksLikeDomain = /^[a-z0-9.-]+\.[a-z]{2,}$/i.test(line);
    if (!looksLikeDomain) return line;
  }
  return "";
}
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
/**
 * Pick the first usable image for a post, in priority order:
 *   1. a tweetPhoto image served from twimg ".../media",
 *   2. the poster of a <video> (only the poster is usable as a thumbnail; per the
 *      original note, blob video sources cannot be persisted),
 *   3. a link-card image (card_img),
 *   4. any other twimg media image outside the User-Name block.
 * Profile images are never returned. Returns undefined when nothing matches.
 */
function extractMediaUrl(article) {
  const isAvatar = (url) => /profile_images/i.test(url);
  const isAbsoluteHttp = (url) => /^https?:\/\//i.test(url);

  for (const img of article.querySelectorAll('[data-testid="tweetPhoto"] img[src]')) {
    const src = img.getAttribute("src");
    if (!src || isAvatar(src)) continue;
    if (/pbs\.twimg\.com\/media/i.test(src) || /twimg\.com\/media/i.test(src)) {
      return normalizeTwimgUrl(src);
    }
  }

  const poster = article.querySelector("video[poster]")?.getAttribute("poster");
  if (poster && isAbsoluteHttp(poster)) return poster;

  const cardImages = article.querySelectorAll(
    '[data-testid="card.wrapper"] img[src*="twimg.com/card_img"], [data-testid="card.wrapper"] img[src*="pbs.twimg.com/card_img"]',
  );
  for (const img of cardImages) {
    const src = img.getAttribute("src");
    if (src && isAbsoluteHttp(src) && !isAvatar(src)) {
      return normalizeCardImgUrl(src);
    }
  }

  const looseImages = article.querySelectorAll(
    'img[src*="pbs.twimg.com/media"], img[src*="twimg.com/media"]',
  );
  for (const img of looseImages) {
    const src = img.getAttribute("src");
    if (src && !isAvatar(src) && !img.closest('[data-testid="User-Name"]')) {
      return normalizeTwimgUrl(src);
    }
  }
  return undefined;
}
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
/**
 * Determine the author handle of a post. Sources, in order:
 *   1. a "/handle" profile link inside the User-Name block,
 *   2. an "@handle" mention in that block's text,
 *   3. the handle segment of `statusPath` ("/handle/status/<id>").
 * Returns undefined when none of them yields a handle.
 */
function extractAuthor(article, statusPath) {
  const nameBlock = article.querySelector('[data-testid="User-Name"]');
  if (nameBlock) {
    const profileHref = [...nameBlock.querySelectorAll('a[href^="/"]')]
      .map((anchor) => anchor.getAttribute("href") || "")
      .find((href) => /^\/[A-Za-z0-9_]{1,32}$/.test(href));
    if (profileHref) return profileHref.slice(1);

    const mention = normalizeText(nameBlock.textContent).match(/@([A-Za-z0-9_]{1,32})/);
    if (mention) return mention[1];
  }

  const fromPath = statusPath
    ? statusPath.match(/^\/([A-Za-z0-9_]{1,32})\/status\/\d+$/)
    : null;
  return fromPath ? fromPath[1] : undefined;
}
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
/**
 * Build the text body of a post:
 *   - all non-empty tweetText nodes joined together, else the link-card summary;
 *   - when a "show more" link is present, " ..." is appended to that body, or a
 *     fixed placeholder is used if there is no body at all;
 *   - with no body and no "show more" link, the first [lang] element's text.
 * The result is whitespace-normalized; "" when nothing is found.
 */
function extractTweetText(article) {
  const pieces = [];
  for (const node of article.querySelectorAll('[data-testid="tweetText"]')) {
    const piece = normalizeText(node.textContent);
    if (piece) pieces.push(piece);
  }
  const truncated = Boolean(article.querySelector('[data-testid="tweet-text-show-more-link"]'));

  let body = pieces.join("\n\n") || extractLinkCardSummary(article);
  if (body) {
    if (truncated) body = `${body} ...`;
  } else if (truncated) {
    body = "推文内容较长,请打开原文查看";
  } else {
    body = normalizeText(article.querySelector("[lang]")?.textContent) || "";
  }
  return normalizeText(body);
}
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
/**
 * Collect one entry per outermost post article under `root`.
 * Articles are selected by data-testid="tweet", falling back to role="article" when
 * that finds nothing. Nested articles, articles without a status path, and repeated
 * status paths are skipped.
 *
 * @returns {Array<{link, text, author, pubDate, imageUrl, isRepost}>} `pubDate` is the
 *   raw `datetime` attribute of the article's <time> element, or undefined.
 */
function parseArticles(root, origin) {
  const tweetArticles = root.querySelectorAll('article[data-testid="tweet"]');
  const articles =
    tweetArticles.length === 0
      ? root.querySelectorAll('article[role="article"]')
      : tweetArticles;

  const seenPaths = new Set();
  const entries = [];
  for (const article of articles) {
    if (isNestedTweetArticle(article)) continue;

    const statusPath = extractPrimaryStatusPath(article);
    if (!statusPath || seenPaths.has(statusPath)) continue;
    seenPaths.add(statusPath);

    entries.push({
      link: new URL(statusPath, origin).href,
      text: extractTweetText(article),
      author: extractAuthor(article, statusPath),
      pubDate: article.querySelector("time[datetime]")?.getAttribute("datetime") || undefined,
      imageUrl: extractMediaUrl(article),
      isRepost: isRepostArticle(article),
    });
  }
  return entries;
}
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
/**
 * Fallback extraction: serialize `data` and collect every distinct
 * "/<handle>/status/<id>" path found anywhere in the JSON text.
 * Entries carry only link and author; text is "" and pubDate is undefined.
 * Non-object input (including null) yields an empty array.
 */
function extractEntriesFromJson(data, origin) {
  if (data === null || typeof data !== "object") return [];

  const serialized = JSON.stringify(data);
  const entriesByPath = new Map();
  for (const [, handle, id] of serialized.matchAll(/\/([A-Za-z0-9_]{1,32})\/status\/(\d+)/g)) {
    const statusPath = `/${handle}/status/${id}`;
    if (entriesByPath.has(statusPath)) continue;
    entriesByPath.set(statusPath, {
      link: new URL(statusPath, origin).href,
      text: "",
      author: handle,
      pubDate: undefined,
    });
  }
  return [...entriesByPath.values()];
}
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
/**
 * Convert parsed entries into feed items.
 *
 * @param {Array<{link: string, text?: string, author?: string, pubDate?: string,
 *   imageUrl?: string, isRepost?: boolean}>} entries
 * @returns {object[]} Items with `guid` (SHA-256 hex of the link), `title`, `link`,
 *   `pubDate` (always a valid Date), `author`, `summary`, plus `imageUrl` / `cover_img`
 *   when the entry has an image.
 */
function entriesToFeedItems(entries) {
  return entries.map(({ link, text, author, pubDate, imageUrl, isRepost }) => {
    // A missing timestamp and an unparseable one are treated alike: fall back to the
    // current time so an Invalid Date never ends up in a feed item.
    const parsedDate = pubDate ? new Date(pubDate) : new Date();
    const item = {
      guid: _deps.createHash("sha256").update(link).digest("hex"),
      // Reposts are titled "Repost"; every other post has an empty title.
      title: isRepost ? "Repost" : "",
      link,
      pubDate: Number.isNaN(parsedDate.getTime()) ? new Date() : parsedDate,
      author,
      summary: text || undefined,
    };
    if (imageUrl) {
      item.imageUrl = imageUrl;
      item.cover_img = imageUrl;
    }
    return item;
  });
}
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
/**
 * Fetch a profile page and return its posts as feed items.
 * Rendered articles are tried first; if none are found, each application/json script
 * block is scanned (unparseable blocks are skipped) and the first block that yields
 * entries is used. Throws an Error prefixed with "[X]" when nothing can be extracted;
 * the message differs when the page text looks like an error page.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 6000 });
  const root = _deps.parseHtml(html);
  const origin = getOrigin(finalUrl);

  const fromDom = parseArticles(root, origin);
  if (fromDom.length > 0) return entriesToFeedItems(fromDom);

  for (const script of root.querySelectorAll('script[type="application/json"]')) {
    let fromJson;
    try {
      fromJson = extractEntriesFromJson(JSON.parse(script.textContent || ""), origin);
    } catch {
      // Broken JSON blocks are skipped; later blocks may still be usable.
      continue;
    }
    if (fromJson.length > 0) return entriesToFeedItems(fromJson);
  }

  const pageText = normalizeText(root.textContent).toLowerCase();
  const looksLikeErrorPage = ["something went wrong", "try again"].some((marker) =>
    pageText.includes(marker),
  );
  const message = looksLikeErrorPage
    ? "X 页面暂不可用(可能被风控或需登录),请稍后重试或切换为有头模式并确认登录态"
    : "未解析到推文条目,可能被风控或需登录";
  throw new Error(`[X] ${message}`);
}
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
// Plugin descriptor: id, the profile-page URL template, and the fetchItems entry point.
const xPlugin = {
  id: "x",
  listUrlPattern: "https://x.com/{username}",
  fetchItems,
};

export default xPlugin;
|