rssany 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -5
- package/app/plugins/builtin/agi-eval-evaluation.rssany.js +1 -1
- package/app/plugins/builtin/brightdata-blog.rssany.js +1 -1
- package/app/plugins/builtin/five-radar.rssany.js +1 -1
- package/app/plugins/builtin/google-deepmind-research.rssany.js +1 -1
- package/app/plugins/builtin/opendrivelab-publications.rssany.js +1 -1
- package/app/plugins/builtin/pjlab-adg-publications.rssany.js +1 -1
- package/app/plugins/builtin/theinformation-briefings.rssany.js +150 -0
- package/app/plugins/builtin/x.rssany.js +192 -23
- package/app/plugins/builtin/zhipu-research.rssany.js +2 -2
- package/app/plugins/site.rssany.js +1 -0
- package/dist/index.js +347 -255
- package/dist/index.js.map +1 -1
- package/init/config.json +1 -1
- package/package.json +8 -8
- package/webui/build/200.html +6 -6
- package/webui/build/_app/immutable/assets/12.DfJcfUWl.css +1 -0
- package/webui/build/_app/immutable/assets/5.B-dPiwB7.css +1 -0
- package/webui/build/_app/immutable/assets/6.B27N7pdA.css +1 -0
- package/webui/build/_app/immutable/assets/8.Cgji2b15.css +1 -0
- package/webui/build/_app/immutable/assets/9.BsCIAvn3.css +1 -0
- package/webui/build/_app/immutable/assets/homeFeedPanelStore.CSvlNcpm.css +1 -0
- package/webui/build/_app/immutable/chunks/5LVkDJzw.js +1 -0
- package/webui/build/_app/immutable/chunks/Bns1MuyM.js +36 -0
- package/webui/build/_app/immutable/chunks/{D6VIKef0.js → Bu9HsS-V.js} +1 -1
- package/webui/build/_app/immutable/chunks/{Dbqx2mXq.js → CmjOpds-.js} +1 -1
- package/webui/build/_app/immutable/chunks/bvuf_jZd.js +36 -0
- package/webui/build/_app/immutable/entry/{app.XPso7q7g.js → app.BVkrDt5l.js} +2 -2
- package/webui/build/_app/immutable/entry/start.D3Q-BMMd.js +1 -0
- package/webui/build/_app/immutable/nodes/{0.BKTQePmA.js → 0.I1lQdWMl.js} +1 -1
- package/webui/build/_app/immutable/nodes/{1.BS3_Rfxm.js → 1.BiQQfx2j.js} +1 -1
- package/webui/build/_app/immutable/nodes/{10.CyyxDCIS.js → 10.CvfUsqsw.js} +1 -1
- package/webui/build/_app/immutable/nodes/{11.CtYgIaGj.js → 11.B4LHPNL6.js} +1 -1
- package/webui/build/_app/immutable/nodes/12.DVFJuIWI.js +1 -0
- package/webui/build/_app/immutable/nodes/{14.D5OEGPR2.js → 14.DfaAf0f8.js} +1 -1
- package/webui/build/_app/immutable/nodes/{15.B4dFN1Gk.js → 15.CMzkX9OK.js} +1 -1
- package/webui/build/_app/immutable/nodes/{16.M7ZII7tl.js → 16.zPgTQNze.js} +1 -1
- package/webui/build/_app/immutable/nodes/{18.Ba_qJjp6.js → 18.BIzqhTqv.js} +1 -1
- package/webui/build/_app/immutable/nodes/{3.7r8v7qkm.js → 3.B8Viux9S.js} +1 -1
- package/webui/build/_app/immutable/nodes/5.B6fR3n6J.js +2 -0
- package/webui/build/_app/immutable/nodes/{6.BDBqx-GY.js → 6.j2O5Mwjv.js} +1 -1
- package/webui/build/_app/immutable/nodes/{7.D5czsDmz.js → 7.Bd2USIrl.js} +1 -1
- package/webui/build/_app/immutable/nodes/{8.pjVNsCdV.js → 8.Bw_d63B_.js} +1 -1
- package/webui/build/_app/immutable/nodes/{9.CsARv1BH.js → 9.pMMi5PP6.js} +1 -1
- package/webui/build/_app/version.json +1 -1
- package/app/plugins/builtin/google.rssany.js +0 -187
- package/webui/build/_app/immutable/assets/12.Ct59LCqW.css +0 -1
- package/webui/build/_app/immutable/assets/5.ClehBQ0g.css +0 -1
- package/webui/build/_app/immutable/assets/6.DSJfjJwx.css +0 -1
- package/webui/build/_app/immutable/assets/8.Ba5_jYIY.css +0 -1
- package/webui/build/_app/immutable/assets/9.m-LCx_kl.css +0 -1
- package/webui/build/_app/immutable/assets/homeFeedPanelStore.iOmfP2qL.css +0 -1
- package/webui/build/_app/immutable/chunks/CZD-YNDw.js +0 -31
- package/webui/build/_app/immutable/chunks/DeX-oq5W.js +0 -41
- package/webui/build/_app/immutable/chunks/dhB8G5Is.js +0 -1
- package/webui/build/_app/immutable/entry/start.Db4snNCd.js +0 -1
- package/webui/build/_app/immutable/nodes/12.Cg8AeCSH.js +0 -1
- package/webui/build/_app/immutable/nodes/5.CHIzoGrb.js +0 -1
package/README.md
CHANGED
|
@@ -31,11 +31,7 @@
|
|
|
31
31
|
| --- | ------------------------------------------------------------ |
|
|
32
32
|
| 运行时 | Node.js **20–23**(见 `package.json` `engines`) |
|
|
33
33
|
| 后端 | Hono、`tsx` 开发入口 |
|
|
34
|
-
| 数据 | **SQLite
|
|
35
|
-
| 前端 | `webui/`(SvelteKit + Vite,构建输出由根服务托管) |
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
原生模块 `**better-sqlite3**` 安装时会编译;若遇绑定缺失,请确认未禁用构建(仓库 `pnpm-workspace.yaml` 中已允许其 `allowBuilds`)。
|
|
34
|
+
| 数据 | **SQLite**(Node.js 内置 `node:sqlite`,Node.js 20+),默认 **`~/.rssany/data/rssany.db`**(Windows:`%USERPROFILE%\.rssany\data\rssany.db`) |
|
|
39
35
|
|
|
40
36
|
---
|
|
41
37
|
|
|
@@ -131,7 +131,7 @@ function buildSummary(record) {
|
|
|
131
131
|
return clampText(detail);
|
|
132
132
|
}
|
|
133
133
|
|
|
134
|
-
function toFeedItem(record, origin,
|
|
134
|
+
function toFeedItem(record, origin, _source) {
|
|
135
135
|
if (!record || typeof record !== "object") return null;
|
|
136
136
|
const title = normalizeText(record.name);
|
|
137
137
|
if (!title) return null;
|
|
@@ -160,7 +160,7 @@ async function fetchFeedItems(feedUrl) {
|
|
|
160
160
|
.map((node) => normalizeText(node.textContent))
|
|
161
161
|
.filter(Boolean);
|
|
162
162
|
const fallbackCategory = extractCategoryFromLink(link);
|
|
163
|
-
const
|
|
163
|
+
const _finalCategories = categories.length > 0
|
|
164
164
|
? uniqueTexts(categories)
|
|
165
165
|
: (fallbackCategory ? [fallbackCategory] : undefined);
|
|
166
166
|
|
|
@@ -266,7 +266,7 @@ function mapRowToFeedItem(row, origin) {
|
|
|
266
266
|
|
|
267
267
|
const summary = normalizeText(row?.summary);
|
|
268
268
|
const author = normalizeText(row?.source || row?.author);
|
|
269
|
-
const
|
|
269
|
+
const _category = normalizeText(row?.theme?.title);
|
|
270
270
|
const idText = String(row?.id ?? "").trim();
|
|
271
271
|
|
|
272
272
|
return {
|
|
@@ -104,7 +104,7 @@ async function fetchItems(sourceId, ctx) {
|
|
|
104
104
|
if (seen.has(link)) continue;
|
|
105
105
|
seen.add(link);
|
|
106
106
|
|
|
107
|
-
const { summary, category, year } = extractContext(anchor);
|
|
107
|
+
const { summary, category: _category, year } = extractContext(anchor);
|
|
108
108
|
const pubDate = year != null ? new Date(Date.UTC(year, 0, 1)) : new Date();
|
|
109
109
|
|
|
110
110
|
items.push({
|
|
@@ -140,7 +140,7 @@ function parseOneEntry(liNode, currentYear, pageUrl) {
|
|
|
140
140
|
const fallbackYear = parseYear(`${periodical} ${detailNode.textContent}`);
|
|
141
141
|
const finalYear = currentYear ?? fallbackYear;
|
|
142
142
|
const pubDate = finalYear != null ? new Date(Date.UTC(finalYear, 0, 1, 0, 0, 0)) : new Date();
|
|
143
|
-
const
|
|
143
|
+
const _badge = normalizeText((liNode.querySelector(".abbr .badge") ?? liNode.querySelector("abbr"))?.textContent) || undefined;
|
|
144
144
|
const link = pickBestLink(detailNode, pageUrl, entryId);
|
|
145
145
|
const guidSeed = entryId || link || `${title}|${author ?? ""}|${finalYear ?? ""}`;
|
|
146
146
|
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
// Helper bundle for this plugin: assigned from `ctx.deps` at the start of fetchItems;
// hashGuid reads `createHash` from it and parseFeedItems reads `parseHtml`.
let _deps;
|
|
2
|
+
|
|
3
|
+
// The Information — AI Agenda 和 Briefings 列表页
|
|
4
|
+
// 当前结构:.article.feed-item,标题 h3.title a,分类 .category-content a,作者 .authors,摘要 .recent-excerpt .long-excerpt
|
|
5
|
+
|
|
6
|
+
// Site origin; fetchItems uses it as the last-resort base URL when neither finalUrl nor sourceId is set.
const ORIGIN = "https://www.theinformation.com";
|
|
7
|
+
// List-page URLs handled by this plugin: /briefings or /features/<slug>, with optional "www.",
// optional trailing slash and optional query string (case-insensitive).
const LIST_URL_RE =
|
|
8
|
+
/^https?:\/\/(www\.)?theinformation\.com\/(briefings|features\/[^/]+)\/?(\?.*)?$/i;
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
/**
 * Collapse every run of whitespace into one space and trim both ends.
 * `null` and `undefined` are treated as the empty string.
 *
 * @param {string | null | undefined} text
 * @returns {string}
 */
function normalizeText(text) {
  const raw = text ?? "";
  const collapsed = raw.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
/**
 * Build a stable item GUID: the hex SHA-256 digest of `input`,
 * computed with the `createHash` helper found on `_deps`.
 *
 * @param {string} input
 * @returns {string} Hex-encoded digest.
 */
function hashGuid(input) {
  const hasher = _deps.createHash("sha256");
  hasher.update(input);
  return hasher.digest("hex");
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
/**
 * Resolve `rawHref` against `baseUrl` and return the absolute URL string,
 * or `null` when the href is empty, a fragment, a `javascript:` link,
 * unparseable, or does not use http/https.
 *
 * @param {string | null | undefined} rawHref
 * @param {string} baseUrl
 * @returns {string | null}
 */
function toAbsoluteHttpUrl(rawHref, baseUrl) {
  const candidate = rawHref ? rawHref.trim() : "";
  const isUnusable =
    candidate === "" || candidate.startsWith("#") || candidate.startsWith("javascript:");
  if (isUnusable) return null;

  let resolved;
  try {
    resolved = new URL(candidate, baseUrl);
  } catch {
    // Not resolvable against the base: treat as "no link".
    return null;
  }
  return /^https?:$/i.test(resolved.protocol) ? resolved.href : null;
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
/**
 * Stringify `n` and left-pad it with "0" to at least two characters.
 *
 * @param {number | string} n
 * @returns {string}
 */
function pad2(n) {
  const digits = String(n);
  return digits.padStart(2, "0");
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
/**
 * Pick the UTC offset for a US Pacific wall-clock time whose zone label does not
 * say whether daylight saving applies (the generic "PT").
 *
 * The wall-clock time is first read as standard time, then the zone name that
 * `America/Los_Angeles` reports for that instant decides the offset. Times
 * within an hour of a DST switch are inherently ambiguous and may resolve either way.
 *
 * @param {string} wallClock - Local time as `YYYY-MM-DDTHH:mm:ss`, no offset.
 * @returns {"-07:00" | "-08:00"}
 */
function resolvePacificOffset(wallClock) {
  const probe = new Date(`${wallClock}-08:00`);
  if (Number.isNaN(probe.getTime())) return "-08:00";
  const zoneName = new Intl.DateTimeFormat("en-US", {
    timeZone: "America/Los_Angeles",
    timeZoneName: "short",
  })
    .formatToParts(probe)
    .find((part) => part.type === "timeZoneName")?.value;
  return zoneName === "PDT" ? "-07:00" : "-08:00";
}

/**
 * Combine a calendar date ("Apr 14, 2026"), a 12-hour clock time ("7:52am") and a
 * Pacific zone label into an absolute Date.
 *
 * @param {string} datePart
 * @param {string} timePart
 * @param {string} tz - Upper-cased "PDT", "PST" or "PT".
 * @returns {Date | null} `null` when any part cannot be interpreted.
 */
function buildPacificDate(datePart, timePart, tz) {
  const clock = timePart.match(/(\d{1,2}):(\d{2})\s*(am|pm)/i);
  const day = new Date(datePart);
  if (!clock || Number.isNaN(day.getTime())) return null;

  let hour = Number(clock[1]);
  const minute = Number(clock[2]);
  const meridiem = clock[3].toLowerCase();
  if (meridiem === "pm" && hour < 12) hour += 12;
  if (meridiem === "am" && hour === 12) hour = 0;

  // `day` was parsed as local time, so the local getters give back the written calendar date.
  const wallClock =
    `${day.getFullYear()}-${pad2(day.getMonth() + 1)}-${pad2(day.getDate())}` +
    `T${pad2(hour)}:${pad2(minute)}:00`;

  let offset;
  if (tz === "PDT") offset = "-07:00";
  else if (tz === "PST") offset = "-08:00";
  else offset = resolvePacificOffset(wallClock); // generic "PT": depends on the date

  const stamp = new Date(`${wallClock}${offset}`);
  return Number.isNaN(stamp.getTime()) ? null : stamp;
}

/**
 * Parse the text of an `.authors` element, e.g.
 * "By Author · Apr 14, 2026 · 7:52am PDT". A trailing "· N comments" is ignored.
 *
 * When the full "By … · date · time zone" shape matches, the result is an exact
 * instant in Pacific time. Otherwise the text after the author is handed to
 * `Date`, first as a whole and then reduced to its first "·"-separated segment
 * (the calendar date); if nothing parses, the current time is used.
 *
 * @param {string | null | undefined} raw - Raw text of the `.authors` node.
 * @returns {{ author: string | undefined, pubDate: Date }}
 */
function parseAuthorsDate(raw) {
  const text = normalizeText(raw)
    .replace(/\s*·\s*\d+\s+comments?\s*$/i, "")
    .trim();

  const full = text.match(
    /^By\s+(.+?)\s*·\s*(.+?\d{4})\s*·\s*(\d{1,2}:\d{2}\s*(?:am|pm))\s*(PDT|PST|PT)\s*$/i
  );
  if (full) {
    const precise = buildPacificDate(full[2].trim(), full[3].trim(), full[4].toUpperCase());
    if (precise) return { author: full[1].trim(), pubDate: precise };
  }

  const authorMatch = text.match(/^By\s+(.+?)\s*·/i);
  const author = authorMatch ? authorMatch[1].trim() : undefined;
  // Case-insensitive like the author regex above, so "by …" is stripped as well.
  const dateStr = text.replace(/^By\s+.*?\s*·\s*/i, "").trim();

  // Prefer the whole remainder; fall back to the date-only segment so an
  // unrecognised time/zone suffix does not discard the calendar date.
  for (const candidate of [dateStr, dateStr.split("·")[0].trim()]) {
    const parsed = new Date(candidate);
    if (!Number.isNaN(parsed.getTime())) return { author, pubDate: parsed };
  }
  return { author, pubDate: new Date() };
}
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
/**
 * Turn a list page's HTML into feed items.
 *
 * Each `.article.feed-item` contributes one item; cards without a title link,
 * without a usable http(s) URL, or whose URL was already seen are skipped.
 *
 * @param {string} html - Page markup, parsed with `_deps.parseHtml`.
 * @param {string} pageUrl - Base URL for resolving relative links.
 * @returns {Array<object>} Items with guid, title, link, pubDate, author, summary, categories.
 */
function parseFeedItems(html, pageUrl) {
  // Summary sources in priority order; the first element that exists wins.
  const excerptSelectors = [
    ".recent-excerpt .long-excerpt",
    ".recent-excerpt",
    ".short-excerpt",
  ];

  const document = _deps.parseHtml(html);
  const visitedLinks = new Set();
  const results = [];

  for (const card of document.querySelectorAll(".article.feed-item")) {
    const anchor = card.querySelector("h3.title a[href]");
    if (!anchor) continue;

    const title = normalizeText(anchor.textContent);
    const link = toAbsoluteHttpUrl(anchor.getAttribute("href"), pageUrl);
    const isUsable = Boolean(title) && Boolean(link) && !visitedLinks.has(link);
    if (!isUsable) continue;
    visitedLinks.add(link);

    const byline = normalizeText(card.querySelector(".authors")?.textContent ?? "");
    const { author, pubDate } = parseAuthorsDate(byline);

    let excerpt = "";
    for (const selector of excerptSelectors) {
      const content = card.querySelector(selector)?.textContent;
      if (content != null) {
        excerpt = content;
        break;
      }
    }
    const summary = normalizeText(excerpt) || undefined;

    const categoryAnchor = card.querySelector(".category-content a");
    const category = categoryAnchor ? normalizeText(categoryAnchor.textContent) : undefined;

    results.push({
      guid: hashGuid(link),
      title,
      link,
      pubDate,
      author,
      summary,
      categories: category ? [category] : undefined,
    });
  }

  return results;
}
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
/**
 * Plugin entry point: load the list page, parse it and return items newest first.
 *
 * Stores `ctx.deps` in the module-level `_deps` before anything else so the
 * parsing helpers can use it.
 *
 * @param {string} sourceId - URL of the list page to fetch.
 * @param {object} ctx - Provides `deps` and `fetchHtml(sourceId, options)`.
 * @returns {Promise<Array<object>>} Items sorted by `pubDate`, descending.
 * @throws {Error} When no items could be parsed (HTTP status appended when it is >= 400).
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;

  const fetchOptions = {
    waitMs: 5000,
    waitForSelector: ".article.feed-item",
    waitForSelectorTimeoutMs: 25_000,
  };
  const { html, finalUrl, status } = await ctx.fetchHtml(sourceId, fetchOptions);

  const pageUrl = finalUrl || sourceId || ORIGIN;
  const items = parseFeedItems(html, pageUrl);

  if (items.length > 0) {
    const newestFirst = (left, right) => right.pubDate.getTime() - left.pubDate.getTime();
    items.sort(newestFirst);
    return items;
  }

  let hint = "";
  if (status && status >= 400) {
    hint = ` HTTP ${status}`;
  }
  throw new Error(
    `[theinformation] 未解析到条目,页面结构可能已变化或需登录后抓取。${hint}`
  );
}
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
// Plugin descriptor (default export): plugin id, the list-URL pattern (LIST_URL_RE),
// a refresh interval of "1h", and the fetchItems entry point.
export default {
|
|
146
|
+
id: "theinformation",
|
|
147
|
+
listUrlPattern: LIST_URL_RE,
|
|
148
|
+
refreshInterval: "1h",
|
|
149
|
+
fetchItems,
|
|
150
|
+
};
|
|
@@ -34,6 +34,153 @@ function statusPathFromHref(href) {
|
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
|
|
37
|
+
/**
 * True for status links that point at a sub-resource of a post (a single photo
 * or video, analytics, likes, retweets, quotes) rather than at the post itself,
 * so they are not mistaken for the main post's ID.
 *
 * @param {string | null | undefined} href
 * @returns {boolean}
 */
function isAuxStatusSubpath(href) {
  const candidate = href || "";
  const auxPattern = /\/status\/\d+\/(photo|video|analytics|likes|retweets|quotes)\b/i;
  return auxPattern.test(candidate);
}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
/**
 * Find the status path of the post an article represents.
 *
 * The permalink wrapped around the timestamp is preferred (it is what the UI
 * links to); failing that, the first `/status/` link in the article is used.
 * Sub-resource links (see isAuxStatusSubpath) are skipped in both steps, which
 * avoids picking up a gallery sub-link.
 *
 * @param {Element} article
 * @returns {string | null} The status path, or `null` when none is found.
 */
function extractPrimaryStatusPath(article) {
  // Shared by both lookups: null for sub-resource links, otherwise the parsed path.
  const toMainPath = (anchor) => {
    const href = anchor.getAttribute("href") || "";
    return isAuxStatusSubpath(href) ? null : statusPathFromHref(href);
  };

  const stampAnchor = article
    .querySelector("time[datetime]")
    ?.closest("a[href*='/status/']");
  if (stampAnchor) {
    const fromStamp = toMainPath(stampAnchor);
    if (fromStamp) return fromStamp;
  }

  for (const anchor of article.querySelectorAll('a[href*="/status/"]')) {
    const fromLink = toMainPath(anchor);
    if (fromLink) return fromLink;
  }
  return null;
}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
/**
 * True when `article` sits inside another tweet article (a quoted or reposted
 * card). Parsing such inner articles again would produce duplicate guids, so
 * only the outermost timeline tweet is processed.
 *
 * @param {Element} article
 * @returns {boolean}
 */
function isNestedTweetArticle(article) {
  for (let ancestor = article.parentElement; ancestor; ancestor = ancestor.parentElement) {
    // `matches` may be missing on some parsed nodes, hence the optional call.
    if (ancestor.matches?.("article[data-testid='tweet']")) return true;
  }
  return false;
}
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
/**
 * Normalized text of the article's `socialContext` element, if it has one.
 *
 * @param {Element} article
 * @returns {string} Whatever `normalizeText` returns for the element's text
 *   (or for `undefined` when the element is absent).
 */
function extractSocialContext(article) {
  const banner = article.querySelector('[data-testid="socialContext"]');
  const bannerText = banner?.textContent;
  return normalizeText(bannerText);
}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
/** 时间轴上「转推/转发」帖:socialContext 含 Repost/Retweet 或中文 */
|
|
86
|
+
function isRepostArticle(article) {
|
|
87
|
+
const ctx = extractSocialContext(article);
|
|
88
|
+
if (!ctx) return false;
|
|
89
|
+
if (/reposted?|retweet/i.test(ctx)) return true;
|
|
90
|
+
if (/转推|转发/.test(ctx)) return true;
|
|
91
|
+
return false;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
/**
 * Normalize a tweet-photo URL: resolve it against X_ORIGIN and, for
 * `/media/` paths on twimg.com or one of its subdomains, force
 * `format=jpg&name=small`.
 *
 * The host is compared exactly (domain or dot-separated subdomain) rather than
 * by substring, so look-alike hosts such as `twimg.com.evil.example` or
 * `nottwimg.com` are left untouched.
 *
 * @param {string} src - Image URL, absolute or relative.
 * @returns {string} The normalized absolute URL; `src` itself when it is empty,
 *   not a string, or cannot be parsed.
 */
function normalizeTwimgUrl(src) {
  if (!src || typeof src !== "string") return src;
  try {
    const url = new URL(src, X_ORIGIN);
    const host = url.hostname;
    const isTwimgHost = host === "twimg.com" || host.endsWith(".twimg.com");
    if (isTwimgHost && url.pathname.includes("/media/")) {
      url.searchParams.set("format", "jpg");
      url.searchParams.set("name", "small");
    }
    return url.href;
  } catch {
    // Best effort: an unparseable URL is passed through unchanged.
    return src;
  }
}
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
/**
 * Normalize a link-preview card image URL (`/card_img/` paths, which are
 * distinct from tweet photos under `/media/`): resolve it against X_ORIGIN and,
 * on twimg.com or one of its subdomains, add `format=jpg` and `name=small`
 * only where those parameters are missing.
 *
 * The host is compared exactly (domain or dot-separated subdomain) rather than
 * by substring, so look-alike hosts are left untouched.
 *
 * @param {string} src - Image URL, absolute or relative.
 * @returns {string} The normalized absolute URL; `src` itself when it is empty,
 *   not a string, or cannot be parsed.
 */
function normalizeCardImgUrl(src) {
  if (!src || typeof src !== "string") return src;
  try {
    const url = new URL(src, X_ORIGIN);
    const host = url.hostname;
    const isTwimgHost = host === "twimg.com" || host.endsWith(".twimg.com");
    if (isTwimgHost && /\/card_img\//.test(url.pathname)) {
      // Existing values are kept; only absent parameters get a default.
      if (!url.searchParams.has("format")) url.searchParams.set("format", "jpg");
      if (!url.searchParams.has("name")) url.searchParams.set("name", "small");
    }
    return url.href;
  } catch {
    // Best effort: an unparseable URL is passed through unchanged.
    return src;
  }
}
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
/**
 * One-line summary for posts without tweet text, taken from the link card.
 *
 * First choice is the card link's aria-label ("domain.com Title…"): everything
 * after the first space. Otherwise the first direct `div[dir="auto"]` of the
 * small-card detail whose text is longer than 8 characters and is not just a
 * bare domain name.
 *
 * @param {Element} article
 * @returns {string} The summary, or "" when the card offers nothing usable.
 */
function extractLinkCardSummary(article) {
  const labelled = article.querySelector('[data-testid="card.wrapper"] a[aria-label]');
  if (labelled) {
    const label = labelled.getAttribute("aria-label") || "";
    const firstSpace = label.indexOf(" ");
    const headline = firstSpace > 0 ? normalizeText(label.slice(firstSpace + 1)) : "";
    if (headline) return headline;
  }

  const detail = article.querySelector('[data-testid="card.layoutSmall.detail"]');
  if (!detail) return "";

  for (const row of detail.querySelectorAll(':scope > div[dir="auto"]')) {
    const text = normalizeText(row.textContent);
    if (!text || text.length <= 8) continue;
    const isBareDomain = /^[a-z0-9.-]+\.[a-z]{2,}$/i.test(text);
    if (!isBareDomain) return text;
  }
  return "";
}
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
/**
 * Pick a cover image for a post, trying in order:
 *   1. a tweet photo (`tweetPhoto` image served from a twimg `/media` path),
 *   2. the video poster — blob video sources cannot be persisted, so only the
 *      poster can serve as a thumbnail,
 *   3. the link-card image (`card_img`),
 *   4. any other twimg `/media` image outside the `User-Name` block.
 * Avatar images (`profile_images`) are never used.
 *
 * @param {Element} article
 * @returns {string | undefined} Image URL, or `undefined` when nothing fits.
 */
function extractMediaUrl(article) {
  const isAvatar = (url) => /profile_images/i.test(url);
  const isAbsoluteHttp = (url) => /^https?:\/\//i.test(url);

  // 1. Photos attached to the post.
  for (const photo of article.querySelectorAll('[data-testid="tweetPhoto"] img[src]')) {
    const src = photo.getAttribute("src");
    if (!src || isAvatar(src)) continue;
    const isMedia = /pbs\.twimg\.com\/media/i.test(src) || /twimg\.com\/media/i.test(src);
    if (isMedia) return normalizeTwimgUrl(src);
  }

  // 2. Video poster.
  const poster = article.querySelector("video[poster]")?.getAttribute("poster");
  if (poster && isAbsoluteHttp(poster)) return poster;

  // 3. Link-preview card image.
  const cardImages = article.querySelectorAll(
    '[data-testid="card.wrapper"] img[src*="twimg.com/card_img"], [data-testid="card.wrapper"] img[src*="pbs.twimg.com/card_img"]',
  );
  for (const cardImage of cardImages) {
    const src = cardImage.getAttribute("src");
    if (src && isAbsoluteHttp(src) && !isAvatar(src)) {
      return normalizeCardImgUrl(src);
    }
  }

  // 4. Any remaining media image that is not part of the author block.
  const looseImages = article.querySelectorAll(
    'img[src*="pbs.twimg.com/media"], img[src*="twimg.com/media"]',
  );
  for (const image of looseImages) {
    const src = image.getAttribute("src");
    if (src && !isAvatar(src) && !image.closest('[data-testid="User-Name"]')) {
      return normalizeTwimgUrl(src);
    }
  }
  return undefined;
}
|
|
182
|
+
|
|
183
|
+
|
|
37
184
|
function extractAuthor(article, statusPath) {
|
|
38
185
|
const nameBlock = article.querySelector('[data-testid="User-Name"]');
|
|
39
186
|
if (nameBlock) {
|
|
@@ -55,35 +202,49 @@ function extractAuthor(article, statusPath) {
|
|
|
55
202
|
|
|
56
203
|
|
|
57
204
|
function extractTweetText(article) {
|
|
58
|
-
const
|
|
59
|
-
const
|
|
205
|
+
const nodes = article.querySelectorAll('[data-testid="tweetText"]');
|
|
206
|
+
const parts = [];
|
|
207
|
+
for (const node of nodes) {
|
|
208
|
+
const t = normalizeText(node.textContent);
|
|
209
|
+
if (t) parts.push(t);
|
|
210
|
+
}
|
|
211
|
+
let body = parts.length ? parts.join("\n\n") : "";
|
|
60
212
|
const hasShowMore = !!article.querySelector('[data-testid="tweet-text-show-more-link"]');
|
|
61
|
-
if (!
|
|
62
|
-
|
|
213
|
+
if (!body) {
|
|
214
|
+
body = extractLinkCardSummary(article);
|
|
215
|
+
}
|
|
216
|
+
if (!body) {
|
|
217
|
+
body = hasShowMore ? "推文内容较长,请打开原文查看" : "";
|
|
218
|
+
} else if (hasShowMore) {
|
|
219
|
+
body = `${body} ...`;
|
|
220
|
+
}
|
|
221
|
+
if (!body) {
|
|
222
|
+
const fallback = article.querySelector("[lang]");
|
|
223
|
+
body = normalizeText(fallback?.textContent) || "";
|
|
224
|
+
}
|
|
225
|
+
return normalizeText(body);
|
|
63
226
|
}
|
|
64
227
|
|
|
65
228
|
|
|
66
229
|
function parseArticles(root, origin) {
|
|
67
230
|
const entries = [];
|
|
68
231
|
const seen = new Set();
|
|
69
|
-
|
|
232
|
+
let articles = root.querySelectorAll('article[data-testid="tweet"]');
|
|
233
|
+
if (articles.length === 0) {
|
|
234
|
+
articles = root.querySelectorAll('article[role="article"]');
|
|
235
|
+
}
|
|
70
236
|
for (const article of articles) {
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
for (const a of links) {
|
|
74
|
-
const p = statusPathFromHref(a.getAttribute("href"));
|
|
75
|
-
if (p) {
|
|
76
|
-
statusPath = p;
|
|
77
|
-
break;
|
|
78
|
-
}
|
|
79
|
-
}
|
|
237
|
+
if (isNestedTweetArticle(article)) continue;
|
|
238
|
+
const statusPath = extractPrimaryStatusPath(article);
|
|
80
239
|
if (!statusPath || seen.has(statusPath)) continue;
|
|
81
240
|
seen.add(statusPath);
|
|
82
241
|
const link = new URL(statusPath, origin).href;
|
|
83
242
|
const text = extractTweetText(article);
|
|
84
243
|
const author = extractAuthor(article, statusPath);
|
|
85
244
|
const pubDate = article.querySelector("time[datetime]")?.getAttribute("datetime") || undefined;
|
|
86
|
-
|
|
245
|
+
const imageUrl = extractMediaUrl(article);
|
|
246
|
+
const isRepost = isRepostArticle(article);
|
|
247
|
+
entries.push({ link, text, author, pubDate, imageUrl, isRepost });
|
|
87
248
|
}
|
|
88
249
|
return entries;
|
|
89
250
|
}
|
|
@@ -108,14 +269,22 @@ function extractEntriesFromJson(data, origin) {
|
|
|
108
269
|
|
|
109
270
|
|
|
110
271
|
function entriesToFeedItems(entries) {
|
|
111
|
-
return entries.map(({ link, text, author, pubDate }) =>
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
272
|
+
return entries.map(({ link, text, author, pubDate, imageUrl, isRepost }) => {
|
|
273
|
+
const item = {
|
|
274
|
+
guid: _deps.createHash("sha256").update(link).digest("hex"),
|
|
275
|
+
/** 转发帖显示标题 Repost;其余不展示标题 */
|
|
276
|
+
title: isRepost ? "Repost" : "",
|
|
277
|
+
link,
|
|
278
|
+
pubDate: pubDate ? new Date(pubDate) : new Date(),
|
|
279
|
+
author,
|
|
280
|
+
summary: text || undefined,
|
|
281
|
+
};
|
|
282
|
+
if (imageUrl) {
|
|
283
|
+
item.imageUrl = imageUrl;
|
|
284
|
+
item.cover_img = imageUrl;
|
|
285
|
+
}
|
|
286
|
+
return item;
|
|
287
|
+
});
|
|
119
288
|
}
|
|
120
289
|
|
|
121
290
|
|
|
@@ -113,7 +113,7 @@ function buildItemsFromBlogsItems(blogsItems) {
|
|
|
113
113
|
const summary = normalizeText(blog.resume_zh ?? blog.resume_en ?? "");
|
|
114
114
|
const createdAt = String(blog.createAt ?? "").trim();
|
|
115
115
|
const pubDate = createdAt ? new Date(createdAt) : new Date();
|
|
116
|
-
const
|
|
116
|
+
const _category = normalizeText(blog.tag_zh ?? blog.tag_en ?? "");
|
|
117
117
|
items.push({
|
|
118
118
|
guid: hashGuid(link),
|
|
119
119
|
title,
|
|
@@ -263,7 +263,7 @@ function buildItemsFromLeafSequence(html, titleIdMap) {
|
|
|
263
263
|
for (let i = 0; i < leafTexts.length; i += 1) {
|
|
264
264
|
const dateText = leafTexts[i];
|
|
265
265
|
if (!isDateText(dateText)) continue;
|
|
266
|
-
const
|
|
266
|
+
const _category = i > 0 && RESEARCH_TAGS.has(leafTexts[i - 1]) ? leafTexts[i - 1] : undefined;
|
|
267
267
|
|
|
268
268
|
let title = "";
|
|
269
269
|
let summary;
|