rssany 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/app/plugins/builtin/agi-eval-evaluation.rssany.js +188 -0
- package/app/plugins/builtin/amii-research-talent.rssany.js +73 -0
- package/app/plugins/builtin/anthropic-research.rssany.js +155 -0
- package/app/plugins/builtin/appen-resources.rssany.js +155 -0
- package/app/plugins/builtin/baai-wudao-paper-article.rssany.js +185 -0
- package/app/plugins/builtin/baaidata-csdn.rssany.js +242 -0
- package/app/plugins/builtin/baidu-research.rssany.js +222 -0
- package/app/plugins/builtin/brightdata-blog.rssany.js +301 -0
- package/app/plugins/builtin/bytedance-seed-research.rssany.js +231 -0
- package/app/plugins/builtin/five-radar.rssany.js +490 -0
- package/app/plugins/builtin/flageval-news.rssany.js +118 -0
- package/app/plugins/builtin/google-deepmind-research.rssany.js +223 -0
- package/app/plugins/builtin/google-research-datasets.rssany.js +171 -0
- package/app/plugins/builtin/google-research.rssany.js +220 -0
- package/app/plugins/builtin/google.rssany.js +187 -0
- package/app/plugins/builtin/hacker-news-newest.rssany.js +130 -0
- package/app/plugins/builtin/harvard-dataverse.rssany.js +166 -0
- package/app/plugins/builtin/huaweicloud-bbs-blogs.rssany.js +185 -0
- package/app/plugins/builtin/lingowhale.rssany.js +119 -0
- package/app/plugins/builtin/meituan-tech.rssany.js +130 -0
- package/app/plugins/builtin/meta-ai-publications.rssany.js +221 -0
- package/app/plugins/builtin/mila-quebec.rssany.js +199 -0
- package/app/plugins/builtin/mit-csail-research.rssany.js +208 -0
- package/app/plugins/builtin/moonshot.rssany.js +127 -0
- package/app/plugins/builtin/opendatalab-news.rssany.js +174 -0
- package/app/plugins/builtin/opendatalab.rssany.js +109 -0
- package/app/plugins/builtin/opendrivelab-autonomous-driving.rssany.js +114 -0
- package/app/plugins/builtin/opendrivelab-embodiedai.rssany.js +114 -0
- package/app/plugins/builtin/opendrivelab-publications.rssany.js +130 -0
- package/app/plugins/builtin/opendrivelab.rssany.js +333 -0
- package/app/plugins/builtin/paperswithcode.rssany.js +227 -0
- package/app/plugins/builtin/pjlab-adg-publications.rssany.js +202 -0
- package/app/plugins/builtin/rss.rssany.js +11 -1
- package/app/plugins/builtin/selectdataset.rssany.js +206 -0
- package/app/plugins/builtin/sensetime-tech-achievements.rssany.js +154 -0
- package/app/plugins/builtin/supervisely-blog.rssany.js +159 -0
- package/app/plugins/builtin/uci-ml-repository.rssany.js +111 -0
- package/app/plugins/builtin/venturebeat.rssany.js +97 -0
- package/app/plugins/builtin/worldlabs.rssany.js +129 -0
- package/app/plugins/builtin/x.rssany.js +159 -0
- package/app/plugins/builtin/xiaohongshu.rssany.js +283 -0
- package/app/plugins/builtin/zhipu-research.rssany.js +334 -0
- package/dist/index.js +62 -4
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/webui/build/200.html +6 -6
- package/webui/build/_app/immutable/assets/{0.DjU2hdCQ.css → 0.BB88QFoe.css} +1 -1
- package/webui/build/_app/immutable/assets/{homeFeedPanelStore.BopJZtHu.css → homeFeedPanelStore.iOmfP2qL.css} +1 -1
- package/webui/build/_app/immutable/chunks/CZD-YNDw.js +31 -0
- package/webui/build/_app/immutable/chunks/{C85CNwD2.js → D6VIKef0.js} +1 -1
- package/webui/build/_app/immutable/chunks/{CllQAdvt.js → Dbqx2mXq.js} +1 -1
- package/webui/build/_app/immutable/chunks/DeX-oq5W.js +41 -0
- package/webui/build/_app/immutable/chunks/{CdMsRjxJ.js → dhB8G5Is.js} +1 -1
- package/webui/build/_app/immutable/entry/{app.BcD2eSsQ.js → app.XPso7q7g.js} +2 -2
- package/webui/build/_app/immutable/entry/start.Db4snNCd.js +1 -0
- package/webui/build/_app/immutable/nodes/0.BKTQePmA.js +11 -0
- package/webui/build/_app/immutable/nodes/{1.DU9aYGAb.js → 1.BS3_Rfxm.js} +1 -1
- package/webui/build/_app/immutable/nodes/{10.Db6vw7Ih.js → 10.CyyxDCIS.js} +1 -1
- package/webui/build/_app/immutable/nodes/{11.BaAcorz3.js → 11.CtYgIaGj.js} +1 -1
- package/webui/build/_app/immutable/nodes/{14.DqT4pcrQ.js → 14.D5OEGPR2.js} +1 -1
- package/webui/build/_app/immutable/nodes/{15.CCLbjxnH.js → 15.B4dFN1Gk.js} +1 -1
- package/webui/build/_app/immutable/nodes/{16.DiigpVdP.js → 16.M7ZII7tl.js} +1 -1
- package/webui/build/_app/immutable/nodes/{3.DEcYOQc-.js → 3.7r8v7qkm.js} +1 -1
- package/webui/build/_app/immutable/nodes/{5.CvM1TkLG.js → 5.CHIzoGrb.js} +1 -1
- package/webui/build/_app/immutable/nodes/{6.Dscr6LkS.js → 6.BDBqx-GY.js} +1 -1
- package/webui/build/_app/immutable/nodes/{7.Bp60MobD.js → 7.D5czsDmz.js} +1 -1
- package/webui/build/_app/immutable/nodes/{8.DwSg0MHh.js → 8.pjVNsCdV.js} +1 -1
- package/webui/build/_app/immutable/nodes/{9.BeYOUjxR.js → 9.CsARv1BH.js} +1 -1
- package/webui/build/_app/version.json +1 -1
- package/webui/build/_app/immutable/chunks/CtijX1u3.js +0 -31
- package/webui/build/_app/immutable/chunks/Dv1VCsiB.js +0 -41
- package/webui/build/_app/immutable/entry/start.CbkdJdz1.js +0 -1
- package/webui/build/_app/immutable/nodes/0.DSUDmOx2.js +0 -11
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
let _deps;
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
const DEFAULT_ORIGIN = "https://agi-eval.org";
|
|
5
|
+
const DEFAULT_SOURCES = ["PUBLIC", "PRIVATE"];
|
|
6
|
+
const VALID_SOURCES = new Set(DEFAULT_SOURCES);
|
|
7
|
+
|
|
8
|
+
/**
 * Collapse every run of whitespace into a single space and trim both ends.
 *
 * Callers pass fields taken from decoded JSON, which are not guaranteed to be
 * strings. Finite numbers and bigints are stringified; every other non-string
 * value (null, undefined, objects, booleans, NaN) yields "" instead of
 * throwing on a missing `.replace` method.
 *
 * @param {unknown} text - Raw value to normalize.
 * @returns {string} The normalized text, possibly empty.
 */
function normalizeText(text) {
  if (typeof text === "string") {
    return text.replace(/\s+/g, " ").trim();
  }
  if (typeof text === "bigint" || (typeof text === "number" && Number.isFinite(text))) {
    return String(text);
  }
  return "";
}
|
|
11
|
+
|
|
12
|
+
/**
 * Build a GUID string for a feed item.
 *
 * Asks `_deps.createHash` for a "sha256" hash, feeds it `input` and returns
 * the "hex" digest. `_deps` is the module-level variable that `fetchItems`
 * assigns from `ctx.deps` before any item is built.
 *
 * @param {string} input - Seed text identifying the item.
 * @returns {string} The digest produced for `input`.
 */
function hashGuid(input) {
  const digest = _deps.createHash("sha256").update(input).digest("hex");
  return digest;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Convert a loosely typed timestamp field into a valid `Date`.
 *
 * Accepted inputs:
 *  - `Date` instances (returned as-is when valid);
 *  - positive epoch numbers, where values below 1e11 are treated as seconds
 *    (1e11 milliseconds is still in 1973) and larger ones as milliseconds;
 *  - strings, either a 10/13-digit epoch or anything `new Date()` can parse.
 *
 * Zero and negative epochs are treated as "not set" so that the caller can
 * fall through to its next candidate field.
 *
 * @param {unknown} value - Raw field value.
 * @returns {Date | undefined} The parsed date, or `undefined` when the value
 *   is missing or cannot be interpreted.
 */
function parseDate(value) {
  if (value == null) return undefined;
  if (value instanceof Date) {
    return Number.isNaN(value.getTime()) ? undefined : value;
  }

  const fromEpoch = (epoch) => {
    if (!Number.isFinite(epoch) || epoch <= 0) return undefined;
    const date = new Date(epoch < 1e11 ? epoch * 1000 : epoch);
    return Number.isNaN(date.getTime()) ? undefined : date;
  };

  if (typeof value === "number") return fromEpoch(value);
  // Objects, booleans, etc. carry no usable timestamp.
  if (typeof value !== "string") return undefined;

  const text = normalizeText(value);
  if (!text) return undefined;
  // `new Date("1700000000000")` is invalid, so digit-only epochs need their own path.
  if (/^\d{10}(?:\d{3})?$/.test(text)) return fromEpoch(Number(text));

  const date = new Date(text);
  return Number.isNaN(date.getTime()) ? undefined : date;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Shorten `text` to at most `maxLen` UTF-16 code units, ending a shortened
 * result with an ellipsis character.
 *
 * The cut never lands between the two halves of a surrogate pair, so the
 * result cannot end in a lone surrogate, and a non-positive `maxLen` yields
 * just the ellipsis instead of a negative slice index.
 *
 * @param {string} text - Text to shorten.
 * @param {number} [maxLen=300] - Maximum length including the ellipsis.
 * @returns {string} `text` itself when it already fits, otherwise the
 *   trimmed prefix followed by "…".
 */
function clampText(text, maxLen = 300) {
  if (text.length <= maxLen) return text;

  let end = Math.max(0, maxLen - 1);
  if (end > 0) {
    const lastUnit = text.charCodeAt(end - 1);
    // A high surrogate at the cut means its low half would be dropped.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
  }
  return `${text.slice(0, end).trim()}…`;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Choose the origin that API requests and item links are built on.
 *
 * @param {string} sourceId - The subscribed list URL.
 * @returns {string} The origin of `sourceId` when it parses as an http(s)
 *   URL, otherwise `DEFAULT_ORIGIN`.
 */
function pickOrigin(sourceId) {
  let parsed = null;
  try {
    parsed = new URL(sourceId);
  } catch {
    // Not an absolute URL: fall back to the default origin below.
  }
  if (parsed !== null && /^https?:$/i.test(parsed.protocol)) {
    return parsed.origin;
  }
  return DEFAULT_ORIGIN;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Work out which evaluation sources to query from the subscribed URL.
 *
 * Candidate tokens are read from the `source` and `sources` query parameters
 * and from the path segment following `/evaluation/home/`. Tokens are split
 * on commas, plus signs, whitespace, pipes and slashes, upper-cased, and kept
 * only when present in `VALID_SOURCES`; duplicates keep their first position.
 *
 * @param {string} sourceId - The subscribed list URL.
 * @returns {string[]} The selected sources, or a copy of `DEFAULT_SOURCES`
 *   when none were recognised (including when the URL cannot be parsed).
 */
function parseSources(sourceId) {
  const tokens = [];
  try {
    const parsed = new URL(sourceId);
    for (const key of ["source", "sources"]) {
      const value = parsed.searchParams.get(key);
      if (value) {
        tokens.push(...value.split(/[,+\s|/]+/g));
      }
    }

    const segment = parsed.pathname.match(/^\/evaluation\/home\/([^/?#]+)/i);
    if (segment) {
      tokens.push(...decodeURIComponent(segment[1]).split(/[,+\s|/]+/g));
    }
  } catch {
    // ignore: whatever was collected so far is still used
  }

  // A Set keeps insertion order, which gives first-occurrence ordering.
  const unique = new Set();
  for (const token of tokens) {
    const source = normalizeText(token).toUpperCase();
    if (VALID_SOURCES.has(source)) unique.add(source);
  }
  return unique.size > 0 ? [...unique] : [...DEFAULT_SOURCES];
}
|
|
69
|
+
|
|
70
|
+
/**
 * Request the evaluation list for one source from the site's JSON endpoint.
 *
 * Sends a POST to `${origin}/commWebApi/evaluation/home` with `{ source }`
 * as the JSON body.
 *
 * @param {string} origin - Origin to send the request to.
 * @param {string} source - Source identifier placed in the request body.
 * @returns {Promise<Array>} The `data.evaluationList` array of the response.
 * @throws {Error} On a non-OK HTTP status, when the body is not JSON or its
 *   `rescode` is not 0, or when `data.evaluationList` is not an array.
 */
async function fetchBySource(origin, source) {
  const endpoint = `${origin}/commWebApi/evaluation/home`;
  const requestInit = {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Accept": "application/json",
    },
    body: JSON.stringify({ source }),
  };

  const response = await fetch(endpoint, requestInit);
  if (!response.ok) {
    throw new Error(`[agi-eval-evaluation] source=${source} 请求失败: HTTP ${response.status}`);
  }

  // An unparsable body becomes null and is reported as an API-level failure.
  const payload = await response.json().catch(() => null);
  const succeeded = Boolean(payload) && payload.rescode === 0;
  if (!succeeded) {
    const message = normalizeText(payload?.msg) || "接口返回异常";
    throw new Error(`[agi-eval-evaluation] source=${source} 请求失败: ${message}`);
  }

  const evaluationList = payload.data?.evaluationList;
  if (Array.isArray(evaluationList)) {
    return evaluationList;
  }
  throw new Error(`[agi-eval-evaluation] source=${source} 响应结构异常`);
}
|
|
96
|
+
|
|
97
|
+
/**
 * Build the page URL for an evaluation record.
 *
 * @param {string} origin - Origin the link is rooted at.
 * @param {object} record - API record; its `id` and `name` are read.
 * @returns {string} `/evaluation/<name>?id=<id>` when both are present,
 *   `/evaluation/detail?id=<id>` when only the id is, and the
 *   `/evaluation/home` list page otherwise.
 */
function buildLink(origin, record) {
  const id = String(record?.id ?? "").trim();
  const name = normalizeText(record?.name);

  if (!id) {
    return `${origin}/evaluation/home`;
  }
  const page = name ? encodeURIComponent(name) : "detail";
  return `${origin}/evaluation/${page}?id=${encodeURIComponent(id)}`;
}
|
|
108
|
+
|
|
109
|
+
/**
 * Pick a publication date for a record.
 *
 * Tries `publishTime`, then `createdAt`, then `updatedAt`, and uses the first
 * one `parseDate` accepts.
 *
 * @param {object} record - API record.
 * @returns {Date} The first parsable date, or the current time when none of
 *   the fields can be parsed.
 */
function pickPubDate(record) {
  for (const field of ["publishTime", "createdAt", "updatedAt"]) {
    const parsed = parseDate(record?.[field]);
    if (parsed) return parsed;
  }
  return new Date();
}
|
|
117
|
+
|
|
118
|
+
/**
 * Compose the summary text for a record.
 *
 * The description is the first non-empty value among `description`,
 * `introduction`, `zhData.description` and `enData.description`, with a
 * fixed placeholder when all are empty. A view count is appended only when
 * the record actually carries one.
 *
 * @param {object} record - API record.
 * @returns {string} The summary, shortened by `clampText`.
 */
function buildSummary(record) {
  const description = normalizeText(
    record?.description ||
      record?.introduction ||
      record?.zhData?.description ||
      record?.enData?.description ||
      ""
  );
  const detail = description || "AGI-Eval 评测条目";

  // Number(null), Number("") and Number([]) are all 0, so only real numbers
  // and non-blank strings are converted; anything else counts as "no views".
  const rawViews = record?.views;
  let views = Number.NaN;
  if (typeof rawViews === "number") {
    views = rawViews;
  } else if (typeof rawViews === "string" && rawViews.trim() !== "") {
    views = Number(rawViews);
  }

  if (Number.isFinite(views) && views >= 0) {
    return clampText(`${detail} | 浏览量 ${views}`);
  }
  return clampText(detail);
}
|
|
133
|
+
|
|
134
|
+
/**
 * Turn one API record into a feed item.
 *
 * The GUID is hashed from `agi-eval:<id>` when the record has an id and from
 * the item link otherwise.
 *
 * @param {object} record - API record.
 * @param {string} origin - Origin used to build the item link.
 * @param {string} source - Source the record was fetched for (not used when
 *   building the item).
 * @returns {object | null} The feed item, or `null` when `record` is not an
 *   object or has no usable `name`.
 */
function toFeedItem(record, origin, source) {
  if (record === null || typeof record !== "object") return null;

  const title = normalizeText(record.name);
  if (!title) return null;

  const link = buildLink(origin, record);
  const id = String(record.id ?? "").trim();
  const guid = hashGuid(id ? `agi-eval:${id}` : link);
  const pubDate = pickPubDate(record);
  const summary = buildSummary(record);

  return {
    guid,
    title,
    link,
    pubDate,
    author: "AGI-Eval",
    summary,
    sourceId: "agi-eval-evaluation",
  };
}
|
|
153
|
+
|
|
154
|
+
/**
 * Plugin entry point: fetch every selected source and return feed items.
 *
 * Sources are requested one after another. Records are then converted to
 * items, de-duplicated by GUID (first occurrence wins) and sorted newest
 * first.
 *
 * @param {string} sourceId - The subscribed list URL.
 * @param {object} ctx - Plugin context; `ctx.deps` is stored in the
 *   module-level `_deps` for `hashGuid`.
 * @returns {Promise<object[]>} Feed items sorted by `pubDate`, descending.
 * @throws {Error} When a request fails or no item could be built.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const origin = pickOrigin(sourceId);
  const sources = parseSources(sourceId);

  // Phase 1: collect all records before converting any of them.
  const rows = [];
  for (const source of sources) {
    for (const record of await fetchBySource(origin, source)) {
      rows.push({ source, record });
    }
  }

  // Phase 2: convert and de-duplicate; a Map keeps first-seen order.
  const byGuid = new Map();
  for (const row of rows) {
    const item = toFeedItem(row.record, origin, row.source);
    if (item && !byGuid.has(item.guid)) {
      byGuid.set(item.guid, item);
    }
  }

  const items = [...byGuid.values()];
  if (items.length === 0) {
    throw new Error("[agi-eval-evaluation] 未解析到条目,接口结构可能已变化");
  }

  items.sort((a, b) => b.pubDate.getTime() - a.pubDate.getTime());
  return items;
}
|
|
183
|
+
|
|
184
|
+
export default {
|
|
185
|
+
id: "agi-eval-evaluation",
|
|
186
|
+
listUrlPattern: /^https?:\/\/agi-eval\.(org|cn)\/evaluation\/home(?:\/[^/?#]+)?\/?(?:\?.*)?$/i,
|
|
187
|
+
fetchItems,
|
|
188
|
+
};
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
let _deps;
|
|
2
|
+
|
|
3
|
+
// Amii Research & Talent 插件:抓取人物卡片列表(不做正文 enrich)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
const PEOPLE_PATH_RE = /^\/people\/[^/?#]+\/?$/i;
|
|
7
|
+
|
|
8
|
+
/**
 * Collapse whitespace runs into single spaces and trim both ends.
 *
 * @param {string | null | undefined} text - Raw text; nullish becomes "".
 * @returns {string} The normalized text.
 */
function normalizeText(text) {
  const raw = text ?? "";
  const collapsed = raw.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
11
|
+
|
|
12
|
+
/**
 * Build a GUID string from `input` using the "sha256" hash obtained from
 * `_deps.createHash`, rendered as a "hex" digest. `_deps` is set by
 * `fetchItems` from `ctx.deps`.
 *
 * @param {string} input - Seed text (the item link).
 * @returns {string} The digest produced for `input`.
 */
function hashGuid(input) {
  const guid = _deps.createHash("sha256").update(input).digest("hex");
  return guid;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Resolve an anchor's href against the page URL and keep it only when it
 * points at a person page.
 *
 * @param {string | null | undefined} rawHref - The href attribute value.
 * @param {string | URL} pageUrl - Base used to resolve relative hrefs.
 * @returns {string | null} The absolute URL when it is http(s) and its path
 *   matches `PEOPLE_PATH_RE`; `null` otherwise, including when the href is
 *   empty or cannot be parsed.
 */
function resolvePeopleLink(rawHref, pageUrl) {
  if (!rawHref) return null;

  let resolved;
  try {
    resolved = new URL(rawHref, pageUrl);
  } catch {
    return null;
  }

  const isHttp = /^https?:$/i.test(resolved.protocol);
  return isHttp && PEOPLE_PATH_RE.test(resolved.pathname) ? resolved.href : null;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Collect one feed item per person card found in the parsed page.
 *
 * Every anchor whose href contains "/people/" is considered. An anchor is
 * kept when its link resolves to a person page, has not been seen before and
 * contains an `h3` with text, which becomes the title. The first `p` inside
 * the anchor supplies the optional summary.
 *
 * @param {object} root - Parsed HTML root exposing `querySelectorAll`.
 * @param {string | URL} pageUrl - Base URL for resolving hrefs.
 * @returns {object[]} Items in document order; `pubDate` is the time of
 *   extraction.
 */
function extractPeopleItems(root, pageUrl) {
  const byLink = new Map();

  for (const anchor of root.querySelectorAll('a[href*="/people/"]')) {
    const link = resolvePeopleLink(anchor.getAttribute("href"), pageUrl);
    if (!link || byLink.has(link)) continue;

    const title = normalizeText(anchor.querySelector("h3")?.textContent);
    if (!title) continue;

    const summary = normalizeText(anchor.querySelector("p")?.textContent);
    byLink.set(link, {
      guid: hashGuid(link),
      title,
      link,
      pubDate: new Date(),
      author: "Amii",
      summary: summary || undefined,
    });
  }

  return [...byLink.values()];
}
|
|
54
|
+
|
|
55
|
+
/**
 * Plugin entry point: load the research-talent page and return its person
 * cards as feed items.
 *
 * @param {string} sourceId - The subscribed list URL.
 * @param {object} ctx - Plugin context providing `fetchHtml` and `deps`;
 *   `ctx.deps` is stored in the module-level `_deps`.
 * @returns {Promise<object[]>} The extracted items.
 * @throws {Error} When no person item could be extracted.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 3000 });
  const root = _deps.parseHtml(html);
  // Fall back to the requested URL when no final URL is reported, instead of
  // letting `new URL(undefined)` throw before any parsing happens.
  const pageUrl = new URL(finalUrl || sourceId);

  const items = extractPeopleItems(root, pageUrl);
  if (items.length === 0) {
    throw new Error("[amii-research-talent] 未解析到人物条目,页面结构可能已变化");
  }

  return items;
}
|
|
68
|
+
|
|
69
|
+
export default {
|
|
70
|
+
id: "amii-research-talent",
|
|
71
|
+
listUrlPattern: /^https?:\/\/(www\.)?amii\.ca\/research-talent\/?(\?.*)?$/i,
|
|
72
|
+
fetchItems,
|
|
73
|
+
};
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
let _deps;
|
|
2
|
+
|
|
3
|
+
// Anthropic Research 插件:抓取研究页列表条目(不含 enrich)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
const ANTHROPIC_ORIGIN = "https://www.anthropic.com";
|
|
8
|
+
const MONTH_TO_INDEX = {
|
|
9
|
+
jan: 0,
|
|
10
|
+
feb: 1,
|
|
11
|
+
mar: 2,
|
|
12
|
+
apr: 3,
|
|
13
|
+
may: 4,
|
|
14
|
+
jun: 5,
|
|
15
|
+
jul: 6,
|
|
16
|
+
aug: 7,
|
|
17
|
+
sep: 8,
|
|
18
|
+
oct: 9,
|
|
19
|
+
nov: 10,
|
|
20
|
+
dec: 11,
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
/**
 * Collapse whitespace runs into single spaces and trim both ends.
 *
 * @param {string | null | undefined} text - Raw text; nullish becomes "".
 * @returns {string} The normalized text.
 */
function normalizeText(text) {
  const source = text == null ? "" : text;
  return source.replace(/\s+/g, " ").trim();
}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
/**
 * Build a GUID string from `input` using the "sha256" hash obtained from
 * `_deps.createHash`, rendered as a "hex" digest. `_deps` is set by
 * `fetchItems` from `ctx.deps`.
 *
 * @param {string} input - Seed text (the item link).
 * @returns {string} The digest produced for `input`.
 */
function hashGuid(input) {
  const hexDigest = _deps.createHash("sha256").update(input).digest("hex");
  return hexDigest;
}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
/**
 * Resolve an href against a base URL, keeping only http(s) results.
 *
 * @param {string | null | undefined} rawHref - The href attribute value.
 * @param {string | URL} baseUrl - Base for relative hrefs.
 * @returns {string | null} The absolute URL string, or `null` for empty
 *   hrefs, fragment-only hrefs, `javascript:` hrefs, unparsable values and
 *   non-http(s) schemes.
 */
function toAbsoluteHttpUrl(rawHref, baseUrl) {
  const href = rawHref ? rawHref.trim() : "";
  if (href === "" || href[0] === "#" || href.startsWith("javascript:")) {
    return null;
  }

  let resolved;
  try {
    resolved = new URL(href, baseUrl);
  } catch {
    return null;
  }
  return /^https?:$/i.test(resolved.protocol) ? resolved.href : null;
}
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
/**
 * Decide whether a URL path looks like an individual research article.
 *
 * @param {string} pathname - URL path to test.
 * @returns {boolean} `true` for paths under "/research/" with something
 *   after the prefix, except those under "/research/team/".
 */
function isResearchArticlePath(pathname) {
  const prefix = "/research/";
  return (
    pathname.startsWith(prefix) &&
    !pathname.startsWith("/research/team/") &&
    pathname.length > prefix.length
  );
}
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
/**
 * Parse the date text shown on a research card.
 *
 * Text shaped like "Mon D, YYYY" (month name of 3-9 letters, looked up in
 * `MONTH_TO_INDEX` by its first three letters) is converted to noon UTC of
 * that day. Anything else is handed to the `Date` constructor.
 *
 * @param {string | null | undefined} dateText - Raw date text.
 * @returns {Date | undefined} The parsed date, or `undefined` when the text
 *   is empty or unparsable.
 */
function parsePubDate(dateText) {
  const text = normalizeText(dateText);
  if (!text) return undefined;

  const parts = text.match(/^([A-Za-z]{3,9})\s+(\d{1,2}),\s*(\d{4})$/);
  const monthIndex = parts ? MONTH_TO_INDEX[parts[1].slice(0, 3).toLowerCase()] : undefined;
  if (monthIndex != null) {
    const stamp = Date.UTC(Number(parts[3]), monthIndex, Number(parts[2]), 12, 0, 0);
    if (!Number.isNaN(stamp)) return new Date(stamp);
  }

  const fallback = new Date(text);
  return Number.isNaN(fallback.getTime()) ? undefined : fallback;
}
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
/**
 * Extract a title from a research card anchor.
 *
 * Preference order: the text of the first heading found when looking for
 * `h2`, then `h3`, then `h4`; otherwise the last non-empty `span` text;
 * otherwise the anchor's whole text.
 *
 * @param {object} anchor - Anchor element of the card.
 * @returns {string} The normalized title, possibly empty.
 */
function extractTitle(anchor) {
  let heading = null;
  for (const tag of ["h2", "h3", "h4"]) {
    heading = anchor.querySelector(tag);
    if (heading != null) break;
  }
  const headingText = normalizeText(heading?.textContent);
  if (headingText) return headingText;

  let lastSpanText = "";
  for (const span of anchor.querySelectorAll("span")) {
    const spanText = normalizeText(span.textContent);
    if (spanText) lastSpanText = spanText;
  }
  if (lastSpanText) return lastSpanText;

  return normalizeText(anchor.textContent);
}
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
/**
 * Extract a summary from the first `p` inside a research card anchor.
 *
 * @param {object} anchor - Anchor element of the card.
 * @param {string} title - Title already chosen for the card.
 * @returns {string | undefined} The paragraph text, or `undefined` when it
 *   is empty or merely repeats the title.
 */
function extractSummary(anchor, title) {
  const paragraph = normalizeText(anchor.querySelector("p")?.textContent);
  return paragraph && paragraph !== title ? paragraph : undefined;
}
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
/**
 * Convert one anchor into a feed item when it links to a research article.
 *
 * @param {object} anchor - Anchor element to inspect.
 * @param {string} finalUrl - Base URL for resolving the anchor's href.
 * @returns {object | null} The feed item, or `null` when the href is not an
 *   http(s) research-article link or no title can be extracted. `pubDate`
 *   falls back to the current time when the card's `time` text cannot be
 *   parsed.
 */
function parseAnchorItem(anchor, finalUrl) {
  const link = toAbsoluteHttpUrl(anchor.getAttribute("href"), finalUrl);
  if (!link || !isResearchArticlePath(new URL(link).pathname)) {
    return null;
  }

  const title = extractTitle(anchor);
  if (!title) return null;

  const timeText = normalizeText(anchor.querySelector("time")?.textContent);
  const pubDate = parsePubDate(timeText) ?? new Date();
  const summary = extractSummary(anchor, title);
  const guid = hashGuid(link);

  return { guid, title, link, pubDate, author: "Anthropic", summary };
}
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
/**
 * Plugin entry point: load the research page and return its article cards
 * as feed items, one per distinct link in document order.
 *
 * @param {string} sourceId - The subscribed list URL.
 * @param {object} ctx - Plugin context providing `fetchHtml` and `deps`;
 *   `ctx.deps` is stored in the module-level `_deps`.
 * @returns {Promise<object[]>} The extracted items.
 * @throws {Error} When no research item could be extracted.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 4500 });
  const root = _deps.parseHtml(html);
  const baseUrl = finalUrl || ANTHROPIC_ORIGIN;

  // A Map keyed by link keeps the first card seen for each article.
  const byLink = new Map();
  for (const anchor of root.querySelectorAll("a[href]")) {
    const item = parseAnchorItem(anchor, baseUrl);
    if (item && !byLink.has(item.link)) {
      byLink.set(item.link, item);
    }
  }

  if (byLink.size === 0) {
    throw new Error("[anthropic-research] 未解析到研究条目,页面结构可能已变化");
  }

  return [...byLink.values()];
}
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
export default {
|
|
152
|
+
id: "anthropic-research",
|
|
153
|
+
listUrlPattern: /^https?:\/\/(www\.)?anthropic\.com\/research(?:\/)?(\?.*)?$/i,
|
|
154
|
+
fetchItems,
|
|
155
|
+
};
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
let _deps;
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
const APPEN_ORIGIN = "https://www.appen.com";
|
|
5
|
+
const IGNORED_TEXTS = new Set([
|
|
6
|
+
"learn more",
|
|
7
|
+
"view all",
|
|
8
|
+
"browse catalog",
|
|
9
|
+
"blog",
|
|
10
|
+
"case studies",
|
|
11
|
+
"white papers, reports and ebooks",
|
|
12
|
+
"white papers, reports and ebooks",
|
|
13
|
+
]);
|
|
14
|
+
|
|
15
|
+
/**
 * Collapse whitespace runs into single spaces and trim both ends.
 *
 * @param {string | null | undefined} text - Raw text; nullish becomes "".
 * @returns {string} The normalized text.
 */
function normalizeText(text) {
  if (text === null || text === undefined) {
    return "";
  }
  return text.replace(/\s+/g, " ").trim();
}
|
|
18
|
+
|
|
19
|
+
/**
 * Build a GUID string from `input` using the "sha256" hash obtained from
 * `_deps.createHash`, rendered as a "hex" digest. `_deps` is set by
 * `fetchItems` from `ctx.deps`.
 *
 * @param {string} input - Seed text (the item URL).
 * @returns {string} The digest produced for `input`.
 */
function hashGuid(input) {
  const result = _deps.createHash("sha256").update(input).digest("hex");
  return result;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Resolve an href against a base URL, keeping only http(s) results.
 *
 * @param {string | null | undefined} href - The href attribute value.
 * @param {string | URL} baseUrl - Base for relative hrefs.
 * @returns {URL | null} The resolved `URL` object, or `null` for empty
 *   hrefs, fragment-only hrefs, `javascript:` hrefs, unparsable values and
 *   non-http(s) schemes.
 */
function resolveHttpUrl(href, baseUrl) {
  const trimmed = href ? href.trim() : "";
  if (trimmed === "" || trimmed[0] === "#" || trimmed.startsWith("javascript:")) {
    return null;
  }

  let resolved;
  try {
    resolved = new URL(trimmed, baseUrl);
  } catch {
    return null;
  }
  return /^https?:$/i.test(resolved.protocol) ? resolved : null;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Classify a URL path as one of the known resource kinds.
 *
 * A path matches when it is exactly one segment below `/blog/`,
 * `/case-studies/` or `/whitepapers/` (case-insensitive, optional trailing
 * slash).
 *
 * @param {string} pathname - URL path to classify.
 * @returns {{label: string, author: string} | null} The kind's label and
 *   author name, or `null` when the path matches none of them.
 */
function getResourceType(pathname) {
  const kinds = [
    [/^\/blog\/[^/?#]+\/?$/i, "Blog", "Appen Blog"],
    [/^\/case-studies\/[^/?#]+\/?$/i, "Case Study", "Appen Case Studies"],
    [/^\/whitepapers\/[^/?#]+\/?$/i, "Whitepaper", "Appen Whitepapers"],
  ];
  for (const [pattern, label, author] of kinds) {
    if (pattern.test(pathname)) {
      return { label, author };
    }
  }
  return null;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Remove empty entries and case-insensitive duplicates from a list of texts.
 *
 * @param {Iterable<string>} texts - Candidate texts.
 * @returns {string[]} The first occurrence of each distinct text (compared
 *   in lower case), in original order.
 */
function dedupeTexts(texts) {
  // Map preserves insertion order, so values() yields first occurrences.
  const firstByKey = new Map();
  for (const text of texts) {
    if (!text) continue;
    const key = text.toLowerCase();
    if (!firstByKey.has(key)) {
      firstByKey.set(key, text);
    }
  }
  return [...firstByKey.values()];
}
|
|
55
|
+
|
|
56
|
+
/**
 * Gather the distinct texts of the innermost text containers in an anchor.
 *
 * Only heading, `p`, `span` and `div` elements that contain no further
 * element of those kinds are read. Empty texts and texts listed in
 * `IGNORED_TEXTS` (compared in lower case) are dropped before
 * de-duplication.
 *
 * @param {object} anchor - Anchor element of a resource card.
 * @returns {string[]} Distinct leaf texts in document order.
 */
function extractLeafTexts(anchor) {
  const selector = "h1,h2,h3,h4,h5,h6,p,span,div";
  const texts = [];

  for (const node of anchor.querySelectorAll(selector)) {
    // Skip wrappers: only elements with no nested container are leaves.
    if (node.querySelector(selector) != null) continue;

    const text = normalizeText(node.textContent);
    if (text && !IGNORED_TEXTS.has(text.toLowerCase())) {
      texts.push(text);
    }
  }
  return dedupeTexts(texts);
}
|
|
65
|
+
|
|
66
|
+
/**
 * Choose a title for a resource card.
 *
 * The first heading inside the anchor wins unless its text is empty or in
 * `IGNORED_TEXTS`. Otherwise the longest leaf text of at least 12 characters
 * is used (the earliest one on ties), and failing that the first leaf text.
 *
 * @param {object} anchor - Anchor element of the card.
 * @param {string[]} leafTexts - Texts from `extractLeafTexts`.
 * @returns {string} The chosen title, or "" when nothing is available.
 */
function pickTitle(anchor, leafTexts) {
  const heading = normalizeText(anchor.querySelector("h1,h2,h3,h4,h5,h6")?.textContent);
  if (heading && !IGNORED_TEXTS.has(heading.toLowerCase())) {
    return heading;
  }

  let longest = null;
  for (const text of leafTexts) {
    if (text.length < 12) continue;
    // Strictly-greater keeps the earliest candidate when lengths tie.
    if (longest === null || text.length > longest.length) {
      longest = text;
    }
  }
  if (longest !== null) return longest;

  return leafTexts[0] ?? "";
}
|
|
75
|
+
|
|
76
|
+
/**
 * Choose a summary for a resource card.
 *
 * @param {string[]} leafTexts - Texts from the card.
 * @param {string} title - Title already chosen for the card.
 * @returns {string | undefined} The longest text of at least 24 characters
 *   that differs from the title (the earliest one on ties), or `undefined`
 *   when there is none.
 */
function pickSummary(leafTexts, title) {
  let best;
  for (const text of leafTexts) {
    if (text === title || text.length < 24) continue;
    // Strictly-greater keeps the earliest candidate when lengths tie.
    if (best === undefined || text.length > best.length) {
      best = text;
    }
  }
  return best;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Read the page-level "Last Published: … GMT+0000" marker from raw HTML.
 *
 * @param {string} html - Raw page HTML.
 * @returns {Date | undefined} The parsed timestamp, or `undefined` when the
 *   marker is absent or its text does not parse as a date.
 */
function parsePagePublishedDate(html) {
  const found = html.match(/Last Published:\s*([^-<]+GMT\+0000)/i);
  if (found === null) return undefined;

  const published = new Date(found[1].trim());
  if (Number.isNaN(published.getTime())) {
    return undefined;
  }
  return published;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Infer an approximate publication date from a year embedded in a URL path.
 *
 * The first `20xx` token delimited by "-" or "/" (or the string ends) is
 * taken as the year. Years later than the current UTC year are rejected:
 * slugs such as "predictions-2030" name a topic rather than a publication
 * year, and would otherwise produce a date in the future.
 *
 * @param {string} pathname - URL path of the resource.
 * @returns {Date | undefined} January 1st (UTC) of the detected year, or
 *   `undefined` when no plausible year is present.
 */
function parseDateFromSlug(pathname) {
  const match = pathname.match(/(?:^|[-/])(20\d{2})(?:[-/]|$)/);
  if (!match) return undefined;

  // The pattern already limits the year to 2000-2099; only the upper bound
  // relative to today still needs checking.
  const year = Number(match[1]);
  if (year > new Date().getUTCFullYear()) return undefined;

  return new Date(Date.UTC(year, 0, 1));
}
|
|
98
|
+
|
|
99
|
+
/**
 * Store `candidate` under its link unless an equally good or better item is
 * already stored there.
 *
 * Quality is scored as 2 points for a truthy summary plus 1 point for a
 * title of at least 20 characters; an existing item is replaced only by a
 * strictly higher score.
 *
 * @param {Map<string, object>} itemsByLink - Items keyed by link; mutated.
 * @param {object} candidate - Item to insert, with `link`, `title` and
 *   optional `summary`.
 * @returns {void}
 */
function upsertItem(itemsByLink, candidate) {
  const rank = ({ summary, title }) => (summary ? 2 : 0) + (title.length >= 20 ? 1 : 0);

  const existing = itemsByLink.get(candidate.link);
  if (!existing || rank(candidate) > rank(existing)) {
    itemsByLink.set(candidate.link, candidate);
  }
}
|
|
110
|
+
|
|
111
|
+
/**
 * Plugin entry point: load the resources page and return one feed item per
 * linked blog post, case study or whitepaper.
 *
 * When several anchors share a link, `upsertItem` decides which candidate is
 * kept. An item's date is the year found in its slug when there is one,
 * otherwise the page's "Last Published" marker, otherwise the current time.
 *
 * @param {string} sourceId - The subscribed list URL.
 * @param {object} ctx - Plugin context providing `fetchHtml` and `deps`;
 *   `ctx.deps` is stored in the module-level `_deps`.
 * @returns {Promise<object[]>} The extracted items.
 * @throws {Error} When no resource item could be extracted.
 */
async function fetchItems(sourceId, ctx) {
  _deps = ctx.deps;
  const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 4500 });
  const root = _deps.parseHtml(html);
  const baseUrl = finalUrl || sourceId || APPEN_ORIGIN;
  const fallbackDate = parsePagePublishedDate(html) ?? new Date();
  const itemsByLink = new Map();

  for (const anchor of root.querySelectorAll("a[href]")) {
    const url = resolveHttpUrl(anchor.getAttribute("href"), baseUrl);
    const resourceType = url ? getResourceType(url.pathname) : null;
    if (!resourceType) continue;

    const leafTexts = extractLeafTexts(anchor);
    const title = pickTitle(anchor, leafTexts);
    if (!title) continue;

    const summary = pickSummary(leafTexts, title);
    const pubDate = parseDateFromSlug(url.pathname) ?? fallbackDate;
    upsertItem(itemsByLink, {
      guid: hashGuid(url.href),
      title,
      link: url.href,
      pubDate,
      author: resourceType.author,
      summary,
    });
  }

  if (itemsByLink.size === 0) {
    throw new Error("[appen-resources] 未解析到资源条目,页面结构可能已变化");
  }
  return Array.from(itemsByLink.values());
}
|
|
150
|
+
|
|
151
|
+
export default {
|
|
152
|
+
id: "appen-resources",
|
|
153
|
+
listUrlPattern: /^https?:\/\/(www\.)?appen\.com\/resources\/?(\?.*)?$/i,
|
|
154
|
+
fetchItems,
|
|
155
|
+
};
|