@oh-my-pi/pi-coding-agent 3.24.0 → 3.30.0
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- package/CHANGELOG.md +34 -0
- package/package.json +4 -4
- package/src/core/custom-commands/bundled/wt/index.ts +3 -0
- package/src/core/sdk.ts +7 -0
- package/src/core/tools/complete.ts +129 -0
- package/src/core/tools/index.test.ts +9 -1
- package/src/core/tools/index.ts +18 -5
- package/src/core/tools/jtd-to-json-schema.ts +252 -0
- package/src/core/tools/output.ts +125 -14
- package/src/core/tools/read.ts +4 -4
- package/src/core/tools/task/artifacts.ts +6 -9
- package/src/core/tools/task/executor.ts +189 -24
- package/src/core/tools/task/index.ts +23 -18
- package/src/core/tools/task/name-generator.ts +1577 -0
- package/src/core/tools/task/render.ts +137 -8
- package/src/core/tools/task/types.ts +26 -5
- package/src/core/tools/task/worker-protocol.ts +1 -0
- package/src/core/tools/task/worker.ts +136 -14
- package/src/core/tools/web-fetch-handlers/academic.test.ts +239 -0
- package/src/core/tools/web-fetch-handlers/artifacthub.ts +210 -0
- package/src/core/tools/web-fetch-handlers/arxiv.ts +84 -0
- package/src/core/tools/web-fetch-handlers/aur.ts +171 -0
- package/src/core/tools/web-fetch-handlers/biorxiv.ts +136 -0
- package/src/core/tools/web-fetch-handlers/bluesky.ts +277 -0
- package/src/core/tools/web-fetch-handlers/brew.ts +173 -0
- package/src/core/tools/web-fetch-handlers/business.test.ts +82 -0
- package/src/core/tools/web-fetch-handlers/cheatsh.ts +73 -0
- package/src/core/tools/web-fetch-handlers/chocolatey.ts +153 -0
- package/src/core/tools/web-fetch-handlers/coingecko.ts +179 -0
- package/src/core/tools/web-fetch-handlers/crates-io.ts +123 -0
- package/src/core/tools/web-fetch-handlers/dev-platforms.test.ts +254 -0
- package/src/core/tools/web-fetch-handlers/devto.ts +173 -0
- package/src/core/tools/web-fetch-handlers/discogs.ts +303 -0
- package/src/core/tools/web-fetch-handlers/dockerhub.ts +156 -0
- package/src/core/tools/web-fetch-handlers/documentation.test.ts +85 -0
- package/src/core/tools/web-fetch-handlers/finance-media.test.ts +144 -0
- package/src/core/tools/web-fetch-handlers/git-hosting.test.ts +272 -0
- package/src/core/tools/web-fetch-handlers/github-gist.ts +64 -0
- package/src/core/tools/web-fetch-handlers/github.ts +424 -0
- package/src/core/tools/web-fetch-handlers/gitlab.ts +444 -0
- package/src/core/tools/web-fetch-handlers/go-pkg.ts +271 -0
- package/src/core/tools/web-fetch-handlers/hackage.ts +89 -0
- package/src/core/tools/web-fetch-handlers/hackernews.ts +208 -0
- package/src/core/tools/web-fetch-handlers/hex.ts +121 -0
- package/src/core/tools/web-fetch-handlers/huggingface.ts +385 -0
- package/src/core/tools/web-fetch-handlers/iacr.ts +82 -0
- package/src/core/tools/web-fetch-handlers/index.ts +69 -0
- package/src/core/tools/web-fetch-handlers/lobsters.ts +186 -0
- package/src/core/tools/web-fetch-handlers/mastodon.ts +302 -0
- package/src/core/tools/web-fetch-handlers/maven.ts +147 -0
- package/src/core/tools/web-fetch-handlers/mdn.ts +174 -0
- package/src/core/tools/web-fetch-handlers/media.test.ts +138 -0
- package/src/core/tools/web-fetch-handlers/metacpan.ts +247 -0
- package/src/core/tools/web-fetch-handlers/npm.ts +107 -0
- package/src/core/tools/web-fetch-handlers/nuget.ts +201 -0
- package/src/core/tools/web-fetch-handlers/nvd.ts +238 -0
- package/src/core/tools/web-fetch-handlers/opencorporates.ts +273 -0
- package/src/core/tools/web-fetch-handlers/openlibrary.ts +313 -0
- package/src/core/tools/web-fetch-handlers/osv.ts +184 -0
- package/src/core/tools/web-fetch-handlers/package-managers-2.test.ts +199 -0
- package/src/core/tools/web-fetch-handlers/package-managers.test.ts +171 -0
- package/src/core/tools/web-fetch-handlers/package-registries.test.ts +259 -0
- package/src/core/tools/web-fetch-handlers/packagist.ts +170 -0
- package/src/core/tools/web-fetch-handlers/pub-dev.ts +185 -0
- package/src/core/tools/web-fetch-handlers/pubmed.ts +174 -0
- package/src/core/tools/web-fetch-handlers/pypi.ts +125 -0
- package/src/core/tools/web-fetch-handlers/readthedocs.ts +122 -0
- package/src/core/tools/web-fetch-handlers/reddit.ts +100 -0
- package/src/core/tools/web-fetch-handlers/repology.ts +257 -0
- package/src/core/tools/web-fetch-handlers/research.test.ts +107 -0
- package/src/core/tools/web-fetch-handlers/rfc.ts +205 -0
- package/src/core/tools/web-fetch-handlers/rubygems.ts +112 -0
- package/src/core/tools/web-fetch-handlers/sec-edgar.ts +269 -0
- package/src/core/tools/web-fetch-handlers/security.test.ts +103 -0
- package/src/core/tools/web-fetch-handlers/semantic-scholar.ts +190 -0
- package/src/core/tools/web-fetch-handlers/social-extended.test.ts +192 -0
- package/src/core/tools/web-fetch-handlers/social.test.ts +259 -0
- package/src/core/tools/web-fetch-handlers/spotify.ts +218 -0
- package/src/core/tools/web-fetch-handlers/stackexchange.test.ts +120 -0
- package/src/core/tools/web-fetch-handlers/stackoverflow.ts +123 -0
- package/src/core/tools/web-fetch-handlers/standards.test.ts +122 -0
- package/src/core/tools/web-fetch-handlers/terraform.ts +296 -0
- package/src/core/tools/web-fetch-handlers/tldr.ts +47 -0
- package/src/core/tools/web-fetch-handlers/twitter.ts +84 -0
- package/src/core/tools/web-fetch-handlers/types.ts +163 -0
- package/src/core/tools/web-fetch-handlers/utils.ts +91 -0
- package/src/core/tools/web-fetch-handlers/vimeo.ts +152 -0
- package/src/core/tools/web-fetch-handlers/wikidata.ts +349 -0
- package/src/core/tools/web-fetch-handlers/wikipedia.test.ts +73 -0
- package/src/core/tools/web-fetch-handlers/wikipedia.ts +91 -0
- package/src/core/tools/web-fetch-handlers/youtube.test.ts +198 -0
- package/src/core/tools/web-fetch-handlers/youtube.ts +319 -0
- package/src/core/tools/web-fetch.ts +152 -1324
- package/src/prompts/task.md +14 -50
- package/src/prompts/tools/output.md +2 -1
- package/src/prompts/tools/task.md +3 -1
- package/src/utils/tools-manager.ts +110 -8
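The headline change is structural: web-fetch.ts shrinks by 1,324 lines while roughly fifty site-specific handlers appear under web-fetch-handlers/, each exporting a `SpecialHandler` that returns `null` when a URL is not its concern. Below is a minimal sketch of the first-match dispatch this implies, assuming only the `SpecialHandler` signature visible in the hunks that follow; `tryHandlers` and the handler ordering are illustrative, not the actual contents of index.ts:

```ts
import type { RenderResult, SpecialHandler } from "./types";
import { handleWikidata } from "./wikidata";
import { handleWikipedia } from "./wikipedia";

// Illustrative registry; the real aggregation lives in web-fetch-handlers/index.ts (+69 lines).
const handlers: SpecialHandler[] = [handleWikipedia, handleWikidata];

async function tryHandlers(url: string, timeout: number): Promise<RenderResult | null> {
  for (const handler of handlers) {
    // Each handler inspects the URL itself and returns null when it does not apply.
    const result = await handler(url, timeout);
    if (result) return result;
  }
  return null; // caller falls back to the generic fetch path
}
```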
package/src/core/tools/web-fetch-handlers/wikidata.ts
@@ -0,0 +1,349 @@
+import type { RenderResult, SpecialHandler } from "./types";
+import { finalizeOutput, formatCount, loadPage } from "./types";
+
+/**
+ * Common Wikidata property IDs mapped to human-readable names
+ */
+const PROPERTY_LABELS: Record<string, string> = {
+  P31: "Instance of",
+  P279: "Subclass of",
+  P17: "Country",
+  P131: "Located in",
+  P625: "Coordinates",
+  P18: "Image",
+  P154: "Logo",
+  P571: "Founded",
+  P576: "Dissolved",
+  P169: "CEO",
+  P112: "Founded by",
+  P159: "Headquarters",
+  P452: "Industry",
+  P1128: "Employees",
+  P2139: "Revenue",
+  P856: "Website",
+  P21: "Sex/Gender",
+  P27: "Citizenship",
+  P569: "Born",
+  P570: "Died",
+  P19: "Birthplace",
+  P20: "Death place",
+  P106: "Occupation",
+  P108: "Employer",
+  P69: "Educated at",
+  P22: "Father",
+  P25: "Mother",
+  P26: "Spouse",
+  P40: "Child",
+  P166: "Award",
+  P136: "Genre",
+  P495: "Country of origin",
+  P577: "Publication date",
+  P50: "Author",
+  P123: "Publisher",
+  P364: "Original language",
+  P86: "Composer",
+  P57: "Director",
+  P161: "Cast member",
+  P170: "Creator",
+  P178: "Developer",
+  P275: "License",
+  P306: "Operating system",
+  P277: "Programming language",
+  P348: "Version",
+  P1566: "GeoNames ID",
+  P214: "VIAF ID",
+  P227: "GND ID",
+  P213: "ISNI",
+  P496: "ORCID",
+};
+
+interface WikidataEntity {
+  type: string;
+  id: string;
+  labels?: Record<string, { language: string; value: string }>;
+  descriptions?: Record<string, { language: string; value: string }>;
+  aliases?: Record<string, Array<{ language: string; value: string }>>;
+  claims?: Record<string, WikidataClaim[]>;
+  sitelinks?: Record<string, { site: string; title: string }>;
+}
+
+interface WikidataClaim {
+  mainsnak: {
+    snaktype: string;
+    property: string;
+    datavalue?: {
+      type: string;
+      value: WikidataValue;
+    };
+  };
+  rank: string;
+}
+
+type WikidataValue =
+  | string
+  | { "entity-type": string; id: string; "numeric-id": number }
+  | { time: string; precision: number; calendarmodel: string }
+  | { amount: string; unit: string }
+  | { text: string; language: string }
+  | { latitude: number; longitude: number; precision: number };
+
+/**
+ * Handle Wikidata URLs via EntityData API
+ */
+export const handleWikidata: SpecialHandler = async (url: string, timeout: number): Promise<RenderResult | null> => {
+  try {
+    const parsed = new URL(url);
+    if (!parsed.hostname.includes("wikidata.org")) return null;
+
+    // Extract Q-id from /wiki/Q123 or /entity/Q123
+    const qidMatch = parsed.pathname.match(/\/(?:wiki|entity)\/(Q\d+)/i);
+    if (!qidMatch) return null;
+
+    const qid = qidMatch[1].toUpperCase();
+    const fetchedAt = new Date().toISOString();
+
+    // Fetch entity data from API
+    const apiUrl = `https://www.wikidata.org/wiki/Special:EntityData/${qid}.json`;
+    const result = await loadPage(apiUrl, { timeout });
+
+    if (!result.ok) return null;
+
+    let data: { entities: Record<string, WikidataEntity> };
+    try {
+      data = JSON.parse(result.content);
+    } catch {
+      return null;
+    }
+
+    const entity = data.entities[qid];
+    if (!entity) return null;
+
+    // Get label and description (prefer English)
+    const label = getLocalizedValue(entity.labels, "en") || qid;
+    const description = getLocalizedValue(entity.descriptions, "en");
+    const aliases = getLocalizedAliases(entity.aliases, "en");
+
+    let md = `# ${label} (${qid})\n\n`;
+    if (description) md += `*${description}*\n\n`;
+    if (aliases.length > 0) md += `**Also known as:** ${aliases.join(", ")}\n\n`;
+
+    // Count sitelinks
+    const sitelinkCount = entity.sitelinks ? Object.keys(entity.sitelinks).length : 0;
+    if (sitelinkCount > 0) {
+      md += `**Wikipedia articles:** ${formatCount(sitelinkCount)} languages\n\n`;
+    }
+
+    // Process claims
+    if (entity.claims && Object.keys(entity.claims).length > 0) {
+      md += "## Properties\n\n";
+
+      // Collect entity IDs we need to resolve
+      const entityIdsToResolve = new Set<string>();
+      for (const claims of Object.values(entity.claims)) {
+        for (const claim of claims) {
+          if (claim.mainsnak.datavalue?.type === "wikibase-entityid") {
+            const val = claim.mainsnak.datavalue.value as { id: string };
+            entityIdsToResolve.add(val.id);
+          }
+        }
+      }
+
+      // Fetch labels for referenced entities (limit to 50)
+      const entityLabels = await resolveEntityLabels(Array.from(entityIdsToResolve).slice(0, 50), timeout);
+
+      // Group claims by property
+      const processedProperties: string[] = [];
+      for (const [propId, claims] of Object.entries(entity.claims)) {
+        const propLabel = PROPERTY_LABELS[propId] || propId;
+        const values: string[] = [];
+
+        for (const claim of claims) {
+          if (claim.rank === "deprecated") continue;
+          const value = formatClaimValue(claim, entityLabels);
+          if (value && !values.includes(value)) {
+            values.push(value);
+          }
+        }
+
+        if (values.length > 0) {
+          // Limit values shown per property
+          const displayValues = values.slice(0, 10);
+          const overflow = values.length > 10 ? ` (+${values.length - 10} more)` : "";
+          processedProperties.push(`- **${propLabel}:** ${displayValues.join(", ")}${overflow}`);
+        }
+      }
+
+      // Sort: known properties first, then by property ID
+      processedProperties.sort((a, b) => {
+        const aKnown = Object.values(PROPERTY_LABELS).some((l) => a.includes(`**${l}:**`));
+        const bKnown = Object.values(PROPERTY_LABELS).some((l) => b.includes(`**${l}:**`));
+        if (aKnown && !bKnown) return -1;
+        if (!aKnown && bKnown) return 1;
+        return a.localeCompare(b);
+      });
+
+      // Limit total properties shown
+      const maxProps = 50;
+      md += processedProperties.slice(0, maxProps).join("\n");
+      if (processedProperties.length > maxProps) {
+        md += `\n\n*...and ${processedProperties.length - maxProps} more properties*`;
+      }
+      md += "\n";
+    }
+
+    // Add notable sitelinks
+    if (entity.sitelinks) {
+      const notableSites = ["enwiki", "dewiki", "frwiki", "eswiki", "jawiki", "zhwiki"];
+      const links: string[] = [];
+
+      for (const site of notableSites) {
+        const sitelink = entity.sitelinks[site];
+        if (sitelink) {
+          const lang = site.replace("wiki", "");
+          const wikiUrl = `https://${lang}.wikipedia.org/wiki/${encodeURIComponent(sitelink.title)}`;
+          links.push(`[${lang.toUpperCase()}](${wikiUrl})`);
+        }
+      }
+
+      if (links.length > 0) {
+        md += `\n## Wikipedia Links\n\n${links.join(" · ")}\n`;
+      }
+    }
+
+    const output = finalizeOutput(md);
+    return {
+      url,
+      finalUrl: url,
+      contentType: "text/markdown",
+      method: "wikidata",
+      content: output.content,
+      fetchedAt,
+      truncated: output.truncated,
+      notes: ["Fetched via Wikidata EntityData API"],
+    };
+  } catch {}
+
+  return null;
+};
+
+/**
+ * Get localized value with fallback
+ */
+function getLocalizedValue(
+  values: Record<string, { language: string; value: string }> | undefined,
+  preferredLang: string,
+): string | null {
+  if (!values) return null;
+  if (values[preferredLang]) return values[preferredLang].value;
+  // Fallback to any available
+  const first = Object.values(values)[0];
+  return first?.value || null;
+}
+
+/**
+ * Get aliases for a language
+ */
+function getLocalizedAliases(
+  aliases: Record<string, Array<{ language: string; value: string }>> | undefined,
+  preferredLang: string,
+): string[] {
+  if (!aliases) return [];
+  const langAliases = aliases[preferredLang];
+  if (!langAliases) return [];
+  return langAliases.map((a) => a.value);
+}
+
+/**
+ * Resolve entity IDs to their labels via wbgetentities API
+ */
+async function resolveEntityLabels(entityIds: string[], timeout: number): Promise<Record<string, string>> {
+  if (entityIds.length === 0) return {};
+
+  const labels: Record<string, string> = {};
+
+  // Fetch in batches of 50
+  const batchSize = 50;
+  for (let i = 0; i < entityIds.length; i += batchSize) {
+    const batch = entityIds.slice(i, i + batchSize);
+    const apiUrl = `https://www.wikidata.org/w/api.php?action=wbgetentities&ids=${batch.join("|")}&props=labels&languages=en&format=json`;
+
+    try {
+      const result = await loadPage(apiUrl, { timeout: Math.min(timeout, 10) });
+      if (result.ok) {
+        const data = JSON.parse(result.content) as {
+          entities: Record<string, { labels?: Record<string, { value: string }> }>;
+        };
+        for (const [id, entity] of Object.entries(data.entities)) {
+          const label = entity.labels?.en?.value;
+          if (label) labels[id] = label;
+        }
+      }
+    } catch {}
+  }
+
+  return labels;
+}
+
+/**
+ * Format a claim value to human-readable string
+ */
+function formatClaimValue(claim: WikidataClaim, entityLabels: Record<string, string>): string | null {
+  const snak = claim.mainsnak;
+  if (snak.snaktype !== "value" || !snak.datavalue) return null;
+
+  const { type, value } = snak.datavalue;
+
+  switch (type) {
+    case "wikibase-entityid": {
+      const entityVal = value as { id: string };
+      return entityLabels[entityVal.id] || entityVal.id;
+    }
+    case "string":
+      return value as string;
+    case "time": {
+      const timeVal = value as { time: string; precision: number };
+      return formatWikidataTime(timeVal.time, timeVal.precision);
+    }
+    case "quantity": {
+      const qtyVal = value as { amount: string; unit: string };
+      const amount = qtyVal.amount.replace(/^\+/, "");
+      // Extract unit Q-id if present
+      const unitMatch = qtyVal.unit.match(/Q\d+$/);
+      const unit = unitMatch ? entityLabels[unitMatch[0]] || "" : "";
+      return unit ? `${amount} ${unit}` : amount;
+    }
+    case "monolingualtext": {
+      const textVal = value as { text: string; language: string };
+      return textVal.text;
+    }
+    case "globecoordinate": {
+      const coordVal = value as { latitude: number; longitude: number };
+      return `${coordVal.latitude.toFixed(4)}, ${coordVal.longitude.toFixed(4)}`;
+    }
+    default:
+      return null;
+  }
+}
+
+/**
+ * Format Wikidata time value to readable date
+ */
+function formatWikidataTime(time: string, precision: number): string {
+  // Time format: +YYYY-MM-DDT00:00:00Z
+  const match = time.match(/^([+-]?\d+)-(\d{2})-(\d{2})/);
+  if (!match) return time;
+
+  const [, year, month, day] = match;
+  const yearNum = Number.parseInt(year, 10);
+  const absYear = Math.abs(yearNum);
+  const era = yearNum < 0 ? " BCE" : "";
+
+  // Precision: 9=year, 10=month, 11=day
+  if (precision >= 11) {
+    return `${day}/${month}/${absYear}${era}`;
+  }
+  if (precision >= 10) {
+    return `${month}/${absYear}${era}`;
+  }
+  return `${absYear}${era}`;
+}
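Given the `handleWikidata` signature above, a direct invocation might look like the following sketch. The relative import path is an assumption, and the timeout value mirrors what the test suite below passes; the expected field values are read off the return statement in the handler:

```ts
import { handleWikidata } from "./wikidata"; // path assumes a sibling module

// Q42 is Douglas Adams; non-Wikidata URLs resolve to null instead.
const result = await handleWikidata("https://www.wikidata.org/wiki/Q42", 20);
if (result) {
  console.log(result.method);      // "wikidata"
  console.log(result.contentType); // "text/markdown"
  console.log(result.content);     // "# Douglas Adams (Q42)" plus a Properties list
}
```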
package/src/core/tools/web-fetch-handlers/wikipedia.test.ts
@@ -0,0 +1,73 @@
+import { describe, expect, it } from "bun:test";
+import { handleWikipedia } from "./wikipedia";
+
+const SKIP = !process.env.WEB_FETCH_INTEGRATION;
+
+describe.skipIf(SKIP)("handleWikipedia", () => {
+  it("returns null for non-Wikipedia URLs", async () => {
+    const result = await handleWikipedia("https://example.com", 10);
+    expect(result).toBeNull();
+  });
+
+  it("returns null for Wikipedia URLs without /wiki/ path", async () => {
+    const result = await handleWikipedia("https://en.wikipedia.org/", 10);
+    expect(result).toBeNull();
+  });
+
+  it("fetches a known article with full metadata", async () => {
+    // "Computer" is a stable, well-established article
+    const result = await handleWikipedia("https://en.wikipedia.org/wiki/Computer", 20);
+    expect(result).not.toBeNull();
+    expect(result?.method).toBe("wikipedia");
+    expect(result?.contentType).toBe("text/markdown");
+    expect(result?.content).toContain("Computer");
+    expect(result?.url).toBe("https://en.wikipedia.org/wiki/Computer");
+    expect(result?.finalUrl).toBe("https://en.wikipedia.org/wiki/Computer");
+    expect(result?.truncated).toBe(false);
+    expect(result?.notes).toContain("Fetched via Wikipedia API");
+    expect(result?.fetchedAt).toBeDefined();
+    // Should be a valid ISO timestamp
+    expect(() => new Date(result?.fetchedAt ?? "")).not.toThrow();
+    // The handler should filter out References and External links sections
+    const content = result?.content ?? "";
+    const hasReferencesHeading = /^## References$/m.test(content);
+    const hasExternalLinksHeading = /^## External links$/m.test(content);
+    // At least one of these should be filtered out
+    expect(hasReferencesHeading || hasExternalLinksHeading).toBe(false);
+  });
+
+  it("handles different language wikis", async () => {
+    // German Wikipedia article for "Computer"
+    const result = await handleWikipedia("https://de.wikipedia.org/wiki/Computer", 20);
+    expect(result).not.toBeNull();
+    expect(result?.method).toBe("wikipedia");
+    expect(result?.contentType).toBe("text/markdown");
+    expect(result?.content).toContain("Computer");
+  });
+
+  it("handles article with special characters in title", async () => {
+    // Article with special characters: "C++"
+    const result = await handleWikipedia("https://en.wikipedia.org/wiki/C%2B%2B", 20);
+    expect(result).not.toBeNull();
+    expect(result?.method).toBe("wikipedia");
+    expect(result?.contentType).toBe("text/markdown");
+    expect(result?.content).toMatch(/C\+\+/);
+  });
+
+  it("handles article with spaces and parentheses in title", async () => {
+    // Artificial intelligence uses underscores for spaces
+    const result = await handleWikipedia("https://en.wikipedia.org/wiki/Artificial_intelligence", 20);
+    expect(result).not.toBeNull();
+    expect(result?.method).toBe("wikipedia");
+    expect(result?.contentType).toBe("text/markdown");
+    expect(result?.content).toMatch(/[Aa]rtificial intelligence/);
+  });
+
+  it("handles non-existent articles gracefully", async () => {
+    const result = await handleWikipedia(
+      "https://en.wikipedia.org/wiki/ThisArticleDefinitelyDoesNotExist123456789",
+      20,
+    );
+    expect(result).toBeNull();
+  });
+});
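Note that the whole suite is wrapped in `describe.skipIf(SKIP)`, so these tests hit the live Wikipedia API only when `WEB_FETCH_INTEGRATION` is set in the environment (e.g. `WEB_FETCH_INTEGRATION=1 bun test`); a plain `bun test` run stays offline.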
package/src/core/tools/web-fetch-handlers/wikipedia.ts
@@ -0,0 +1,91 @@
+import { parse as parseHtml } from "node-html-parser";
+import type { RenderResult, SpecialHandler } from "./types";
+import { finalizeOutput, loadPage } from "./types";
+
+/**
+ * Handle Wikipedia URLs via Wikipedia API
+ */
+export const handleWikipedia: SpecialHandler = async (url: string, timeout: number): Promise<RenderResult | null> => {
+  try {
+    const parsed = new URL(url);
+    // Match *.wikipedia.org
+    const wikiMatch = parsed.hostname.match(/^(\w+)\.wikipedia\.org$/);
+    if (!wikiMatch) return null;
+
+    const lang = wikiMatch[1];
+    const titleMatch = parsed.pathname.match(/\/wiki\/(.+)/);
+    if (!titleMatch) return null;
+
+    const title = decodeURIComponent(titleMatch[1]);
+    const fetchedAt = new Date().toISOString();
+
+    // Use Wikipedia API to get plain text extract
+    const apiUrl = `https://${lang}.wikipedia.org/api/rest_v1/page/summary/${encodeURIComponent(title)}`;
+    const summaryResult = await loadPage(apiUrl, { timeout });
+
+    let md = "";
+
+    if (summaryResult.ok) {
+      const summary = JSON.parse(summaryResult.content) as {
+        title: string;
+        description?: string;
+        extract: string;
+      };
+      md = `# ${summary.title}\n\n`;
+      if (summary.description) md += `*${summary.description}*\n\n`;
+      md += `${summary.extract}\n\n---\n\n`;
+    }
+
+    // Get full article content via mobile-html or parse API
+    const contentUrl = `https://${lang}.wikipedia.org/api/rest_v1/page/mobile-html/${encodeURIComponent(title)}`;
+    const contentResult = await loadPage(contentUrl, { timeout });
+
+    if (contentResult.ok) {
+      const doc = parseHtml(contentResult.content);
+
+      // Extract main content sections
+      const sections = doc.querySelectorAll("section");
+      for (const section of sections) {
+        const heading = section.querySelector("h2, h3, h4");
+        const headingText = heading?.text?.trim();
+
+        // Skip certain sections
+        if (
+          headingText &&
+          ["References", "External links", "See also", "Notes", "Further reading"].includes(headingText)
+        ) {
+          continue;
+        }
+
+        if (headingText) {
+          const level = heading?.tagName === "H2" ? "##" : "###";
+          md += `${level} ${headingText}\n\n`;
+        }
+
+        const paragraphs = section.querySelectorAll("p");
+        for (const p of paragraphs) {
+          const text = p.text?.trim();
+          if (text && text.length > 20) {
+            md += `${text}\n\n`;
+          }
+        }
+      }
+    }
+
+    if (!md) return null;
+
+    const output = finalizeOutput(md);
+    return {
+      url,
+      finalUrl: url,
+      contentType: "text/markdown",
+      method: "wikipedia",
+      content: output.content,
+      fetchedAt,
+      truncated: output.truncated,
+      notes: ["Fetched via Wikipedia API"],
+    };
+  } catch {}
+
+  return null;
+};
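Both handlers lean on `loadPage` and `finalizeOutput` from web-fetch-handlers/types.ts (+163 lines, not shown in this excerpt). The shapes below are read off the call sites above; anything beyond the fields actually used (`ok`, `content`, `truncated`) is an assumption, not the real definition:

```ts
// Implied contract only; the actual definitions live in web-fetch-handlers/types.ts.
interface LoadPageResult {
  ok: boolean;     // handlers bail out (return null) when false
  content: string; // raw response body, JSON.parse'd or HTML-parsed by the caller
}

interface FinalizedOutput {
  content: string;    // rendered markdown, possibly clipped to a size budget
  truncated: boolean; // surfaced on RenderResult.truncated
}

declare function loadPage(url: string, opts: { timeout: number }): Promise<LoadPageResult>;
declare function finalizeOutput(md: string): FinalizedOutput;
```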