@oh-my-pi/pi-coding-agent 3.25.0 → 3.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/CHANGELOG.md +19 -0
  2. package/package.json +4 -4
  3. package/src/core/tools/complete.ts +2 -4
  4. package/src/core/tools/jtd-to-json-schema.ts +174 -196
  5. package/src/core/tools/read.ts +4 -4
  6. package/src/core/tools/task/executor.ts +146 -20
  7. package/src/core/tools/task/name-generator.ts +1544 -214
  8. package/src/core/tools/task/types.ts +19 -5
  9. package/src/core/tools/task/worker.ts +103 -13
  10. package/src/core/tools/web-fetch-handlers/academic.test.ts +239 -0
  11. package/src/core/tools/web-fetch-handlers/artifacthub.ts +210 -0
  12. package/src/core/tools/web-fetch-handlers/arxiv.ts +84 -0
  13. package/src/core/tools/web-fetch-handlers/aur.ts +171 -0
  14. package/src/core/tools/web-fetch-handlers/biorxiv.ts +136 -0
  15. package/src/core/tools/web-fetch-handlers/bluesky.ts +277 -0
  16. package/src/core/tools/web-fetch-handlers/brew.ts +173 -0
  17. package/src/core/tools/web-fetch-handlers/business.test.ts +82 -0
  18. package/src/core/tools/web-fetch-handlers/cheatsh.ts +73 -0
  19. package/src/core/tools/web-fetch-handlers/chocolatey.ts +153 -0
  20. package/src/core/tools/web-fetch-handlers/coingecko.ts +179 -0
  21. package/src/core/tools/web-fetch-handlers/crates-io.ts +123 -0
  22. package/src/core/tools/web-fetch-handlers/dev-platforms.test.ts +254 -0
  23. package/src/core/tools/web-fetch-handlers/devto.ts +173 -0
  24. package/src/core/tools/web-fetch-handlers/discogs.ts +303 -0
  25. package/src/core/tools/web-fetch-handlers/dockerhub.ts +156 -0
  26. package/src/core/tools/web-fetch-handlers/documentation.test.ts +85 -0
  27. package/src/core/tools/web-fetch-handlers/finance-media.test.ts +144 -0
  28. package/src/core/tools/web-fetch-handlers/git-hosting.test.ts +272 -0
  29. package/src/core/tools/web-fetch-handlers/github-gist.ts +64 -0
  30. package/src/core/tools/web-fetch-handlers/github.ts +424 -0
  31. package/src/core/tools/web-fetch-handlers/gitlab.ts +444 -0
  32. package/src/core/tools/web-fetch-handlers/go-pkg.ts +271 -0
  33. package/src/core/tools/web-fetch-handlers/hackage.ts +89 -0
  34. package/src/core/tools/web-fetch-handlers/hackernews.ts +208 -0
  35. package/src/core/tools/web-fetch-handlers/hex.ts +121 -0
  36. package/src/core/tools/web-fetch-handlers/huggingface.ts +385 -0
  37. package/src/core/tools/web-fetch-handlers/iacr.ts +82 -0
  38. package/src/core/tools/web-fetch-handlers/index.ts +69 -0
  39. package/src/core/tools/web-fetch-handlers/lobsters.ts +186 -0
  40. package/src/core/tools/web-fetch-handlers/mastodon.ts +302 -0
  41. package/src/core/tools/web-fetch-handlers/maven.ts +147 -0
  42. package/src/core/tools/web-fetch-handlers/mdn.ts +174 -0
  43. package/src/core/tools/web-fetch-handlers/media.test.ts +138 -0
  44. package/src/core/tools/web-fetch-handlers/metacpan.ts +247 -0
  45. package/src/core/tools/web-fetch-handlers/npm.ts +107 -0
  46. package/src/core/tools/web-fetch-handlers/nuget.ts +201 -0
  47. package/src/core/tools/web-fetch-handlers/nvd.ts +238 -0
  48. package/src/core/tools/web-fetch-handlers/opencorporates.ts +273 -0
  49. package/src/core/tools/web-fetch-handlers/openlibrary.ts +313 -0
  50. package/src/core/tools/web-fetch-handlers/osv.ts +184 -0
  51. package/src/core/tools/web-fetch-handlers/package-managers-2.test.ts +199 -0
  52. package/src/core/tools/web-fetch-handlers/package-managers.test.ts +171 -0
  53. package/src/core/tools/web-fetch-handlers/package-registries.test.ts +259 -0
  54. package/src/core/tools/web-fetch-handlers/packagist.ts +170 -0
  55. package/src/core/tools/web-fetch-handlers/pub-dev.ts +185 -0
  56. package/src/core/tools/web-fetch-handlers/pubmed.ts +174 -0
  57. package/src/core/tools/web-fetch-handlers/pypi.ts +125 -0
  58. package/src/core/tools/web-fetch-handlers/readthedocs.ts +122 -0
  59. package/src/core/tools/web-fetch-handlers/reddit.ts +100 -0
  60. package/src/core/tools/web-fetch-handlers/repology.ts +257 -0
  61. package/src/core/tools/web-fetch-handlers/research.test.ts +107 -0
  62. package/src/core/tools/web-fetch-handlers/rfc.ts +205 -0
  63. package/src/core/tools/web-fetch-handlers/rubygems.ts +112 -0
  64. package/src/core/tools/web-fetch-handlers/sec-edgar.ts +269 -0
  65. package/src/core/tools/web-fetch-handlers/security.test.ts +103 -0
  66. package/src/core/tools/web-fetch-handlers/semantic-scholar.ts +190 -0
  67. package/src/core/tools/web-fetch-handlers/social-extended.test.ts +192 -0
  68. package/src/core/tools/web-fetch-handlers/social.test.ts +259 -0
  69. package/src/core/tools/web-fetch-handlers/spotify.ts +218 -0
  70. package/src/core/tools/web-fetch-handlers/stackexchange.test.ts +120 -0
  71. package/src/core/tools/web-fetch-handlers/stackoverflow.ts +123 -0
  72. package/src/core/tools/web-fetch-handlers/standards.test.ts +122 -0
  73. package/src/core/tools/web-fetch-handlers/terraform.ts +296 -0
  74. package/src/core/tools/web-fetch-handlers/tldr.ts +47 -0
  75. package/src/core/tools/web-fetch-handlers/twitter.ts +84 -0
  76. package/src/core/tools/web-fetch-handlers/types.ts +163 -0
  77. package/src/core/tools/web-fetch-handlers/utils.ts +91 -0
  78. package/src/core/tools/web-fetch-handlers/vimeo.ts +152 -0
  79. package/src/core/tools/web-fetch-handlers/wikidata.ts +349 -0
  80. package/src/core/tools/web-fetch-handlers/wikipedia.test.ts +73 -0
  81. package/src/core/tools/web-fetch-handlers/wikipedia.ts +91 -0
  82. package/src/core/tools/web-fetch-handlers/youtube.test.ts +198 -0
  83. package/src/core/tools/web-fetch-handlers/youtube.ts +319 -0
  84. package/src/core/tools/web-fetch.ts +152 -1324
  85. package/src/utils/tools-manager.ts +110 -8
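The headline change in this release is the web_fetch rewrite: package/src/core/tools/web-fetch.ts sheds most of its inlined site logic (+152 −1324), which moves into web-fetch-handlers/ as some fifty site-specific modules plus shared types.ts, utils.ts, and an index.ts aggregator. The hunks below (wikidata.ts, wikipedia.test.ts, wikipedia.ts) illustrate the common contract: each module exports a SpecialHandler that returns a RenderResult for URLs it recognizes and null otherwise. Here is a minimal dispatch sketch under that contract, assuming index.ts exposes an ordered handler list — its actual export shape is not visible in this diff:

// Sketch only — hypothetical registry; the real index.ts (+69 lines) is not shown here.
import type { RenderResult, SpecialHandler } from "./types";
import { handleWikidata } from "./wikidata";
import { handleWikipedia } from "./wikipedia";

// The full package would register ~50 handlers; two suffice to show the pattern.
const handlers: SpecialHandler[] = [handleWikipedia, handleWikidata];

// Each handler returns null for URLs it does not recognize (and on any error),
// so dispatch is "first non-null result wins".
export async function trySpecialHandlers(url: string, timeout: number): Promise<RenderResult | null> {
  for (const handler of handlers) {
    const result = await handler(url, timeout);
    if (result) return result;
  }
  return null; // caller falls back to the generic fetch/render path
}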
package/src/core/tools/web-fetch-handlers/wikidata.ts
@@ -0,0 +1,349 @@
+ import type { RenderResult, SpecialHandler } from "./types";
+ import { finalizeOutput, formatCount, loadPage } from "./types";
+
+ /**
+  * Common Wikidata property IDs mapped to human-readable names
+  */
+ const PROPERTY_LABELS: Record<string, string> = {
+   P31: "Instance of",
+   P279: "Subclass of",
+   P17: "Country",
+   P131: "Located in",
+   P625: "Coordinates",
+   P18: "Image",
+   P154: "Logo",
+   P571: "Founded",
+   P576: "Dissolved",
+   P169: "CEO",
+   P112: "Founded by",
+   P159: "Headquarters",
+   P452: "Industry",
+   P1128: "Employees",
+   P2139: "Revenue",
+   P856: "Website",
+   P21: "Sex/Gender",
+   P27: "Citizenship",
+   P569: "Born",
+   P570: "Died",
+   P19: "Birthplace",
+   P20: "Death place",
+   P106: "Occupation",
+   P108: "Employer",
+   P69: "Educated at",
+   P22: "Father",
+   P25: "Mother",
+   P26: "Spouse",
+   P40: "Child",
+   P166: "Award",
+   P136: "Genre",
+   P495: "Country of origin",
+   P577: "Publication date",
+   P50: "Author",
+   P123: "Publisher",
+   P364: "Original language",
+   P86: "Composer",
+   P57: "Director",
+   P161: "Cast member",
+   P170: "Creator",
+   P178: "Developer",
+   P275: "License",
+   P306: "Operating system",
+   P277: "Programming language",
+   P348: "Version",
+   P1566: "GeoNames ID",
+   P214: "VIAF ID",
+   P227: "GND ID",
+   P213: "ISNI",
+   P496: "ORCID",
+ };
+
+ interface WikidataEntity {
+   type: string;
+   id: string;
+   labels?: Record<string, { language: string; value: string }>;
+   descriptions?: Record<string, { language: string; value: string }>;
+   aliases?: Record<string, Array<{ language: string; value: string }>>;
+   claims?: Record<string, WikidataClaim[]>;
+   sitelinks?: Record<string, { site: string; title: string }>;
+ }
+
+ interface WikidataClaim {
+   mainsnak: {
+     snaktype: string;
+     property: string;
+     datavalue?: {
+       type: string;
+       value: WikidataValue;
+     };
+   };
+   rank: string;
+ }
+
+ type WikidataValue =
+   | string
+   | { "entity-type": string; id: string; "numeric-id": number }
+   | { time: string; precision: number; calendarmodel: string }
+   | { amount: string; unit: string }
+   | { text: string; language: string }
+   | { latitude: number; longitude: number; precision: number };
+
+ /**
+  * Handle Wikidata URLs via EntityData API
+  */
+ export const handleWikidata: SpecialHandler = async (url: string, timeout: number): Promise<RenderResult | null> => {
+   try {
+     const parsed = new URL(url);
+     if (!parsed.hostname.includes("wikidata.org")) return null;
+
+     // Extract Q-id from /wiki/Q123 or /entity/Q123
+     const qidMatch = parsed.pathname.match(/\/(?:wiki|entity)\/(Q\d+)/i);
+     if (!qidMatch) return null;
+
+     const qid = qidMatch[1].toUpperCase();
+     const fetchedAt = new Date().toISOString();
+
+     // Fetch entity data from API
+     const apiUrl = `https://www.wikidata.org/wiki/Special:EntityData/${qid}.json`;
+     const result = await loadPage(apiUrl, { timeout });
+
+     if (!result.ok) return null;
+
+     let data: { entities: Record<string, WikidataEntity> };
+     try {
+       data = JSON.parse(result.content);
+     } catch {
+       return null;
+     }
+
+     const entity = data.entities[qid];
+     if (!entity) return null;
+
+     // Get label and description (prefer English)
+     const label = getLocalizedValue(entity.labels, "en") || qid;
+     const description = getLocalizedValue(entity.descriptions, "en");
+     const aliases = getLocalizedAliases(entity.aliases, "en");
+
+     let md = `# ${label} (${qid})\n\n`;
+     if (description) md += `*${description}*\n\n`;
+     if (aliases.length > 0) md += `**Also known as:** ${aliases.join(", ")}\n\n`;
+
+     // Count sitelinks
+     const sitelinkCount = entity.sitelinks ? Object.keys(entity.sitelinks).length : 0;
+     if (sitelinkCount > 0) {
+       md += `**Wikipedia articles:** ${formatCount(sitelinkCount)} languages\n\n`;
+     }
+
+     // Process claims
+     if (entity.claims && Object.keys(entity.claims).length > 0) {
+       md += "## Properties\n\n";
+
+       // Collect entity IDs we need to resolve
+       const entityIdsToResolve = new Set<string>();
+       for (const claims of Object.values(entity.claims)) {
+         for (const claim of claims) {
+           if (claim.mainsnak.datavalue?.type === "wikibase-entityid") {
+             const val = claim.mainsnak.datavalue.value as { id: string };
+             entityIdsToResolve.add(val.id);
+           }
+         }
+       }
+
+       // Fetch labels for referenced entities (limit to 50)
+       const entityLabels = await resolveEntityLabels(Array.from(entityIdsToResolve).slice(0, 50), timeout);
+
+       // Group claims by property
+       const processedProperties: string[] = [];
+       for (const [propId, claims] of Object.entries(entity.claims)) {
+         const propLabel = PROPERTY_LABELS[propId] || propId;
+         const values: string[] = [];
+
+         for (const claim of claims) {
+           if (claim.rank === "deprecated") continue;
+           const value = formatClaimValue(claim, entityLabels);
+           if (value && !values.includes(value)) {
+             values.push(value);
+           }
+         }
+
+         if (values.length > 0) {
+           // Limit values shown per property
+           const displayValues = values.slice(0, 10);
+           const overflow = values.length > 10 ? ` (+${values.length - 10} more)` : "";
+           processedProperties.push(`- **${propLabel}:** ${displayValues.join(", ")}${overflow}`);
+         }
+       }
+
+       // Sort: known properties first, then by property ID
+       processedProperties.sort((a, b) => {
+         const aKnown = Object.values(PROPERTY_LABELS).some((l) => a.includes(`**${l}:**`));
+         const bKnown = Object.values(PROPERTY_LABELS).some((l) => b.includes(`**${l}:**`));
+         if (aKnown && !bKnown) return -1;
+         if (!aKnown && bKnown) return 1;
+         return a.localeCompare(b);
+       });
+
+       // Limit total properties shown
+       const maxProps = 50;
+       md += processedProperties.slice(0, maxProps).join("\n");
+       if (processedProperties.length > maxProps) {
+         md += `\n\n*...and ${processedProperties.length - maxProps} more properties*`;
+       }
+       md += "\n";
+     }
+
+     // Add notable sitelinks
+     if (entity.sitelinks) {
+       const notableSites = ["enwiki", "dewiki", "frwiki", "eswiki", "jawiki", "zhwiki"];
+       const links: string[] = [];
+
+       for (const site of notableSites) {
+         const sitelink = entity.sitelinks[site];
+         if (sitelink) {
+           const lang = site.replace("wiki", "");
+           const wikiUrl = `https://${lang}.wikipedia.org/wiki/${encodeURIComponent(sitelink.title)}`;
+           links.push(`[${lang.toUpperCase()}](${wikiUrl})`);
+         }
+       }
+
+       if (links.length > 0) {
+         md += `\n## Wikipedia Links\n\n${links.join(" · ")}\n`;
+       }
+     }
+
+     const output = finalizeOutput(md);
+     return {
+       url,
+       finalUrl: url,
+       contentType: "text/markdown",
+       method: "wikidata",
+       content: output.content,
+       fetchedAt,
+       truncated: output.truncated,
+       notes: ["Fetched via Wikidata EntityData API"],
+     };
+   } catch {}
+
+   return null;
+ };
+
+ /**
+  * Get localized value with fallback
+  */
+ function getLocalizedValue(
+   values: Record<string, { language: string; value: string }> | undefined,
+   preferredLang: string,
+ ): string | null {
+   if (!values) return null;
+   if (values[preferredLang]) return values[preferredLang].value;
+   // Fallback to any available
+   const first = Object.values(values)[0];
+   return first?.value || null;
+ }
+
+ /**
+  * Get aliases for a language
+  */
+ function getLocalizedAliases(
+   aliases: Record<string, Array<{ language: string; value: string }>> | undefined,
+   preferredLang: string,
+ ): string[] {
+   if (!aliases) return [];
+   const langAliases = aliases[preferredLang];
+   if (!langAliases) return [];
+   return langAliases.map((a) => a.value);
+ }
+
+ /**
+  * Resolve entity IDs to their labels via wbgetentities API
+  */
+ async function resolveEntityLabels(entityIds: string[], timeout: number): Promise<Record<string, string>> {
+   if (entityIds.length === 0) return {};
+
+   const labels: Record<string, string> = {};
+
+   // Fetch in batches of 50
+   const batchSize = 50;
+   for (let i = 0; i < entityIds.length; i += batchSize) {
+     const batch = entityIds.slice(i, i + batchSize);
+     const apiUrl = `https://www.wikidata.org/w/api.php?action=wbgetentities&ids=${batch.join("|")}&props=labels&languages=en&format=json`;
+
+     try {
+       const result = await loadPage(apiUrl, { timeout: Math.min(timeout, 10) });
+       if (result.ok) {
+         const data = JSON.parse(result.content) as {
+           entities: Record<string, { labels?: Record<string, { value: string }> }>;
+         };
+         for (const [id, entity] of Object.entries(data.entities)) {
+           const label = entity.labels?.en?.value;
+           if (label) labels[id] = label;
+         }
+       }
+     } catch {}
+   }
+
+   return labels;
+ }
+
+ /**
+  * Format a claim value to human-readable string
+  */
+ function formatClaimValue(claim: WikidataClaim, entityLabels: Record<string, string>): string | null {
+   const snak = claim.mainsnak;
+   if (snak.snaktype !== "value" || !snak.datavalue) return null;
+
+   const { type, value } = snak.datavalue;
+
+   switch (type) {
+     case "wikibase-entityid": {
+       const entityVal = value as { id: string };
+       return entityLabels[entityVal.id] || entityVal.id;
+     }
+     case "string":
+       return value as string;
+     case "time": {
+       const timeVal = value as { time: string; precision: number };
+       return formatWikidataTime(timeVal.time, timeVal.precision);
+     }
+     case "quantity": {
+       const qtyVal = value as { amount: string; unit: string };
+       const amount = qtyVal.amount.replace(/^\+/, "");
+       // Extract unit Q-id if present
+       const unitMatch = qtyVal.unit.match(/Q\d+$/);
+       const unit = unitMatch ? entityLabels[unitMatch[0]] || "" : "";
+       return unit ? `${amount} ${unit}` : amount;
+     }
+     case "monolingualtext": {
+       const textVal = value as { text: string; language: string };
+       return textVal.text;
+     }
+     case "globecoordinate": {
+       const coordVal = value as { latitude: number; longitude: number };
+       return `${coordVal.latitude.toFixed(4)}, ${coordVal.longitude.toFixed(4)}`;
+     }
+     default:
+       return null;
+   }
+ }
+
+ /**
+  * Format Wikidata time value to readable date
+  */
+ function formatWikidataTime(time: string, precision: number): string {
+   // Time format: +YYYY-MM-DDT00:00:00Z
+   const match = time.match(/^([+-]?\d+)-(\d{2})-(\d{2})/);
+   if (!match) return time;
+
+   const [, year, month, day] = match;
+   const yearNum = Number.parseInt(year, 10);
+   const absYear = Math.abs(yearNum);
+   const era = yearNum < 0 ? " BCE" : "";
+
+   // Precision: 9=year, 10=month, 11=day
+   if (precision >= 11) {
+     return `${day}/${month}/${absYear}${era}`;
+   }
+   if (precision >= 10) {
+     return `${month}/${absYear}${era}`;
+   }
+   return `${absYear}${era}`;
+ }
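A usage sketch for the handler above (not from the package; Q42 is Wikidata's entity for Douglas Adams, and the timeout argument appears to be in seconds, matching the 10/20 values the tests pass):

// Usage sketch, not part of the diff.
import { handleWikidata } from "./wikidata";

const result = await handleWikidata("https://www.wikidata.org/wiki/Q42", 20);
if (result) {
  // result.content is markdown: "# Douglas Adams (Q42)", an optional alias line,
  // a Properties list with entity references resolved to English labels,
  // and links to notable Wikipedia editions.
  console.log(result.method); // "wikidata"
  console.log(result.content);
}
// Non-Wikidata hosts, paths without a Q-id, HTTP failures, and unparsable
// JSON all yield null, handing the URL back to the generic fetch path.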
package/src/core/tools/web-fetch-handlers/wikipedia.test.ts
@@ -0,0 +1,73 @@
+ import { describe, expect, it } from "bun:test";
+ import { handleWikipedia } from "./wikipedia";
+
+ const SKIP = !process.env.WEB_FETCH_INTEGRATION;
+
+ describe.skipIf(SKIP)("handleWikipedia", () => {
+   it("returns null for non-Wikipedia URLs", async () => {
+     const result = await handleWikipedia("https://example.com", 10);
+     expect(result).toBeNull();
+   });
+
+   it("returns null for Wikipedia URLs without /wiki/ path", async () => {
+     const result = await handleWikipedia("https://en.wikipedia.org/", 10);
+     expect(result).toBeNull();
+   });
+
+   it("fetches a known article with full metadata", async () => {
+     // "Computer" is a stable, well-established article
+     const result = await handleWikipedia("https://en.wikipedia.org/wiki/Computer", 20);
+     expect(result).not.toBeNull();
+     expect(result?.method).toBe("wikipedia");
+     expect(result?.contentType).toBe("text/markdown");
+     expect(result?.content).toContain("Computer");
+     expect(result?.url).toBe("https://en.wikipedia.org/wiki/Computer");
+     expect(result?.finalUrl).toBe("https://en.wikipedia.org/wiki/Computer");
+     expect(result?.truncated).toBe(false);
+     expect(result?.notes).toContain("Fetched via Wikipedia API");
+     expect(result?.fetchedAt).toBeDefined();
+     // Should be a valid ISO timestamp (new Date never throws, so check the parse result)
+     expect(Number.isNaN(Date.parse(result?.fetchedAt ?? ""))).toBe(false);
+     // The handler should filter out References and External links sections
+     const content = result?.content ?? "";
+     const hasReferencesHeading = /^## References$/m.test(content);
+     const hasExternalLinksHeading = /^## External links$/m.test(content);
+     // Both of these headings should have been filtered out
+     expect(hasReferencesHeading || hasExternalLinksHeading).toBe(false);
+   });
+
+   it("handles different language wikis", async () => {
+     // German Wikipedia article for "Computer"
+     const result = await handleWikipedia("https://de.wikipedia.org/wiki/Computer", 20);
+     expect(result).not.toBeNull();
+     expect(result?.method).toBe("wikipedia");
+     expect(result?.contentType).toBe("text/markdown");
+     expect(result?.content).toContain("Computer");
+   });
+
+   it("handles article with special characters in title", async () => {
+     // Article with special characters: "C++"
+     const result = await handleWikipedia("https://en.wikipedia.org/wiki/C%2B%2B", 20);
+     expect(result).not.toBeNull();
+     expect(result?.method).toBe("wikipedia");
+     expect(result?.contentType).toBe("text/markdown");
+     expect(result?.content).toMatch(/C\+\+/);
+   });
+
+   it("handles article with spaces in title", async () => {
+     // Artificial intelligence uses underscores for spaces
+     const result = await handleWikipedia("https://en.wikipedia.org/wiki/Artificial_intelligence", 20);
+     expect(result).not.toBeNull();
+     expect(result?.method).toBe("wikipedia");
+     expect(result?.contentType).toBe("text/markdown");
+     expect(result?.content).toMatch(/[Aa]rtificial intelligence/);
+   });
+
+   it("handles non-existent articles gracefully", async () => {
+     const result = await handleWikipedia(
+       "https://en.wikipedia.org/wiki/ThisArticleDefinitelyDoesNotExist123456789",
+       20,
+     );
+     expect(result).toBeNull();
+   });
+ });
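Note the SKIP gate at the top of this spec: the suite hits live Wikipedia endpoints, so describe.skipIf skips it unless WEB_FETCH_INTEGRATION is set in the environment. Presumably the other *.test.ts files added in this release are gated the same way, though only this file's gate is visible here; a new handler spec would follow the same shape:

// Hypothetical companion spec reusing the same integration gate.
import { describe, expect, it } from "bun:test";
import { handleWikidata } from "./wikidata";

const SKIP = !process.env.WEB_FETCH_INTEGRATION;

describe.skipIf(SKIP)("handleWikidata", () => {
  it("returns null for non-Wikidata URLs", async () => {
    expect(await handleWikidata("https://example.com", 10)).toBeNull();
  });
});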
package/src/core/tools/web-fetch-handlers/wikipedia.ts
@@ -0,0 +1,91 @@
+ import { parse as parseHtml } from "node-html-parser";
+ import type { RenderResult, SpecialHandler } from "./types";
+ import { finalizeOutput, loadPage } from "./types";
+
+ /**
+  * Handle Wikipedia URLs via Wikipedia API
+  */
+ export const handleWikipedia: SpecialHandler = async (url: string, timeout: number): Promise<RenderResult | null> => {
+   try {
+     const parsed = new URL(url);
+     // Match *.wikipedia.org
+     const wikiMatch = parsed.hostname.match(/^(\w+)\.wikipedia\.org$/);
+     if (!wikiMatch) return null;
+
+     const lang = wikiMatch[1];
+     const titleMatch = parsed.pathname.match(/\/wiki\/(.+)/);
+     if (!titleMatch) return null;
+
+     const title = decodeURIComponent(titleMatch[1]);
+     const fetchedAt = new Date().toISOString();
+
+     // Use Wikipedia API to get plain text extract
+     const apiUrl = `https://${lang}.wikipedia.org/api/rest_v1/page/summary/${encodeURIComponent(title)}`;
+     const summaryResult = await loadPage(apiUrl, { timeout });
+
+     let md = "";
+
+     if (summaryResult.ok) {
+       const summary = JSON.parse(summaryResult.content) as {
+         title: string;
+         description?: string;
+         extract: string;
+       };
+       md = `# ${summary.title}\n\n`;
+       if (summary.description) md += `*${summary.description}*\n\n`;
+       md += `${summary.extract}\n\n---\n\n`;
+     }
+
+     // Get full article content via mobile-html or parse API
+     const contentUrl = `https://${lang}.wikipedia.org/api/rest_v1/page/mobile-html/${encodeURIComponent(title)}`;
+     const contentResult = await loadPage(contentUrl, { timeout });
+
+     if (contentResult.ok) {
+       const doc = parseHtml(contentResult.content);
+
+       // Extract main content sections
+       const sections = doc.querySelectorAll("section");
+       for (const section of sections) {
+         const heading = section.querySelector("h2, h3, h4");
+         const headingText = heading?.text?.trim();
+
+         // Skip certain sections
+         if (
+           headingText &&
+           ["References", "External links", "See also", "Notes", "Further reading"].includes(headingText)
+         ) {
+           continue;
+         }
+
+         if (headingText) {
+           const level = heading?.tagName === "H2" ? "##" : "###";
+           md += `${level} ${headingText}\n\n`;
+         }
+
+         const paragraphs = section.querySelectorAll("p");
+         for (const p of paragraphs) {
+           const text = p.text?.trim();
+           if (text && text.length > 20) {
+             md += `${text}\n\n`;
+           }
+         }
+       }
+     }
+
+     if (!md) return null;
+
+     const output = finalizeOutput(md);
+     return {
+       url,
+       finalUrl: url,
+       contentType: "text/markdown",
+       method: "wikipedia",
+       content: output.content,
+       fetchedAt,
+       truncated: output.truncated,
+       notes: ["Fetched via Wikipedia API"],
+     };
+   } catch {}
+
+   return null;
+ };
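The handler degrades gracefully across its two REST calls: if the summary request fails it still emits section content from mobile-html, if mobile-html fails it still emits the summary, and only when both leave md empty does it return null. A usage sketch (not from the package):

// Usage sketch, not part of the diff.
import { handleWikipedia } from "./wikipedia";

const result = await handleWikipedia("https://en.wikipedia.org/wiki/Computer", 20);
if (result) {
  // Markdown layout: "# Title", an italicized short description, the summary
  // extract, a "---" rule, then per-section headings and paragraphs with
  // References/External links/See also/Notes/Further reading filtered out.
  console.log(result.contentType); // "text/markdown"
  console.log(result.notes); // ["Fetched via Wikipedia API"]
}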