@oh-my-pi/pi-coding-agent 3.25.0 → 3.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/CHANGELOG.md +19 -0
  2. package/package.json +4 -4
  3. package/src/core/tools/complete.ts +2 -4
  4. package/src/core/tools/jtd-to-json-schema.ts +174 -196
  5. package/src/core/tools/read.ts +4 -4
  6. package/src/core/tools/task/executor.ts +146 -20
  7. package/src/core/tools/task/name-generator.ts +1544 -214
  8. package/src/core/tools/task/types.ts +19 -5
  9. package/src/core/tools/task/worker.ts +103 -13
  10. package/src/core/tools/web-fetch-handlers/academic.test.ts +239 -0
  11. package/src/core/tools/web-fetch-handlers/artifacthub.ts +210 -0
  12. package/src/core/tools/web-fetch-handlers/arxiv.ts +84 -0
  13. package/src/core/tools/web-fetch-handlers/aur.ts +171 -0
  14. package/src/core/tools/web-fetch-handlers/biorxiv.ts +136 -0
  15. package/src/core/tools/web-fetch-handlers/bluesky.ts +277 -0
  16. package/src/core/tools/web-fetch-handlers/brew.ts +173 -0
  17. package/src/core/tools/web-fetch-handlers/business.test.ts +82 -0
  18. package/src/core/tools/web-fetch-handlers/cheatsh.ts +73 -0
  19. package/src/core/tools/web-fetch-handlers/chocolatey.ts +153 -0
  20. package/src/core/tools/web-fetch-handlers/coingecko.ts +179 -0
  21. package/src/core/tools/web-fetch-handlers/crates-io.ts +123 -0
  22. package/src/core/tools/web-fetch-handlers/dev-platforms.test.ts +254 -0
  23. package/src/core/tools/web-fetch-handlers/devto.ts +173 -0
  24. package/src/core/tools/web-fetch-handlers/discogs.ts +303 -0
  25. package/src/core/tools/web-fetch-handlers/dockerhub.ts +156 -0
  26. package/src/core/tools/web-fetch-handlers/documentation.test.ts +85 -0
  27. package/src/core/tools/web-fetch-handlers/finance-media.test.ts +144 -0
  28. package/src/core/tools/web-fetch-handlers/git-hosting.test.ts +272 -0
  29. package/src/core/tools/web-fetch-handlers/github-gist.ts +64 -0
  30. package/src/core/tools/web-fetch-handlers/github.ts +424 -0
  31. package/src/core/tools/web-fetch-handlers/gitlab.ts +444 -0
  32. package/src/core/tools/web-fetch-handlers/go-pkg.ts +271 -0
  33. package/src/core/tools/web-fetch-handlers/hackage.ts +89 -0
  34. package/src/core/tools/web-fetch-handlers/hackernews.ts +208 -0
  35. package/src/core/tools/web-fetch-handlers/hex.ts +121 -0
  36. package/src/core/tools/web-fetch-handlers/huggingface.ts +385 -0
  37. package/src/core/tools/web-fetch-handlers/iacr.ts +82 -0
  38. package/src/core/tools/web-fetch-handlers/index.ts +69 -0
  39. package/src/core/tools/web-fetch-handlers/lobsters.ts +186 -0
  40. package/src/core/tools/web-fetch-handlers/mastodon.ts +302 -0
  41. package/src/core/tools/web-fetch-handlers/maven.ts +147 -0
  42. package/src/core/tools/web-fetch-handlers/mdn.ts +174 -0
  43. package/src/core/tools/web-fetch-handlers/media.test.ts +138 -0
  44. package/src/core/tools/web-fetch-handlers/metacpan.ts +247 -0
  45. package/src/core/tools/web-fetch-handlers/npm.ts +107 -0
  46. package/src/core/tools/web-fetch-handlers/nuget.ts +201 -0
  47. package/src/core/tools/web-fetch-handlers/nvd.ts +238 -0
  48. package/src/core/tools/web-fetch-handlers/opencorporates.ts +273 -0
  49. package/src/core/tools/web-fetch-handlers/openlibrary.ts +313 -0
  50. package/src/core/tools/web-fetch-handlers/osv.ts +184 -0
  51. package/src/core/tools/web-fetch-handlers/package-managers-2.test.ts +199 -0
  52. package/src/core/tools/web-fetch-handlers/package-managers.test.ts +171 -0
  53. package/src/core/tools/web-fetch-handlers/package-registries.test.ts +259 -0
  54. package/src/core/tools/web-fetch-handlers/packagist.ts +170 -0
  55. package/src/core/tools/web-fetch-handlers/pub-dev.ts +185 -0
  56. package/src/core/tools/web-fetch-handlers/pubmed.ts +174 -0
  57. package/src/core/tools/web-fetch-handlers/pypi.ts +125 -0
  58. package/src/core/tools/web-fetch-handlers/readthedocs.ts +122 -0
  59. package/src/core/tools/web-fetch-handlers/reddit.ts +100 -0
  60. package/src/core/tools/web-fetch-handlers/repology.ts +257 -0
  61. package/src/core/tools/web-fetch-handlers/research.test.ts +107 -0
  62. package/src/core/tools/web-fetch-handlers/rfc.ts +205 -0
  63. package/src/core/tools/web-fetch-handlers/rubygems.ts +112 -0
  64. package/src/core/tools/web-fetch-handlers/sec-edgar.ts +269 -0
  65. package/src/core/tools/web-fetch-handlers/security.test.ts +103 -0
  66. package/src/core/tools/web-fetch-handlers/semantic-scholar.ts +190 -0
  67. package/src/core/tools/web-fetch-handlers/social-extended.test.ts +192 -0
  68. package/src/core/tools/web-fetch-handlers/social.test.ts +259 -0
  69. package/src/core/tools/web-fetch-handlers/spotify.ts +218 -0
  70. package/src/core/tools/web-fetch-handlers/stackexchange.test.ts +120 -0
  71. package/src/core/tools/web-fetch-handlers/stackoverflow.ts +123 -0
  72. package/src/core/tools/web-fetch-handlers/standards.test.ts +122 -0
  73. package/src/core/tools/web-fetch-handlers/terraform.ts +296 -0
  74. package/src/core/tools/web-fetch-handlers/tldr.ts +47 -0
  75. package/src/core/tools/web-fetch-handlers/twitter.ts +84 -0
  76. package/src/core/tools/web-fetch-handlers/types.ts +163 -0
  77. package/src/core/tools/web-fetch-handlers/utils.ts +91 -0
  78. package/src/core/tools/web-fetch-handlers/vimeo.ts +152 -0
  79. package/src/core/tools/web-fetch-handlers/wikidata.ts +349 -0
  80. package/src/core/tools/web-fetch-handlers/wikipedia.test.ts +73 -0
  81. package/src/core/tools/web-fetch-handlers/wikipedia.ts +91 -0
  82. package/src/core/tools/web-fetch-handlers/youtube.test.ts +198 -0
  83. package/src/core/tools/web-fetch-handlers/youtube.ts +319 -0
  84. package/src/core/tools/web-fetch.ts +152 -1324
  85. package/src/utils/tools-manager.ts +110 -8
@@ -0,0 +1,385 @@
1
+ import type { SpecialHandler } from "./types";
2
+ import { finalizeOutput, formatCount, loadPage } from "./types";
3
+
4
/**
 * Fields this handler reads from the JSON served by
 * `https://huggingface.co/api/models/{id}`.
 *
 * Only `modelId` is required; every other member may be absent.
 */
interface HfModelData {
	/** Identifier rendered as the page heading. */
	modelId: string;

	// Popularity counters
	likes?: number;
	downloads?: number;

	// Classification
	tags?: Array<string>;
	library_name?: string;
	pipeline_tag?: string;

	// Access flags (`gated` may arrive as a boolean or as a string)
	gated?: string | boolean;
	private?: boolean;

	/** Optional `cardData` object of the response. */
	cardData?: {
		metrics?: Array<string>;
		datasets?: Array<string>;
		/** Either a single language or a list of languages. */
		language?: Array<string> | string;
		license?: string;
	};
}
20
+
21
/**
 * Fields this handler reads from the JSON served by
 * `https://huggingface.co/api/datasets/{id}`.
 *
 * Only `id` is required; every other member may be absent.
 */
interface HfDatasetData {
	/** Identifier rendered as the page heading. */
	id: string;
	/** Free-text description printed directly under the heading. */
	description?: string;

	// Popularity counters
	likes?: number;
	downloads?: number;

	tags?: Array<string>;

	// Access flags (`gated` may arrive as a boolean or as a string)
	gated?: string | boolean;
	private?: boolean;

	/** Optional `cardData` object of the response. */
	cardData?: {
		size_categories?: Array<string>;
		task_categories?: Array<string>;
		/** Either a single language or a list of languages. */
		language?: Array<string> | string;
		license?: string;
	};
}
36
+
37
/**
 * Fields this handler reads from the JSON served by
 * `https://huggingface.co/api/spaces/{id}`.
 *
 * Only `id` is required; every other member may be absent.
 */
interface HfSpaceData {
	/** Identifier rendered as the page heading. */
	id: string;
	/** Display title printed directly under the heading. */
	title?: string;
	author?: string;
	sdk?: string;

	likes?: number;
	tags?: Array<string>;
	private?: boolean;

	/** Optional `cardData` object of the response. */
	cardData?: {
		app_file?: string;
		sdk?: string;
		license?: string;
	};
}
51
+
52
/**
 * Fields declared for the JSON served by
 * `https://huggingface.co/api/users/{id}`. Every member is optional.
 */
interface HfUserData {
	/** Account name; used as the page heading when present. */
	user?: string;
	fullname?: string;
	avatarUrl?: string;

	// Repository counters
	numSpaces?: number;
	numDatasets?: number;
	numModels?: number;

	/** Organizations listed for the user; only `name` is read. */
	orgs?: { name: string }[];
}
61
+
62
/**
 * Parse a Hugging Face URL and classify the resource it points at.
 *
 * Recognised shapes (host must be exactly `huggingface.co`):
 * - `/datasets/{org}/{dataset}` or `/datasets/{dataset}` -> `dataset`
 * - `/spaces/{org}/{space}`                              -> `space`
 * - `/{org}/{model}`                                     -> `model`
 * - `/{id}`                                              -> `model_or_user`
 *
 * Trailing repository sub-routes (`/tree/main`, `/blob/...`, `/discussions`, ...)
 * are ignored so that the returned `id` is always just the repository id.
 *
 * @param url - Absolute URL to inspect.
 * @returns The resource type and id, or `null` when the URL is malformed,
 *          belongs to another host, or points at a non-resource site page.
 */
function parseHuggingFaceUrl(url: string): {
	type: "model" | "dataset" | "space" | "model_or_user";
	id: string; // Full ID (org/name or just name)
} | null {
	let parts: string[];
	try {
		const parsed = new URL(url);
		if (parsed.hostname !== "huggingface.co") return null;
		parts = parsed.pathname.split("/").filter(Boolean);
	} catch {
		// `new URL` throws on anything that is not an absolute URL.
		return null;
	}

	const [first, second, third] = parts;
	if (first === undefined) return null;

	// Path segments that follow a repository id rather than being part of it.
	const repoSubRoutes = new Set(["tree", "blob", "resolve", "raw", "commit", "commits", "discussions", "viewer"]);
	const isSubRoute = (segment: string | undefined): boolean => segment !== undefined && repoSubRoutes.has(segment);

	// huggingface.co/datasets/{org}/{dataset} or huggingface.co/datasets/{dataset}
	if (first === "datasets") {
		// Bare `/datasets` is the listing page, not a dataset.
		if (second === undefined) return null;
		// Keep at most two id segments; a sub-route right after the first
		// segment means the dataset has a single-segment id.
		const id = third !== undefined && !isSubRoute(third) ? `${second}/${third}` : second;
		return { type: "dataset", id };
	}

	// huggingface.co/spaces/{org}/{space}
	if (first === "spaces") {
		// `/spaces` and `/spaces/{org}` are listing pages; without this guard
		// they would fall through and be misread as the model `spaces/{org}`.
		if (second === undefined || third === undefined) return null;
		return { type: "space", id: `${second}/${third}` };
	}

	// Skip non-resource paths
	const reservedPaths = new Set([
		"docs",
		"blog",
		"pricing",
		"enterprise",
		"join",
		"login",
		"settings",
		"api",
		"models",
		"papers",
		"collections",
		"learn",
		"tasks",
		"chat",
		"posts",
		"organizations",
		"new",
		"search",
	]);
	if (reservedPaths.has(first)) return null;

	// huggingface.co/{org}/{model} (two id segments = definitely a model)
	if (second !== undefined && !isSubRoute(second)) {
		return { type: "model", id: `${first}/${second}` };
	}

	// huggingface.co/{id}, optionally followed by a sub-route
	// (single id segment = could be model or user, caller tries model first)
	return { type: "model_or_user", id: first };
}
108
+
109
/**
 * Non-null value a {@link SpecialHandler} resolves to. Derived from the handler
 * type so the result builder below is checked against the real result shape.
 */
type HfRenderResult = NonNullable<Awaited<ReturnType<SpecialHandler>>>;

/** Collapse the `language` card field (one string or a list) into display text. */
function formatHfLanguages(language: string | string[]): string {
	return Array.isArray(language) ? language.join(", ") : language;
}

/** End the metadata list with a blank line and append the README under `heading` when it has content. */
function appendHfReadme(md: string, heading: string, readme: string | null): string {
	const body = `${md}\n`;
	return readme?.trim() ? `${body}## ${heading}\n\n${readme}` : body;
}

/** Render a model API response (plus optional README text) as markdown. */
function renderHfModel(model: HfModelData, fallbackId: string, readme: string | null): string {
	// Fall back to the id taken from the URL so the heading never reads "undefined".
	let md = `# ${model.modelId || fallbackId}\n\n`;

	if (model.pipeline_tag) md += `**Task:** ${model.pipeline_tag}\n`;
	if (model.library_name) md += `**Library:** ${model.library_name}\n`;
	if (model.downloads !== undefined) md += `**Downloads:** ${formatCount(model.downloads)}\n`;
	if (model.likes !== undefined) md += `**Likes:** ${formatCount(model.likes)}\n`;
	if (model.private) md += `**Visibility:** Private\n`;
	if (model.gated) md += `**Access:** Gated\n`;

	const card = model.cardData;
	if (card) {
		if (card.license) md += `**License:** ${card.license}\n`;
		if (card.language) md += `**Language:** ${formatHfLanguages(card.language)}\n`;
		if (card.datasets?.length) md += `**Datasets:** ${card.datasets.join(", ")}\n`;
		if (card.metrics?.length) md += `**Metrics:** ${card.metrics.join(", ")}\n`;
	}

	if (model.tags?.length) md += `**Tags:** ${model.tags.join(", ")}\n`;

	return appendHfReadme(md, "Model Card", readme);
}

/** Render a dataset API response (plus optional README text) as markdown. */
function renderHfDataset(dataset: HfDatasetData, readme: string | null): string {
	let md = `# ${dataset.id}\n\n`;
	if (dataset.description) md += `${dataset.description}\n\n`;

	if (dataset.downloads !== undefined) md += `**Downloads:** ${formatCount(dataset.downloads)}\n`;
	if (dataset.likes !== undefined) md += `**Likes:** ${formatCount(dataset.likes)}\n`;
	if (dataset.private) md += `**Visibility:** Private\n`;
	if (dataset.gated) md += `**Access:** Gated\n`;

	const card = dataset.cardData;
	if (card) {
		if (card.license) md += `**License:** ${card.license}\n`;
		if (card.language) md += `**Language:** ${formatHfLanguages(card.language)}\n`;
		if (card.task_categories?.length) md += `**Tasks:** ${card.task_categories.join(", ")}\n`;
		if (card.size_categories?.length) md += `**Size:** ${card.size_categories.join(", ")}\n`;
	}

	if (dataset.tags?.length) md += `**Tags:** ${dataset.tags.join(", ")}\n`;

	return appendHfReadme(md, "Dataset Card", readme);
}

/** Render a space API response (plus optional README text) as markdown. */
function renderHfSpace(space: HfSpaceData, readme: string | null): string {
	let md = `# ${space.id}\n\n`;
	if (space.title) md += `${space.title}\n\n`;

	if (space.author) md += `**Author:** ${space.author}\n`;
	if (space.sdk) md += `**SDK:** ${space.sdk}\n`;
	if (space.likes !== undefined) md += `**Likes:** ${formatCount(space.likes)}\n`;
	if (space.private) md += `**Visibility:** Private\n`;

	const card = space.cardData;
	if (card) {
		if (card.license) md += `**License:** ${card.license}\n`;
		if (card.app_file) md += `**App File:** ${card.app_file}\n`;
	}

	if (space.tags?.length) md += `**Tags:** ${space.tags.join(", ")}\n`;

	return appendHfReadme(md, "Space Info", readme);
}

/** Render a user API response as markdown; `fallbackId` is used when the response has no `user`. */
function renderHfUser(user: HfUserData, fallbackId: string): string {
	let md = `# ${user.user || fallbackId}\n\n`;
	if (user.fullname) md += `**Name:** ${user.fullname}\n`;
	if (user.numModels !== undefined) md += `**Models:** ${formatCount(user.numModels)}\n`;
	if (user.numDatasets !== undefined) md += `**Datasets:** ${formatCount(user.numDatasets)}\n`;
	if (user.numSpaces !== undefined) md += `**Spaces:** ${formatCount(user.numSpaces)}\n`;

	if (user.orgs?.length) {
		md += `**Organizations:** ${user.orgs.map((o) => o.name).join(", ")}\n`;
	}
	return md;
}

/**
 * Special handler for `huggingface.co` model, dataset, space and user pages.
 *
 * Fetches the matching Hub API JSON (and, for repositories, the raw
 * `README.md`) and renders both as markdown.
 *
 * @param url - URL requested by the caller.
 * @param timeout - Timeout forwarded to `loadPage` for the API request.
 * @returns A markdown render result, or `null` when the URL is not a supported
 *          Hugging Face resource, the API request fails, the response is not
 *          valid JSON, or anything throws while building the result.
 */
export const handleHuggingFace: SpecialHandler = async (url: string, timeout: number) => {
	const parsed = parseHuggingFaceUrl(url);
	if (!parsed) return null;

	const fetchedAt = new Date().toISOString();
	const notes: string[] = [];
	// NOTE(review): README fetches are capped at 5 in whatever unit `loadPage`
	// expects for `timeout` — confirm that is the intended bound.
	const readmeTimeout = Math.min(timeout, 5);

	// Single place that turns rendered markdown into the handler result.
	const respond = (md: string, finalUrl: HfRenderResult["finalUrl"]): HfRenderResult => {
		const { content, truncated } = finalizeOutput(md);
		return {
			url,
			finalUrl,
			contentType: "text/markdown",
			method: "huggingface",
			content,
			fetchedAt,
			truncated,
			notes,
		};
	};

	try {
		switch (parsed.type) {
			case "model": {
				const [apiResult, readmeResult] = await Promise.all([
					loadPage(`https://huggingface.co/api/models/${parsed.id}`, { timeout }),
					loadPage(`https://huggingface.co/${parsed.id}/raw/main/README.md`, { timeout: readmeTimeout }),
				]);
				if (!apiResult.ok) return null;

				// A JSON syntax error is handled by the outer catch (returns null).
				const model: HfModelData = JSON.parse(apiResult.content);
				const readme = readmeResult.ok ? readmeResult.content : null;
				return respond(renderHfModel(model, parsed.id, readme), apiResult.finalUrl);
			}

			case "dataset": {
				const [apiResult, readmeResult] = await Promise.all([
					loadPage(`https://huggingface.co/api/datasets/${parsed.id}`, { timeout }),
					loadPage(`https://huggingface.co/datasets/${parsed.id}/raw/main/README.md`, {
						timeout: readmeTimeout,
					}),
				]);
				if (!apiResult.ok) return null;

				const dataset: HfDatasetData = JSON.parse(apiResult.content);
				const readme = readmeResult.ok ? readmeResult.content : null;
				return respond(renderHfDataset(dataset, readme), apiResult.finalUrl);
			}

			case "space": {
				const [apiResult, readmeResult] = await Promise.all([
					loadPage(`https://huggingface.co/api/spaces/${parsed.id}`, { timeout }),
					loadPage(`https://huggingface.co/spaces/${parsed.id}/raw/main/README.md`, {
						timeout: readmeTimeout,
					}),
				]);
				if (!apiResult.ok) return null;

				const space: HfSpaceData = JSON.parse(apiResult.content);
				const readme = readmeResult.ok ? readmeResult.content : null;
				return respond(renderHfSpace(space, readme), apiResult.finalUrl);
			}

			case "model_or_user": {
				// Try model API first
				const modelResult = await loadPage(`https://huggingface.co/api/models/${parsed.id}`, { timeout });

				if (modelResult.ok) {
					let model: HfModelData | null = null;
					try {
						model = JSON.parse(modelResult.content);
					} catch {
						// Not JSON: fall through to the user lookup below.
					}
					if (model) {
						// The README is fetched only once the id is known to be a
						// model, so a user lookup never pays for it.
						const readmeResult = await loadPage(`https://huggingface.co/${parsed.id}/raw/main/README.md`, {
							timeout: readmeTimeout,
						});
						const readme = readmeResult.ok ? readmeResult.content : null;
						// Same renderer as the org/name case, so single-segment
						// models show licence, language and gating info too.
						return respond(renderHfModel(model, parsed.id, readme), modelResult.finalUrl);
					}
				}

				// Fall back to user API
				const userResult = await loadPage(`https://huggingface.co/api/users/${parsed.id}`, { timeout });
				if (!userResult.ok) return null;

				const user: HfUserData = JSON.parse(userResult.content);
				return respond(renderHfUser(user, parsed.id), userResult.finalUrl);
			}

			default:
				return null;
		}
	} catch (_err) {
		// Any failure (network, JSON syntax, unexpected payload shape) means
		// "not handled here"; the handler reports that by returning null.
		return null;
	}
};
@@ -0,0 +1,82 @@
1
+ import { parse as parseHtml } from "node-html-parser";
2
+ import type { RenderResult, SpecialHandler } from "./types";
3
+ import { finalizeOutput, loadPage } from "./types";
4
+ import { convertWithMarkitdown, fetchBinary } from "./utils";
5
+
6
/**
 * Handle IACR ePrint Archive URLs
 *
 * Accepts `https://eprint.iacr.org/{year}/{number}` and the same path with a
 * `.pdf` suffix. The paper's HTML page is always fetched for metadata (title,
 * authors, date, keywords, abstract). When the requested URL ends in `.pdf`,
 * the PDF is additionally downloaded and converted, and the text is appended
 * if the conversion yields more than 500 characters.
 *
 * @param url - URL requested by the caller.
 * @param timeout - Timeout forwarded to the page fetch, the PDF download and the conversion.
 * @returns A markdown render result, or `null` when the URL is not an ePrint
 *          paper URL, the HTML page cannot be loaded, or extraction throws.
 */
export const handleIacr: SpecialHandler = async (url: string, timeout: number): Promise<RenderResult | null> => {
	try {
		const parsed = new URL(url);
		if (parsed.hostname !== "eprint.iacr.org") return null;

		// Extract paper ID from /year/number or /year/number.pdf
		const match = parsed.pathname.match(/\/(\d{4})\/(\d+)(?:\.pdf)?$/);
		if (!match) return null;

		const [, year, number] = match;
		const paperId = `${year}/${number}`;
		const fetchedAt = new Date().toISOString();
		const notes: string[] = [];

		// Fetch the HTML page for metadata
		const pageUrl = `https://eprint.iacr.org/${paperId}`;
		const result = await loadPage(pageUrl, { timeout });

		if (!result.ok) return null;

		const doc = parseHtml(result.content);

		// Extract metadata from the page; each field prefers the visible markup
		// and falls back to the corresponding <meta> tag.
		const title =
			doc.querySelector("h3.mb-3")?.text?.trim() ||
			doc.querySelector('meta[name="citation_title"]')?.getAttribute("content");
		const authors = doc
			.querySelectorAll('meta[name="citation_author"]')
			.map((m) => m.getAttribute("content"))
			.filter(Boolean);
		// Abstract is in <p> after <h5>Abstract</h5>
		const abstractHeading = doc.querySelectorAll("h5").find((h) => h.text?.includes("Abstract"));
		const abstract =
			abstractHeading?.parentNode?.querySelector("p")?.text?.trim() ||
			doc.querySelector('meta[name="description"]')?.getAttribute("content");
		const keywords = doc.querySelector(".keywords")?.text?.replace("Keywords:", "").trim();
		const pubDate = doc.querySelector('meta[name="citation_publication_date"]')?.getAttribute("content");

		let md = `# ${title || "IACR ePrint Paper"}\n\n`;
		if (authors.length) md += `**Authors:** ${authors.join(", ")}\n`;
		if (pubDate) md += `**Date:** ${pubDate}\n`;
		md += `**ePrint:** ${paperId}\n`;
		if (keywords) md += `**Keywords:** ${keywords}\n`;
		md += `\n---\n\n## Abstract\n\n${abstract || "No abstract available."}\n\n`;

		// If it was a PDF link, try to fetch and convert PDF
		if (parsed.pathname.endsWith(".pdf")) {
			const pdfUrl = `https://eprint.iacr.org/${paperId}.pdf`;
			notes.push("Fetching PDF for full content...");
			// The full text is a best-effort extra: a failure here must not
			// discard the metadata and abstract collected above, so this step
			// has its own try/catch and records what happened in `notes`.
			try {
				const pdfResult = await fetchBinary(pdfUrl, timeout);
				if (pdfResult.ok) {
					const converted = await convertWithMarkitdown(pdfResult.buffer, ".pdf", timeout);
					if (converted.ok && converted.content.length > 500) {
						md += `---\n\n## Full Paper\n\n${converted.content}\n`;
						notes.push("PDF converted via markitdown");
					} else {
						notes.push("PDF conversion failed or produced too little text; showing abstract only");
					}
				} else {
					notes.push("PDF download failed; showing abstract only");
				}
			} catch {
				notes.push("PDF processing failed; showing abstract only");
			}
		}

		const output = finalizeOutput(md);
		return {
			url,
			finalUrl: url,
			contentType: "text/markdown",
			method: "iacr",
			content: output.content,
			fetchedAt,
			truncated: output.truncated,
			notes: notes.length ? notes : ["Fetched from IACR ePrint Archive"],
		};
	} catch {
		// Malformed URL or an unexpected extraction error: report "not handled".
		return null;
	}
};
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Web Fetch Special Handlers Index
3
+ *
4
+ * Exports all special handlers for site-specific content extraction.
5
+ */
6
+
7
+ export { handleArtifactHub } from "./artifacthub";
8
+ // Academic
9
+ export { handleArxiv } from "./arxiv";
10
+ export { handleAur } from "./aur";
11
+ export { handleBiorxiv } from "./biorxiv";
12
+ export { handleBluesky } from "./bluesky";
13
+ export { handleBrew } from "./brew";
14
+ export { handleCheatSh } from "./cheatsh";
15
+ export { handleChocolatey } from "./chocolatey";
16
+ export { handleCoinGecko } from "./coingecko";
17
+ export { handleCratesIo } from "./crates-io";
18
+ export { handleDevTo } from "./devto";
19
+ export { handleDiscogs } from "./discogs";
20
+ export { handleDockerHub } from "./dockerhub";
21
+ // Git hosting
22
+ export { fetchGitHubApi, handleGitHub } from "./github";
23
+ export { handleGitHubGist } from "./github-gist";
24
+ export { handleGitLab } from "./gitlab";
25
+ export { handleGoPkg } from "./go-pkg";
26
+ export { handleHackage } from "./hackage";
27
+ export { handleHackerNews } from "./hackernews";
28
+ export { handleHex } from "./hex";
29
+ // ML/AI
30
+ export { handleHuggingFace } from "./huggingface";
31
+ export { handleIacr } from "./iacr";
32
+ export { handleLobsters } from "./lobsters";
33
+ export { handleMastodon } from "./mastodon";
34
+ export { handleMaven } from "./maven";
35
+ export { handleMDN } from "./mdn";
36
+ export { handleMetaCPAN } from "./metacpan";
37
+ // Package registries
38
+ export { handleNpm } from "./npm";
39
+ export { handleNuGet } from "./nuget";
40
+ export { handleNvd } from "./nvd";
41
+ export { handleOpenCorporates } from "./opencorporates";
42
+ export { handleOpenLibrary } from "./openlibrary";
43
+ export { handleOsv } from "./osv";
44
+ export { handlePackagist } from "./packagist";
45
+ export { handlePubDev } from "./pub-dev";
46
+ export { handlePubMed } from "./pubmed";
47
+ export { handlePyPI } from "./pypi";
48
+ export { handleReadTheDocs } from "./readthedocs";
49
+ export { handleReddit } from "./reddit";
50
+ export { handleRepology } from "./repology";
51
+ export { handleRfc } from "./rfc";
52
+ export { handleRubyGems } from "./rubygems";
53
+ export { handleSecEdgar } from "./sec-edgar";
54
+ export { handleSemanticScholar } from "./semantic-scholar";
55
+ export { handleSpotify } from "./spotify";
56
+ // Developer content
57
+ export { handleStackOverflow } from "./stackoverflow";
58
+ export { handleTerraform } from "./terraform";
59
+ export { handleTldr } from "./tldr";
60
+ // Social/News
61
+ export { handleTwitter } from "./twitter";
62
+ export type { RenderResult, SpecialHandler } from "./types";
63
+ export { handleVimeo } from "./vimeo";
64
+
65
+ // Reference
66
+ export { handleWikidata } from "./wikidata";
67
+ export { handleWikipedia } from "./wikipedia";
68
+ // Video/Media
69
+ export { handleYouTube } from "./youtube";