@oh-my-pi/pi-coding-agent 3.25.0 → 3.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. package/CHANGELOG.md +90 -0
  2. package/package.json +5 -5
  3. package/src/cli/args.ts +4 -0
  4. package/src/core/agent-session.ts +29 -2
  5. package/src/core/bash-executor.ts +2 -1
  6. package/src/core/custom-commands/bundled/review/index.ts +369 -14
  7. package/src/core/custom-commands/bundled/wt/index.ts +1 -1
  8. package/src/core/session-manager.ts +158 -246
  9. package/src/core/session-storage.ts +379 -0
  10. package/src/core/settings-manager.ts +155 -4
  11. package/src/core/system-prompt.ts +62 -64
  12. package/src/core/tools/ask.ts +5 -4
  13. package/src/core/tools/bash-interceptor.ts +26 -61
  14. package/src/core/tools/bash.ts +13 -8
  15. package/src/core/tools/complete.ts +2 -4
  16. package/src/core/tools/edit-diff.ts +11 -4
  17. package/src/core/tools/edit.ts +7 -13
  18. package/src/core/tools/find.ts +111 -50
  19. package/src/core/tools/gemini-image.ts +128 -147
  20. package/src/core/tools/grep.ts +397 -415
  21. package/src/core/tools/index.test.ts +5 -1
  22. package/src/core/tools/index.ts +6 -8
  23. package/src/core/tools/jtd-to-json-schema.ts +174 -196
  24. package/src/core/tools/ls.ts +12 -10
  25. package/src/core/tools/lsp/client.ts +58 -9
  26. package/src/core/tools/lsp/config.ts +205 -656
  27. package/src/core/tools/lsp/defaults.json +465 -0
  28. package/src/core/tools/lsp/index.ts +55 -32
  29. package/src/core/tools/lsp/rust-analyzer.ts +49 -10
  30. package/src/core/tools/lsp/types.ts +1 -0
  31. package/src/core/tools/lsp/utils.ts +1 -1
  32. package/src/core/tools/read.ts +152 -76
  33. package/src/core/tools/render-utils.ts +70 -10
  34. package/src/core/tools/review.ts +38 -126
  35. package/src/core/tools/task/artifacts.ts +5 -4
  36. package/src/core/tools/task/executor.ts +204 -67
  37. package/src/core/tools/task/index.ts +129 -92
  38. package/src/core/tools/task/name-generator.ts +1544 -214
  39. package/src/core/tools/task/parallel.ts +30 -3
  40. package/src/core/tools/task/render.ts +85 -39
  41. package/src/core/tools/task/types.ts +34 -11
  42. package/src/core/tools/task/worker.ts +152 -27
  43. package/src/core/tools/web-fetch.ts +220 -1657
  44. package/src/core/tools/web-scrapers/academic.test.ts +239 -0
  45. package/src/core/tools/web-scrapers/artifacthub.ts +215 -0
  46. package/src/core/tools/web-scrapers/arxiv.ts +88 -0
  47. package/src/core/tools/web-scrapers/aur.ts +175 -0
  48. package/src/core/tools/web-scrapers/biorxiv.ts +141 -0
  49. package/src/core/tools/web-scrapers/bluesky.ts +284 -0
  50. package/src/core/tools/web-scrapers/brew.ts +177 -0
  51. package/src/core/tools/web-scrapers/business.test.ts +82 -0
  52. package/src/core/tools/web-scrapers/cheatsh.ts +78 -0
  53. package/src/core/tools/web-scrapers/chocolatey.ts +158 -0
  54. package/src/core/tools/web-scrapers/choosealicense.ts +110 -0
  55. package/src/core/tools/web-scrapers/cisa-kev.ts +100 -0
  56. package/src/core/tools/web-scrapers/clojars.ts +180 -0
  57. package/src/core/tools/web-scrapers/coingecko.ts +184 -0
  58. package/src/core/tools/web-scrapers/crates-io.ts +128 -0
  59. package/src/core/tools/web-scrapers/crossref.ts +149 -0
  60. package/src/core/tools/web-scrapers/dev-platforms.test.ts +254 -0
  61. package/src/core/tools/web-scrapers/devto.ts +177 -0
  62. package/src/core/tools/web-scrapers/discogs.ts +308 -0
  63. package/src/core/tools/web-scrapers/discourse.ts +221 -0
  64. package/src/core/tools/web-scrapers/dockerhub.ts +160 -0
  65. package/src/core/tools/web-scrapers/documentation.test.ts +85 -0
  66. package/src/core/tools/web-scrapers/fdroid.ts +158 -0
  67. package/src/core/tools/web-scrapers/finance-media.test.ts +144 -0
  68. package/src/core/tools/web-scrapers/firefox-addons.ts +214 -0
  69. package/src/core/tools/web-scrapers/flathub.ts +239 -0
  70. package/src/core/tools/web-scrapers/git-hosting.test.ts +272 -0
  71. package/src/core/tools/web-scrapers/github-gist.ts +68 -0
  72. package/src/core/tools/web-scrapers/github.ts +455 -0
  73. package/src/core/tools/web-scrapers/gitlab.ts +456 -0
  74. package/src/core/tools/web-scrapers/go-pkg.ts +275 -0
  75. package/src/core/tools/web-scrapers/hackage.ts +94 -0
  76. package/src/core/tools/web-scrapers/hackernews.ts +208 -0
  77. package/src/core/tools/web-scrapers/hex.ts +121 -0
  78. package/src/core/tools/web-scrapers/huggingface.ts +385 -0
  79. package/src/core/tools/web-scrapers/iacr.ts +86 -0
  80. package/src/core/tools/web-scrapers/index.ts +250 -0
  81. package/src/core/tools/web-scrapers/jetbrains-marketplace.ts +169 -0
  82. package/src/core/tools/web-scrapers/lemmy.ts +220 -0
  83. package/src/core/tools/web-scrapers/lobsters.ts +186 -0
  84. package/src/core/tools/web-scrapers/mastodon.ts +310 -0
  85. package/src/core/tools/web-scrapers/maven.ts +152 -0
  86. package/src/core/tools/web-scrapers/mdn.ts +174 -0
  87. package/src/core/tools/web-scrapers/media.test.ts +138 -0
  88. package/src/core/tools/web-scrapers/metacpan.ts +253 -0
  89. package/src/core/tools/web-scrapers/musicbrainz.ts +273 -0
  90. package/src/core/tools/web-scrapers/npm.ts +114 -0
  91. package/src/core/tools/web-scrapers/nuget.ts +205 -0
  92. package/src/core/tools/web-scrapers/nvd.ts +243 -0
  93. package/src/core/tools/web-scrapers/ollama.ts +267 -0
  94. package/src/core/tools/web-scrapers/open-vsx.ts +119 -0
  95. package/src/core/tools/web-scrapers/opencorporates.ts +275 -0
  96. package/src/core/tools/web-scrapers/openlibrary.ts +319 -0
  97. package/src/core/tools/web-scrapers/orcid.ts +299 -0
  98. package/src/core/tools/web-scrapers/osv.ts +189 -0
  99. package/src/core/tools/web-scrapers/package-managers-2.test.ts +199 -0
  100. package/src/core/tools/web-scrapers/package-managers.test.ts +171 -0
  101. package/src/core/tools/web-scrapers/package-registries.test.ts +259 -0
  102. package/src/core/tools/web-scrapers/packagist.ts +174 -0
  103. package/src/core/tools/web-scrapers/pub-dev.ts +185 -0
  104. package/src/core/tools/web-scrapers/pubmed.ts +178 -0
  105. package/src/core/tools/web-scrapers/pypi.ts +129 -0
  106. package/src/core/tools/web-scrapers/rawg.ts +124 -0
  107. package/src/core/tools/web-scrapers/readthedocs.ts +126 -0
  108. package/src/core/tools/web-scrapers/reddit.ts +104 -0
  109. package/src/core/tools/web-scrapers/repology.ts +262 -0
  110. package/src/core/tools/web-scrapers/research.test.ts +107 -0
  111. package/src/core/tools/web-scrapers/rfc.ts +209 -0
  112. package/src/core/tools/web-scrapers/rubygems.ts +117 -0
  113. package/src/core/tools/web-scrapers/searchcode.ts +217 -0
  114. package/src/core/tools/web-scrapers/sec-edgar.ts +274 -0
  115. package/src/core/tools/web-scrapers/security.test.ts +103 -0
  116. package/src/core/tools/web-scrapers/semantic-scholar.ts +190 -0
  117. package/src/core/tools/web-scrapers/snapcraft.ts +200 -0
  118. package/src/core/tools/web-scrapers/social-extended.test.ts +192 -0
  119. package/src/core/tools/web-scrapers/social.test.ts +259 -0
  120. package/src/core/tools/web-scrapers/sourcegraph.ts +373 -0
  121. package/src/core/tools/web-scrapers/spdx.ts +121 -0
  122. package/src/core/tools/web-scrapers/spotify.ts +218 -0
  123. package/src/core/tools/web-scrapers/stackexchange.test.ts +120 -0
  124. package/src/core/tools/web-scrapers/stackoverflow.ts +124 -0
  125. package/src/core/tools/web-scrapers/standards.test.ts +122 -0
  126. package/src/core/tools/web-scrapers/terraform.ts +304 -0
  127. package/src/core/tools/web-scrapers/tldr.ts +51 -0
  128. package/src/core/tools/web-scrapers/twitter.ts +96 -0
  129. package/src/core/tools/web-scrapers/types.ts +234 -0
  130. package/src/core/tools/web-scrapers/utils.ts +162 -0
  131. package/src/core/tools/web-scrapers/vimeo.ts +152 -0
  132. package/src/core/tools/web-scrapers/vscode-marketplace.ts +195 -0
  133. package/src/core/tools/web-scrapers/w3c.ts +163 -0
  134. package/src/core/tools/web-scrapers/wikidata.ts +357 -0
  135. package/src/core/tools/web-scrapers/wikipedia.test.ts +73 -0
  136. package/src/core/tools/web-scrapers/wikipedia.ts +95 -0
  137. package/src/core/tools/web-scrapers/youtube.test.ts +198 -0
  138. package/src/core/tools/web-scrapers/youtube.ts +371 -0
  139. package/src/core/tools/write.ts +21 -18
  140. package/src/core/voice.ts +3 -2
  141. package/src/lib/worktree/collapse.ts +2 -1
  142. package/src/lib/worktree/git.ts +2 -18
  143. package/src/main.ts +59 -3
  144. package/src/modes/interactive/components/extensions/extension-dashboard.ts +33 -19
  145. package/src/modes/interactive/components/extensions/extension-list.ts +15 -8
  146. package/src/modes/interactive/components/hook-editor.ts +2 -1
  147. package/src/modes/interactive/components/model-selector.ts +19 -4
  148. package/src/modes/interactive/interactive-mode.ts +41 -38
  149. package/src/modes/interactive/theme/theme.ts +58 -58
  150. package/src/modes/rpc/rpc-mode.ts +10 -9
  151. package/src/prompts/review-request.md +27 -0
  152. package/src/prompts/reviewer.md +64 -68
  153. package/src/prompts/tools/output.md +22 -3
  154. package/src/prompts/tools/task.md +32 -33
  155. package/src/utils/clipboard.ts +2 -1
  156. package/src/utils/tools-manager.ts +110 -8
  157. package/examples/extensions/subagent/agents/reviewer.md +0 -35
@@ -0,0 +1,385 @@
1
+ import type { SpecialHandler } from "./types";
2
+ import { finalizeOutput, formatCount, loadPage } from "./types";
3
+
4
/**
 * Subset of the JSON returned by `https://huggingface.co/api/models/{id}` that
 * the Hugging Face handler reads when rendering a model page.
 * Only `modelId` is declared as required.
 */
interface HfModelData {
	modelId: string;
	pipeline_tag?: string;
	library_name?: string;
	tags?: string[];
	downloads?: number;
	likes?: number;
	private?: boolean;
	/** Rendered as "Gated" whenever truthy; typed as either a boolean or a string. */
	gated?: string | boolean;
	/** Model-card metadata; `language` may be a single string or a list. */
	cardData?: {
		license?: string;
		language?: string[] | string;
		datasets?: string[];
		metrics?: string[];
	};
}
20
+
21
/**
 * Subset of the JSON returned by `https://huggingface.co/api/datasets/{id}`
 * that the Hugging Face handler reads when rendering a dataset page.
 * Only `id` is declared as required.
 */
interface HfDatasetData {
	id: string;
	description?: string;
	tags?: string[];
	downloads?: number;
	likes?: number;
	private?: boolean;
	/** Rendered as "Gated" whenever truthy; typed as either a boolean or a string. */
	gated?: string | boolean;
	/** Dataset-card metadata; `language` may be a single string or a list. */
	cardData?: {
		license?: string;
		language?: string[] | string;
		task_categories?: string[];
		size_categories?: string[];
	};
}
36
+
37
/**
 * Subset of the JSON returned by `https://huggingface.co/api/spaces/{id}` that
 * the Hugging Face handler reads when rendering a Space page.
 * Only `id` is declared as required.
 */
interface HfSpaceData {
	id: string;
	title?: string;
	author?: string;
	/** Top-level SDK field; this is the one printed by the handler. */
	sdk?: string;
	tags?: string[];
	likes?: number;
	private?: boolean;
	/** Space-card metadata. The handler prints `license` and `app_file` from it. */
	cardData?: {
		license?: string;
		sdk?: string;
		app_file?: string;
	};
}
51
+
52
/**
 * Subset of the JSON returned by `https://huggingface.co/api/users/{id}` that
 * the Hugging Face handler reads when rendering a user profile.
 * Every field is optional.
 */
interface HfUserData {
	/** Account handle; used as the page heading when present. */
	user?: string;
	fullname?: string;
	avatarUrl?: string;
	numModels?: number;
	numDatasets?: number;
	numSpaces?: number;
	/** Organizations listed for the user; only `name` is read. */
	orgs?: { name: string }[];
}
61
+
62
/**
 * Parse a Hugging Face URL and classify the resource it points at.
 *
 * Recognised shapes (the host must be exactly `huggingface.co`):
 * - `/datasets/{name}` or `/datasets/{org}/{name}` -> `dataset`
 * - `/spaces/{org}/{name}`                         -> `space`
 * - `/{org}/{name}`                                -> `model`
 * - `/{name}`                                      -> `model_or_user` (the caller
 *   decides which one it is by probing the APIs)
 *
 * Trailing repository sub-paths such as `/tree/main` or `/blob/main/file` are
 * ignored so that they never leak into the returned id.
 *
 * @param url - Absolute URL to inspect.
 * @returns The resource type and id, or `null` when the URL is malformed, is
 *   on another host, or points at a site section rather than a resource.
 */
function parseHuggingFaceUrl(url: string): {
	type: "model" | "dataset" | "space" | "model_or_user";
	id: string; // Full ID (org/name or just name)
} | null {
	// First path segments that are site sections, not users, orgs or models.
	const reservedPaths = [
		"docs",
		"blog",
		"pricing",
		"enterprise",
		"join",
		"login",
		"settings",
		"models",
		"datasets",
		"spaces",
		"papers",
		"collections",
		"posts",
		"tasks",
		"learn",
		"api",
		"chat",
	];
	// Segments that follow a repository id and address something inside the repo.
	// NOTE(review): a repository literally named like one of these would be
	// misread as a sub-path; assumed not to occur in practice.
	const repoSubPaths = ["tree", "blob", "resolve", "raw", "discussions", "commit", "commits", "viewer"];

	let parts: string[];
	try {
		const parsed = new URL(url);
		if (parsed.hostname !== "huggingface.co") return null;
		parts = parsed.pathname.split("/").filter(Boolean);
	} catch {
		// `new URL` throws on malformed input; treat that as "not ours".
		return null;
	}
	if (parts.length === 0) return null;

	const [head, ...rest] = parts;

	// huggingface.co/datasets/{org}/{dataset} or huggingface.co/datasets/{dataset}
	if (head === "datasets") {
		if (rest.length === 0) return null; // dataset listing page
		const hasNamespace = rest.length >= 2 && !repoSubPaths.includes(rest[1]);
		return { type: "dataset", id: hasNamespace ? `${rest[0]}/${rest[1]}` : rest[0] };
	}

	// huggingface.co/spaces/{org}/{space}
	if (head === "spaces") {
		if (rest.length < 2) return null; // listing page or bare org, not a Space
		return { type: "space", id: `${rest[0]}/${rest[1]}` };
	}

	// Skip non-resource paths
	if (reservedPaths.includes(head)) return null;

	// huggingface.co/{id} (single part = could be model or user, try model first)
	if (rest.length === 0) return { type: "model_or_user", id: head };

	// huggingface.co/{model}/tree/... (model without a namespace plus a repo sub-path)
	if (repoSubPaths.includes(rest[0])) return { type: "model", id: head };

	// huggingface.co/{org}/{model}[/...]
	return { type: "model", id: `${head}/${rest[0]}` };
}
108
+
109
/** Non-null payload returned by a `SpecialHandler`. */
type HfHandlerResult = NonNullable<Awaited<ReturnType<SpecialHandler>>>;

/** Upper bound for the README request timeout (same unit as the handler's `timeout`). */
const HF_README_TIMEOUT_CAP = 5;

/** Parse a JSON API body; returns `null` for invalid JSON or a non-object payload. */
function parseHfJson<T>(body: string): T | null {
	try {
		const value: unknown = JSON.parse(body);
		return typeof value === "object" && value !== null ? (value as T) : null;
	} catch {
		return null;
	}
}

/** Render a card field that may be a single string or a list of strings. */
function joinHfList(value: string | string[]): string {
	return Array.isArray(value) ? value.join(", ") : value;
}

/** Append the README under `## {heading}` when it has non-whitespace content. */
function appendHfReadme(md: string, heading: string, readme: string): string {
	return readme.trim() ? `${md}## ${heading}\n\n${readme}` : md;
}

/** Build the markdown for a model; `fallbackId` is the heading when the payload has no `modelId`. */
function renderHfModel(model: HfModelData, fallbackId: string, readme: string): string {
	let md = `# ${model.modelId || fallbackId}\n\n`;

	if (model.pipeline_tag) md += `**Task:** ${model.pipeline_tag}\n`;
	if (model.library_name) md += `**Library:** ${model.library_name}\n`;
	if (model.downloads !== undefined) md += `**Downloads:** ${formatCount(model.downloads)}\n`;
	if (model.likes !== undefined) md += `**Likes:** ${formatCount(model.likes)}\n`;
	if (model.private) md += `**Visibility:** Private\n`;
	if (model.gated) md += `**Access:** Gated\n`;

	const card = model.cardData;
	if (card?.license) md += `**License:** ${card.license}\n`;
	if (card?.language) md += `**Language:** ${joinHfList(card.language)}\n`;
	if (card?.datasets?.length) md += `**Datasets:** ${card.datasets.join(", ")}\n`;
	if (card?.metrics?.length) md += `**Metrics:** ${card.metrics.join(", ")}\n`;

	if (model.tags?.length) md += `**Tags:** ${model.tags.join(", ")}\n`;
	md += "\n";

	return appendHfReadme(md, "Model Card", readme);
}

/** Build the markdown for a dataset; `fallbackId` is the heading when the payload has no `id`. */
function renderHfDataset(dataset: HfDatasetData, fallbackId: string, readme: string): string {
	let md = `# ${dataset.id || fallbackId}\n\n`;
	if (dataset.description) md += `${dataset.description}\n\n`;

	if (dataset.downloads !== undefined) md += `**Downloads:** ${formatCount(dataset.downloads)}\n`;
	if (dataset.likes !== undefined) md += `**Likes:** ${formatCount(dataset.likes)}\n`;
	if (dataset.private) md += `**Visibility:** Private\n`;
	if (dataset.gated) md += `**Access:** Gated\n`;

	const card = dataset.cardData;
	if (card?.license) md += `**License:** ${card.license}\n`;
	if (card?.language) md += `**Language:** ${joinHfList(card.language)}\n`;
	if (card?.task_categories?.length) md += `**Tasks:** ${card.task_categories.join(", ")}\n`;
	if (card?.size_categories?.length) md += `**Size:** ${card.size_categories.join(", ")}\n`;

	if (dataset.tags?.length) md += `**Tags:** ${dataset.tags.join(", ")}\n`;
	md += "\n";

	return appendHfReadme(md, "Dataset Card", readme);
}

/** Build the markdown for a Space; `fallbackId` is the heading when the payload has no `id`. */
function renderHfSpace(space: HfSpaceData, fallbackId: string, readme: string): string {
	let md = `# ${space.id || fallbackId}\n\n`;
	if (space.title) md += `${space.title}\n\n`;

	if (space.author) md += `**Author:** ${space.author}\n`;
	if (space.sdk) md += `**SDK:** ${space.sdk}\n`;
	if (space.likes !== undefined) md += `**Likes:** ${formatCount(space.likes)}\n`;
	if (space.private) md += `**Visibility:** Private\n`;

	const card = space.cardData;
	if (card?.license) md += `**License:** ${card.license}\n`;
	if (card?.app_file) md += `**App File:** ${card.app_file}\n`;

	if (space.tags?.length) md += `**Tags:** ${space.tags.join(", ")}\n`;
	md += "\n";

	return appendHfReadme(md, "Space Info", readme);
}

/** Build the markdown for a user profile; `fallbackId` is the heading when the payload has no `user`. */
function renderHfUser(user: HfUserData, fallbackId: string): string {
	let md = `# ${user.user || fallbackId}\n\n`;
	if (user.fullname) md += `**Name:** ${user.fullname}\n`;
	if (user.numModels !== undefined) md += `**Models:** ${formatCount(user.numModels)}\n`;
	if (user.numDatasets !== undefined) md += `**Datasets:** ${formatCount(user.numDatasets)}\n`;
	if (user.numSpaces !== undefined) md += `**Spaces:** ${formatCount(user.numSpaces)}\n`;
	if (user.orgs?.length) md += `**Organizations:** ${user.orgs.map((o) => o.name).join(", ")}\n`;
	return md;
}

/** Run the markdown through `finalizeOutput` and wrap it in the handler's result shape. */
function buildHfResult(
	url: string,
	finalUrl: HfHandlerResult["finalUrl"],
	md: string,
	fetchedAt: string,
	notes: string[],
): HfHandlerResult {
	const { content, truncated } = finalizeOutput(md);
	return {
		url,
		finalUrl,
		contentType: "text/markdown",
		method: "huggingface",
		content,
		fetchedAt,
		truncated,
		notes,
	};
}

/**
 * Special handler for `huggingface.co` URLs.
 *
 * Classifies the URL with `parseHuggingFaceUrl`, loads the matching Hub API
 * endpoint (and, for repositories, the raw `README.md`), and renders the
 * result as markdown. For a single-segment URL the model API is tried first
 * and the user API is the fallback.
 *
 * @param url - URL requested by the caller.
 * @param timeout - Timeout forwarded to `loadPage`; README requests are capped
 *   at `HF_README_TIMEOUT_CAP`.
 * @param signal - Optional abort signal forwarded to every request.
 * @returns The rendered result, or `null` when the URL is not a recognised
 *   Hugging Face resource, the API request fails, the body is not a JSON
 *   object, or any error is thrown while handling it.
 */
export const handleHuggingFace: SpecialHandler = async (url: string, timeout: number, signal?: AbortSignal) => {
	const parsed = parseHuggingFaceUrl(url);
	if (!parsed) return null;

	const fetchedAt = new Date().toISOString();
	const notes: string[] = [];
	const readmeTimeout = Math.min(timeout, HF_README_TIMEOUT_CAP);

	try {
		switch (parsed.type) {
			case "model": {
				// The API call and the README are independent, so load them in parallel.
				const [apiResult, readmeResult] = await Promise.all([
					loadPage(`https://huggingface.co/api/models/${parsed.id}`, { timeout, signal }),
					loadPage(`https://huggingface.co/${parsed.id}/raw/main/README.md`, { timeout: readmeTimeout, signal }),
				]);
				if (!apiResult.ok) return null;

				const model = parseHfJson<HfModelData>(apiResult.content);
				if (!model) return null;

				const md = renderHfModel(model, parsed.id, readmeResult.ok ? readmeResult.content : "");
				return buildHfResult(url, apiResult.finalUrl, md, fetchedAt, notes);
			}

			case "dataset": {
				const [apiResult, readmeResult] = await Promise.all([
					loadPage(`https://huggingface.co/api/datasets/${parsed.id}`, { timeout, signal }),
					loadPage(`https://huggingface.co/datasets/${parsed.id}/raw/main/README.md`, {
						timeout: readmeTimeout,
						signal,
					}),
				]);
				if (!apiResult.ok) return null;

				const dataset = parseHfJson<HfDatasetData>(apiResult.content);
				if (!dataset) return null;

				const md = renderHfDataset(dataset, parsed.id, readmeResult.ok ? readmeResult.content : "");
				return buildHfResult(url, apiResult.finalUrl, md, fetchedAt, notes);
			}

			case "space": {
				const [apiResult, readmeResult] = await Promise.all([
					loadPage(`https://huggingface.co/api/spaces/${parsed.id}`, { timeout, signal }),
					loadPage(`https://huggingface.co/spaces/${parsed.id}/raw/main/README.md`, {
						timeout: readmeTimeout,
						signal,
					}),
				]);
				if (!apiResult.ok) return null;

				const space = parseHfJson<HfSpaceData>(apiResult.content);
				if (!space) return null;

				const md = renderHfSpace(space, parsed.id, readmeResult.ok ? readmeResult.content : "");
				return buildHfResult(url, apiResult.finalUrl, md, fetchedAt, notes);
			}

			case "model_or_user": {
				// Try model API first
				const modelResult = await loadPage(`https://huggingface.co/api/models/${parsed.id}`, { timeout, signal });
				if (modelResult.ok) {
					const model = parseHfJson<HfModelData>(modelResult.content);
					if (model) {
						// The README is only requested once the id is known to be a model.
						const readmeResult = await loadPage(`https://huggingface.co/${parsed.id}/raw/main/README.md`, {
							timeout: readmeTimeout,
							signal,
						});
						// Same renderer as the "model" case, so both URL forms print the same fields.
						const md = renderHfModel(model, parsed.id, readmeResult.ok ? readmeResult.content : "");
						return buildHfResult(url, modelResult.finalUrl, md, fetchedAt, notes);
					}
				}

				// Fall back to user API
				const userResult = await loadPage(`https://huggingface.co/api/users/${parsed.id}`, { timeout, signal });
				if (!userResult.ok) return null;

				const user = parseHfJson<HfUserData>(userResult.content);
				if (!user) return null;

				return buildHfResult(url, userResult.finalUrl, renderHfUser(user, parsed.id), fetchedAt, notes);
			}

			default:
				return null;
		}
	} catch {
		// Deliberate best-effort: any failure is reported as "not handled" (null).
		return null;
	}
};
@@ -0,0 +1,86 @@
1
+ import { parse as parseHtml } from "node-html-parser";
2
+ import type { RenderResult, SpecialHandler } from "./types";
3
+ import { finalizeOutput, loadPage } from "./types";
4
+ import { convertWithMarkitdown, fetchBinary } from "./utils";
5
+
6
/**
 * Special handler for IACR ePrint Archive URLs
 * (`eprint.iacr.org/{year}/{number}` with an optional `.pdf` suffix).
 *
 * Loads the paper's HTML page, extracts title, authors, date, keywords and
 * abstract, and renders them as markdown. When the requested URL ends in
 * `.pdf`, the PDF is also fetched and converted with markitdown; the converted
 * text is appended only if it is longer than 500 characters.
 *
 * @param url - URL requested by the caller.
 * @param timeout - Timeout forwarded to the page load, PDF fetch and conversion.
 * @param signal - Optional abort signal forwarded to every request.
 * @returns The rendered result, or `null` when the URL is not an ePrint paper,
 *   the page cannot be loaded, or any error is thrown while handling it.
 */
export const handleIacr: SpecialHandler = async (
	url: string,
	timeout: number,
	signal?: AbortSignal,
): Promise<RenderResult | null> => {
	try {
		const target = new URL(url);
		if (target.hostname !== "eprint.iacr.org") return null;

		// Paper id is /year/number, optionally followed by .pdf
		const idMatch = /\/(\d{4})\/(\d+)(?:\.pdf)?$/.exec(target.pathname);
		if (idMatch === null) return null;

		const paperId = `${idMatch[1]}/${idMatch[2]}`;
		const fetchedAt = new Date().toISOString();
		const notes: string[] = [];

		// Metadata always comes from the HTML landing page, even for .pdf URLs.
		const page = await loadPage(`https://eprint.iacr.org/${paperId}`, { timeout, signal });
		if (!page.ok) return null;

		const doc = parseHtml(page.content);
		// Read the `content` attribute of the first element matching `selector`.
		const contentOf = (selector: string) => doc.querySelector(selector)?.getAttribute("content");

		const title = doc.querySelector("h3.mb-3")?.text?.trim() || contentOf('meta[name="citation_title"]');

		const authors: string[] = [];
		for (const node of doc.querySelectorAll('meta[name="citation_author"]')) {
			const name = node.getAttribute("content");
			if (name) authors.push(name);
		}

		// The abstract is the first <p> under the parent of the <h5> containing "Abstract".
		const abstractHeading = doc.querySelectorAll("h5").find((heading) => heading.text?.includes("Abstract"));
		const abstract =
			abstractHeading?.parentNode?.querySelector("p")?.text?.trim() || contentOf('meta[name="description"]');

		const keywords = doc.querySelector(".keywords")?.text?.replace("Keywords:", "").trim();
		const pubDate = contentOf('meta[name="citation_publication_date"]');

		const sections: string[] = [`# ${title || "IACR ePrint Paper"}\n\n`];
		if (authors.length) sections.push(`**Authors:** ${authors.join(", ")}\n`);
		if (pubDate) sections.push(`**Date:** ${pubDate}\n`);
		sections.push(`**ePrint:** ${paperId}\n`);
		if (keywords) sections.push(`**Keywords:** ${keywords}\n`);
		sections.push(`\n---\n\n## Abstract\n\n${abstract || "No abstract available."}\n\n`);

		// Only a .pdf request triggers the PDF download and conversion.
		if (target.pathname.endsWith(".pdf")) {
			notes.push("Fetching PDF for full content...");
			const pdf = await fetchBinary(`https://eprint.iacr.org/${paperId}.pdf`, timeout, signal);
			if (pdf.ok) {
				const converted = await convertWithMarkitdown(pdf.buffer, ".pdf", timeout, signal);
				const usable = converted.ok && converted.content.length > 500;
				if (usable) {
					sections.push(`---\n\n## Full Paper\n\n${converted.content}\n`);
					notes.push("PDF converted via markitdown");
				}
			}
		}

		const { content, truncated } = finalizeOutput(sections.join(""));
		return {
			url,
			finalUrl: url,
			contentType: "text/markdown",
			method: "iacr",
			content,
			fetchedAt,
			truncated,
			notes: notes.length > 0 ? notes : ["Fetched from IACR ePrint Archive"],
		};
	} catch {
		// Any thrown error is reported as "not handled".
		return null;
	}
};