@oh-my-pi/pi-coding-agent 3.24.0 → 3.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. package/CHANGELOG.md +34 -0
  2. package/package.json +4 -4
  3. package/src/core/custom-commands/bundled/wt/index.ts +3 -0
  4. package/src/core/sdk.ts +7 -0
  5. package/src/core/tools/complete.ts +129 -0
  6. package/src/core/tools/index.test.ts +9 -1
  7. package/src/core/tools/index.ts +18 -5
  8. package/src/core/tools/jtd-to-json-schema.ts +252 -0
  9. package/src/core/tools/output.ts +125 -14
  10. package/src/core/tools/read.ts +4 -4
  11. package/src/core/tools/task/artifacts.ts +6 -9
  12. package/src/core/tools/task/executor.ts +189 -24
  13. package/src/core/tools/task/index.ts +23 -18
  14. package/src/core/tools/task/name-generator.ts +1577 -0
  15. package/src/core/tools/task/render.ts +137 -8
  16. package/src/core/tools/task/types.ts +26 -5
  17. package/src/core/tools/task/worker-protocol.ts +1 -0
  18. package/src/core/tools/task/worker.ts +136 -14
  19. package/src/core/tools/web-fetch-handlers/academic.test.ts +239 -0
  20. package/src/core/tools/web-fetch-handlers/artifacthub.ts +210 -0
  21. package/src/core/tools/web-fetch-handlers/arxiv.ts +84 -0
  22. package/src/core/tools/web-fetch-handlers/aur.ts +171 -0
  23. package/src/core/tools/web-fetch-handlers/biorxiv.ts +136 -0
  24. package/src/core/tools/web-fetch-handlers/bluesky.ts +277 -0
  25. package/src/core/tools/web-fetch-handlers/brew.ts +173 -0
  26. package/src/core/tools/web-fetch-handlers/business.test.ts +82 -0
  27. package/src/core/tools/web-fetch-handlers/cheatsh.ts +73 -0
  28. package/src/core/tools/web-fetch-handlers/chocolatey.ts +153 -0
  29. package/src/core/tools/web-fetch-handlers/coingecko.ts +179 -0
  30. package/src/core/tools/web-fetch-handlers/crates-io.ts +123 -0
  31. package/src/core/tools/web-fetch-handlers/dev-platforms.test.ts +254 -0
  32. package/src/core/tools/web-fetch-handlers/devto.ts +173 -0
  33. package/src/core/tools/web-fetch-handlers/discogs.ts +303 -0
  34. package/src/core/tools/web-fetch-handlers/dockerhub.ts +156 -0
  35. package/src/core/tools/web-fetch-handlers/documentation.test.ts +85 -0
  36. package/src/core/tools/web-fetch-handlers/finance-media.test.ts +144 -0
  37. package/src/core/tools/web-fetch-handlers/git-hosting.test.ts +272 -0
  38. package/src/core/tools/web-fetch-handlers/github-gist.ts +64 -0
  39. package/src/core/tools/web-fetch-handlers/github.ts +424 -0
  40. package/src/core/tools/web-fetch-handlers/gitlab.ts +444 -0
  41. package/src/core/tools/web-fetch-handlers/go-pkg.ts +271 -0
  42. package/src/core/tools/web-fetch-handlers/hackage.ts +89 -0
  43. package/src/core/tools/web-fetch-handlers/hackernews.ts +208 -0
  44. package/src/core/tools/web-fetch-handlers/hex.ts +121 -0
  45. package/src/core/tools/web-fetch-handlers/huggingface.ts +385 -0
  46. package/src/core/tools/web-fetch-handlers/iacr.ts +82 -0
  47. package/src/core/tools/web-fetch-handlers/index.ts +69 -0
  48. package/src/core/tools/web-fetch-handlers/lobsters.ts +186 -0
  49. package/src/core/tools/web-fetch-handlers/mastodon.ts +302 -0
  50. package/src/core/tools/web-fetch-handlers/maven.ts +147 -0
  51. package/src/core/tools/web-fetch-handlers/mdn.ts +174 -0
  52. package/src/core/tools/web-fetch-handlers/media.test.ts +138 -0
  53. package/src/core/tools/web-fetch-handlers/metacpan.ts +247 -0
  54. package/src/core/tools/web-fetch-handlers/npm.ts +107 -0
  55. package/src/core/tools/web-fetch-handlers/nuget.ts +201 -0
  56. package/src/core/tools/web-fetch-handlers/nvd.ts +238 -0
  57. package/src/core/tools/web-fetch-handlers/opencorporates.ts +273 -0
  58. package/src/core/tools/web-fetch-handlers/openlibrary.ts +313 -0
  59. package/src/core/tools/web-fetch-handlers/osv.ts +184 -0
  60. package/src/core/tools/web-fetch-handlers/package-managers-2.test.ts +199 -0
  61. package/src/core/tools/web-fetch-handlers/package-managers.test.ts +171 -0
  62. package/src/core/tools/web-fetch-handlers/package-registries.test.ts +259 -0
  63. package/src/core/tools/web-fetch-handlers/packagist.ts +170 -0
  64. package/src/core/tools/web-fetch-handlers/pub-dev.ts +185 -0
  65. package/src/core/tools/web-fetch-handlers/pubmed.ts +174 -0
  66. package/src/core/tools/web-fetch-handlers/pypi.ts +125 -0
  67. package/src/core/tools/web-fetch-handlers/readthedocs.ts +122 -0
  68. package/src/core/tools/web-fetch-handlers/reddit.ts +100 -0
  69. package/src/core/tools/web-fetch-handlers/repology.ts +257 -0
  70. package/src/core/tools/web-fetch-handlers/research.test.ts +107 -0
  71. package/src/core/tools/web-fetch-handlers/rfc.ts +205 -0
  72. package/src/core/tools/web-fetch-handlers/rubygems.ts +112 -0
  73. package/src/core/tools/web-fetch-handlers/sec-edgar.ts +269 -0
  74. package/src/core/tools/web-fetch-handlers/security.test.ts +103 -0
  75. package/src/core/tools/web-fetch-handlers/semantic-scholar.ts +190 -0
  76. package/src/core/tools/web-fetch-handlers/social-extended.test.ts +192 -0
  77. package/src/core/tools/web-fetch-handlers/social.test.ts +259 -0
  78. package/src/core/tools/web-fetch-handlers/spotify.ts +218 -0
  79. package/src/core/tools/web-fetch-handlers/stackexchange.test.ts +120 -0
  80. package/src/core/tools/web-fetch-handlers/stackoverflow.ts +123 -0
  81. package/src/core/tools/web-fetch-handlers/standards.test.ts +122 -0
  82. package/src/core/tools/web-fetch-handlers/terraform.ts +296 -0
  83. package/src/core/tools/web-fetch-handlers/tldr.ts +47 -0
  84. package/src/core/tools/web-fetch-handlers/twitter.ts +84 -0
  85. package/src/core/tools/web-fetch-handlers/types.ts +163 -0
  86. package/src/core/tools/web-fetch-handlers/utils.ts +91 -0
  87. package/src/core/tools/web-fetch-handlers/vimeo.ts +152 -0
  88. package/src/core/tools/web-fetch-handlers/wikidata.ts +349 -0
  89. package/src/core/tools/web-fetch-handlers/wikipedia.test.ts +73 -0
  90. package/src/core/tools/web-fetch-handlers/wikipedia.ts +91 -0
  91. package/src/core/tools/web-fetch-handlers/youtube.test.ts +198 -0
  92. package/src/core/tools/web-fetch-handlers/youtube.ts +319 -0
  93. package/src/core/tools/web-fetch.ts +152 -1324
  94. package/src/prompts/task.md +14 -50
  95. package/src/prompts/tools/output.md +2 -1
  96. package/src/prompts/tools/task.md +3 -1
  97. package/src/utils/tools-manager.ts +110 -8
@@ -0,0 +1,185 @@
1
+ import { finalizeOutput, formatCount, loadPage, type SpecialHandler } from "./types";
2
+
3
+ /**
4
+ * Handle pub.dev URLs via API
5
+ */
6
+ export const handlePubDev: SpecialHandler = async (url: string, timeout: number) => {
7
+ try {
8
+ const parsed = new URL(url);
9
+ if (parsed.hostname !== "pub.dev" && parsed.hostname !== "www.pub.dev") return null;
10
+
11
+ // Extract package name from /packages/{package}
12
+ const match = parsed.pathname.match(/^\/packages\/([^/]+)/);
13
+ if (!match) return null;
14
+
15
+ const packageName = decodeURIComponent(match[1]);
16
+ const fetchedAt = new Date().toISOString();
17
+
18
+ // Fetch from pub.dev API
19
+ const apiUrl = `https://pub.dev/api/packages/${encodeURIComponent(packageName)}`;
20
+ const result = await loadPage(apiUrl, { timeout });
21
+
22
+ if (!result.ok) return null;
23
+
24
+ let data: {
25
+ name: string;
26
+ latest: {
27
+ version: string;
28
+ pubspec: {
29
+ description?: string;
30
+ homepage?: string;
31
+ repository?: string;
32
+ documentation?: string;
33
+ environment?: Record<string, string>;
34
+ dependencies?: Record<string, unknown>;
35
+ dev_dependencies?: Record<string, unknown>;
36
+ };
37
+ };
38
+ publisherId?: string;
39
+ metrics?: {
40
+ score?: {
41
+ likeCount?: number;
42
+ grantedPoints?: number;
43
+ maxPoints?: number;
44
+ popularityScore?: number;
45
+ };
46
+ };
47
+ };
48
+
49
+ try {
50
+ data = JSON.parse(result.content);
51
+ } catch {
52
+ return null;
53
+ }
54
+
55
+ const { name, latest, publisherId, metrics } = data;
56
+ const pubspec = latest.pubspec;
57
+
58
+ let md = `# ${name}\n\n`;
59
+ if (pubspec.description) md += `${pubspec.description}\n\n`;
60
+
61
+ md += `**Latest:** ${latest.version}`;
62
+ if (publisherId) md += ` · **Publisher:** ${publisherId}`;
63
+ md += "\n";
64
+
65
+ // Add metrics if available
66
+ const score = metrics?.score;
67
+ if (score) {
68
+ const likes = score.likeCount;
69
+ const points = score.grantedPoints;
70
+ const maxPoints = score.maxPoints;
71
+ const popularity = score.popularityScore;
72
+
73
+ if (likes !== undefined) md += `**Likes:** ${formatCount(likes)}`;
74
+ if (points !== undefined && maxPoints !== undefined) {
75
+ md += ` · **Pub Points:** ${points}/${maxPoints}`;
76
+ }
77
+ if (popularity !== undefined) {
78
+ md += ` · **Popularity:** ${Math.round(popularity * 100)}%`;
79
+ }
80
+ md += "\n";
81
+ }
82
+
83
+ md += "\n";
84
+
85
+ if (pubspec.homepage) md += `**Homepage:** ${pubspec.homepage}\n`;
86
+ if (pubspec.repository) md += `**Repository:** ${pubspec.repository}\n`;
87
+ if (pubspec.documentation) md += `**Documentation:** ${pubspec.documentation}\n`;
88
+
89
+ // SDK constraints
90
+ if (pubspec.environment) {
91
+ const constraints: string[] = [];
92
+ for (const [key, value] of Object.entries(pubspec.environment)) {
93
+ constraints.push(`${key}: ${value}`);
94
+ }
95
+ if (constraints.length > 0) {
96
+ md += `**SDK:** ${constraints.join(", ")}\n`;
97
+ }
98
+ }
99
+
100
+ md += "\n";
101
+
102
+ // Dependencies
103
+ if (pubspec.dependencies) {
104
+ const deps = Object.keys(pubspec.dependencies);
105
+ if (deps.length > 0) {
106
+ md += `## Dependencies (${deps.length})\n\n`;
107
+ for (const dep of deps.slice(0, 20)) {
108
+ const constraint = pubspec.dependencies[dep];
109
+ const constraintStr =
110
+ typeof constraint === "string" ? constraint : typeof constraint === "object" ? "complex" : "";
111
+ md += `- ${dep}`;
112
+ if (constraintStr) md += `: ${constraintStr}`;
113
+ md += "\n";
114
+ }
115
+ if (deps.length > 20) {
116
+ md += `\n*...and ${deps.length - 20} more*\n`;
117
+ }
118
+ md += "\n";
119
+ }
120
+ }
121
+
122
+ // Try to fetch README from pub.dev
123
+ const readmeUrl = `https://pub.dev/packages/${encodeURIComponent(packageName)}/versions/${encodeURIComponent(latest.version)}/readme`;
124
+ try {
125
+ const readmeResult = await loadPage(readmeUrl, { timeout: Math.min(timeout, 10) });
126
+ if (readmeResult.ok) {
127
+ // Extract README content from HTML
128
+ const readmeMatch = readmeResult.content.match(
129
+ /<div[^>]*class="[^"]*markdown-body[^"]*"[^>]*>([\s\S]*?)<\/div>/i,
130
+ );
131
+ if (readmeMatch) {
132
+ // Basic HTML to markdown conversion for README
133
+ const readme = readmeMatch[1]
134
+ .replace(/<h(\d)[^>]*>(.*?)<\/h\d>/gi, (_, level, text) => {
135
+ const stripped = text.replace(/<[^>]+>/g, "");
136
+ return `${"#".repeat(parseInt(level, 10))} ${stripped}\n\n`;
137
+ })
138
+ .replace(/<pre><code[^>]*>([\s\S]*?)<\/code><\/pre>/gi, (_, code) => {
139
+ const decoded = code
140
+ .replace(/&lt;/g, "<")
141
+ .replace(/&gt;/g, ">")
142
+ .replace(/&amp;/g, "&")
143
+ .replace(/&quot;/g, '"');
144
+ return `\n\`\`\`\n${decoded}\n\`\`\`\n\n`;
145
+ })
146
+ .replace(/<code[^>]*>(.*?)<\/code>/gi, "`$1`")
147
+ .replace(/<a[^>]*href="([^"]+)"[^>]*>(.*?)<\/a>/gi, "[$2]($1)")
148
+ .replace(/<strong[^>]*>(.*?)<\/strong>/gi, "**$1**")
149
+ .replace(/<em[^>]*>(.*?)<\/em>/gi, "*$1*")
150
+ .replace(/<li[^>]*>(.*?)<\/li>/gi, "- $1\n")
151
+ .replace(/<\/?(ul|ol|p|br)[^>]*>/gi, "\n")
152
+ .replace(/<[^>]+>/g, "")
153
+ .replace(/&lt;/g, "<")
154
+ .replace(/&gt;/g, ">")
155
+ .replace(/&amp;/g, "&")
156
+ .replace(/&quot;/g, '"')
157
+ .replace(/&#39;/g, "'")
158
+ .replace(/&nbsp;/g, " ")
159
+ .replace(/\n{3,}/g, "\n\n")
160
+ .trim();
161
+
162
+ if (readme.length > 100) {
163
+ md += `## README\n\n${readme}\n`;
164
+ }
165
+ }
166
+ }
167
+ } catch {
168
+ // README fetch failed, continue without it
169
+ }
170
+
171
+ const output = finalizeOutput(md);
172
+ return {
173
+ url,
174
+ finalUrl: url,
175
+ contentType: "text/markdown",
176
+ method: "pub.dev",
177
+ content: output.content,
178
+ fetchedAt,
179
+ truncated: output.truncated,
180
+ notes: ["Fetched via pub.dev API"],
181
+ };
182
+ } catch {}
183
+
184
+ return null;
185
+ };
@@ -0,0 +1,174 @@
1
+ /**
2
+ * PubMed handler for web-fetch
3
+ */
4
+
5
+ import type { RenderResult, SpecialHandler } from "./types";
6
+ import { finalizeOutput, loadPage } from "./types";
7
+
8
+ /**
9
+ * Handle PubMed URLs - fetch article metadata, abstract, MeSH terms
10
+ */
11
+ export const handlePubMed: SpecialHandler = async (url: string, timeout: number): Promise<RenderResult | null> => {
12
+ try {
13
+ const parsed = new URL(url);
14
+
15
+ // Match pubmed.ncbi.nlm.nih.gov/{pmid} or ncbi.nlm.nih.gov/pubmed/{pmid}
16
+ if (
17
+ parsed.hostname !== "pubmed.ncbi.nlm.nih.gov" &&
18
+ !(parsed.hostname === "ncbi.nlm.nih.gov" && parsed.pathname.startsWith("/pubmed"))
19
+ ) {
20
+ return null;
21
+ }
22
+
23
+ // Extract PMID from URL
24
+ let pmid: string | null = null;
25
+ if (parsed.hostname === "pubmed.ncbi.nlm.nih.gov") {
26
+ // Format: pubmed.ncbi.nlm.nih.gov/12345678/
27
+ const match = parsed.pathname.match(/\/(\d+)/);
28
+ if (match) pmid = match[1];
29
+ } else {
30
+ // Format: ncbi.nlm.nih.gov/pubmed/12345678
31
+ const match = parsed.pathname.match(/\/pubmed\/(\d+)/);
32
+ if (match) pmid = match[1];
33
+ }
34
+
35
+ if (!pmid) return null;
36
+
37
+ const fetchedAt = new Date().toISOString();
38
+ const notes: string[] = [];
39
+
40
+ // Fetch summary metadata
41
+ const summaryUrl = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&id=${pmid}&retmode=json`;
42
+ const summaryResult = await loadPage(summaryUrl, { timeout });
43
+
44
+ if (!summaryResult.ok) return null;
45
+
46
+ let summaryData: {
47
+ result?: {
48
+ [pmid: string]: {
49
+ title?: string;
50
+ authors?: Array<{ name: string }>;
51
+ fulljournalname?: string;
52
+ pubdate?: string;
53
+ volume?: string;
54
+ issue?: string;
55
+ pages?: string;
56
+ elocationid?: string; // DOI
57
+ articleids?: Array<{ idtype: string; value: string }>;
58
+ };
59
+ };
60
+ };
61
+
62
+ try {
63
+ summaryData = JSON.parse(summaryResult.content);
64
+ } catch {
65
+ return null;
66
+ }
67
+
68
+ const article = summaryData.result?.[pmid];
69
+ if (!article) return null;
70
+
71
+ // Fetch abstract
72
+ const abstractUrl = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=${pmid}&rettype=abstract&retmode=text`;
73
+ const abstractResult = await loadPage(abstractUrl, { timeout });
74
+
75
+ let abstractText = "";
76
+ if (abstractResult.ok) {
77
+ abstractText = abstractResult.content.trim();
78
+ notes.push("Fetched abstract via NCBI E-utilities");
79
+ }
80
+
81
+ // Extract DOI and PMCID
82
+ let doi = "";
83
+ let pmcid = "";
84
+ if (article.articleids) {
85
+ for (const id of article.articleids) {
86
+ if (id.idtype === "doi") doi = id.value;
87
+ if (id.idtype === "pmc") pmcid = id.value;
88
+ }
89
+ }
90
+ if (!doi && article.elocationid) {
91
+ doi = article.elocationid;
92
+ }
93
+
94
+ // Build markdown output
95
+ let md = `# ${article.title || "PubMed Article"}\n\n`;
96
+
97
+ // Authors
98
+ if (article.authors && article.authors.length > 0) {
99
+ const authorNames = article.authors.map((a) => a.name).join(", ");
100
+ md += `**Authors:** ${authorNames}\n`;
101
+ }
102
+
103
+ // Journal info
104
+ if (article.fulljournalname) {
105
+ md += `**Journal:** ${article.fulljournalname}`;
106
+ if (article.pubdate) md += ` (${article.pubdate})`;
107
+ md += "\n";
108
+ }
109
+
110
+ // Volume/Issue/Pages
111
+ const citation: string[] = [];
112
+ if (article.volume) citation.push(`Vol ${article.volume}`);
113
+ if (article.issue) citation.push(`Issue ${article.issue}`);
114
+ if (article.pages) citation.push(`pp ${article.pages}`);
115
+ if (citation.length > 0) {
116
+ md += `**Citation:** ${citation.join(", ")}\n`;
117
+ }
118
+
119
+ // IDs
120
+ md += `**PMID:** ${pmid}\n`;
121
+ if (doi) md += `**DOI:** ${doi}\n`;
122
+ if (pmcid) md += `**PMCID:** ${pmcid}\n`;
123
+
124
+ md += "\n---\n\n";
125
+
126
+ // Abstract section
127
+ if (abstractText) {
128
+ md += `## Abstract\n\n${abstractText}\n`;
129
+ } else {
130
+ md += `## Abstract\n\nNo abstract available.\n`;
131
+ }
132
+
133
+ // Try to fetch MeSH terms
134
+ try {
135
+ const meshUrl = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=${pmid}&rettype=medline&retmode=text`;
136
+ const meshResult = await loadPage(meshUrl, { timeout: Math.min(timeout, 5) });
137
+
138
+ if (meshResult.ok) {
139
+ const meshTerms: string[] = [];
140
+ const lines = meshResult.content.split("\n");
141
+ for (const line of lines) {
142
+ if (line.startsWith("MH - ")) {
143
+ const term = line.slice(6).trim();
144
+ meshTerms.push(term);
145
+ }
146
+ }
147
+
148
+ if (meshTerms.length > 0) {
149
+ md += `\n## MeSH Terms\n\n`;
150
+ for (const term of meshTerms) {
151
+ md += `- ${term}\n`;
152
+ }
153
+ notes.push("Fetched MeSH terms via NCBI E-utilities");
154
+ }
155
+ }
156
+ } catch {
157
+ // MeSH terms are optional
158
+ }
159
+
160
+ const output = finalizeOutput(md);
161
+ return {
162
+ url,
163
+ finalUrl: url,
164
+ contentType: "text/markdown",
165
+ method: "pubmed",
166
+ content: output.content,
167
+ fetchedAt,
168
+ truncated: output.truncated,
169
+ notes: notes.length > 0 ? notes : ["Fetched via NCBI E-utilities"],
170
+ };
171
+ } catch {
172
+ return null;
173
+ }
174
+ };
@@ -0,0 +1,125 @@
1
+ import type { RenderResult, SpecialHandler } from "./types";
2
+ import { finalizeOutput, formatCount, loadPage } from "./types";
3
+
4
+ /**
5
+ * Handle PyPI URLs via JSON API
6
+ */
7
+ export const handlePyPI: SpecialHandler = async (url: string, timeout: number): Promise<RenderResult | null> => {
8
+ try {
9
+ const parsed = new URL(url);
10
+ if (parsed.hostname !== "pypi.org" && parsed.hostname !== "www.pypi.org") return null;
11
+
12
+ // Extract package name from /project/{package} or /project/{package}/{version}
13
+ const match = parsed.pathname.match(/^\/project\/([^/]+)/);
14
+ if (!match) return null;
15
+
16
+ const packageName = decodeURIComponent(match[1]);
17
+ const fetchedAt = new Date().toISOString();
18
+
19
+ // Fetch from PyPI JSON API
20
+ const apiUrl = `https://pypi.org/pypi/${packageName}/json`;
21
+ const downloadsUrl = `https://pypistats.org/api/packages/${packageName}/recent`;
22
+
23
+ // Fetch package info and download stats in parallel
24
+ const [result, downloadsResult] = await Promise.all([
25
+ loadPage(apiUrl, { timeout }),
26
+ loadPage(downloadsUrl, { timeout: Math.min(timeout, 5) }),
27
+ ]);
28
+
29
+ if (!result.ok) return null;
30
+
31
+ // Parse download stats
32
+ let weeklyDownloads: number | null = null;
33
+ if (downloadsResult.ok) {
34
+ try {
35
+ const dlData = JSON.parse(downloadsResult.content) as { data?: { last_week?: number } };
36
+ weeklyDownloads = dlData.data?.last_week ?? null;
37
+ } catch {}
38
+ }
39
+
40
+ let pkg: {
41
+ info: {
42
+ name: string;
43
+ version: string;
44
+ summary?: string;
45
+ description?: string;
46
+ author?: string;
47
+ author_email?: string;
48
+ license?: string;
49
+ home_page?: string;
50
+ project_urls?: Record<string, string>;
51
+ requires_python?: string;
52
+ keywords?: string;
53
+ classifiers?: string[];
54
+ };
55
+ urls?: Array<{ filename: string; size: number; upload_time: string }>;
56
+ releases?: Record<string, unknown>;
57
+ requires_dist?: string[];
58
+ };
59
+
60
+ try {
61
+ pkg = JSON.parse(result.content);
62
+ } catch {
63
+ return null; // JSON parse failed
64
+ }
65
+
66
+ const info = pkg.info;
67
+ let md = `# ${info.name}\n\n`;
68
+ if (info.summary) md += `${info.summary}\n\n`;
69
+
70
+ md += `**Latest:** ${info.version}`;
71
+ if (info.license) md += ` · **License:** ${info.license}`;
72
+ md += "\n";
73
+
74
+ if (weeklyDownloads !== null) {
75
+ md += `**Weekly Downloads:** ${formatCount(weeklyDownloads)}\n`;
76
+ }
77
+
78
+ md += "\n";
79
+
80
+ if (info.author) {
81
+ md += `**Author:** ${info.author}`;
82
+ if (info.author_email) md += ` <${info.author_email}>`;
83
+ md += "\n";
84
+ }
85
+
86
+ if (info.requires_python) md += `**Python:** ${info.requires_python}\n`;
87
+ if (info.home_page) md += `**Homepage:** ${info.home_page}\n`;
88
+
89
+ if (info.project_urls && Object.keys(info.project_urls).length > 0) {
90
+ md += "\n**Project URLs:**\n";
91
+ for (const [label, url] of Object.entries(info.project_urls)) {
92
+ md += `- ${label}: ${url}\n`;
93
+ }
94
+ }
95
+
96
+ if (info.keywords) md += `\n**Keywords:** ${info.keywords}\n`;
97
+
98
+ // Dependencies
99
+ if (pkg.requires_dist && pkg.requires_dist.length > 0) {
100
+ md += `\n## Dependencies\n\n`;
101
+ for (const dep of pkg.requires_dist) {
102
+ md += `- ${dep}\n`;
103
+ }
104
+ }
105
+
106
+ // README/Description
107
+ if (info.description) {
108
+ md += `\n---\n\n## Description\n\n${info.description}\n`;
109
+ }
110
+
111
+ const output = finalizeOutput(md);
112
+ return {
113
+ url,
114
+ finalUrl: url,
115
+ contentType: "text/markdown",
116
+ method: "pypi",
117
+ content: output.content,
118
+ fetchedAt,
119
+ truncated: output.truncated,
120
+ notes: ["Fetched via PyPI JSON API"],
121
+ };
122
+ } catch {}
123
+
124
+ return null;
125
+ };
@@ -0,0 +1,122 @@
1
+ /**
2
+ * Read the Docs handler for web-fetch
3
+ */
4
+
5
+ import { parse as parseHtml } from "node-html-parser";
6
+ import type { RenderResult, SpecialHandler } from "./types";
7
+ import { finalizeOutput, htmlToBasicMarkdown, loadPage } from "./types";
8
+
9
+ export const handleReadTheDocs: SpecialHandler = async (url: string, timeout: number): Promise<RenderResult | null> => {
10
+ // Check if URL matches Read the Docs patterns
11
+ const urlObj = new URL(url);
12
+ const isReadTheDocs =
13
+ urlObj.hostname.endsWith(".readthedocs.io") ||
14
+ urlObj.hostname === "readthedocs.org" ||
15
+ urlObj.hostname === "www.readthedocs.org";
16
+
17
+ if (!isReadTheDocs) {
18
+ return null;
19
+ }
20
+
21
+ const notes: string[] = [];
22
+ const fetchedAt = new Date().toISOString();
23
+
24
+ // Fetch the page
25
+ const result = await loadPage(url, { timeout });
26
+ if (!result.ok) {
27
+ return {
28
+ url,
29
+ finalUrl: result.finalUrl,
30
+ contentType: result.contentType,
31
+ method: "readthedocs",
32
+ content: `Failed to fetch Read the Docs page (status: ${result.status ?? "unknown"})`,
33
+ fetchedAt,
34
+ truncated: false,
35
+ notes,
36
+ };
37
+ }
38
+
39
+ // Parse HTML
40
+ const root = parseHtml(result.content);
41
+
42
+ // Extract main content from common Read the Docs selectors
43
+ let mainContent =
44
+ root.querySelector(".document") ||
45
+ root.querySelector('[role="main"]') ||
46
+ root.querySelector("main") ||
47
+ root.querySelector(".rst-content") ||
48
+ root.querySelector(".body");
49
+
50
+ if (!mainContent) {
51
+ // Fallback to body if no main content found
52
+ mainContent = root.querySelector("body");
53
+ notes.push("Using full body content (no main content div found)");
54
+ }
55
+
56
+ // Remove navigation, sidebar, footer elements
57
+ mainContent
58
+ ?.querySelectorAll(
59
+ ".headerlink, .viewcode-link, nav, .sidebar, footer, .related, .sphinxsidebar, .toctree-wrapper",
60
+ )
61
+ .forEach((el) => {
62
+ el.remove();
63
+ });
64
+
65
+ // Try to find Edit on GitHub/GitLab links for raw source
66
+ const editLinks = root.querySelectorAll('a[href*="github.com"], a[href*="gitlab.com"]');
67
+ let sourceUrl: string | null = null;
68
+
69
+ for (const link of editLinks) {
70
+ const href = link.getAttribute("href");
71
+ const text = link.textContent?.toLowerCase() || "";
72
+
73
+ if (href && (text.includes("edit") || text.includes("source"))) {
74
+ // Convert edit URL to raw URL
75
+ if (href.includes("github.com")) {
76
+ sourceUrl = href.replace("/blob/", "/raw/").replace("/edit/", "/raw/");
77
+ } else if (href.includes("gitlab.com")) {
78
+ sourceUrl = href.replace("/blob/", "/raw/").replace("/edit/", "/raw/");
79
+ }
80
+ break;
81
+ }
82
+ }
83
+
84
+ let content = "";
85
+
86
+ // Try to fetch raw source if available
87
+ if (sourceUrl) {
88
+ try {
89
+ const sourceResult = await loadPage(sourceUrl, { timeout: Math.min(timeout, 10) });
90
+ if (sourceResult.ok && sourceResult.content.length > 0 && sourceResult.content.length < 1_000_000) {
91
+ content = sourceResult.content;
92
+ notes.push(`Fetched raw source from ${sourceUrl}`);
93
+ }
94
+ } catch (_err) {
95
+ // Ignore errors, fall back to HTML
96
+ }
97
+ }
98
+
99
+ // If no raw source, convert HTML to markdown
100
+ if (!content && mainContent) {
101
+ const html = mainContent.innerHTML;
102
+ content = htmlToBasicMarkdown(html);
103
+ }
104
+
105
+ if (!content) {
106
+ content = "No content extracted from Read the Docs page";
107
+ notes.push("Failed to extract content");
108
+ }
109
+
110
+ const { content: finalContent, truncated } = finalizeOutput(content);
111
+
112
+ return {
113
+ url,
114
+ finalUrl: result.finalUrl,
115
+ contentType: sourceUrl ? "text/plain" : "text/html",
116
+ method: "readthedocs",
117
+ content: finalContent,
118
+ fetchedAt,
119
+ truncated,
120
+ notes,
121
+ };
122
+ };
@@ -0,0 +1,100 @@
1
+ import type { RenderResult, SpecialHandler } from "./types";
2
+ import { finalizeOutput, loadPage } from "./types";
3
+
4
+ interface RedditPost {
5
+ title: string;
6
+ selftext: string;
7
+ author: string;
8
+ score: number;
9
+ num_comments: number;
10
+ created_utc: number;
11
+ subreddit: string;
12
+ url: string;
13
+ is_self: boolean;
14
+ }
15
+
16
+ interface RedditComment {
17
+ body: string;
18
+ author: string;
19
+ score: number;
20
+ created_utc: number;
21
+ replies?: { data: { children: Array<{ data: RedditComment }> } };
22
+ }
23
+
24
+ /**
25
+ * Handle Reddit URLs via JSON API
26
+ */
27
+ export const handleReddit: SpecialHandler = async (url: string, timeout: number): Promise<RenderResult | null> => {
28
+ try {
29
+ const parsed = new URL(url);
30
+ if (!parsed.hostname.includes("reddit.com")) return null;
31
+
32
+ const fetchedAt = new Date().toISOString();
33
+
34
+ // Append .json to get JSON response
35
+ let jsonUrl = `${url.replace(/\/$/, "")}.json`;
36
+ if (parsed.search) {
37
+ jsonUrl = `${url.replace(/\/$/, "").replace(parsed.search, "")}.json${parsed.search}`;
38
+ }
39
+
40
+ const result = await loadPage(jsonUrl, { timeout });
41
+ if (!result.ok) return null;
42
+
43
+ const data = JSON.parse(result.content);
44
+ let md = "";
45
+
46
+ // Handle different Reddit URL types
47
+ if (Array.isArray(data) && data.length >= 1) {
48
+ // Post page (with comments)
49
+ const postData = data[0]?.data?.children?.[0]?.data as RedditPost | undefined;
50
+ if (postData) {
51
+ md = `# ${postData.title}\n\n`;
52
+ md += `**r/${postData.subreddit}** · u/${postData.author} · ${postData.score} points · ${postData.num_comments} comments\n`;
53
+ md += `*${new Date(postData.created_utc * 1000).toISOString().split("T")[0]}*\n\n`;
54
+
55
+ if (postData.is_self && postData.selftext) {
56
+ md += `---\n\n${postData.selftext}\n\n`;
57
+ } else if (!postData.is_self) {
58
+ md += `**Link:** ${postData.url}\n\n`;
59
+ }
60
+
61
+ // Add comments if available
62
+ if (data.length >= 2 && data[1]?.data?.children) {
63
+ md += `---\n\n## Top Comments\n\n`;
64
+ const comments = data[1].data.children.filter((c: { kind: string }) => c.kind === "t1").slice(0, 10);
65
+
66
+ for (const { data: comment } of comments as Array<{ data: RedditComment }>) {
67
+ md += `### u/${comment.author} · ${comment.score} points\n\n`;
68
+ md += `${comment.body}\n\n---\n\n`;
69
+ }
70
+ }
71
+ }
72
+ } else if (data?.data?.children) {
73
+ // Subreddit or listing page
74
+ const posts = data.data.children.slice(0, 20) as Array<{ data: RedditPost }>;
75
+ const subreddit = posts[0]?.data?.subreddit;
76
+
77
+ md = `# r/${subreddit || "Reddit"}\n\n`;
78
+ for (const { data: post } of posts) {
79
+ md += `- **${post.title}** (${post.score} pts, ${post.num_comments} comments)\n`;
80
+ md += ` by u/${post.author}\n\n`;
81
+ }
82
+ }
83
+
84
+ if (!md) return null;
85
+
86
+ const output = finalizeOutput(md);
87
+ return {
88
+ url,
89
+ finalUrl: url,
90
+ contentType: "text/markdown",
91
+ method: "reddit",
92
+ content: output.content,
93
+ fetchedAt,
94
+ truncated: output.truncated,
95
+ notes: ["Fetched via Reddit JSON API"],
96
+ };
97
+ } catch {}
98
+
99
+ return null;
100
+ };