@oh-my-pi/pi-coding-agent 3.24.0 → 3.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -0
- package/package.json +4 -4
- package/src/core/custom-commands/bundled/wt/index.ts +3 -0
- package/src/core/sdk.ts +7 -0
- package/src/core/tools/complete.ts +129 -0
- package/src/core/tools/index.test.ts +9 -1
- package/src/core/tools/index.ts +18 -5
- package/src/core/tools/jtd-to-json-schema.ts +252 -0
- package/src/core/tools/output.ts +125 -14
- package/src/core/tools/read.ts +4 -4
- package/src/core/tools/task/artifacts.ts +6 -9
- package/src/core/tools/task/executor.ts +189 -24
- package/src/core/tools/task/index.ts +23 -18
- package/src/core/tools/task/name-generator.ts +1577 -0
- package/src/core/tools/task/render.ts +137 -8
- package/src/core/tools/task/types.ts +26 -5
- package/src/core/tools/task/worker-protocol.ts +1 -0
- package/src/core/tools/task/worker.ts +136 -14
- package/src/core/tools/web-fetch-handlers/academic.test.ts +239 -0
- package/src/core/tools/web-fetch-handlers/artifacthub.ts +210 -0
- package/src/core/tools/web-fetch-handlers/arxiv.ts +84 -0
- package/src/core/tools/web-fetch-handlers/aur.ts +171 -0
- package/src/core/tools/web-fetch-handlers/biorxiv.ts +136 -0
- package/src/core/tools/web-fetch-handlers/bluesky.ts +277 -0
- package/src/core/tools/web-fetch-handlers/brew.ts +173 -0
- package/src/core/tools/web-fetch-handlers/business.test.ts +82 -0
- package/src/core/tools/web-fetch-handlers/cheatsh.ts +73 -0
- package/src/core/tools/web-fetch-handlers/chocolatey.ts +153 -0
- package/src/core/tools/web-fetch-handlers/coingecko.ts +179 -0
- package/src/core/tools/web-fetch-handlers/crates-io.ts +123 -0
- package/src/core/tools/web-fetch-handlers/dev-platforms.test.ts +254 -0
- package/src/core/tools/web-fetch-handlers/devto.ts +173 -0
- package/src/core/tools/web-fetch-handlers/discogs.ts +303 -0
- package/src/core/tools/web-fetch-handlers/dockerhub.ts +156 -0
- package/src/core/tools/web-fetch-handlers/documentation.test.ts +85 -0
- package/src/core/tools/web-fetch-handlers/finance-media.test.ts +144 -0
- package/src/core/tools/web-fetch-handlers/git-hosting.test.ts +272 -0
- package/src/core/tools/web-fetch-handlers/github-gist.ts +64 -0
- package/src/core/tools/web-fetch-handlers/github.ts +424 -0
- package/src/core/tools/web-fetch-handlers/gitlab.ts +444 -0
- package/src/core/tools/web-fetch-handlers/go-pkg.ts +271 -0
- package/src/core/tools/web-fetch-handlers/hackage.ts +89 -0
- package/src/core/tools/web-fetch-handlers/hackernews.ts +208 -0
- package/src/core/tools/web-fetch-handlers/hex.ts +121 -0
- package/src/core/tools/web-fetch-handlers/huggingface.ts +385 -0
- package/src/core/tools/web-fetch-handlers/iacr.ts +82 -0
- package/src/core/tools/web-fetch-handlers/index.ts +69 -0
- package/src/core/tools/web-fetch-handlers/lobsters.ts +186 -0
- package/src/core/tools/web-fetch-handlers/mastodon.ts +302 -0
- package/src/core/tools/web-fetch-handlers/maven.ts +147 -0
- package/src/core/tools/web-fetch-handlers/mdn.ts +174 -0
- package/src/core/tools/web-fetch-handlers/media.test.ts +138 -0
- package/src/core/tools/web-fetch-handlers/metacpan.ts +247 -0
- package/src/core/tools/web-fetch-handlers/npm.ts +107 -0
- package/src/core/tools/web-fetch-handlers/nuget.ts +201 -0
- package/src/core/tools/web-fetch-handlers/nvd.ts +238 -0
- package/src/core/tools/web-fetch-handlers/opencorporates.ts +273 -0
- package/src/core/tools/web-fetch-handlers/openlibrary.ts +313 -0
- package/src/core/tools/web-fetch-handlers/osv.ts +184 -0
- package/src/core/tools/web-fetch-handlers/package-managers-2.test.ts +199 -0
- package/src/core/tools/web-fetch-handlers/package-managers.test.ts +171 -0
- package/src/core/tools/web-fetch-handlers/package-registries.test.ts +259 -0
- package/src/core/tools/web-fetch-handlers/packagist.ts +170 -0
- package/src/core/tools/web-fetch-handlers/pub-dev.ts +185 -0
- package/src/core/tools/web-fetch-handlers/pubmed.ts +174 -0
- package/src/core/tools/web-fetch-handlers/pypi.ts +125 -0
- package/src/core/tools/web-fetch-handlers/readthedocs.ts +122 -0
- package/src/core/tools/web-fetch-handlers/reddit.ts +100 -0
- package/src/core/tools/web-fetch-handlers/repology.ts +257 -0
- package/src/core/tools/web-fetch-handlers/research.test.ts +107 -0
- package/src/core/tools/web-fetch-handlers/rfc.ts +205 -0
- package/src/core/tools/web-fetch-handlers/rubygems.ts +112 -0
- package/src/core/tools/web-fetch-handlers/sec-edgar.ts +269 -0
- package/src/core/tools/web-fetch-handlers/security.test.ts +103 -0
- package/src/core/tools/web-fetch-handlers/semantic-scholar.ts +190 -0
- package/src/core/tools/web-fetch-handlers/social-extended.test.ts +192 -0
- package/src/core/tools/web-fetch-handlers/social.test.ts +259 -0
- package/src/core/tools/web-fetch-handlers/spotify.ts +218 -0
- package/src/core/tools/web-fetch-handlers/stackexchange.test.ts +120 -0
- package/src/core/tools/web-fetch-handlers/stackoverflow.ts +123 -0
- package/src/core/tools/web-fetch-handlers/standards.test.ts +122 -0
- package/src/core/tools/web-fetch-handlers/terraform.ts +296 -0
- package/src/core/tools/web-fetch-handlers/tldr.ts +47 -0
- package/src/core/tools/web-fetch-handlers/twitter.ts +84 -0
- package/src/core/tools/web-fetch-handlers/types.ts +163 -0
- package/src/core/tools/web-fetch-handlers/utils.ts +91 -0
- package/src/core/tools/web-fetch-handlers/vimeo.ts +152 -0
- package/src/core/tools/web-fetch-handlers/wikidata.ts +349 -0
- package/src/core/tools/web-fetch-handlers/wikipedia.test.ts +73 -0
- package/src/core/tools/web-fetch-handlers/wikipedia.ts +91 -0
- package/src/core/tools/web-fetch-handlers/youtube.test.ts +198 -0
- package/src/core/tools/web-fetch-handlers/youtube.ts +319 -0
- package/src/core/tools/web-fetch.ts +152 -1324
- package/src/prompts/task.md +14 -50
- package/src/prompts/tools/output.md +2 -1
- package/src/prompts/tools/task.md +3 -1
- package/src/utils/tools-manager.ts +110 -8
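All five handlers expanded below share one shape: they take a URL and a timeout, resolve to `null` when the URL isn't theirs, and otherwise resolve to a `RenderResult`. The actual definitions live in `web-fetch-handlers/types.ts`, which this diff does not expand; the sketch below is inferred from how the handlers use those types, so treat it as an approximation rather than the shipped code.

```ts
// Inferred from usage in the handlers below -- types.ts itself is not
// expanded in this diff, so this is a sketch, not the shipped definition.
interface RenderResult {
  url: string;
  finalUrl: string;
  contentType: string;
  method: string;
  content: string;
  fetchedAt: string;
  truncated: boolean;
  notes: string[];
}

// A handler opts out by resolving to null; the caller then falls through
// to the next handler or to the generic fetch path.
type SpecialHandler = (url: string, timeout: number) => Promise<RenderResult | null>;
```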
package/src/core/tools/web-fetch-handlers/pub-dev.ts
@@ -0,0 +1,185 @@
```ts
import { finalizeOutput, formatCount, loadPage, type SpecialHandler } from "./types";

/**
 * Handle pub.dev URLs via API
 */
export const handlePubDev: SpecialHandler = async (url: string, timeout: number) => {
  try {
    const parsed = new URL(url);
    if (parsed.hostname !== "pub.dev" && parsed.hostname !== "www.pub.dev") return null;

    // Extract package name from /packages/{package}
    const match = parsed.pathname.match(/^\/packages\/([^/]+)/);
    if (!match) return null;

    const packageName = decodeURIComponent(match[1]);
    const fetchedAt = new Date().toISOString();

    // Fetch from pub.dev API
    const apiUrl = `https://pub.dev/api/packages/${encodeURIComponent(packageName)}`;
    const result = await loadPage(apiUrl, { timeout });

    if (!result.ok) return null;

    let data: {
      name: string;
      latest: {
        version: string;
        pubspec: {
          description?: string;
          homepage?: string;
          repository?: string;
          documentation?: string;
          environment?: Record<string, string>;
          dependencies?: Record<string, unknown>;
          dev_dependencies?: Record<string, unknown>;
        };
      };
      publisherId?: string;
      metrics?: {
        score?: {
          likeCount?: number;
          grantedPoints?: number;
          maxPoints?: number;
          popularityScore?: number;
        };
      };
    };

    try {
      data = JSON.parse(result.content);
    } catch {
      return null;
    }

    const { name, latest, publisherId, metrics } = data;
    const pubspec = latest.pubspec;

    let md = `# ${name}\n\n`;
    if (pubspec.description) md += `${pubspec.description}\n\n`;

    md += `**Latest:** ${latest.version}`;
    if (publisherId) md += ` · **Publisher:** ${publisherId}`;
    md += "\n";

    // Add metrics if available
    const score = metrics?.score;
    if (score) {
      const likes = score.likeCount;
      const points = score.grantedPoints;
      const maxPoints = score.maxPoints;
      const popularity = score.popularityScore;

      if (likes !== undefined) md += `**Likes:** ${formatCount(likes)}`;
      if (points !== undefined && maxPoints !== undefined) {
        md += ` · **Pub Points:** ${points}/${maxPoints}`;
      }
      if (popularity !== undefined) {
        md += ` · **Popularity:** ${Math.round(popularity * 100)}%`;
      }
      md += "\n";
    }

    md += "\n";

    if (pubspec.homepage) md += `**Homepage:** ${pubspec.homepage}\n`;
    if (pubspec.repository) md += `**Repository:** ${pubspec.repository}\n`;
    if (pubspec.documentation) md += `**Documentation:** ${pubspec.documentation}\n`;

    // SDK constraints
    if (pubspec.environment) {
      const constraints: string[] = [];
      for (const [key, value] of Object.entries(pubspec.environment)) {
        constraints.push(`${key}: ${value}`);
      }
      if (constraints.length > 0) {
        md += `**SDK:** ${constraints.join(", ")}\n`;
      }
    }

    md += "\n";

    // Dependencies
    if (pubspec.dependencies) {
      const deps = Object.keys(pubspec.dependencies);
      if (deps.length > 0) {
        md += `## Dependencies (${deps.length})\n\n`;
        for (const dep of deps.slice(0, 20)) {
          const constraint = pubspec.dependencies[dep];
          const constraintStr =
            typeof constraint === "string" ? constraint : typeof constraint === "object" ? "complex" : "";
          md += `- ${dep}`;
          if (constraintStr) md += `: ${constraintStr}`;
          md += "\n";
        }
        if (deps.length > 20) {
          md += `\n*...and ${deps.length - 20} more*\n`;
        }
        md += "\n";
      }
    }

    // Try to fetch README from pub.dev
    const readmeUrl = `https://pub.dev/packages/${encodeURIComponent(packageName)}/versions/${encodeURIComponent(latest.version)}/readme`;
    try {
      const readmeResult = await loadPage(readmeUrl, { timeout: Math.min(timeout, 10) });
      if (readmeResult.ok) {
        // Extract README content from HTML
        const readmeMatch = readmeResult.content.match(
          /<div[^>]*class="[^"]*markdown-body[^"]*"[^>]*>([\s\S]*?)<\/div>/i,
        );
        if (readmeMatch) {
          // Basic HTML to markdown conversion for README
          const readme = readmeMatch[1]
            .replace(/<h(\d)[^>]*>(.*?)<\/h\d>/gi, (_, level, text) => {
              const stripped = text.replace(/<[^>]+>/g, "");
              return `${"#".repeat(parseInt(level, 10))} ${stripped}\n\n`;
            })
            .replace(/<pre><code[^>]*>([\s\S]*?)<\/code><\/pre>/gi, (_, code) => {
              const decoded = code
                .replace(/&lt;/g, "<")
                .replace(/&gt;/g, ">")
                .replace(/&amp;/g, "&")
                .replace(/&quot;/g, '"');
              return `\n\`\`\`\n${decoded}\n\`\`\`\n\n`;
            })
            .replace(/<code[^>]*>(.*?)<\/code>/gi, "`$1`")
            .replace(/<a[^>]*href="([^"]+)"[^>]*>(.*?)<\/a>/gi, "[$2]($1)")
            .replace(/<strong[^>]*>(.*?)<\/strong>/gi, "**$1**")
            .replace(/<em[^>]*>(.*?)<\/em>/gi, "*$1*")
            .replace(/<li[^>]*>(.*?)<\/li>/gi, "- $1\n")
            .replace(/<\/?(ul|ol|p|br)[^>]*>/gi, "\n")
            .replace(/<[^>]+>/g, "")
            .replace(/&lt;/g, "<")
            .replace(/&gt;/g, ">")
            .replace(/&amp;/g, "&")
            .replace(/&quot;/g, '"')
            .replace(/&#39;/g, "'")
            .replace(/&nbsp;/g, " ")
            .replace(/\n{3,}/g, "\n\n")
            .trim();

          if (readme.length > 100) {
            md += `## README\n\n${readme}\n`;
          }
        }
      }
    } catch {
      // README fetch failed, continue without it
    }

    const output = finalizeOutput(md);
    return {
      url,
      finalUrl: url,
      contentType: "text/markdown",
      method: "pub.dev",
      content: output.content,
      fetchedAt,
      truncated: output.truncated,
      notes: ["Fetched via pub.dev API"],
    };
  } catch {}

  return null;
};
```
package/src/core/tools/web-fetch-handlers/pubmed.ts
@@ -0,0 +1,174 @@
```ts
/**
 * PubMed handler for web-fetch
 */

import type { RenderResult, SpecialHandler } from "./types";
import { finalizeOutput, loadPage } from "./types";

/**
 * Handle PubMed URLs - fetch article metadata, abstract, MeSH terms
 */
export const handlePubMed: SpecialHandler = async (url: string, timeout: number): Promise<RenderResult | null> => {
  try {
    const parsed = new URL(url);

    // Match pubmed.ncbi.nlm.nih.gov/{pmid} or ncbi.nlm.nih.gov/pubmed/{pmid}
    if (
      parsed.hostname !== "pubmed.ncbi.nlm.nih.gov" &&
      !(parsed.hostname === "ncbi.nlm.nih.gov" && parsed.pathname.startsWith("/pubmed"))
    ) {
      return null;
    }

    // Extract PMID from URL
    let pmid: string | null = null;
    if (parsed.hostname === "pubmed.ncbi.nlm.nih.gov") {
      // Format: pubmed.ncbi.nlm.nih.gov/12345678/
      const match = parsed.pathname.match(/\/(\d+)/);
      if (match) pmid = match[1];
    } else {
      // Format: ncbi.nlm.nih.gov/pubmed/12345678
      const match = parsed.pathname.match(/\/pubmed\/(\d+)/);
      if (match) pmid = match[1];
    }

    if (!pmid) return null;

    const fetchedAt = new Date().toISOString();
    const notes: string[] = [];

    // Fetch summary metadata
    const summaryUrl = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&id=${pmid}&retmode=json`;
    const summaryResult = await loadPage(summaryUrl, { timeout });

    if (!summaryResult.ok) return null;

    let summaryData: {
      result?: {
        [pmid: string]: {
          title?: string;
          authors?: Array<{ name: string }>;
          fulljournalname?: string;
          pubdate?: string;
          volume?: string;
          issue?: string;
          pages?: string;
          elocationid?: string; // DOI
          articleids?: Array<{ idtype: string; value: string }>;
        };
      };
    };

    try {
      summaryData = JSON.parse(summaryResult.content);
    } catch {
      return null;
    }

    const article = summaryData.result?.[pmid];
    if (!article) return null;

    // Fetch abstract
    const abstractUrl = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=${pmid}&rettype=abstract&retmode=text`;
    const abstractResult = await loadPage(abstractUrl, { timeout });

    let abstractText = "";
    if (abstractResult.ok) {
      abstractText = abstractResult.content.trim();
      notes.push("Fetched abstract via NCBI E-utilities");
    }

    // Extract DOI and PMCID
    let doi = "";
    let pmcid = "";
    if (article.articleids) {
      for (const id of article.articleids) {
        if (id.idtype === "doi") doi = id.value;
        if (id.idtype === "pmc") pmcid = id.value;
      }
    }
    if (!doi && article.elocationid) {
      doi = article.elocationid;
    }

    // Build markdown output
    let md = `# ${article.title || "PubMed Article"}\n\n`;

    // Authors
    if (article.authors && article.authors.length > 0) {
      const authorNames = article.authors.map((a) => a.name).join(", ");
      md += `**Authors:** ${authorNames}\n`;
    }

    // Journal info
    if (article.fulljournalname) {
      md += `**Journal:** ${article.fulljournalname}`;
      if (article.pubdate) md += ` (${article.pubdate})`;
      md += "\n";
    }

    // Volume/Issue/Pages
    const citation: string[] = [];
    if (article.volume) citation.push(`Vol ${article.volume}`);
    if (article.issue) citation.push(`Issue ${article.issue}`);
    if (article.pages) citation.push(`pp ${article.pages}`);
    if (citation.length > 0) {
      md += `**Citation:** ${citation.join(", ")}\n`;
    }

    // IDs
    md += `**PMID:** ${pmid}\n`;
    if (doi) md += `**DOI:** ${doi}\n`;
    if (pmcid) md += `**PMCID:** ${pmcid}\n`;

    md += "\n---\n\n";

    // Abstract section
    if (abstractText) {
      md += `## Abstract\n\n${abstractText}\n`;
    } else {
      md += `## Abstract\n\nNo abstract available.\n`;
    }

    // Try to fetch MeSH terms
    try {
      const meshUrl = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=${pmid}&rettype=medline&retmode=text`;
      const meshResult = await loadPage(meshUrl, { timeout: Math.min(timeout, 5) });

      if (meshResult.ok) {
        const meshTerms: string[] = [];
        const lines = meshResult.content.split("\n");
        for (const line of lines) {
          // MEDLINE tags are padded to four characters: "MH  - Term"
          if (line.startsWith("MH  - ")) {
            const term = line.slice(6).trim();
            meshTerms.push(term);
          }
        }

        if (meshTerms.length > 0) {
          md += `\n## MeSH Terms\n\n`;
          for (const term of meshTerms) {
            md += `- ${term}\n`;
          }
          notes.push("Fetched MeSH terms via NCBI E-utilities");
        }
      }
    } catch {
      // MeSH terms are optional
    }

    const output = finalizeOutput(md);
    return {
      url,
      finalUrl: url,
      contentType: "text/markdown",
      method: "pubmed",
      content: output.content,
      fetchedAt,
      truncated: output.truncated,
      notes: notes.length > 0 ? notes : ["Fetched via NCBI E-utilities"],
    };
  } catch {
    return null;
  }
};
```
package/src/core/tools/web-fetch-handlers/pypi.ts
@@ -0,0 +1,125 @@
```ts
import type { RenderResult, SpecialHandler } from "./types";
import { finalizeOutput, formatCount, loadPage } from "./types";

/**
 * Handle PyPI URLs via JSON API
 */
export const handlePyPI: SpecialHandler = async (url: string, timeout: number): Promise<RenderResult | null> => {
  try {
    const parsed = new URL(url);
    if (parsed.hostname !== "pypi.org" && parsed.hostname !== "www.pypi.org") return null;

    // Extract package name from /project/{package} or /project/{package}/{version}
    const match = parsed.pathname.match(/^\/project\/([^/]+)/);
    if (!match) return null;

    const packageName = decodeURIComponent(match[1]);
    const fetchedAt = new Date().toISOString();

    // Fetch from PyPI JSON API
    const apiUrl = `https://pypi.org/pypi/${packageName}/json`;
    const downloadsUrl = `https://pypistats.org/api/packages/${packageName}/recent`;

    // Fetch package info and download stats in parallel
    const [result, downloadsResult] = await Promise.all([
      loadPage(apiUrl, { timeout }),
      loadPage(downloadsUrl, { timeout: Math.min(timeout, 5) }),
    ]);

    if (!result.ok) return null;

    // Parse download stats
    let weeklyDownloads: number | null = null;
    if (downloadsResult.ok) {
      try {
        const dlData = JSON.parse(downloadsResult.content) as { data?: { last_week?: number } };
        weeklyDownloads = dlData.data?.last_week ?? null;
      } catch {}
    }

    let pkg: {
      info: {
        name: string;
        version: string;
        summary?: string;
        description?: string;
        author?: string;
        author_email?: string;
        license?: string;
        home_page?: string;
        project_urls?: Record<string, string>;
        requires_python?: string;
        keywords?: string;
        classifiers?: string[];
        // The PyPI JSON API nests requires_dist under info
        requires_dist?: string[];
      };
      urls?: Array<{ filename: string; size: number; upload_time: string }>;
      releases?: Record<string, unknown>;
    };

    try {
      pkg = JSON.parse(result.content);
    } catch {
      return null; // JSON parse failed
    }

    const info = pkg.info;
    let md = `# ${info.name}\n\n`;
    if (info.summary) md += `${info.summary}\n\n`;

    md += `**Latest:** ${info.version}`;
    if (info.license) md += ` · **License:** ${info.license}`;
    md += "\n";

    if (weeklyDownloads !== null) {
      md += `**Weekly Downloads:** ${formatCount(weeklyDownloads)}\n`;
    }

    md += "\n";

    if (info.author) {
      md += `**Author:** ${info.author}`;
      if (info.author_email) md += ` <${info.author_email}>`;
      md += "\n";
    }

    if (info.requires_python) md += `**Python:** ${info.requires_python}\n`;
    if (info.home_page) md += `**Homepage:** ${info.home_page}\n`;

    if (info.project_urls && Object.keys(info.project_urls).length > 0) {
      md += "\n**Project URLs:**\n";
      for (const [label, url] of Object.entries(info.project_urls)) {
        md += `- ${label}: ${url}\n`;
      }
    }

    if (info.keywords) md += `\n**Keywords:** ${info.keywords}\n`;

    // Dependencies
    if (info.requires_dist && info.requires_dist.length > 0) {
      md += `\n## Dependencies\n\n`;
      for (const dep of info.requires_dist) {
        md += `- ${dep}\n`;
      }
    }

    // README/Description
    if (info.description) {
      md += `\n---\n\n## Description\n\n${info.description}\n`;
    }

    const output = finalizeOutput(md);
    return {
      url,
      finalUrl: url,
      contentType: "text/markdown",
      method: "pypi",
      content: output.content,
      fetchedAt,
      truncated: output.truncated,
      notes: ["Fetched via PyPI JSON API"],
    };
  } catch {}

  return null;
};
```
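Each handler is callable on its own; a minimal sketch of direct use (the package URL and timeout here are illustrative, not from the diff):

```ts
// Minimal sketch: handlers return null for URLs they don't recognize,
// so callers can probe one and fall back to a generic fetch otherwise.
const rendered = await handlePyPI("https://pypi.org/project/requests/", 15);
if (rendered) {
  console.log(rendered.method); // "pypi"
  console.log(rendered.content.slice(0, 200)); // markdown summary
}
```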
package/src/core/tools/web-fetch-handlers/readthedocs.ts
@@ -0,0 +1,122 @@
```ts
/**
 * Read the Docs handler for web-fetch
 */

import { parse as parseHtml } from "node-html-parser";
import type { RenderResult, SpecialHandler } from "./types";
import { finalizeOutput, htmlToBasicMarkdown, loadPage } from "./types";

export const handleReadTheDocs: SpecialHandler = async (url: string, timeout: number): Promise<RenderResult | null> => {
  // Check if URL matches Read the Docs patterns
  const urlObj = new URL(url);
  const isReadTheDocs =
    urlObj.hostname.endsWith(".readthedocs.io") ||
    urlObj.hostname === "readthedocs.org" ||
    urlObj.hostname === "www.readthedocs.org";

  if (!isReadTheDocs) {
    return null;
  }

  const notes: string[] = [];
  const fetchedAt = new Date().toISOString();

  // Fetch the page
  const result = await loadPage(url, { timeout });
  if (!result.ok) {
    return {
      url,
      finalUrl: result.finalUrl,
      contentType: result.contentType,
      method: "readthedocs",
      content: `Failed to fetch Read the Docs page (status: ${result.status ?? "unknown"})`,
      fetchedAt,
      truncated: false,
      notes,
    };
  }

  // Parse HTML
  const root = parseHtml(result.content);

  // Extract main content from common Read the Docs selectors
  let mainContent =
    root.querySelector(".document") ||
    root.querySelector('[role="main"]') ||
    root.querySelector("main") ||
    root.querySelector(".rst-content") ||
    root.querySelector(".body");

  if (!mainContent) {
    // Fallback to body if no main content found
    mainContent = root.querySelector("body");
    notes.push("Using full body content (no main content div found)");
  }

  // Remove navigation, sidebar, footer elements
  mainContent
    ?.querySelectorAll(
      ".headerlink, .viewcode-link, nav, .sidebar, footer, .related, .sphinxsidebar, .toctree-wrapper",
    )
    .forEach((el) => {
      el.remove();
    });

  // Try to find Edit on GitHub/GitLab links for raw source
  const editLinks = root.querySelectorAll('a[href*="github.com"], a[href*="gitlab.com"]');
  let sourceUrl: string | null = null;

  for (const link of editLinks) {
    const href = link.getAttribute("href");
    const text = link.textContent?.toLowerCase() || "";

    if (href && (text.includes("edit") || text.includes("source"))) {
      // Convert edit URL to raw URL
      if (href.includes("github.com")) {
        sourceUrl = href.replace("/blob/", "/raw/").replace("/edit/", "/raw/");
      } else if (href.includes("gitlab.com")) {
        sourceUrl = href.replace("/blob/", "/raw/").replace("/edit/", "/raw/");
      }
      break;
    }
  }

  let content = "";

  // Try to fetch raw source if available
  if (sourceUrl) {
    try {
      const sourceResult = await loadPage(sourceUrl, { timeout: Math.min(timeout, 10) });
      if (sourceResult.ok && sourceResult.content.length > 0 && sourceResult.content.length < 1_000_000) {
        content = sourceResult.content;
        notes.push(`Fetched raw source from ${sourceUrl}`);
      }
    } catch (_err) {
      // Ignore errors, fall back to HTML
    }
  }

  // If no raw source, convert HTML to markdown
  if (!content && mainContent) {
    const html = mainContent.innerHTML;
    content = htmlToBasicMarkdown(html);
  }

  if (!content) {
    content = "No content extracted from Read the Docs page";
    notes.push("Failed to extract content");
  }

  const { content: finalContent, truncated } = finalizeOutput(content);

  return {
    url,
    finalUrl: result.finalUrl,
    contentType: sourceUrl ? "text/plain" : "text/html",
    method: "readthedocs",
    content: finalContent,
    fetchedAt,
    truncated,
    notes,
  };
};
```
package/src/core/tools/web-fetch-handlers/reddit.ts
@@ -0,0 +1,100 @@
```ts
import type { RenderResult, SpecialHandler } from "./types";
import { finalizeOutput, loadPage } from "./types";

interface RedditPost {
  title: string;
  selftext: string;
  author: string;
  score: number;
  num_comments: number;
  created_utc: number;
  subreddit: string;
  url: string;
  is_self: boolean;
}

interface RedditComment {
  body: string;
  author: string;
  score: number;
  created_utc: number;
  replies?: { data: { children: Array<{ data: RedditComment }> } };
}

/**
 * Handle Reddit URLs via JSON API
 */
export const handleReddit: SpecialHandler = async (url: string, timeout: number): Promise<RenderResult | null> => {
  try {
    const parsed = new URL(url);
    if (!parsed.hostname.includes("reddit.com")) return null;

    const fetchedAt = new Date().toISOString();

    // Append .json to get JSON response
    let jsonUrl = `${url.replace(/\/$/, "")}.json`;
    if (parsed.search) {
      jsonUrl = `${url.replace(/\/$/, "").replace(parsed.search, "")}.json${parsed.search}`;
    }

    const result = await loadPage(jsonUrl, { timeout });
    if (!result.ok) return null;

    const data = JSON.parse(result.content);
    let md = "";

    // Handle different Reddit URL types
    if (Array.isArray(data) && data.length >= 1) {
      // Post page (with comments)
      const postData = data[0]?.data?.children?.[0]?.data as RedditPost | undefined;
      if (postData) {
        md = `# ${postData.title}\n\n`;
        md += `**r/${postData.subreddit}** · u/${postData.author} · ${postData.score} points · ${postData.num_comments} comments\n`;
        md += `*${new Date(postData.created_utc * 1000).toISOString().split("T")[0]}*\n\n`;

        if (postData.is_self && postData.selftext) {
          md += `---\n\n${postData.selftext}\n\n`;
        } else if (!postData.is_self) {
          md += `**Link:** ${postData.url}\n\n`;
        }

        // Add comments if available
        if (data.length >= 2 && data[1]?.data?.children) {
          md += `---\n\n## Top Comments\n\n`;
          const comments = data[1].data.children.filter((c: { kind: string }) => c.kind === "t1").slice(0, 10);

          for (const { data: comment } of comments as Array<{ data: RedditComment }>) {
            md += `### u/${comment.author} · ${comment.score} points\n\n`;
            md += `${comment.body}\n\n---\n\n`;
          }
        }
      }
    } else if (data?.data?.children) {
      // Subreddit or listing page
      const posts = data.data.children.slice(0, 20) as Array<{ data: RedditPost }>;
      const subreddit = posts[0]?.data?.subreddit;

      md = `# r/${subreddit || "Reddit"}\n\n`;
      for (const { data: post } of posts) {
        md += `- **${post.title}** (${post.score} pts, ${post.num_comments} comments)\n`;
        md += `  by u/${post.author}\n\n`;
      }
    }

    if (!md) return null;

    const output = finalizeOutput(md);
    return {
      url,
      finalUrl: url,
      contentType: "text/markdown",
      method: "reddit",
      content: output.content,
      fetchedAt,
      truncated: output.truncated,
      notes: ["Fetched via Reddit JSON API"],
    };
  } catch {}

  return null;
};
```
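The new `web-fetch-handlers/index.ts` (+69 lines, not expanded in this diff) presumably wires these handlers together. One plausible dispatch shape, consistent with the null-means-skip contract above (the function and array names here are hypothetical, not taken from the package):

```ts
// Hypothetical dispatcher -- the actual wiring in index.ts is not shown in
// this diff. The first handler to return a non-null RenderResult wins.
const specialHandlers: SpecialHandler[] = [handlePubDev, handlePubMed, handlePyPI, handleReadTheDocs, handleReddit];

async function trySpecialHandlers(url: string, timeout: number): Promise<RenderResult | null> {
  for (const handler of specialHandlers) {
    const result = await handler(url, timeout);
    if (result) return result;
  }
  return null; // caller falls back to the generic web-fetch path
}
```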