@oh-my-pi/pi-coding-agent 12.18.3 → 12.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +47 -0
- package/package.json +7 -7
- package/src/async/index.ts +1 -0
- package/src/async/job-manager.ts +341 -0
- package/src/cli/file-processor.ts +3 -3
- package/src/cli/list-models.ts +3 -17
- package/src/cli/stats-cli.ts +3 -22
- package/src/cli/web-search-cli.ts +8 -16
- package/src/commit/agentic/agent.ts +6 -9
- package/src/commit/agentic/index.ts +44 -50
- package/src/commit/agentic/state.ts +0 -9
- package/src/commit/agentic/tools/propose-commit.ts +1 -30
- package/src/commit/agentic/tools/schemas.ts +31 -0
- package/src/commit/agentic/tools/split-commit.ts +1 -30
- package/src/commit/agentic/validation.ts +1 -18
- package/src/commit/analysis/conventional.ts +3 -50
- package/src/commit/analysis/summary.ts +2 -13
- package/src/commit/changelog/detect.ts +4 -1
- package/src/commit/changelog/generate.ts +2 -25
- package/src/commit/changelog/index.ts +1 -2
- package/src/commit/cli.ts +4 -12
- package/src/commit/map-reduce/reduce-phase.ts +2 -43
- package/src/commit/pipeline.ts +7 -15
- package/src/commit/utils.ts +44 -0
- package/src/config/prompt-templates.ts +1 -81
- package/src/config/settings-schema.ts +20 -1
- package/src/config.ts +2 -3
- package/src/debug/index.ts +1 -6
- package/src/debug/system-info.ts +2 -6
- package/src/discovery/builtin.ts +5 -9
- package/src/discovery/helpers.ts +0 -26
- package/src/discovery/ssh.ts +1 -8
- package/src/exa/company.ts +8 -39
- package/src/exa/factory.ts +64 -0
- package/src/exa/index.ts +0 -16
- package/src/exa/linkedin.ts +8 -39
- package/src/exa/mcp-client.ts +0 -64
- package/src/exa/researcher.ts +17 -59
- package/src/exa/search.ts +30 -154
- package/src/extensibility/custom-tools/loader.ts +3 -41
- package/src/extensibility/extensions/loader.ts +2 -9
- package/src/extensibility/hooks/loader.ts +3 -20
- package/src/extensibility/hooks/runner.ts +3 -19
- package/src/extensibility/plugins/installer.ts +2 -1
- package/src/extensibility/plugins/loader.ts +29 -117
- package/src/extensibility/skills.ts +2 -89
- package/src/extensibility/slash-commands.ts +1 -63
- package/src/extensibility/utils.ts +38 -0
- package/src/index.ts +9 -25
- package/src/internal-urls/index.ts +1 -0
- package/src/internal-urls/jobs-protocol.ts +118 -0
- package/src/ipy/kernel.ts +2 -0
- package/src/lsp/config.ts +1 -5
- package/src/lsp/lspmux.ts +0 -17
- package/src/lsp/utils.ts +2 -24
- package/src/main.ts +16 -24
- package/src/mcp/client.ts +1 -46
- package/src/mcp/render.ts +8 -1
- package/src/mcp/tool-cache.ts +1 -5
- package/src/mcp/transports/http.ts +2 -7
- package/src/mcp/transports/stdio.ts +2 -7
- package/src/modes/components/bash-execution.ts +2 -16
- package/src/modes/components/extensions/inspector-panel.ts +8 -18
- package/src/modes/components/footer.ts +10 -50
- package/src/modes/components/model-selector.ts +2 -21
- package/src/modes/components/python-execution.ts +2 -16
- package/src/modes/components/settings-selector.ts +1 -10
- package/src/modes/components/status-line/segments.ts +8 -25
- package/src/modes/components/status-line.ts +14 -31
- package/src/modes/components/tool-execution.ts +8 -2
- package/src/modes/controllers/command-controller.ts +71 -30
- package/src/modes/controllers/event-controller.ts +34 -4
- package/src/modes/controllers/mcp-command-controller.ts +3 -34
- package/src/modes/controllers/selector-controller.ts +2 -2
- package/src/modes/controllers/ssh-command-controller.ts +3 -34
- package/src/modes/interactive-mode.ts +6 -2
- package/src/modes/rpc/rpc-client.ts +1 -5
- package/src/modes/shared.ts +73 -0
- package/src/modes/types.ts +1 -0
- package/src/modes/utils/ui-helpers.ts +26 -2
- package/src/patch/index.ts +4 -4
- package/src/patch/normalize.ts +22 -65
- package/src/patch/shared.ts +16 -16
- package/src/prompts/system/custom-system-prompt.md +0 -10
- package/src/prompts/system/system-prompt.md +69 -89
- package/src/prompts/tools/async-result.md +5 -0
- package/src/prompts/tools/bash.md +5 -0
- package/src/prompts/tools/cancel-job.md +7 -0
- package/src/prompts/tools/poll-jobs.md +7 -0
- package/src/prompts/tools/task.md +4 -0
- package/src/sdk.ts +70 -6
- package/src/session/agent-session.ts +40 -6
- package/src/session/agent-storage.ts +69 -278
- package/src/session/auth-storage.ts +14 -1430
- package/src/session/session-manager.ts +69 -5
- package/src/session/session-storage.ts +1 -5
- package/src/session/streaming-output.ts +637 -76
- package/src/slash-commands/builtin-registry.ts +8 -0
- package/src/ssh/connection-manager.ts +4 -12
- package/src/ssh/sshfs-mount.ts +3 -7
- package/src/ssh/utils.ts +8 -0
- package/src/system-prompt.ts +24 -90
- package/src/task/executor.ts +11 -1
- package/src/task/index.ts +258 -13
- package/src/task/parallel.ts +32 -0
- package/src/task/render.ts +15 -7
- package/src/task/types.ts +5 -0
- package/src/tools/ask.ts +4 -7
- package/src/tools/bash-interactive.ts +4 -5
- package/src/tools/bash.ts +125 -41
- package/src/tools/cancel-job.ts +93 -0
- package/src/tools/fetch.ts +7 -27
- package/src/tools/find.ts +3 -3
- package/src/tools/gemini-image.ts +15 -14
- package/src/tools/grep.ts +3 -3
- package/src/tools/index.ts +13 -29
- package/src/tools/json-tree.ts +12 -1
- package/src/tools/jtd-to-json-schema.ts +10 -74
- package/src/tools/jtd-to-typescript.ts +10 -72
- package/src/tools/jtd-utils.ts +102 -0
- package/src/tools/notebook.ts +4 -9
- package/src/tools/output-meta.ts +52 -26
- package/src/tools/path-utils.ts +13 -7
- package/src/tools/poll-jobs.ts +178 -0
- package/src/tools/python.ts +32 -35
- package/src/tools/read.ts +61 -82
- package/src/tools/render-utils.ts +8 -159
- package/src/tools/ssh.ts +7 -20
- package/src/tools/submit-result.ts +1 -1
- package/src/tools/tool-errors.ts +0 -30
- package/src/tools/tool-result.ts +1 -2
- package/src/tools/write.ts +8 -10
- package/src/tui/code-cell.ts +8 -3
- package/src/tui/status-line.ts +4 -4
- package/src/tui/types.ts +0 -1
- package/src/tui/utils.ts +1 -14
- package/src/utils/command-args.ts +76 -0
- package/src/utils/file-mentions.ts +15 -19
- package/src/utils/frontmatter.ts +5 -10
- package/src/utils/shell-snapshot.ts +0 -11
- package/src/utils/title-generator.ts +0 -12
- package/src/web/scrapers/artifacthub.ts +7 -16
- package/src/web/scrapers/arxiv.ts +3 -8
- package/src/web/scrapers/aur.ts +8 -22
- package/src/web/scrapers/biorxiv.ts +5 -14
- package/src/web/scrapers/bluesky.ts +13 -36
- package/src/web/scrapers/brew.ts +5 -10
- package/src/web/scrapers/cheatsh.ts +2 -12
- package/src/web/scrapers/chocolatey.ts +63 -26
- package/src/web/scrapers/choosealicense.ts +3 -18
- package/src/web/scrapers/cisa-kev.ts +4 -18
- package/src/web/scrapers/clojars.ts +6 -33
- package/src/web/scrapers/coingecko.ts +25 -33
- package/src/web/scrapers/crates-io.ts +7 -26
- package/src/web/scrapers/crossref.ts +4 -18
- package/src/web/scrapers/devto.ts +11 -41
- package/src/web/scrapers/discogs.ts +7 -10
- package/src/web/scrapers/discourse.ts +6 -31
- package/src/web/scrapers/dockerhub.ts +12 -35
- package/src/web/scrapers/fdroid.ts +8 -33
- package/src/web/scrapers/firefox-addons.ts +10 -34
- package/src/web/scrapers/flathub.ts +7 -24
- package/src/web/scrapers/github-gist.ts +2 -12
- package/src/web/scrapers/github.ts +9 -47
- package/src/web/scrapers/gitlab.ts +130 -185
- package/src/web/scrapers/go-pkg.ts +12 -22
- package/src/web/scrapers/hackage.ts +88 -43
- package/src/web/scrapers/hackernews.ts +25 -45
- package/src/web/scrapers/hex.ts +19 -36
- package/src/web/scrapers/huggingface.ts +26 -91
- package/src/web/scrapers/iacr.ts +3 -8
- package/src/web/scrapers/jetbrains-marketplace.ts +9 -20
- package/src/web/scrapers/lemmy.ts +5 -23
- package/src/web/scrapers/lobsters.ts +16 -28
- package/src/web/scrapers/mastodon.ts +24 -43
- package/src/web/scrapers/maven.ts +6 -21
- package/src/web/scrapers/mdn.ts +7 -11
- package/src/web/scrapers/metacpan.ts +9 -41
- package/src/web/scrapers/musicbrainz.ts +4 -28
- package/src/web/scrapers/npm.ts +8 -25
- package/src/web/scrapers/nuget.ts +14 -37
- package/src/web/scrapers/nvd.ts +6 -28
- package/src/web/scrapers/ollama.ts +7 -34
- package/src/web/scrapers/open-vsx.ts +5 -19
- package/src/web/scrapers/opencorporates.ts +30 -14
- package/src/web/scrapers/openlibrary.ts +49 -33
- package/src/web/scrapers/orcid.ts +4 -18
- package/src/web/scrapers/osv.ts +7 -24
- package/src/web/scrapers/packagist.ts +9 -24
- package/src/web/scrapers/pub-dev.ts +7 -50
- package/src/web/scrapers/pubmed.ts +54 -21
- package/src/web/scrapers/pypi.ts +8 -26
- package/src/web/scrapers/rawg.ts +11 -19
- package/src/web/scrapers/readthedocs.ts +4 -9
- package/src/web/scrapers/reddit.ts +5 -15
- package/src/web/scrapers/repology.ts +8 -20
- package/src/web/scrapers/rfc.ts +5 -14
- package/src/web/scrapers/rubygems.ts +6 -21
- package/src/web/scrapers/searchcode.ts +8 -36
- package/src/web/scrapers/sec-edgar.ts +4 -18
- package/src/web/scrapers/semantic-scholar.ts +15 -35
- package/src/web/scrapers/snapcraft.ts +5 -19
- package/src/web/scrapers/sourcegraph.ts +5 -43
- package/src/web/scrapers/spdx.ts +4 -18
- package/src/web/scrapers/spotify.ts +4 -23
- package/src/web/scrapers/stackoverflow.ts +8 -13
- package/src/web/scrapers/terraform.ts +9 -37
- package/src/web/scrapers/tldr.ts +3 -7
- package/src/web/scrapers/twitter.ts +3 -7
- package/src/web/scrapers/types.ts +105 -27
- package/src/web/scrapers/utils.ts +97 -103
- package/src/web/scrapers/vimeo.ts +7 -27
- package/src/web/scrapers/vscode-marketplace.ts +8 -17
- package/src/web/scrapers/w3c.ts +6 -14
- package/src/web/scrapers/wikidata.ts +5 -19
- package/src/web/scrapers/wikipedia.ts +2 -12
- package/src/web/scrapers/youtube.ts +5 -34
- package/src/web/search/index.ts +0 -9
- package/src/web/search/providers/anthropic.ts +3 -2
- package/src/web/search/providers/brave.ts +3 -18
- package/src/web/search/providers/exa.ts +1 -12
- package/src/web/search/providers/kimi.ts +5 -44
- package/src/web/search/providers/perplexity.ts +1 -12
- package/src/web/search/providers/synthetic.ts +3 -26
- package/src/web/search/providers/utils.ts +36 -0
- package/src/web/search/providers/zai.ts +9 -50
- package/src/web/search/types.ts +0 -28
- package/src/web/search/utils.ts +17 -0
- package/src/tools/output-utils.ts +0 -63
- package/src/tools/truncate.ts +0 -385
- package/src/web/search/auth.ts +0 -178
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* PubMed handler for web-fetch
|
|
3
3
|
*/
|
|
4
|
-
import type
|
|
5
|
-
|
|
4
|
+
import { buildResult, loadPage, type RenderResult, type SpecialHandler, tryParseJson } from "./types";
|
|
5
|
+
|
|
6
|
+
const NCBI_HEADERS = {
|
|
7
|
+
Accept: "application/json, text/plain;q=0.9, */*;q=0.8",
|
|
8
|
+
"User-Agent": "CodingAgent/1.0 (web scraper)",
|
|
9
|
+
};
|
|
6
10
|
|
|
7
11
|
/**
|
|
8
12
|
* Handle PubMed URLs - fetch article metadata, abstract, MeSH terms
|
|
@@ -39,14 +43,45 @@ export const handlePubMed: SpecialHandler = async (
|
|
|
39
43
|
|
|
40
44
|
const fetchedAt = new Date().toISOString();
|
|
41
45
|
const notes: string[] = [];
|
|
46
|
+
const buildFallback = (fallbackNotes: string[]) =>
|
|
47
|
+
buildResult(`# PubMed Article\n\n**PMID:** ${pmid}\n\n---\n\n## Abstract\n\nNo abstract available.\n`, {
|
|
48
|
+
url,
|
|
49
|
+
method: "pubmed",
|
|
50
|
+
fetchedAt,
|
|
51
|
+
notes: fallbackNotes,
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
const fetchWithRetry = async (requestUrl: string, acceptJson = true) => {
|
|
55
|
+
let response = await loadPage(requestUrl, {
|
|
56
|
+
timeout,
|
|
57
|
+
signal,
|
|
58
|
+
headers: {
|
|
59
|
+
...NCBI_HEADERS,
|
|
60
|
+
Accept: acceptJson ? "application/json" : "text/plain, */*;q=0.8",
|
|
61
|
+
},
|
|
62
|
+
});
|
|
63
|
+
if (!response.ok) {
|
|
64
|
+
response = await loadPage(requestUrl, {
|
|
65
|
+
timeout,
|
|
66
|
+
signal,
|
|
67
|
+
headers: {
|
|
68
|
+
...NCBI_HEADERS,
|
|
69
|
+
Accept: acceptJson ? "application/json" : "text/plain, */*;q=0.8",
|
|
70
|
+
},
|
|
71
|
+
});
|
|
72
|
+
}
|
|
73
|
+
return response;
|
|
74
|
+
};
|
|
42
75
|
|
|
43
76
|
// Fetch summary metadata
|
|
44
77
|
const summaryUrl = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&id=${pmid}&retmode=json`;
|
|
45
|
-
const summaryResult = await
|
|
78
|
+
const summaryResult = await fetchWithRetry(summaryUrl);
|
|
46
79
|
|
|
47
|
-
if (!summaryResult.ok)
|
|
80
|
+
if (!summaryResult.ok) {
|
|
81
|
+
return buildFallback(["Failed to fetch PubMed summary metadata"]);
|
|
82
|
+
}
|
|
48
83
|
|
|
49
|
-
|
|
84
|
+
const summaryData = tryParseJson<{
|
|
50
85
|
result?: {
|
|
51
86
|
[pmid: string]: {
|
|
52
87
|
title?: string;
|
|
@@ -60,20 +95,19 @@ export const handlePubMed: SpecialHandler = async (
|
|
|
60
95
|
articleids?: Array<{ idtype: string; value: string }>;
|
|
61
96
|
};
|
|
62
97
|
};
|
|
63
|
-
};
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
summaryData = JSON.parse(summaryResult.content);
|
|
67
|
-
} catch {
|
|
68
|
-
return null;
|
|
98
|
+
}>(summaryResult.content);
|
|
99
|
+
if (!summaryData) {
|
|
100
|
+
return buildFallback(["Failed to parse PubMed summary metadata"]);
|
|
69
101
|
}
|
|
70
102
|
|
|
71
103
|
const article = summaryData.result?.[pmid];
|
|
72
|
-
if (!article)
|
|
104
|
+
if (!article) {
|
|
105
|
+
return buildFallback(["PubMed record unavailable from E-utilities summary endpoint"]);
|
|
106
|
+
}
|
|
73
107
|
|
|
74
108
|
// Fetch abstract
|
|
75
109
|
const abstractUrl = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=${pmid}&rettype=abstract&retmode=text`;
|
|
76
|
-
const abstractResult = await
|
|
110
|
+
const abstractResult = await fetchWithRetry(abstractUrl, false);
|
|
77
111
|
|
|
78
112
|
let abstractText = "";
|
|
79
113
|
if (abstractResult.ok) {
|
|
@@ -136,7 +170,11 @@ export const handlePubMed: SpecialHandler = async (
|
|
|
136
170
|
// Try to fetch MeSH terms
|
|
137
171
|
try {
|
|
138
172
|
const meshUrl = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=${pmid}&rettype=medline&retmode=text`;
|
|
139
|
-
const meshResult = await loadPage(meshUrl, {
|
|
173
|
+
const meshResult = await loadPage(meshUrl, {
|
|
174
|
+
timeout: Math.min(timeout, 5),
|
|
175
|
+
signal,
|
|
176
|
+
headers: { ...NCBI_HEADERS, Accept: "text/plain, */*;q=0.8" },
|
|
177
|
+
});
|
|
140
178
|
|
|
141
179
|
if (meshResult.ok) {
|
|
142
180
|
const meshTerms: string[] = [];
|
|
@@ -160,17 +198,12 @@ export const handlePubMed: SpecialHandler = async (
|
|
|
160
198
|
// MeSH terms are optional
|
|
161
199
|
}
|
|
162
200
|
|
|
163
|
-
|
|
164
|
-
return {
|
|
201
|
+
return buildResult(md, {
|
|
165
202
|
url,
|
|
166
|
-
finalUrl: url,
|
|
167
|
-
contentType: "text/markdown",
|
|
168
203
|
method: "pubmed",
|
|
169
|
-
content: output.content,
|
|
170
204
|
fetchedAt,
|
|
171
|
-
truncated: output.truncated,
|
|
172
205
|
notes: notes.length > 0 ? notes : ["Fetched via NCBI E-utilities"],
|
|
173
|
-
};
|
|
206
|
+
});
|
|
174
207
|
} catch {
|
|
175
208
|
return null;
|
|
176
209
|
}
|
package/src/web/scrapers/pypi.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import type
|
|
2
|
-
import { finalizeOutput, formatCount, loadPage } from "./types";
|
|
1
|
+
import { buildResult, formatNumber, loadPage, type RenderResult, type SpecialHandler, tryParseJson } from "./types";
|
|
3
2
|
|
|
4
3
|
/**
|
|
5
4
|
* Handle PyPI URLs via JSON API
|
|
@@ -35,13 +34,11 @@ export const handlePyPI: SpecialHandler = async (
|
|
|
35
34
|
// Parse download stats
|
|
36
35
|
let weeklyDownloads: number | null = null;
|
|
37
36
|
if (downloadsResult.ok) {
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
weeklyDownloads = dlData.data?.last_week ?? null;
|
|
41
|
-
} catch {}
|
|
37
|
+
const dlData = tryParseJson<{ data?: { last_week?: number } }>(downloadsResult.content);
|
|
38
|
+
if (dlData) weeklyDownloads = dlData.data?.last_week ?? null;
|
|
42
39
|
}
|
|
43
40
|
|
|
44
|
-
|
|
41
|
+
const pkg = tryParseJson<{
|
|
45
42
|
info: {
|
|
46
43
|
name: string;
|
|
47
44
|
version: string;
|
|
@@ -59,13 +56,8 @@ export const handlePyPI: SpecialHandler = async (
|
|
|
59
56
|
urls?: Array<{ filename: string; size: number; upload_time: string }>;
|
|
60
57
|
releases?: Record<string, unknown>;
|
|
61
58
|
requires_dist?: string[];
|
|
62
|
-
};
|
|
63
|
-
|
|
64
|
-
try {
|
|
65
|
-
pkg = JSON.parse(result.content);
|
|
66
|
-
} catch {
|
|
67
|
-
return null; // JSON parse failed
|
|
68
|
-
}
|
|
59
|
+
}>(result.content);
|
|
60
|
+
if (!pkg) return null;
|
|
69
61
|
|
|
70
62
|
const info = pkg.info;
|
|
71
63
|
let md = `# ${info.name}\n\n`;
|
|
@@ -76,7 +68,7 @@ export const handlePyPI: SpecialHandler = async (
|
|
|
76
68
|
md += "\n";
|
|
77
69
|
|
|
78
70
|
if (weeklyDownloads !== null) {
|
|
79
|
-
md += `**Weekly Downloads:** ${
|
|
71
|
+
md += `**Weekly Downloads:** ${formatNumber(weeklyDownloads)}\n`;
|
|
80
72
|
}
|
|
81
73
|
|
|
82
74
|
md += "\n";
|
|
@@ -112,17 +104,7 @@ export const handlePyPI: SpecialHandler = async (
|
|
|
112
104
|
md += `\n---\n\n## Description\n\n${info.description}\n`;
|
|
113
105
|
}
|
|
114
106
|
|
|
115
|
-
|
|
116
|
-
return {
|
|
117
|
-
url,
|
|
118
|
-
finalUrl: url,
|
|
119
|
-
contentType: "text/markdown",
|
|
120
|
-
method: "pypi",
|
|
121
|
-
content: output.content,
|
|
122
|
-
fetchedAt,
|
|
123
|
-
truncated: output.truncated,
|
|
124
|
-
notes: ["Fetched via PyPI JSON API"],
|
|
125
|
-
};
|
|
107
|
+
return buildResult(md, { url, method: "pypi", fetchedAt, notes: ["Fetched via PyPI JSON API"] });
|
|
126
108
|
} catch {}
|
|
127
109
|
|
|
128
110
|
return null;
|
package/src/web/scrapers/rawg.ts
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
1
|
+
import {
|
|
2
|
+
buildResult,
|
|
3
|
+
htmlToBasicMarkdown,
|
|
4
|
+
loadPage,
|
|
5
|
+
type RenderResult,
|
|
6
|
+
type SpecialHandler,
|
|
7
|
+
tryParseJson,
|
|
8
|
+
} from "./types";
|
|
3
9
|
|
|
4
10
|
interface RawgPlatformEntry {
|
|
5
11
|
platform?: {
|
|
@@ -41,12 +47,8 @@ export const handleRawg: SpecialHandler = async (
|
|
|
41
47
|
|
|
42
48
|
if (!result.ok) return null;
|
|
43
49
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
game = JSON.parse(result.content);
|
|
47
|
-
} catch {
|
|
48
|
-
return null;
|
|
49
|
-
}
|
|
50
|
+
const game = tryParseJson<RawgGameResponse>(result.content);
|
|
51
|
+
if (!game) return null;
|
|
50
52
|
|
|
51
53
|
if (requiresApiKey(game)) return null;
|
|
52
54
|
|
|
@@ -72,17 +74,7 @@ export const handleRawg: SpecialHandler = async (
|
|
|
72
74
|
md += `## Description\n\n${description}\n`;
|
|
73
75
|
}
|
|
74
76
|
|
|
75
|
-
|
|
76
|
-
return {
|
|
77
|
-
url,
|
|
78
|
-
finalUrl: url,
|
|
79
|
-
contentType: "text/markdown",
|
|
80
|
-
method: "rawg",
|
|
81
|
-
content: output.content,
|
|
82
|
-
fetchedAt,
|
|
83
|
-
truncated: output.truncated,
|
|
84
|
-
notes: ["Fetched via RAWG API"],
|
|
85
|
-
};
|
|
77
|
+
return buildResult(md, { url, method: "rawg", fetchedAt, notes: ["Fetched via RAWG API"] });
|
|
86
78
|
} catch {}
|
|
87
79
|
|
|
88
80
|
return null;
|
|
@@ -2,8 +2,7 @@
|
|
|
2
2
|
* Read the Docs handler for web-fetch
|
|
3
3
|
*/
|
|
4
4
|
import { parse as parseHtml } from "node-html-parser";
|
|
5
|
-
import type
|
|
6
|
-
import { finalizeOutput, htmlToBasicMarkdown, loadPage } from "./types";
|
|
5
|
+
import { buildResult, htmlToBasicMarkdown, loadPage, type RenderResult, type SpecialHandler } from "./types";
|
|
7
6
|
|
|
8
7
|
export const handleReadTheDocs: SpecialHandler = async (
|
|
9
8
|
url: string,
|
|
@@ -110,16 +109,12 @@ export const handleReadTheDocs: SpecialHandler = async (
|
|
|
110
109
|
notes.push("Failed to extract content");
|
|
111
110
|
}
|
|
112
111
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
return {
|
|
112
|
+
return buildResult(content, {
|
|
116
113
|
url,
|
|
117
114
|
finalUrl: result.finalUrl,
|
|
118
|
-
contentType: sourceUrl ? "text/plain" : "text/html",
|
|
119
115
|
method: "readthedocs",
|
|
120
|
-
content: finalContent,
|
|
121
116
|
fetchedAt,
|
|
122
|
-
truncated,
|
|
123
117
|
notes,
|
|
124
|
-
|
|
118
|
+
contentType: sourceUrl ? "text/plain" : "text/html",
|
|
119
|
+
});
|
|
125
120
|
};
|
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import type
|
|
2
|
-
import { finalizeOutput, loadPage } from "./types";
|
|
1
|
+
import { buildResult, formatIsoDate, loadPage, type RenderResult, type SpecialHandler, tryParseJson } from "./types";
|
|
3
2
|
|
|
4
3
|
interface RedditPost {
|
|
5
4
|
title: string;
|
|
@@ -44,7 +43,8 @@ export const handleReddit: SpecialHandler = async (
|
|
|
44
43
|
const result = await loadPage(jsonUrl, { timeout, signal });
|
|
45
44
|
if (!result.ok) return null;
|
|
46
45
|
|
|
47
|
-
const data =
|
|
46
|
+
const data = tryParseJson<any>(result.content);
|
|
47
|
+
if (!data) return null;
|
|
48
48
|
let md = "";
|
|
49
49
|
|
|
50
50
|
// Handle different Reddit URL types
|
|
@@ -54,7 +54,7 @@ export const handleReddit: SpecialHandler = async (
|
|
|
54
54
|
if (postData) {
|
|
55
55
|
md = `# ${postData.title}\n\n`;
|
|
56
56
|
md += `**r/${postData.subreddit}** · u/${postData.author} · ${postData.score} points · ${postData.num_comments} comments\n`;
|
|
57
|
-
md += `*${
|
|
57
|
+
md += `*${formatIsoDate(postData.created_utc * 1000)}*\n\n`;
|
|
58
58
|
|
|
59
59
|
if (postData.is_self && postData.selftext) {
|
|
60
60
|
md += `---\n\n${postData.selftext}\n\n`;
|
|
@@ -87,17 +87,7 @@ export const handleReddit: SpecialHandler = async (
|
|
|
87
87
|
|
|
88
88
|
if (!md) return null;
|
|
89
89
|
|
|
90
|
-
|
|
91
|
-
return {
|
|
92
|
-
url,
|
|
93
|
-
finalUrl: url,
|
|
94
|
-
contentType: "text/markdown",
|
|
95
|
-
method: "reddit",
|
|
96
|
-
content: output.content,
|
|
97
|
-
fetchedAt,
|
|
98
|
-
truncated: output.truncated,
|
|
99
|
-
notes: ["Fetched via Reddit JSON API"],
|
|
100
|
-
};
|
|
90
|
+
return buildResult(md, { url, method: "reddit", fetchedAt, notes: ["Fetched via Reddit JSON API"] });
|
|
101
91
|
} catch {}
|
|
102
92
|
|
|
103
93
|
return null;
|
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import type
|
|
2
|
-
import { finalizeOutput, loadPage } from "./types";
|
|
1
|
+
import { buildResult, loadPage, type RenderResult, type SpecialHandler, tryParseJson } from "./types";
|
|
3
2
|
|
|
4
3
|
interface RepologyPackage {
|
|
5
4
|
repo: string;
|
|
@@ -122,18 +121,17 @@ export const handleRepology: SpecialHandler = async (
|
|
|
122
121
|
const apiUrl = `https://repology.org/api/v1/project/${encodeURIComponent(packageName)}`;
|
|
123
122
|
const result = await loadPage(apiUrl, {
|
|
124
123
|
timeout,
|
|
125
|
-
headers: {
|
|
124
|
+
headers: {
|
|
125
|
+
Accept: "application/json",
|
|
126
|
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
|
127
|
+
},
|
|
126
128
|
signal,
|
|
127
129
|
});
|
|
128
130
|
|
|
129
131
|
if (!result.ok) return null;
|
|
130
132
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
packages = JSON.parse(result.content);
|
|
134
|
-
} catch {
|
|
135
|
-
return null;
|
|
136
|
-
}
|
|
133
|
+
const packages = tryParseJson<RepologyPackage[]>(result.content);
|
|
134
|
+
if (!packages) return null;
|
|
137
135
|
|
|
138
136
|
// Empty response means package not found
|
|
139
137
|
if (!Array.isArray(packages) || packages.length === 0) return null;
|
|
@@ -245,17 +243,7 @@ export const handleRepology: SpecialHandler = async (
|
|
|
245
243
|
|
|
246
244
|
md += `\n---\n\n[View on Repology](${url})\n`;
|
|
247
245
|
|
|
248
|
-
|
|
249
|
-
return {
|
|
250
|
-
url,
|
|
251
|
-
finalUrl: url,
|
|
252
|
-
contentType: "text/markdown",
|
|
253
|
-
method: "repology",
|
|
254
|
-
content: output.content,
|
|
255
|
-
fetchedAt,
|
|
256
|
-
truncated: output.truncated,
|
|
257
|
-
notes: ["Fetched via Repology API"],
|
|
258
|
-
};
|
|
246
|
+
return buildResult(md, { url, method: "repology", fetchedAt, notes: ["Fetched via Repology API"] });
|
|
259
247
|
} catch {}
|
|
260
248
|
|
|
261
249
|
return null;
|
package/src/web/scrapers/rfc.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import type
|
|
2
|
-
import { finalizeOutput, loadPage } from "./types";
|
|
1
|
+
import { buildResult, loadPage, type RenderResult, type SpecialHandler, tryParseJson } from "./types";
|
|
3
2
|
|
|
4
3
|
interface RfcMetadata {
|
|
5
4
|
doc_id: string;
|
|
@@ -118,12 +117,8 @@ export const handleRfc: SpecialHandler = async (
|
|
|
118
117
|
|
|
119
118
|
let metadata: RfcMetadata | null = null;
|
|
120
119
|
if (metaResult.ok) {
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
notes.push("Metadata from RFC Editor JSON API");
|
|
124
|
-
} catch {
|
|
125
|
-
// JSON parse failed, continue without metadata
|
|
126
|
-
}
|
|
120
|
+
metadata = tryParseJson<RfcMetadata>(metaResult.content);
|
|
121
|
+
if (metadata) notes.push("Metadata from RFC Editor JSON API");
|
|
127
122
|
}
|
|
128
123
|
|
|
129
124
|
// Build markdown output
|
|
@@ -192,17 +187,13 @@ export const handleRfc: SpecialHandler = async (
|
|
|
192
187
|
md += cleanRfcText(textResult.content);
|
|
193
188
|
md += "\n```\n";
|
|
194
189
|
|
|
195
|
-
|
|
196
|
-
return {
|
|
190
|
+
return buildResult(md, {
|
|
197
191
|
url,
|
|
198
192
|
finalUrl: `https://www.rfc-editor.org/rfc/rfc${rfcNumber}`,
|
|
199
|
-
contentType: "text/markdown",
|
|
200
193
|
method: "rfc",
|
|
201
|
-
content: output.content,
|
|
202
194
|
fetchedAt,
|
|
203
|
-
truncated: output.truncated,
|
|
204
195
|
notes: notes.length ? notes : ["Fetched from RFC Editor"],
|
|
205
|
-
};
|
|
196
|
+
});
|
|
206
197
|
} catch {}
|
|
207
198
|
|
|
208
199
|
return null;
|
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import type
|
|
2
|
-
import { finalizeOutput, formatCount, loadPage } from "./types";
|
|
1
|
+
import { buildResult, formatNumber, loadPage, type RenderResult, type SpecialHandler, tryParseJson } from "./types";
|
|
3
2
|
|
|
4
3
|
interface RubyGemsDependency {
|
|
5
4
|
name: string;
|
|
@@ -56,12 +55,8 @@ export const handleRubyGems: SpecialHandler = async (
|
|
|
56
55
|
|
|
57
56
|
if (!result.ok) return null;
|
|
58
57
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
gem = JSON.parse(result.content);
|
|
62
|
-
} catch {
|
|
63
|
-
return null;
|
|
64
|
-
}
|
|
58
|
+
const gem = tryParseJson<RubyGemsResponse>(result.content);
|
|
59
|
+
if (!gem) return null;
|
|
65
60
|
|
|
66
61
|
let md = `# ${gem.name}\n\n`;
|
|
67
62
|
if (gem.info) md += `${gem.info}\n\n`;
|
|
@@ -72,8 +67,8 @@ export const handleRubyGems: SpecialHandler = async (
|
|
|
72
67
|
md += "\n";
|
|
73
68
|
|
|
74
69
|
// Downloads
|
|
75
|
-
md += `**Total Downloads:** ${
|
|
76
|
-
if (gem.version_downloads) md += ` · **Version Downloads:** ${
|
|
70
|
+
md += `**Total Downloads:** ${formatNumber(gem.downloads)}`;
|
|
71
|
+
if (gem.version_downloads) md += ` · **Version Downloads:** ${formatNumber(gem.version_downloads)}`;
|
|
77
72
|
md += "\n\n";
|
|
78
73
|
|
|
79
74
|
// Links
|
|
@@ -100,17 +95,7 @@ export const handleRubyGems: SpecialHandler = async (
|
|
|
100
95
|
}
|
|
101
96
|
}
|
|
102
97
|
|
|
103
|
-
|
|
104
|
-
return {
|
|
105
|
-
url,
|
|
106
|
-
finalUrl: url,
|
|
107
|
-
contentType: "text/markdown",
|
|
108
|
-
method: "rubygems",
|
|
109
|
-
content: output.content,
|
|
110
|
-
fetchedAt,
|
|
111
|
-
truncated: output.truncated,
|
|
112
|
-
notes: ["Fetched via RubyGems API"],
|
|
113
|
-
};
|
|
98
|
+
return buildResult(md, { url, method: "rubygems", fetchedAt, notes: ["Fetched via RubyGems API"] });
|
|
114
99
|
} catch {}
|
|
115
100
|
|
|
116
101
|
return null;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { RenderResult, SpecialHandler } from "./types";
|
|
2
|
-
import {
|
|
2
|
+
import { buildResult, formatNumber, loadPage, tryParseJson } from "./types";
|
|
3
3
|
|
|
4
4
|
interface SearchcodeResult {
|
|
5
5
|
id?: number | string;
|
|
@@ -84,12 +84,8 @@ export const handleSearchcode: SpecialHandler = async (
|
|
|
84
84
|
const result = await loadPage(apiUrl, { timeout, signal, headers: { Accept: "application/json" } });
|
|
85
85
|
if (!result.ok) return null;
|
|
86
86
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
data = JSON.parse(result.content) as SearchcodeResult;
|
|
90
|
-
} catch {
|
|
91
|
-
return null;
|
|
92
|
-
}
|
|
87
|
+
const data = tryParseJson<SearchcodeResult>(result.content);
|
|
88
|
+
if (!data) return null;
|
|
93
89
|
|
|
94
90
|
const filename = data.filename || data.location || `Result ${id}`;
|
|
95
91
|
const lineNumbers = parseLineNumbers(data.lines);
|
|
@@ -116,17 +112,7 @@ export const handleSearchcode: SpecialHandler = async (
|
|
|
116
112
|
md += "\n\n_No snippet available._\n";
|
|
117
113
|
}
|
|
118
114
|
|
|
119
|
-
|
|
120
|
-
return {
|
|
121
|
-
url,
|
|
122
|
-
finalUrl: url,
|
|
123
|
-
contentType: "text/markdown",
|
|
124
|
-
method: "searchcode",
|
|
125
|
-
content: output.content,
|
|
126
|
-
fetchedAt,
|
|
127
|
-
truncated: output.truncated,
|
|
128
|
-
notes: ["Fetched via searchcode API"],
|
|
129
|
-
};
|
|
115
|
+
return buildResult(md, { url, method: "searchcode", fetchedAt, notes: ["Fetched via searchcode API"] });
|
|
130
116
|
}
|
|
131
117
|
|
|
132
118
|
const query = parsed.searchParams.get("q");
|
|
@@ -141,12 +127,8 @@ export const handleSearchcode: SpecialHandler = async (
|
|
|
141
127
|
const result = await loadPage(apiUrl, { timeout, signal, headers: { Accept: "application/json" } });
|
|
142
128
|
if (!result.ok) return null;
|
|
143
129
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
data = JSON.parse(result.content) as SearchcodeSearchResponse;
|
|
147
|
-
} catch {
|
|
148
|
-
return null;
|
|
149
|
-
}
|
|
130
|
+
const data = tryParseJson<SearchcodeSearchResponse>(result.content);
|
|
131
|
+
if (!data) return null;
|
|
150
132
|
|
|
151
133
|
const results = Array.isArray(data.results) ? data.results : [];
|
|
152
134
|
const total =
|
|
@@ -162,7 +144,7 @@ export const handleSearchcode: SpecialHandler = async (
|
|
|
162
144
|
md += `## Metadata\n\n`;
|
|
163
145
|
md += `**Query:** \`${query}\`\n`;
|
|
164
146
|
md += `**Page:** ${page}\n`;
|
|
165
|
-
if (total !== null) md += `**Total Results:** ${
|
|
147
|
+
if (total !== null) md += `**Total Results:** ${formatNumber(total)}\n`;
|
|
166
148
|
md += `**Result Count:** ${results.length}\n`;
|
|
167
149
|
if (typeof data.nextpage === "number") md += `**Next Page:** ${data.nextpage}\n`;
|
|
168
150
|
|
|
@@ -200,17 +182,7 @@ export const handleSearchcode: SpecialHandler = async (
|
|
|
200
182
|
}
|
|
201
183
|
}
|
|
202
184
|
|
|
203
|
-
|
|
204
|
-
return {
|
|
205
|
-
url,
|
|
206
|
-
finalUrl: url,
|
|
207
|
-
contentType: "text/markdown",
|
|
208
|
-
method: "searchcode",
|
|
209
|
-
content: output.content,
|
|
210
|
-
fetchedAt,
|
|
211
|
-
truncated: output.truncated,
|
|
212
|
-
notes: ["Fetched via searchcode API"],
|
|
213
|
-
};
|
|
185
|
+
return buildResult(md, { url, method: "searchcode", fetchedAt, notes: ["Fetched via searchcode API"] });
|
|
214
186
|
} catch {}
|
|
215
187
|
|
|
216
188
|
return null;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { RenderResult, SpecialHandler } from "./types";
|
|
2
|
-
import {
|
|
2
|
+
import { buildResult, loadPage, tryParseJson } from "./types";
|
|
3
3
|
|
|
4
4
|
interface SecFiling {
|
|
5
5
|
accessionNumber: string;
|
|
@@ -184,12 +184,8 @@ export const handleSecEdgar: SpecialHandler = async (
|
|
|
184
184
|
|
|
185
185
|
if (!result.ok) return null;
|
|
186
186
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
company = JSON.parse(result.content);
|
|
190
|
-
} catch {
|
|
191
|
-
return null;
|
|
192
|
-
}
|
|
187
|
+
const company = tryParseJson<SecCompany>(result.content);
|
|
188
|
+
if (!company) return null;
|
|
193
189
|
|
|
194
190
|
// Build markdown output
|
|
195
191
|
let md = `# ${company.name}\n\n`;
|
|
@@ -257,17 +253,7 @@ export const handleSecEdgar: SpecialHandler = async (
|
|
|
257
253
|
md += `- [SEC EDGAR Filings](https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=${cik}&type=&dateb=&owner=include&count=40)\n`;
|
|
258
254
|
md += `- [Company Search](https://www.sec.gov/cgi-bin/browse-edgar?company=${encodeURIComponent(company.name)}&CIK=&type=&owner=include&count=40&action=getcompany)\n`;
|
|
259
255
|
|
|
260
|
-
|
|
261
|
-
return {
|
|
262
|
-
url,
|
|
263
|
-
finalUrl: url,
|
|
264
|
-
contentType: "text/markdown",
|
|
265
|
-
method: "sec-edgar",
|
|
266
|
-
content: output.content,
|
|
267
|
-
fetchedAt,
|
|
268
|
-
truncated: output.truncated,
|
|
269
|
-
notes: ["Fetched via SEC EDGAR API"],
|
|
270
|
-
};
|
|
256
|
+
return buildResult(md, { url, method: "sec-edgar", fetchedAt, notes: ["Fetched via SEC EDGAR API"] });
|
|
271
257
|
} catch {}
|
|
272
258
|
|
|
273
259
|
return null;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { SpecialHandler } from "./types";
|
|
2
|
-
import {
|
|
2
|
+
import { buildResult, formatNumber, loadPage, tryParseJson } from "./types";
|
|
3
3
|
|
|
4
4
|
interface SemanticScholarAuthor {
|
|
5
5
|
name: string;
|
|
@@ -48,16 +48,13 @@ export const handleSemanticScholar: SpecialHandler = async (url: string, timeout
|
|
|
48
48
|
|
|
49
49
|
const paperId = extractPaperId(url);
|
|
50
50
|
if (!paperId) {
|
|
51
|
-
return {
|
|
51
|
+
return buildResult("Failed to extract paper ID from Semantic Scholar URL", {
|
|
52
52
|
url,
|
|
53
|
-
finalUrl: url,
|
|
54
|
-
contentType: "text/plain",
|
|
55
53
|
method: "semantic-scholar",
|
|
56
|
-
content: "Failed to extract paper ID from Semantic Scholar URL",
|
|
57
54
|
fetchedAt: new Date().toISOString(),
|
|
58
|
-
truncated: false,
|
|
59
55
|
notes: ["Invalid URL format"],
|
|
60
|
-
|
|
56
|
+
contentType: "text/plain",
|
|
57
|
+
});
|
|
61
58
|
}
|
|
62
59
|
|
|
63
60
|
const fields = [
|
|
@@ -80,32 +77,26 @@ export const handleSemanticScholar: SpecialHandler = async (url: string, timeout
|
|
|
80
77
|
const { content, ok, finalUrl } = await loadPage(apiUrl, { timeout, signal });
|
|
81
78
|
|
|
82
79
|
if (!ok || !content) {
|
|
83
|
-
return {
|
|
80
|
+
return buildResult("Failed to fetch paper from Semantic Scholar API", {
|
|
84
81
|
url,
|
|
85
82
|
finalUrl: apiUrl,
|
|
86
|
-
contentType: "text/plain",
|
|
87
83
|
method: "semantic-scholar",
|
|
88
|
-
content: "Failed to fetch paper from Semantic Scholar API",
|
|
89
84
|
fetchedAt: new Date().toISOString(),
|
|
90
|
-
truncated: false,
|
|
91
85
|
notes: ["API request failed"],
|
|
92
|
-
|
|
86
|
+
contentType: "text/plain",
|
|
87
|
+
});
|
|
93
88
|
}
|
|
94
89
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
} catch {
|
|
99
|
-
return {
|
|
90
|
+
const paper = tryParseJson<SemanticScholarPaper>(content);
|
|
91
|
+
if (!paper) {
|
|
92
|
+
return buildResult("Failed to parse response from Semantic Scholar API", {
|
|
100
93
|
url,
|
|
101
94
|
finalUrl: apiUrl,
|
|
102
|
-
contentType: "text/plain",
|
|
103
95
|
method: "semantic-scholar",
|
|
104
|
-
content: "Failed to parse response from Semantic Scholar API",
|
|
105
96
|
fetchedAt: new Date().toISOString(),
|
|
106
|
-
truncated: false,
|
|
107
97
|
notes: ["JSON parse error"],
|
|
108
|
-
|
|
98
|
+
contentType: "text/plain",
|
|
99
|
+
});
|
|
109
100
|
}
|
|
110
101
|
|
|
111
102
|
const sections: string[] = [];
|
|
@@ -123,10 +114,10 @@ export const handleSemanticScholar: SpecialHandler = async (url: string, timeout
|
|
|
123
114
|
if (paper.year) metadata.push(`Year: ${paper.year}`);
|
|
124
115
|
if (paper.journal?.name) metadata.push(`Venue: ${paper.journal.name}`);
|
|
125
116
|
if (paper.citationCount !== undefined) {
|
|
126
|
-
metadata.push(`Citations: ${
|
|
117
|
+
metadata.push(`Citations: ${formatNumber(paper.citationCount)}`);
|
|
127
118
|
}
|
|
128
119
|
if (paper.referenceCount !== undefined) {
|
|
129
|
-
metadata.push(`References: ${
|
|
120
|
+
metadata.push(`References: ${formatNumber(paper.referenceCount)}`);
|
|
130
121
|
}
|
|
131
122
|
if (metadata.length > 0) {
|
|
132
123
|
sections.push(metadata.join(" • "));
|
|
@@ -175,16 +166,5 @@ export const handleSemanticScholar: SpecialHandler = async (url: string, timeout
|
|
|
175
166
|
}
|
|
176
167
|
|
|
177
168
|
const fullContent = sections.join("\n");
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
return {
|
|
181
|
-
url,
|
|
182
|
-
finalUrl,
|
|
183
|
-
contentType: "text/markdown",
|
|
184
|
-
method: "semantic-scholar",
|
|
185
|
-
content: finalContent,
|
|
186
|
-
fetchedAt: new Date().toISOString(),
|
|
187
|
-
truncated,
|
|
188
|
-
notes: [],
|
|
189
|
-
};
|
|
169
|
+
return buildResult(fullContent, { url, finalUrl, method: "semantic-scholar", fetchedAt: new Date().toISOString() });
|
|
190
170
|
};
|