@oh-my-pi/pi-coding-agent 3.24.0 → 3.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -0
- package/package.json +4 -4
- package/src/core/custom-commands/bundled/wt/index.ts +3 -0
- package/src/core/sdk.ts +7 -0
- package/src/core/tools/complete.ts +129 -0
- package/src/core/tools/index.test.ts +9 -1
- package/src/core/tools/index.ts +18 -5
- package/src/core/tools/jtd-to-json-schema.ts +252 -0
- package/src/core/tools/output.ts +125 -14
- package/src/core/tools/read.ts +4 -4
- package/src/core/tools/task/artifacts.ts +6 -9
- package/src/core/tools/task/executor.ts +189 -24
- package/src/core/tools/task/index.ts +23 -18
- package/src/core/tools/task/name-generator.ts +1577 -0
- package/src/core/tools/task/render.ts +137 -8
- package/src/core/tools/task/types.ts +26 -5
- package/src/core/tools/task/worker-protocol.ts +1 -0
- package/src/core/tools/task/worker.ts +136 -14
- package/src/core/tools/web-fetch-handlers/academic.test.ts +239 -0
- package/src/core/tools/web-fetch-handlers/artifacthub.ts +210 -0
- package/src/core/tools/web-fetch-handlers/arxiv.ts +84 -0
- package/src/core/tools/web-fetch-handlers/aur.ts +171 -0
- package/src/core/tools/web-fetch-handlers/biorxiv.ts +136 -0
- package/src/core/tools/web-fetch-handlers/bluesky.ts +277 -0
- package/src/core/tools/web-fetch-handlers/brew.ts +173 -0
- package/src/core/tools/web-fetch-handlers/business.test.ts +82 -0
- package/src/core/tools/web-fetch-handlers/cheatsh.ts +73 -0
- package/src/core/tools/web-fetch-handlers/chocolatey.ts +153 -0
- package/src/core/tools/web-fetch-handlers/coingecko.ts +179 -0
- package/src/core/tools/web-fetch-handlers/crates-io.ts +123 -0
- package/src/core/tools/web-fetch-handlers/dev-platforms.test.ts +254 -0
- package/src/core/tools/web-fetch-handlers/devto.ts +173 -0
- package/src/core/tools/web-fetch-handlers/discogs.ts +303 -0
- package/src/core/tools/web-fetch-handlers/dockerhub.ts +156 -0
- package/src/core/tools/web-fetch-handlers/documentation.test.ts +85 -0
- package/src/core/tools/web-fetch-handlers/finance-media.test.ts +144 -0
- package/src/core/tools/web-fetch-handlers/git-hosting.test.ts +272 -0
- package/src/core/tools/web-fetch-handlers/github-gist.ts +64 -0
- package/src/core/tools/web-fetch-handlers/github.ts +424 -0
- package/src/core/tools/web-fetch-handlers/gitlab.ts +444 -0
- package/src/core/tools/web-fetch-handlers/go-pkg.ts +271 -0
- package/src/core/tools/web-fetch-handlers/hackage.ts +89 -0
- package/src/core/tools/web-fetch-handlers/hackernews.ts +208 -0
- package/src/core/tools/web-fetch-handlers/hex.ts +121 -0
- package/src/core/tools/web-fetch-handlers/huggingface.ts +385 -0
- package/src/core/tools/web-fetch-handlers/iacr.ts +82 -0
- package/src/core/tools/web-fetch-handlers/index.ts +69 -0
- package/src/core/tools/web-fetch-handlers/lobsters.ts +186 -0
- package/src/core/tools/web-fetch-handlers/mastodon.ts +302 -0
- package/src/core/tools/web-fetch-handlers/maven.ts +147 -0
- package/src/core/tools/web-fetch-handlers/mdn.ts +174 -0
- package/src/core/tools/web-fetch-handlers/media.test.ts +138 -0
- package/src/core/tools/web-fetch-handlers/metacpan.ts +247 -0
- package/src/core/tools/web-fetch-handlers/npm.ts +107 -0
- package/src/core/tools/web-fetch-handlers/nuget.ts +201 -0
- package/src/core/tools/web-fetch-handlers/nvd.ts +238 -0
- package/src/core/tools/web-fetch-handlers/opencorporates.ts +273 -0
- package/src/core/tools/web-fetch-handlers/openlibrary.ts +313 -0
- package/src/core/tools/web-fetch-handlers/osv.ts +184 -0
- package/src/core/tools/web-fetch-handlers/package-managers-2.test.ts +199 -0
- package/src/core/tools/web-fetch-handlers/package-managers.test.ts +171 -0
- package/src/core/tools/web-fetch-handlers/package-registries.test.ts +259 -0
- package/src/core/tools/web-fetch-handlers/packagist.ts +170 -0
- package/src/core/tools/web-fetch-handlers/pub-dev.ts +185 -0
- package/src/core/tools/web-fetch-handlers/pubmed.ts +174 -0
- package/src/core/tools/web-fetch-handlers/pypi.ts +125 -0
- package/src/core/tools/web-fetch-handlers/readthedocs.ts +122 -0
- package/src/core/tools/web-fetch-handlers/reddit.ts +100 -0
- package/src/core/tools/web-fetch-handlers/repology.ts +257 -0
- package/src/core/tools/web-fetch-handlers/research.test.ts +107 -0
- package/src/core/tools/web-fetch-handlers/rfc.ts +205 -0
- package/src/core/tools/web-fetch-handlers/rubygems.ts +112 -0
- package/src/core/tools/web-fetch-handlers/sec-edgar.ts +269 -0
- package/src/core/tools/web-fetch-handlers/security.test.ts +103 -0
- package/src/core/tools/web-fetch-handlers/semantic-scholar.ts +190 -0
- package/src/core/tools/web-fetch-handlers/social-extended.test.ts +192 -0
- package/src/core/tools/web-fetch-handlers/social.test.ts +259 -0
- package/src/core/tools/web-fetch-handlers/spotify.ts +218 -0
- package/src/core/tools/web-fetch-handlers/stackexchange.test.ts +120 -0
- package/src/core/tools/web-fetch-handlers/stackoverflow.ts +123 -0
- package/src/core/tools/web-fetch-handlers/standards.test.ts +122 -0
- package/src/core/tools/web-fetch-handlers/terraform.ts +296 -0
- package/src/core/tools/web-fetch-handlers/tldr.ts +47 -0
- package/src/core/tools/web-fetch-handlers/twitter.ts +84 -0
- package/src/core/tools/web-fetch-handlers/types.ts +163 -0
- package/src/core/tools/web-fetch-handlers/utils.ts +91 -0
- package/src/core/tools/web-fetch-handlers/vimeo.ts +152 -0
- package/src/core/tools/web-fetch-handlers/wikidata.ts +349 -0
- package/src/core/tools/web-fetch-handlers/wikipedia.test.ts +73 -0
- package/src/core/tools/web-fetch-handlers/wikipedia.ts +91 -0
- package/src/core/tools/web-fetch-handlers/youtube.test.ts +198 -0
- package/src/core/tools/web-fetch-handlers/youtube.ts +319 -0
- package/src/core/tools/web-fetch.ts +152 -1324
- package/src/prompts/task.md +14 -50
- package/src/prompts/tools/output.md +2 -1
- package/src/prompts/tools/task.md +3 -1
- package/src/utils/tools-manager.ts +110 -8
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import type { RenderResult, SpecialHandler } from "./types";
|
|
2
|
+
import { finalizeOutput, loadPage } from "./types";
|
|
3
|
+
|
|
4
|
+
/** Root of the raw markdown pages in the tldr-pages repository. */
const TLDR_BASE = "https://raw.githubusercontent.com/tldr-pages/tldr/main/pages";
/** Platform directories probed in order; the first one that has the page wins. */
const PLATFORMS = ["common", "linux", "osx"] as const;

/**
 * Handle tldr page URLs
 * - https://tldr.sh/{command}
 * - https://tldr.ostera.io/{command}
 *
 * The command name is taken from the URL path and looked up as
 * `{platform}/{command}.md` under TLDR_BASE.
 *
 * @param url - URL requested by the caller
 * @param timeout - per-request timeout, forwarded to loadPage
 * @returns the rendered page, or null when the URL is not a tldr URL or no
 *          platform directory contains a non-empty page for the command
 */
export const handleTldr: SpecialHandler = async (url: string, timeout: number): Promise<RenderResult | null> => {
	let parsed: URL;
	try {
		parsed = new URL(url);
	} catch {
		return null; // not a parsable URL, so not ours
	}
	if (parsed.hostname !== "tldr.sh" && parsed.hostname !== "tldr.ostera.io") return null;

	// Extract command from path (e.g., /tar -> tar). Trailing slashes are dropped
	// as well so that /tar/ resolves the same page as /tar.
	const command = parsed.pathname
		.replace(/^\//, "")
		.replace(/\/+$/, "")
		.replace(/\.md$/, "");
	if (!command || command.includes("/")) return null;

	const fetchedAt = new Date().toISOString();

	try {
		// Try platforms in order: common, linux, osx
		for (const platform of PLATFORMS) {
			const rawUrl = `${TLDR_BASE}/${platform}/${command}.md`;
			const result = await loadPage(rawUrl, { timeout });
			if (!result.ok || !result.content.trim()) continue;

			const output = finalizeOutput(result.content);
			return {
				url,
				finalUrl: rawUrl,
				contentType: "text/markdown",
				method: "tldr",
				content: output.content,
				fetchedAt,
				truncated: output.truncated,
				notes: [`Fetched from tldr-pages (${platform})`],
			};
		}
	} catch {
		// Best effort: any unexpected failure means this handler declines the URL.
	}

	return null;
};
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import { parse as parseHtml } from "node-html-parser";
|
|
2
|
+
import type { RenderResult, SpecialHandler } from "./types";
|
|
3
|
+
import { finalizeOutput, loadPage } from "./types";
|
|
4
|
+
|
|
5
|
+
/** Nitter mirrors tried in order until one returns a parsable tweet. */
const NITTER_INSTANCES = [
	"nitter.privacyredirect.com",
	"nitter.tiekoetter.com",
	"nitter.poast.org",
	"nitter.woodland.cafe",
];

/**
 * Handle Twitter/X URLs via Nitter
 *
 * The URL path is replayed against each Nitter mirror; the first response that
 * contains a `.tweet-content` element is rendered to markdown (author, date,
 * text, stats and up to nine further timeline items).
 *
 * @param url - URL requested by the caller
 * @param timeout - timeout forwarded to loadPage, capped at 10 per mirror
 * @returns null when the URL is not a Twitter/X URL (including unparsable
 *          URLs); otherwise the rendered tweet, or a plain-text
 *          "twitter-blocked" result when no mirror produced a tweet
 */
export const handleTwitter: SpecialHandler = async (url: string, timeout: number): Promise<RenderResult | null> => {
	let parsed: URL;
	try {
		parsed = new URL(url);
	} catch {
		// An unparsable URL is not a Twitter URL; decline instead of reporting "blocked".
		return null;
	}
	if (!["twitter.com", "x.com", "www.twitter.com", "www.x.com"].includes(parsed.hostname)) {
		return null;
	}

	const fetchedAt = new Date().toISOString();

	// Try Nitter instances
	for (const instance of NITTER_INSTANCES) {
		const nitterUrl = `https://${instance}${parsed.pathname}`;
		try {
			const result = await loadPage(nitterUrl, { timeout: Math.min(timeout, 10) });
			// Very short bodies are treated as error/placeholder pages.
			if (!result.ok || result.content.length <= 500) continue;

			// Parse the Nitter HTML
			const doc = parseHtml(result.content);

			// Extract tweet content
			const tweetContent = doc.querySelector(".tweet-content")?.text?.trim();
			if (!tweetContent) continue;

			const fullname = doc.querySelector(".fullname")?.text?.trim();
			const username = doc.querySelector(".username")?.text?.trim();
			const date = doc.querySelector(".tweet-date a")?.text?.trim();
			const stats = doc.querySelector(".tweet-stats")?.text?.trim();

			let md = `# Tweet by ${fullname || "Unknown"} (${username || "@?"})\n\n`;
			if (date) md += `*${date}*\n\n`;
			md += `${tweetContent}\n\n`;
			if (stats) md += `---\n${stats.replace(/\s+/g, " ")}\n`;

			// Check for replies/thread
			const replies = doc.querySelectorAll(".timeline-item .tweet-content");
			if (replies.length > 1) {
				md += `\n---\n\n## Thread/Replies\n\n`;
				// Index 0 is skipped (it is not listed as a reply); at most nine more are shown.
				for (const reply of Array.from(replies).slice(1, 10)) {
					const replyUser = reply.parentNode?.querySelector(".username")?.text?.trim();
					md += `**${replyUser || "@?"}**: ${reply.text?.trim()}\n\n`;
				}
			}

			const output = finalizeOutput(md);
			return {
				url,
				finalUrl: nitterUrl,
				contentType: "text/markdown",
				method: "twitter-nitter",
				content: output.content,
				fetchedAt,
				truncated: output.truncated,
				notes: [`Via Nitter: ${instance}`],
			};
		} catch {
			// A failure on one mirror must not prevent the remaining mirrors from being tried.
		}
	}

	// X.com blocks all bots - return a helpful error instead of falling through
	return {
		url,
		finalUrl: url,
		contentType: "text/plain",
		method: "twitter-blocked",
		content:
			"Twitter/X blocks automated access. Nitter instances were unavailable.\n\nTry:\n- Opening the link in a browser\n- Using a different Nitter instance manually\n- Checking if the tweet is available via an archive service",
		fetchedAt: new Date().toISOString(),
		truncated: false,
		notes: ["X.com blocks bots; Nitter instances unavailable"],
	};
};
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared types and utilities for web-fetch handlers
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/** Normalized result returned by a web-fetch handler. */
export interface RenderResult {
	/** URL exactly as it was requested. */
	url: string;
	/** URL the content was actually loaded from (may differ from `url`). */
	finalUrl: string;
	/** MIME type of `content`, e.g. "text/markdown" or "text/plain". */
	contentType: string;
	/** Short tag naming the strategy that produced the result, e.g. "tldr". */
	method: string;
	/** Rendered text. */
	content: string;
	/** Timestamp string; the handlers fill it with `new Date().toISOString()`. */
	fetchedAt: string;
	/** True when `content` was cut to the output limit. */
	truncated: boolean;
	/** Free-form remarks about how the result was obtained. */
	notes: string[];
}

/**
 * A site-specific handler. It receives the requested URL and a timeout and
 * resolves to a result, or to null when it does not handle the URL.
 */
export type SpecialHandler = (url: string, timeout: number) => Promise<RenderResult | null>;
|
|
17
|
+
|
|
18
|
+
/** Maximum number of characters of content a handler returns. */
export const MAX_OUTPUT_CHARS = 500_000;

/**
 * Tidy handler output and enforce the length limit.
 *
 * Runs of three or more newlines collapse to a single blank line and
 * surrounding whitespace is trimmed; the text is then cut to
 * MAX_OUTPUT_CHARS characters.
 *
 * @param content - raw text produced by a handler
 * @returns the cleaned text and whether anything was cut off
 */
export function finalizeOutput(content: string): { content: string; truncated: boolean } {
	const normalized = content.replace(/\n{3,}/g, "\n\n").trim();
	if (normalized.length <= MAX_OUTPUT_CHARS) {
		return { content: normalized, truncated: false };
	}
	return { content: normalized.slice(0, MAX_OUTPUT_CHARS), truncated: true };
}
|
|
31
|
+
|
|
32
|
+
/**
 * Fetch a page with timeout and size limit
 *
 * The request is attempted once per entry in a fixed list of User-Agent
 * strings. The next entry is used when the request throws (network error or
 * timeout), or when the response is a 403/503 whose body mentions
 * "cloudflare", "captcha" or "blocked".
 *
 * @param url - URL to fetch; redirects are followed
 * @param options.timeout - seconds allowed per attempt, covering both the
 *        request and the body download (default 20)
 * @param options.headers - extra request headers; they override the defaults
 * @param options.maxBytes - maximum number of body bytes kept (default 50 MiB);
 *        the download is cancelled once the limit is reached
 * @returns the decoded body, the lower-cased content type without parameters,
 *          the final URL and the HTTP status. `ok` is false for non-2xx
 *          responses and when every attempt failed (then `status` is absent).
 *          This function does not throw.
 */
export async function loadPage(
	url: string,
	options: { timeout?: number; headers?: Record<string, string>; maxBytes?: number } = {},
): Promise<{ content: string; contentType: string; finalUrl: string; ok: boolean; status?: number }> {
	const { timeout = 20, headers = {}, maxBytes = 50 * 1024 * 1024 } = options;

	const userAgents = [
		"curl/8.0",
		"Mozilla/5.0 (compatible; TextBot/1.0)",
		"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
	];

	for (const [attempt, userAgent] of userAgents.entries()) {
		const isLastAttempt = attempt === userAgents.length - 1;
		const controller = new AbortController();
		const timeoutId = setTimeout(() => controller.abort(), timeout * 1000);

		try {
			const response = await fetch(url, {
				signal: controller.signal,
				headers: {
					"User-Agent": userAgent,
					Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
					"Accept-Language": "en-US,en;q=0.5",
					...headers,
				},
				redirect: "follow",
			});

			const contentType = response.headers.get("content-type")?.split(";")[0]?.trim().toLowerCase() ?? "";
			const finalUrl = response.url;

			const reader = response.body?.getReader();
			if (!reader) {
				return { content: "", contentType, finalUrl, ok: false, status: response.status };
			}

			// Decode incrementally so the body is never held twice, and stop at maxBytes.
			const decoder = new TextDecoder();
			let content = "";
			let remaining = maxBytes;
			while (true) {
				const { done, value } = await reader.read();
				if (done) break;

				if (value.length > remaining) {
					// Keep only the bytes that still fit, then stop the download.
					content += decoder.decode(value.subarray(0, remaining), { stream: true });
					await reader.cancel().catch(() => {});
					break;
				}
				content += decoder.decode(value, { stream: true });
				remaining -= value.length;
			}
			content += decoder.decode(); // flush any buffered partial sequence

			// Check if blocked
			if ((response.status === 403 || response.status === 503) && !isLastAttempt) {
				const lower = content.toLowerCase();
				if (lower.includes("cloudflare") || lower.includes("captcha") || lower.includes("blocked")) {
					continue;
				}
			}

			return { content, contentType, finalUrl, ok: response.ok, status: response.status };
		} catch {
			if (isLastAttempt) {
				return { content: "", contentType: "", finalUrl: url, ok: false };
			}
		} finally {
			// Always release the timer: it must not outlive the attempt, and it has to
			// stay armed until the body has been read so a stalled download is aborted.
			clearTimeout(timeoutId);
		}
	}

	return { content: "", contentType: "", finalUrl: url, ok: false };
}
|
|
116
|
+
|
|
117
|
+
/**
 * Format large numbers (1000 -> 1.0K, 1000000 -> 1.0M)
 *
 * Values below 1000 are returned unchanged via String(); larger values are
 * shown with one decimal and a K or M suffix. A value that would round up to
 * "1000.0K" is shown in millions instead.
 *
 * @param n - the count to format
 * @returns the compact representation
 */
export function formatCount(n: number): string {
	if (n < 1_000) return String(n);

	if (n < 1_000_000) {
		const thousands = (n / 1_000).toFixed(1);
		// e.g. 999_990 rounds to "1000.0"; promote it to the next unit.
		if (Number(thousands) < 1_000) return `${thousands}K`;
	}
	return `${(n / 1_000_000).toFixed(1)}M`;
}
|
|
125
|
+
|
|
126
|
+
/**
 * Convert basic HTML to markdown
 *
 * A regex-based, best-effort conversion: code blocks, inline code, bold,
 * italics, links, paragraphs, line breaks, list items, headings and
 * blockquotes are rewritten. Apart from `<a href="...">` and
 * `<pre><code ...>`, only attribute-less tags are recognised. Every remaining
 * tag is removed, then a small set of HTML entities is decoded.
 *
 * @param html - HTML fragment
 * @returns markdown text, trimmed, with at most one blank line in a row
 */
export function htmlToBasicMarkdown(html: string): string {
	return (
		html
			.replace(/<pre><code[^>]*>/g, "\n```\n")
			.replace(/<\/code><\/pre>/g, "\n```\n")
			.replace(/<code>/g, "`")
			.replace(/<\/code>/g, "`")
			.replace(/<strong>/g, "**")
			.replace(/<\/strong>/g, "**")
			.replace(/<b>/g, "**")
			.replace(/<\/b>/g, "**")
			.replace(/<em>/g, "*")
			.replace(/<\/em>/g, "*")
			.replace(/<i>/g, "*")
			.replace(/<\/i>/g, "*")
			.replace(/<a href="([^"]+)"[^>]*>([^<]+)<\/a>/g, "[$2]($1)")
			.replace(/<p>/g, "\n\n")
			.replace(/<\/p>/g, "")
			.replace(/<br\s*\/?>/g, "\n")
			.replace(/<li>/g, "- ")
			.replace(/<\/li>/g, "\n")
			.replace(/<\/?[uo]l>/g, "\n")
			.replace(/<h(\d)>/g, (_, n) => `\n${"#".repeat(parseInt(n, 10))} `)
			.replace(/<\/h\d>/g, "\n")
			.replace(/<blockquote>/g, "\n> ")
			.replace(/<\/blockquote>/g, "\n")
			// Drop whatever markup is left before decoding, so that an escaped
			// "&lt;b&gt;" survives as the literal text "<b>".
			.replace(/<[^>]+>/g, "")
			.replace(/&lt;/g, "<")
			.replace(/&gt;/g, ">")
			.replace(/&quot;/g, '"')
			.replace(/&#39;/g, "'")
			.replace(/&nbsp;/g, " ")
			// "&amp;" goes last so "&amp;lt;" becomes "&lt;" and is not decoded twice.
			.replace(/&amp;/g, "&")
			.replace(/\n{3,}/g, "\n\n")
			.trim()
	);
}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import { tmpdir } from "node:os";
|
|
2
|
+
import * as path from "node:path";
|
|
3
|
+
import { ensureTool } from "../../../utils/tools-manager";
|
|
4
|
+
|
|
5
|
+
// Size cap for binary downloads: fetchBinary rejects a response whose declared or actual size exceeds it.
const MAX_BYTES = 50 * 1024 * 1024; // 50MB for binary files
|
|
6
|
+
|
|
7
|
+
/**
 * Run a command synchronously and capture its output.
 *
 * @param cmd - executable to run
 * @param args - arguments passed to the executable
 * @param options.timeout - maximum run time, passed on to Bun as milliseconds.
 *        NOTE(review): assumed to be in seconds like the other timeouts in
 *        this module - confirm against callers.
 * @param options.input - data written to the child's stdin; when absent or
 *        empty, stdin is not connected
 * @returns captured stdout and stderr as strings; `ok` is true only when the
 *          process exited with code 0
 */
function exec(
	cmd: string,
	args: string[],
	options?: { timeout?: number; input?: string | Buffer },
): { stdout: string; stderr: string; ok: boolean } {
	const input = options?.input;
	// Bun takes binary data for stdin, so strings are converted to bytes first.
	const stdin =
		input === undefined || input.length === 0 ? "ignore" : typeof input === "string" ? Buffer.from(input) : input;

	const timeoutSeconds = options?.timeout;
	const result = Bun.spawnSync([cmd, ...args], {
		stdin,
		stdout: "pipe",
		stderr: "pipe",
		// Honour the caller's time limit so a hung tool cannot block forever.
		...(timeoutSeconds !== undefined && timeoutSeconds > 0 ? { timeout: timeoutSeconds * 1000 } : {}),
	});

	return {
		stdout: result.stdout?.toString() ?? "",
		stderr: result.stderr?.toString() ?? "",
		ok: result.exitCode === 0,
	};
}
|
|
23
|
+
|
|
24
|
+
/**
 * Convert a binary document to text with the `markitdown` tool.
 *
 * The content is written to a temporary file (named with the extension hint),
 * the tool is run on that file, and the file is removed afterwards whether or
 * not the conversion succeeded.
 *
 * @param content - raw bytes of the document
 * @param extensionHint - file extension used for the temp file, e.g. ".pdf";
 *        ".bin" is used when empty
 * @param timeout - time limit forwarded to the tool invocation
 * @returns the tool's stdout and whether it exited successfully; an empty
 *          failed result when the tool is not available
 */
export async function convertWithMarkitdown(
	content: Buffer,
	extensionHint: string,
	timeout: number,
): Promise<{ content: string; ok: boolean }> {
	const markitdown = await ensureTool("markitdown", true);
	if (!markitdown) {
		return { content: "", ok: false };
	}

	// The hint ends up inside a file name: keep only characters that cannot
	// introduce path separators or other surprises.
	const ext = (extensionHint || ".bin").replace(/[^A-Za-z0-9.]/g, "") || ".bin";

	// pid + timestamp + random suffix keeps concurrent conversions from
	// overwriting each other's temp file.
	const unique = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`;
	const tmpFile = path.join(tmpdir(), `omp-convert-${unique}${ext}`);

	try {
		await Bun.write(tmpFile, content);
		const result = exec(markitdown, [tmpFile], { timeout });
		return { content: result.stdout, ok: result.ok };
	} finally {
		try {
			await Bun.$`rm ${tmpFile}`.quiet();
		} catch {
			// Cleanup is best effort; a leftover temp file must not fail the conversion.
		}
	}
}
|
|
49
|
+
|
|
50
|
+
/**
 * Download a URL as raw bytes, refusing bodies larger than MAX_BYTES.
 *
 * @param url - URL to download; redirects are followed
 * @param timeout - seconds allowed for the whole download (request and body)
 * @returns the bytes plus the Content-Type and Content-Disposition headers.
 *          `ok` is false, with an empty buffer, for non-2xx responses,
 *          oversized bodies, timeouts and network errors. Does not throw.
 */
export async function fetchBinary(
	url: string,
	timeout: number,
): Promise<{ buffer: Buffer; contentType: string; contentDisposition?: string; ok: boolean }> {
	const controller = new AbortController();
	const timeoutId = setTimeout(() => controller.abort(), timeout * 1000);

	try {
		const response = await fetch(url, {
			signal: controller.signal,
			headers: {
				"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131.0.0.0",
			},
			redirect: "follow",
		});

		if (!response.ok) {
			return { buffer: Buffer.alloc(0), contentType: "", ok: false };
		}

		const contentType = response.headers.get("content-type") ?? "";
		const contentDisposition = response.headers.get("content-disposition") ?? undefined;
		const tooLarge = { buffer: Buffer.alloc(0), contentType, contentDisposition, ok: false };

		// Cheap early exit when the server announces an oversized body.
		const contentLength = response.headers.get("content-length");
		if (contentLength) {
			const size = Number.parseInt(contentLength, 10);
			if (Number.isFinite(size) && size > MAX_BYTES) {
				return tooLarge;
			}
		}

		const reader = response.body?.getReader();
		if (!reader) {
			return { buffer: Buffer.alloc(0), contentType, contentDisposition, ok: true };
		}

		// Content-Length may be missing or wrong, so the limit is enforced while
		// streaming instead of after the whole body has been buffered.
		const chunks: Uint8Array[] = [];
		let received = 0;
		while (true) {
			const { done, value } = await reader.read();
			if (done) break;

			received += value.length;
			if (received > MAX_BYTES) {
				await reader.cancel().catch(() => {});
				return tooLarge;
			}
			chunks.push(value);
		}

		return { buffer: Buffer.concat(chunks), contentType, contentDisposition, ok: true };
	} catch {
		return { buffer: Buffer.alloc(0), contentType: "", ok: false };
	} finally {
		// Release the timer on every path; it stays armed during the body download.
		clearTimeout(timeoutId);
	}
}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import type { SpecialHandler } from "./types";
|
|
2
|
+
import { finalizeOutput, loadPage } from "./types";
|
|
3
|
+
|
|
4
|
+
/** Fields read from the Vimeo oEmbed JSON response. */
interface VimeoOEmbed {
	title: string;
	author_name: string;
	author_url: string;
	description?: string;
	/** Length of the video; rendered with formatDuration, which takes seconds. */
	duration: number;
	thumbnail_url: string;
	upload_date: string;
	video_id: number;
}

/** Fields read from the player config JSON (`/video/{id}/config`); all optional. */
interface VimeoVideoConfig {
	video?: {
		title?: string;
		duration?: number;
		owner?: { name?: string; url?: string };
		thumbs?: { base?: string };
	};
	request?: {
		files?: {
			/** One entry per available rendition. */
			progressive?: Array<{ quality: string; width: number; height: number; fps: number }>;
		};
	};
}
|
|
38
|
+
|
|
39
|
+
/**
 * Format seconds into H:MM:SS or M:SS
 *
 * Fractional seconds are rounded down; negative, NaN and infinite inputs are
 * rendered as "0:00".
 *
 * @param seconds - duration in seconds
 * @returns e.g. "1:05" for 65 and "1:01:01" for 3661
 */
function formatDuration(seconds: number): string {
	const total = Number.isFinite(seconds) && seconds > 0 ? Math.floor(seconds) : 0;

	const h = Math.floor(total / 3600);
	const m = Math.floor((total % 3600) / 60);
	const s = total % 60;

	const ss = String(s).padStart(2, "0");
	if (h > 0) return `${h}:${String(m).padStart(2, "0")}:${ss}`;
	return `${m}:${ss}`;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Pull the numeric video ID out of a Vimeo URL.
 *
 * Recognised shapes:
 * - player.vimeo.com/video/{id}
 * - vimeo.com/{id} and vimeo.com/.../{id} (the last path segment must be all digits)
 *
 * @param url - URL to inspect
 * @returns the ID as a string, or null for any other host, any other path
 *          shape, or an unparsable URL
 */
function extractVideoId(url: string): string | null {
	let target: URL;
	try {
		target = new URL(url);
	} catch {
		return null;
	}

	const { hostname, pathname } = target;

	if (hostname === "player.vimeo.com") {
		return /^\/video\/(\d+)/.exec(pathname)?.[1] ?? null;
	}

	if (hostname !== "vimeo.com" && hostname !== "www.vimeo.com") {
		return null;
	}

	// Only the final non-empty path segment is considered.
	const segments = pathname.split("/").filter((segment) => segment.length > 0);
	const candidate = segments[segments.length - 1];
	return candidate !== undefined && /^\d+$/.test(candidate) ? candidate : null;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Handle Vimeo URLs via oEmbed API
 *
 * Looks up the video's metadata through the oEmbed endpoint and renders it as
 * markdown. The list of available qualities from the player config is appended
 * when that second, optional request succeeds.
 *
 * @param url - URL requested by the caller
 * @param timeout - timeout forwarded to loadPage (capped at 5 for the config request)
 * @returns the rendered result, or null when the URL is not a Vimeo video URL,
 *          the oEmbed request fails, or its payload is not usable
 */
export const handleVimeo: SpecialHandler = async (url: string, timeout: number) => {
	try {
		const parsed = new URL(url);
		if (!parsed.hostname.includes("vimeo.com")) return null;

		// extractVideoId only accepts the exact Vimeo hosts, so look-alike
		// hostnames that merely contain "vimeo.com" are rejected here.
		const videoId = extractVideoId(url);
		if (!videoId) return null;

		const fetchedAt = new Date().toISOString();

		// Use canonical URL for oEmbed (handles staffpicks and other URL formats)
		const canonicalUrl = `https://vimeo.com/${videoId}`;
		const oembedUrl = `https://vimeo.com/api/oembed.json?url=${encodeURIComponent(canonicalUrl)}`;
		const oembedResult = await loadPage(oembedUrl, { timeout });

		if (!oembedResult.ok) return null;

		// The payload is external input: treat every field as possibly missing.
		const payload: unknown = JSON.parse(oembedResult.content);
		if (typeof payload !== "object" || payload === null) return null;
		const oembed = payload as Partial<VimeoOEmbed>;
		if (typeof oembed.title !== "string" || !oembed.title) return null;

		let md = `# ${oembed.title}\n\n`;
		if (oembed.author_name) {
			md += oembed.author_url
				? `**Author:** [${oembed.author_name}](${oembed.author_url})\n`
				: `**Author:** ${oembed.author_name}\n`;
		}
		if (typeof oembed.duration === "number") {
			md += `**Duration:** ${formatDuration(oembed.duration)}\n`;
		}

		if (oembed.upload_date) {
			md += `**Uploaded:** ${oembed.upload_date}\n`;
		}

		md += `**Video ID:** ${videoId}\n\n`;

		if (oembed.description) {
			md += `---\n\n## Description\n\n${oembed.description}\n\n`;
		}

		if (oembed.thumbnail_url) {
			md += `---\n\n**Thumbnail:** ${oembed.thumbnail_url}\n`;
		}

		// Try to get additional details from video config
		try {
			const configUrl = `https://player.vimeo.com/video/${videoId}/config`;
			const configResult = await loadPage(configUrl, { timeout: Math.min(timeout, 5) });

			if (configResult.ok) {
				const config = JSON.parse(configResult.content) as VimeoVideoConfig;

				// Add video quality info if available
				const progressive = config.request?.files?.progressive;
				if (Array.isArray(progressive) && progressive.length > 0) {
					md += `\n**Available Qualities:**\n`;
					for (const quality of progressive.slice(0, 5)) {
						md += `- ${quality.quality}: ${quality.width}x${quality.height} @ ${quality.fps}fps\n`;
					}
				}
			}
		} catch {
			// Config fetch is optional - continue without it
		}

		const output = finalizeOutput(md);
		return {
			url,
			finalUrl: url,
			contentType: "text/markdown",
			method: "vimeo",
			content: output.content,
			fetchedAt,
			truncated: output.truncated,
			notes: ["Fetched via Vimeo oEmbed API"],
		};
	} catch {
		// Unparsable URL or malformed JSON: decline the URL.
		return null;
	}
};
|