@oh-my-pi/pi-coding-agent 3.25.0 → 3.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. package/CHANGELOG.md +90 -0
  2. package/package.json +5 -5
  3. package/src/cli/args.ts +4 -0
  4. package/src/core/agent-session.ts +29 -2
  5. package/src/core/bash-executor.ts +2 -1
  6. package/src/core/custom-commands/bundled/review/index.ts +369 -14
  7. package/src/core/custom-commands/bundled/wt/index.ts +1 -1
  8. package/src/core/session-manager.ts +158 -246
  9. package/src/core/session-storage.ts +379 -0
  10. package/src/core/settings-manager.ts +155 -4
  11. package/src/core/system-prompt.ts +62 -64
  12. package/src/core/tools/ask.ts +5 -4
  13. package/src/core/tools/bash-interceptor.ts +26 -61
  14. package/src/core/tools/bash.ts +13 -8
  15. package/src/core/tools/complete.ts +2 -4
  16. package/src/core/tools/edit-diff.ts +11 -4
  17. package/src/core/tools/edit.ts +7 -13
  18. package/src/core/tools/find.ts +111 -50
  19. package/src/core/tools/gemini-image.ts +128 -147
  20. package/src/core/tools/grep.ts +397 -415
  21. package/src/core/tools/index.test.ts +5 -1
  22. package/src/core/tools/index.ts +6 -8
  23. package/src/core/tools/jtd-to-json-schema.ts +174 -196
  24. package/src/core/tools/ls.ts +12 -10
  25. package/src/core/tools/lsp/client.ts +58 -9
  26. package/src/core/tools/lsp/config.ts +205 -656
  27. package/src/core/tools/lsp/defaults.json +465 -0
  28. package/src/core/tools/lsp/index.ts +55 -32
  29. package/src/core/tools/lsp/rust-analyzer.ts +49 -10
  30. package/src/core/tools/lsp/types.ts +1 -0
  31. package/src/core/tools/lsp/utils.ts +1 -1
  32. package/src/core/tools/read.ts +152 -76
  33. package/src/core/tools/render-utils.ts +70 -10
  34. package/src/core/tools/review.ts +38 -126
  35. package/src/core/tools/task/artifacts.ts +5 -4
  36. package/src/core/tools/task/executor.ts +204 -67
  37. package/src/core/tools/task/index.ts +129 -92
  38. package/src/core/tools/task/name-generator.ts +1544 -214
  39. package/src/core/tools/task/parallel.ts +30 -3
  40. package/src/core/tools/task/render.ts +85 -39
  41. package/src/core/tools/task/types.ts +34 -11
  42. package/src/core/tools/task/worker.ts +152 -27
  43. package/src/core/tools/web-fetch.ts +220 -1657
  44. package/src/core/tools/web-scrapers/academic.test.ts +239 -0
  45. package/src/core/tools/web-scrapers/artifacthub.ts +215 -0
  46. package/src/core/tools/web-scrapers/arxiv.ts +88 -0
  47. package/src/core/tools/web-scrapers/aur.ts +175 -0
  48. package/src/core/tools/web-scrapers/biorxiv.ts +141 -0
  49. package/src/core/tools/web-scrapers/bluesky.ts +284 -0
  50. package/src/core/tools/web-scrapers/brew.ts +177 -0
  51. package/src/core/tools/web-scrapers/business.test.ts +82 -0
  52. package/src/core/tools/web-scrapers/cheatsh.ts +78 -0
  53. package/src/core/tools/web-scrapers/chocolatey.ts +158 -0
  54. package/src/core/tools/web-scrapers/choosealicense.ts +110 -0
  55. package/src/core/tools/web-scrapers/cisa-kev.ts +100 -0
  56. package/src/core/tools/web-scrapers/clojars.ts +180 -0
  57. package/src/core/tools/web-scrapers/coingecko.ts +184 -0
  58. package/src/core/tools/web-scrapers/crates-io.ts +128 -0
  59. package/src/core/tools/web-scrapers/crossref.ts +149 -0
  60. package/src/core/tools/web-scrapers/dev-platforms.test.ts +254 -0
  61. package/src/core/tools/web-scrapers/devto.ts +177 -0
  62. package/src/core/tools/web-scrapers/discogs.ts +308 -0
  63. package/src/core/tools/web-scrapers/discourse.ts +221 -0
  64. package/src/core/tools/web-scrapers/dockerhub.ts +160 -0
  65. package/src/core/tools/web-scrapers/documentation.test.ts +85 -0
  66. package/src/core/tools/web-scrapers/fdroid.ts +158 -0
  67. package/src/core/tools/web-scrapers/finance-media.test.ts +144 -0
  68. package/src/core/tools/web-scrapers/firefox-addons.ts +214 -0
  69. package/src/core/tools/web-scrapers/flathub.ts +239 -0
  70. package/src/core/tools/web-scrapers/git-hosting.test.ts +272 -0
  71. package/src/core/tools/web-scrapers/github-gist.ts +68 -0
  72. package/src/core/tools/web-scrapers/github.ts +455 -0
  73. package/src/core/tools/web-scrapers/gitlab.ts +456 -0
  74. package/src/core/tools/web-scrapers/go-pkg.ts +275 -0
  75. package/src/core/tools/web-scrapers/hackage.ts +94 -0
  76. package/src/core/tools/web-scrapers/hackernews.ts +208 -0
  77. package/src/core/tools/web-scrapers/hex.ts +121 -0
  78. package/src/core/tools/web-scrapers/huggingface.ts +385 -0
  79. package/src/core/tools/web-scrapers/iacr.ts +86 -0
  80. package/src/core/tools/web-scrapers/index.ts +250 -0
  81. package/src/core/tools/web-scrapers/jetbrains-marketplace.ts +169 -0
  82. package/src/core/tools/web-scrapers/lemmy.ts +220 -0
  83. package/src/core/tools/web-scrapers/lobsters.ts +186 -0
  84. package/src/core/tools/web-scrapers/mastodon.ts +310 -0
  85. package/src/core/tools/web-scrapers/maven.ts +152 -0
  86. package/src/core/tools/web-scrapers/mdn.ts +174 -0
  87. package/src/core/tools/web-scrapers/media.test.ts +138 -0
  88. package/src/core/tools/web-scrapers/metacpan.ts +253 -0
  89. package/src/core/tools/web-scrapers/musicbrainz.ts +273 -0
  90. package/src/core/tools/web-scrapers/npm.ts +114 -0
  91. package/src/core/tools/web-scrapers/nuget.ts +205 -0
  92. package/src/core/tools/web-scrapers/nvd.ts +243 -0
  93. package/src/core/tools/web-scrapers/ollama.ts +267 -0
  94. package/src/core/tools/web-scrapers/open-vsx.ts +119 -0
  95. package/src/core/tools/web-scrapers/opencorporates.ts +275 -0
  96. package/src/core/tools/web-scrapers/openlibrary.ts +319 -0
  97. package/src/core/tools/web-scrapers/orcid.ts +299 -0
  98. package/src/core/tools/web-scrapers/osv.ts +189 -0
  99. package/src/core/tools/web-scrapers/package-managers-2.test.ts +199 -0
  100. package/src/core/tools/web-scrapers/package-managers.test.ts +171 -0
  101. package/src/core/tools/web-scrapers/package-registries.test.ts +259 -0
  102. package/src/core/tools/web-scrapers/packagist.ts +174 -0
  103. package/src/core/tools/web-scrapers/pub-dev.ts +185 -0
  104. package/src/core/tools/web-scrapers/pubmed.ts +178 -0
  105. package/src/core/tools/web-scrapers/pypi.ts +129 -0
  106. package/src/core/tools/web-scrapers/rawg.ts +124 -0
  107. package/src/core/tools/web-scrapers/readthedocs.ts +126 -0
  108. package/src/core/tools/web-scrapers/reddit.ts +104 -0
  109. package/src/core/tools/web-scrapers/repology.ts +262 -0
  110. package/src/core/tools/web-scrapers/research.test.ts +107 -0
  111. package/src/core/tools/web-scrapers/rfc.ts +209 -0
  112. package/src/core/tools/web-scrapers/rubygems.ts +117 -0
  113. package/src/core/tools/web-scrapers/searchcode.ts +217 -0
  114. package/src/core/tools/web-scrapers/sec-edgar.ts +274 -0
  115. package/src/core/tools/web-scrapers/security.test.ts +103 -0
  116. package/src/core/tools/web-scrapers/semantic-scholar.ts +190 -0
  117. package/src/core/tools/web-scrapers/snapcraft.ts +200 -0
  118. package/src/core/tools/web-scrapers/social-extended.test.ts +192 -0
  119. package/src/core/tools/web-scrapers/social.test.ts +259 -0
  120. package/src/core/tools/web-scrapers/sourcegraph.ts +373 -0
  121. package/src/core/tools/web-scrapers/spdx.ts +121 -0
  122. package/src/core/tools/web-scrapers/spotify.ts +218 -0
  123. package/src/core/tools/web-scrapers/stackexchange.test.ts +120 -0
  124. package/src/core/tools/web-scrapers/stackoverflow.ts +124 -0
  125. package/src/core/tools/web-scrapers/standards.test.ts +122 -0
  126. package/src/core/tools/web-scrapers/terraform.ts +304 -0
  127. package/src/core/tools/web-scrapers/tldr.ts +51 -0
  128. package/src/core/tools/web-scrapers/twitter.ts +96 -0
  129. package/src/core/tools/web-scrapers/types.ts +234 -0
  130. package/src/core/tools/web-scrapers/utils.ts +162 -0
  131. package/src/core/tools/web-scrapers/vimeo.ts +152 -0
  132. package/src/core/tools/web-scrapers/vscode-marketplace.ts +195 -0
  133. package/src/core/tools/web-scrapers/w3c.ts +163 -0
  134. package/src/core/tools/web-scrapers/wikidata.ts +357 -0
  135. package/src/core/tools/web-scrapers/wikipedia.test.ts +73 -0
  136. package/src/core/tools/web-scrapers/wikipedia.ts +95 -0
  137. package/src/core/tools/web-scrapers/youtube.test.ts +198 -0
  138. package/src/core/tools/web-scrapers/youtube.ts +371 -0
  139. package/src/core/tools/write.ts +21 -18
  140. package/src/core/voice.ts +3 -2
  141. package/src/lib/worktree/collapse.ts +2 -1
  142. package/src/lib/worktree/git.ts +2 -18
  143. package/src/main.ts +59 -3
  144. package/src/modes/interactive/components/extensions/extension-dashboard.ts +33 -19
  145. package/src/modes/interactive/components/extensions/extension-list.ts +15 -8
  146. package/src/modes/interactive/components/hook-editor.ts +2 -1
  147. package/src/modes/interactive/components/model-selector.ts +19 -4
  148. package/src/modes/interactive/interactive-mode.ts +41 -38
  149. package/src/modes/interactive/theme/theme.ts +58 -58
  150. package/src/modes/rpc/rpc-mode.ts +10 -9
  151. package/src/prompts/review-request.md +27 -0
  152. package/src/prompts/reviewer.md +64 -68
  153. package/src/prompts/tools/output.md +22 -3
  154. package/src/prompts/tools/task.md +32 -33
  155. package/src/utils/clipboard.ts +2 -1
  156. package/src/utils/tools-manager.ts +110 -8
  157. package/examples/extensions/subagent/agents/reviewer.md +0 -35
@@ -0,0 +1,51 @@
1
+ import type { RenderResult, SpecialHandler } from "./types";
2
+ import { finalizeOutput, loadPage } from "./types";
3
+
4
+ const TLDR_BASE = "https://raw.githubusercontent.com/tldr-pages/tldr/main/pages";
5
+ const PLATFORMS = ["common", "linux", "osx"] as const;
6
+
7
/**
 * Special handler for tldr page URLs.
 *
 * Recognised forms:
 * - https://tldr.sh/{command}
 * - https://tldr.ostera.io/{command}
 *
 * The command is taken from the URL path (a leading "/" and a trailing ".md"
 * are stripped) and looked up as a raw markdown page under TLDR_BASE, trying
 * each PLATFORMS entry in order. The first platform that returns non-blank
 * content wins.
 *
 * @param url - URL to inspect.
 * @param timeout - Timeout forwarded unchanged to loadPage for every lookup.
 * @param signal - Optional abort signal forwarded to loadPage.
 * @returns The rendered page, or null when the host is not a tldr host, the
 *   path is empty or nested, no platform has the page, or anything throws.
 */
export const handleTldr: SpecialHandler = async (
	url: string,
	timeout: number,
	signal?: AbortSignal,
): Promise<RenderResult | null> => {
	try {
		const { hostname, pathname } = new URL(url);
		const isTldrHost = hostname === "tldr.sh" || hostname === "tldr.ostera.io";
		if (!isTldrHost) return null;

		// "/tar" and "/tar.md" both become "tar"; nested paths are rejected.
		const command = pathname.replace(/^\//, "").replace(/\.md$/, "");
		if (command === "" || command.includes("/")) return null;

		const fetchedAt = new Date().toISOString();

		for (const platform of PLATFORMS) {
			const rawUrl = `${TLDR_BASE}/${platform}/${command}.md`;
			const page = await loadPage(rawUrl, { timeout, signal });

			// Missing page or blank body: move on to the next platform.
			if (!page.ok || !page.content.trim()) continue;

			const { content, truncated } = finalizeOutput(page.content);
			return {
				url,
				finalUrl: rawUrl,
				contentType: "text/markdown",
				method: "tldr",
				content,
				fetchedAt,
				truncated,
				notes: [`Fetched from tldr-pages (${platform})`],
			};
		}
	} catch {
		// Invalid URL or unexpected failure: report "not handled" below.
	}

	return null;
};
@@ -0,0 +1,96 @@
1
+ import { parse as parseHtml } from "node-html-parser";
2
+ import type { RenderResult, SpecialHandler } from "./types";
3
+ import { finalizeOutput, loadPage } from "./types";
4
+
5
+ const NITTER_INSTANCES = [
6
+ "nitter.privacyredirect.com",
7
+ "nitter.tiekoetter.com",
8
+ "nitter.poast.org",
9
+ "nitter.woodland.cafe",
10
+ ];
11
+
12
/**
 * Special handler for Twitter/X URLs, served through Nitter mirrors.
 *
 * Each host in NITTER_INSTANCES is tried in order with the original URL path
 * (the query string is not forwarded). The first mirror whose page contains a
 * `.tweet-content` element is rendered to markdown, followed by up to nine
 * further `.timeline-item .tweet-content` entries as thread/replies.
 *
 * @param url - URL to inspect.
 * @param timeout - Caller timeout; each mirror request uses min(timeout, 10).
 * @param signal - Optional abort signal forwarded to loadPage.
 * @returns null when the host is not a Twitter/X host or the signal is
 *   aborted; the rendered tweet when a mirror succeeds; otherwise a
 *   "twitter-blocked" plain-text result explaining that no mirror worked.
 */
export const handleTwitter: SpecialHandler = async (
	url: string,
	timeout: number,
	signal?: AbortSignal,
): Promise<RenderResult | null> => {
	try {
		const target = new URL(url);
		const twitterHosts = ["twitter.com", "x.com", "www.twitter.com", "www.x.com"];
		if (!twitterHosts.includes(target.hostname)) {
			return null;
		}

		const fetchedAt = new Date().toISOString();
		const perInstanceTimeout = Math.min(timeout, 10);

		for (const instance of NITTER_INSTANCES) {
			const nitterUrl = `https://${instance}${target.pathname}`;
			const page = await loadPage(nitterUrl, { timeout: perInstanceTimeout, signal });

			// Bodies of 500 characters or fewer are skipped — presumably error/placeholder pages.
			if (!page.ok || page.content.length <= 500) continue;

			const doc = parseHtml(page.content);
			const pick = (selector: string) => doc.querySelector(selector)?.text?.trim();

			const tweetContent = pick(".tweet-content");
			if (!tweetContent) continue;

			const fullname = pick(".fullname");
			const username = pick(".username");
			const date = pick(".tweet-date a");
			const stats = pick(".tweet-stats");

			const parts: string[] = [`# Tweet by ${fullname || "Unknown"} (${username || "@?"})\n\n`];
			if (date) parts.push(`*${date}*\n\n`);
			parts.push(`${tweetContent}\n\n`);
			if (stats) parts.push(`---\n${stats.replace(/\s+/g, " ")}\n`);

			// The first match is the tweet itself; the rest are rendered as thread/replies.
			const threadItems = doc.querySelectorAll(".timeline-item .tweet-content");
			if (threadItems.length > 1) {
				parts.push(`\n---\n\n## Thread/Replies\n\n`);
				for (const reply of Array.from(threadItems).slice(1, 10)) {
					const replyUser = reply.parentNode?.querySelector(".username")?.text?.trim();
					parts.push(`**${replyUser || "@?"}**: ${reply.text?.trim()}\n\n`);
				}
			}

			const { content, truncated } = finalizeOutput(parts.join(""));
			return {
				url,
				finalUrl: nitterUrl,
				contentType: "text/markdown",
				method: "twitter-nitter",
				content,
				fetchedAt,
				truncated,
				notes: [`Via Nitter: ${instance}`],
			};
		}
	} catch {
		// Swallowed on purpose: the abort check and fallback below cover every failure.
	}

	if (signal?.aborted) {
		return null;
	}

	// X.com blocks all bots - return a helpful error instead of falling through
	return {
		url,
		finalUrl: url,
		contentType: "text/plain",
		method: "twitter-blocked",
		content:
			"Twitter/X blocks automated access. Nitter instances were unavailable.\n\nTry:\n- Opening the link in a browser\n- Using a different Nitter instance manually\n- Checking if the tweet is available via an archive service",
		fetchedAt: new Date().toISOString(),
		truncated: false,
		notes: ["X.com blocks bots; Nitter instances unavailable"],
	};
};
@@ -0,0 +1,234 @@
1
+ /**
2
+ * Shared types and utilities for web-fetch handlers
3
+ */
4
+
5
/** Result produced by a web-fetch handler for a single URL. */
export interface RenderResult {
	/** URL the handler was asked to fetch. */
	url: string;
	/** URL the content was actually taken from (may equal `url`). */
	finalUrl: string;
	/** Content type label of `content`, e.g. "text/markdown" or "text/plain". */
	contentType: string;
	/** Short tag naming the strategy that produced the result, e.g. "tldr". */
	method: string;
	/** The rendered text. */
	content: string;
	/** Timestamp string recording when the fetch happened (handlers use `Date#toISOString`). */
	fetchedAt: string;
	/** True when `content` was cut short. */
	truncated: boolean;
	/** Free-form remarks about how the result was obtained. */
	notes: string[];
}
15
+
16
/**
 * Signature shared by the site-specific handlers.
 *
 * A handler resolves to a RenderResult when it handled the URL, or to null
 * when it did not (handlers in this package return null for foreign hosts
 * and on failure).
 */
export type SpecialHandler = (
	url: string,
	timeout: number,
	signal?: AbortSignal,
) => Promise<RenderResult | null>;
17
+
18
+ export const MAX_OUTPUT_CHARS = 500_000;
19
+ const MAX_BYTES = 50 * 1024 * 1024;
20
+
21
+ const USER_AGENTS = [
22
+ "curl/8.0",
23
+ "Mozilla/5.0 (compatible; TextBot/1.0)",
24
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
25
+ ];
26
+
27
/** An abort signal for one request, plus the teardown that must follow it. */
export interface RequestSignal {
	/** Aborts when the timeout elapses or when the caller's signal aborts. */
	signal: AbortSignal;
	/** Clears the pending timeout and detaches the listener from the caller's signal. */
	cleanup: () => void;
}

/**
 * Build an AbortSignal that fires after `timeoutMs` milliseconds or as soon
 * as the optional caller `signal` aborts, whichever comes first.
 *
 * If the caller's signal is already aborted, the returned signal is aborted
 * immediately and no timer is left running.
 *
 * @param timeoutMs - Timeout in milliseconds.
 * @param signal - Optional caller signal to chain onto.
 * @returns The derived signal and a `cleanup` callback; call `cleanup` once
 *   the request has settled.
 */
export function createRequestSignal(timeoutMs: number, signal?: AbortSignal): RequestSignal {
	const controller = new AbortController();
	const abort = (): void => controller.abort();
	let timer: ReturnType<typeof setTimeout> | undefined = setTimeout(abort, timeoutMs);

	const stopTimer = (): void => {
		if (timer === undefined) return;
		clearTimeout(timer);
		timer = undefined;
	};

	if (signal?.aborted) {
		stopTimer();
		controller.abort();
	} else if (signal) {
		signal.addEventListener("abort", abort, { once: true });
	}

	return {
		signal: controller.signal,
		cleanup: () => {
			stopTimer();
			signal?.removeEventListener("abort", abort);
		},
	};
}
59
+
60
/**
 * Heuristic: does this response look like an anti-bot block page?
 *
 * Only 403 and 503 responses are considered; the body must also contain one
 * of a fixed set of marker phrases (case-insensitive).
 */
function isBotBlocked(status: number, content: string): boolean {
	if (status !== 403 && status !== 503) return false;

	const haystack = content.toLowerCase();
	const markers = ["cloudflare", "captcha", "challenge", "blocked", "access denied", "bot detection"];
	return markers.some((marker) => haystack.includes(marker));
}
74
+
75
/**
 * Tidy handler output: collapse runs of three or more newlines into a blank
 * line, trim surrounding whitespace, then cap the result at MAX_OUTPUT_CHARS.
 *
 * @param content - Raw text to clean up.
 * @returns The cleaned text and whether it had to be cut to fit the cap.
 */
export function finalizeOutput(content: string): { content: string; truncated: boolean } {
	const collapsed = content.replace(/\n{3,}/g, "\n\n").trim();

	if (collapsed.length <= MAX_OUTPUT_CHARS) {
		return { content: collapsed, truncated: false };
	}
	return { content: collapsed.slice(0, MAX_OUTPUT_CHARS), truncated: true };
}
86
+
87
/** Options accepted by loadPage; every field is optional. */
export interface LoadPageOptions {
	/** Per-attempt timeout in seconds (loadPage multiplies it by 1000); defaults to 20. */
	timeout?: number;
	/** Extra request headers, applied after — and so overriding — the built-in ones. */
	headers?: Record<string, string>;
	/** HTTP method; defaults to "GET". */
	method?: string;
	/** Request body; only attached when defined. */
	body?: string;
	/** Byte count after which reading the response stops; defaults to MAX_BYTES. */
	maxBytes?: number;
	/** Caller abort signal; an aborted signal makes loadPage return a failed result. */
	signal?: AbortSignal;
}
95
+
96
/** Outcome of a loadPage call. */
export interface LoadPageResult {
	/** Response body decoded as text; empty when the request failed outright. */
	content: string;
	/** Lower-cased media type without parameters, or "" when unknown. */
	contentType: string;
	/** URL reported by the response, or the requested URL when no response was received. */
	finalUrl: string;
	/** True only when a response was read and its status was a success. */
	ok: boolean;
	/** HTTP status, present whenever a response was received. */
	status?: number;
}
103
+
104
/**
 * Fetch a page as text with a per-attempt timeout and a size limit.
 *
 * One attempt is made per entry in USER_AGENTS. A further attempt is made
 * only when the response looks like an anti-bot block (see isBotBlocked) or
 * when the request throws without the caller's signal being aborted. The
 * function never throws: every failure is reported as `ok: false`.
 *
 * @param url - URL to request.
 * @param options - See LoadPageOptions; `timeout` is in seconds.
 * @returns The decoded body and response metadata. Reading stops once more
 *   than `maxBytes` have been received, so `content` may be incomplete (the
 *   chunk that crossed the limit is kept).
 */
export async function loadPage(url: string, options: LoadPageOptions = {}): Promise<LoadPageResult> {
	const { timeout = 20, headers = {}, maxBytes = MAX_BYTES, signal, method = "GET", body } = options;

	// Fresh object per call site so callers can never share a mutable result.
	const failure = (): LoadPageResult => ({ content: "", contentType: "", finalUrl: url, ok: false });

	for (const [attempt, userAgent] of USER_AGENTS.entries()) {
		const isLastAttempt = attempt === USER_AGENTS.length - 1;

		if (signal?.aborted) {
			return failure();
		}

		const { signal: requestSignal, cleanup } = createRequestSignal(timeout * 1000, signal);

		try {
			const requestInit: RequestInit = {
				signal: requestSignal,
				method,
				headers: {
					"User-Agent": userAgent,
					Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
					"Accept-Language": "en-US,en;q=0.5",
					...headers,
				},
				redirect: "follow",
			};

			if (body !== undefined) {
				requestInit.body = body;
			}

			const response = await fetch(url, requestInit);

			const contentType = response.headers.get("content-type")?.split(";")[0]?.trim().toLowerCase() ?? "";
			const finalUrl = response.url;

			const reader = response.body?.getReader();
			if (!reader) {
				return { content: "", contentType, finalUrl, ok: false, status: response.status };
			}

			const chunks: Uint8Array[] = [];
			let totalSize = 0;

			while (true) {
				const { done, value } = await reader.read();
				if (done) break;

				chunks.push(value);
				totalSize += value.length;

				if (totalSize > maxBytes) {
					// cancel() returns a promise; swallow a rejection so it cannot
					// surface as an unhandled rejection after we have moved on.
					void reader.cancel().catch(() => undefined);
					break;
				}
			}

			const content = new TextDecoder().decode(Buffer.concat(chunks));

			// Blocked by bot detection: retry with the next user agent if one is left.
			if (isBotBlocked(response.status, content) && !isLastAttempt) {
				continue;
			}

			return { content, contentType, finalUrl, ok: response.ok, status: response.status };
		} catch {
			// Caller abort or out of attempts: give up. Otherwise fall through to the next attempt.
			if (signal?.aborted || isLastAttempt) {
				return failure();
			}
		} finally {
			cleanup();
		}
	}

	return failure();
}
187
+
188
/**
 * Format large numbers with one decimal and a K/M suffix
 * (1000 -> "1.0K", 1500000 -> "1.5M"). Values below 1000 are returned as-is.
 *
 * @param n - Count to format.
 * @returns The abbreviated string.
 */
export function formatCount(n: number): string {
	if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`;
	if (n >= 1_000) {
		const thousands = (n / 1_000).toFixed(1);
		// Values just under a million round up to "1000.0"; promote them
		// to the next unit instead of printing "1000.0K".
		return thousands === "1000.0" ? "1.0M" : `${thousands}K`;
	}
	return String(n);
}
196
+
197
/**
 * Convert basic HTML to markdown using a fixed sequence of regex rewrites.
 *
 * Handles code blocks, inline code, bold/italic, links whose `href` is the
 * first attribute, paragraphs, line breaks, list items, headings and
 * blockquotes. Any remaining tag is stripped, then a small set of entities is
 * decoded. Only attribute-less, lower-case forms of most tags are recognised;
 * other variants fall through to the generic tag strip.
 *
 * @param html - HTML fragment.
 * @returns Markdown-ish text with excess blank lines collapsed and trimmed.
 */
export function htmlToBasicMarkdown(html: string): string {
	return (
		html
			.replace(/<pre><code[^>]*>/g, "\n```\n")
			.replace(/<\/code><\/pre>/g, "\n```\n")
			.replace(/<code>/g, "`")
			.replace(/<\/code>/g, "`")
			.replace(/<strong>/g, "**")
			.replace(/<\/strong>/g, "**")
			.replace(/<b>/g, "**")
			.replace(/<\/b>/g, "**")
			.replace(/<em>/g, "*")
			.replace(/<\/em>/g, "*")
			.replace(/<i>/g, "*")
			.replace(/<\/i>/g, "*")
			.replace(/<a href="([^"]+)"[^>]*>([^<]+)<\/a>/g, "[$2]($1)")
			.replace(/<p>/g, "\n\n")
			.replace(/<\/p>/g, "")
			.replace(/<br\s*\/?>/g, "\n")
			.replace(/<li>/g, "- ")
			.replace(/<\/li>/g, "\n")
			.replace(/<\/?[uo]l>/g, "\n")
			.replace(/<h(\d)>/g, (_, n) => `\n${"#".repeat(parseInt(n, 10))} `)
			.replace(/<\/h\d>/g, "\n")
			.replace(/<blockquote>/g, "\n> ")
			.replace(/<\/blockquote>/g, "\n")
			// Strip whatever tags are left before decoding, so decoded "<" / ">"
			// are never mistaken for markup.
			.replace(/<[^>]+>/g, "")
			.replace(/&lt;/g, "<")
			.replace(/&gt;/g, ">")
			.replace(/&quot;/g, '"')
			.replace(/&#39;/g, "'")
			.replace(/&nbsp;/g, " ")
			// "&amp;" must be decoded last: doing it earlier turns an escaped
			// entity such as "&amp;quot;" into "&quot;" and then into '"'.
			.replace(/&amp;/g, "&")
			.replace(/\n{3,}/g, "\n\n")
			.trim()
	);
}
@@ -0,0 +1,162 @@
1
+ import { tmpdir } from "node:os";
2
+ import * as path from "node:path";
3
+ import { nanoid } from "nanoid";
4
+ import { ensureTool } from "../../../utils/tools-manager";
5
+ import { createRequestSignal } from "./types";
6
+
7
+ const MAX_BYTES = 50 * 1024 * 1024; // 50MB for binary files
8
+
9
/** Captured outcome of a synchronous child process run by exec. */
interface ExecResult {
	/** Standard output as a string ("" when none was captured). */
	stdout: string;
	/** Standard error as a string ("" when none was captured). */
	stderr: string;
	/** True when the process exited with code 0. */
	ok: boolean;
	/** Exit code, or -1 when the process reported none. */
	exitCode: number;
}
15
+
16
/** Options object type accepted as the second argument of `Bun.spawnSync`. */
type SpawnSyncOptions = NonNullable<Parameters<typeof Bun.spawnSync>[1]>;
17
+
18
/**
 * Run a command synchronously and capture its output.
 *
 * @param cmd - Executable to run.
 * @param args - Arguments passed to the executable.
 * @param options - `input` is handed to the child as stdin (stdin is ignored
 *   when omitted). `timeout` limits how long the child may run; it is treated
 *   as seconds — NOTE(review): inferred from the sibling fetch helper, which
 *   multiplies the same caller value by 1000; confirm against callers.
 * @returns Captured stdout/stderr and the exit status. A child killed by the
 *   timeout is reported as `ok: false`.
 */
function exec(cmd: string, args: string[], options?: { timeout?: number; input?: string | Buffer }): ExecResult {
	const stdin = (options?.input ?? "ignore") as SpawnSyncOptions["stdin"];

	// Previously `timeout` was accepted but never applied, so a hung child
	// blocked the caller indefinitely. Bun expects milliseconds.
	const timeoutSeconds = options?.timeout;
	const timeoutOption =
		timeoutSeconds !== undefined && Number.isFinite(timeoutSeconds) && timeoutSeconds > 0
			? { timeout: timeoutSeconds * 1000 }
			: {};

	const result = Bun.spawnSync([cmd, ...args], {
		stdin,
		stdout: "pipe",
		stderr: "pipe",
		...timeoutOption,
	});
	return {
		stdout: result.stdout?.toString() ?? "",
		stderr: result.stderr?.toString() ?? "",
		ok: result.exitCode === 0,
		exitCode: result.exitCode ?? -1,
	};
}
32
+
33
/** Outcome of converting a document to text. */
export interface ConvertResult {
	/** Converter output; may hold partial output even when `ok` is false. */
	content: string;
	/** True when the conversion succeeded. */
	ok: boolean;
	/** Human-readable failure reason, set when `ok` is false. */
	error?: string;
}
38
+
39
/** Outcome of downloading a resource as raw bytes. */
export interface BinaryFetchResult {
	/** Downloaded bytes; empty on failure. */
	buffer: Buffer;
	/** Raw `content-type` response header, or "" when absent or no response arrived. */
	contentType: string;
	/** Raw `content-disposition` response header, when present. */
	contentDisposition?: string;
	/** True when the download completed within the size limit. */
	ok: boolean;
	/** HTTP status, present whenever a response was received. */
	status?: number;
	/** Human-readable failure reason, set when `ok` is false. */
	error?: string;
}
47
+
48
/**
 * Convert a document buffer to text by running the `markitdown` tool on a
 * temporary file.
 *
 * @param content - Raw document bytes; rejected when larger than MAX_BYTES.
 * @param extensionHint - File extension (including the dot) given to the temp
 *   file; ".bin" is used when empty.
 * @param timeout - Timeout forwarded to the process runner.
 * @param signal - Optional abort signal, checked before any work is started.
 * @returns The converter's stdout and a success flag. On failure `error`
 *   holds stderr, or a generic message with the exit code when stderr is empty.
 */
export async function convertWithMarkitdown(
	content: Buffer,
	extensionHint: string,
	timeout: number,
	signal?: AbortSignal,
): Promise<ConvertResult> {
	if (signal?.aborted) {
		return { content: "", ok: false, error: "aborted" };
	}

	const markitdown = await ensureTool("markitdown", true);
	if (!markitdown) {
		return { content: "", ok: false, error: "markitdown not available" };
	}

	// ensureTool is awaited, so the caller may have aborted in the meantime;
	// re-check before touching the filesystem or spawning a process.
	if (signal?.aborted) {
		return { content: "", ok: false, error: "aborted" };
	}

	if (content.length > MAX_BYTES) {
		return { content: "", ok: false, error: `content exceeds ${MAX_BYTES} bytes` };
	}

	// Write to temp file with extension hint
	const ext = extensionHint || ".bin";
	const tmpFile = path.join(tmpdir(), `omp-convert-${nanoid()}${ext}`);

	try {
		await Bun.write(tmpFile, content);
		const result = exec(markitdown, [tmpFile], { timeout });
		if (!result.ok) {
			const stderr = result.stderr.trim();
			return {
				content: result.stdout,
				ok: false,
				error: stderr.length > 0 ? stderr : `markitdown failed (exit ${result.exitCode})`,
			};
		}
		return { content: result.stdout, ok: true };
	} finally {
		// Best-effort cleanup: a failed delete must not mask the conversion result.
		try {
			await Bun.$`rm ${tmpFile}`.quiet();
		} catch {}
	}
}
90
+
91
/**
 * Download a URL as raw bytes, refusing anything larger than MAX_BYTES.
 *
 * The size limit is enforced twice: up front from the `content-length` header
 * when present, and while streaming the body, so a missing or wrong header
 * cannot cause more than roughly MAX_BYTES to be buffered.
 *
 * @param url - URL to download.
 * @param timeout - Timeout in seconds for the whole request.
 * @param signal - Optional caller abort signal.
 * @returns The bytes and response metadata; never throws. Failures are
 *   reported with `ok: false` and an `error` message.
 */
export async function fetchBinary(url: string, timeout: number, signal?: AbortSignal): Promise<BinaryFetchResult> {
	if (signal?.aborted) {
		return { buffer: Buffer.alloc(0), contentType: "", ok: false, error: "aborted" };
	}

	const { signal: requestSignal, cleanup } = createRequestSignal(timeout * 1000, signal);

	try {
		const response = await fetch(url, {
			signal: requestSignal,
			headers: {
				"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/131.0.0.0",
			},
			redirect: "follow",
		});

		const contentType = response.headers.get("content-type") ?? "";
		const contentDisposition = response.headers.get("content-disposition") ?? undefined;

		// Shared shape for every failure that happens after headers arrived.
		const reject = (error: string): BinaryFetchResult => ({
			buffer: Buffer.alloc(0),
			contentType,
			contentDisposition,
			ok: false,
			status: response.status,
			error,
		});

		if (!response.ok) {
			return reject(`status ${response.status}`);
		}

		const contentLength = response.headers.get("content-length");
		if (contentLength) {
			const size = Number.parseInt(contentLength, 10);
			if (Number.isFinite(size) && size > MAX_BYTES) {
				return reject(`content-length ${size} exceeds ${MAX_BYTES}`);
			}
		}

		const reader = response.body?.getReader();
		if (!reader) {
			// No body stream: same outcome as reading an empty body.
			return { buffer: Buffer.alloc(0), contentType, contentDisposition, ok: true, status: response.status };
		}

		const chunks: Uint8Array[] = [];
		let total = 0;

		while (true) {
			const { done, value } = await reader.read();
			if (done) break;

			total += value.byteLength;
			if (total > MAX_BYTES) {
				// Stop downloading as soon as the limit is crossed.
				await reader.cancel().catch(() => undefined);
				return reject(`response exceeds ${MAX_BYTES} bytes`);
			}
			chunks.push(value);
		}

		const buffer = Buffer.concat(chunks, total);
		return { buffer, contentType, contentDisposition, ok: true, status: response.status };
	} catch (err) {
		if (signal?.aborted) {
			return { buffer: Buffer.alloc(0), contentType: "", ok: false, error: "aborted" };
		}
		return {
			buffer: Buffer.alloc(0),
			contentType: "",
			ok: false,
			error: `request failed: ${String(err)}`,
		};
	} finally {
		cleanup();
	}
}
@@ -0,0 +1,152 @@
1
+ import type { SpecialHandler } from "./types";
2
+ import { finalizeOutput, loadPage } from "./types";
3
+
4
/**
 * Subset of the Vimeo oEmbed JSON response read by this module.
 * The shape is asserted on the parsed JSON, not validated at runtime.
 */
interface VimeoOEmbed {
	/** Video title. */
	title: string;
	/** Display name of the uploader. */
	author_name: string;
	/** Link to the uploader. */
	author_url: string;
	/** Optional video description. */
	description?: string;
	/** Length of the video; formatted as a duration in seconds. */
	duration: number;
	/** Thumbnail image URL. */
	thumbnail_url: string;
	/** Upload date as provided by the API. */
	upload_date: string;
	/** Numeric video identifier. */
	video_id: number;
}
14
+
15
/** One progressive rendition listed in the player config. */
interface VimeoProgressiveFile {
	/** Quality label as given by the config. */
	quality: string;
	/** Frame width. */
	width: number;
	/** Frame height. */
	height: number;
	/** Frames per second. */
	fps: number;
}

/**
 * Subset of the Vimeo player config JSON read by this module.
 * Every level is optional; the shape is asserted on the parsed JSON, not
 * validated at runtime.
 */
interface VimeoVideoConfig {
	video?: {
		title?: string;
		duration?: number;
		owner?: {
			name?: string;
			url?: string;
		};
		thumbs?: {
			base?: string;
		};
	};
	request?: {
		files?: {
			progressive?: Array<VimeoProgressiveFile>;
		};
	};
}
38
+
39
/**
 * Format seconds into H:MM:SS (one hour or more) or M:SS.
 *
 * Fractional input is floored to whole seconds. Non-finite or negative input
 * is treated as zero so the result is always well-formed ("0:00") rather than
 * "NaN:NaN" or a value with a fractional/negative seconds field.
 *
 * @param seconds - Duration in seconds.
 * @returns The formatted duration.
 */
function formatDuration(seconds: number): string {
	const total = Number.isFinite(seconds) && seconds > 0 ? Math.floor(seconds) : 0;

	const h = Math.floor(total / 3600);
	const m = Math.floor((total % 3600) / 60);
	const s = total % 60;

	const ss = String(s).padStart(2, "0");
	if (h > 0) return `${h}:${String(m).padStart(2, "0")}:${ss}`;
	return `${m}:${ss}`;
}
49
+
50
/**
 * Pull the numeric video ID out of a Vimeo URL.
 *
 * Supported shapes:
 * - player.vimeo.com/video/{id}
 * - vimeo.com/{id}, vimeo.com/{anything}/{id} (the last path segment must be
 *   all digits), with or without the "www." prefix
 *
 * @param url - Candidate URL.
 * @returns The ID as a string, or null when the URL is invalid, the host is
 *   not a recognised Vimeo host, or no numeric ID is found.
 */
function extractVideoId(url: string): string | null {
	let parsed: URL;
	try {
		parsed = new URL(url);
	} catch {
		return null;
	}

	switch (parsed.hostname) {
		case "player.vimeo.com": {
			const embedded = /^\/video\/(\d+)/.exec(parsed.pathname);
			return embedded?.[1] ?? null;
		}
		case "vimeo.com":
		case "www.vimeo.com": {
			const segments = parsed.pathname.split("/").filter(Boolean);
			const tail = segments[segments.length - 1];
			return tail !== undefined && /^\d+$/.test(tail) ? tail : null;
		}
		default:
			return null;
	}
}
78
+
79
/**
 * Special handler for Vimeo video URLs.
 *
 * Resolves the numeric video ID, requests the oEmbed JSON for the canonical
 * `https://vimeo.com/{id}` URL and renders title, author, duration, upload
 * date, description and thumbnail as markdown. A second, optional request to
 * the player config adds up to five available qualities; any failure there is
 * ignored.
 *
 * @param url - URL to inspect.
 * @param timeout - Timeout for the oEmbed request; the config request uses
 *   min(timeout, 5).
 * @param signal - Optional abort signal forwarded to loadPage.
 * @returns The rendered result, or null when the URL is not a Vimeo video,
 *   the oEmbed request fails, or anything throws.
 */
export const handleVimeo: SpecialHandler = async (url: string, timeout: number, signal?: AbortSignal) => {
	try {
		const { hostname } = new URL(url);
		if (!hostname.includes("vimeo.com")) return null;

		const videoId = extractVideoId(url);
		if (!videoId) return null;

		const fetchedAt = new Date().toISOString();

		// Use canonical URL for oEmbed (handles staffpicks and other URL formats)
		const canonicalUrl = `https://vimeo.com/${videoId}`;
		const oembedUrl = `https://vimeo.com/api/oembed.json?url=${encodeURIComponent(canonicalUrl)}`;
		const oembedResult = await loadPage(oembedUrl, { timeout, signal });
		if (!oembedResult.ok) return null;

		const oembed = JSON.parse(oembedResult.content) as VimeoOEmbed;

		const header: string[] = [
			`# ${oembed.title}\n\n`,
			`**Author:** [${oembed.author_name}](${oembed.author_url})\n`,
			`**Duration:** ${formatDuration(oembed.duration)}\n`,
		];
		if (oembed.upload_date) {
			header.push(`**Uploaded:** ${oembed.upload_date}\n`);
		}
		header.push(`**Video ID:** ${videoId}\n\n`);
		if (oembed.description) {
			header.push(`---\n\n## Description\n\n${oembed.description}\n\n`);
		}
		header.push(`---\n\n**Thumbnail:** ${oembed.thumbnail_url}\n`);

		let md = header.join("");

		// Optional extra: available qualities from the player config.
		// Appended incrementally so whatever was written before a failure is kept.
		try {
			const configUrl = `https://player.vimeo.com/video/${videoId}/config`;
			const configResult = await loadPage(configUrl, { timeout: Math.min(timeout, 5), signal });

			if (configResult.ok) {
				const config = JSON.parse(configResult.content) as VimeoVideoConfig;
				const renditions = config.request?.files?.progressive;

				if (renditions && renditions.length > 0) {
					md += `\n**Available Qualities:**\n`;
					for (const rendition of renditions.slice(0, 5)) {
						md += `- ${rendition.quality}: ${rendition.width}x${rendition.height} @ ${rendition.fps}fps\n`;
					}
				}
			}
		} catch {
			// Config fetch is optional - continue without it
		}

		const { content, truncated } = finalizeOutput(md);
		return {
			url,
			finalUrl: url,
			contentType: "text/markdown",
			method: "vimeo",
			content,
			fetchedAt,
			truncated,
			notes: ["Fetched via Vimeo oEmbed API"],
		};
	} catch {
		return null;
	}
};