@oh-my-pi/pi-coding-agent 14.0.3 → 14.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/CHANGELOG.md +63 -1
  2. package/package.json +11 -8
  3. package/src/config/model-registry.ts +3 -2
  4. package/src/config/model-resolver.ts +33 -25
  5. package/src/config/settings.ts +9 -2
  6. package/src/dap/session.ts +31 -39
  7. package/src/debug/log-formatting.ts +2 -2
  8. package/src/edit/index.ts +2 -0
  9. package/src/edit/modes/chunk.ts +45 -16
  10. package/src/edit/modes/hashline.ts +2 -2
  11. package/src/ipy/executor.ts +3 -7
  12. package/src/ipy/kernel.ts +3 -3
  13. package/src/lsp/client.ts +4 -2
  14. package/src/lsp/index.ts +4 -9
  15. package/src/lsp/lspmux.ts +2 -2
  16. package/src/lsp/utils.ts +27 -143
  17. package/src/modes/components/diff.ts +1 -1
  18. package/src/modes/controllers/event-controller.ts +438 -426
  19. package/src/modes/theme/mermaid-cache.ts +5 -7
  20. package/src/modes/theme/theme.ts +2 -161
  21. package/src/priority.json +8 -0
  22. package/src/prompts/agents/designer.md +1 -2
  23. package/src/prompts/system/system-prompt.md +40 -2
  24. package/src/prompts/tools/chunk-edit.md +66 -38
  25. package/src/prompts/tools/read-chunk.md +10 -1
  26. package/src/sdk.ts +2 -1
  27. package/src/session/agent-session.ts +10 -0
  28. package/src/session/compaction/compaction.ts +1 -1
  29. package/src/tools/ast-edit.ts +2 -2
  30. package/src/tools/browser.ts +84 -21
  31. package/src/tools/fetch.ts +1 -1
  32. package/src/tools/find.ts +40 -94
  33. package/src/tools/gemini-image.ts +1 -0
  34. package/src/tools/index.ts +2 -3
  35. package/src/tools/read.ts +2 -0
  36. package/src/tools/render-utils.ts +1 -1
  37. package/src/tools/report-tool-issue.ts +2 -2
  38. package/src/utils/edit-mode.ts +2 -2
  39. package/src/utils/image-resize.ts +73 -37
  40. package/src/utils/lang-from-path.ts +239 -0
  41. package/src/utils/sixel.ts +2 -2
  42. package/src/web/scrapers/types.ts +50 -32
  43. package/src/web/search/providers/codex.ts +21 -2
@@ -501,6 +501,83 @@ export interface ReadableResult {
501
501
  markdown?: string;
502
502
  }
503
503
 
504
+ type ReadableFormat = "text" | "markdown";
505
+
506
+ /** Trim to non-empty string or undefined. */
507
+ function normalize(text: string | null | undefined): string | undefined {
508
+ const trimmed = text?.trim();
509
+ return trimmed || undefined;
510
+ }
511
+
512
+ /**
513
+ * Extract readable content from raw HTML.
514
+ * Tries Readability (article-isolation scoring) first, then falls back to a
515
+ * CSS selector chain over the same pre-parsed DOM. Returns null if neither
516
+ * path yields usable content.
517
+ */
518
+ export function extractReadableFromHtml(html: string, url: string, format: ReadableFormat): ReadableResult | null {
519
+ const { document } = parseHTML(html);
520
+
521
+ // --- Primary: Readability article extraction ---
522
+ const article = new Readability(document).parse();
523
+ if (article) {
524
+ const result = toReadableResult(url, format, article.textContent, article.content, {
525
+ title: article.title,
526
+ byline: article.byline,
527
+ excerpt: article.excerpt,
528
+ length: article.length,
529
+ });
530
+ if (result) return result;
531
+ }
532
+
533
+ // --- Fallback: CSS selector chain ---
534
+ const candidates = [
535
+ document.querySelector("[data-pagefind-body]"),
536
+ document.querySelector("main article"),
537
+ document.querySelector("article"),
538
+ document.querySelector("main"),
539
+ document.querySelector("[role='main']"),
540
+ document.body,
541
+ ];
542
+ for (const el of candidates) {
543
+ if (!el) continue;
544
+ const innerHTML = el.innerHTML?.trim();
545
+ const textContent = el.textContent?.trim();
546
+ if (!innerHTML || !textContent) continue;
547
+ const result = toReadableResult(url, format, textContent, innerHTML, {
548
+ title: document.title,
549
+ excerpt: textContent.slice(0, 240),
550
+ length: textContent.length,
551
+ });
552
+ if (result) return result;
553
+ }
554
+
555
+ return null;
556
+ }
557
+
558
+ /** Shared builder for both extraction paths. */
559
+ function toReadableResult(
560
+ url: string,
561
+ format: ReadableFormat,
562
+ textContent: string | null | undefined,
563
+ htmlContent: string | null | undefined,
564
+ meta: { title?: string | null; byline?: string | null; excerpt?: string | null; length?: number | null },
565
+ ): ReadableResult | null {
566
+ const text = normalize(textContent);
567
+ const markdown = format === "markdown" ? (normalize(htmlToBasicMarkdown(htmlContent ?? "")) ?? text) : undefined;
568
+ const normalizedText = format === "text" ? text : undefined;
569
+ if (!normalizedText && !markdown) return null;
570
+ return {
571
+ url,
572
+ title: normalize(meta.title),
573
+ byline: normalize(meta.byline),
574
+ excerpt: normalize(meta.excerpt),
575
+ contentLength: meta.length ?? text?.length ?? markdown?.length ?? 0,
576
+ text: normalizedText,
577
+ markdown,
578
+ };
579
+ }
580
+
504
581
  function ensureParam<T>(value: T | undefined, name: string, action: string): T {
505
582
  if (value === undefined || value === null || value === "") {
506
583
  throw new ToolError(`Missing required parameter '${name}' for action '${action}'.`);
@@ -1365,26 +1442,13 @@ export class BrowserTool implements AgentTool<typeof browserSchema, BrowserToolD
1365
1442
  const format = params.format ?? "markdown";
1366
1443
  const html = (await untilAborted(signal, () => page.content())) as string;
1367
1444
  const url = page.url();
1368
- const { document } = parseHTML(html);
1369
- const reader = new Readability(document);
1370
- const article = reader.parse();
1371
- if (!article) {
1445
+ const readable = extractReadableFromHtml(html, url, format);
1446
+ if (!readable) {
1372
1447
  throw new ToolError("Readable content not found");
1373
1448
  }
1374
- const markdown = format === "markdown" ? htmlToBasicMarkdown(article.content ?? "") : undefined;
1375
- const text = format === "text" ? (article.textContent ?? "") : undefined;
1376
- const readable: ReadableResult = {
1377
- url,
1378
- title: article.title ?? undefined,
1379
- byline: article.byline ?? undefined,
1380
- excerpt: article.excerpt ?? undefined,
1381
- contentLength: article.length ?? article.textContent?.length ?? 0,
1382
- text,
1383
- markdown,
1384
- };
1385
1449
  details.url = url;
1386
1450
  details.readable = readable;
1387
- details.result = format === "markdown" ? (markdown ?? "") : (text ?? "");
1451
+ details.result = format === "markdown" ? (readable.markdown ?? "") : (readable.text ?? "");
1388
1452
  return toolResult(details)
1389
1453
  .text(JSON.stringify(readable, null, 2))
1390
1454
  .done();
@@ -1407,13 +1471,12 @@ export class BrowserTool implements AgentTool<typeof browserSchema, BrowserToolD
1407
1471
  buffer = (await untilAborted(signal, () => page.screenshot({ type: "png", fullPage }))) as Buffer;
1408
1472
  }
1409
1473
 
1410
- // Compress for API content (same as pasted images)
1411
- // NOTE: screenshots can be deceptively large (especially PNG) even at modest resolutions,
1412
- // and tool results are immediately embedded in the next LLM request.
1413
- // Use a tighter budget than the global per-image limit to avoid 413 request_too_large.
1474
+ // Compress aggressively for API content — screenshots are the most
1475
+ // frequent image source and land directly in the next LLM request.
1476
+ // 1024px is plenty for OCR/UI inspection; 150KB keeps payloads lean.
1414
1477
  const resized = await resizeImage(
1415
1478
  { type: "image", data: buffer.toBase64(), mimeType: "image/png" },
1416
- { maxBytes: 0.75 * 1024 * 1024 },
1479
+ { maxWidth: 1024, maxHeight: 1024, maxBytes: 150 * 1024, jpegQuality: 70 },
1417
1480
  );
1418
1481
  // Resolve destination: user-defined path > screenshotDir (auto-named) > temp file.
1419
1482
  const screenshotDir = (() => {
@@ -84,7 +84,7 @@ const IMAGE_MIME_BY_EXTENSION = new Map<string, string>([
84
84
  ]);
85
85
  const SUPPORTED_INLINE_IMAGE_MIME_TYPES = new Set(["image/png", "image/jpeg", "image/gif", "image/webp"]);
86
86
  const MAX_INLINE_IMAGE_SOURCE_BYTES = 20 * 1024 * 1024;
87
- const MAX_INLINE_IMAGE_OUTPUT_BYTES = 0.75 * 1024 * 1024;
87
+ const MAX_INLINE_IMAGE_OUTPUT_BYTES = 300 * 1024;
88
88
 
89
89
  // =============================================================================
90
90
  // Utilities
package/src/tools/find.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
3
  import type { AgentTool, AgentToolContext, AgentToolResult, AgentToolUpdateCallback } from "@oh-my-pi/pi-agent-core";
4
- import { FileType, type GlobMatch, glob } from "@oh-my-pi/pi-natives";
4
+ import * as natives from "@oh-my-pi/pi-natives";
5
5
  import type { Component } from "@oh-my-pi/pi-tui";
6
6
  import { Text } from "@oh-my-pi/pi-tui";
7
7
  import { isEnoent, prompt, untilAborted } from "@oh-my-pi/pi-utils";
@@ -124,46 +124,15 @@ export class FindTool implements AgentTool<typeof findSchema, FindToolDetails> {
124
124
  throw new ToolError("Limit must be a positive number");
125
125
  }
126
126
  const includeHidden = hidden ?? true;
127
-
128
- // If custom operations provided with glob, use that instead of fd
129
- if (this.#customOps?.glob) {
130
- if (!(await this.#customOps.exists(searchPath))) {
131
- throw new ToolError(`Path not found: ${scopePath}`);
132
- }
133
-
134
- if (!hasGlob && this.#customOps.stat) {
135
- const stat = await this.#customOps.stat(searchPath);
136
- if (stat.isFile()) {
137
- const files = [scopePath];
138
- const details: FindToolDetails = {
139
- scopePath,
140
- fileCount: 1,
141
- files,
142
- truncated: false,
143
- };
144
- return toolResult(details).text(files.join("\n")).done();
145
- }
146
- }
147
-
148
- const results = await this.#customOps.glob(globPattern, searchPath, {
149
- ignore: ["**/node_modules/**", "**/.git/**"],
150
- limit: effectiveLimit,
151
- });
152
-
153
- if (results.length === 0) {
127
+ const timeoutSignal = AbortSignal.timeout(GLOB_TIMEOUT_MS);
128
+ const combinedSignal = signal ? AbortSignal.any([signal, timeoutSignal]) : timeoutSignal;
129
+ const buildResult = (files: string[]): AgentToolResult<FindToolDetails> => {
130
+ if (files.length === 0) {
154
131
  const details: FindToolDetails = { scopePath, fileCount: 0, files: [], truncated: false };
155
132
  return toolResult(details).text("No files found matching pattern").done();
156
133
  }
157
134
 
158
- // Relativize paths
159
- const relativized = results.map(p => {
160
- if (p.startsWith(searchPath)) {
161
- return p.slice(searchPath.length + 1);
162
- }
163
- return path.relative(searchPath, p);
164
- });
165
-
166
- const listLimit = applyListLimit(relativized, { limit: effectiveLimit });
135
+ const listLimit = applyListLimit(files, { limit: effectiveLimit });
167
136
  const limited = listLimit.items;
168
137
  const limitMeta = listLimit.meta;
169
138
  const rawOutput = limited.join("\n");
@@ -186,6 +155,32 @@ export class FindTool implements AgentTool<typeof findSchema, FindToolDetails> {
186
155
  }
187
156
 
188
157
  return resultBuilder.done();
158
+ };
159
+
160
+ if (this.#customOps?.glob) {
161
+ if (!(await this.#customOps.exists(searchPath))) {
162
+ throw new ToolError(`Path not found: ${scopePath}`);
163
+ }
164
+
165
+ if (!hasGlob && this.#customOps.stat) {
166
+ const stat = await this.#customOps.stat(searchPath);
167
+ if (stat.isFile()) {
168
+ return buildResult([scopePath]);
169
+ }
170
+ }
171
+
172
+ const results = await this.#customOps.glob(globPattern, searchPath, {
173
+ ignore: ["**/node_modules/**", "**/.git/**"],
174
+ limit: effectiveLimit,
175
+ });
176
+ const relativized = results.map(p => {
177
+ if (p.startsWith(searchPath)) {
178
+ return p.slice(searchPath.length + 1);
179
+ }
180
+ return path.relative(searchPath, p);
181
+ });
182
+
183
+ return buildResult(relativized);
189
184
  }
190
185
 
191
186
  let searchStat: fs.Stats;
@@ -199,20 +194,13 @@ export class FindTool implements AgentTool<typeof findSchema, FindToolDetails> {
199
194
  }
200
195
 
201
196
  if (!hasGlob && searchStat.isFile()) {
202
- const files = [scopePath];
203
- const details: FindToolDetails = {
204
- scopePath,
205
- fileCount: 1,
206
- files,
207
- truncated: false,
208
- };
209
- return toolResult(details).text(files.join("\n")).done();
197
+ return buildResult([scopePath]);
210
198
  }
211
199
  if (!searchStat.isDirectory()) {
212
200
  throw new ToolError(`Path is not a directory: ${searchPath}`);
213
201
  }
214
202
 
215
- let matches: GlobMatch[];
203
+ let matches: natives.GlobMatch[];
216
204
  const onUpdateMatches: string[] = [];
217
205
  const updateIntervalMs = 200;
218
206
  let lastUpdate = 0;
@@ -233,27 +221,25 @@ export class FindTool implements AgentTool<typeof findSchema, FindToolDetails> {
233
221
  });
234
222
  };
235
223
  const onMatch = onUpdate
236
- ? (err: Error | null, match: GlobMatch | null) => {
224
+ ? (err: Error | null, match: natives.GlobMatch | null) => {
237
225
  if (err || signal?.aborted || !match) return;
238
226
  let relativePath = match.path;
239
227
  if (!relativePath) return;
240
- if (match.fileType === FileType.Dir && !relativePath.endsWith("/")) {
228
+ if (match.fileType === natives.FileType.Dir && !relativePath.endsWith("/")) {
241
229
  relativePath += "/";
242
230
  }
243
231
  onUpdateMatches.push(relativePath);
244
232
  emitUpdate();
245
233
  }
246
234
  : undefined;
247
- const timeoutSignal = AbortSignal.timeout(GLOB_TIMEOUT_MS);
248
- const combinedSignal = signal ? AbortSignal.any([signal, timeoutSignal]) : timeoutSignal;
249
235
 
250
236
  const doGlob = async (useGitignore: boolean) =>
251
237
  untilAborted(combinedSignal, () =>
252
- glob(
238
+ natives.glob(
253
239
  {
254
240
  pattern: globPattern,
255
241
  path: searchPath,
256
- fileType: FileType.File,
242
+ fileType: natives.FileType.File,
257
243
  hidden: includeHidden,
258
244
  maxResults: effectiveLimit,
259
245
  sortByMtime: true,
@@ -266,7 +252,6 @@ export class FindTool implements AgentTool<typeof findSchema, FindToolDetails> {
266
252
 
267
253
  try {
268
254
  let result = await doGlob(true);
269
- // If gitignore filtering yielded nothing, retry without it
270
255
  if (result.matches.length === 0) {
271
256
  result = await doGlob(false);
272
257
  }
@@ -282,12 +267,7 @@ export class FindTool implements AgentTool<typeof findSchema, FindToolDetails> {
282
267
  throw error;
283
268
  }
284
269
 
285
- if (matches.length === 0) {
286
- const details: FindToolDetails = { scopePath, fileCount: 0, files: [], truncated: false };
287
- return toolResult(details).text("No files found matching pattern").done();
288
- }
289
270
  const relativized: string[] = [];
290
-
291
271
  for (const match of matches) {
292
272
  throwIfAborted(signal);
293
273
  const line = match.path;
@@ -297,9 +277,7 @@ export class FindTool implements AgentTool<typeof findSchema, FindToolDetails> {
297
277
 
298
278
  const hadTrailingSlash = line.endsWith("/") || line.endsWith("\\");
299
279
  let relativePath = line;
300
-
301
- const isDirectory = match.fileType === FileType.Dir;
302
-
280
+ const isDirectory = match.fileType === natives.FileType.Dir;
303
281
  if ((isDirectory || hadTrailingSlash) && !relativePath.endsWith("/")) {
304
282
  relativePath += "/";
305
283
  }
@@ -307,39 +285,7 @@ export class FindTool implements AgentTool<typeof findSchema, FindToolDetails> {
307
285
  relativized.push(relativePath);
308
286
  }
309
287
 
310
- if (relativized.length === 0) {
311
- const details: FindToolDetails = { scopePath, fileCount: 0, files: [], truncated: false };
312
- return toolResult(details).text("No files found matching pattern").done();
313
- }
314
-
315
- // Results are already sorted by mtime from native (sortByMtime: true)
316
-
317
- const listLimit = applyListLimit(relativized, { limit: effectiveLimit });
318
- const limited = listLimit.items;
319
- const limitMeta = listLimit.meta;
320
-
321
- // Apply byte truncation (no line limit since we already have result limit)
322
- const rawOutput = limited.join("\n");
323
- const truncation = truncateHead(rawOutput, { maxLines: Number.MAX_SAFE_INTEGER });
324
-
325
- const resultOutput = truncation.content;
326
- const details: FindToolDetails = {
327
- scopePath,
328
- fileCount: limited.length,
329
- files: limited,
330
- truncated: Boolean(limitMeta.resultLimit || truncation.truncated),
331
- resultLimitReached: limitMeta.resultLimit?.reached,
332
- truncation: truncation.truncated ? truncation : undefined,
333
- };
334
-
335
- const resultBuilder = toolResult(details)
336
- .text(resultOutput)
337
- .limits({ resultLimit: limitMeta.resultLimit?.reached });
338
- if (truncation.truncated) {
339
- resultBuilder.truncation(truncation, { direction: "head" });
340
- }
341
-
342
- return resultBuilder.done();
288
+ return buildResult(relativized);
343
289
  });
344
290
  }
345
291
  }
@@ -728,6 +728,7 @@ export const geminiImageTool: CustomTool<typeof geminiImageSchema, GeminiImageTo
728
728
  headers: {
729
729
  "Content-Type": "application/json",
730
730
  Authorization: `Bearer ${apiKey.apiKey}`,
731
+ "X-Title": "Oh-My-Pi",
731
732
  },
732
733
  body: JSON.stringify(requestBody),
733
734
  signal: requestSignal,
@@ -1,7 +1,7 @@
1
1
  import type { AgentTool } from "@oh-my-pi/pi-agent-core";
2
2
  import type { ToolChoice } from "@oh-my-pi/pi-ai";
3
3
  import type { SearchDb } from "@oh-my-pi/pi-natives";
4
- import { $env, logger } from "@oh-my-pi/pi-utils";
4
+ import { $env, $flag, isBunTestRuntime, logger } from "@oh-my-pi/pi-utils";
5
5
  import type { AsyncJobManager } from "../async";
6
6
  import type { PromptTemplate } from "../config/prompt-templates";
7
7
  import type { Settings } from "../config/settings";
@@ -297,8 +297,7 @@ export async function createTools(session: ToolSession, toolNames?: string[]): P
297
297
  !skipPythonPreflight &&
298
298
  pythonMode !== "bash-only" &&
299
299
  (requestedTools === undefined || requestedTools.includes("python"));
300
- const isTestEnv = Bun.env.BUN_ENV === "test" || Bun.env.NODE_ENV === "test";
301
- const skipPythonWarm = isTestEnv || $env.PI_PYTHON_SKIP_CHECK === "1";
300
+ const skipPythonWarm = isBunTestRuntime() || $flag("PI_PYTHON_SKIP_CHECK");
302
301
  if (shouldCheckPython) {
303
302
  const availability = await logger.time("createTools:pythonCheck", checkPythonKernelAvailability, session.cwd);
304
303
  pythonAvailable = availability.ok;
package/src/tools/read.ts CHANGED
@@ -14,6 +14,7 @@ import {
14
14
  parseChunkReadPath,
15
15
  parseChunkSelector,
16
16
  resolveAnchorStyle,
17
+ resolveChunkAutoIndent,
17
18
  } from "../edit/modes/chunk";
18
19
  import type { RenderResultOptions } from "../extensibility/custom-tools/types";
19
20
  import { parseInternalUrl } from "../internal-urls/parse";
@@ -449,6 +450,7 @@ export class ReadTool implements AgentTool<typeof readSchema, ReadToolDetails> {
449
450
  resolveEditMode(session) === "chunk"
450
451
  ? prompt.render(readChunkDescription, {
451
452
  anchorStyle: resolveAnchorStyle(session.settings),
453
+ chunkAutoIndent: resolveChunkAutoIndent(),
452
454
  })
453
455
  : prompt.render(readDescription, {
454
456
  DEFAULT_LIMIT: String(this.#defaultLimit),
@@ -14,7 +14,7 @@ import type { Theme } from "../modes/theme/theme";
14
14
  import { formatDimensionNote, type ResizedImage } from "../utils/image-resize";
15
15
 
16
16
  export { Ellipsis } from "@oh-my-pi/pi-natives";
17
- export { replaceTabs, truncateToWidth } from "@oh-my-pi/pi-tui";
17
+ export { replaceTabs, truncateToWidth, wrapTextWithAnsi } from "@oh-my-pi/pi-tui";
18
18
 
19
19
  // =============================================================================
20
20
  // Standardized Display Constants
@@ -8,7 +8,7 @@
8
8
  import { Database } from "bun:sqlite";
9
9
  import path from "node:path";
10
10
  import type { AgentTool } from "@oh-my-pi/pi-agent-core";
11
- import { $env, getAgentDir, logger, VERSION } from "@oh-my-pi/pi-utils";
11
+ import { $flag, getAgentDir, logger, VERSION } from "@oh-my-pi/pi-utils";
12
12
  import { Type } from "@sinclair/typebox";
13
13
  import type { Settings } from "..";
14
14
  import type { ToolSession } from "./index";
@@ -19,7 +19,7 @@ const ReportToolIssueParams = Type.Object({
19
19
  });
20
20
 
21
21
  export function isAutoQaEnabled(settings?: Settings): boolean {
22
- return $env.PI_AUTO_QA === "1" || !!settings?.get("dev.autoqa");
22
+ return $flag("PI_AUTO_QA") || !!settings?.get("dev.autoqa");
23
23
  }
24
24
 
25
25
  export function getAutoQaDbPath(): string {
@@ -1,4 +1,4 @@
1
- import { $env } from "@oh-my-pi/pi-utils";
1
+ import { $env, $flag } from "@oh-my-pi/pi-utils";
2
2
 
3
3
  export type EditMode = "replace" | "patch" | "hashline" | "chunk";
4
4
 
@@ -36,7 +36,7 @@ export function resolveEditMode(session: EditModeSessionLike): EditMode {
36
36
  const envMode = normalizeEditMode($env.PI_EDIT_VARIANT);
37
37
  if (envMode) return envMode;
38
38
 
39
- if ($env.PI_STRICT_EDIT_MODE === "1") {
39
+ if (!$flag("PI_STRICT_EDIT_MODE")) {
40
40
  if (activeModel?.includes("spark")) return "replace";
41
41
  if (activeModel?.includes("nano")) return "replace";
42
42
  if (activeModel?.includes("mini")) return "replace";
@@ -2,10 +2,10 @@ import type { ImageContent } from "@oh-my-pi/pi-ai";
2
2
  import { ImageFormat, PhotonImage, SamplingFilter } from "@oh-my-pi/pi-natives";
3
3
 
4
4
  export interface ImageResizeOptions {
5
- maxWidth?: number; // Default: 2000
6
- maxHeight?: number; // Default: 2000
7
- maxBytes?: number; // Default: 4.5MB (below Anthropic's 5MB limit)
8
- jpegQuality?: number; // Default: 80
5
+ maxWidth?: number; // Default: 1568
6
+ maxHeight?: number; // Default: 1568
7
+ maxBytes?: number; // Default: 500KB
8
+ jpegQuality?: number; // Default: 75
9
9
  }
10
10
 
11
11
  export interface ResizedImage {
@@ -19,22 +19,25 @@ export interface ResizedImage {
19
19
  get data(): string;
20
20
  }
21
21
 
22
- // 4.5MB - provides headroom below Anthropic's 5MB limit
23
- const DEFAULT_MAX_BYTES = 4.5 * 1024 * 1024;
22
+ // 500KB target — aggressive compression; Anthropic's 5MB per-image cap is rarely the
23
+ // binding constraint once images are downsized to 1568px (Anthropic's internal threshold).
24
+ const DEFAULT_MAX_BYTES = 500 * 1024;
24
25
 
25
26
  const DEFAULT_OPTIONS: Required<ImageResizeOptions> = {
26
- maxWidth: 2000,
27
- maxHeight: 2000,
27
+ // 1568px — Anthropic downscales anything larger; OpenAI tiles at 768px;
28
+ // sending bigger pixels wastes bandwidth the model never sees.
29
+ maxWidth: 1568,
30
+ maxHeight: 1568,
28
31
  maxBytes: DEFAULT_MAX_BYTES,
29
- jpegQuality: 80,
32
+ jpegQuality: 75,
30
33
  };
31
34
 
32
- /** Helper to pick the smaller of two buffers */
33
- function pickSmaller(
34
- a: { buffer: Uint8Array; mimeType: string },
35
- b: { buffer: Uint8Array; mimeType: string },
36
- ): { buffer: Uint8Array; mimeType: string } {
37
- return a.buffer.length <= b.buffer.length ? a : b;
35
+ /** Pick the smallest of N encoded buffers. */
36
+ function pickSmallest(...candidates: Array<{ buffer: Uint8Array; mimeType: string }>): {
37
+ buffer: Uint8Array;
38
+ mimeType: string;
39
+ } {
40
+ return candidates.reduce((best, c) => (c.buffer.length < best.buffer.length ? c : best));
38
41
  }
39
42
 
40
43
  /** Polyfill for Buffer.toBase64, technically since it derives from Uint8Array it should exist but Bun reasons... */
@@ -43,17 +46,22 @@ Buffer.prototype.toBase64 = function (this: Buffer) {
43
46
  };
44
47
 
45
48
  /**
46
- * Resize an image to fit within the specified max dimensions and file size.
47
- * Returns the original image if it already fits within the limits.
49
+ * Resize and recompress an image to fit within the specified max dimensions and file size.
48
50
  *
49
- * Uses Photon for image processing. If Photon is not available,
50
- * returns the original image unchanged.
51
+ * Defaults target Anthropic's internal 1568px downscale threshold and produce small
52
+ * lossy JPEG output suitable for tool-call payloads (~100–500KB typical).
51
53
  *
52
- * Strategy for staying under maxBytes:
53
- * 1. First resize to maxWidth/maxHeight
54
- * 2. Try both PNG and JPEG formats, pick the smaller one
55
- * 3. If still too large, try JPEG with decreasing quality
56
- * 4. If still too large, progressively reduce dimensions
54
+ * Strategy:
55
+ * 1. Fast path — if input already fits dimensions AND is at <=25% of byte budget,
56
+ * return as-is. Avoids re-encoding tiny icons/diagrams.
57
+ * 2. Resize to maxWidth/maxHeight, encode both PNG and JPEG at default quality,
58
+ * pick whichever is smaller. PNG wins for line art / few-color UI; JPEG wins
59
+ * for photographic content.
60
+ * 3. If still too large, JPEG-only quality ladder (PNG quality is a no-op).
61
+ * 4. If still too large, progressively reduce dimensions and retry the JPEG ladder.
62
+ * 5. Last resort: ship the smallest variant produced.
63
+ *
64
+ * On any decode failure, returns the original bytes unchanged with wasResized=false.
57
65
  */
58
66
  export async function resizeImage(img: ImageContent, options?: ImageResizeOptions): Promise<ResizedImage> {
59
67
  const opts = { ...DEFAULT_OPTIONS, ...options };
@@ -68,7 +76,12 @@ export async function resizeImage(img: ImageContent, options?: ImageResizeOption
68
76
 
69
77
  // Check if already within all limits (dimensions AND size)
70
78
  const originalSize = inputBuffer.length;
71
- if (originalWidth <= opts.maxWidth && originalHeight <= opts.maxHeight && originalSize <= opts.maxBytes) {
79
+ // Fast path: skip if already within dimensions AND well under budget.
80
+ // Threshold is 1/4 of budget — if already that compact, don't re-encode.
81
+ // Avoids wasted work on tiny icons/diagrams while ensuring larger PNGs
82
+ // still get JPEG-compressed.
83
+ const comfortableSize = opts.maxBytes / 4;
84
+ if (originalWidth <= opts.maxWidth && originalHeight <= opts.maxHeight && originalSize <= comfortableSize) {
72
85
  return {
73
86
  buffer: inputBuffer,
74
87
  mimeType: img.mimeType ?? `image/${format}`,
@@ -96,35 +109,58 @@ export async function resizeImage(img: ImageContent, options?: ImageResizeOption
96
109
  targetHeight = opts.maxHeight;
97
110
  }
98
111
 
99
- // Helper to resize and encode in both formats, returning the smaller one
100
- async function tryBothFormats(
112
+ // First-attempt encoder: try PNG, JPEG, and lossy WebP — return whichever is smallest.
113
+ // PNG wins for line art / few-color UI; JPEG and WebP win for photographic content;
114
+ // WebP usually beats JPEG by 25–35% at the same perceptual quality.
115
+ async function encodeSmallest(
101
116
  width: number,
102
117
  height: number,
103
118
  quality: number,
104
119
  ): Promise<{ buffer: Uint8Array; mimeType: string }> {
105
120
  const resized = await image.resize(width, height, SamplingFilter.Lanczos3);
106
121
 
107
- const [pngBuffer, jpegBuffer] = await Promise.all([
122
+ const [pngBuffer, jpegBuffer, webpBuffer] = await Promise.all([
108
123
  resized.encode(ImageFormat.PNG, quality),
109
124
  resized.encode(ImageFormat.JPEG, quality),
125
+ resized.encode(ImageFormat.WEBP, quality),
110
126
  ]);
111
127
 
112
- return pickSmaller(
128
+ return pickSmallest(
113
129
  { buffer: pngBuffer, mimeType: "image/png" },
114
130
  { buffer: jpegBuffer, mimeType: "image/jpeg" },
131
+ { buffer: webpBuffer, mimeType: "image/webp" },
132
+ );
133
+ }
134
+
135
+ // Lossy-only encoder — used in quality/dimension fallback ladders where PNG can't shrink
136
+ // further (PNG quality is a no-op). Picks the smaller of JPEG vs lossy WebP at the
137
+ // requested quality.
138
+ async function encodeLossy(
139
+ width: number,
140
+ height: number,
141
+ quality: number,
142
+ ): Promise<{ buffer: Uint8Array; mimeType: string }> {
143
+ const resized = await image.resize(width, height, SamplingFilter.Lanczos3);
144
+ const [jpegBuffer, webpBuffer] = await Promise.all([
145
+ resized.encode(ImageFormat.JPEG, quality),
146
+ resized.encode(ImageFormat.WEBP, quality),
147
+ ]);
148
+ return pickSmallest(
149
+ { buffer: jpegBuffer, mimeType: "image/jpeg" },
150
+ { buffer: webpBuffer, mimeType: "image/webp" },
115
151
  );
116
152
  }
117
153
 
118
- // Try to produce an image under maxBytes
119
- const qualitySteps = [85, 70, 55, 40];
154
+ // Quality ladder — more aggressive steps for tighter budgets
155
+ const qualitySteps = [70, 60, 50, 40];
120
156
  const scaleSteps = [1.0, 0.75, 0.5, 0.35, 0.25];
121
157
 
122
158
  let best: { buffer: Uint8Array; mimeType: string };
123
159
  let finalWidth = targetWidth;
124
160
  let finalHeight = targetHeight;
125
161
 
126
- // First attempt: resize to target dimensions, try both formats
127
- best = await tryBothFormats(targetWidth, targetHeight, opts.jpegQuality);
162
+ // First attempt: resize to target, try both PNG and JPEG, pick smaller
163
+ best = await encodeSmallest(targetWidth, targetHeight, opts.jpegQuality);
128
164
 
129
165
  if (best.buffer.length <= opts.maxBytes) {
130
166
  return {
@@ -141,9 +177,9 @@ export async function resizeImage(img: ImageContent, options?: ImageResizeOption
141
177
  };
142
178
  }
143
179
 
144
- // Still too large - try JPEG with decreasing quality
180
+ // Still too large — lossy ladder (JPEG vs WebP, smallest wins) with decreasing quality
145
181
  for (const quality of qualitySteps) {
146
- best = await tryBothFormats(targetWidth, targetHeight, quality);
182
+ best = await encodeLossy(targetWidth, targetHeight, quality);
147
183
 
148
184
  if (best.buffer.length <= opts.maxBytes) {
149
185
  return {
@@ -161,7 +197,7 @@ export async function resizeImage(img: ImageContent, options?: ImageResizeOption
161
197
  }
162
198
  }
163
199
 
164
- // Still too large - reduce dimensions progressively
200
+ // Still too large — reduce dimensions progressively with the lossy ladder
165
201
  for (const scale of scaleSteps) {
166
202
  finalWidth = Math.round(targetWidth * scale);
167
203
  finalHeight = Math.round(targetHeight * scale);
@@ -171,7 +207,7 @@ export async function resizeImage(img: ImageContent, options?: ImageResizeOption
171
207
  }
172
208
 
173
209
  for (const quality of qualitySteps) {
174
- best = await tryBothFormats(finalWidth, finalHeight, quality);
210
+ best = await encodeLossy(finalWidth, finalHeight, quality);
175
211
 
176
212
  if (best.buffer.length <= opts.maxBytes) {
177
213
  return {