@oh-my-pi/pi-coding-agent 14.0.4 → 14.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,7 +14,7 @@ import type { Theme } from "../modes/theme/theme";
14
14
  import { formatDimensionNote, type ResizedImage } from "../utils/image-resize";
15
15
 
16
16
  export { Ellipsis } from "@oh-my-pi/pi-natives";
17
- export { replaceTabs, truncateToWidth } from "@oh-my-pi/pi-tui";
17
+ export { replaceTabs, truncateToWidth, wrapTextWithAnsi } from "@oh-my-pi/pi-tui";
18
18
 
19
19
  // =============================================================================
20
20
  // Standardized Display Constants
@@ -2,10 +2,10 @@ import type { ImageContent } from "@oh-my-pi/pi-ai";
2
2
  import { ImageFormat, PhotonImage, SamplingFilter } from "@oh-my-pi/pi-natives";
3
3
 
4
4
  export interface ImageResizeOptions {
5
- maxWidth?: number; // Default: 2000
6
- maxHeight?: number; // Default: 2000
7
- maxBytes?: number; // Default: 4.5MB (below Anthropic's 5MB limit)
8
- jpegQuality?: number; // Default: 80
5
+ maxWidth?: number; // Default: 1568
6
+ maxHeight?: number; // Default: 1568
7
+ maxBytes?: number; // Default: 500KB
8
+ jpegQuality?: number; // Default: 75
9
9
  }
10
10
 
11
11
  export interface ResizedImage {
@@ -19,22 +19,25 @@ export interface ResizedImage {
19
19
  get data(): string;
20
20
  }
21
21
 
22
- // 4.5MB - provides headroom below Anthropic's 5MB limit
23
- const DEFAULT_MAX_BYTES = 4.5 * 1024 * 1024;
22
+ // 500KB target — aggressive compression; Anthropic's 5MB per-image cap is rarely the
23
+ // binding constraint once images are downsized to 1568px (Anthropic's internal threshold).
24
+ const DEFAULT_MAX_BYTES = 500 * 1024;
24
25
 
25
26
  const DEFAULT_OPTIONS: Required<ImageResizeOptions> = {
26
- maxWidth: 2000,
27
- maxHeight: 2000,
27
+ // 1568px — Anthropic downscales anything larger; OpenAI tiles at 768px;
28
+ // sending bigger pixels wastes bandwidth the model never sees.
29
+ maxWidth: 1568,
30
+ maxHeight: 1568,
28
31
  maxBytes: DEFAULT_MAX_BYTES,
29
- jpegQuality: 80,
32
+ jpegQuality: 75,
30
33
  };
31
34
 
32
- /** Helper to pick the smaller of two buffers */
33
- function pickSmaller(
34
- a: { buffer: Uint8Array; mimeType: string },
35
- b: { buffer: Uint8Array; mimeType: string },
36
- ): { buffer: Uint8Array; mimeType: string } {
37
- return a.buffer.length <= b.buffer.length ? a : b;
35
+ /** Pick the smallest of N encoded buffers. */
36
+ function pickSmallest(...candidates: Array<{ buffer: Uint8Array; mimeType: string }>): {
37
+ buffer: Uint8Array;
38
+ mimeType: string;
39
+ } {
40
+ return candidates.reduce((best, c) => (c.buffer.length < best.buffer.length ? c : best));
38
41
  }
39
42
 
40
43
  /** Polyfill for Buffer.toBase64, technically since it derives from Uint8Array it should exist but Bun reasons... */
@@ -43,17 +46,22 @@ Buffer.prototype.toBase64 = function (this: Buffer) {
43
46
  };
44
47
 
45
48
  /**
46
- * Resize an image to fit within the specified max dimensions and file size.
47
- * Returns the original image if it already fits within the limits.
49
+ * Resize and recompress an image to fit within the specified max dimensions and file size.
48
50
  *
49
- * Uses Photon for image processing. If Photon is not available,
50
- * returns the original image unchanged.
51
+ * Defaults target Anthropic's internal 1568px downscale threshold and produce small
52
+ * output (PNG, JPEG, or WebP, whichever is smallest) suitable for tool-call payloads (~100–500KB typical).
51
53
  *
52
- * Strategy for staying under maxBytes:
53
- * 1. First resize to maxWidth/maxHeight
54
- * 2. Try both PNG and JPEG formats, pick the smaller one
55
- * 3. If still too large, try JPEG with decreasing quality
56
- * 4. If still too large, progressively reduce dimensions
54
+ * Strategy:
55
+ * 1. Fast path: if input already fits dimensions AND is at <=25% of byte budget,
56
+ * return as-is. Avoids re-encoding tiny icons/diagrams.
57
+ * 2. Resize to maxWidth/maxHeight, encode PNG, JPEG, and lossy WebP at default quality,
58
+ * pick whichever is smallest. PNG wins for line art / few-color UI; JPEG and WebP win
59
+ * for photographic content.
60
+ * 3. If still too large, lossy-only quality ladder, JPEG vs WebP (PNG quality is a no-op).
61
+ * 4. If still too large, progressively reduce dimensions and retry the lossy ladder.
62
+ * 5. Last resort: ship the smallest variant produced.
63
+ *
64
+ * On any decode failure, returns the original bytes unchanged with wasResized=false.
57
65
  */
58
66
  export async function resizeImage(img: ImageContent, options?: ImageResizeOptions): Promise<ResizedImage> {
59
67
  const opts = { ...DEFAULT_OPTIONS, ...options };
@@ -68,7 +76,12 @@ export async function resizeImage(img: ImageContent, options?: ImageResizeOption
68
76
 
69
77
  // Check if already within all limits (dimensions AND size)
70
78
  const originalSize = inputBuffer.length;
71
- if (originalWidth <= opts.maxWidth && originalHeight <= opts.maxHeight && originalSize <= opts.maxBytes) {
79
+ // Fast path: skip if already within dimensions AND well under budget.
80
+ // Threshold is 1/4 of budget — if already that compact, don't re-encode.
81
+ // Avoids wasted work on tiny icons/diagrams while ensuring larger PNGs
82
+ // still get JPEG-compressed.
83
+ const comfortableSize = opts.maxBytes / 4;
84
+ if (originalWidth <= opts.maxWidth && originalHeight <= opts.maxHeight && originalSize <= comfortableSize) {
72
85
  return {
73
86
  buffer: inputBuffer,
74
87
  mimeType: img.mimeType ?? `image/${format}`,
@@ -96,35 +109,58 @@ export async function resizeImage(img: ImageContent, options?: ImageResizeOption
96
109
  targetHeight = opts.maxHeight;
97
110
  }
98
111
 
99
- // Helper to resize and encode in both formats, returning the smaller one
100
- async function tryBothFormats(
112
+ // First-attempt encoder: try PNG, JPEG, and lossy WebP; return whichever is smallest.
113
+ // PNG wins for line art / few-color UI; JPEG and WebP win for photographic content;
114
+ // WebP usually beats JPEG by 25–35% at the same perceptual quality.
115
+ async function encodeSmallest(
101
116
  width: number,
102
117
  height: number,
103
118
  quality: number,
104
119
  ): Promise<{ buffer: Uint8Array; mimeType: string }> {
105
120
  const resized = await image.resize(width, height, SamplingFilter.Lanczos3);
106
121
 
107
- const [pngBuffer, jpegBuffer] = await Promise.all([
122
+ const [pngBuffer, jpegBuffer, webpBuffer] = await Promise.all([
108
123
  resized.encode(ImageFormat.PNG, quality),
109
124
  resized.encode(ImageFormat.JPEG, quality),
125
+ resized.encode(ImageFormat.WEBP, quality),
110
126
  ]);
111
127
 
112
- return pickSmaller(
128
+ return pickSmallest(
113
129
  { buffer: pngBuffer, mimeType: "image/png" },
114
130
  { buffer: jpegBuffer, mimeType: "image/jpeg" },
131
+ { buffer: webpBuffer, mimeType: "image/webp" },
132
+ );
133
+ }
134
+
135
+ // Lossy-only encoder — used in quality/dimension fallback ladders where PNG can't shrink
136
+ // further (PNG quality is a no-op). Picks the smaller of JPEG vs lossy WebP at the
137
+ // requested quality.
138
+ async function encodeLossy(
139
+ width: number,
140
+ height: number,
141
+ quality: number,
142
+ ): Promise<{ buffer: Uint8Array; mimeType: string }> {
143
+ const resized = await image.resize(width, height, SamplingFilter.Lanczos3);
144
+ const [jpegBuffer, webpBuffer] = await Promise.all([
145
+ resized.encode(ImageFormat.JPEG, quality),
146
+ resized.encode(ImageFormat.WEBP, quality),
147
+ ]);
148
+ return pickSmallest(
149
+ { buffer: jpegBuffer, mimeType: "image/jpeg" },
150
+ { buffer: webpBuffer, mimeType: "image/webp" },
115
151
  );
116
152
  }
117
153
 
118
- // Try to produce an image under maxBytes
119
- const qualitySteps = [85, 70, 55, 40];
154
+ // Quality ladder — more aggressive steps for tighter budgets
155
+ const qualitySteps = [70, 60, 50, 40];
120
156
  const scaleSteps = [1.0, 0.75, 0.5, 0.35, 0.25];
121
157
 
122
158
  let best: { buffer: Uint8Array; mimeType: string };
123
159
  let finalWidth = targetWidth;
124
160
  let finalHeight = targetHeight;
125
161
 
126
- // First attempt: resize to target dimensions, try both formats
127
- best = await tryBothFormats(targetWidth, targetHeight, opts.jpegQuality);
162
+ // First attempt: resize to target, try PNG, JPEG, and lossy WebP, pick the smallest
163
+ best = await encodeSmallest(targetWidth, targetHeight, opts.jpegQuality);
128
164
 
129
165
  if (best.buffer.length <= opts.maxBytes) {
130
166
  return {
@@ -141,9 +177,9 @@ export async function resizeImage(img: ImageContent, options?: ImageResizeOption
141
177
  };
142
178
  }
143
179
 
144
- // Still too large - try JPEG with decreasing quality
180
+ // Still too large — lossy ladder (JPEG vs WebP, smallest wins) with decreasing quality
145
181
  for (const quality of qualitySteps) {
146
- best = await tryBothFormats(targetWidth, targetHeight, quality);
182
+ best = await encodeLossy(targetWidth, targetHeight, quality);
147
183
 
148
184
  if (best.buffer.length <= opts.maxBytes) {
149
185
  return {
@@ -161,7 +197,7 @@ export async function resizeImage(img: ImageContent, options?: ImageResizeOption
161
197
  }
162
198
  }
163
199
 
164
- // Still too large - reduce dimensions progressively
200
+ // Still too large — reduce dimensions progressively with the lossy ladder
165
201
  for (const scale of scaleSteps) {
166
202
  finalWidth = Math.round(targetWidth * scale);
167
203
  finalHeight = Math.round(targetHeight * scale);
@@ -171,7 +207,7 @@ export async function resizeImage(img: ImageContent, options?: ImageResizeOption
171
207
  }
172
208
 
173
209
  for (const quality of qualitySteps) {
174
- best = await tryBothFormats(finalWidth, finalHeight, quality);
210
+ best = await encodeLossy(finalWidth, finalHeight, quality);
175
211
 
176
212
  if (best.buffer.length <= opts.maxBytes) {
177
213
  return {
@@ -2,6 +2,8 @@
2
2
  * Shared types and utilities for web-fetch handlers
3
3
  */
4
4
  import { ptree } from "@oh-my-pi/pi-utils";
5
+ import TurndownService from "turndown";
6
+ import { gfm } from "turndown-plugin-gfm";
5
7
  import { ToolAbortError } from "../../tools/tool-errors";
6
8
 
7
9
  export { formatNumber } from "@oh-my-pi/pi-utils";
@@ -153,41 +155,57 @@ export async function loadPage(url: string, options: LoadPageOptions = {}): Prom
153
155
  return { content: "", contentType: "", finalUrl: url, ok: false };
154
156
  }
155
157
 
158
+ /** Module-level Turndown instance — matches markit-ai's configuration. */
159
+ const turndown = new TurndownService({
160
+ headingStyle: "atx",
161
+ codeBlockStyle: "fenced",
162
+ bulletListMarker: "-",
163
+ });
164
+ turndown.use(gfm);
165
+ turndown.addRule("strikethrough", {
166
+ filter: ["del", "s", "strike"],
167
+ replacement(content) {
168
+ return `~~${content}~~`;
169
+ },
170
+ });
171
+ turndown.addRule("heading", {
172
+ filter: ["h1", "h2", "h3", "h4", "h5", "h6"],
173
+ replacement(content, node) {
174
+ const level = Number(node.nodeName.charAt(1));
175
+ const prefix = "#".repeat(level);
176
+ const cleaned = content.replace(/\\([.])/g, "$1").trim();
177
+ return `\n\n${prefix} ${cleaned}\n\n`;
178
+ },
179
+ });
180
+
181
+ type TurndownListParent = {
182
+ nodeName: string;
183
+ getAttribute(name: string): string | null;
184
+ children: ArrayLike<unknown>;
185
+ };
186
+
187
+ turndown.addRule("listItem", {
188
+ filter: "li",
189
+ replacement(content, node, options) {
190
+ content = content.replace(/^\n+/, "").replace(/\n+$/, "\n").replace(/\n/gm, "\n ");
191
+ const parent = node.parentNode as unknown as TurndownListParent | null;
192
+ let prefix = `${options.bulletListMarker} `;
193
+ if (parent?.nodeName === "OL") {
194
+ const start = parent.getAttribute("start");
195
+ const index = Array.prototype.indexOf.call(parent.children, node);
196
+ prefix = `${(start ? Number(start) : 1) + index}. `;
197
+ }
198
+ return prefix + content + (node.nextSibling ? "\n" : "");
199
+ },
200
+ });
201
+
156
202
  /**
157
- * Convert basic HTML to markdown
203
+ * Convert HTML to markdown using Turndown with GFM support.
204
+ * Strips script/style tags before conversion.
158
205
  */
159
206
  export function htmlToBasicMarkdown(html: string): string {
160
- const stripped = html
161
- .replace(/<pre[^>]*><code[^>]*>/g, "\n```\n")
162
- .replace(/<\/code><\/pre>/g, "\n```\n")
163
- .replace(/<code[^>]*>/g, "`")
164
- .replace(/<\/code>/g, "`")
165
- .replace(/<strong[^>]*>/g, "**")
166
- .replace(/<\/strong>/g, "**")
167
- .replace(/<b[^>]*>/g, "**")
168
- .replace(/<\/b>/g, "**")
169
- .replace(/<em[^>]*>/g, "*")
170
- .replace(/<\/em>/g, "*")
171
- .replace(/<i[^>]*>/g, "*")
172
- .replace(/<\/i>/g, "*")
173
- .replace(
174
- /<a[^>]*href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/g,
175
- (_, href, text) => `[${text.replace(/<[^>]+>/g, "").trim()}](${href})`,
176
- )
177
- .replace(/<p[^>]*>/g, "\n\n")
178
- .replace(/<\/p>/g, "")
179
- .replace(/<br\s*\/?>/g, "\n")
180
- .replace(/<li[^>]*>/g, "- ")
181
- .replace(/<\/li>/g, "\n")
182
- .replace(/<\/?[uo]l[^>]*>/g, "\n")
183
- .replace(/<h(\d)[^>]*>/g, (_, n) => `\n${"#".repeat(parseInt(n, 10))} `)
184
- .replace(/<\/h\d>/g, "\n")
185
- .replace(/<blockquote[^>]*>/g, "\n> ")
186
- .replace(/<\/blockquote>/g, "\n")
187
- .replace(/<[^>]+>/g, "")
188
- .replace(/\n{3,}/g, "\n\n")
189
- .trim();
190
- return decodeHtmlEntities(stripped);
207
+ const cleaned = html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "");
208
+ return turndown.turndown(cleaned).trim();
191
209
  }
192
210
 
193
211
  /**
@@ -90,6 +90,10 @@ interface CodexResponse {
90
90
  usage?: CodexUsage;
91
91
  }
92
92
 
93
+ function isImagePlaceholderAnswer(text: string): boolean {
94
+ return text.trim().toLowerCase() === "(see attached image)";
95
+ }
96
+
93
97
  /**
94
98
  * Decodes a JWT token and extracts the payload.
95
99
  * @param token - JWT token string
@@ -232,6 +236,7 @@ async function callCodexSearch(
232
236
 
233
237
  // Parse SSE stream
234
238
  const answerParts: string[] = [];
239
+ const streamedAnswerParts: string[] = [];
235
240
  const sources: SearchSource[] = [];
236
241
  let model = requestedModel;
237
242
  let requestId = "";
@@ -241,7 +246,12 @@ async function callCodexSearch(
241
246
  const eventType = typeof rawEvent.type === "string" ? rawEvent.type : "";
242
247
  if (!eventType) continue;
243
248
 
244
- if (eventType === "response.output_item.done") {
249
+ if (eventType === "response.output_text.delta") {
250
+ const delta = typeof rawEvent.delta === "string" ? rawEvent.delta : "";
251
+ if (delta) {
252
+ streamedAnswerParts.push(delta);
253
+ }
254
+ } else if (eventType === "response.output_item.done") {
245
255
  const item = rawEvent.item as CodexResponseItem | undefined;
246
256
  if (!item) continue;
247
257
 
@@ -302,8 +312,17 @@ async function callCodexSearch(
302
312
  }
303
313
  }
304
314
 
315
+ const finalAnswer = answerParts.join("\n\n").trim();
316
+ const streamedAnswer = streamedAnswerParts.join("").trim();
317
+ const answer =
318
+ finalAnswer.length > 0 && !isImagePlaceholderAnswer(finalAnswer)
319
+ ? finalAnswer
320
+ : streamedAnswer.length > 0
321
+ ? streamedAnswer
322
+ : finalAnswer;
323
+
305
324
  return {
306
- answer: answerParts.join("\n\n"),
325
+ answer,
307
326
  sources,
308
327
  model,
309
328
  requestId,