@oh-my-pi/pi-coding-agent 13.9.12 → 13.9.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/tools/read.ts CHANGED
@@ -23,7 +23,12 @@ import {
23
23
  import { renderCodeCell, renderStatusLine } from "../tui";
24
24
  import { CachedOutputBlock } from "../tui/output-block";
25
25
  import { resolveFileDisplayMode } from "../utils/file-display-mode";
26
- import { formatDimensionNote, resizeImage } from "../utils/image-resize";
26
+ import {
27
+ ImageInputTooLargeError,
28
+ loadImageInput,
29
+ MAX_IMAGE_INPUT_BYTES,
30
+ readImageMetadata,
31
+ } from "../utils/image-input";
27
32
  import { detectSupportedImageMimeTypeFromFile } from "../utils/mime";
28
33
  import { ensureTool } from "../utils/tools-manager";
29
34
  import { applyListLimit } from "./list-limit";
@@ -253,7 +258,7 @@ async function streamLinesFromFile(
253
258
  }
254
259
 
255
260
  // Maximum image file size (20MB) - larger images will be rejected to prevent OOM during serialization
256
- const MAX_IMAGE_SIZE = 20 * 1024 * 1024;
261
+ const MAX_IMAGE_SIZE = MAX_IMAGE_INPUT_BYTES;
257
262
  const GLOB_TIMEOUT_MS = 5000;
258
263
 
259
264
  function isNotFoundError(error: unknown): boolean {
@@ -366,6 +371,7 @@ export class ReadTool implements AgentTool<typeof readSchema, ReadToolDetails> {
366
371
 
367
372
  readonly #autoResizeImages: boolean;
368
373
  readonly #defaultLimit: number;
374
+ readonly #inspectImageEnabled: boolean;
369
375
 
370
376
  constructor(private readonly session: ToolSession) {
371
377
  const displayMode = resolveFileDisplayMode(session);
@@ -374,6 +380,7 @@ export class ReadTool implements AgentTool<typeof readSchema, ReadToolDetails> {
374
380
  1,
375
381
  Math.min(session.settings.get("read.defaultLimit") ?? DEFAULT_MAX_LINES, DEFAULT_MAX_LINES),
376
382
  );
383
+ this.#inspectImageEnabled = session.settings.get("inspect_image.enabled");
377
384
  this.description = renderPromptTemplate(readDescription, {
378
385
  DEFAULT_LIMIT: String(this.#defaultLimit),
379
386
  DEFAULT_MAX_LINES: String(DEFAULT_MAX_LINES),
@@ -455,57 +462,63 @@ export class ReadTool implements AgentTool<typeof readSchema, ReadToolDetails> {
455
462
  | undefined;
456
463
 
457
464
  if (mimeType) {
458
- if (fileSize > MAX_IMAGE_SIZE) {
459
- const sizeStr = formatBytes(fileSize);
460
- const maxStr = formatBytes(MAX_IMAGE_SIZE);
461
- throw new ToolError(`Image file too large: ${sizeStr} exceeds ${maxStr} limit.`);
465
+ if (this.#inspectImageEnabled) {
466
+ const metadata = await readImageMetadata({
467
+ path: readPath,
468
+ cwd: this.session.cwd,
469
+ resolvedPath: absolutePath,
470
+ detectedMimeType: mimeType,
471
+ });
472
+ const outputMime = metadata?.mimeType ?? mimeType;
473
+ const outputBytes = metadata?.bytes ?? fileSize;
474
+ const metadataLines = [
475
+ "Image metadata:",
476
+ `- MIME: ${outputMime}`,
477
+ `- Bytes: ${outputBytes} (${formatBytes(outputBytes)})`,
478
+ metadata?.width !== undefined && metadata.height !== undefined
479
+ ? `- Dimensions: ${metadata.width}x${metadata.height}`
480
+ : "- Dimensions: unknown",
481
+ metadata?.channels !== undefined ? `- Channels: ${metadata.channels}` : "- Channels: unknown",
482
+ metadata?.hasAlpha === true
483
+ ? "- Alpha: yes"
484
+ : metadata?.hasAlpha === false
485
+ ? "- Alpha: no"
486
+ : "- Alpha: unknown",
487
+ "",
488
+ `If you want to analyze the image, call inspect_image with path="${readPath}" and a question describing what to inspect and the desired output format.`,
489
+ ];
490
+ content = [{ type: "text", text: metadataLines.join("\n") }];
491
+ details = {};
492
+ sourcePath = absolutePath;
462
493
  } else {
463
- // Read as image (binary)
464
- const file = Bun.file(absolutePath);
465
- const buffer = await file.arrayBuffer();
466
-
467
- // Check actual buffer size after reading to prevent OOM during serialization
468
- if (buffer.byteLength > MAX_IMAGE_SIZE) {
469
- const sizeStr = formatBytes(buffer.byteLength);
494
+ if (fileSize > MAX_IMAGE_SIZE) {
495
+ const sizeStr = formatBytes(fileSize);
470
496
  const maxStr = formatBytes(MAX_IMAGE_SIZE);
471
497
  throw new ToolError(`Image file too large: ${sizeStr} exceeds ${maxStr} limit.`);
472
- } else {
473
- const base64 = new Uint8Array(buffer).toBase64();
474
-
475
- if (this.#autoResizeImages) {
476
- // Resize image if needed - catch errors from Photon
477
- try {
478
- const resized = await resizeImage({ type: "image", data: base64, mimeType });
479
- const dimensionNote = formatDimensionNote(resized);
480
-
481
- let textNote = `Read image file [${resized.mimeType}]`;
482
- if (dimensionNote) {
483
- textNote += `\n${dimensionNote}`;
484
- }
485
-
486
- content = [
487
- { type: "text", text: textNote },
488
- { type: "image", data: resized.data, mimeType: resized.mimeType },
489
- ];
490
- details = {};
491
- sourcePath = absolutePath;
492
- } catch {
493
- // Fall back to original image on resize failure
494
- content = [
495
- { type: "text", text: `Read image file [${mimeType}]` },
496
- { type: "image", data: base64, mimeType },
497
- ];
498
- details = {};
499
- sourcePath = absolutePath;
500
- }
501
- } else {
502
- content = [
503
- { type: "text", text: `Read image file [${mimeType}]` },
504
- { type: "image", data: base64, mimeType },
505
- ];
506
- details = {};
507
- sourcePath = absolutePath;
498
+ }
499
+ try {
500
+ const imageInput = await loadImageInput({
501
+ path: readPath,
502
+ cwd: this.session.cwd,
503
+ autoResize: this.#autoResizeImages,
504
+ maxBytes: MAX_IMAGE_SIZE,
505
+ resolvedPath: absolutePath,
506
+ detectedMimeType: mimeType,
507
+ });
508
+ if (!imageInput) {
509
+ throw new ToolError(`Read image file [${mimeType}] failed: unsupported image format.`);
510
+ }
511
+ content = [
512
+ { type: "text", text: imageInput.textNote },
513
+ { type: "image", data: imageInput.data, mimeType: imageInput.mimeType },
514
+ ];
515
+ details = {};
516
+ sourcePath = imageInput.resolvedPath;
517
+ } catch (error) {
518
+ if (error instanceof ImageInputTooLargeError) {
519
+ throw new ToolError(error.message);
508
520
  }
521
+ throw error;
509
522
  }
510
523
  }
511
524
  } else if (CONVERTIBLE_EXTENSIONS.has(ext)) {
@@ -18,6 +18,7 @@ import { calculatorToolRenderer } from "./calculator";
18
18
  import { fetchToolRenderer } from "./fetch";
19
19
  import { findToolRenderer } from "./find";
20
20
  import { grepToolRenderer } from "./grep";
21
+ import { inspectImageToolRenderer } from "./inspect-image-renderer";
21
22
  import { notebookToolRenderer } from "./notebook";
22
23
  import { pythonToolRenderer } from "./python";
23
24
  import { readToolRenderer } from "./read";
@@ -51,6 +52,7 @@ export const toolRenderers: Record<string, ToolRenderer> = {
51
52
  grep: grepToolRenderer as ToolRenderer,
52
53
  lsp: lspToolRenderer as ToolRenderer,
53
54
  notebook: notebookToolRenderer as ToolRenderer,
55
+ inspect_image: inspectImageToolRenderer as ToolRenderer,
54
56
  read: readToolRenderer as ToolRenderer,
55
57
  resolve: resolveToolRenderer as ToolRenderer,
56
58
  ssh: sshToolRenderer as ToolRenderer,
@@ -0,0 +1,264 @@
1
+ import * as fs from "node:fs/promises";
2
+ import { formatBytes } from "@oh-my-pi/pi-utils";
3
+ import { resolveReadPath } from "../tools/path-utils";
4
+ import { formatDimensionNote, resizeImage } from "./image-resize";
5
+ import { detectSupportedImageMimeTypeFromFile } from "./mime";
6
+
7
/** Default ceiling (20 MiB) on the size of an image file that may be loaded as input. */
export const MAX_IMAGE_INPUT_BYTES = 20 * 1024 ** 2;

/** Most bytes (256 KiB) read from the start of a file when extracting header metadata. */
const MAX_IMAGE_METADATA_HEADER_BYTES = 256 * 1024 ** 1;
9
+
10
/**
 * Facts about an image file gathered without decoding its pixels.
 * The optional fields are absent when the header parser could not determine them.
 */
export interface ImageMetadata {
  /** MIME type of the file (caller-supplied or detected from the file). */
  mimeType: string;
  /** Size of the file on disk, in bytes. */
  bytes: number;
  /** Pixel width parsed from the file header. */
  width?: number;
  /** Pixel height parsed from the file header. */
  height?: number;
  /** Channel count reported by the header parser for this format. */
  channels?: number;
  /** Whether the header indicates an alpha channel; absent when it cannot be told from the header. */
  hasAlpha?: boolean;
}
18
+
19
/** Result of loading an image file and (optionally) resizing it for use as input. */
export interface LoadedImageInput {
  /** Path the image was actually read from. */
  resolvedPath: string;
  /** MIME type of `data`; may differ from the source file's type after a resize. */
  mimeType: string;
  /** Base64-encoded image bytes. */
  data: string;
  /** Human-readable note describing the read, including `dimensionNote` when present. */
  textNote: string;
  /** Note produced by the resize step, when one was produced. */
  dimensionNote?: string;
  /** Byte length of the image behind `data` (resized output, or the original file). */
  bytes: number;
}
27
+
28
/** Inputs for locating an image file and identifying its type. */
export interface ReadImageMetadataOptions {
  /** Path as given by the caller; resolved against `cwd` when `resolvedPath` is not supplied. */
  path: string;
  /** Working directory used to resolve `path`. */
  cwd: string;
  /** Already-resolved path; when set, `path`/`cwd` resolution is skipped. */
  resolvedPath?: string;
  /** Already-detected MIME type; when set, detection from the file is skipped. */
  detectedMimeType?: string;
}
34
+
35
/** Inputs for loading an image file's contents, on top of the path/type options. */
export interface LoadImageInputOptions extends ReadImageMetadataOptions {
  /** When true, the image is passed through the resize step before being returned. */
  autoResize: boolean;
  /** Largest accepted file size in bytes; defaults to `MAX_IMAGE_INPUT_BYTES`. */
  maxBytes?: number;
}
39
+
40
/**
 * Raised when an image file is larger than the permitted input size.
 * Carries both the offending size and the limit so callers can build their own message.
 */
export class ImageInputTooLargeError extends Error {
  /**
   * @param bytes - Actual size of the image, in bytes.
   * @param maxBytes - Limit that was exceeded, in bytes.
   */
  constructor(
    readonly bytes: number,
    readonly maxBytes: number,
  ) {
    super(`Image file too large: ${formatBytes(bytes)} exceeds ${formatBytes(maxBytes)} limit.`);
    this.name = "ImageInputTooLargeError";
  }
}
51
+
52
/**
 * Fields a format-specific header parser managed to extract.
 * Every field is optional; an empty object means nothing could be parsed.
 */
interface ParsedImageHeaderMetadata {
  /** Pixel width. */
  width?: number;
  /** Pixel height. */
  height?: number;
  /** Channel count. */
  channels?: number;
  /** Whether an alpha channel is present. */
  hasAlpha?: boolean;
}
58
+
59
/**
 * Reads image dimensions and channel layout from the IHDR chunk at the start of a PNG file.
 *
 * @param header - Leading bytes of the file; 26 bytes are needed to reach the color-type field.
 * @returns The parsed fields, or an empty object when the buffer is too short, the PNG
 *   signature does not match, or the first chunk is not IHDR.
 */
function parsePngMetadata(header: Buffer): ParsedImageHeaderMetadata {
  // signature (8) + chunk length (4) + "IHDR" (4) + width (4) + height (4) + bit depth (1) + color type (1)
  if (header.length < 26) return {};

  const pngSignature = [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a];
  if (!pngSignature.every((byte, index) => header[index] === byte)) return {};

  // Decode as latin1 so each byte maps to a distinct character. "ascii" decoding clears the
  // high bit, which would let bytes such as 0xC9 0xC8 0xC4 0xD2 pass as "IHDR".
  if (header.toString("latin1", 12, 16) !== "IHDR") return {};

  const width = header.readUInt32BE(16);
  const height = header.readUInt32BE(20);

  switch (header[25]) {
    case 0: // greyscale
      return { width, height, channels: 1, hasAlpha: false };
    case 2: // truecolor
      return { width, height, channels: 3, hasAlpha: false };
    case 3: // palette: transparency is not determined from IHDR, so hasAlpha stays unset
      return { width, height, channels: 3 };
    case 4: // greyscale + alpha
      return { width, height, channels: 2, hasAlpha: true };
    case 6: // truecolor + alpha
      return { width, height, channels: 4, hasAlpha: true };
    default:
      return { width, height };
  }
}
84
+
85
/**
 * Walks the marker segments at the start of a JPEG file and reads the dimensions and
 * component count from the first start-of-frame (SOFn) segment.
 *
 * @param header - Leading bytes of the file, beginning with the SOI marker (FF D8).
 * @returns Width, height, component count and `hasAlpha: false` from the frame header, or an
 *   empty object when the buffer is not a JPEG or no frame header is found before the
 *   scan data or the end of the buffer.
 */
function parseJpegMetadata(header: Buffer): ParsedImageHeaderMetadata {
  if (header.length < 4) return {};
  if (header[0] !== 0xff || header[1] !== 0xd8) return {};

  let offset = 2;
  // A frame header occupies 10 bytes from its 0xFF prefix through the component count.
  while (offset + 9 < header.length) {
    if (header[offset] !== 0xff) {
      offset += 1;
      continue;
    }

    // Skip any run of 0xFF fill bytes preceding the marker code.
    let markerOffset = offset + 1;
    while (markerOffset < header.length && header[markerOffset] === 0xff) {
      markerOffset += 1;
    }
    if (markerOffset >= header.length) break;

    const marker = header[markerOffset];
    const segmentOffset = markerOffset + 1;

    // SOI, EOI, TEM and RSTn carry no length field; step past the marker itself.
    const isStandalone =
      marker === 0xd8 || marker === 0xd9 || marker === 0x01 || (marker >= 0xd0 && marker <= 0xd7);
    if (isStandalone) {
      offset = segmentOffset;
      continue;
    }

    // Start of scan: entropy-coded data follows, which is not made of length-prefixed
    // segments. The frame header precedes the first scan, so there is nothing left to find,
    // and scanning on could misread scan bytes as a frame header.
    if (marker === 0xda) break;

    if (segmentOffset + 1 >= header.length) break;

    const segmentLength = header.readUInt16BE(segmentOffset);
    if (segmentLength < 2) break;

    // SOF0..SOF15, excluding DHT (C4), JPG (C8) and DAC (CC) which share the range.
    const isStartOfFrame =
      marker >= 0xc0 && marker <= 0xcf && marker !== 0xc4 && marker !== 0xc8 && marker !== 0xcc;
    if (isStartOfFrame) {
      if (segmentOffset + 7 >= header.length) break;
      // Layout after the marker: length (2), precision (1), height (2), width (2), components (1).
      return {
        width: header.readUInt16BE(segmentOffset + 5),
        height: header.readUInt16BE(segmentOffset + 3),
        channels: header[segmentOffset + 7],
        hasAlpha: false,
      };
    }

    offset = segmentOffset + segmentLength;
  }

  return {};
}
137
+
138
/**
 * Reads the logical screen size from a GIF file header.
 *
 * @param header - Leading bytes of the file; 10 bytes cover the signature and both dimensions.
 * @returns Width, height and a fixed channel count of 3, or an empty object when the buffer
 *   is too short or does not start with a GIF87a/GIF89a signature. `hasAlpha` is never set.
 */
function parseGifMetadata(header: Buffer): ParsedImageHeaderMetadata {
  if (header.length < 10) return {};

  // latin1 keeps bytes distinct; "ascii" decoding clears the high bit and would accept
  // non-ASCII bytes that alias to the signature.
  const signature = header.toString("latin1", 0, 6);
  if (signature !== "GIF87a" && signature !== "GIF89a") return {};

  // Both dimensions are little-endian 16-bit values directly after the signature.
  return {
    width: header.readUInt16LE(6),
    height: header.readUInt16LE(8),
    channels: 3,
  };
}
148
+
149
/**
 * Reads dimensions and alpha information from the first chunk of a WebP (RIFF) file.
 * Handles the extended (VP8X), lossless (VP8L) and lossy (VP8) chunk layouts.
 *
 * @param header - Leading bytes of the file.
 * @returns The parsed fields, or an empty object when the container tags do not match, the
 *   first chunk is of another type, its marker bytes are wrong, or the buffer is too short
 *   for that chunk's fields.
 */
function parseWebpMetadata(header: Buffer): ParsedImageHeaderMetadata {
  // "RIFF" (4) + file size (4) + "WEBP" (4) + first chunk tag (4)
  if (header.length < 16) return {};
  if (header.toString("latin1", 0, 4) !== "RIFF") return {};
  if (header.toString("latin1", 8, 12) !== "WEBP") return {};

  const chunkType = header.toString("latin1", 12, 16);

  if (chunkType === "VP8X") {
    // flags at 20, then 24-bit (width - 1) at 24 and 24-bit (height - 1) at 27
    if (header.length < 30) return {};
    const hasAlpha = (header[20] & 0x10) !== 0;
    const width = (header[24] | (header[25] << 8) | (header[26] << 16)) + 1;
    const height = (header[27] | (header[28] << 8) | (header[29] << 16)) + 1;
    return { width, height, channels: hasAlpha ? 4 : 3, hasAlpha };
  }

  if (chunkType === "VP8L") {
    // Signature byte 0x2F at 20, then a 32-bit field at 21: 14 bits (width - 1),
    // 14 bits (height - 1), 1 alpha bit. Only 25 bytes are needed, so very small
    // lossless files must not be rejected by a larger blanket minimum.
    if (header.length < 25 || header[20] !== 0x2f) return {};
    const bits = header.readUInt32LE(21);
    const width = (bits & 0x3fff) + 1;
    // Unsigned shifts: `bits` may exceed the int32 range.
    const height = ((bits >>> 14) & 0x3fff) + 1;
    const hasAlpha = ((bits >>> 28) & 0x1) === 1;
    return { width, height, channels: hasAlpha ? 4 : 3, hasAlpha };
  }

  if (chunkType === "VP8 ") {
    // 3-byte frame tag at 20, start code 9D 01 2A at 23, then 14-bit width and height.
    if (header.length < 30) return {};
    if (header[23] !== 0x9d || header[24] !== 0x01 || header[25] !== 0x2a) return {};
    const width = header.readUInt16LE(26) & 0x3fff;
    const height = header.readUInt16LE(28) & 0x3fff;
    return { width, height, channels: 3, hasAlpha: false };
  }

  return {};
}
176
+
177
/**
 * Routes a file header to the parser for its MIME type.
 *
 * @param header - Leading bytes of the image file.
 * @param mimeType - MIME type used to select the parser.
 * @returns Whatever the selected parser extracted; an empty object for any other MIME type.
 */
function parseImageHeaderMetadata(header: Buffer, mimeType: string): ParsedImageHeaderMetadata {
  switch (mimeType) {
    case "image/png":
      return parsePngMetadata(header);
    case "image/jpeg":
      return parseJpegMetadata(header);
    case "image/gif":
      return parseGifMetadata(header);
    case "image/webp":
      return parseWebpMetadata(header);
    default:
      return {};
  }
}
184
+
185
/**
 * Reads up to `maxBytes` bytes from the beginning of a file.
 *
 * @param filePath - File to read.
 * @param maxBytes - Upper bound on bytes to read; zero or less yields an empty buffer
 *   without opening the file.
 * @returns A buffer trimmed to the number of bytes the read reported.
 */
async function readHeader(filePath: string, maxBytes: number): Promise<Buffer> {
  if (maxBytes <= 0) {
    return Buffer.alloc(0);
  }

  const handle = await fs.open(filePath, "r");
  try {
    // Uninitialized memory is fine here: only the prefix the read filled is returned.
    const scratch = Buffer.allocUnsafe(maxBytes);
    const outcome = await handle.read(scratch, 0, maxBytes, 0);
    return scratch.subarray(0, outcome.bytesRead);
  } finally {
    await handle.close();
  }
}
196
+
197
/**
 * Collects size and header-derived metadata for an image file without loading all of it.
 *
 * @param options - Path/type inputs; `resolvedPath` and `detectedMimeType`, when given,
 *   bypass path resolution and MIME detection respectively.
 * @returns The metadata, or `null` when no supported image MIME type is available.
 */
export async function readImageMetadata(options: ReadImageMetadataOptions): Promise<ImageMetadata | null> {
  const { path, cwd, detectedMimeType } = options;
  const resolvedPath = options.resolvedPath ?? resolveReadPath(path, cwd);
  const mimeType = detectedMimeType ?? (await detectSupportedImageMimeTypeFromFile(resolvedPath));
  if (!mimeType) {
    return null;
  }

  const { size } = await Bun.file(resolvedPath).stat();
  // Read no more than the file holds, and never more than the header cap.
  const headerLength = Math.max(0, Math.min(size, MAX_IMAGE_METADATA_HEADER_BYTES));
  const headerBytes = await readHeader(resolvedPath, headerLength);
  const { width, height, channels, hasAlpha } = parseImageHeaderMetadata(headerBytes, mimeType);

  return { mimeType, bytes: size, width, height, channels, hasAlpha };
}
217
+
218
/**
 * Loads an image file as base64, optionally passing it through the resize step.
 *
 * @param options - Path/type inputs plus `autoResize` and an optional `maxBytes` limit
 *   (defaults to `MAX_IMAGE_INPUT_BYTES`).
 * @returns The loaded image and descriptive notes, or `null` when no supported image MIME
 *   type is available.
 * @throws ImageInputTooLargeError when the file size, or the number of bytes actually read,
 *   exceeds the limit.
 */
export async function loadImageInput(options: LoadImageInputOptions): Promise<LoadedImageInput | null> {
  const maxBytes = options.maxBytes ?? MAX_IMAGE_INPUT_BYTES;
  const resolvedPath = options.resolvedPath ?? resolveReadPath(options.path, options.cwd);
  const mimeType = options.detectedMimeType ?? (await detectSupportedImageMimeTypeFromFile(resolvedPath));
  if (!mimeType) return null;

  // Reject by reported size first so an oversized file is never read into memory.
  const stat = await Bun.file(resolvedPath).stat();
  if (stat.size > maxBytes) {
    throw new ImageInputTooLargeError(stat.size, maxBytes);
  }

  // Re-check what was actually read, since this can differ from the size reported above.
  const inputBuffer = await fs.readFile(resolvedPath);
  if (inputBuffer.byteLength > maxBytes) {
    throw new ImageInputTooLargeError(inputBuffer.byteLength, maxBytes);
  }

  // Encode straight from the Buffer. Wrapping it in `new Uint8Array(...)` first would
  // duplicate the whole file in memory just to produce the same base64 text.
  let outputData = inputBuffer.toString("base64");
  let outputMimeType = mimeType;
  let outputBytes = inputBuffer.byteLength;
  let dimensionNote: string | undefined;

  if (options.autoResize) {
    try {
      const resized = await resizeImage({ type: "image", data: outputData, mimeType });
      outputData = resized.data;
      outputMimeType = resized.mimeType;
      outputBytes = resized.buffer.byteLength;
      dimensionNote = formatDimensionNote(resized);
    } catch {
      // Best effort by design: a failed resize falls back to the original image.
    }
  }

  let textNote = `Read image file [${outputMimeType}]`;
  if (dimensionNote) {
    textNote += `\n${dimensionNote}`;
  }

  return {
    resolvedPath,
    mimeType: outputMimeType,
    data: outputData,
    textNote,
    dimensionNote,
    bytes: outputBytes,
  };
}