pi-image-preview 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 rielj
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,110 @@
1
+ # pi-image-preview
2
+
3
+ Image preview extension for [pi coding agent](https://github.com/mariozechner/pi-coding-agent) — renders inline image thumbnails above the editor using the kitty graphics protocol with full tmux support.
4
+
5
+ ![Screenshot](screenshot.png)
6
+
7
+ ## Features
8
+
9
+ - **Inline image preview** — paste an image (`Ctrl+V`) and a thumbnail renders above the editor
10
+ - **Horizontal layout** — multiple images display side by side
11
+ - **tmux support** — uses kitty's Unicode placeholder protocol (`U=1`) so images are pane-aware (no ghosting across panes)
12
+ - **Auto-cleanup** — delete the image path from editor text and the preview disappears
13
+ - **No editor conflicts** — works alongside vim mode and other editor extensions (does not use `setEditorComponent`)
14
+ - **Image resizing** — leverages pi's built-in WASM image resizer for efficient thumbnails
15
+ - **Screenshot integration** — automatically inlines images from screenshot tool results
16
+
17
+ ## Install
18
+
19
+ ```bash
20
+ pi install npm:pi-image-preview
21
+ ```
22
+
23
+ ## How it works
24
+
25
+ 1. **Paste** an image with `Ctrl+V`
26
+ 2. Pi saves the clipboard to a temp file and inserts the path into the editor
27
+ 3. The extension **detects the image path**, reads the file, and renders a thumbnail above the editor
28
+ 4. The raw file path stays in the editor — the label below the thumbnail shows only the file name (middle-truncated when it is wider than the thumbnail)
29
+ 5. On **submit**, image paths are stripped from the text and the images are attached to your message
30
+
31
+ ## Prerequisites
32
+
33
+ ### Terminal: [Kitty](https://sw.kovidgoyal.net/kitty/) (required)
34
+
35
+ This extension uses the **kitty graphics protocol** to render images. It will **not** render images in other terminals (iTerm2, Alacritty, WezTerm, etc.) — it falls back to text labels instead.
36
+
37
+ - **Minimum version**: Kitty 0.28+ (Unicode placeholder support)
38
+ - **Recommended**: Kitty 0.35+ for best compatibility
39
+
40
+ No special kitty config is required — the extension works with default kitty settings.
41
+
42
+ ### tmux (optional but supported)
43
+
44
+ If you run pi inside tmux, you need **one config change** in your `~/.tmux.conf`:
45
+
46
+ ```tmux
47
+ set -g allow-passthrough all
48
+ ```
49
+
50
+ Then reload: `tmux source-file ~/.tmux.conf`
51
+
52
+ This allows kitty graphics escape sequences to pass through tmux to the terminal. Without it, images will not render.
53
+
54
+ **tmux version**: 3.3a+ required (added `allow-passthrough` support). The `all` value was added in tmux 3.4; on 3.3a use `set -g allow-passthrough on` instead.
55
+
56
+ ### pi coding agent
57
+
58
+ - **Version**: Latest recommended — the extension uses `setWidget`, `getEditorText`, and the `input` event transform API
59
+ - **No additional pi configuration needed** — just install the extension
60
+
61
+ ## Supported image formats
62
+
63
+ - PNG
64
+ - JPEG / JPG
65
+ - GIF (first frame)
66
+ - WebP
67
+
68
+ Maximum file size: **50 MB** (larger files are silently skipped).
69
+
70
+ ## Limitations
71
+
72
+ - **Kitty terminal only** — other terminals get text-only labels (no image rendering)
73
+ - **macOS / Linux only** — kitty does not run on Windows natively
74
+ - **tmux requires `allow-passthrough all`** — without it, images won't render inside tmux (the extension still works, but shows text fallback)
75
+ - **No image selection/navigation** — this is a simple preview, not a gallery browser
76
+ - **Thumbnail size is fixed** — images are scaled to fit within 25 columns; not configurable yet
77
+ - **Images are not preserved in chat history** — after submitting, the preview clears; the image is sent as an attachment to the model
78
+ - **GIF animation** — only the first frame is displayed
79
+ - **SSH sessions** — kitty graphics protocol does not work over SSH unless using `kitten ssh` (kitty's SSH kitten)
80
+ - **Multiple tmux panes showing pi** — each pane renders independently; switching panes clears/restores images correctly via Unicode placeholders, but rapid switching may briefly show artifacts
81
+
82
+ ## How tmux support works
83
+
84
+ Standard kitty graphics render pixels at absolute terminal positions. This causes images to "ghost" across tmux panes — an image rendered in pane 1 is still visible when you switch to pane 2.
85
+
86
+ This extension uses kitty's **Unicode placeholder protocol** instead:
87
+
88
+ 1. Image data is transmitted to kitty with `U=1` flag (stored but not directly rendered)
89
+ 2. Special `U+10EEEE` characters with combining diacritics are output where the image should appear
90
+ 3. These are **regular text characters** that tmux manages per-pane
91
+ 4. Switching panes swaps the text buffer → placeholders disappear → image disappears
92
+ 5. Switching back → placeholders redrawn → image reappears
93
+
94
+ ## Development
95
+
96
+ ```bash
97
+ # Clone
98
+ git clone https://github.com/rielj/pi-image-preview.git
99
+ cd pi-image-preview
100
+
101
+ # Symlink into pi extensions
102
+ ln -s "$(pwd)" ~/.pi/agent/extensions/image-preview
103
+
104
+ # Reload pi
105
+ # Inside pi, run: /reload
106
+ ```
107
+
108
+ ## License
109
+
110
+ MIT
package/index.ts ADDED
@@ -0,0 +1,59 @@
1
+ import { createRequire } from "node:module";
2
+ import path from "node:path";
3
+ import { pathToFileURL } from "node:url";
4
+ import {
5
+ loadImageContentFromPath,
6
+ maybeResizeImage,
7
+ readImageContentFromPathAsync,
8
+ type ImageResizer,
9
+ } from "./src/image-content.ts";
10
+ import { registerImagePreviewExtension } from "./src/extension-runtime.ts";
11
+ import { debugLog } from "./src/debug.ts";
12
+
13
/** Memoised load attempt, so pi's resizer module is resolved and imported at most once. */
let cachedResizerPromise: Promise<ImageResizer | null> | undefined;

/** Shape of the pi module that is expected to export `resizeImage`. */
type PiImageResizeModule = {
  resizeImage?: (image: {
    type: "image";
    data: string;
    mimeType: string;
  }) => Promise<{ data: string; mimeType: string }>;
};

/**
 * Resolve `utils/image-resize.js` next to the entry file of
 * `@mariozechner/pi-coding-agent`, import it, and adapt its `resizeImage`
 * export to the `ImageResizer` signature.
 *
 * Any failure (package not resolvable, file missing, import error) is logged
 * through `debugLog` and reported as `null` rather than thrown.
 */
async function importPiImageResizer(): Promise<ImageResizer | null> {
  try {
    const require = createRequire(import.meta.url);
    const piEntry = require.resolve("@mariozechner/pi-coding-agent");
    const resizeModulePath = path.join(
      path.dirname(piEntry),
      "utils",
      "image-resize.js",
    );
    const piModule = (await import(
      pathToFileURL(resizeModulePath).href
    )) as PiImageResizeModule;

    if (!piModule.resizeImage) return null;

    return async (image) => {
      // Copy only the fields ImageContent needs; any extra fields on the
      // resizer's result are dropped.
      const { data, mimeType } = await piModule.resizeImage!(image);
      return { type: "image", data, mimeType };
    };
  } catch (err) {
    debugLog("Failed to load pi image resizer", err);
    return null;
  }
}

/**
 * Return pi's image resizer, or `null` when it cannot be loaded.
 * The first call starts the load; every later call shares the same promise.
 */
async function loadPiImageResizer(): Promise<ImageResizer | null> {
  if (!cachedResizerPromise) {
    cachedResizerPromise = importPiImageResizer();
  }
  return cachedResizerPromise;
}
50
+
51
/**
 * Extension entry point: registers the image preview extension on the given
 * pi API object, wiring the file readers to pi's (lazily loaded) image resizer.
 */
export default function (pi: any): void {
  registerImagePreviewExtension(pi, {
    readImageContentFromPathAsync,
    // The method names below shadow nothing: inside a method shorthand the
    // identifier still refers to the imported function of the same name.
    async maybeResizeImage(image) {
      return maybeResizeImage(image, await loadPiImageResizer());
    },
    async loadImageContentFromPath(filePath) {
      return loadImageContentFromPath(filePath, await loadPiImageResizer());
    },
  });
}
package/package.json ADDED
@@ -0,0 +1,35 @@
1
+ {
2
+ "name": "pi-image-preview",
3
+ "version": "0.1.0",
4
+ "description": "Image preview extension for pi coding agent — renders inline image thumbnails above the editor using kitty graphics protocol with tmux support",
5
+ "keywords": [
6
+ "pi-package",
7
+ "kitty",
8
+ "image",
9
+ "preview",
10
+ "tmux",
11
+ "terminal",
12
+ "graphics"
13
+ ],
14
+ "license": "MIT",
15
+ "author": "rielj",
16
+ "type": "module",
17
+ "files": [
18
+ "index.ts",
19
+ "src/",
20
+ "README.md",
21
+ "LICENSE"
22
+ ],
23
+ "pi": {
24
+ "extensions": [
25
+ "./index.ts"
26
+ ]
27
+ },
28
+ "repository": {
29
+ "type": "git",
30
+ "url": "git+https://github.com/rielj/pi-image-preview.git"
31
+ },
32
+ "peerDependencies": {
33
+ "@mariozechner/pi-coding-agent": "*"
34
+ }
35
+ }
package/src/content.ts ADDED
@@ -0,0 +1,12 @@
1
/** A plain-text part of a message. */
export type TextContent = {
  type: "text";
  text: string;
};

/** An image part of a message; `data` carries the encoded file contents as a string. */
export type ImageContent = {
  type: "image";
  data: string;
  mimeType: string;
};

/** Any message part, discriminated by its `type` field. */
export type ContentBlock = TextContent | ImageContent;
package/src/debug.ts ADDED
@@ -0,0 +1,21 @@
1
/**
 * Debug logging utility for pi-image-preview.
 *
 * Set the PI_IMAGE_PREVIEW_DEBUG environment variable to enable debug output.
 * Logs are written to stderr so they don't interfere with terminal rendering.
 * The variable is read once, when this module is loaded.
 *
 * @example
 * PI_IMAGE_PREVIEW_DEBUG=1 pi
 */
const DEBUG_ENABLED = Boolean(process.env.PI_IMAGE_PREVIEW_DEBUG);

/**
 * Write a `[pi-image-preview]`-prefixed line to stderr when debugging is enabled.
 *
 * @param message Human-readable description of what happened.
 * @param error Optional cause. It is printed whenever it is supplied, including
 *   falsy values such as `0`, `""` or `null` (JavaScript allows throwing anything).
 */
export function debugLog(message: string, error?: unknown): void {
  if (!DEBUG_ENABLED) return;

  const prefix = "[pi-image-preview]";
  if (error === undefined) {
    console.error(prefix, message);
    return;
  }
  console.error(prefix, message, error);
}
@@ -0,0 +1,332 @@
1
+ import path from "node:path";
2
+ import type { ImageContent, ContentBlock } from "./content.ts";
3
+ import { ImageGallery, type GalleryImage } from "./image-gallery.ts";
4
+ import { PREFER_INLINE_SCREENSHOT_PROMPT } from "./prompt.ts";
5
+ import { upgradeScreenshotToolResult } from "./tool-result-upgrader.ts";
6
+ import { debugLog } from "./debug.ts";
7
+
8
+ // ── Types ──────────────────────────────────────────────────
9
+
10
/** An image currently referenced from the editor text. */
interface TrackedImage {
  /** The path exactly as it appears in the editor text. */
  filePath: string;
  /** Loaded image; replaced in place once a background resize finishes. */
  image: ImageContent;
  /** Caption shown with the thumbnail. */
  label: string;
}

/** File-loading helpers injected by the package entry point. */
export type ExtensionDeps = {
  /** Non-blocking read used by the editor poll loop; resolves to `null` when the path is not a usable image. */
  readImageContentFromPathAsync: (
    filePath: string,
  ) => Promise<ImageContent | null>;
  /** Optional downscaler applied to freshly loaded previews. */
  maybeResizeImage?: (image: ImageContent) => Promise<ImageContent>;
  /** Read-and-resize helper handed to the screenshot tool-result upgrader. */
  loadImageContentFromPath: (
    filePath: string,
  ) => Promise<ImageContent | null>;
};

/** The subset of the pi extension API this module calls. */
interface PiLike {
  on(event: string, handler: (...args: any[]) => any): void;
  sendUserMessage(
    content: string | ContentBlock[],
    options?: { deliverAs?: "steer" | "followUp" },
  ): void;
}

/** The subset of the per-event context object this module reads. */
interface CtxLike {
  cwd: string;
  isIdle(): boolean;
  ui: {
    setWidget(
      key: string,
      content:
        | string[]
        | ((tui: any, theme: any) => any)
        | undefined,
      options?: { placement?: "aboveEditor" | "belowEditor" },
    ): void;
    getEditorText(): string;
    setEditorText(text: string): void;
    theme: any;
  };
}

/** Event shape for the "input" event from pi. */
interface InputEvent {
  text: string;
  images?: ImageContent[];
}

/** Discriminated union for input handler return values. */
type InputResult =
  | { action: "continue" }
  | { action: "handled" }
  | { action: "transform"; text: string; images: ImageContent[] };

/** Local alias of the upgrader's event type, used to type the tool_result handler. */
type ToolResultEvent = import("./tool-result-upgrader.ts").ToolResultEventLike;
66
+
67
+ // ── Constants ──────────────────────────────────────────────
68
+
69
+ const WIDGET_KEY = "image-preview";
70
+ const POLL_INTERVAL_MS = 250;
71
+
72
+ // Matches image file paths:
73
+ // - Absolute: /path/to/image.png
74
+ // - Home-relative: ~/screenshots/image.png
75
+ // - Relative: ./images/image.png, ../images/image.png
76
+ // Supports common path characters such as parens, #, and +. Any whitespace ends
77
+ // a match, so paths containing spaces (even backslash-escaped) are not detected.
78
+ const IMAGE_PATH_RE =
79
+ /((?:~\/|\.\.?\/|\/)[^\s:*?"<>|][^\s:*?"<>|]*\.(?:png|jpe?g|gif|webp))(?=\s|$)/gi;
80
+
81
/** Caption for a tracked image: the last segment of its path (the file name). */
function trimImageLabel(filePath: string): string {
  const fileName = path.basename(filePath);
  return fileName;
}
85
+
86
+ // ── Extension ──────────────────────────────────────────────
87
+
88
/** Escape every regular-expression metacharacter so `literal` matches itself verbatim. */
function escapeRegExp(literal: string): string {
  return literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Turn a path as typed in the editor into one the file system can open:
 * `~/…` is expanded with the HOME (or USERPROFILE) environment variable and
 * relative paths are resolved against the session's working directory.
 * When no home directory is known, a `~/…` path is returned unchanged.
 */
function resolveEditorImagePath(rawPath: string, cwd: string): string {
  if (rawPath.startsWith("~/")) {
    const home = process.env.HOME ?? process.env.USERPROFILE;
    return home ? path.join(home, rawPath.slice(2)) : rawPath;
  }
  if (path.isAbsolute(rawPath)) return rawPath;
  return path.resolve(cwd || process.cwd(), rawPath);
}

/**
 * Remove every occurrence of the given paths from `text`.
 *
 * Only the spaces/tabs directly around a removed path are tidied up (dropped
 * at a line edge, collapsed to one space mid-line); line breaks and all other
 * whitespace in the message are left exactly as the user typed them.
 */
function stripImagePaths(text: string, paths: readonly string[]): string {
  // Longest first, so a path that is a prefix of another cannot leave a remnant.
  const longestFirst = [...paths].sort((a, b) => b.length - a.length);

  let result = text;
  for (const imagePath of longestFirst) {
    const pattern = new RegExp(`[ \\t]*${escapeRegExp(imagePath)}[ \\t]*`, "g");
    result = result.replace(
      pattern,
      (match: string, offset: number, whole: string) => {
        const end = offset + match.length;
        const atLineStart = offset === 0 || whole[offset - 1] === "\n";
        const atLineEnd = end === whole.length || whole[end] === "\n";
        return atLineStart || atLineEnd ? "" : " ";
      },
    );
  }
  return result.trim();
}

/**
 * Wire the image preview behaviour into pi.
 *
 * Registers handlers for `before_agent_start`, `session_start`,
 * `session_switch`, `tool_result` and `input`, polls the editor text for image
 * paths, shows the matching thumbnails in a widget above the editor, and on
 * submit replaces the typed paths with real image attachments.
 *
 * @param pi   The pi extension API (event registration and message sending).
 * @param deps File-loading helpers supplied by the package entry point.
 */
export function registerImagePreviewExtension(
  pi: PiLike,
  deps: ExtensionDeps,
): void {
  let tracked: Map<string, TrackedImage> = new Map();
  let gallery: ImageGallery | null = null;
  let pollTimer: ReturnType<typeof setInterval> | null = null;
  let latestCtx: CtxLike | null = null;
  // True while a scan is awaiting file I/O. Poll ticks that arrive meanwhile
  // are skipped so the same file is never loaded (and resized) twice.
  let scanInFlight = false;
  // Incremented on every draft reset. A scan that observes a different value
  // after an await knows its results belong to a discarded draft.
  let draftGeneration = 0;

  // ── Helpers ────────────────────────────────────────────

  /** Free the kitty images owned by the current gallery, if there is one. */
  function disposeGallery(): void {
    if (!gallery) return;
    gallery.dispose();
    gallery = null;
  }

  /** Rebuild (or remove) the preview widget so it mirrors `tracked`. */
  function refreshWidget(ctx: CtxLike): void {
    // The previous gallery is always released first, whether the widget is
    // being removed or replaced.
    disposeGallery();

    if (tracked.size === 0) {
      ctx.ui.setWidget(WIDGET_KEY, undefined);
      return;
    }

    const galleryImages: GalleryImage[] = [...tracked.values()].map((t) => ({
      data: t.image.data,
      mimeType: t.image.mimeType,
      label: t.label,
    }));

    ctx.ui.setWidget(
      WIDGET_KEY,
      (_tui: any, theme: any) => {
        const galleryTheme = {
          accent: (s: string) => theme.fg("accent", s),
          muted: (s: string) => theme.fg("muted", s),
          dim: (s: string) => theme.fg("dim", s),
          bold: (s: string) => theme.bold(s),
        };

        gallery = new ImageGallery(galleryTheme);
        gallery.setImages(galleryImages);
        return gallery;
      },
      { placement: "aboveEditor" },
    );
  }

  /** Forget every tracked image and remove the widget. */
  function resetDraft(ctx: CtxLike): void {
    disposeGallery();
    tracked = new Map();
    draftGeneration += 1;
    ctx.ui.setWidget(WIDGET_KEY, undefined);
  }

  /**
   * Scan editor text for image paths.
   * Track new ones, remove ones that are no longer in the text.
   * Async to avoid blocking the event loop with file I/O.
   */
  async function scanEditorText(ctx: CtxLike): Promise<void> {
    const generation = draftGeneration;

    let text: string;
    try {
      text = ctx.ui.getEditorText();
    } catch (err) {
      debugLog("Failed to get editor text", err);
      return;
    }
    if (!text) {
      if (tracked.size > 0) {
        tracked = new Map();
        refreshWidget(ctx);
      }
      return;
    }

    // Create a fresh regex each time to avoid stale lastIndex from the `g` flag
    const imagePathRe = new RegExp(IMAGE_PATH_RE.source, IMAGE_PATH_RE.flags);
    const currentPaths = new Set<string>();
    let changed = false;

    for (const match of [...text.matchAll(imagePathRe)]) {
      const rawPath = match[1];
      if (!rawPath) continue;
      currentPaths.add(rawPath);

      if (tracked.has(rawPath)) continue;

      // New path — load it without blocking. The map stays keyed by the text
      // as typed (needed to strip it on submit); only the file access uses the
      // expanded/resolved form.
      const image = await deps.readImageContentFromPathAsync(
        resolveEditorImagePath(rawPath, ctx.cwd),
      );
      // The draft was reset (submit or session change) while the file was
      // loading: this scan's view of the editor is obsolete, so drop it.
      if (generation !== draftGeneration) return;
      if (!image) continue;

      const entry: TrackedImage = {
        filePath: rawPath,
        image,
        label: trimImageLabel(rawPath),
      };
      tracked.set(rawPath, entry);
      changed = true;

      // Resize in the background; the full-size image is shown until it lands.
      if (deps.maybeResizeImage) {
        void deps
          .maybeResizeImage(image)
          .then((resized) => {
            // Guard against the entry having been removed or replaced while
            // the resize was in flight.
            if (tracked.get(rawPath) === entry) {
              entry.image = resized;
              if (latestCtx) refreshWidget(latestCtx);
            }
          })
          .catch((err) => {
            debugLog(`Failed to resize image ${rawPath}`, err);
          });
      }
    }

    // Remove tracked images whose paths are no longer in the text
    for (const trackedPath of tracked.keys()) {
      if (!currentPaths.has(trackedPath)) {
        tracked.delete(trackedPath);
        changed = true;
      }
    }

    if (changed) {
      refreshWidget(ctx);
    }
  }

  /** Stop the editor poll loop if it is running. */
  function stopPolling(): void {
    if (pollTimer) {
      clearInterval(pollTimer);
      pollTimer = null;
    }
  }

  /** (Re)start the editor poll loop. */
  function startPolling(): void {
    stopPolling();
    pollTimer = setInterval(() => {
      const ctx = latestCtx;
      if (!ctx || scanInFlight) return;

      scanInFlight = true;
      scanEditorText(ctx)
        .catch((err) => {
          debugLog("Error during editor text scan", err);
        })
        .finally(() => {
          scanInFlight = false;
        });
    }, POLL_INTERVAL_MS);
  }

  /** Shared body of the session_start / session_switch handlers. */
  function beginSession(ctx: CtxLike): void {
    latestCtx = ctx;
    resetDraft(ctx);
    startPolling();
  }

  // ── Event handlers ─────────────────────────────────────

  pi.on("before_agent_start", () => {
    return { systemPrompt: PREFER_INLINE_SCREENSHOT_PROMPT };
  });

  // Clean up resources when the process exits
  const cleanup = (): void => {
    stopPolling();
    disposeGallery();
  };
  process.once("exit", cleanup);
  for (const signal of ["SIGINT", "SIGTERM"] as const) {
    process.once(signal, () => {
      cleanup();
      // Installing a listener removes Node's default "terminate" action for
      // the signal. If nobody else is handling it, re-raise it so the process
      // still stops. (`once` listeners are removed before they are invoked,
      // so this listener is not part of the count.)
      if (process.listenerCount(signal) === 0) {
        process.kill(process.pid, signal);
      }
    });
  }

  pi.on("session_start", async (_event: unknown, ctx: CtxLike) => {
    beginSession(ctx);
  });

  pi.on("session_switch", async (_event: unknown, ctx: CtxLike) => {
    beginSession(ctx);
  });

  pi.on("tool_result", async (event: ToolResultEvent, ctx: CtxLike) => {
    latestCtx = ctx;
    return upgradeScreenshotToolResult(
      event,
      ctx.cwd,
      deps.loadImageContentFromPath,
    );
  });

  // On submit: strip image paths from text, attach actual images
  pi.on("input", async (event: InputEvent, ctx: CtxLike): Promise<InputResult> => {
    latestCtx = ctx;

    if (tracked.size === 0) {
      return { action: "continue" };
    }

    const fullText = (event.text || "").trim();

    // Don't transform commands or shell escapes. A message that merely BEGINS
    // with a tracked absolute image path (paste first, then type) also starts
    // with "/" but is not a command.
    const beginsWithTrackedPath = [...tracked.keys()].some((trackedPath) =>
      fullText.startsWith(trackedPath),
    );
    const isSlashCommand = fullText.startsWith("/") && !beginsWithTrackedPath;
    if (isSlashCommand || fullText.startsWith("!")) {
      return { action: "continue" };
    }

    // Find which tracked paths are still in the submitted text
    const usedPaths: string[] = [];
    const usedImages: ImageContent[] = [];
    for (const [trackedPath, entry] of tracked) {
      if (fullText.includes(trackedPath)) {
        usedPaths.push(trackedPath);
        usedImages.push(entry.image);
      }
    }

    if (usedImages.length === 0) {
      return { action: "continue" };
    }

    const strippedText = stripImagePaths(fullText, usedPaths);

    // Clear state
    resetDraft(ctx);

    if (!strippedText) {
      // Images only, no text — send directly
      pi.sendUserMessage(
        usedImages,
        ctx.isIdle() ? undefined : { deliverAs: "steer" },
      );
      return { action: "handled" };
    }

    return {
      action: "transform",
      text: strippedText,
      images: [...(event.images ?? []), ...usedImages],
    };
  });
}
@@ -0,0 +1,107 @@
1
+ import fs from "node:fs";
2
+ import fsp from "node:fs/promises";
3
+ import type { ImageContent } from "./content.ts";
4
+ import {
5
+ inferMimeType,
6
+ looksLikeImagePath,
7
+ looksLikeImagePathAsync,
8
+ } from "./path-utils.ts";
9
+ import { debugLog } from "./debug.ts";
10
+
11
+ export type ImageResizer = (image: ImageContent) => Promise<ImageContent>;
12
+
13
/** Upper bound on image file size in bytes (50 MB); larger files are skipped to prevent OOM. */
const MAX_IMAGE_FILE_SIZE = 50 * 1024 * 1024;

/**
 * Blocking image read. Prefer readImageContentFromPathAsync wherever the
 * event loop must stay responsive (e.g. the poll loop).
 *
 * @returns The file as base64 image content, or `null` when the path does not
 *   look like an image file, exceeds the size limit, or cannot be read.
 */
export function readImageContentFromPath(
  filePath: string,
): ImageContent | null {
  if (!looksLikeImagePath(filePath)) {
    return null;
  }

  try {
    const { size } = fs.statSync(filePath);
    if (size > MAX_IMAGE_FILE_SIZE) {
      const sizeMb = (size / 1024 / 1024).toFixed(1);
      const limitMb = MAX_IMAGE_FILE_SIZE / 1024 / 1024;
      debugLog(
        `Skipping image ${filePath}: file size ${sizeMb}MB exceeds ${limitMb}MB limit`,
      );
      return null;
    }
  } catch (err) {
    debugLog(`Failed to stat image file ${filePath}`, err);
    return null;
  }

  // looksLikeImagePath already confirmed the extension maps to a MIME type.
  const mimeType = inferMimeType(filePath)!;

  let encoded: string;
  try {
    encoded = fs.readFileSync(filePath).toString("base64");
  } catch (err) {
    debugLog(`Failed to read image file ${filePath}`, err);
    return null;
  }
  return { type: "image", data: encoded, mimeType };
}
51
+
52
/**
 * Non-blocking image read — the variant to use from the 250ms poll loop.
 *
 * @returns The file as base64 image content, or `null` when the path does not
 *   look like an image file, exceeds the size limit, or cannot be read.
 */
export async function readImageContentFromPathAsync(
  filePath: string,
): Promise<ImageContent | null> {
  const isImageFile = await looksLikeImagePathAsync(filePath);
  if (!isImageFile) {
    return null;
  }

  try {
    const { size } = await fsp.stat(filePath);
    if (size > MAX_IMAGE_FILE_SIZE) {
      const sizeMb = (size / 1024 / 1024).toFixed(1);
      const limitMb = MAX_IMAGE_FILE_SIZE / 1024 / 1024;
      debugLog(
        `Skipping image ${filePath}: file size ${sizeMb}MB exceeds ${limitMb}MB limit`,
      );
      return null;
    }
  } catch (err) {
    debugLog(`Failed to stat image file ${filePath}`, err);
    return null;
  }

  // looksLikeImagePathAsync already confirmed the extension maps to a MIME type.
  const mimeType = inferMimeType(filePath)!;

  let encoded: string;
  try {
    encoded = (await fsp.readFile(filePath)).toString("base64");
  } catch (err) {
    debugLog(`Failed to read image file ${filePath}`, err);
    return null;
  }
  return { type: "image", data: encoded, mimeType };
}
86
+
87
/**
 * Run `image` through `resizeImage` when a resizer is available.
 *
 * @returns The resized image, or the original image when no resizer was
 *   supplied or the resizer threw (the failure is logged via `debugLog`).
 */
export async function maybeResizeImage(
  image: ImageContent,
  resizeImage?: ImageResizer | null,
): Promise<ImageContent> {
  if (resizeImage) {
    try {
      return await resizeImage(image);
    } catch (err) {
      debugLog("Image resize failed, using original", err);
    }
  }
  return image;
}
99
+
100
/**
 * Read an image file and, when a resizer is supplied, downscale it.
 *
 * Uses the non-blocking reader: this function is already async, so there is
 * no reason to stall the event loop with a synchronous stat + read of a file
 * that may be tens of megabytes.
 *
 * @param filePath    Path of the image file to load.
 * @param resizeImage Optional resizer; `null`/`undefined` returns the image as read.
 * @returns The (possibly resized) image, or `null` when the file is not a
 *   readable image within the size limit.
 */
export async function loadImageContentFromPath(
  filePath: string,
  resizeImage?: ImageResizer | null,
): Promise<ImageContent | null> {
  const image = await readImageContentFromPathAsync(filePath);
  if (!image) return null;
  return maybeResizeImage(image, resizeImage);
}
@@ -0,0 +1,345 @@
1
+ import {
2
+ type Component,
3
+ getCapabilities,
4
+ getImageDimensions,
5
+ calculateImageRows,
6
+ getCellDimensions,
7
+ } from "@mariozechner/pi-tui";
8
+
9
/** String decorators the gallery uses to colour its text lines. */
export interface GalleryTheme {
  accent: (s: string) => string;
  muted: (s: string) => string;
  dim: (s: string) => string;
  bold: (s: string) => string;
}

/** One thumbnail to display. */
export interface GalleryImage {
  /** Image file contents, base64-encoded. */
  data: string;
  mimeType: string;
  /** Caption shown with the thumbnail. */
  label: string;
}
21
+
22
+ const THUMB_MAX_WIDTH = 25;
23
+ const GAP = 2; // columns between images
24
+
25
// Kitty image IDs are handed out from a monotonic counter rather than drawn at
// random, so two live images can never collide. The counter covers the
// 24-bit range 1–16,777,215 (the most a true-colour foreground can encode).
let nextImageId = 1;

/** Return the next image ID and advance the counter, wrapping from 0xffffff back to 1 (0 is never used). */
function allocateImageId(): number {
  const allocated = nextImageId;
  nextImageId = allocated === 0xffffff ? 1 : allocated + 1;
  return allocated;
}
34
+
35
+ // ── Kitty Unicode Placeholder Protocol ─────────────────────
36
+ // Instead of rendering pixels directly (which ghost across tmux panes),
37
+ // we transmit the image data and then output U+10EEEE placeholder
38
+ // characters with diacritics encoding row/col. Since these are just
39
+ // text characters, tmux manages them per-pane — images appear/disappear
40
+ // naturally when switching panes.
41
+ //
42
+ // Protocol:
43
+ // 1. Transmit image: ESC_G a=T,U=1,f=100,i=<id>,c=<cols>,r=<rows>,q=2; base64 ESC\
44
+ // 2. Print placeholder chars with foreground color set to image_id:
45
+ // ESC[38;5;<id>m <U+10EEEE><row_diac><col_diac> ... ESC[39m
46
+ //
47
+ // Diacritics: row 0 = U+0305, row 1 = U+030D, row 2 = U+030E, etc.
48
+ // See kitty docs rowcolumn-diacritics.txt
49
+
50
// Row/column diacritics from kitty's rowcolumn-diacritics.txt.
// Entry N is the combining character that encodes row (or column) number N.
// The order must match kitty's file exactly: kitty decodes a diacritic by its
// position in that list, so one extra or missing entry shifts every later
// index. This is the first 128 entries of kitty's list.
const ROW_COL_DIACRITICS = [
  0x0305, 0x030d, 0x030e, 0x0310, 0x0312, 0x033d, 0x033e, 0x033f,
  0x0346, 0x034a, 0x034b, 0x034c, 0x0350, 0x0351, 0x0352, 0x0357,
  0x035b, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, 0x0368, 0x0369,
  0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f, 0x0483, 0x0484,
  0x0485, 0x0486, 0x0487, 0x0592, 0x0593, 0x0594, 0x0595, 0x0597,
  0x0598, 0x0599, 0x059c, 0x059d, 0x059e, 0x059f, 0x05a0, 0x05a1,
  0x05a8, 0x05a9, 0x05ab, 0x05ac, 0x05af, 0x05c4, 0x0610, 0x0611,
  0x0612, 0x0613, 0x0614, 0x0615, 0x0616, 0x0617, 0x0657, 0x0658,
  0x0659, 0x065a, 0x065b, 0x065d, 0x065e, 0x06d6, 0x06d7, 0x06d8,
  0x06d9, 0x06da, 0x06db, 0x06dc, 0x06df, 0x06e0, 0x06e1, 0x06e2,
  0x06e4, 0x06e7, 0x06e8, 0x06eb, 0x06ec, 0x0730, 0x0732, 0x0733,
  0x0735, 0x0736, 0x073a, 0x073d, 0x073f, 0x0740, 0x0741, 0x0743,
  0x0745, 0x0747, 0x0749, 0x074a, 0x07eb, 0x07ec, 0x07ed, 0x07ee,
  0x07ef, 0x07f0, 0x07f1, 0x07f3, 0x0816, 0x0817, 0x0818, 0x0819,
  0x081b, 0x081c, 0x081d, 0x081e, 0x081f, 0x0820, 0x0821, 0x0822,
  0x0823, 0x0825, 0x0826, 0x0827, 0x0829, 0x082a, 0x082b, 0x082c,
];

/** The placeholder code point kitty replaces with a cell of the image. */
const PLACEHOLDER_CHAR = String.fromCodePoint(0x10EEEE);

/**
 * Return the combining character that encodes row/column index `n`.
 *
 * Indices outside the table (too large, negative, or fractional) fall back to
 * the diacritic for index 0 — callers are expected to keep thumbnails within
 * the table's range.
 */
function diacriticFor(n: number): string {
  const codePoint = ROW_COL_DIACRITICS[n] ?? ROW_COL_DIACRITICS[0];
  return String.fromCodePoint(codePoint as number);
}
78
+
79
/** True when the TMUX environment variable is set to a non-empty value. */
function isInTmux(): boolean {
  const tmuxEnv = process.env.TMUX;
  return Boolean(tmuxEnv);
}

/**
 * Wrap every kitty graphics APC sequence (`ESC _G … ESC \`) found in
 * `sequence` in a tmux DCS passthrough envelope, doubling the ESC bytes
 * inside it. Outside tmux the input is returned untouched.
 */
function wrapForTmux(sequence: string): string {
  if (!isInTmux()) {
    return sequence;
  }

  const kittyApc = /\x1b_G([^\x1b]*)\x1b\\/g;
  return sequence.replace(kittyApc, (_whole, payload) => {
    const opener = "\x1bPtmux;";
    const doubledStart = "\x1b\x1b_G";
    const doubledEnd = "\x1b\x1b\\";
    const closer = "\x1b\\";
    return opener + doubledStart + payload + doubledEnd + closer;
  });
}
94
+
95
/**
 * Send an image to the terminal and create its virtual (Unicode placeholder)
 * placement in a single kitty graphics command written to stdout.
 * Nothing is drawn by this call; the image becomes visible wherever the
 * matching U+10EEEE placeholder cells are printed.
 *
 * @param base64Data Image payload, base64-encoded (sent with `f=100`).
 * @param imageId    Kitty image ID to assign.
 * @param columns    Width of the placement in cells.
 * @param rows       Height of the placement in cells.
 */
function transmitImageWithPlaceholder(
  base64Data: string,
  imageId: number,
  columns: number,
  rows: number,
): void {
  // a=T: transmit and display, U=1: use unicode placeholders,
  // q=2: suppress all responses (important in tmux)
  const CHUNK_SIZE = 4096;
  const control = `a=T,U=1,f=100,i=${imageId},c=${columns},r=${rows},q=2`;
  const emit = (keys: string, payload: string): void => {
    process.stdout.write(wrapForTmux(`\x1b_G${keys};${payload}\x1b\\`));
  };

  // Small payloads go out as one command with no `m` key.
  if (base64Data.length <= CHUNK_SIZE) {
    emit(control, base64Data);
    return;
  }

  // Chunked transfer: the first chunk carries the full control data, every
  // chunk but the last says m=1 ("more follows"), the last says m=0.
  for (let start = 0; start < base64Data.length; start += CHUNK_SIZE) {
    const piece = base64Data.slice(start, start + CHUNK_SIZE);
    const moreFollows = start + CHUNK_SIZE < base64Data.length;

    if (start === 0) {
      emit(`${control},m=1`, piece);
    } else if (moreFollows) {
      emit("m=1", piece);
    } else {
      emit("m=0", piece);
    }
  }
}
137
+
138
/**
 * Ask the terminal to delete the kitty image with the given ID
 * (`a=d,d=I`), suppressing any response (`q=2`).
 */
function deleteImage(imageId: number): void {
  process.stdout.write(wrapForTmux(`\x1b_Ga=d,d=I,i=${imageId},q=2\x1b\\`));
}
145
+
146
/**
 * Build one text row of Unicode placeholder cells for an image.
 *
 * The image ID travels in the foreground colour (256-colour form for IDs
 * below 256, 24-bit form otherwise). Only the first cell carries row and
 * column diacritics; the remaining cells are bare placeholders whose column
 * kitty infers from the cell to their left.
 *
 * @param imageId Kitty image ID the cells belong to.
 * @param row     Zero-based image row this line represents.
 * @param columns Number of cells in the row.
 */
function buildPlaceholderRow(
  imageId: number,
  row: number,
  columns: number,
): string {
  const red = (imageId >> 16) & 0xff;
  const green = (imageId >> 8) & 0xff;
  const blue = imageId & 0xff;
  const selectImage =
    imageId < 256
      ? `\x1b[38;5;${imageId}m`
      : `\x1b[38;2;${red};${green};${blue}m`;
  const restoreForeground = `\x1b[39m`;

  const anchorCell = PLACEHOLDER_CHAR + diacriticFor(row) + diacriticFor(0);
  // One bare placeholder for each column after the first.
  const followerCount = Math.max(0, Math.ceil(columns) - 1);
  const followers = PLACEHOLDER_CHAR.repeat(followerCount);

  return selectImage + anchorCell + followers + restoreForeground;
}
178
+
179
+ // ── Gallery Component ──────────────────────────────────────
180
+
181
/**
 * Renders image thumbnails above the editor using kitty's Unicode
 * placeholder protocol. Images are part of the text buffer, so
 * tmux manages them per-pane — no ghosting across panes.
 *
 * Rendering has a side effect: `render()` transmits the image data to the
 * terminal via stdout and remembers the allocated kitty image IDs so they can
 * be deleted on the next re-render or on `dispose()`.
 */
export class ImageGallery implements Component {
  private images: GalleryImage[] = [];
  private theme: GalleryTheme;
  /** Output of the last render, reused while the width stays the same. */
  private cachedLines?: string[];
  private cachedWidth?: number;
  /** Kitty image IDs transmitted by the last render and not yet deleted. */
  private activeImageIds: number[] = [];

  constructor(theme: GalleryTheme) {
    this.theme = theme;
  }

  /** Replace the displayed images and force the next render to rebuild. */
  setImages(images: GalleryImage[]): void {
    this.images = images;
    this.invalidate();
  }

  /** Drop the cached render output. */
  invalidate(): void {
    this.cachedLines = undefined;
    this.cachedWidth = undefined;
  }

  /** Delete every image this gallery has transmitted to the terminal. */
  dispose(): void {
    this.releaseTransmittedImages();
  }

  /**
   * Produce the widget's lines for the given width: a header, then either
   * kitty placeholder rows plus a label row, or plain text labels when the
   * terminal reports no kitty image support.
   */
  render(width: number): string[] {
    if (this.cachedLines && this.cachedWidth === width) {
      return this.cachedLines;
    }

    // Delete previous images before re-rendering
    this.releaseTransmittedImages();

    const lines: string[] = [];
    if (this.images.length > 0) {
      const caps = getCapabilities();

      const count = this.images.length;
      const headerText =
        count === 1
          ? " 📎 1 image attached"
          : ` 📎 ${count} images attached`;
      lines.push(this.theme.accent(headerText));

      if (caps.images === "kitty") {
        this.renderKittyHorizontal(lines, width);
      } else {
        this.renderTextFallback(lines);
      }
    }

    this.cachedLines = lines;
    this.cachedWidth = width;
    return this.cachedLines;
  }

  /** Ask the terminal to delete every transmitted image and forget the IDs. */
  private releaseTransmittedImages(): void {
    for (const id of this.activeImageIds) {
      deleteImage(id);
    }
    this.activeImageIds = [];
  }

  /** Append side-by-side thumbnails and a label row beneath them to `lines`. */
  private renderKittyHorizontal(lines: string[], width: number): void {
    // Calculate per-image thumb width so they all fit side by side
    const available = width - 2; // padding
    const totalGaps = Math.max(0, this.images.length - 1) * GAP;
    const thumbWidth = Math.min(
      THUMB_MAX_WIDTH,
      Math.floor((available - totalGaps) / this.images.length),
    );

    if (thumbWidth < 4) {
      // Too narrow for horizontal, fall back to text
      this.renderTextFallback(lines);
      return;
    }

    // A row index can only be encoded if the diacritic table has an entry for
    // it; beyond that every row would be encoded as row 0. Cap the height so
    // very tall images are rendered in a shorter placement instead.
    const maxEncodableRows = ROW_COL_DIACRITICS.length;

    // Prepare each image: transmit data, calculate rows
    const imageInfos: { imageId: number; rows: number; cols: number }[] = [];

    for (const img of this.images) {
      // getImageDimensions returns null for corrupt or unrecognised image data;
      // fall back to a common aspect ratio so the thumbnail still renders.
      const dims = getImageDimensions(img.data, img.mimeType) || {
        widthPx: 800,
        heightPx: 600,
      };

      const rows = Math.min(
        calculateImageRows(dims, thumbWidth, getCellDimensions()),
        maxEncodableRows,
      );
      const imageId = allocateImageId();
      this.activeImageIds.push(imageId);

      transmitImageWithPlaceholder(img.data, imageId, thumbWidth, rows);
      imageInfos.push({ imageId, rows, cols: thumbWidth });
    }

    const maxRows = Math.max(...imageInfos.map((i) => i.rows));

    // Build horizontal rows: each line has placeholder chars for all images side by side
    for (let row = 0; row < maxRows; row++) {
      let line = " ";
      for (let i = 0; i < this.images.length; i++) {
        const info = imageInfos[i];

        if (row < info.rows) {
          // Output placeholder chars for this image at this row
          line += buildPlaceholderRow(info.imageId, row, info.cols);
        } else {
          // Image is shorter, pad with spaces
          line += " ".repeat(info.cols);
        }

        if (i < this.images.length - 1) {
          line += " ".repeat(GAP);
        }
      }
      lines.push(line);
    }

    // Label row beneath images — middle-truncate and center
    let labelLine = " ";
    for (let i = 0; i < this.images.length; i++) {
      const cols = imageInfos[i].cols;
      let label = this.images[i].label;

      // Middle-truncate: "pi-clipboard-044c...21ad4.png"
      if (label.length > cols) {
        const keep = cols - 1; // 1 char for …
        const head = Math.ceil(keep / 2);
        // cols is at least 4 here, so tail is at least 1 and slice(-tail)
        // never degenerates into slice(-0).
        const tail = keep - head;
        label = label.slice(0, head) + "…" + label.slice(-tail);
      }

      // Center the label within the column width
      const totalPad = Math.max(0, cols - label.length);
      const leftPad = Math.floor(totalPad / 2);
      const rightPad = totalPad - leftPad;
      const padded = " ".repeat(leftPad) + label + " ".repeat(rightPad);
      labelLine += this.theme.dim(padded);

      if (i < this.images.length - 1) {
        labelLine += " ".repeat(GAP);
      }
    }
    lines.push(labelLine);
  }

  /** Append one muted text line per image (used when thumbnails cannot be drawn). */
  private renderTextFallback(lines: string[]): void {
    for (const img of this.images) {
      lines.push(
        this.theme.muted(`   ${img.label}`),
      );
    }
  }
}
@@ -0,0 +1,92 @@
1
+ import fs from "node:fs";
2
+ import fsp from "node:fs/promises";
3
+ import path from "node:path";
4
+
5
/** Lower-cased file extension (without the dot) mapped to its image MIME type. */
const IMAGE_MIME_BY_EXT: Record<string, string> = {
  png: "image/png",
  jpg: "image/jpeg",
  jpeg: "image/jpeg",
  gif: "image/gif",
  webp: "image/webp",
};

/**
 * Infers an image MIME type from a path's file extension.
 *
 * The extension is compared case-insensitively. Only the extension is
 * inspected; the file is never opened.
 *
 * @param filePath Path or file name to inspect.
 * @returns The MIME type for a recognized image extension, otherwise `null`.
 */
export function inferMimeType(filePath: string): string | null {
  const ext = path.extname(filePath).replace(/^\./, "").toLowerCase();
  // Own-property check: a bare lookup on a plain object also resolves members
  // inherited from Object.prototype, so "x.constructor" or "x.__proto__" would
  // yield a non-null, non-string value instead of null.
  if (!Object.prototype.hasOwnProperty.call(IMAGE_MIME_BY_EXT, ext)) {
    return null;
  }
  return IMAGE_MIME_BY_EXT[ext] ?? null;
}
17
+
18
/**
 * Synchronously checks whether `filePath` has a recognized image extension
 * and refers to an existing regular file.
 *
 * @param filePath Path to test.
 * @returns `true` only when the extension maps to an image MIME type and
 *   `stat` reports a file; any `stat` failure yields `false`.
 */
export function looksLikeImagePath(filePath: string): boolean {
  if (!inferMimeType(filePath)) return false;

  let isRegularFile = false;
  try {
    isRegularFile = fs.statSync(filePath).isFile();
  } catch {
    // stat failed: treat the path as "not an image file".
  }
  return isRegularFile;
}
27
+
28
/**
 * Non-blocking counterpart of looksLikeImagePath, preferred in the poll path
 * so the event loop is not blocked by a synchronous `stat`.
 *
 * @param filePath Path to test.
 * @returns A promise resolving to `true` only when the extension maps to an
 *   image MIME type and `stat` reports a regular file; it resolves to `false`
 *   on any `stat` failure.
 */
export async function looksLikeImagePathAsync(
  filePath: string,
): Promise<boolean> {
  if (!inferMimeType(filePath)) return false;

  return fsp
    .stat(filePath)
    .then((info) => info.isFile())
    .catch(() => false);
}
41
+
42
/**
 * Turns a possibly relative path into an absolute one.
 *
 * @param filePath Path that may be absolute or relative.
 * @param cwd Base directory used to resolve a relative `filePath`.
 * @returns `filePath` unchanged when it is already absolute, otherwise the
 *   result of resolving it against `cwd`.
 */
export function resolveMaybeRelativePath(
  filePath: string,
  cwd: string,
): string {
  if (path.isAbsolute(filePath)) {
    return filePath;
  }
  return path.resolve(cwd, filePath);
}
48
+
49
/**
 * Tells whether a tool name denotes a screenshot tool.
 *
 * @param toolName Tool name to classify.
 * @returns `true` for `take_screenshot`, `chrome_devtools_take_screenshot`,
 *   or any name ending in `_take_screenshot`.
 */
export function isScreenshotToolName(toolName: string): boolean {
  if (toolName === "take_screenshot") return true;
  if (toolName === "chrome_devtools_take_screenshot") return true;
  return toolName.endsWith("_take_screenshot");
}
56
+
57
/**
 * Decides whether a tool-result event came from a screenshot tool.
 *
 * The event qualifies when its own `toolName` is a screenshot tool name, or
 * when `details` is an object whose `tool` property is a string that is one.
 *
 * @param event Object carrying the tool name and optional details payload.
 * @returns `true` when either name check succeeds.
 */
export function isScreenshotToolResult(event: {
  toolName: string;
  details?: unknown;
}): boolean {
  if (isScreenshotToolName(event.toolName)) return true;

  const { details } = event;
  if (typeof details !== "object" || !details) return false;

  const nestedTool = (details as { tool?: unknown }).tool;
  return typeof nestedTool === "string" && isScreenshotToolName(nestedTool);
}
66
+
67
/** Matches lines of the form "Saved screenshot to <path>" (case-insensitive, per line). */
const SCREENSHOT_SAVE_LINE_RE = /^Saved screenshot to\s+(.+)$/gim;

/**
 * Collects every path announced by a "Saved screenshot to ..." line.
 *
 * Each captured path is trimmed, captures that are empty after trimming are
 * skipped, and a single trailing period is removed.
 *
 * @param text Text to scan.
 * @returns The extracted paths in order of appearance (possibly empty).
 */
export function extractSavedScreenshotPaths(text: string): string[] {
  // matchAll iterates over a clone of the regex, so the shared global
  // pattern's lastIndex is not advanced between calls.
  return Array.from(text.matchAll(SCREENSHOT_SAVE_LINE_RE))
    .map((hit) => hit[1]?.trim())
    .filter((candidate): candidate is string => Boolean(candidate))
    .map((candidate) => candidate.replace(/\.$/, ""));
}
78
+
79
/**
 * Joins the text of all non-empty text blocks into one newline-separated string.
 *
 * @param content Content blocks; only those with `type === "text"` and a
 *   non-empty `text` contribute.
 * @returns The collected text, or an empty string when nothing qualifies.
 */
export function collectTextContent(
  content: Array<{ type: string; text?: string }>,
): string {
  const pieces = content.flatMap((block) =>
    block.type === "text" && block.text ? [block.text] : [],
  );
  return pieces.join("\n");
}
87
+
88
/**
 * Reports whether any content block is an inline image.
 *
 * @param content Content blocks to inspect.
 * @returns `true` when at least one block has `type === "image"`.
 */
export function hasInlineImageContent(
  content: Array<{ type: string }>,
): boolean {
  const isNotImage = (block: { type: string }): boolean =>
    block.type !== "image";
  return !content.every(isNotImage);
}
package/src/prompt.ts ADDED
@@ -0,0 +1,2 @@
1
/**
 * Instruction text asking the model to prefer screenshot tool calls that
 * return the image inline over ones that save it through `filePath`.
 */
export const PREFER_INLINE_SCREENSHOT_PROMPT =
  "When you need to inspect a screenshot yourself, prefer screenshot tool calls that return the image inline. " +
  "Avoid passing filePath to screenshot tools unless the user explicitly asked you to save a file or you do not need to inspect the image content yourself.";
@@ -0,0 +1,52 @@
1
+ import type { ContentBlock, ImageContent, TextContent } from "./content.ts";
2
+ import {
3
+ collectTextContent,
4
+ extractSavedScreenshotPaths,
5
+ hasInlineImageContent,
6
+ isScreenshotToolResult,
7
+ resolveMaybeRelativePath,
8
+ } from "./path-utils.ts";
9
+
10
/**
 * Structural shape of the tool-result event consumed by
 * upgradeScreenshotToolResult.
 */
export type ToolResultEventLike = {
  /** Name of the tool the result belongs to. */
  toolName: string;
  /** Content blocks carried by the result. */
  content: Array<ContentBlock>;
  /** Optional extra payload whose shape is not known statically. */
  details?: unknown;
  /** Error flag; results with this set are never upgraded. */
  isError: boolean;
};
16
+
17
/**
 * Augments a screenshot tool result that only reported saved file paths.
 *
 * The event is left alone (`undefined` is returned) when it is an error, is
 * not a screenshot tool result, already contains an inline image, or its text
 * mentions no saved screenshot path. Otherwise every reported path is resolved
 * against `cwd` and loaded through `loadImageFromPath`.
 *
 * @param event Tool-result event to inspect.
 * @param cwd Base directory used to resolve relative screenshot paths.
 * @param loadImageFromPath Loader returning the image content for a path, or
 *   `null` when it cannot provide one.
 * @returns New content consisting of the original blocks followed by the
 *   loaded images, or by a textual hint when no image could be loaded;
 *   `undefined` when the event needs no change.
 */
export async function upgradeScreenshotToolResult(
  event: ToolResultEventLike,
  cwd: string,
  loadImageFromPath: (filePath: string) => Promise<ImageContent | null>,
): Promise<{ content: ContentBlock[] } | undefined> {
  // Nothing to do for failures, non-screenshot tools, or results that
  // already carry an image.
  if (event.isError) return undefined;
  if (!isScreenshotToolResult(event)) return undefined;
  if (hasInlineImageContent(event.content)) return undefined;

  const reportedPaths = extractSavedScreenshotPaths(
    collectTextContent(event.content),
  );
  if (reportedPaths.length === 0) return undefined;

  // Load one file at a time, in the order the paths were reported.
  const loaded: ImageContent[] = [];
  for (const reported of reportedPaths) {
    const loadedImage = await loadImageFromPath(
      resolveMaybeRelativePath(reported, cwd),
    );
    if (loadedImage) loaded.push(loadedImage);
  }

  if (loaded.length === 0) {
    // No reported file could be loaded: append a hint instead of an image.
    const unreadableNote: TextContent = {
      type: "text",
      text: "[image-preview: screenshot was saved via filePath but the image file was not readable. If you need to inspect the screenshot agentically, retry the screenshot tool without filePath so the image is returned inline.]",
    };
    return { content: [...event.content, unreadableNote] };
  }

  return { content: [...event.content, ...loaded] };
}