pi-image-preview 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +110 -0
- package/index.ts +59 -0
- package/package.json +35 -0
- package/src/content.ts +12 -0
- package/src/debug.ts +21 -0
- package/src/extension-runtime.ts +332 -0
- package/src/image-content.ts +107 -0
- package/src/image-gallery.ts +345 -0
- package/src/path-utils.ts +92 -0
- package/src/prompt.ts +2 -0
- package/src/tool-result-upgrader.ts +52 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 rielj
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# pi-image-preview
|
|
2
|
+
|
|
3
|
+
Image preview extension for [pi coding agent](https://github.com/mariozechner/pi-coding-agent) — renders inline image thumbnails above the editor using the kitty graphics protocol with full tmux support.
|
|
4
|
+
|
|
5
|
+

|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **Inline image preview** — paste an image (`Ctrl+V`) and a thumbnail renders above the editor
|
|
10
|
+
- **Horizontal layout** — multiple images display side by side
|
|
11
|
+
- **tmux support** — uses kitty's Unicode placeholder protocol (`U=1`) so images are pane-aware (no ghosting across panes)
|
|
12
|
+
- **Auto-cleanup** — delete the image path from editor text and the preview disappears
|
|
13
|
+
- **No editor conflicts** — works alongside vim mode and other editor extensions (does not use `setEditorComponent`)
|
|
14
|
+
- **Image resizing** — leverages pi's built-in WASM image resizer for efficient thumbnails
|
|
15
|
+
- **Screenshot integration** — automatically inlines images from screenshot tool results
|
|
16
|
+
|
|
17
|
+
## Install
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pi install npm:pi-image-preview
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## How it works
|
|
24
|
+
|
|
25
|
+
1. **Paste** an image with `Ctrl+V`
|
|
26
|
+
2. Pi saves the clipboard to a temp file and inserts the path into the editor
|
|
27
|
+
3. The extension **detects the image path**, reads the file, and renders a thumbnail above the editor
|
|
28
|
+
4. The raw file path stays in the editor — the label below the thumbnail shows a truncated version
|
|
29
|
+
5. On **submit**, image paths are stripped from the text and the images are attached to your message
|
|
30
|
+
|
|
31
|
+
## Prerequisites
|
|
32
|
+
|
|
33
|
+
### Terminal: [Kitty](https://sw.kovidgoyal.net/kitty/) (required)
|
|
34
|
+
|
|
35
|
+
This extension uses the **kitty graphics protocol** to render images. It will **not** render images in other terminals (iTerm2, Alacritty, WezTerm, etc.) — it falls back to text labels instead.
|
|
36
|
+
|
|
37
|
+
- **Minimum version**: Kitty 0.28+ (Unicode placeholder support)
|
|
38
|
+
- **Recommended**: Kitty 0.35+ for best compatibility
|
|
39
|
+
|
|
40
|
+
No special kitty config is required — the extension works with default kitty settings.
|
|
41
|
+
|
|
42
|
+
### tmux (optional but supported)
|
|
43
|
+
|
|
44
|
+
If you run pi inside tmux, you need **one config change** in your `~/.tmux.conf`:
|
|
45
|
+
|
|
46
|
+
```tmux
|
|
47
|
+
set -g allow-passthrough all
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Then reload: `tmux source-file ~/.tmux.conf`
|
|
51
|
+
|
|
52
|
+
This allows kitty graphics escape sequences to pass through tmux to the terminal. Without it, images will not render.
|
|
53
|
+
|
|
54
|
+
**tmux version**: 3.3a+ required (added `allow-passthrough` support).
|
|
55
|
+
|
|
56
|
+
### pi coding agent
|
|
57
|
+
|
|
58
|
+
- **Version**: Latest recommended — the extension uses `setWidget`, `getEditorText`, and the `input` event transform API
|
|
59
|
+
- **No additional pi configuration needed** — just install the extension
|
|
60
|
+
|
|
61
|
+
## Supported image formats
|
|
62
|
+
|
|
63
|
+
- PNG
|
|
64
|
+
- JPEG / JPG
|
|
65
|
+
- GIF (first frame)
|
|
66
|
+
- WebP
|
|
67
|
+
|
|
68
|
+
Maximum file size: **50 MB** (larger files are silently skipped).
|
|
69
|
+
|
|
70
|
+
## Limitations
|
|
71
|
+
|
|
72
|
+
- **Kitty terminal only** — other terminals get text-only labels (no image rendering)
|
|
73
|
+
- **macOS / Linux only** — kitty does not run on Windows natively
|
|
74
|
+
- **tmux requires `allow-passthrough all`** — without it, images won't render inside tmux (the extension still works, but shows text fallback)
|
|
75
|
+
- **No image selection/navigation** — this is a simple preview, not a gallery browser
|
|
76
|
+
- **Thumbnail size is fixed** — images are scaled to fit within 25 columns; not configurable yet
|
|
77
|
+
- **Images are not preserved in chat history** — after submitting, the preview clears; the image is sent as an attachment to the model
|
|
78
|
+
- **GIF animation** — only the first frame is displayed
|
|
79
|
+
- **SSH sessions** — kitty graphics protocol does not work over SSH unless using `kitten ssh` (kitty's SSH kitten)
|
|
80
|
+
- **Multiple tmux panes showing pi** — each pane renders independently; switching panes clears/restores images correctly via Unicode placeholders, but rapid switching may briefly show artifacts
|
|
81
|
+
|
|
82
|
+
## How tmux support works
|
|
83
|
+
|
|
84
|
+
Standard kitty graphics render pixels at absolute terminal positions. This causes images to "ghost" across tmux panes — an image rendered in pane 1 is still visible when you switch to pane 2.
|
|
85
|
+
|
|
86
|
+
This extension uses kitty's **Unicode placeholder protocol** instead:
|
|
87
|
+
|
|
88
|
+
1. Image data is transmitted to kitty with the `U=1` flag (stored but not directly rendered)
|
|
89
|
+
2. Special `U+10EEEE` characters with combining diacritics are output where the image should appear
|
|
90
|
+
3. These are **regular text characters** that tmux manages per-pane
|
|
91
|
+
4. Switching panes swaps the text buffer → placeholders disappear → image disappears
|
|
92
|
+
5. Switching back → placeholders redrawn → image reappears
|
|
93
|
+
|
|
94
|
+
## Development
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
# Clone
|
|
98
|
+
git clone https://github.com/rielj/pi-image-preview.git
|
|
99
|
+
cd pi-image-preview
|
|
100
|
+
|
|
101
|
+
# Symlink into pi extensions
|
|
102
|
+
ln -s "$(pwd)" ~/.pi/agent/extensions/image-preview
|
|
103
|
+
|
|
104
|
+
# Reload pi
|
|
105
|
+
# Inside pi, run: /reload
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## License
|
|
109
|
+
|
|
110
|
+
MIT
|
package/index.ts
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { createRequire } from "node:module";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { pathToFileURL } from "node:url";
|
|
4
|
+
import {
|
|
5
|
+
loadImageContentFromPath,
|
|
6
|
+
maybeResizeImage,
|
|
7
|
+
readImageContentFromPathAsync,
|
|
8
|
+
type ImageResizer,
|
|
9
|
+
} from "./src/image-content.ts";
|
|
10
|
+
import { registerImagePreviewExtension } from "./src/extension-runtime.ts";
|
|
11
|
+
import { debugLog } from "./src/debug.ts";
|
|
12
|
+
|
|
13
|
+
let cachedResizerPromise: Promise<ImageResizer | null> | undefined;
|
|
14
|
+
|
|
15
|
+
/**
 * Resolve pi's own image resizer, loading it at most once.
 *
 * The module is located relative to the resolved entry point of
 * `@mariozechner/pi-coding-agent` (`<entry dir>/utils/image-resize.js`) and
 * imported dynamically. The resulting promise is cached in
 * `cachedResizerPromise`, so a failed lookup (which resolves to `null`) is
 * also remembered and not retried.
 *
 * @returns A resizer function, or `null` when the module cannot be loaded or
 *          does not export `resizeImage`.
 */
async function loadPiImageResizer(): Promise<ImageResizer | null> {
  type PiResizeModule = {
    resizeImage?: (image: {
      type: "image";
      data: string;
      mimeType: string;
    }) => Promise<{ data: string; mimeType: string }>;
  };

  const locateAndImport = async (): Promise<ImageResizer | null> => {
    try {
      const nodeRequire = createRequire(import.meta.url);
      const entryFile = nodeRequire.resolve("@mariozechner/pi-coding-agent");
      const resizeModulePath = path.join(
        path.dirname(entryFile),
        "utils",
        "image-resize.js",
      );
      const loaded = (await import(
        pathToFileURL(resizeModulePath).href
      )) as PiResizeModule;

      if (!loaded.resizeImage) return null;

      // Adapt pi's return value to the ImageContent shape used by this package.
      return async (image) => {
        const { data, mimeType } = await loaded.resizeImage!(image);
        return { type: "image", data, mimeType };
      };
    } catch (err) {
      debugLog("Failed to load pi image resizer", err);
      return null;
    }
  };

  if (!cachedResizerPromise) {
    cachedResizerPromise = locateAndImport();
  }
  return cachedResizerPromise;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Extension entry point: registers the image preview with the given pi
 * instance, supplying image loaders that use pi's resizer when it can be
 * loaded.
 */
export default function (pi: any): void {
  registerImagePreviewExtension(pi, {
    readImageContentFromPathAsync,
    maybeResizeImage: async (image) => {
      const resizer = await loadPiImageResizer();
      return maybeResizeImage(image, resizer);
    },
    loadImageContentFromPath: async (filePath) => {
      const resizer = await loadPiImageResizer();
      return loadImageContentFromPath(filePath, resizer);
    },
  });
}
|
package/package.json
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "pi-image-preview",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Image preview extension for pi coding agent — renders inline image thumbnails above the editor using kitty graphics protocol with tmux support",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"pi-package",
|
|
7
|
+
"kitty",
|
|
8
|
+
"image",
|
|
9
|
+
"preview",
|
|
10
|
+
"tmux",
|
|
11
|
+
"terminal",
|
|
12
|
+
"graphics"
|
|
13
|
+
],
|
|
14
|
+
"license": "MIT",
|
|
15
|
+
"author": "rielj",
|
|
16
|
+
"type": "module",
|
|
17
|
+
"files": [
|
|
18
|
+
"index.ts",
|
|
19
|
+
"src/",
|
|
20
|
+
"README.md",
|
|
21
|
+
"LICENSE"
|
|
22
|
+
],
|
|
23
|
+
"pi": {
|
|
24
|
+
"extensions": [
|
|
25
|
+
"./index.ts"
|
|
26
|
+
]
|
|
27
|
+
},
|
|
28
|
+
"repository": {
|
|
29
|
+
"type": "git",
|
|
30
|
+
"url": "git+https://github.com/rielj/pi-image-preview.git"
|
|
31
|
+
},
|
|
32
|
+
"peerDependencies": {
|
|
33
|
+
"@mariozechner/pi-coding-agent": "*"
|
|
34
|
+
}
|
|
35
|
+
}
|
package/src/content.ts
ADDED
package/src/debug.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Debug logging utility for pi-image-preview.
|
|
3
|
+
*
|
|
4
|
+
* Set the PI_IMAGE_PREVIEW_DEBUG environment variable to enable debug output.
|
|
5
|
+
* Logs are written to stderr so they don't interfere with terminal rendering.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* PI_IMAGE_PREVIEW_DEBUG=1 pi
|
|
9
|
+
*/
|
|
10
|
+
// Evaluated once at module load: any non-empty value enables debug output.
const DEBUG_ENABLED = Boolean(process.env.PI_IMAGE_PREVIEW_DEBUG);

/**
 * Write a prefixed debug line to stderr when debugging is enabled.
 *
 * @param message Human-readable description of what happened.
 * @param error   Optional error value; appended to the output only when truthy.
 */
export function debugLog(message: string, error?: unknown): void {
  if (!DEBUG_ENABLED) return;

  const parts: unknown[] = ["[pi-image-preview]", message];
  if (error) {
    parts.push(error);
  }
  console.error(...parts);
}
|
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import type { ImageContent, ContentBlock } from "./content.ts";
|
|
3
|
+
import { ImageGallery, type GalleryImage } from "./image-gallery.ts";
|
|
4
|
+
import { PREFER_INLINE_SCREENSHOT_PROMPT } from "./prompt.ts";
|
|
5
|
+
import { upgradeScreenshotToolResult } from "./tool-result-upgrader.ts";
|
|
6
|
+
import { debugLog } from "./debug.ts";
|
|
7
|
+
|
|
8
|
+
// ── Types ──────────────────────────────────────────────────
|
|
9
|
+
|
|
10
|
+
/** One image currently tracked for the draft being edited. */
type TrackedImage = {
  /** The path exactly as it was matched in the editor text. */
  filePath: string;
  /** The loaded image content (may be replaced once a resize completes). */
  image: ImageContent;
  /** Text shown with the thumbnail. */
  label: string;
};

/** Loads the image at a path, resolving to `null` when it cannot be used. */
type ImageLoader = (filePath: string) => Promise<ImageContent | null>;

/** Functions the extension needs from its host module. */
export type ExtensionDeps = {
  readImageContentFromPathAsync: ImageLoader;
  maybeResizeImage?: (image: ImageContent) => Promise<ImageContent>;
  loadImageContentFromPath: ImageLoader;
};

/** The subset of the pi API used by this extension. */
type PiLike = {
  on(event: string, handler: (...args: any[]) => any): void;
  sendUserMessage(
    content: string | ContentBlock[],
    options?: { deliverAs?: "steer" | "followUp" },
  ): void;
};

/** Widget content accepted by `setWidget`; `undefined` clears the widget. */
type WidgetContent =
  | string[]
  | ((tui: any, theme: any) => any)
  | undefined;

/** The subset of the per-event context used by this extension. */
type CtxLike = {
  cwd: string;
  isIdle(): boolean;
  ui: {
    setWidget(
      key: string,
      content: WidgetContent,
      options?: { placement?: "aboveEditor" | "belowEditor" },
    ): void;
    getEditorText(): string;
    setEditorText(text: string): void;
    theme: any;
  };
};

/** Event shape for the "input" event from pi. */
type InputEvent = {
  text: string;
  images?: ImageContent[];
};

/** Discriminated union for input handler return values. */
type InputResult =
  | { action: "continue" }
  | { action: "handled" }
  | { action: "transform"; text: string; images: ImageContent[] };

/** Local alias used to type the "tool_result" event. */
type ToolResultEvent = import("./tool-result-upgrader.ts").ToolResultEventLike;
|
|
66
|
+
|
|
67
|
+
// ── Constants ──────────────────────────────────────────────
|
|
68
|
+
|
|
69
|
+
const WIDGET_KEY = "image-preview";
|
|
70
|
+
const POLL_INTERVAL_MS = 250;
|
|
71
|
+
|
|
72
|
+
// Matches image file paths:
|
|
73
|
+
// - Absolute: /path/to/image.png
|
|
74
|
+
// - Home-relative: ~/screenshots/image.png
|
|
75
|
+
// - Relative: ./images/image.png, ../images/image.png
|
|
76
|
+
// Whitespace and : * ? " < > | end a match (backslash-escaped spaces are not supported); allowed are
|
|
77
|
+
// parens, #, +, and other special characters.
|
|
78
|
+
const IMAGE_PATH_RE =
|
|
79
|
+
/((?:~\/|\.\.?\/|\/)[^\s:*?"<>|][^\s:*?"<>|]*\.(?:png|jpe?g|gif|webp))(?=\s|$)/gi;
|
|
80
|
+
|
|
81
|
+
/** Produce a label from an image path — just the filename. */
|
|
82
|
+
function trimImageLabel(filePath: string): string {
  // Only the final path segment is used as the label.
  const fileName = path.basename(filePath);
  return fileName;
}
|
|
85
|
+
|
|
86
|
+
// ── Extension ──────────────────────────────────────────────
|
|
87
|
+
|
|
88
|
+
/**
 * Register the image-preview behaviour on a pi instance.
 *
 * After a session starts (or is switched), the editor text is polled every
 * POLL_INTERVAL_MS for image paths. Every path that can be loaded is tracked
 * and shown in a gallery widget above the editor; paths that disappear from
 * the text are dropped again. When the user submits, tracked paths found in
 * the text are stripped out and the corresponding images are attached.
 *
 * @param pi   Host object used to subscribe to events and to send messages.
 * @param deps Image loading / resizing functions supplied by the caller.
 */
export function registerImagePreviewExtension(
  pi: PiLike,
  deps: ExtensionDeps,
): void {
  let tracked: Map<string, TrackedImage> = new Map();
  let gallery: ImageGallery | null = null;
  let pollTimer: ReturnType<typeof setInterval> | null = null;
  let latestCtx: CtxLike | null = null;
  // True while a poll-triggered scan is still awaiting file I/O.
  let scanInFlight = false;

  // ── Helpers ────────────────────────────────────────────

  /** Dispose the current gallery component, if any. */
  function disposeGallery(): void {
    if (gallery) {
      gallery.dispose();
      gallery = null;
    }
  }

  /** Rebuild (or clear) the widget so it reflects the `tracked` map. */
  function refreshWidget(ctx: CtxLike): void {
    // Dispose the previous gallery before it is replaced or removed.
    disposeGallery();

    if (tracked.size === 0) {
      ctx.ui.setWidget(WIDGET_KEY, undefined);
      return;
    }

    const galleryImages: GalleryImage[] = [...tracked.values()].map((t) => ({
      data: t.image.data,
      mimeType: t.image.mimeType,
      label: t.label,
    }));

    ctx.ui.setWidget(
      WIDGET_KEY,
      (_tui: any, theme: any) => {
        const galleryTheme = {
          accent: (s: string) => theme.fg("accent", s),
          muted: (s: string) => theme.fg("muted", s),
          dim: (s: string) => theme.fg("dim", s),
          bold: (s: string) => theme.bold(s),
        };

        gallery = new ImageGallery(galleryTheme);
        gallery.setImages(galleryImages);
        return gallery;
      },
      { placement: "aboveEditor" },
    );
  }

  /** Forget every tracked image and remove the widget. */
  function resetDraft(ctx: CtxLike): void {
    disposeGallery();
    tracked = new Map();
    ctx.ui.setWidget(WIDGET_KEY, undefined);
  }

  /**
   * Scan editor text for image paths.
   * Track new ones, remove ones that are no longer in the text.
   * Async to avoid blocking the event loop with file I/O.
   */
  async function scanEditorText(ctx: CtxLike): Promise<void> {
    let text: string;
    try {
      text = ctx.ui.getEditorText();
    } catch (err) {
      debugLog("Failed to get editor text", err);
      return;
    }

    if (!text) {
      if (tracked.size > 0) {
        tracked = new Map();
        refreshWidget(ctx);
      }
      return;
    }

    // Remember which map this scan works on: resetDraft() swaps in a new map,
    // and results of reads that finish afterwards must not leak into it.
    const draft = tracked;

    // Create a fresh regex each time to avoid stale lastIndex from the `g` flag
    const imagePathRe = new RegExp(IMAGE_PATH_RE.source, IMAGE_PATH_RE.flags);
    const matches = [...text.matchAll(imagePathRe)];
    const currentPaths = new Set<string>();
    let changed = false;

    for (const match of matches) {
      const rawPath = match[1];
      if (!rawPath) continue;
      currentPaths.add(rawPath);

      if (tracked.has(rawPath)) continue;

      // New path — try to load it (async to avoid blocking the event loop)
      const image = await deps.readImageContentFromPathAsync(rawPath);

      // The draft was reset (submit / session change) while the file was
      // loading; the text this scan is based on is stale, so stop here.
      if (tracked !== draft) return;
      if (!image) continue;

      const entry: TrackedImage = {
        filePath: rawPath,
        image,
        label: trimImageLabel(rawPath),
      };
      tracked.set(rawPath, entry);
      changed = true;

      // Resize in the background; the preview shows the original meanwhile.
      if (deps.maybeResizeImage) {
        void deps
          .maybeResizeImage(image)
          .then((resized) => {
            // Guard against the entry having been removed while resize was in-flight
            if (tracked.get(rawPath) === entry) {
              entry.image = resized;
              if (latestCtx) refreshWidget(latestCtx);
            }
          })
          .catch((err) => {
            debugLog(`Failed to resize image ${rawPath}`, err);
          });
      }
    }

    // Remove tracked images whose paths are no longer in the text
    for (const trackedPath of tracked.keys()) {
      if (!currentPaths.has(trackedPath)) {
        tracked.delete(trackedPath);
        changed = true;
      }
    }

    if (changed) {
      refreshWidget(ctx);
    }
  }

  /** (Re)start the editor poll loop. */
  function startPolling(): void {
    stopPolling();
    pollTimer = setInterval(() => {
      const ctx = latestCtx;
      // Skip this tick when the previous scan is still reading files, so two
      // scans never load the same path or race on the tracked map.
      if (!ctx || scanInFlight) return;

      scanInFlight = true;
      scanEditorText(ctx)
        .catch((err) => {
          debugLog("Error during editor text scan", err);
        })
        .finally(() => {
          scanInFlight = false;
        });
    }, POLL_INTERVAL_MS);
  }

  /** Stop the editor poll loop if it is running. */
  function stopPolling(): void {
    if (pollTimer) {
      clearInterval(pollTimer);
      pollTimer = null;
    }
  }

  // ── Event handlers ─────────────────────────────────────

  pi.on("before_agent_start", () => {
    return { systemPrompt: PREFER_INLINE_SCREENSHOT_PROMPT };
  });

  // Clean up resources when the process exits
  const cleanup = (): void => {
    stopPolling();
    disposeGallery();
  };
  process.on("exit", cleanup);

  // Installing a SIGINT/SIGTERM listener removes Node's default behaviour of
  // terminating the process. If someone else also listens, they decide what
  // happens (and the "exit" hook above still cleans up when the process
  // ends). If this is the only listener, clean up and re-raise the signal so
  // the process still terminates.
  for (const signal of ["SIGINT", "SIGTERM"] as const) {
    const handler = (): void => {
      if (process.listenerCount(signal) > 1) return;
      cleanup();
      process.removeListener(signal, handler);
      process.kill(process.pid, signal);
    };
    process.on(signal, handler);
  }

  pi.on("session_start", async (_event: unknown, ctx: CtxLike) => {
    latestCtx = ctx;
    resetDraft(ctx);
    startPolling();
  });

  pi.on("session_switch", async (_event: unknown, ctx: CtxLike) => {
    latestCtx = ctx;
    resetDraft(ctx);
    startPolling();
  });

  pi.on("tool_result", async (event: ToolResultEvent, ctx: CtxLike) => {
    latestCtx = ctx;
    return upgradeScreenshotToolResult(
      event,
      ctx.cwd,
      deps.loadImageContentFromPath,
    );
  });

  // On submit: strip image paths from text, attach actual images
  pi.on("input", async (event: InputEvent, ctx: CtxLike): Promise<InputResult> => {
    latestCtx = ctx;

    if (tracked.size === 0) {
      return { action: "continue" };
    }

    const fullText = (event.text || "").trim();

    // An absolute image path also begins with "/", so a leading slash only
    // means "command" when the text does not begin with a tracked path.
    const beginsWithTrackedPath = [...tracked.keys()].some((trackedPath) => {
      if (!fullText.startsWith(trackedPath)) return false;
      const next = fullText.charAt(trackedPath.length);
      return next === "" || /\s/.test(next);
    });
    const isSlashCommand = fullText.startsWith("/") && !beginsWithTrackedPath;

    // Don't transform commands or shell escapes
    if (isSlashCommand || fullText.startsWith("!")) {
      return { action: "continue" };
    }

    // Find which tracked paths are still in the submitted text
    const usedImages: ImageContent[] = [];
    let strippedText = fullText;

    for (const [trackedPath, entry] of tracked) {
      if (fullText.includes(trackedPath)) {
        usedImages.push(entry.image);
        // Strip the path from the text
        strippedText = strippedText.split(trackedPath).join("");
      }
    }

    if (usedImages.length === 0) {
      return { action: "continue" };
    }

    // Clean up whitespace after stripping paths
    strippedText = strippedText.replace(/\s+/g, " ").trim();

    // Clear state
    resetDraft(ctx);

    // Keep images that were already attached to the event in both outcomes.
    const images = [...(event.images ?? []), ...usedImages];

    if (!strippedText) {
      // Images only, no text — send directly
      pi.sendUserMessage(
        images,
        ctx.isIdle() ? undefined : { deliverAs: "steer" },
      );
      return { action: "handled" };
    }

    return {
      action: "transform",
      text: strippedText,
      images,
    };
  });
}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import fsp from "node:fs/promises";
|
|
3
|
+
import type { ImageContent } from "./content.ts";
|
|
4
|
+
import {
|
|
5
|
+
inferMimeType,
|
|
6
|
+
looksLikeImagePath,
|
|
7
|
+
looksLikeImagePathAsync,
|
|
8
|
+
} from "./path-utils.ts";
|
|
9
|
+
import { debugLog } from "./debug.ts";
|
|
10
|
+
|
|
11
|
+
export type ImageResizer = (image: ImageContent) => Promise<ImageContent>;
|
|
12
|
+
|
|
13
|
+
/** Maximum image file size in bytes (50 MB). Files larger than this are skipped to prevent OOM. */
|
|
14
|
+
const MAX_IMAGE_FILE_SIZE = 50 * 1024 * 1024;
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* Synchronous image read — used only at startup / non-poll paths.
|
|
18
|
+
* Prefer readImageContentFromPathAsync in the poll loop.
|
|
19
|
+
*/
|
|
20
|
+
export function readImageContentFromPath(
  filePath: string,
): ImageContent | null {
  if (!looksLikeImagePath(filePath)) return null;

  try {
    const stat = fs.statSync(filePath);
    // Only regular files are read: a FIFO or device node reports size 0 and
    // would pass the size check, but reading it could block indefinitely.
    if (!stat.isFile()) {
      debugLog(`Skipping image ${filePath}: not a regular file`);
      return null;
    }
    if (stat.size > MAX_IMAGE_FILE_SIZE) {
      debugLog(
        `Skipping image ${filePath}: file size ${(stat.size / 1024 / 1024).toFixed(1)}MB exceeds ${MAX_IMAGE_FILE_SIZE / 1024 / 1024}MB limit`,
      );
      return null;
    }
  } catch (err) {
    debugLog(`Failed to stat image file ${filePath}`, err);
    return null;
  }

  // Guard instead of asserting: never return content without a MIME type.
  const mimeType = inferMimeType(filePath);
  if (!mimeType) {
    debugLog(`Skipping image ${filePath}: unknown MIME type`);
    return null;
  }

  try {
    const bytes = fs.readFileSync(filePath);
    return {
      type: "image",
      data: bytes.toString("base64"),
      mimeType,
    };
  } catch (err) {
    debugLog(`Failed to read image file ${filePath}`, err);
    return null;
  }
}
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* Async image read — non-blocking, preferred in the 250ms poll loop.
|
|
54
|
+
*/
|
|
55
|
+
export async function readImageContentFromPathAsync(
  filePath: string,
): Promise<ImageContent | null> {
  if (!(await looksLikeImagePathAsync(filePath))) return null;

  try {
    const stat = await fsp.stat(filePath);
    // Only regular files are read: a FIFO or device node reports size 0 and
    // would pass the size check, but its read might never complete.
    if (!stat.isFile()) {
      debugLog(`Skipping image ${filePath}: not a regular file`);
      return null;
    }
    if (stat.size > MAX_IMAGE_FILE_SIZE) {
      debugLog(
        `Skipping image ${filePath}: file size ${(stat.size / 1024 / 1024).toFixed(1)}MB exceeds ${MAX_IMAGE_FILE_SIZE / 1024 / 1024}MB limit`,
      );
      return null;
    }
  } catch (err) {
    debugLog(`Failed to stat image file ${filePath}`, err);
    return null;
  }

  // Guard instead of asserting: never return content without a MIME type.
  const mimeType = inferMimeType(filePath);
  if (!mimeType) {
    debugLog(`Skipping image ${filePath}: unknown MIME type`);
    return null;
  }

  try {
    const bytes = await fsp.readFile(filePath);
    return {
      type: "image",
      data: bytes.toString("base64"),
      mimeType,
    };
  } catch (err) {
    debugLog(`Failed to read image file ${filePath}`, err);
    return null;
  }
}
|
|
86
|
+
|
|
87
|
+
/**
 * Run an image through the given resizer, if there is one.
 *
 * @param image       The image to resize.
 * @param resizeImage Optional resizer; when absent the image is returned as is.
 * @returns The resized image, or the original when no resizer was given or
 *          the resizer threw.
 */
export async function maybeResizeImage(
  image: ImageContent,
  resizeImage?: ImageResizer | null,
): Promise<ImageContent> {
  if (resizeImage) {
    try {
      const resized = await resizeImage(image);
      return resized;
    } catch (err) {
      debugLog("Image resize failed, using original", err);
    }
  }
  return image;
}
|
|
99
|
+
|
|
100
|
+
/**
 * Read an image from disk (synchronously) and optionally resize it.
 *
 * @param filePath    Path of the image file.
 * @param resizeImage Optional resizer forwarded to `maybeResizeImage`.
 * @returns The (possibly resized) image, or `null` when it could not be read.
 */
export async function loadImageContentFromPath(
  filePath: string,
  resizeImage?: ImageResizer | null,
): Promise<ImageContent | null> {
  const original = readImageContentFromPath(filePath);
  return original ? maybeResizeImage(original, resizeImage) : null;
}
|
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
import {
|
|
2
|
+
type Component,
|
|
3
|
+
getCapabilities,
|
|
4
|
+
getImageDimensions,
|
|
5
|
+
calculateImageRows,
|
|
6
|
+
getCellDimensions,
|
|
7
|
+
} from "@mariozechner/pi-tui";
|
|
8
|
+
|
|
9
|
+
/** Text-styling callbacks used by the gallery; each maps a string to a styled string. */
export interface GalleryTheme {
  accent: (s: string) => string;
  muted: (s: string) => string;
  dim: (s: string) => string;
  bold: (s: string) => string;
}
|
|
15
|
+
|
|
16
|
+
/** One image handed to the gallery for display. */
export interface GalleryImage {
  /** Image bytes, base64-encoded. */
  data: string; // base64
  mimeType: string;
  /** Text shown with the thumbnail. */
  label: string;
}
|
|
21
|
+
|
|
22
|
+
const THUMB_MAX_WIDTH = 25;
|
|
23
|
+
const GAP = 2; // columns between images
|
|
24
|
+
|
|
25
|
+
// Monotonic counter for kitty image IDs — avoids birthday-paradox collisions
|
|
26
|
+
// that occurred with the previous Math.random() * 254 approach.
|
|
27
|
+
// Uses the full 24-bit range (1–16,777,215) supported by kitty's true-color encoding.
|
|
28
|
+
let nextImageId = 1;

/**
 * Hand out the next image ID.
 *
 * IDs count up from 1 to 0xffffff and then start again at 1; 0 is never
 * returned.
 */
function allocateImageId(): number {
  const MAX_IMAGE_ID = 0xffffff;
  const allocated = nextImageId;
  // Wrap back to 1 after the largest ID has been handed out.
  nextImageId = allocated >= MAX_IMAGE_ID ? 1 : allocated + 1;
  return allocated;
}
|
|
34
|
+
|
|
35
|
+
// ── Kitty Unicode Placeholder Protocol ─────────────────────
|
|
36
|
+
// Instead of rendering pixels directly (which ghost across tmux panes),
|
|
37
|
+
// we transmit the image data and then output U+10EEEE placeholder
|
|
38
|
+
// characters with diacritics encoding row/col. Since these are just
|
|
39
|
+
// text characters, tmux manages them per-pane — images appear/disappear
|
|
40
|
+
// naturally when switching panes.
|
|
41
|
+
//
|
|
42
|
+
// Protocol:
|
|
43
|
+
// 1. Transmit image: ESC_G a=T,U=1,f=100,i=<id>,c=<cols>,r=<rows>,q=2; base64 ESC\
|
|
44
|
+
// 2. Print placeholder chars with foreground color set to image_id:
|
|
45
|
+
// ESC[38;5;<id>m <U+10EEEE><row_diac><col_diac> ... ESC[39m
|
|
46
|
+
//
|
|
47
|
+
// Diacritics: row 0 = U+0305, row 1 = U+030D, row 2 = U+030E, etc.
|
|
48
|
+
// See kitty docs rowcolumn-diacritics.txt
|
|
49
|
+
|
|
50
|
+
// Row/column diacritics from kitty's rowcolumn-diacritics.txt
|
|
51
|
+
// These are the combining characters used to encode row and column numbers
|
|
52
|
+
// First 104 entries of kitty's rowcolumn-diacritics.txt, in file order.
// The entry at index N is the combining mark that encodes row/column N.
// The list only contains marks positioned above the base character, so
// below-marks such as U+0353 must not appear here: a single wrong or missing
// entry shifts every later index and kitty then places cells incorrectly.
// NOTE(review): values should be re-checked against the upstream file when
// kitty is upgraded.
const ROW_COL_DIACRITICS = [
  0x0305, 0x030d, 0x030e, 0x0310, 0x0312, 0x033d, 0x033e, 0x033f,
  0x0346, 0x034a, 0x034b, 0x034c, 0x0350, 0x0351, 0x0352, 0x0357,
  0x035b, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, 0x0368, 0x0369,
  0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f, 0x0483, 0x0484,
  0x0485, 0x0486, 0x0487, 0x0592, 0x0593, 0x0594, 0x0595, 0x0597,
  0x0598, 0x0599, 0x059c, 0x059d, 0x059e, 0x059f, 0x05a0, 0x05a1,
  0x05a8, 0x05a9, 0x05ab, 0x05ac, 0x05af, 0x05c4, 0x0610, 0x0611,
  0x0612, 0x0613, 0x0614, 0x0615, 0x0616, 0x0617, 0x0657, 0x0658,
  0x0659, 0x065a, 0x065b, 0x065d, 0x065e, 0x06d6, 0x06d7, 0x06d8,
  0x06d9, 0x06da, 0x06db, 0x06dc, 0x06df, 0x06e0, 0x06e1, 0x06e2,
  0x06e4, 0x06e7, 0x06e8, 0x06eb, 0x06ec, 0x0730, 0x0732, 0x0733,
  0x0735, 0x0736, 0x073a, 0x073d, 0x073f, 0x0740, 0x0741, 0x0743,
  0x0745, 0x0747, 0x0749, 0x074a, 0x07eb, 0x07ec, 0x07ed, 0x07ee,
];

const PLACEHOLDER_CHAR = String.fromCodePoint(0x10EEEE);

/**
 * Return the combining character that encodes row/column number `n`.
 *
 * @param n Zero-based row or column index.
 * @returns The diacritic for `n`. Any index without a table entry (too
 *          large, negative, or not an integer) falls back to the diacritic
 *          for 0 instead of throwing.
 */
function diacriticFor(n: number): string {
  const FALLBACK_CODE_POINT = 0x0305; // diacritic for index 0
  const codePoint = ROW_COL_DIACRITICS[n] ?? FALLBACK_CODE_POINT;
  return String.fromCodePoint(codePoint);
}
|
|
78
|
+
|
|
79
|
+
/** True when the TMUX environment variable is set to a non-empty value. */
function isInTmux(): boolean {
  const tmuxValue = process.env.TMUX;
  return tmuxValue !== undefined && tmuxValue !== "";
}
|
|
82
|
+
|
|
83
|
+
/**
 * Make kitty graphics commands survive tmux.
 *
 * Outside tmux the input is returned unchanged. Inside tmux every kitty APC
 * sequence (`ESC _ G … ESC \`) found in `sequence` is wrapped in a tmux DCS
 * passthrough envelope, with the ESC bytes of the inner sequence doubled.
 * Text outside those sequences is left as is.
 */
function wrapForTmux(sequence: string): string {
  if (isInTmux()) {
    const kittyApc = /\x1b_G([^\x1b]*)\x1b\\/g;
    return sequence.replace(
      kittyApc,
      (_whole, payload) => `\x1bPtmux;\x1b\x1b_G${payload}\x1b\x1b\\\x1b\\`,
    );
  }
  return sequence;
}
|
|
94
|
+
|
|
95
|
+
/**
 * Send image data to the terminal and create a virtual placement for it.
 *
 * Nothing is drawn at the cursor: the image only becomes visible where
 * U+10EEEE placeholder cells carrying the same image id are printed.
 *
 * Control keys on the first (or only) command:
 *   a=T  transmit and display, U=1  use Unicode placeholders,
 *   f=100, i=<imageId>, c=<columns>, r=<rows>,
 *   q=2  suppress all responses (important in tmux).
 * Payloads longer than 4096 characters are split into chunks; every chunk but
 * the last carries m=1 and the last carries m=0.
 *
 * NOTE(review): f=100 declares the payload as PNG in the kitty protocol, while
 * the caller passes image data of whatever MIME type it holds — confirm that
 * non-PNG images are converted before they reach this function.
 *
 * @param base64Data base64-encoded image payload
 * @param imageId    id the placeholder cells will refer to
 * @param columns    placement width in terminal cells
 * @param rows       placement height in terminal cells
 */
function transmitImageWithPlaceholder(
  base64Data: string,
  imageId: number,
  columns: number,
  rows: number,
): void {
  const CHUNK_SIZE = 4096;
  const firstKeys = `a=T,U=1,f=100,i=${imageId},c=${columns},r=${rows},q=2`;

  // Writes one complete APC command (tmux-wrapped when needed).
  const send = (keys: string, payload: string): void => {
    process.stdout.write(wrapForTmux(`\x1b_G${keys};${payload}\x1b\\`));
  };

  const total = base64Data.length;
  if (total <= CHUNK_SIZE) {
    send(firstKeys, base64Data);
    return;
  }

  // Chunked transfer: only the first chunk carries the full key set.
  for (let start = 0; start < total; start += CHUNK_SIZE) {
    const end = start + CHUNK_SIZE;
    const payload = base64Data.slice(start, end);
    if (start === 0) {
      send(`${firstKeys},m=1`, payload);
    } else {
      send(end >= total ? "m=0" : "m=1", payload);
    }
  }
}
|
|
137
|
+
|
|
138
|
+
/**
 * Ask the terminal to delete the kitty image with the given id
 * (a=d, d=I), suppressing any response (q=2).
 */
function deleteImage(imageId: number): void {
  process.stdout.write(wrapForTmux(`\x1b_Ga=d,d=I,i=${imageId},q=2\x1b\\`));
}
|
|
145
|
+
|
|
146
|
+
/**
 * Build one text row of Unicode placeholder cells for an image.
 *
 * The image id is carried by the foreground colour (256-colour form for ids
 * below 256, 24-bit form otherwise). The first cell carries the row and
 * column-0 diacritics; the remaining cells are bare placeholders, relying on
 * kitty to continue the column index from the left neighbour. The foreground
 * colour is reset at the end of the row.
 *
 * NOTE(review): only the low 24 bits of `imageId` are encoded here. Ids above
 * 0xFFFFFF would need kitty's third diacritic (most significant byte) —
 * confirm the range produced by the id allocator.
 *
 * @param imageId id of a previously transmitted image
 * @param row     zero-based row of the image shown on this line
 * @param columns number of cells to emit
 */
function buildPlaceholderRow(
  imageId: number,
  row: number,
  columns: number,
): string {
  let colorOn: string;
  if (imageId < 256) {
    colorOn = `\x1b[38;5;${imageId}m`;
  } else {
    const red = (imageId >> 16) & 0xff;
    const green = (imageId >> 8) & 0xff;
    const blue = imageId & 0xff;
    colorOn = `\x1b[38;2;${red};${green};${blue}m`;
  }

  // Leading cell: placeholder + row diacritic + column-0 diacritic.
  const firstCell = PLACEHOLDER_CHAR + diacriticFor(row) + diacriticFor(0);

  // Cells 1..columns-1 carry no diacritics.
  const trailingCells = PLACEHOLDER_CHAR.repeat(
    Math.max(0, Math.ceil(columns) - 1),
  );

  return colorOn + firstCell + trailingCells + `\x1b[39m`;
}
|
|
178
|
+
|
|
179
|
+
// ── Gallery Component ──────────────────────────────────────
|
|
180
|
+
|
|
181
|
+
/**
 * Thumbnail strip rendered above the editor.
 *
 * When the terminal reports kitty image support, each image is transmitted to
 * the terminal and drawn through kitty's Unicode placeholder protocol: the
 * rendered lines contain placeholder cells, not pixel data. Because those
 * cells are part of the text buffer, tmux manages them per-pane — no ghosting
 * across panes. Otherwise the gallery lists one text line per image.
 *
 * Rendered lines are cached per width; a cache miss deletes the previously
 * transmitted images and transmits them again under fresh ids.
 */
export class ImageGallery implements Component {
  private images: GalleryImage[] = [];
  private theme: GalleryTheme;
  private cachedLines?: string[];
  private cachedWidth?: number;
  private activeImageIds: number[] = [];

  constructor(theme: GalleryTheme) {
    this.theme = theme;
  }

  /** Replace the displayed images and drop the cached render. */
  setImages(images: GalleryImage[]): void {
    this.images = images;
    this.invalidate();
  }

  /** Forget the cached lines so the next render() rebuilds them. */
  invalidate(): void {
    this.cachedWidth = undefined;
    this.cachedLines = undefined;
  }

  /**
   * Delete every image this gallery has transmitted to the terminal.
   * The render cache is left untouched.
   */
  dispose(): void {
    this.releaseImages();
  }

  /**
   * Produce the gallery lines for the given width.
   *
   * Returns the cached lines when the width is unchanged since the last
   * render; returns an empty array when there are no images.
   */
  render(width: number): string[] {
    const cached = this.cachedLines;
    if (cached !== undefined && this.cachedWidth === width) {
      return cached;
    }

    // A rebuild transmits everything again, so drop the old images first.
    this.releaseImages();

    const output: string[] = [];
    if (this.images.length > 0) {
      const caps = getCapabilities();

      // Header
      const count = this.images.length;
      let headerText = ` 📎 ${count} images attached`;
      if (count === 1) {
        headerText = " 📎 1 image attached";
      }
      output.push(this.theme.accent(headerText));

      if (caps.images === "kitty") {
        this.renderKittyHorizontal(output, width);
      } else {
        this.renderTextFallback(output);
      }
    }

    this.cachedLines = output;
    this.cachedWidth = width;
    return output;
  }

  /** Delete all images currently tracked as transmitted and clear the list. */
  private releaseImages(): void {
    this.activeImageIds.forEach((id) => deleteImage(id));
    this.activeImageIds = [];
  }

  /**
   * Append side-by-side thumbnails plus a label row to `lines`.
   * Falls back to the text listing when fewer than 4 columns per image fit.
   */
  private renderKittyHorizontal(lines: string[], width: number): void {
    const imageCount = this.images.length;

    // Share the usable width evenly so all thumbnails fit on one line.
    const usable = width - 2; // padding
    const gapColumns = Math.max(0, imageCount - 1) * GAP;
    const perImage = Math.floor((usable - gapColumns) / imageCount);
    const thumbWidth = Math.min(THUMB_MAX_WIDTH, perImage);

    if (thumbWidth < 4) {
      // Too narrow for horizontal layout.
      this.renderTextFallback(lines);
      return;
    }

    // Transmit every image and remember where it will be placed.
    const placed: {
      imageId: number;
      rows: number;
      cols: number;
      label: string;
    }[] = [];

    for (const { data, mimeType, label } of this.images) {
      // getImageDimensions yields a falsy value for corrupt or unrecognised
      // data; assume a common aspect ratio so the thumbnail still renders.
      const dims = getImageDimensions(data, mimeType) || {
        widthPx: 800,
        heightPx: 600,
      };
      const rows = calculateImageRows(dims, thumbWidth, getCellDimensions());

      const imageId = allocateImageId();
      this.activeImageIds.push(imageId);

      transmitImageWithPlaceholder(data, imageId, thumbWidth, rows);
      placed.push({ imageId, rows, cols: thumbWidth, label });
    }

    const gap = imageCount > 1 ? " ".repeat(GAP) : "";
    const tallest = Math.max(...placed.map((p) => p.rows));

    // One text line per thumbnail row; shorter images are padded with spaces.
    for (let row = 0; row < tallest; row++) {
      const cells = placed.map((p) =>
        row < p.rows
          ? buildPlaceholderRow(p.imageId, row, p.cols)
          : " ".repeat(p.cols),
      );
      lines.push(" " + cells.join(gap));
    }

    // Label row beneath the thumbnails.
    const labels = placed.map((p) =>
      this.theme.dim(this.fitLabel(p.label, p.cols)),
    );
    lines.push(" " + labels.join(gap));
  }

  /**
   * Fit `label` into exactly `cols` characters: middle-truncate with "…" when
   * too long (e.g. "pi-clipboard-044c…21ad4.png"), otherwise centre it with
   * spaces.
   * NOTE(review): lengths are measured in UTF-16 code units, not terminal
   * columns — confirm labels cannot contain wide characters.
   */
  private fitLabel(label: string, cols: number): string {
    let text = label;
    if (text.length > cols) {
      const keep = cols - 1; // one character is spent on the ellipsis
      const head = Math.ceil(keep / 2);
      const tail = keep - head;
      text = text.slice(0, head) + "…" + text.slice(-tail);
    }

    const totalPad = Math.max(0, cols - text.length);
    const leftPad = Math.floor(totalPad / 2);
    const rightPad = totalPad - leftPad;
    return " ".repeat(leftPad) + text + " ".repeat(rightPad);
  }

  /** Append one muted text line per image (its label) to `lines`. */
  private renderTextFallback(lines: string[]): void {
    for (const { label } of this.images) {
      lines.push(this.theme.muted(` ${label}`));
    }
  }
}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import fsp from "node:fs/promises";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
|
|
5
|
+
/** Supported image extensions (lower-case, without the dot) and their MIME types. */
const IMAGE_MIME_BY_EXT: Record<string, string> = {
  png: "image/png",
  jpg: "image/jpeg",
  jpeg: "image/jpeg",
  gif: "image/gif",
  webp: "image/webp",
};

/**
 * Infer an image MIME type from a path's file extension.
 *
 * The extension is compared case-insensitively. Only the file name is
 * inspected; the file is not opened.
 *
 * @param filePath path or file name to classify
 * @returns the MIME type for a supported image extension, otherwise `null`
 */
export function inferMimeType(filePath: string): string | null {
  const ext = path.extname(filePath).replace(/^\./, "").toLowerCase();
  // Own-property check: a plain object also exposes inherited members such as
  // `constructor` and `__proto__`, so a bare lookup would return a non-null,
  // non-string value for names like "photo.constructor".
  if (!Object.prototype.hasOwnProperty.call(IMAGE_MIME_BY_EXT, ext)) {
    return null;
  }
  return IMAGE_MIME_BY_EXT[ext] ?? null;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Synchronously check whether `filePath` has a supported image extension and
 * refers to an existing regular file. Any stat failure counts as "no".
 */
export function looksLikeImagePath(filePath: string): boolean {
  const recognised = Boolean(inferMimeType(filePath));
  if (!recognised) {
    return false;
  }

  let isRegularFile = false;
  try {
    isRegularFile = fs.statSync(filePath).isFile();
  } catch {
    // Missing or inaccessible path: treated as not an image.
  }
  return isRegularFile;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Async counterpart of looksLikeImagePath — preferred in the poll path so the
 * event loop is not blocked by a synchronous stat.
 *
 * @returns true when the extension is a supported image type and the path is
 *          an existing regular file; false otherwise, including on stat errors
 */
export async function looksLikeImagePathAsync(
  filePath: string,
): Promise<boolean> {
  const recognised = Boolean(inferMimeType(filePath));
  if (!recognised) {
    return false;
  }

  let isRegularFile = false;
  try {
    isRegularFile = (await fsp.stat(filePath)).isFile();
  } catch {
    // Missing or inaccessible path: treated as not an image.
  }
  return isRegularFile;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Return `filePath` unchanged when it is already absolute; otherwise resolve
 * it against `cwd`.
 */
export function resolveMaybeRelativePath(
  filePath: string,
  cwd: string,
): string {
  if (path.isAbsolute(filePath)) {
    return filePath;
  }
  return path.resolve(cwd, filePath);
}
|
|
48
|
+
|
|
49
|
+
/**
 * True when `toolName` is one of the known screenshot tool names or ends with
 * the `_take_screenshot` suffix. Matching is case-sensitive.
 */
export function isScreenshotToolName(toolName: string): boolean {
  const exactNames = ["take_screenshot", "chrome_devtools_take_screenshot"];
  if (exactNames.includes(toolName)) {
    return true;
  }
  return toolName.endsWith("_take_screenshot");
}
|
|
56
|
+
|
|
57
|
+
/**
 * Decide whether a tool-result event comes from a screenshot tool.
 *
 * The event qualifies when its own `toolName` is a screenshot tool name, or
 * when `details` is an object whose string `tool` property is one.
 */
export function isScreenshotToolResult(event: {
  toolName: string;
  details?: unknown;
}): boolean {
  if (isScreenshotToolName(event.toolName)) {
    return true;
  }

  const { details } = event;
  if (typeof details !== "object" || !details) {
    return false;
  }

  const nestedTool = (details as { tool?: unknown }).tool;
  if (typeof nestedTool !== "string") {
    return false;
  }
  return isScreenshotToolName(nestedTool);
}
|
|
66
|
+
|
|
67
|
+
// Matches a whole line of the form "Saved screenshot to <path>"
// (case-insensitive, any line of a multi-line text).
const SCREENSHOT_SAVE_LINE_RE = /^Saved screenshot to\s+(.+)$/gim;

/**
 * Collect the paths announced by "Saved screenshot to …" lines in `text`.
 *
 * Each captured path is trimmed and a single trailing "." is dropped; lines
 * whose path is empty after trimming are ignored. Paths are returned in order
 * of appearance and are not checked against the file system.
 */
export function extractSavedScreenshotPaths(text: string): string[] {
  return Array.from(text.matchAll(SCREENSHOT_SAVE_LINE_RE), (match) =>
    match[1]?.trim(),
  )
    .filter((candidate): candidate is string => Boolean(candidate))
    .map((candidate) => candidate.replace(/\.$/, ""));
}
|
|
78
|
+
|
|
79
|
+
/**
 * Join the text of every non-empty `text` block in `content` with newlines.
 * Blocks of other types, and text blocks without text, are skipped.
 */
export function collectTextContent(
  content: Array<{ type: string; text?: string }>,
): string {
  const pieces: string[] = [];
  for (const block of content) {
    if (block.type === "text" && block.text) {
      pieces.push(block.text);
    }
  }
  return pieces.join("\n");
}
|
|
87
|
+
|
|
88
|
+
/** True when at least one block in `content` has type "image". */
export function hasInlineImageContent(
  content: Array<{ type: string }>,
): boolean {
  for (const block of content) {
    if (block.type === "image") {
      return true;
    }
  }
  return false;
}
|
package/src/prompt.ts
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
/**
 * Prompt text steering the model towards screenshot tool calls that return
 * the image inline instead of saving it to a file path.
 */
export const PREFER_INLINE_SCREENSHOT_PROMPT = "When you need to inspect a screenshot yourself, prefer screenshot tool calls that return the image inline. Avoid passing filePath to screenshot tools unless the user explicitly asked you to save a file or you do not need to inspect the image content yourself.";
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import type { ContentBlock, ImageContent, TextContent } from "./content.ts";
|
|
2
|
+
import {
|
|
3
|
+
collectTextContent,
|
|
4
|
+
extractSavedScreenshotPaths,
|
|
5
|
+
hasInlineImageContent,
|
|
6
|
+
isScreenshotToolResult,
|
|
7
|
+
resolveMaybeRelativePath,
|
|
8
|
+
} from "./path-utils.ts";
|
|
9
|
+
|
|
10
|
+
/** Minimal shape of a tool-result event as consumed by upgradeScreenshotToolResult. */
export type ToolResultEventLike = {
  /** Name of the tool that produced the result. */
  toolName: string;
  /** True when the tool call failed; such results are never upgraded. */
  isError: boolean;
  /** Content blocks returned by the tool. */
  content: ContentBlock[];
  /** Optional extra payload; may carry the originating tool name in a `tool` property. */
  details?: unknown;
};
|
|
16
|
+
|
|
17
|
+
/**
 * Attach saved screenshots to a screenshot tool result as inline images.
 *
 * Applies only to successful screenshot tool results that carry no inline
 * image yet and whose text contains at least one "Saved screenshot to <path>"
 * line. Each announced path is resolved against `cwd` and loaded, one after
 * the other, through `loadImageFromPath`.
 *
 * @param event             the tool result to inspect (not modified)
 * @param cwd               base directory for relative screenshot paths
 * @param loadImageFromPath loader returning the image, or null when the file
 *                          cannot be used
 * @returns `undefined` when the event does not qualify; otherwise new content
 *          consisting of the original blocks followed by the loaded images,
 *          or by a text hint when none of the files could be loaded
 */
export async function upgradeScreenshotToolResult(
  event: ToolResultEventLike,
  cwd: string,
  loadImageFromPath: (filePath: string) => Promise<ImageContent | null>,
): Promise<{ content: ContentBlock[] } | undefined> {
  if (event.isError) return undefined;
  if (!isScreenshotToolResult(event)) return undefined;
  if (hasInlineImageContent(event.content)) return undefined;

  const savedPaths = extractSavedScreenshotPaths(
    collectTextContent(event.content),
  );
  if (savedPaths.length === 0) return undefined;

  const images: ImageContent[] = [];
  for (const rawPath of savedPaths) {
    const image = await loadImageFromPath(
      resolveMaybeRelativePath(rawPath, cwd),
    );
    if (image) {
      images.push(image);
    }
  }

  if (images.length === 0) {
    // Nothing could be loaded: tell the model how to obtain the image instead.
    const hint: TextContent = {
      type: "text",
      text: "[image-preview: screenshot was saved via filePath but the image file was not readable. If you need to inspect the screenshot agentically, retry the screenshot tool without filePath so the image is returned inline.]",
    };
    return { content: [...event.content, hint] };
  }

  return { content: [...event.content, ...images] };
}
|