pixel-surgeon-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,2552 @@
1
+ #!/usr/bin/env node
2
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
+ import { z } from "zod";
5
+ import sharp from "sharp";
6
+ import { createServer } from "http";
7
+ import { randomUUID } from "crypto";
8
+ import { writeFile, readFile, mkdir, readdir, copyFile, stat } from "fs/promises";
9
+ import { readFileSync } from "fs";
10
+ import { join, extname } from "path";
11
+ import { homedir } from "os";
12
+ import { createRequire } from "module";
13
+ const require = createRequire(import.meta.url);
14
+ const potrace = require("potrace");
15
/** Directory under the user's home that sidecars are written to and saved files are served from. */
const SAVE_DIR = join(homedir(), "Pictures", "pixel-surgeon");
16
/**
 * Open a URL or filesystem path with the platform's default handler.
 *
 * - macOS: `open <target>`
 * - Windows: `cmd /c start "" <target>`
 * - Anything else: try a chain of desktop openers in order, moving to the
 *   next one only when the previous command reports an error.
 *
 * Fire-and-forget: nothing is returned and a failure to open is not
 * reported to the caller.
 *
 * @param {string} target URL or path to open.
 */
function openExternal(target) {
    import("child_process")
        .then(({ execFile }) => {
            if (process.platform === "darwin") {
                execFile("open", [target]);
                return;
            }
            if (process.platform === "win32") {
                // The empty string is the window title argument of `start`.
                execFile("cmd", ["/c", "start", "", target]);
                return;
            }
            // Each candidate carries its own argument list because `gio`
            // needs the `open` subcommand; a bare `gio <target>` always fails.
            const candidates = [
                ["xdg-open", [target]],
                ["gio", ["open", target]],
                ["kde-open5", [target]],
                ["gnome-open", [target]],
                ["wslview", [target]],
            ];
            const tryNext = (i) => {
                if (i >= candidates.length) {
                    return;
                }
                const [command, args] = candidates[i];
                execFile(command, args, (err) => {
                    if (err) {
                        tryNext(i + 1);
                    }
                });
            };
            tryNext(0);
        })
        .catch((err) => {
            // Never leave the dynamic import as an unhandled rejection.
            log(`openExternal failed: ${err instanceof Error ? err.message : String(err)}`);
        });
}
37
/**
 * Read API keys from the `pixel-surgeon` entry of `~/.claude.json`.
 *
 * Best-effort: a missing file, unreadable file or invalid JSON yields empty
 * strings for both keys instead of an error.
 *
 * @returns {{ google: string, openai: string }} keys found in the config
 *   (empty string for any key that is absent).
 */
function loadKeysFromClaudeConfig() {
    let serverEnv;
    try {
        const rawConfig = readFileSync(join(homedir(), ".claude.json"), "utf-8");
        serverEnv = JSON.parse(rawConfig)?.mcpServers?.["pixel-surgeon"]?.env ?? {};
    }
    catch {
        return { google: "", openai: "" };
    }
    const google = serverEnv.GOOGLE_API_KEY ?? "";
    const openai = serverEnv.OPENAI_API_KEY ?? "";
    return { google, openai };
}
48
// ~/.claude.json is consulted only when the environment supplies neither key;
// otherwise the config file is not read at all.
const _claudeKeys = (process.env.GOOGLE_API_KEY || process.env.OPENAI_API_KEY)
    ? { google: "", openai: "" }
    : loadKeysFromClaudeConfig();
// Environment variables take precedence over values found in the config file.
const GOOGLE_API_KEY = process.env.GOOGLE_API_KEY || _claudeKeys.google;
const OPENAI_API_KEY = process.env.OPENAI_API_KEY || _claudeKeys.openai;
51
/**
 * Image model catalogue, keyed by the public model key.
 * `id` is the identifier of the model itself, `provider` selects the backend
 * looked up in `providers`, `label` is the display name.
 */
const MODELS = {
    "gemini-3.1-flash-image": { id: "gemini-3.1-flash-image-preview", label: "Gemini 3.1 Flash Image", provider: "gemini", tier: "paid" },
    "gemini-2.5-flash-image": { id: "gemini-2.5-flash-image", label: "Gemini 2.5 Flash Image", provider: "gemini", tier: "free" },
    "gpt-image-1": { id: "gpt-image-1", label: "GPT Image 1 (OpenAI)", provider: "openai", tier: "paid" },
    "gpt-image-2": { id: "gpt-image-2", label: "GPT Image 2 (OpenAI)", provider: "openai", tier: "paid" },
};
/** Model keys in declaration order. */
const MODEL_KEYS = Object.keys(MODELS);
/** Keys of the preferred Gemini model and of its fallback. */
const GEMINI_DEFAULT = "gemini-3.1-flash-image";
const GEMINI_FALLBACK = "gemini-2.5-flash-image";
/** Model ids corresponding to the two keys above. */
const { id: MODEL_PRIMARY } = MODELS[GEMINI_DEFAULT];
const { id: MODEL_FALLBACK } = MODELS[GEMINI_FALLBACK];
82
/**
 * Tell whether a model id (not a model key) belongs to the Gemini provider.
 *
 * @param {string} modelId value compared against the `id` field of MODELS entries.
 * @returns {boolean} true only when a matching entry exists and its provider is "gemini".
 */
function isGeminiModel(modelId) {
    for (const model of Object.values(MODELS)) {
        if (model.id === modelId) {
            return model.provider === "gemini";
        }
    }
    return false;
}
86
/**
 * Pick the model key to use when the caller does not name one.
 *
 * Order of precedence:
 *   1. `DEFAULT_IMAGE_MODEL`, when it names an entry of MODELS;
 *   2. "gpt-image-2", when `OPENAI_API_KEY` is set in the environment;
 *   3. GEMINI_DEFAULT.
 *
 * @returns {string} a model key.
 */
function getDefaultModelKey() {
    const envModel = process.env.DEFAULT_IMAGE_MODEL;
    // Own-property check: `in` would also accept inherited names such as
    // "constructor" or "toString", which are not model keys.
    if (envModel && Object.hasOwn(MODELS, envModel)) {
        return envModel;
    }
    if (process.env.OPENAI_API_KEY) {
        return "gpt-image-2";
    }
    return GEMINI_DEFAULT;
}
94
/** Provider implementations keyed by provider name ("gemini", "openai"). */
const providers = {};
/**
 * Resolve a model key to its provider implementation and model id.
 *
 * @param {string} [modelKey] key of MODELS; when null/undefined the default
 *   from getDefaultModelKey() is used.
 * @returns {{ provider: object, modelId: string, modelKey: string }}
 * @throws {Error} when the key is not a model, or when the model's provider
 *   has no entry in `providers`.
 */
function getProvider(modelKey) {
    const key = modelKey ?? getDefaultModelKey();
    // Own-property lookup so inherited names ("constructor", "toString", ...)
    // are reported as unknown models instead of yielding a bogus entry.
    const entry = Object.hasOwn(MODELS, key) ? MODELS[key] : undefined;
    if (!entry) {
        throw new Error(`Unknown model "${key}". Available: ${MODEL_KEYS.join(", ")}`);
    }
    const provider = providers[entry.provider];
    if (!provider) {
        const envHint = entry.provider === "gemini" ? "GOOGLE_API_KEY" : "OPENAI_API_KEY";
        throw new Error(`Provider "${entry.provider}" not available. Set ${envHint} env var.`);
    }
    return { provider, modelId: entry.id, modelKey: key };
}
107
/** Build the generateContent URL for a Gemini model id. */
const geminiEndpoint = (model) => {
    return `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`;
};
/** Resolution and aspect-ratio choices offered by the viewer's respin controls. */
const RESPIN_SIZES = ["512", "1K", "2K", "4K"];
const RESPIN_ASPECTS = ["1:1", "16:9", "9:16", "3:4", "4:3", "2:3", "3:2", "4:5", "5:4"];
/** Veo video generation: model, API base and long-running prediction endpoint. */
const VEO_MODEL = "veo-3.1-generate-preview";
const VEO_BASE = "https://generativelanguage.googleapis.com/v1beta";
const VEO_ENDPOINT = `${VEO_BASE}/models/${VEO_MODEL}:predictLongRunning`;
/** Poll every 10 seconds, at most 60 times (10 minutes in total). */
const VEO_POLL_INTERVAL = 10 * 1000;
const VEO_MAX_POLLS = 60;
const MAX_MCP_BYTES = 950_000;
116
/**
 * Write a timestamped diagnostic line to stderr via console.error.
 *
 * @param {string} msg text to log.
 */
function log(msg) {
    const stamp = new Date().toISOString();
    console.error("[pixel-surgeon " + stamp + "] " + msg);
}
119
// ---- Module-level viewer state ----
/** Image records; the viewer looks them up by `id`. */
const imageStore = [];
/** Video records listed by the viewer alongside images. */
const videoStore = [];
let viewerPort = null;
/** Open server-sent-event responses that receive new-item notifications. */
const sseClients = new Set();
let lastPrompt = "";
/** Pending shot selections and pending crops, both keyed by filename. */
const pendingSelections = new Map();
const pendingCrops = new Map();
126
/**
 * Announce a new image: start writing its JSON sidecar (not awaited) and
 * push an SSE `data:` frame describing it to every connected viewer.
 *
 * @param {object} img image record; `id`, `prompt`, `modelUsed`, `imageSize`
 *   and `aspectRatio` are forwarded to the clients.
 */
function notifyViewerClients(img) {
    void writeSidecar(img);
    const { id, prompt, modelUsed, imageSize, aspectRatio } = img;
    const payload = JSON.stringify({ id, prompt, type: "image", modelUsed, imageSize, aspectRatio });
    const frame = `data: ${payload}\n\n`;
    sseClients.forEach((client) => {
        client.write(frame);
    });
}
133
/**
 * Persist an image's metadata next to it in SAVE_DIR as `<basename>.json`.
 *
 * Write failures are logged and swallowed, so the returned promise does not
 * reject because of them.
 *
 * @param {object} img image record; `filename` must be a string.
 * @returns {Promise<void>}
 */
async function writeSidecar(img) {
    // Drop the final extension ("a.b.png" -> "a.b") before appending ".json".
    const stem = img.filename.replace(/\.[^./]+$/, "");
    const target = `${stem}.json`;
    try {
        const serialized = JSON.stringify({
            id: img.id,
            filename: img.filename,
            prompt: img.prompt,
            aspectRatio: img.aspectRatio ?? null,
            imageSize: img.imageSize ?? null,
            modelUsed: img.modelUsed ?? null,
            timestamp: img.timestamp,
        }, null, 2);
        await writeFile(join(SAVE_DIR, target), serialized);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        log(`sidecar write failed for ${img.filename}: ${reason}`);
    }
}
151
/**
 * Push an SSE `data:` frame describing a new video to every connected viewer.
 *
 * @param {object} vid video record; `id`, `prompt` and `filename` are forwarded.
 */
function notifyViewerClientsVideo(vid) {
    const { id, prompt, filename } = vid;
    const frame = `data: ${JSON.stringify({ id, prompt, type: "video", filename })}\n\n`;
    sseClients.forEach((client) => {
        client.write(frame);
    });
}
157
/**
 * Start the viewer HTTP server on an ephemeral port bound to 127.0.0.1.
 *
 * Routes, checked in this order:
 *   /img/<id>            PNG bytes of an image held in imageStore
 *   /file/<name>         image file read from SAVE_DIR
 *   /video/<name>        MP4 file read from SAVE_DIR
 *   /crop/<name>         interactive crop page (cropHtml)
 *   POST /crop-submit    resolves a pending crop, then waits for its result
 *   POST /crop-select    resolves a pending shot selection
 *   /open-folder         opens SAVE_DIR with the platform file manager
 *   GET /history         paginated / searchable listing of JSON sidecars
 *   POST /respin         generates an image from a prompt
 *   /events              server-sent events stream
 *   anything else        the viewer page (viewerHtml)
 *
 * @returns {Promise<number>} resolves with the port the server listens on
 *   (0 when the bound address is not an object).
 */
function startViewer() {
    const JSON_HEADERS = { "Content-Type": "application/json" };
    const HTML_HEADERS = { "Content-Type": "text/html; charset=utf-8" };
    const IMMUTABLE = "public, max-age=31536000, immutable";

    /** Send a JSON response with the given status. */
    const sendJson = (res, status, payload) => {
        res.writeHead(status, JSON_HEADERS);
        res.end(JSON.stringify(payload));
    };
    /** Send a bare status with a plain-text body. */
    const sendPlain = (res, status, text) => {
        res.writeHead(status);
        res.end(text);
    };
    const describeError = (err) => (err instanceof Error ? err.message : String(err));

    /** Percent-decode a path tail; null instead of a thrown URIError on malformed input. */
    const safeDecode = (raw) => {
        try {
            return decodeURIComponent(raw);
        }
        catch {
            return null;
        }
    };

    /**
     * Map a requested name to a path strictly inside SAVE_DIR.
     * Returns null when the normalized path would escape the directory
     * (e.g. a decoded "../secret"), so callers can answer 404.
     */
    const resolveInSaveDir = (name) => {
        if (!name || name.includes("\0")) {
            return null;
        }
        const root = join(SAVE_DIR, "/"); // SAVE_DIR with a trailing separator
        const candidate = join(SAVE_DIR, name);
        return candidate.startsWith(root) ? candidate : null;
    };

    /**
     * Collect the request body as text. setEncoding makes the stream decode
     * UTF-8 across chunk boundaries, so multi-byte characters split between
     * two chunks are not corrupted.
     */
    const readBody = (req) => new Promise((resolve, reject) => {
        let body = "";
        req.setEncoding("utf8");
        req.on("data", (chunk) => { body += chunk; });
        req.on("end", () => resolve(body));
        req.on("error", reject);
    });

    /** Parse the request body as JSON; answers 400 and returns undefined on failure. */
    const readJsonOr400 = async (req, res) => {
        try {
            const parsed = JSON.parse(await readBody(req));
            if (parsed == null) {
                throw new Error("empty payload");
            }
            return parsed;
        }
        catch {
            sendJson(res, 400, { error: "Invalid JSON" });
            return undefined;
        }
    };

    /** Run an async route; any escaped error becomes a 500 instead of an unhandled rejection. */
    const runAsync = (res, work) => {
        work.catch((err) => {
            if (res.headersSent) {
                res.end();
                return;
            }
            sendJson(res, 500, { error: describeError(err) });
        });
    };

    /** Stream a file from SAVE_DIR, or 404 when it is missing or outside the directory. */
    const serveSavedFile = (res, name, buildHeaders) => {
        const fpath = resolveInSaveDir(name);
        if (fpath === null) {
            sendPlain(res, 404, "Not found");
            return;
        }
        readFile(fpath)
            .then((buf) => {
                res.writeHead(200, buildHeaders(buf));
                res.end(buf);
            })
            .catch(() => {
                sendPlain(res, 404, "Not found");
            });
    };

    const handleCropSubmit = async (req, res) => {
        const data = await readJsonOr400(req, res);
        if (data === undefined) {
            return;
        }
        const { filename, x, y, width, height, prompt, shots } = data;
        const pending = pendingCrops.get(filename);
        if (!pending) {
            sendJson(res, 404, { error: "No pending crop for this filename" });
            return;
        }
        // Resolve the MCP tool's await with the crop data
        pending.resolve({ x, y, width, height, prompt: prompt || "", shots: Math.max(1, Math.min(5, shots || 1)) });
        // Hold this HTTP response open until processing completes; a failure
        // here surfaces as a 500 from runAsync, not as "Invalid JSON".
        const result = await pending.onComplete;
        sendJson(res, 200, result);
    };

    const handleCropSelect = async (req, res) => {
        const data = await readJsonOr400(req, res);
        if (data === undefined) {
            return;
        }
        const { filename, selectedIndex } = data;
        const pending = pendingSelections.get(filename);
        if (!pending) {
            sendJson(res, 404, { error: "No pending selection for this filename" });
            return;
        }
        pending.resolve(selectedIndex);
        pendingSelections.delete(filename);
        sendJson(res, 200, { ok: true, filename: pending.filenames[selectedIndex] });
    };

    const handleHistory = async (url, res) => {
        const offset = Math.max(0, parseInt(url.searchParams.get("offset") ?? "0", 10) || 0);
        const limit = Math.min(500, Math.max(1, parseInt(url.searchParams.get("limit") ?? "20", 10) || 20));
        const q = (url.searchParams.get("q") ?? "").toLowerCase().trim();
        await ensureSaveDir();
        const files = await readdir(SAVE_DIR);
        const jsonFiles = files.filter((f) => f.endsWith(".json")).sort().reverse();
        // Unreadable or corrupt sidecars are skipped rather than failing the listing.
        const readMeta = async (f) => {
            try {
                const raw = await readFile(join(SAVE_DIR, f), "utf-8");
                return JSON.parse(raw);
            }
            catch {
                return null;
            }
        };
        let items;
        let total;
        if (q) {
            // Searching has to read every sidecar to know the total match count.
            const all = [];
            for (const f of jsonFiles) {
                const meta = await readMeta(f);
                if (meta && (meta.prompt ?? "").toLowerCase().includes(q)) {
                    all.push(meta);
                }
            }
            total = all.length;
            items = all.slice(offset, offset + limit);
        }
        else {
            total = jsonFiles.length;
            const slice = jsonFiles.slice(offset, offset + limit);
            const loaded = await Promise.all(slice.map(readMeta));
            items = loaded.filter((m) => m !== null);
        }
        const hasMore = offset + items.length < total;
        sendJson(res, 200, { items, total, hasMore, offset, limit });
    };

    const handleRespin = async (req, res) => {
        try {
            const { id, prompt: customPrompt, size, aspect, model: respinModel } = JSON.parse(await readBody(req));
            const source = id ? imageStore.find((i) => i.id === id) : undefined;
            const finalPrompt = (customPrompt && customPrompt.trim())
                ? customPrompt.trim()
                : source?.prompt;
            if (!finalPrompt) {
                sendJson(res, 400, { error: "No prompt provided and source not in live store" });
                return;
            }
            const finalSize = (typeof size === "string" && size) ? size : (source?.imageSize ?? "1K");
            const finalAspect = (typeof aspect === "string" && aspect) ? aspect : (source?.aspectRatio ?? "1:1");
            // Own-property check so inherited names ("constructor", ...) are not treated as models.
            const finalModel = (typeof respinModel === "string" && Object.hasOwn(MODELS, respinModel)) ? respinModel : undefined;
            log(`respin: re-generating from "${finalPrompt.slice(0, 80)}..." (${finalSize}, ${finalAspect}${finalModel ? `, model=${finalModel}` : ""})`);
            const result = await generateAndStore(finalPrompt, finalAspect, finalSize, finalModel);
            sendJson(res, 200, { ok: true, id: imageStore[imageStore.length - 1].id, filename: result.filename });
        }
        catch (err) {
            const msg = describeError(err);
            log(`respin error: ${msg}`);
            sendJson(res, 500, { error: msg });
        }
    };

    const handleRequest = (req, res) => {
        let url;
        try {
            url = new URL(req.url ?? "/", `http://localhost`);
        }
        catch {
            // e.g. a request target of "//" is not a parsable URL; do not let it crash the process.
            sendPlain(res, 400, "Bad request");
            return;
        }
        const { pathname } = url;
        if (pathname.startsWith("/img/")) {
            const id = pathname.slice(5);
            const img = imageStore.find((i) => i.id === id);
            if (!img) {
                sendPlain(res, 404, "Not found");
                return;
            }
            res.writeHead(200, { "Content-Type": "image/png", "Cache-Control": IMMUTABLE });
            res.end(img.fullPng);
            return;
        }
        // Serve image files directly from disk by filename
        if (pathname.startsWith("/file/")) {
            const fname = safeDecode(pathname.slice(6));
            serveSavedFile(res, fname, () => {
                const ext = extname(fname).toLowerCase();
                const mime = ext === ".jpg" || ext === ".jpeg" ? "image/jpeg" : ext === ".webp" ? "image/webp" : "image/png";
                return { "Content-Type": mime, "Cache-Control": IMMUTABLE };
            });
            return;
        }
        // Serve video files by filename
        if (pathname.startsWith("/video/")) {
            const fname = safeDecode(pathname.slice(7));
            serveSavedFile(res, fname, (buf) => ({
                "Content-Type": "video/mp4",
                "Content-Length": buf.length.toString(),
                "Cache-Control": IMMUTABLE,
            }));
            return;
        }
        // Interactive crop UI
        if (pathname.startsWith("/crop/")) {
            const fname = safeDecode(pathname.slice(6));
            if (fname === null) {
                sendPlain(res, 400, "Bad request");
                return;
            }
            res.writeHead(200, HTML_HEADERS);
            res.end(cropHtml(fname));
            return;
        }
        // Crop submission endpoint
        if (pathname === "/crop-submit" && req.method === "POST") {
            runAsync(res, handleCropSubmit(req, res));
            return;
        }
        // Selection endpoint — user picks their preferred shot
        if (pathname === "/crop-select" && req.method === "POST") {
            runAsync(res, handleCropSelect(req, res));
            return;
        }
        if (pathname === "/open-folder") {
            openExternal(SAVE_DIR);
            res.writeHead(204);
            res.end();
            return;
        }
        // History — paginated/searchable listing of persisted image sidecars
        if (pathname === "/history" && req.method === "GET") {
            runAsync(res, handleHistory(url, res));
            return;
        }
        // Respin endpoint — regenerate an image with the same prompt
        if (pathname === "/respin" && req.method === "POST") {
            runAsync(res, handleRespin(req, res));
            return;
        }
        if (pathname === "/events") {
            res.writeHead(200, {
                "Content-Type": "text/event-stream",
                "Cache-Control": "no-cache",
                Connection: "keep-alive",
            });
            // SSE comment line every 30 s as a keep-alive.
            const interval = setInterval(() => res.write(":\n\n"), 30000);
            sseClients.add(res);
            req.on("close", () => {
                clearInterval(interval);
                sseClients.delete(res);
            });
            return;
        }
        res.writeHead(200, HTML_HEADERS);
        res.end(viewerHtml());
    };

    return new Promise((resolve) => {
        const srv = createServer(handleRequest);
        srv.listen(0, "127.0.0.1", () => {
            const addr = srv.address();
            const port = typeof addr === "object" && addr ? addr.port : 0;
            resolve(port);
        });
    });
}
387
+ function viewerHtml() {
388
+ const items = [
389
+ ...imageStore.map((i) => ({ type: "image", data: i })),
390
+ ...videoStore.map((v) => ({ type: "video", data: v })),
391
+ ].sort((a, b) => b.data.timestamp - a.data.timestamp);
392
+ const itemTags = items
393
+ .map((item) => {
394
+ if (item.type === "video") {
395
+ const vid = item.data;
396
+ return `<div class="img-entry" id="vid-${vid.id}">
397
+ <div class="prompt-row">
398
+ <textarea class="prompt-edit" readonly>${esc(vid.prompt)}</textarea>
399
+ <span class="video-badge">VIDEO</span>
400
+ </div>
401
+ <video src="/video/${encodeURIComponent(vid.filename)}" controls loop playsinline style="max-width:100%;"></video>
402
+ </div>`;
403
+ }
404
+ const img = item.data;
405
+ const isFallback = img.modelUsed && img.modelUsed !== MODEL_PRIMARY && isGeminiModel(img.modelUsed);
406
+ const fallbackBanner = isFallback
407
+ ? `<div class="fallback-banner">⚠️ Generated with <strong>${esc(img.modelUsed)}</strong> (free-tier fallback). Upgrade to <strong>${esc(MODEL_PRIMARY)}</strong> for higher-quality imagegen — <a href="https://aistudio.google.com/" target="_blank">top up credits</a>.</div>`
408
+ : "";
409
+ const curSize = img.imageSize ?? "1K";
410
+ const curAspect = img.aspectRatio ?? "1:1";
411
+ const curModel = img.modelUsed ?? "";
412
+ const sizeOpts = RESPIN_SIZES.map(s => `<option value="${s}"${s === curSize ? " selected" : ""}>${s}</option>`).join("");
413
+ const aspectOpts = RESPIN_ASPECTS.map(a => `<option value="${a}"${a === curAspect ? " selected" : ""}>${a}</option>`).join("");
414
+ const modelOpts = MODEL_KEYS.map(k => `<option value="${k}"${MODELS[k].id === curModel ? " selected" : ""}>${esc(MODELS[k].label)}</option>`).join("");
415
+ return `<div class="img-entry" id="img-${img.id}">
416
+ <div class="prompt-row">
417
+ <textarea class="prompt-edit" data-id="${img.id}">${esc(img.prompt)}</textarea>
418
+ <div class="respin-controls">
419
+ <select class="respin-select" data-size="${img.id}" title="Resolution">${sizeOpts}</select>
420
+ <select class="respin-select" data-aspect="${img.id}" title="Aspect ratio">${aspectOpts}</select>
421
+ <select class="respin-select" data-model="${img.id}" title="Model">${modelOpts}</select>
422
+ <button class="respin-btn" onclick="respin('${img.id}', this)" title="Regenerate (edit prompt / size / aspect above)">&#x21bb; Respin</button>
423
+ </div>
424
+ </div>
425
+ ${fallbackBanner}
426
+ <div class="img-wrapper">
427
+ <span class="model-label">${esc(curModel)}</span>
428
+ <img src="/img/${img.id}" />
429
+ </div>
430
+ </div>`;
431
+ })
432
+ .join("\n");
433
+ const latestImage = items.find(i => i.type === "image")?.data;
434
+ const topSize = latestImage?.imageSize ?? "1K";
435
+ const topAspect = latestImage?.aspectRatio ?? "1:1";
436
+ const topModel = latestImage?.modelUsed ?? "";
437
+ const topPrompt = latestImage?.prompt ?? lastPrompt;
438
+ const topSizeOpts = RESPIN_SIZES.map(s => `<option value="${s}"${s === topSize ? " selected" : ""}>${s}</option>`).join("");
439
+ const topAspectOpts = RESPIN_ASPECTS.map(a => `<option value="${a}"${a === topAspect ? " selected" : ""}>${a}</option>`).join("");
440
+ const topModelOpts = MODEL_KEYS.map(k => `<option value="${k}"${MODELS[k].id === topModel ? " selected" : ""}>${esc(MODELS[k].label)}</option>`).join("");
441
+ return `<!DOCTYPE html>
442
+ <html><head><title>pixel-surgeon-mcp</title>
443
+ <style>
444
+ body { margin: 20px; background: #1a1a1a; color: #ccc; font-family: system-ui; }
445
+ .page-title { position: fixed; top: 12px; right: 16px; font-size: 12px; font-family: ui-monospace, monospace; color: #555; z-index: 100; pointer-events: none; letter-spacing: 0.5px; }
446
+ img { max-width: 100%; }
447
+ video { max-width: 100%; border-radius: 4px; }
448
+ div.img-entry { margin-bottom: 24px; }
449
+ p { margin: 0 0 8px 0; font-size: 14px; color: #999; }
450
+ #empty { display: ${items.length === 0 ? "block" : "none"}; }
451
+ #open-folder { background: #333; color: #ccc; border: 1px solid #555; padding: 8px 16px; cursor: pointer; font-size: 14px; font-family: system-ui; margin-bottom: 20px; }
452
+ #open-folder:hover { background: #444; }
453
+ #top-prompt { margin-bottom: 20px; padding-bottom: 16px; border-bottom: 1px solid #333; }
454
+ .prompt-row { display: flex; gap: 8px; align-items: flex-start; margin-bottom: 8px; }
455
+ .prompt-edit { flex: 1; background: #252525; color: #bbb; border: 1px solid #444; padding: 8px; font-size: 13px; font-family: system-ui; border-radius: 4px; resize: vertical; min-height: 48px; line-height: 1.4; }
456
+ .prompt-edit:focus { border-color: #3a6a9b; color: #ddd; outline: none; }
457
+ .respin-controls { display: flex; flex-direction: column; gap: 4px; align-self: flex-start; }
458
+ .respin-select { background: #252525; color: #bbb; border: 1px solid #444; padding: 4px 6px; font-size: 12px; font-family: system-ui; border-radius: 4px; cursor: pointer; min-width: 72px; }
459
+ .respin-select:hover { border-color: #3a6a9b; color: #ddd; }
460
+ .respin-btn { background: #2a4a6b; color: #8bc4ff; border: 1px solid #3a6a9b; padding: 8px 16px; cursor: pointer; font-size: 13px; font-family: system-ui; border-radius: 4px; transition: all 0.15s; white-space: nowrap; }
461
+ .respin-btn:hover { background: #3a6a9b; color: #fff; }
462
+ .respin-btn:disabled { opacity: 0.5; cursor: wait; }
463
+ .video-badge { background: #6b2a2a; color: #ff8b8b; border: 1px solid #9b3a3a; padding: 8px 16px; font-size: 11px; font-family: system-ui; border-radius: 4px; white-space: nowrap; align-self: flex-start; font-weight: 600; letter-spacing: 0.5px; }
464
+ .img-wrapper { position: relative; }
465
+ .model-label { position: absolute; top: 8px; left: 8px; background: rgba(0,0,0,0.7); color: #8bc4ff; padding: 3px 8px; font-size: 11px; font-family: monospace; border-radius: 3px; z-index: 1; pointer-events: none; }
466
+ .fallback-banner { background: #3a2e12; color: #f0c066; border: 1px solid #7a5c20; padding: 8px 12px; font-size: 12px; border-radius: 4px; margin-bottom: 8px; line-height: 1.5; }
467
+ .fallback-banner a { color: #ffd988; text-decoration: underline; }
468
+ .fallback-banner strong { color: #ffdf9e; }
469
+ .tabs { display: flex; gap: 4px; margin-bottom: 16px; border-bottom: 1px solid #333; }
470
+ .tab-btn { background: transparent; color: #888; border: none; border-bottom: 2px solid transparent; padding: 10px 18px; cursor: pointer; font-size: 14px; font-family: system-ui; transition: all 0.15s; }
471
+ .tab-btn:hover { color: #ccc; }
472
+ .tab-btn.active { color: #8bc4ff; border-bottom-color: #3a6a9b; }
473
+ .tab-pane { display: none; }
474
+ .tab-pane.active { display: block; }
475
+ .history-toolbar { display: flex; gap: 8px; margin-bottom: 16px; align-items: center; flex-wrap: wrap; }
476
+ .history-search { flex: 1; min-width: 240px; background: #252525; color: #ddd; border: 1px solid #444; padding: 8px 12px; font-size: 13px; font-family: system-ui; border-radius: 4px; }
477
+ .history-search:focus { border-color: #3a6a9b; outline: none; }
478
+ .history-status { color: #777; font-size: 12px; }
479
+ .history-load-more { background: #2a4a6b; color: #8bc4ff; border: 1px solid #3a6a9b; padding: 10px 24px; cursor: pointer; font-size: 13px; font-family: system-ui; border-radius: 4px; margin: 16px auto; display: block; }
480
+ .history-load-more:hover { background: #3a6a9b; color: #fff; }
481
+ .history-load-more:disabled { opacity: 0.5; cursor: wait; }
482
+ .history-entry { margin-bottom: 24px; padding-bottom: 16px; border-bottom: 1px solid #2a2a2a; }
483
+ .history-meta { color: #777; font-size: 11px; font-family: ui-monospace, monospace; margin-top: 4px; }
484
+ .history-prompt { background: #252525; color: #bbb; border: 1px solid #444; padding: 8px; font-size: 12px; border-radius: 4px; margin-bottom: 8px; max-height: 140px; overflow-y: auto; white-space: pre-wrap; line-height: 1.4; }
485
+ </style></head><body>
486
+ <div class="page-title">pixel-surgeon-mcp image viewer</div>
487
+ <div class="tabs">
488
+ <button class="tab-btn active" data-tab="live" onclick="switchTab('live')">Live</button>
489
+ <button class="tab-btn" data-tab="history" onclick="switchTab('history')">History</button>
490
+ </div>
491
+ <div class="tab-pane active" id="tab-live">
492
+ <div id="top-prompt">
493
+ <div class="prompt-row">
494
+ <textarea class="prompt-edit" id="top-prompt-text" placeholder="Describe an image to generate...">${esc(topPrompt)}</textarea>
495
+ <div class="respin-controls">
496
+ <select class="respin-select" id="top-size" title="Resolution">${topSizeOpts}</select>
497
+ <select class="respin-select" id="top-aspect" title="Aspect ratio">${topAspectOpts}</select>
498
+ <select class="respin-select" id="top-model" title="Model">${topModelOpts}</select>
499
+ <button class="respin-btn" id="top-generate-btn" onclick="generateFromTop(this)">&#x2728; Generate</button>
500
+ </div>
501
+ </div>
502
+ </div>
503
+ <button id="open-folder" onclick="fetch('/open-folder',{method:'POST'})">Open in Finder</button>
504
+ <p id="empty">No images yet — type a prompt above to get started.</p>
505
+ <div id="gallery">${itemTags}</div>
506
+ </div>
507
+ <div class="tab-pane" id="tab-history">
508
+ <div class="history-toolbar">
509
+ <input class="history-search" id="history-search" type="text" placeholder="Search prompts..." />
510
+ <span class="history-status" id="history-status"></span>
511
+ </div>
512
+ <div id="history-gallery"></div>
513
+ <button class="history-load-more" id="history-load-more" style="display:none;" onclick="loadMoreHistory()">Load 100 more</button>
514
+ </div>
515
+ <script>
516
+ const gallery = document.getElementById("gallery");
517
+ const empty = document.getElementById("empty");
518
+ const es = new EventSource("/events");
519
+ const PRIMARY_MODEL = ${JSON.stringify(MODEL_PRIMARY)};
520
+ const RESPIN_SIZES = ${JSON.stringify(RESPIN_SIZES)};
521
+ const RESPIN_ASPECTS = ${JSON.stringify(RESPIN_ASPECTS)};
522
+ const MODEL_OPTIONS = ${JSON.stringify(MODEL_KEYS.map(k => ({ key: k, label: MODELS[k].label, id: MODELS[k].id })))};
523
+ es.onmessage = (e) => {
524
+ const data = JSON.parse(e.data);
525
+ const { id, prompt, type, filename, modelUsed } = data;
526
+ empty.style.display = "none";
527
+ if (type !== "video") {
528
+ const topTa = document.getElementById("top-prompt-text");
529
+ if (topTa) topTa.value = "";
530
+ }
531
+ const div = document.createElement("div");
532
+ div.className = "img-entry";
533
+
534
+ if (type === "video") {
535
+ div.id = "vid-" + id;
536
+ const row = document.createElement("div");
537
+ row.className = "prompt-row";
538
+ const ta = document.createElement("textarea");
539
+ ta.className = "prompt-edit";
540
+ ta.readOnly = true;
541
+ ta.value = prompt;
542
+ const badge = document.createElement("span");
543
+ badge.className = "video-badge";
544
+ badge.textContent = "VIDEO";
545
+ row.appendChild(ta);
546
+ row.appendChild(badge);
547
+ const vid = document.createElement("video");
548
+ vid.src = "/video/" + encodeURIComponent(filename);
549
+ vid.controls = true;
550
+ vid.loop = true;
551
+ vid.playsInline = true;
552
+ vid.style.maxWidth = "100%";
553
+ div.appendChild(row);
554
+ div.appendChild(vid);
555
+ } else {
556
+ div.id = "img-" + id;
557
+ const row = document.createElement("div");
558
+ row.className = "prompt-row";
559
+ const ta = document.createElement("textarea");
560
+ ta.className = "prompt-edit";
561
+ ta.dataset.id = id;
562
+ ta.value = prompt;
563
+ const controls = document.createElement("div");
564
+ controls.className = "respin-controls";
565
+ const curSize = data.imageSize || "1K";
566
+ const curAspect = data.aspectRatio || "1:1";
567
+ const sizeSel = document.createElement("select");
568
+ sizeSel.className = "respin-select";
569
+ sizeSel.dataset.size = id;
570
+ sizeSel.title = "Resolution";
571
+ RESPIN_SIZES.forEach(function(s) {
572
+ const o = document.createElement("option");
573
+ o.value = s; o.textContent = s;
574
+ if (s === curSize) o.selected = true;
575
+ sizeSel.appendChild(o);
576
+ });
577
+ const aspectSel = document.createElement("select");
578
+ aspectSel.className = "respin-select";
579
+ aspectSel.dataset.aspect = id;
580
+ aspectSel.title = "Aspect ratio";
581
+ RESPIN_ASPECTS.forEach(function(a) {
582
+ const o = document.createElement("option");
583
+ o.value = a; o.textContent = a;
584
+ if (a === curAspect) o.selected = true;
585
+ aspectSel.appendChild(o);
586
+ });
587
+ const btn = document.createElement("button");
588
+ btn.className = "respin-btn";
589
+ btn.innerHTML = "&#x21bb; Respin";
590
+ btn.title = "Regenerate (edit prompt / size / aspect above)";
591
+ btn.onclick = function() { respin(id, this); };
592
+ const modelSel = document.createElement("select");
593
+ modelSel.className = "respin-select";
594
+ modelSel.dataset.model = id;
595
+ modelSel.title = "Model";
596
+ MODEL_OPTIONS.forEach(function(m) {
597
+ const o = document.createElement("option");
598
+ o.value = m.key; o.textContent = m.label;
599
+ if (m.id === modelUsed) o.selected = true;
600
+ modelSel.appendChild(o);
601
+ });
602
+ controls.appendChild(sizeSel);
603
+ controls.appendChild(aspectSel);
604
+ controls.appendChild(modelSel);
605
+ controls.appendChild(btn);
606
+ row.appendChild(ta);
607
+ row.appendChild(controls);
608
+ div.appendChild(row);
609
+ if (modelUsed && modelUsed !== PRIMARY_MODEL && modelUsed.startsWith('gemini')) {
610
+ const banner = document.createElement("div");
611
+ banner.className = "fallback-banner";
612
+ banner.innerHTML = '\u26A0\uFE0F Generated with <strong>' + modelUsed + '</strong> (free-tier fallback). Upgrade to <strong>' + PRIMARY_MODEL + '</strong> for higher-quality imagegen \u2014 <a href="https://aistudio.google.com/" target="_blank">top up credits</a>.';
613
+ div.appendChild(banner);
614
+ }
615
+ const wrapper = document.createElement("div");
616
+ wrapper.className = "img-wrapper";
617
+ const label = document.createElement("span");
618
+ label.className = "model-label";
619
+ label.textContent = modelUsed || "";
620
+ wrapper.appendChild(label);
621
+ const img = document.createElement("img");
622
+ img.src = "/img/" + id;
623
+ wrapper.appendChild(img);
624
+ div.appendChild(wrapper);
625
+ }
626
+ gallery.prepend(div);
627
+ };
628
+ async function respin(id, btn) {
629
+ btn.disabled = true;
630
+ btn.textContent = "Generating...";
631
+ const ta = document.querySelector('textarea[data-id="' + id + '"]');
632
+ const prompt = ta ? ta.value : undefined;
633
+ const sizeEl = document.querySelector('select[data-size="' + id + '"]');
634
+ const aspectEl = document.querySelector('select[data-aspect="' + id + '"]');
635
+ const modelEl = document.querySelector('select[data-model="' + id + '"]');
636
+ const size = sizeEl ? sizeEl.value : undefined;
637
+ const aspect = aspectEl ? aspectEl.value : undefined;
638
+ const model = modelEl ? modelEl.value : undefined;
639
+ try {
640
+ const res = await fetch("/respin", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ id, prompt, size, aspect, model }) });
641
+ const data = await res.json();
642
+ if (!res.ok) throw new Error(data.error || "Respin failed");
643
+ btn.textContent = "\u21bb Respin";
644
+ btn.disabled = false;
645
+ } catch (err) {
646
+ btn.textContent = "Failed — retry?";
647
+ btn.disabled = false;
648
+ }
649
+ }
650
+
651
+ async function generateFromTop(btn) {
652
+ const ta = document.getElementById("top-prompt-text");
653
+ const prompt = ta ? ta.value.trim() : "";
654
+ if (!prompt) { ta && ta.focus(); return; }
655
+ btn.disabled = true;
656
+ btn.textContent = "Generating...";
657
+ const size = document.getElementById("top-size").value;
658
+ const aspect = document.getElementById("top-aspect").value;
659
+ const model = document.getElementById("top-model").value;
660
+ try {
661
+ const res = await fetch("/respin", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ prompt, size, aspect, model }) });
662
+ const data = await res.json();
663
+ if (!res.ok) throw new Error(data.error || "Generate failed");
664
+ btn.innerHTML = "&#x2728; Generate";
665
+ btn.disabled = false;
666
+ } catch (err) {
667
+ btn.textContent = "Failed — retry?";
668
+ btn.disabled = false;
669
+ }
670
+ }
671
+
672
+ // === Tabs + History ===
673
+ const HISTORY_INITIAL = 20;
674
+ const HISTORY_PAGE = 100;
675
+ const historyState = { offset: 0, query: "", total: 0, hasMore: false, loaded: false, pending: false };
676
+
677
+ function switchTab(name) {
678
+ document.querySelectorAll(".tab-btn").forEach(b => b.classList.toggle("active", b.dataset.tab === name));
679
+ document.querySelectorAll(".tab-pane").forEach(p => p.classList.toggle("active", p.id === "tab-" + name));
680
+ if (name === "history" && !historyState.loaded) {
681
+ historyState.loaded = true;
682
+ resetAndLoadHistory(HISTORY_INITIAL);
683
+ }
684
+ }
685
+
686
+ function escHtml(s) {
687
+ return String(s == null ? "" : s).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
688
+ }
689
+
690
+ function fmtTs(ts) {
691
+ if (!ts) return "";
692
+ try { return new Date(ts).toLocaleString(); } catch { return ""; }
693
+ }
694
+
695
+ async function resetAndLoadHistory(limit) {
696
+ historyState.offset = 0;
697
+ historyState.total = 0;
698
+ historyState.hasMore = false;
699
+ document.getElementById("history-gallery").innerHTML = "";
700
+ await fetchHistoryPage(limit);
701
+ }
702
+
703
+ async function fetchHistoryPage(limit) {
704
+ if (historyState.pending) return;
705
+ historyState.pending = true;
706
+ const btn = document.getElementById("history-load-more");
707
+ const statusEl = document.getElementById("history-status");
708
+ if (btn) btn.disabled = true;
709
+ statusEl.textContent = "Loading...";
710
+ try {
711
+ const params = new URLSearchParams();
712
+ params.set("offset", String(historyState.offset));
713
+ params.set("limit", String(limit));
714
+ if (historyState.query) params.set("q", historyState.query);
715
+ const res = await fetch("/history?" + params.toString());
716
+ const data = await res.json();
717
+ if (!res.ok) throw new Error(data.error || "History fetch failed");
718
+ historyState.total = data.total;
719
+ historyState.hasMore = data.hasMore;
720
+ historyState.offset += data.items.length;
721
+ renderHistoryItems(data.items);
722
+ statusEl.textContent = historyState.total + " match" + (historyState.total === 1 ? "" : "es") + " · showing " + historyState.offset;
723
+ if (btn) btn.style.display = historyState.hasMore ? "block" : "none";
724
+ if (historyState.total === 0) {
725
+ document.getElementById("history-gallery").innerHTML = '<p style="color:#777;padding:20px 0;">No images found.</p>';
726
+ }
727
+ } catch (err) {
728
+ statusEl.textContent = "Error: " + (err && err.message ? err.message : "failed");
729
+ } finally {
730
+ historyState.pending = false;
731
+ if (btn) btn.disabled = false;
732
+ }
733
+ }
734
+
735
+ function renderHistoryItems(items) {
736
+ const gallery = document.getElementById("history-gallery");
737
+ for (const it of items) {
738
+ const div = document.createElement("div");
739
+ div.className = "history-entry";
740
+ const key = it.filename || "";
741
+ const curSize = it.imageSize || "1K";
742
+ const curAspect = it.aspectRatio || "1:1";
743
+ const meta = [fmtTs(it.timestamp), curSize, curAspect, it.modelUsed || "?"].filter(Boolean).join(" · ");
744
+
745
+ const row = document.createElement("div");
746
+ row.className = "prompt-row";
747
+ const ta = document.createElement("textarea");
748
+ ta.className = "prompt-edit";
749
+ ta.dataset.histId = key;
750
+ ta.value = it.prompt || "";
751
+ const controls = document.createElement("div");
752
+ controls.className = "respin-controls";
753
+ const sizeSel = document.createElement("select");
754
+ sizeSel.className = "respin-select";
755
+ sizeSel.dataset.histSize = key;
756
+ sizeSel.title = "Resolution";
757
+ RESPIN_SIZES.forEach((s) => {
758
+ const o = document.createElement("option");
759
+ o.value = s; o.textContent = s;
760
+ if (s === curSize) o.selected = true;
761
+ sizeSel.appendChild(o);
762
+ });
763
+ const aspectSel = document.createElement("select");
764
+ aspectSel.className = "respin-select";
765
+ aspectSel.dataset.histAspect = key;
766
+ aspectSel.title = "Aspect ratio";
767
+ RESPIN_ASPECTS.forEach((a) => {
768
+ const o = document.createElement("option");
769
+ o.value = a; o.textContent = a;
770
+ if (a === curAspect) o.selected = true;
771
+ aspectSel.appendChild(o);
772
+ });
773
+ const btn = document.createElement("button");
774
+ btn.className = "respin-btn";
775
+ btn.innerHTML = "&#x21bb; Respin";
776
+ btn.title = "Regenerate and switch to Live tab";
777
+ btn.onclick = function () { respinHistory(key, this); };
778
+ const modelSel = document.createElement("select");
779
+ modelSel.className = "respin-select";
780
+ modelSel.dataset.histModel = key;
781
+ modelSel.title = "Model";
782
+ MODEL_OPTIONS.forEach((m) => {
783
+ const o = document.createElement("option");
784
+ o.value = m.key; o.textContent = m.label;
785
+ if (m.id === (it.modelUsed || "")) o.selected = true;
786
+ modelSel.appendChild(o);
787
+ });
788
+ controls.appendChild(sizeSel);
789
+ controls.appendChild(aspectSel);
790
+ controls.appendChild(modelSel);
791
+ controls.appendChild(btn);
792
+ row.appendChild(ta);
793
+ row.appendChild(controls);
794
+
795
+ const metaDiv = document.createElement("div");
796
+ metaDiv.className = "history-meta";
797
+ metaDiv.textContent = (it.filename || "") + " · " + meta;
798
+
799
+ const wrapper = document.createElement("div");
800
+ wrapper.className = "img-wrapper";
801
+ const label = document.createElement("span");
802
+ label.className = "model-label";
803
+ label.textContent = it.modelUsed || "";
804
+ wrapper.appendChild(label);
805
+ const img = document.createElement("img");
806
+ img.src = "/file/" + encodeURIComponent(key);
807
+ img.loading = "lazy";
808
+ wrapper.appendChild(img);
809
+
810
+ div.appendChild(row);
811
+ div.appendChild(metaDiv);
812
+ div.appendChild(wrapper);
813
+ gallery.appendChild(div);
814
+ }
815
+ }
816
+
817
+ async function respinHistory(key, btn) {
818
+ btn.disabled = true;
819
+ btn.textContent = "Generating...";
820
+ const ta = document.querySelector('textarea[data-hist-id="' + key + '"]');
821
+ const sizeEl = document.querySelector('select[data-hist-size="' + key + '"]');
822
+ const aspectEl = document.querySelector('select[data-hist-aspect="' + key + '"]');
823
+ const modelEl = document.querySelector('select[data-hist-model="' + key + '"]');
824
+ const prompt = ta ? ta.value : undefined;
825
+ const size = sizeEl ? sizeEl.value : undefined;
826
+ const aspect = aspectEl ? aspectEl.value : undefined;
827
+ const model = modelEl ? modelEl.value : undefined;
828
+ if (!prompt || !prompt.trim()) {
829
+ btn.textContent = "Need prompt";
830
+ btn.disabled = false;
831
+ return;
832
+ }
833
+ switchTab("live");
834
+ try {
835
+ const res = await fetch("/respin", {
836
+ method: "POST",
837
+ headers: { "Content-Type": "application/json" },
838
+ body: JSON.stringify({ prompt, size, aspect, model }),
839
+ });
840
+ const data = await res.json();
841
+ if (!res.ok) throw new Error(data.error || "Respin failed");
842
+ btn.textContent = "\u21bb Respin";
843
+ btn.disabled = false;
844
+ } catch (err) {
845
+ btn.textContent = "Failed — retry?";
846
+ btn.disabled = false;
847
+ }
848
+ }
849
+
850
+ async function loadMoreHistory() {
851
+ await fetchHistoryPage(HISTORY_PAGE);
852
+ }
853
+
854
+ let searchDebounce;
855
+ document.addEventListener("DOMContentLoaded", () => {
856
+ const searchEl = document.getElementById("history-search");
857
+ if (searchEl) {
858
+ searchEl.addEventListener("input", (e) => {
859
+ clearTimeout(searchDebounce);
860
+ const val = e.target.value;
861
+ searchDebounce = setTimeout(() => {
862
+ historyState.query = val.trim();
863
+ resetAndLoadHistory(HISTORY_INITIAL);
864
+ }, 250);
865
+ });
866
+ }
867
+ });
868
+ </script>
869
+ </body></html>`;
870
+ }
871
/**
 * Escape a value for safe interpolation into HTML text or a quoted attribute.
 *
 * @param {unknown} s - Value to escape. `null`/`undefined` become the empty
 *   string; any other non-string is coerced with `String()` instead of throwing.
 * @returns {string} The input with `&`, `<`, `>`, `"` and `'` replaced by entities.
 */
function esc(s) {
    return String(s ?? "")
        // "&" must go first so the entities produced below are not double-escaped.
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        // Also cover single-quoted attribute contexts.
        .replace(/'/g, "&#39;");
}
874
/**
 * Build the standalone "Fix Region" page for one image.
 *
 * The page loads the image from `/file/<filename>`, lets the user drag a
 * rectangle on a canvas, and POSTs the selection (as percentages of the image)
 * plus a prompt and shot count to `/crop-submit`. When several shots come
 * back, the user picks one and confirms it through `/crop-select`.
 *
 * @param {string} filename - Name of the image to edit; embedded in the title
 *   (HTML-escaped) and in the inline script (as an escaped JS string literal).
 * @returns {string} Complete HTML document.
 */
function cropHtml(filename) {
    // JSON.stringify alone is NOT safe inside an inline <script>: the HTML parser
    // ends the element at the first "</script>" regardless of JS string quoting.
    // Emit the HTML-significant characters (and the JS line separators) as \uXXXX
    // escapes so the literal can never terminate the script element.
    const filenameLiteral = JSON.stringify(String(filename)).replace(/[<>&\u2028\u2029]/g, (ch) => "\\u" + ch.charCodeAt(0).toString(16).padStart(4, "0"));
    return `<!DOCTYPE html>
<html><head><title>Fix Region — ${esc(filename)}</title>
<style>
* { box-sizing: border-box; margin: 0; padding: 0; }
body { background: #111; color: #ccc; font-family: system-ui; display: flex; flex-direction: column; height: 100vh; }
.toolbar { padding: 12px 16px; background: #1a1a1a; border-bottom: 1px solid #333; display: flex; gap: 12px; align-items: center; flex-wrap: wrap; }
.toolbar h2 { font-size: 15px; color: #eee; margin-right: 8px; }
.toolbar label { font-size: 13px; color: #999; }
.toolbar textarea { flex: 1; min-width: 300px; height: 56px; background: #222; color: #eee; border: 1px solid #444; border-radius: 4px; padding: 8px; font-size: 13px; font-family: system-ui; resize: vertical; }
.toolbar button { background: #2d7d46; color: #fff; border: none; padding: 10px 24px; border-radius: 4px; font-size: 14px; cursor: pointer; font-weight: 600; }
.toolbar button:hover { background: #38a55a; }
.toolbar button:disabled { background: #555; cursor: not-allowed; }
.toolbar select { background: #222; color: #eee; border: 1px solid #444; border-radius: 4px; padding: 6px 8px; font-size: 13px; }
.canvas-wrap { flex: 1; overflow: auto; position: relative; display: flex; align-items: flex-start; justify-content: center; padding: 16px; flex-wrap: wrap; gap: 16px; }
canvas { cursor: crosshair; max-width: 100%; }
.coords { font-size: 12px; color: #666; min-width: 180px; text-align: right; }
.status { font-size: 13px; color: #ffcc00; padding: 8px 16px; background: #1a1a1a; border-top: 1px solid #333; }
.results { display: flex; gap: 12px; flex-wrap: wrap; width: 100%; }
.results img { max-width: 48%; border: 2px solid transparent; border-radius: 4px; cursor: pointer; }
.results img:hover { border-color: #4caf50; }
.results img.selected { border-color: #4caf50; box-shadow: 0 0 12px rgba(76,175,80,0.5); }
.status.done { color: #4caf50; }
.status.error { color: #f44336; }
</style></head><body>
<div class="toolbar">
<h2>Select region to fix</h2>
<label>Notes / instructions:</label>
<textarea id="prompt" placeholder="e.g. '36GB should be 96GB', 'fix the garbled text in this section'">Clean up and fix any garbled, glitched, or distorted text. Preserve style, colors, and layout.</textarea>
<label>Shots:</label>
<select id="shots">
<option value="1">1</option>
<option value="2">2</option>
<option value="3" selected>3</option>
<option value="4">4</option>
<option value="5">5</option>
</select>
<button id="submit" disabled>Submit Region</button>
<div class="coords" id="coords">Draw a rectangle on the image</div>
</div>
<div class="canvas-wrap">
<canvas id="canvas"></canvas>
</div>
<div class="status" id="status">Loading image...</div>
<script>
const filename = ${filenameLiteral};
const canvas = document.getElementById("canvas");
const ctx = canvas.getContext("2d");
const coordsEl = document.getElementById("coords");
const statusEl = document.getElementById("status");
const submitBtn = document.getElementById("submit");
const promptEl = document.getElementById("prompt");

const img = new Image();
img.onload = () => {
  // Scale to fit viewport while keeping full resolution for coordinates
  const maxW = window.innerWidth - 32;
  const maxH = window.innerHeight - 160;
  const scale = Math.min(1, maxW / img.width, maxH / img.height);
  canvas.width = Math.round(img.width * scale);
  canvas.height = Math.round(img.height * scale);
  canvas.dataset.scale = scale;
  canvas.dataset.imgW = img.width;
  canvas.dataset.imgH = img.height;
  ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
  statusEl.textContent = img.width + "x" + img.height + " — click and drag to select a region";
};
img.src = "/file/" + encodeURIComponent(filename);

let drawing = false;
let startX = 0, startY = 0, endX = 0, endY = 0;
let hasSelection = false;

// Convert a mouse event to canvas-pixel coordinates. The canvas can be shrunk
// by CSS (max-width: 100%), in which case its on-screen size differs from
// canvas.width/height and raw client offsets would give a shifted selection.
function toCanvasPoint(e) {
  const rect = canvas.getBoundingClientRect();
  const sx = rect.width ? canvas.width / rect.width : 1;
  const sy = rect.height ? canvas.height / rect.height : 1;
  return { x: (e.clientX - rect.left) * sx, y: (e.clientY - rect.top) * sy };
}

canvas.addEventListener("mousedown", (e) => {
  const p = toCanvasPoint(e);
  startX = p.x;
  startY = p.y;
  drawing = true;
  hasSelection = false;
  submitBtn.disabled = true;
});

canvas.addEventListener("mousemove", (e) => {
  if (!drawing) return;
  const p = toCanvasPoint(e);
  endX = p.x;
  endY = p.y;
  redraw();
});

canvas.addEventListener("mouseup", (e) => {
  if (!drawing) return;
  drawing = false;
  const p = toCanvasPoint(e);
  endX = p.x;
  endY = p.y;
  const sel = getSelection();
  if (sel.w > 5 && sel.h > 5) {
    hasSelection = true;
    submitBtn.disabled = false;
    redraw();
  }
});

// Normalised selection rectangle (top-left origin, positive size) in canvas pixels.
function getSelection() {
  const x = Math.min(startX, endX);
  const y = Math.min(startY, endY);
  const w = Math.abs(endX - startX);
  const h = Math.abs(endY - startY);
  return { x, y, w, h };
}

// Express a canvas-pixel rectangle as percentages of the canvas size.
function toPercent(sel) {
  const cw = canvas.width;
  const ch = canvas.height;
  return {
    x: (sel.x / cw) * 100,
    y: (sel.y / ch) * 100,
    width: (sel.w / cw) * 100,
    height: (sel.h / ch) * 100,
  };
}

function redraw() {
  ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
  const sel = getSelection();
  if (sel.w > 2 && sel.h > 2) {
    // Dim everything outside selection
    ctx.fillStyle = "rgba(0,0,0,0.5)";
    ctx.fillRect(0, 0, canvas.width, sel.y); // top
    ctx.fillRect(0, sel.y + sel.h, canvas.width, canvas.height - sel.y - sel.h); // bottom
    ctx.fillRect(0, sel.y, sel.x, sel.h); // left
    ctx.fillRect(sel.x + sel.w, sel.y, canvas.width - sel.x - sel.w, sel.h); // right

    // Selection border
    ctx.strokeStyle = "#4caf50";
    ctx.lineWidth = 2;
    ctx.strokeRect(sel.x, sel.y, sel.w, sel.h);

    const pct = toPercent(sel);
    coordsEl.textContent = pct.x.toFixed(1) + "%, " + pct.y.toFixed(1) + "% — " + pct.width.toFixed(1) + "% x " + pct.height.toFixed(1) + "%";
  }
}

submitBtn.addEventListener("click", async () => {
  if (!hasSelection) return;
  const sel = getSelection();
  const pct = toPercent(sel);
  submitBtn.disabled = true;
  submitBtn.textContent = "Processing...";
  statusEl.textContent = "Sending region to Gemini for fixing...";
  statusEl.className = "status";

  const shotsVal = parseInt(document.getElementById("shots").value, 10);
  try {
    statusEl.textContent = "Region submitted — generating " + shotsVal + " shot(s) with Gemini...";
    statusEl.className = "status";
    const resp = await fetch("/crop-submit", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        filename,
        x: pct.x,
        y: pct.y,
        width: pct.width,
        height: pct.height,
        prompt: promptEl.value,
        shots: shotsVal,
      }),
    });
    const result = await resp.json();
    if (result.ok) {
      if (result.filenames && result.filenames.length > 1) {
        statusEl.textContent = result.filenames.length + " shots ready — click to select the best one, then click Use Selected";
        statusEl.className = "status done";
        const wrap = document.querySelector(".canvas-wrap");
        const resultsDiv = document.createElement("div");
        resultsDiv.className = "results";
        let selectedIdx = 0;
        result.filenames.forEach((fn, i) => {
          const img = document.createElement("img");
          img.src = "/file/" + encodeURIComponent(fn);
          img.title = "Shot " + (i + 1) + ": " + fn;
          if (i === 0) img.classList.add("selected");
          img.addEventListener("click", () => {
            resultsDiv.querySelectorAll("img").forEach(el => el.classList.remove("selected"));
            img.classList.add("selected");
            selectedIdx = i;
          });
          resultsDiv.appendChild(img);
        });
        wrap.appendChild(resultsDiv);
        // Add "Use Selected" button
        const useBtn = document.createElement("button");
        useBtn.textContent = "Use Selected";
        useBtn.style.cssText = "background:#2d7d46;color:#fff;border:none;padding:10px 24px;border-radius:4px;font-size:14px;cursor:pointer;font-weight:600;margin-top:12px;";
        useBtn.addEventListener("click", async () => {
          useBtn.disabled = true;
          useBtn.textContent = "Confirming...";
          try {
            const confirmResp = await fetch("/crop-select", {
              method: "POST",
              headers: { "Content-Type": "application/json" },
              body: JSON.stringify({ filename, selectedIndex: selectedIdx }),
            });
            const confirmResult = await confirmResp.json();
            if (!confirmResult.ok) throw new Error(confirmResult.error || "Unknown");
            statusEl.textContent = "Selected shot " + (selectedIdx + 1) + " — saved as " + confirmResult.filename;
            statusEl.className = "status done";
            useBtn.textContent = "Done";
          } catch (err) {
            // Surface the failure and let the user retry instead of leaving the
            // button stuck on "Confirming...".
            statusEl.textContent = "Error: " + (err && err.message ? err.message : "Unknown");
            statusEl.className = "status error";
            useBtn.disabled = false;
            useBtn.textContent = "Use Selected";
          }
        });
        wrap.appendChild(useBtn);
      } else {
        statusEl.textContent = "Done! Saved as " + result.filename;
        statusEl.className = "status done";
        const resultImg = document.createElement("img");
        resultImg.src = "/file/" + encodeURIComponent(result.filename);
        resultImg.style.maxWidth = "100%";
        resultImg.style.marginTop = "16px";
        document.querySelector(".canvas-wrap").appendChild(resultImg);
      }
      submitBtn.textContent = "Complete";
    } else {
      statusEl.textContent = "Error: " + (result.error || "Unknown");
      statusEl.className = "status error";
      submitBtn.disabled = false;
      submitBtn.textContent = "Submit Region";
    }
  } catch (err) {
    statusEl.textContent = "Network error: " + err.message;
    statusEl.className = "status error";
    submitBtn.disabled = false;
    submitBtn.textContent = "Submit Region";
  }
});
</script>
</body></html>`;
}
1111
+ // --- Histogram matching ---
1112
/**
 * Re-tone a regenerated region so its brightness/contrast follows the region
 * it replaces.
 *
 * For each of the first (up to) three channels a linear map
 * `out = in * gain + offset` is derived from the two images' statistics:
 * `gain = origStdev / fixedStdev` limited to [0.5, 2.0], and `offset` chosen so
 * the transformed mean equals the original mean. A fixed channel with
 * (near-)zero variance gets gain 1, i.e. only its mean is shifted.
 *
 * @param fixedBuf - Encoded image of the regenerated region.
 * @param originalBuf - Encoded image of the region being replaced.
 * @returns Promise resolving to the re-toned image buffer produced by sharp.
 */
async function matchHistogram(fixedBuf, originalBuf) {
    const [fixedStats, origStats] = await Promise.all([fixedBuf, originalBuf].map((buf) => sharp(buf).stats()));
    // Only colour channels are adjusted; a fourth (alpha) channel is left out.
    const channelCount = Math.min(fixedStats.channels.length, origStats.channels.length, 3);
    const multipliers = [];
    const offsets = [];
    fixedStats.channels.slice(0, channelCount).forEach((fixedCh, idx) => {
        const origCh = origStats.channels[idx];
        // Guard the division: a flat channel keeps its contrast untouched.
        const rawGain = fixedCh.stdev > 0.001 ? origCh.stdev / fixedCh.stdev : 1;
        // Limit the gain so a noisy estimate cannot blow out the region.
        const gain = Math.min(2.0, Math.max(0.5, rawGain));
        multipliers.push(gain);
        // Offset is computed from the limited gain so the means still line up.
        offsets.push(origCh.mean - fixedCh.mean * gain);
    });
    log(` Histogram match: R(×${multipliers[0]?.toFixed(2)}+${offsets[0]?.toFixed(1)}) G(×${multipliers[1]?.toFixed(2)}+${offsets[1]?.toFixed(1)}) B(×${multipliers[2]?.toFixed(2)}+${offsets[2]?.toFixed(1)})`);
    const pipeline = sharp(fixedBuf).linear(multipliers, offsets);
    return pipeline.toBuffer();
}
1145
+ // --- Image resizing for MCP ---
1146
/**
 * Produce a base64 payload of an image whose length fits under MAX_MCP_BYTES.
 *
 * The PNG is passed through unchanged when its base64 form already fits.
 * Otherwise it is re-encoded as JPEG (quality 80) at 75%, 50%, 35% and 25% of
 * the original width, returning the first attempt that fits. If none does, a
 * 256px-wide JPEG at quality 60 is returned without a further size check.
 *
 * @param pngBuffer - Encoded source image.
 * @returns Promise of `{ base64, mime }` where mime is "image/png" or "image/jpeg".
 */
async function shrinkForMcp(pngBuffer) {
    const kb = (encoded) => (encoded.length / 1024).toFixed(0);
    const asPng = pngBuffer.toString("base64");
    if (asPng.length <= MAX_MCP_BYTES) {
        log(` MCP size: ${kb(asPng)}KB PNG (no resize needed)`);
        return { base64: asPng, mime: "image/png" };
    }
    const metadata = await sharp(pngBuffer).metadata();
    const origWidth = metadata.width ?? 1024;
    log(` Original: ${origWidth}x${metadata.height} PNG, ${kb(asPng)}KB base64`);
    // Re-encode at the given width/quality and hand back the base64 text.
    const toJpegBase64 = async (targetWidth, quality) => {
        const encoded = await sharp(pngBuffer).resize(targetWidth).jpeg({ quality }).toBuffer();
        return encoded.toString("base64");
    };
    const scales = [0.75, 0.5, 0.35, 0.25];
    for (let i = 0; i < scales.length; i++) {
        const width = Math.round(origWidth * scales[i]);
        const candidate = await toJpegBase64(width, 80);
        if (candidate.length > MAX_MCP_BYTES) {
            continue;
        }
        log(` MCP size: ${kb(candidate)}KB JPEG @ ${Math.round(scales[i] * 100)}% (${width}px wide)`);
        return { base64: candidate, mime: "image/jpeg" };
    }
    // Nothing fit: fall back to a small thumbnail regardless of its size.
    const lastResort = await toJpegBase64(256, 60);
    log(` MCP size: ${kb(lastResort)}KB JPEG @ 256px (last resort)`);
    return { base64: lastResort, mime: "image/jpeg" };
}
1169
+ // --- Shared directory helpers ---
1170
/**
 * Create SAVE_DIR (and any missing parent directories) if it does not exist.
 * With `recursive: true`, an already-existing directory is not an error.
 */
async function ensureSaveDir() {
    const options = { recursive: true };
    await mkdir(SAVE_DIR, options);
}
1173
/**
 * Write a buffer into the shared save directory.
 *
 * The file is named `<ISO timestamp with ":" and "." replaced by "-">_<label><ext>`.
 *
 * @param buf - Data to write.
 * @param label - Text placed after the timestamp in the filename.
 * @param ext - File extension including the dot (default ".png").
 * @returns Promise of the bare filename (not the full path).
 */
async function saveToDisk(buf, label, ext = ".png") {
    await ensureSaveDir();
    const stamp = new Date().toISOString().replace(/[:.]/g, "-");
    const filename = [stamp, "_", label, ext].join("");
    const target = join(SAVE_DIR, filename);
    await writeFile(target, buf);
    return filename;
}
1181
/**
 * Read an image from the shared save directory and prepare it as an inline
 * (base64) payload.
 *
 * Images wider than 1024px or larger than 500,000 bytes are re-encoded as a
 * JPEG (quality 85) no wider than 1024px. Anything else is sent as-is, with
 * the MIME type inferred from the file extension (PNG when unrecognised).
 *
 * @param filename - Name of the file inside SAVE_DIR.
 * @returns Promise of `{ base64, mime }`.
 */
async function loadForGemini(filename) {
    const raw = await readFile(join(SAVE_DIR, filename));
    const metadata = await sharp(raw).metadata();
    const width = metadata.width ?? 1024;
    log(` Source file: ${filename} (${width}x${metadata.height}, ${(raw.length / 1024).toFixed(0)}KB)`);
    const needsCompression = width > 1024 || raw.length > 500_000;
    if (!needsCompression) {
        let mime;
        switch (extname(filename).toLowerCase()) {
            case ".jpg":
            case ".jpeg":
                mime = "image/jpeg";
                break;
            case ".webp":
                mime = "image/webp";
                break;
            default:
                mime = "image/png";
        }
        return { base64: raw.toString("base64"), mime };
    }
    // Compress to max 1024px wide JPEG for fast Gemini upload
    const targetWidth = Math.min(width, 1024);
    const resized = await sharp(raw).resize(targetWidth).jpeg({ quality: 85 }).toBuffer();
    log(` Compressed for Gemini: ${(resized.length / 1024).toFixed(0)}KB JPEG`);
    return { base64: resized.toString("base64"), mime: "image/jpeg" };
}
1201
/**
 * Report whether an error message looks like a billing / prepaid-credit failure.
 *
 * The check is a case-insensitive substring match against a fixed phrase list.
 *
 * @param {string} msg - Error message text.
 * @returns {boolean} True when any known billing phrase occurs in the message.
 */
function isPrepayError(msg) {
    const haystack = msg.toLowerCase();
    const billingPhrases = [
        "prepay",
        "prepaid",
        "credits are depleted",
        "billing is required",
        "requires billing",
        "enable billing",
        "insufficient credit",
        "billing account",
    ];
    return billingPhrases.some((phrase) => haystack.includes(phrase));
}
1212
/**
 * Image provider backed by the Gemini content-generation endpoint.
 *
 * `generate` and `edit` build the request parts and delegate to `call`, which
 * tries MODEL_PRIMARY and, when the configured default model is a Gemini one,
 * retries on MODEL_FALLBACK if the failure looks like a billing/prepay error.
 */
class GeminiProvider {
    name = "gemini";
    /**
     * Generate an image from a text prompt.
     * @param req - Object with `prompt`, `aspectRatio` and `imageSize`.
     * @returns Promise of `{ imageBase64, text, modelUsed }`.
     */
    async generate(req) {
        return this.call([{ text: req.prompt }], req.aspectRatio, req.imageSize);
    }
    /**
     * Edit an existing image according to a text prompt.
     * @param req - Object with `prompt`, `imageMime`, `imageBase64`,
     *   `aspectRatio` and `imageSize`.
     * @returns Promise of `{ imageBase64, text, modelUsed }`.
     */
    async edit(req) {
        return this.call([
            { text: req.prompt },
            { inlineData: { mimeType: req.imageMime, data: req.imageBase64 } },
        ], req.aspectRatio, req.imageSize);
    }
    /**
     * Perform a single request against one model.
     *
     * @param model - Model identifier passed to `geminiEndpoint`.
     * @param inputParts - Content parts (text and/or inline image data).
     * @param aspectRatio - Forwarded as `imageConfig.aspectRatio`.
     * @param imageSize - Forwarded as `imageConfig.imageSize`.
     * @returns Promise of `{ imageBase64, text }`.
     * @throws {Error} On network failure, non-JSON body, HTTP/API error, a
     *   response without content parts, or a response without an image part.
     */
    async callOnce(model, inputParts, aspectRatio, imageSize) {
        const t0 = Date.now();
        log(` Calling Gemini API [${model}] (${imageSize}, ${aspectRatio}, ${inputParts.length} parts)...`);
        let res;
        try {
            res = await fetch(geminiEndpoint(model), {
                method: "POST",
                headers: {
                    "Content-Type": "application/json",
                    // Send the key as a header rather than a `?key=` query parameter
                    // so it does not end up in URLs captured by proxies or logs.
                    "x-goog-api-key": GOOGLE_API_KEY,
                },
                body: JSON.stringify({
                    contents: [{ parts: inputParts }],
                    generationConfig: {
                        responseModalities: ["TEXT", "IMAGE"],
                        imageConfig: { aspectRatio, imageSize },
                    },
                }),
            });
        }
        catch (fetchErr) {
            throw new Error(`Network error calling Gemini API: ${fetchErr instanceof Error ? fetchErr.message : String(fetchErr)}`, { cause: fetchErr });
        }
        const elapsed = Date.now() - t0;
        log(` Gemini [${model}] responded HTTP ${res.status} in ${(elapsed / 1000).toFixed(1)}s`);
        // Read as text first so a non-JSON body can be quoted in the error.
        const rawBody = await res.text();
        let data;
        try {
            data = JSON.parse(rawBody);
        }
        catch {
            throw new Error(`Gemini API returned non-JSON (HTTP ${res.status}). Raw body: ${rawBody.slice(0, 2000)}`);
        }
        if (!res.ok || data.error) {
            const safeBody = rawBody.length > 3000 ? rawBody.slice(0, 3000) + "... [truncated]" : rawBody;
            throw new Error(`Gemini API HTTP ${res.status}: ${data.error?.message ?? "unknown error"}. Full response: ${safeBody}`);
        }
        const candidate = data.candidates?.[0];
        if (!candidate?.content?.parts?.length) {
            // Long string values under a "data" key (inline payloads) are elided from the dump.
            throw new Error(`Gemini API returned no content parts. Full response: ${JSON.stringify(data, (k, v) => (k === "data" && typeof v === "string" && v.length > 100 ? "[truncated]" : v))}`);
        }
        const responseParts = candidate.content.parts;
        let imageBase64 = "";
        let text = "";
        // When several parts of one kind are present, the last one wins.
        for (const part of responseParts) {
            if (part.inlineData)
                imageBase64 = part.inlineData.data;
            if (part.text)
                text = part.text;
        }
        if (!imageBase64) {
            const textContent = text ? `Model responded with text: "${text.slice(0, 1000)}"` : "No text content either.";
            throw new Error(`Gemini returned no image. ${textContent} | Parts structure: ${JSON.stringify(responseParts.map((p) => ({ hasText: !!p.text, hasInlineData: !!p.inlineData })))}`);
        }
        log(` Got image: ${(imageBase64.length / 1024).toFixed(0)}KB base64 from ${model}`);
        return { imageBase64, text };
    }
    /**
     * Run a request on MODEL_PRIMARY, optionally falling back to MODEL_FALLBACK.
     *
     * The fallback is attempted only when the default model key resolves to a
     * Gemini-provider entry in MODELS and the primary failure message matches
     * `isPrepayError`; every other error is rethrown unchanged.
     *
     * @returns Promise of `{ imageBase64, text, modelUsed }`.
     */
    async call(inputParts, aspectRatio, imageSize) {
        const fallbackAllowed = MODELS[getDefaultModelKey()]?.provider === "gemini";
        const runOn = async (model) => {
            const result = await this.callOnce(model, inputParts, aspectRatio, imageSize);
            return { ...result, modelUsed: model };
        };
        try {
            return await runOn(MODEL_PRIMARY);
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            if (!fallbackAllowed || !isPrepayError(msg)) {
                throw err;
            }
            log(` Prepay error on ${MODEL_PRIMARY} — falling back to ${MODEL_FALLBACK}. Original: ${msg.slice(0, 300)}`);
            return runOn(MODEL_FALLBACK);
        }
    }
}
1299
+ // --- OpenAI provider ---
1300
// Aspect ratio -> pixel size used for every model id other than "gpt-image-2".
// Only three distinct sizes appear: square, landscape and portrait.
const OPENAI_V1_SIZE_MAP = {
    "1:1": "1024x1024",
    "16:9": "1536x1024",
    "9:16": "1024x1536",
    "3:4": "1024x1536",
    "4:3": "1536x1024",
    "2:3": "1024x1536",
    "3:2": "1536x1024",
    "4:5": "1024x1536",
    "5:4": "1536x1024",
};
// Resolution tier -> aspect ratio -> pixel size, used for "gpt-image-2".
const OPENAI_V2_SIZE_MAP = {
    "512": {
        "1:1": "512x512",
        "16:9": "912x512",
        "9:16": "512x912",
        "3:4": "512x680",
        "4:3": "680x512",
        "2:3": "512x768",
        "3:2": "768x512",
        "4:5": "512x640",
        "5:4": "640x512",
    },
    "1K": {
        "1:1": "1024x1024",
        "16:9": "1536x1024",
        "9:16": "1024x1536",
        "3:4": "1024x1360",
        "4:3": "1360x1024",
        "2:3": "1024x1536",
        "3:2": "1536x1024",
        "4:5": "1024x1280",
        "5:4": "1280x1024",
    },
    "2K": {
        "1:1": "2048x2048",
        "16:9": "2560x1440",
        "9:16": "1440x2560",
        "3:4": "1536x2048",
        "4:3": "2048x1536",
        "2:3": "1440x2160",
        "3:2": "2160x1440",
        "4:5": "1536x1920",
        "5:4": "1920x1536",
    },
    "4K": {
        "1:1": "3840x3840",
        "16:9": "3840x2160",
        "9:16": "2160x3840",
        "3:4": "2880x3840",
        "4:3": "3840x2880",
        "2:3": "2560x3840",
        "3:2": "3840x2560",
        "4:5": "3072x3840",
        "5:4": "3840x3072",
    },
};
/**
 * Resolve the `size` string for an OpenAI image request.
 *
 * @param {string} modelId - "gpt-image-2" uses the tiered table; anything else
 *   uses the flat table and ignores `imageSize`.
 * @param {string} aspectRatio - Ratio key such as "16:9".
 * @param {string} imageSize - Tier key ("512", "1K", "2K", "4K").
 * @returns {string} "WxH"; an unknown tier falls back to the "1K" row and an
 *   unknown ratio falls back to "1024x1024".
 */
function openaiSize(modelId, aspectRatio, imageSize) {
    if (modelId !== "gpt-image-2") {
        return OPENAI_V1_SIZE_MAP[aspectRatio] ?? "1024x1024";
    }
    const tiered = OPENAI_V2_SIZE_MAP[imageSize]?.[aspectRatio];
    if (tiered != null) {
        return tiered;
    }
    return OPENAI_V2_SIZE_MAP["1K"][aspectRatio] ?? "1024x1024";
}
1339
/**
 * Pick the OpenAI `quality` setting for a resolution tier.
 *
 * @param {string} imageSize - Tier key.
 * @returns {"high"|"medium"} "high" for "2K" and "4K", otherwise "medium".
 */
function openaiQuality(imageSize) {
    switch (imageSize) {
        case "2K":
        case "4K":
            return "high";
        default:
            return "medium";
    }
}
1344
/**
 * Image provider backed by the OpenAI Images API.
 *
 * `generate` posts JSON to /v1/images/generations; `edit` posts a multipart
 * form (including the source image) to /v1/images/edits. Both read the result
 * from `data[0].b64_json` and share one response-handling helper.
 */
class OpenAIProvider {
    name = "openai";
    /**
     * Generate an image from a text prompt.
     * @param req - Object with `modelId`, `prompt`, `aspectRatio`, `imageSize`.
     * @returns Promise of `{ imageBase64, text: "", modelUsed }`.
     * @throws {Error} On network failure, non-JSON body, HTTP/API error or a
     *   response without image data.
     */
    async generate(req) {
        const size = openaiSize(req.modelId, req.aspectRatio, req.imageSize);
        const quality = openaiQuality(req.imageSize);
        log(` Calling OpenAI ${req.modelId} generate (${size}, quality=${quality})...`);
        return this.#request("https://api.openai.com/v1/images/generations", {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                "Authorization": `Bearer ${OPENAI_API_KEY}`,
            },
            body: JSON.stringify({
                model: req.modelId,
                prompt: req.prompt,
                n: 1,
                size,
                quality,
            }),
        }, req.modelId, "");
    }
    /**
     * Edit an existing image according to a text prompt.
     * @param req - Object with `modelId`, `prompt`, `imageBase64`, `imageMime`,
     *   `aspectRatio`, `imageSize`.
     * @returns Promise of `{ imageBase64, text: "", modelUsed }`.
     * @throws {Error} Same failure modes as `generate`.
     */
    async edit(req) {
        const size = openaiSize(req.modelId, req.aspectRatio, req.imageSize);
        const quality = openaiQuality(req.imageSize);
        log(` Calling OpenAI ${req.modelId} edit (${size}, quality=${quality})...`);
        const imageBuffer = Buffer.from(req.imageBase64, "base64");
        const imageBlob = new Blob([imageBuffer], { type: req.imageMime });
        // Name the upload after its real format; the payload is not always PNG,
        // so a fixed "image.png" would contradict the part's content type.
        const extByMime = { "image/jpeg": "jpg", "image/webp": "webp", "image/png": "png" };
        const uploadName = `image.${Object.hasOwn(extByMime, req.imageMime) ? extByMime[req.imageMime] : "png"}`;
        const form = new FormData();
        form.append("model", req.modelId);
        form.append("prompt", req.prompt);
        form.append("image", imageBlob, uploadName);
        form.append("size", size);
        form.append("quality", quality);
        // No explicit Content-Type: fetch supplies the multipart boundary itself.
        return this.#request("https://api.openai.com/v1/images/edits", {
            method: "POST",
            headers: { "Authorization": `Bearer ${OPENAI_API_KEY}` },
            body: form,
        }, req.modelId, "edit");
    }
    /**
     * Send one request and extract the base64 image from the response.
     *
     * @param url - Endpoint URL.
     * @param init - `fetch` options.
     * @param modelId - Reported back as `modelUsed` and used in log lines.
     * @param kind - "" for generation, "edit" for edits; only affects the
     *   wording of log and error messages.
     */
    async #request(url, init, modelId, kind) {
        const who = kind ? `OpenAI ${kind}` : "OpenAI";
        const t0 = Date.now();
        let res;
        try {
            res = await fetch(url, init);
        }
        catch (fetchErr) {
            throw new Error(`Network error calling ${who} API: ${fetchErr instanceof Error ? fetchErr.message : String(fetchErr)}`, { cause: fetchErr });
        }
        const elapsed = Date.now() - t0;
        log(` ${who} responded HTTP ${res.status} in ${(elapsed / 1000).toFixed(1)}s`);
        // Read as text first so a non-JSON body can be quoted in the error.
        const rawBody = await res.text();
        let data;
        try {
            data = JSON.parse(rawBody);
        }
        catch {
            throw new Error(`${who} API returned non-JSON (HTTP ${res.status}). Raw body: ${rawBody.slice(0, 2000)}`);
        }
        if (!res.ok || data.error) {
            throw new Error(`${who} API HTTP ${res.status}: ${data.error?.message ?? rawBody.slice(0, 2000)}`);
        }
        const imageBase64 = data.data?.[0]?.b64_json;
        if (!imageBase64) {
            throw new Error(`${who} returned no image data. Response: ${rawBody.slice(0, 2000)}`);
        }
        log(` Got image: ${(imageBase64.length / 1024).toFixed(0)}KB base64 from ${modelId}${kind ? ` ${kind}` : ""}`);
        return { imageBase64, text: "", modelUsed: modelId };
    }
}
1436
+ // --- Veo API ---
1437
/**
 * Start a Veo video generation, poll the long-running operation until it is
 * done, and download the resulting video.
 *
 * @param {string} prompt - Text prompt for the video.
 * @param {string} aspectRatio - Sent as `parameters.aspectRatio`.
 * @param {number} durationSeconds - Sent as `parameters.durationSeconds`.
 * @returns {Promise<Buffer>} The downloaded video bytes.
 * @throws {Error} On a network failure starting the operation, a non-JSON or
 *   error response, a reported generation failure, a missing video URI, a
 *   failed download, or when `VEO_MAX_POLLS` polls elapse without completion.
 */
async function callVeo(prompt, aspectRatio, durationSeconds) {
    const t0 = Date.now();
    log(` Calling Veo API (${VEO_MODEL}, ${aspectRatio}, ${durationSeconds}s)...`);
    let res;
    try {
        res = await fetch(`${VEO_ENDPOINT}?key=${GOOGLE_API_KEY}`, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({
                instances: [{ prompt }],
                parameters: {
                    aspectRatio,
                    durationSeconds,
                    personGeneration: "allow_all",
                    sampleCount: 1,
                    resolution: "720p",
                },
            }),
        });
    }
    catch (fetchErr) {
        throw new Error(`Network error calling Veo API: ${fetchErr instanceof Error ? fetchErr.message : String(fetchErr)}`, { cause: fetchErr });
    }
    // Read as text first so a non-JSON body can still be quoted in the error.
    const rawBody = await res.text();
    let operation;
    try {
        operation = JSON.parse(rawBody);
    }
    catch {
        throw new Error(`Veo API returned non-JSON (HTTP ${res.status}). Raw body: ${rawBody.slice(0, 2000)}`);
    }
    // `?.` guards against a syntactically valid but null JSON body.
    if (!res.ok || operation?.error) {
        throw new Error(`Veo API HTTP ${res.status}: ${operation?.error?.message ?? rawBody.slice(0, 2000)}`);
    }
    if (!operation?.name) {
        throw new Error(`Veo API returned no operation name. Response: ${rawBody.slice(0, 2000)}`);
    }
    log(` Veo operation started: ${operation.name}`);
    // Poll for completion
    const pollUrl = `${VEO_BASE}/${operation.name}?key=${GOOGLE_API_KEY}`;
    for (let i = 0; i < VEO_MAX_POLLS; i++) {
        await new Promise((r) => setTimeout(r, VEO_POLL_INTERVAL));
        const elapsed = ((Date.now() - t0) / 1000).toFixed(0);
        log(` Polling Veo (${elapsed}s elapsed, attempt ${i + 1}/${VEO_MAX_POLLS})...`);
        let pollBody;
        try {
            const pollRes = await fetch(pollUrl);
            pollBody = await pollRes.text();
        }
        catch (pollErr) {
            // A transient network failure on one poll should not discard an
            // in-flight generation; retry like the non-JSON case below.
            log(` Poll request failed (${pollErr instanceof Error ? pollErr.message : String(pollErr)}), retrying...`);
            continue;
        }
        let pollData;
        try {
            pollData = JSON.parse(pollBody);
        }
        catch {
            log(` Poll returned non-JSON, retrying...`);
            continue;
        }
        if (pollData?.error) {
            throw new Error(`Veo generation failed: ${pollData.error.message}`);
        }
        if (pollData?.done) {
            const videoUri = pollData.response?.generateVideoResponse?.generatedSamples?.[0]?.video?.uri;
            if (!videoUri) {
                throw new Error(`Veo completed but no video URI found. Response: ${pollBody.slice(0, 2000)}`);
            }
            log(` Veo complete in ${((Date.now() - t0) / 1000).toFixed(1)}s, downloading video...`);
            // Download the video — append the API key with the separator that
            // matches whether the URI already carries a query string.
            const separator = videoUri.includes("?") ? "&" : "?";
            const downloadRes = await fetch(`${videoUri}${separator}key=${GOOGLE_API_KEY}`, { redirect: "follow" });
            if (!downloadRes.ok) {
                throw new Error(`Failed to download video (HTTP ${downloadRes.status})`);
            }
            const videoBuffer = Buffer.from(await downloadRes.arrayBuffer());
            log(` Downloaded video: ${(videoBuffer.length / 1024 / 1024).toFixed(1)}MB`);
            return videoBuffer;
        }
    }
    throw new Error(`Veo generation timed out after ${VEO_MAX_POLLS * VEO_POLL_INTERVAL / 1000}s`);
}
1512
/**
 * Generate one image, persist it, publish it to the viewer, and return a
 * shrunk copy for the MCP response.
 *
 * Records `prompt` in the module-level `lastPrompt` before calling the provider.
 *
 * @param {string} prompt
 * @param {string} aspectRatio
 * @param {string} imageSize
 * @param {string} [modelKey] - Passed to `getProvider`.
 * @returns {Promise<{mcpBase64: string, mcpMimeType: string, text: string, filename: string, modelUsed: string}>}
 */
async function generateAndStore(prompt, aspectRatio, imageSize, modelKey) {
    lastPrompt = prompt;
    const { provider, modelId } = getProvider(modelKey);
    const generated = await provider.generate({ prompt, aspectRatio, imageSize, modelId });
    const { text, modelUsed } = generated;
    const fullPng = Buffer.from(generated.imageBase64, "base64");

    // The first 8 characters of the UUID become the on-disk filename suffix.
    const id = randomUUID();
    const filename = await saveToDisk(fullPng, id.slice(0, 8));
    log(` Saved ${filename}`);

    const record = {
        id,
        prompt,
        fullPng,
        timestamp: Date.now(),
        filename,
        aspectRatio,
        imageSize,
        modelUsed,
    };
    imageStore.push(record);
    notifyViewerClients(record);

    const shrunk = await shrinkForMcp(fullPng);
    return {
        mcpBase64: shrunk.base64,
        mcpMimeType: shrunk.mime,
        text,
        filename,
        modelUsed,
    };
}
1527
/**
 * Edit a source image via the selected provider, persist the result, publish
 * it to the viewer, and return a shrunk copy for the MCP response.
 *
 * The stored record's prompt is prefixed with "[edit] ".
 *
 * @param {string} prompt
 * @param {string} sourceBase64 - Source image, base64-encoded.
 * @param {string} sourceMime - MIME type of the source image.
 * @param {string} aspectRatio
 * @param {string} imageSize
 * @param {string} [modelKey] - Passed to `getProvider`.
 * @returns {Promise<{mcpBase64: string, mcpMimeType: string, text: string, filename: string, modelUsed: string}>}
 */
async function editAndStore(prompt, sourceBase64, sourceMime, aspectRatio, imageSize, modelKey) {
    const { provider, modelId } = getProvider(modelKey);
    const editRequest = {
        prompt,
        imageBase64: sourceBase64,
        imageMime: sourceMime,
        aspectRatio,
        imageSize,
        modelId,
    };
    const edited = await provider.edit(editRequest);
    const { text, modelUsed } = edited;
    const fullPng = Buffer.from(edited.imageBase64, "base64");

    // The first 8 characters of the UUID become the on-disk filename suffix.
    const id = randomUUID();
    const filename = await saveToDisk(fullPng, id.slice(0, 8));
    log(` Saved ${filename}`);

    const record = {
        id,
        prompt: `[edit] ${prompt}`,
        fullPng,
        timestamp: Date.now(),
        filename,
        modelUsed,
    };
    imageStore.push(record);
    notifyViewerClients(record);

    const shrunk = await shrinkForMcp(fullPng);
    return {
        mcpBase64: shrunk.base64,
        mcpMimeType: shrunk.mime,
        text,
        filename,
        modelUsed,
    };
}
1548
/** Notice appended to MCP responses when generation automatically fell back to the fallback model. */
const AUTO_FALLBACK_NOTICE = [
    `\u26A0\uFE0F Generated with ${MODEL_FALLBACK} (Gemini 2.5 Flash Image) — the free-tier-eligible fallback. `,
    `The primary ${MODEL_PRIMARY} (Gemini 3.1 Flash Image) requires a billed/prepaid Google AI account, `,
    `and your prepayment credits appear to be depleted. For higher-quality image generation, `,
    `top up credits at https://aistudio.google.com/ and re-run.`,
].join("");
/** Notice appended when the caller explicitly asked for the free model. */
const EXPLICIT_FREE_NOTICE = [
    `\u2139\uFE0F Generated with ${MODEL_FALLBACK} (Gemini 2.5 Flash Image, free tier, as requested). `,
    `For higher-quality image generation, pass model='${GEMINI_DEFAULT}' — this requires prepaid credits on your Google AI account.`,
].join("");
/** Description shared by every tool's `model` parameter; lists each model key with its label. */
const MODEL_PARAM_DESCRIPTION = [
    `Model to use. Available: ${MODEL_KEYS.map((key) => `'${key}' (${MODELS[key].label})`).join(", ")}. `,
    `Default: '${getDefaultModelKey()}'. Set DEFAULT_IMAGE_MODEL env var to change the default. `,
    `Gemini models fall back to free tier on billing errors. OpenAI requires OPENAI_API_KEY.`,
].join("");
1559
/**
 * Build the notice suffix (if any) to append to a tool response.
 *
 * @param {string} modelUsed - Model id that actually produced the image.
 * @param {string} [explicitModelKey] - Model key the caller asked for, if any.
 * @returns {string} "" when no notice applies; otherwise two newlines followed
 *   by EXPLICIT_FREE_NOTICE (a key was given) or AUTO_FALLBACK_NOTICE.
 */
function noticeFor(modelUsed, explicitModelKey) {
    if (explicitModelKey && MODELS[explicitModelKey]?.provider !== "gemini")
        return "";
    if (modelUsed === MODEL_PRIMARY)
        return "";
    // Both notices describe a Gemini fallback. When no explicit key is passed
    // (e.g. the default is a non-Gemini model) a non-Gemini result must not be
    // labelled as one — same guard generate_images applies.
    if (!isGeminiModel(modelUsed))
        return "";
    const notice = explicitModelKey ? EXPLICIT_FREE_NOTICE : AUTO_FALLBACK_NOTICE;
    return `\n\n${notice}`;
}
1567
/**
 * Adjust the requested image size for models that cannot honour it.
 *
 * Only one combination is rewritten: an explicit "gemini-2.5-flash-image" key
 * (registered with provider "gemini") together with size "512" becomes "1K",
 * and a note is logged. Every other input is returned unchanged.
 *
 * @param {string} imageSize - Requested size.
 * @param {string} [modelKey] - Explicit model key, if the caller chose one.
 * @returns {string} The size to send to the provider.
 */
function resolveImageSize(imageSize, modelKey) {
    const isGeminiKey = Boolean(modelKey) && MODELS[modelKey]?.provider === "gemini";
    const needsBump = isGeminiKey && modelKey === "gemini-2.5-flash-image" && imageSize === "512";
    if (!needsBump) {
        return imageSize;
    }
    log(` Note: gemini-2.5-flash-image doesn't support image_size=512, using 1K instead`);
    return "1K";
}
1579
+ // --- Style presets ---
1580
/**
 * Built-in style presets, keyed by preset name. Each entry carries a
 * human-readable `description`, the `promptPrefix` prepended to user prompts,
 * and the `defaultAspectRatio` used when the caller leaves the ratio at its default.
 * Rows below are [key, defaultAspectRatio, description, promptPrefix].
 */
const STYLE_PRESETS = Object.fromEntries([
    [
        "neo-brutalist",
        "4:5",
        "Neo-brutalist minimalist magazine editorial. Bold oversized typography, cream/black/terracotta palette, halftone textures, visible grid lines, asymmetric layout. Think Emigre meets Swiss brutalism.",
        "Neo-brutalist minimalist design. Magazine editorial style layout. Off-white / cream background with bold black typography in a heavy-weight grotesque sans-serif font, slightly overlapping and breaking the grid. Accent color: muted burnt orange or terracotta used sparingly as stripe or block elements. Raw, unpolished aesthetic — visible grid lines, asymmetric layout, oversized type that bleeds off edges. Subtle halftone texture overlay. Monospaced subtext in lowercase. No gradients, no glossy effects, no heavy saturation. Clean but edgy, restrained but bold.",
    ],
    [
        "duval-software-infographic",
        "4:5",
        "Duval Software's signature retro-futurist infographic style. 1960s Space Age optimism meets 1980s arcade aesthetics. Cathode blue, warm amber, salmon red, warm green palette. CRT scanlines, atomic-age geometry, pixel-grid accents. Great for diagrams, system overviews, and technical illustrations.",
        "Neo-retro-futurism style. Blend of 1960s Space Age futurism and 1980s video game aesthetics with a modern neo-retro sensibility. Color palette: deep cathode-ray blue (#1a3a5c to #4a9eff glowing CRT blue), warm amber (#d4a017 to #ffcc44), salmon red (#e8735a to #ff6b6b), and warm muted greens (#5a8a5c to #8bbd7b). Dark background evoking a CRT monitor with subtle scanline texture and faint phosphor glow. Typography: mix of retrofuturist geometric sans-serif (like Eurostile, Microgramma, or Bank Gothic) with pixel-grid or bitmap-style secondary text. Design elements: atomic-age starbursts, orbital ellipses, rounded-rectangle pods, jet-age swooshes, and subtle 8-bit pixel patterns along borders or dividers. Faint CRT curvature vignette at edges. Thin vector grid lines receding to a vanishing point. Icons and illustrations should feel like arcade cabinet art meets Googie architecture meets NASA mission patches. Warm analog glow on all light sources — no harsh pure whites, everything filtered through amber or blue phosphor. The overall mood is optimistic, adventurous, and slightly nostalgic — a future that never was, rendered through a cathode ray tube.",
    ],
    [
        "fractal-arcade",
        "4:5",
        "Geometric dithered fractal style. All shading via dithering patterns and geometric cross-hatch grids — no smooth gradients. Fractal backgrounds (Sierpinski, hexagonal tessellations, recursive diamonds), low-poly faceted subjects, retro CRT palette.",
        "Geometric dithered illustration style. All shading done through dithering patterns, halftone dots, and geometric cross-hatch grids — NO smooth gradients anywhere. Every surface rendered with visible pixel-level dithering like a 16-color EGA/VGA palette pushed through ordered Bayer matrix dithering. Fractal geometric patterns in the background — Sierpinski triangles, hexagonal tessellations, recursive diamond grids. Color palette: deep cathode-ray blue (#1a3a5c to #4a9eff), warm amber (#d4a017 to #ffcc44), salmon red (#e8735a), warm muted greens (#5a8a5c). Subjects built from clean geometric shapes — triangular facets, polygonal planes, like a low-poly render but flat and 2D with dithered color fills instead of smooth shading. Think: Saul Bass designed a character select screen for an Amiga game. Geometric line-art icons. Chunky retrofuturist typeface for headers, smaller geometric caps for subtitles. Horizontal scanline overlay. No photorealism, no soft shadows, no AI-gradient smoothness. Every color transition is a hard dither pattern. Clean, precise, geometric, but retro-cool.",
    ],
    [
        "clean-tech-infographic",
        "16:9",
        "Clean technical infographic for architecture diagrams, system flows, and data pipelines. Dark navy background, cyan/electric blue glowing connection lines, geometric nodes, professional and precise.",
        "Clean, professional technical infographic on a dark navy (#0a1628) background with subtle grid lines. Use cyan (#00d4ff) and electric blue (#4a9eff) glowing connection lines between components. White and light gray text only — no bright colors for text. Components rendered as clean geometric shapes: rounded rectangles, hexagons, circles with thin borders and subtle inner glow. Icons are minimal line-art style (server racks, phones, browsers, databases, cloud services). Typography: modern sans-serif (like Inter or SF Pro) — bold for titles, regular weight for labels, monospace for technical details (ports, protocols, versions). Layout follows clear left-to-right or top-to-bottom data flow with labeled arrows showing protocols and data formats. No decorative illustrations, no clip art, no logos, no random embellishments. Include a thin tech stack bar at the bottom. The overall feel is a polished engineering diagram you'd present to a CTO — precise, minimal, and authoritative.",
    ],
].map(([key, defaultAspectRatio, description, promptPrefix]) => [
    key,
    { description, promptPrefix, defaultAspectRatio },
]));
/** Preset names in declaration order; used as the enum for the `style` parameter. */
const STYLE_KEYS = Object.keys(STYLE_PRESETS);
/** Description of the `style` parameter: a fixed intro followed by one bullet per preset. */
const STYLE_DESCRIPTION = [
    "Optional style preset to apply. When set, the style's prompt prefix is prepended and its default aspect ratio is used (unless you explicitly set one).\n\nAvailable styles:\n",
    STYLE_KEYS.map((key) => `• ${key} — ${STYLE_PRESETS[key].description}`).join("\n"),
].join("");
1605
/**
 * Prepend a style preset's prompt prefix to the user's prompt.
 *
 * @param {string} prompt - The caller's prompt.
 * @param {string} [style] - Preset key; anything that is not an own key of
 *   STYLE_PRESETS leaves the prompt untouched.
 * @returns {string} The styled prompt, or `prompt` unchanged.
 */
function applyStyle(prompt, style) {
    // Own-property check: a bare `STYLE_PRESETS[style]` would also match
    // inherited names such as "toString" and yield an "undefined" prefix.
    if (!style || !Object.hasOwn(STYLE_PRESETS, style) || !STYLE_PRESETS[style])
        return prompt;
    return `${STYLE_PRESETS[style].promptPrefix}\n\nSubject/content: ${prompt}`;
}
1610
/**
 * Pick the aspect ratio to use given the caller's value and an optional style.
 *
 * "1:1" is treated as "not explicitly chosen": only then may the style's
 * `defaultAspectRatio` replace it. Any other value is returned as-is.
 *
 * @param {string} explicit - Aspect ratio received from the tool call.
 * @param {string} [style] - Style preset key, if any.
 * @returns {string} The aspect ratio to generate with.
 */
function resolveAspectRatio(explicit, style) {
    const mayUseStyleDefault = explicit === "1:1" && Boolean(style);
    const styleDefault = mayUseStyleDefault ? STYLE_PRESETS[style]?.defaultAspectRatio : undefined;
    return styleDefault || explicit;
}
1617
+ // --- MCP server ---
1618
/** The MCP server instance that every tool below is registered on. */
const server = new McpServer(
    {
        name: "pixel-surgeon",
        version: "1.0.0",
    },
    {
        capabilities: {
            tools: {},
        },
    },
);
1619
/**
 * Tool: list_images — lists up to 50 media files (png/jpg/jpeg/webp/mp4) found
 * in SAVE_DIR, sorted by name descending, each with its size. Videos are
 * tagged "[VIDEO]" and sized in MB; images are sized in KB.
 */
server.tool("list_images", `List image and video files in the shared pixel-surgeon directory (${SAVE_DIR}). Use this to find images available for editing.`, {}, async () => {
    try {
        await ensureSaveDir();
        const files = await readdir(SAVE_DIR);
        const mediaFiles = files.filter((f) => /\.(png|jpg|jpeg|webp|mp4)$/i.test(f));
        mediaFiles.sort().reverse(); // newest first
        // The stat calls are independent, so issue them together. A file that
        // disappears between readdir and stat is skipped rather than failing
        // the whole listing.
        const described = await Promise.all(mediaFiles.slice(0, 50).map(async (f) => {
            let s;
            try {
                s = await stat(join(SAVE_DIR, f));
            }
            catch (statErr) {
                log(`list_images: skipping ${f}: ${statErr instanceof Error ? statErr.message : String(statErr)}`);
                return null;
            }
            const isVideo = /\.mp4$/i.test(f);
            const sizeStr = isVideo
                ? `${(s.size / 1024 / 1024).toFixed(1)}MB`
                : `${(s.size / 1024).toFixed(0)}KB`;
            return `${isVideo ? "[VIDEO] " : ""}${f} (${sizeStr})`;
        }));
        const entries = described.filter((entry) => entry !== null);
        return {
            content: [{
                    type: "text",
                    text: entries.length > 0
                        ? `Files in ${SAVE_DIR}:\n${entries.join("\n")}`
                        : `No files in ${SAVE_DIR}`,
                }],
        };
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        return {
            content: [{ type: "text", text: `list_images failed: ${msg}` }],
            isError: true,
        };
    }
});
1651
/**
 * Tool: save_image — copies a file from `source_path` into SAVE_DIR under a
 * timestamped "<ISO timestamp>_imported<ext>" name and reports the new
 * filename. The extension is lower-cased; ".png" is used when the source has none.
 */
server.tool("save_image", `Copy an image file into the shared pixel-surgeon directory (${SAVE_DIR}) so it can be used with edit_image. Use this when the user wants to edit an image that exists elsewhere on their filesystem.`, {
    source_path: z.string().describe("Absolute path to the image file to import"),
}, async ({ source_path }) => {
    try {
        await ensureSaveDir();

        // ':' and '.' in the ISO timestamp are swapped for '-' to keep the name filesystem-friendly.
        const stamp = new Date().toISOString().replace(/[:.]/g, "-");
        const sourceExt = extname(source_path).toLowerCase();
        const destFilename = `${stamp}_imported${sourceExt || ".png"}`;
        const destPath = join(SAVE_DIR, destFilename);

        await copyFile(source_path, destPath);
        const { size } = await stat(destPath);
        const sizeKb = (size / 1024).toFixed(0);

        log(`save_image: copied ${source_path} -> ${destFilename} (${sizeKb}KB)`);
        const text = `Saved as ${destFilename} in ${SAVE_DIR} (${sizeKb}KB). You can now use this filename with edit_image.`;
        return { content: [{ type: "text", text }] };
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        log(`save_image error: ${msg}`);
        return {
            content: [{ type: "text", text: `save_image failed: ${msg}` }],
            isError: true,
        };
    }
});
1679
/**
 * Tool: generate_images — generates 1-8 images concurrently, one per prompt.
 * Each prompt is styled, generated and stored independently; failures are
 * reported per image and the call is flagged as an error only when none succeed.
 */
server.tool("generate_images", "Generate multiple images in parallel. Supports Gemini and OpenAI models — pass the model param to choose. Returns the generated images and any accompanying text. Full-resolution images are viewable in the browser viewer.", {
    prompts: z
        .array(z.string())
        .min(1)
        .max(8)
        .describe("Array of text prompts, one per image to generate (1-8 images)"),
    aspect_ratio: z
        .enum(["1:1", "16:9", "9:16", "3:4", "4:3", "2:3", "3:2", "4:5", "5:4"])
        .default("1:1")
        .describe("Aspect ratio for all generated images"),
    image_size: z
        .enum(["512", "1K", "2K", "4K"])
        .default("1K")
        .describe("Image resolution"),
    style: z
        .enum(STYLE_KEYS)
        .optional()
        .describe(STYLE_DESCRIPTION),
    model: z.enum(MODEL_KEYS).optional().describe(MODEL_PARAM_DESCRIPTION),
}, async ({ prompts, aspect_ratio, image_size, style, model }) => {
    try {
        await ensureViewer();
        const resolvedAR = resolveAspectRatio(aspect_ratio, style);
        const styleTag = style ? ` [style: ${style}]` : "";
        const modelTag = model ? ` [model: ${model}]` : "";
        log(`generate_images: ${prompts.length} prompts, ${image_size}, ${resolvedAR}${styleTag}${modelTag}`);
        const startedAt = Date.now();
        const resolvedSize = resolveImageSize(image_size, model);

        // Kick off every generation at once; allSettled keeps one failure from sinking the rest.
        const outcomes = await Promise.allSettled(prompts.map((prompt, index) => {
            const styledPrompt = applyStyle(prompt, style);
            const ellipsis = styledPrompt.length > 80 ? "..." : "";
            log(` [${index + 1}/${prompts.length}] "${styledPrompt.slice(0, 80)}${ellipsis}"`);
            return generateAndStore(styledPrompt, resolvedAR, resolvedSize, model);
        }));

        const content = [];
        let successCount = 0;
        let anyFallback = false;
        outcomes.forEach((outcome, index) => {
            if (outcome.status !== "fulfilled") {
                content.push({
                    type: "text",
                    text: `Image ${index + 1} failed (prompt: "${prompts[index]}"): ${outcome.reason?.message ?? "Unknown error"}`,
                });
                return;
            }
            const produced = outcome.value;
            successCount += 1;
            // A Gemini result from anything other than the primary model counts as a fallback.
            if (produced.modelUsed !== MODEL_PRIMARY && isGeminiModel(produced.modelUsed)) {
                anyFallback = true;
            }
            const caption = produced.text ? ` — ${produced.text}` : "";
            content.push({ type: "text", text: `Image ${index + 1}: ${produced.filename}${caption}` }, { type: "image", data: produced.mcpBase64, mimeType: produced.mcpMimeType });
        });

        const elapsed = ((Date.now() - startedAt) / 1000).toFixed(1);
        log(`generate_images complete: ${successCount}/${prompts.length} succeeded in ${elapsed}s`);

        const fallbackNotice = anyFallback
            ? `\n\n${model ? EXPLICIT_FREE_NOTICE : AUTO_FALLBACK_NOTICE}`
            : "";
        content.push({
            type: "text",
            text: `Full-res images in ${SAVE_DIR} — viewable at http://localhost:${viewerPort}${fallbackNotice}`,
        });
        return successCount === 0 ? { content, isError: true } : { content };
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        log(`generate_images error: ${msg}`);
        return {
            content: [{ type: "text", text: `generate_images failed: ${msg}` }],
            isError: true,
        };
    }
});
1755
/**
 * Tool: generate_image — generates a single image from a prompt, optionally
 * applying a style preset, and returns the shrunk image plus the saved
 * filename, viewer URL, and any model notice.
 */
server.tool("generate_image", "Generate a single image. Supports Gemini and OpenAI models — pass the model param to choose. Full-resolution image is viewable in the browser viewer.", {
    prompt: z.string().describe("Text prompt describing the image to generate"),
    aspect_ratio: z
        .enum(["1:1", "16:9", "9:16", "3:4", "4:3", "2:3", "3:2", "4:5", "5:4"])
        .default("1:1")
        .describe("Aspect ratio for the image"),
    image_size: z
        .enum(["512", "1K", "2K", "4K"])
        .default("1K")
        .describe("Image resolution"),
    style: z
        .enum(STYLE_KEYS)
        .optional()
        .describe(STYLE_DESCRIPTION),
    model: z.enum(MODEL_KEYS).optional().describe(MODEL_PARAM_DESCRIPTION),
}, async ({ prompt, aspect_ratio, image_size, style, model }) => {
    try {
        await ensureViewer();
        const styledPrompt = applyStyle(prompt, style);
        const resolvedAR = resolveAspectRatio(aspect_ratio, style);

        const preview = `${styledPrompt.slice(0, 80)}${styledPrompt.length > 80 ? "..." : ""}`;
        const styleTag = style ? ` [style: ${style}]` : "";
        const modelTag = model ? ` [model: ${model}]` : "";
        log(`generate_image: "${preview}" (${image_size}, ${resolvedAR})${styleTag}${modelTag}`);

        const startedAt = Date.now();
        const resolvedSize = resolveImageSize(image_size, model);
        const produced = await generateAndStore(styledPrompt, resolvedAR, resolvedSize, model);
        log(`generate_image complete in ${((Date.now() - startedAt) / 1000).toFixed(1)}s`);

        // Any text returned with the image goes first, then the image, then the save summary.
        const content = [];
        if (produced.text) {
            content.push({ type: "text", text: produced.text });
        }
        content.push({ type: "image", data: produced.mcpBase64, mimeType: produced.mcpMimeType });
        content.push({
            type: "text",
            text: `Saved as ${produced.filename} — full-res at http://localhost:${viewerPort}${noticeFor(produced.modelUsed, model)}`,
        });
        return { content };
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        log(`generate_image error: ${msg}`);
        return {
            content: [{ type: "text", text: `generate_image failed: ${msg}` }],
            isError: true,
        };
    }
});
1797
/**
 * Tool: generate_video — generates a video via callVeo, saves it as an .mp4,
 * records it in videoStore, notifies viewer clients, and reports where it was saved.
 */
server.tool("generate_video", "Generate a video using Google's Veo 3. Returns an MP4 video file. Video generation takes 1-3 minutes — the tool will poll until complete. Veo 3 generates both video and ambient audio. Videos are saved to the shared directory and viewable in the browser viewer.", {
    prompt: z.string().describe("Text prompt describing the video to generate. Be descriptive about motion, camera angles, lighting, and scene details for best results."),
    aspect_ratio: z
        .enum(["16:9", "9:16"])
        .default("16:9")
        .describe("Aspect ratio — 16:9 for landscape, 9:16 for portrait/vertical"),
    duration: z
        .enum(["5", "8"])
        .default("8")
        .describe("Video duration in seconds"),
}, async ({ prompt, aspect_ratio, duration }) => {
    try {
        await ensureViewer();
        // The schema delivers the duration as a string enum; callVeo gets a number.
        const durationSeconds = parseInt(duration, 10);
        const preview = `${prompt.slice(0, 80)}${prompt.length > 80 ? "..." : ""}`;
        log(`generate_video: "${preview}" (${aspect_ratio}, ${durationSeconds}s)`);

        const startedAt = Date.now();
        const videoBuffer = await callVeo(prompt, aspect_ratio, durationSeconds);
        const sizeMb = (videoBuffer.length / 1024 / 1024).toFixed(1);

        const id = randomUUID();
        const filename = await saveToDisk(videoBuffer, id.slice(0, 8), ".mp4");
        log(` Saved video ${filename} (${sizeMb}MB)`);

        const record = {
            id,
            prompt,
            filename,
            timestamp: Date.now(),
            aspectRatio: aspect_ratio,
            durationSeconds,
        };
        videoStore.push(record);
        notifyViewerClientsVideo(record);

        const elapsed = ((Date.now() - startedAt) / 1000).toFixed(1);
        log(`generate_video complete in ${elapsed}s`);

        const messages = [
            `Video generated successfully in ${elapsed}s`,
            `Saved as ${filename} in ${SAVE_DIR} (${sizeMb}MB)`,
            `Viewable at http://localhost:${viewerPort}`,
        ];
        return { content: messages.map((text) => ({ type: "text", text })) };
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        log(`generate_video error: ${msg}`);
        return {
            content: [{ type: "text", text: `generate_video failed: ${msg}` }],
            isError: true,
        };
    }
});
1839
/**
 * Tool: edit_image — loads a source image by filename, applies the (optionally
 * styled) edit prompt through the selected provider, and returns the shrunk
 * result plus the saved filename, viewer URL, and any model notice.
 */
server.tool("edit_image", `Edit an existing image. Supports Gemini and OpenAI models — pass the model param to choose. Provide the filename of an image in ${SAVE_DIR} (use list_images to see available files, or save_image to import one first). The MCP reads the file directly — do NOT pass base64 image data.`, {
    prompt: z.string().describe("Text prompt describing the edits to make to the image"),
    filename: z.string().describe(`Filename of the source image in ${SAVE_DIR} (e.g. "2026-03-17T17-47-31-152Z_59f735df.png")`),
    aspect_ratio: z
        .enum(["1:1", "16:9", "9:16", "3:4", "4:3", "2:3", "3:2", "4:5", "5:4"])
        .default("1:1")
        .describe("Aspect ratio for the output image"),
    image_size: z
        .enum(["512", "1K", "2K", "4K"])
        .default("1K")
        .describe("Output image resolution"),
    style: z
        .enum(STYLE_KEYS)
        .optional()
        .describe(STYLE_DESCRIPTION),
    model: z.enum(MODEL_KEYS).optional().describe(MODEL_PARAM_DESCRIPTION),
}, async ({ prompt, filename, aspect_ratio, image_size, style, model }) => {
    try {
        await ensureViewer();
        const styledPrompt = applyStyle(prompt, style);
        const resolvedAR = resolveAspectRatio(aspect_ratio, style);

        const preview = `${styledPrompt.slice(0, 80)}${styledPrompt.length > 80 ? "..." : ""}`;
        const styleTag = style ? ` [style: ${style}]` : "";
        const modelTag = model ? ` [model: ${model}]` : "";
        log(`edit_image: "${preview}" source=${filename} (${image_size}, ${resolvedAR})${styleTag}${modelTag}`);

        const startedAt = Date.now();
        const source = await loadForGemini(filename);
        const resolvedSize = resolveImageSize(image_size, model);
        const edited = await editAndStore(styledPrompt, source.base64, source.mime, resolvedAR, resolvedSize, model);
        log(`edit_image complete in ${((Date.now() - startedAt) / 1000).toFixed(1)}s`);

        // Any text returned with the image goes first, then the image, then the save summary.
        const content = [];
        if (edited.text) {
            content.push({ type: "text", text: edited.text });
        }
        content.push({ type: "image", data: edited.mcpBase64, mimeType: edited.mcpMimeType });
        content.push({
            type: "text",
            text: `Saved as ${edited.filename} — full-res at http://localhost:${viewerPort}${noticeFor(edited.modelUsed, model)}`,
        });
        return { content };
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        log(`edit_image error: ${msg}`);
        return {
            content: [{ type: "text", text: `edit_image failed: ${msg}` }],
            isError: true,
        };
    }
});
1883
/**
 * Tool: fix_image — splits a source image into a cols x rows grid, sends each
 * tile through the provider's edit call in parallel, resizes the results back
 * to the tile rectangles, and composites them into one image.
 *
 * Tiles whose edit fails are replaced by the original tile; the call fails
 * only when every tile fails. The stitched image is saved, stored, published
 * to the viewer, and returned shrunk.
 */
server.tool("fix_image", `Fix an image that has glitched or garbled text by splitting it into tiles, re-rendering each tile, and stitching them back together. This works because smaller sections have less text for the model to handle at once. Use this when a generated image has text artifacts or overloaded text regions.`, {
    filename: z.string().describe(`Filename of the source image in ${SAVE_DIR}`),
    prompt: z
        .string()
        .default("Clean up and fix any garbled, glitched, or distorted text in this image tile. Preserve the style, colors, and layout exactly but make all text crisp and legible.")
        .describe("Instructions for fixing each tile"),
    grid: z
        .enum(["2x2", "3x3", "2x1", "1x2", "3x1", "1x3"])
        .default("2x2")
        .describe("How to split the image: cols x rows"),
    image_size: z
        .enum(["512", "1K", "2K", "4K"])
        .default("1K")
        .describe("Resolution for each tile"),
    model: z.enum(MODEL_KEYS).optional().describe(MODEL_PARAM_DESCRIPTION),
}, async ({ filename, prompt, grid, image_size, model }) => {
    try {
        await ensureViewer();
        log(`fix_image: source=${filename} grid=${grid}`);
        const t0 = Date.now();
        // The parameter is documented as a filename inside SAVE_DIR; refuse
        // anything with path separators so "../" cannot escape that directory.
        if (/[\\/]/.test(filename) || filename === "." || filename === "..") {
            throw new Error(`Invalid filename "${filename}": expected a bare filename inside ${SAVE_DIR}`);
        }
        // Parse grid
        const [colStr, rowStr] = grid.split("x");
        const cols = Number.parseInt(colStr, 10);
        const rows = Number.parseInt(rowStr, 10);
        // Load source image
        const filepath = join(SAVE_DIR, filename);
        const srcBuf = await readFile(filepath);
        const metadata = await sharp(srcBuf).metadata();
        const imgWidth = metadata.width;
        const imgHeight = metadata.height;
        // Without dimensions (or with fewer pixels than grid cells) the tile
        // maths below would produce NaN or zero-sized extract regions.
        if (!imgWidth || !imgHeight) {
            throw new Error(`Could not determine image dimensions for ${filename}`);
        }
        if (imgWidth < cols || imgHeight < rows) {
            throw new Error(`Image ${imgWidth}x${imgHeight} is too small for a ${grid} grid`);
        }
        log(` Source: ${imgWidth}x${imgHeight}`);
        const tileW = Math.floor(imgWidth / cols);
        const tileH = Math.floor(imgHeight / rows);
        // Rectangle of the tile at (col, row); the last tile in each direction
        // extends to the edge so rounding never leaves an uncovered strip.
        const tileRect = (col, row) => {
            const left = col * tileW;
            const top = row * tileH;
            return {
                left,
                top,
                width: col === cols - 1 ? imgWidth - left : tileW,
                height: row === rows - 1 ? imgHeight - top : tileH,
            };
        };
        // Extract tiles
        const tiles = [];
        for (let row = 0; row < rows; row++) {
            for (let col = 0; col < cols; col++) {
                const buf = await sharp(srcBuf)
                    .extract(tileRect(col, row))
                    .png()
                    .toBuffer();
                tiles.push({ col, row, buffer: buf });
            }
        }
        log(` Extracted ${tiles.length} tiles (${tileW}x${tileH} each)`);
        // Compute aspect ratio closest to tile dimensions for Gemini
        // (distance is measured on a log scale so 2:1 and 1:2 are equally far from 1:1).
        const tileAspect = tileW / tileH;
        const bestAspect = ASPECT_RATIOS.reduce((best, opt) => Math.abs(Math.log(opt.ratio / tileAspect)) < Math.abs(Math.log(best.ratio / tileAspect)) ? opt : best);
        log(` Tile aspect ~${tileAspect.toFixed(2)}, using ${bestAspect.label}`);
        // Apply the same per-model size adjustment the other image tools use.
        const resolvedSize = resolveImageSize(image_size, model);
        // Send each tile to the provider in parallel
        const { provider: tileProvider, modelId: tileModelId } = getProvider(model);
        const fixResults = await Promise.allSettled(tiles.map(async (tile, i) => {
            log(` [tile ${i + 1}/${tiles.length}] sending to ${tileProvider.name}...`);
            const tileMeta = await sharp(tile.buffer).metadata();
            let sendBuf;
            let sendMime;
            // Large tiles are downscaled to at most 1024px wide and sent as JPEG to keep the upload small.
            if ((tileMeta.width ?? 0) > 1024 || tile.buffer.length > 500_000) {
                sendBuf = await sharp(tile.buffer).resize(Math.min(tileMeta.width ?? 1024, 1024)).jpeg({ quality: 85 }).toBuffer();
                sendMime = "image/jpeg";
            }
            else {
                sendBuf = tile.buffer;
                sendMime = "image/png";
            }
            const { imageBase64, modelUsed } = await tileProvider.edit({
                prompt,
                imageBase64: sendBuf.toString("base64"),
                imageMime: sendMime,
                aspectRatio: bestAspect.label,
                imageSize: resolvedSize,
                modelId: tileModelId,
            });
            return { col: tile.col, row: tile.row, buffer: Buffer.from(imageBase64, "base64"), modelUsed };
        }));
        // Check for failures
        const failed = fixResults.filter((r) => r.status === "rejected");
        if (failed.length === fixResults.length) {
            throw new Error(`All ${fixResults.length} tiles failed. First error: ${failed[0].reason?.message}`);
        }
        if (failed.length > 0) {
            log(` WARNING: ${failed.length}/${fixResults.length} tiles failed, using originals for those`);
        }
        // Build fixed tile map, falling back to original tile on failure
        const fixedTiles = fixResults.map((result, i) => {
            if (result.status === "fulfilled") {
                return result.value;
            }
            log(` Tile ${i + 1} failed, using original: ${result.reason?.message}`);
            return tiles[i];
        });
        // Resize each fixed tile back to exact tile dimensions and composite
        const compositeInputs = [];
        for (const tile of fixedTiles) {
            const { left, top, width, height } = tileRect(tile.col, tile.row);
            const resized = await sharp(tile.buffer)
                .resize(width, height, { fit: "fill" })
                .png()
                .toBuffer();
            compositeInputs.push({ input: resized, left, top });
        }
        const finalBuf = await sharp({
            create: { width: imgWidth, height: imgHeight, channels: 4, background: { r: 0, g: 0, b: 0, alpha: 1 } },
        })
            .composite(compositeInputs)
            .png()
            .toBuffer();
        // Save result
        const id = randomUUID();
        const outFilename = await saveToDisk(finalBuf, `fix_${id.slice(0, 8)}`);
        log(` Stitched ${fixedTiles.length} tiles -> ${outFilename}`);
        const tileModels = [];
        for (const r of fixResults) {
            if (r.status === "fulfilled") {
                const m = r.value.modelUsed;
                if (m)
                    tileModels.push(m);
            }
        }
        // If any tile was produced by something other than the primary model, report that model.
        const imageModelUsed = tileModels.find((m) => m !== MODEL_PRIMARY) ?? MODEL_PRIMARY;
        const img = {
            id,
            prompt: `[fix ${grid}] ${prompt.slice(0, 60)}`,
            fullPng: finalBuf,
            timestamp: Date.now(),
            filename: outFilename,
            modelUsed: imageModelUsed,
        };
        imageStore.push(img);
        notifyViewerClients(img);
        const { base64: mcpBase64, mime: mcpMimeType } = await shrinkForMcp(finalBuf);
        const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
        const successCount = fixResults.filter((r) => r.status === "fulfilled").length;
        log(`fix_image complete in ${elapsed}s (${successCount}/${fixResults.length} tiles succeeded)`);
        return {
            content: [
                { type: "image", data: mcpBase64, mimeType: mcpMimeType },
                {
                    type: "text",
                    // Pass the requested model key so an explicit non-Gemini
                    // choice is not annotated with the Gemini fallback notice.
                    text: `Fixed ${successCount}/${tiles.length} tiles (${grid} grid). Saved as ${outFilename} — full-res at http://localhost:${viewerPort}${noticeFor(imageModelUsed, model)}`,
                },
            ],
        };
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        log(`fix_image error: ${msg}`);
        return {
            content: [{ type: "text", text: `fix_image failed: ${msg}` }],
            isError: true,
        };
    }
});
2044
// --- Aspect ratio snapping helper ---
/** Aspect ratios a crop can be snapped to; `ratio` is width divided by height. */
const ASPECT_RATIOS = [
    { label: "1:1", ratio: 1 },
    { label: "16:9", ratio: 16 / 9 },
    { label: "9:16", ratio: 9 / 16 },
    { label: "3:4", ratio: 3 / 4 },
    { label: "4:3", ratio: 4 / 3 },
    { label: "2:3", ratio: 2 / 3 },
    { label: "3:2", ratio: 3 / 2 },
    { label: "4:5", ratio: 4 / 5 },
    { label: "5:4", ratio: 5 / 4 },
];
/**
 * Given a crop region, snap it to the nearest Gemini aspect ratio.
 *
 * The region is grown along one axis (never shrunk) until it matches the
 * chosen ratio and is kept centred on the original crop. If the grown region
 * does not fit inside the image it is scaled down, preserving the ratio, until
 * it does, and is then shifted so that it lies entirely within the image.
 *
 * @param {number} x - Left edge of the requested crop, in pixels.
 * @param {number} y - Top edge of the requested crop, in pixels.
 * @param {number} w - Width of the requested crop, in pixels.
 * @param {number} h - Height of the requested crop, in pixels.
 * @param {number} imgWidth - Width of the source image, in pixels.
 * @param {number} imgHeight - Height of the source image, in pixels.
 * @returns {{left: number, top: number, width: number, height: number, aspectLabel: string}}
 *   The snapped crop rectangle and the label of the ratio it was snapped to.
 */
function snapToAspectRatio(x, y, w, h, imgWidth, imgHeight) {
    // A crop that rounded down to zero pixels would produce a 0, Infinity or
    // NaN ratio below, so treat every crop as at least 1x1.
    const reqW = Math.max(1, w);
    const reqH = Math.max(1, h);
    const cropRatio = reqW / reqH;
    // Distances are compared in log space so that a ratio and its reciprocal
    // are equally far from 1:1. Ties keep the earlier entry of the list.
    const distance = (candidate) => Math.abs(Math.log(candidate.ratio / cropRatio));
    const best = ASPECT_RATIOS.reduce((a, b) => (distance(a) <= distance(b) ? a : b));
    const centerX = x + reqW / 2;
    const centerY = y + reqH / 2;
    let newW;
    let newH;
    if (best.ratio > cropRatio) {
        // Need wider — expand width, keep height
        newH = reqH;
        newW = Math.round(reqH * best.ratio);
    }
    else {
        // Need taller — expand height, keep width
        newW = reqW;
        newH = Math.round(reqW / best.ratio);
    }
    // If the expanded region is larger than the image, scale it down while
    // keeping the snapped ratio. Clamping a single side would return a
    // rectangle that no longer matches aspectLabel.
    if (newW > imgWidth) {
        newW = imgWidth;
        newH = Math.max(1, Math.round(imgWidth / best.ratio));
    }
    if (newH > imgHeight) {
        newH = imgHeight;
        newW = Math.max(1, Math.min(imgWidth, Math.round(imgHeight * best.ratio)));
    }
    // Re-center and clamp the position to image bounds
    const left = Math.min(Math.max(Math.round(centerX - newW / 2), 0), imgWidth - newW);
    const top = Math.min(Math.max(Math.round(centerY - newH / 2), 0), imgHeight - newH);
    return { left, top, width: newW, height: newH, aspectLabel: best.label };
}
2100
/**
 * MCP tool `fix_region`.
 *
 * Crops a percentage-based region out of an image in SAVE_DIR, snaps the crop
 * to a supported aspect ratio, sends it to the selected provider's `edit`
 * call, resizes the result back to the crop size, matches it against the
 * original crop with `matchHistogram`, and composites it over the source.
 * The result is saved to disk, pushed to the viewer, and returned to the
 * caller as a shrunken image plus text. Any failure is reported as an
 * `isError` text result rather than thrown.
 */
server.tool("fix_region", `Fix a specific region of an image by cropping it out, sending it for repair, and reinserting it. The crop is automatically snapped to the nearest supported aspect ratio. Use this when only part of an image has glitched text or artifacts — more precise than fix_image's grid approach.`, {
    filename: z.string().describe(`Filename of the source image in ${SAVE_DIR}`),
    prompt: z
        .string()
        .default("Clean up and fix any garbled, glitched, or distorted text in this image region. Preserve the style, colors, and layout exactly but make all text crisp and legible.")
        .describe("Instructions for fixing the selected region"),
    x: z.number().min(0).max(100).describe("Left edge of region as percentage of image width (0-100)"),
    y: z.number().min(0).max(100).describe("Top edge of region as percentage of image height (0-100)"),
    width: z.number().min(1).max(100).describe("Width of region as percentage of image width (1-100)"),
    height: z.number().min(1).max(100).describe("Height of region as percentage of image height (1-100)"),
    image_size: z
        .enum(["512", "1K", "2K", "4K"])
        .default("1K")
        .describe("Resolution for the cropped region"),
    model: z.enum(MODEL_KEYS).optional().describe(MODEL_PARAM_DESCRIPTION),
}, async ({ filename, prompt, x, y, width, height, image_size, model }) => {
    try {
        await ensureViewer();
        log(`fix_region: source=${filename} region=(${x}%,${y}%,${width}%,${height}%)`);
        const t0 = Date.now();
        // Load source image
        const filepath = join(SAVE_DIR, filename);
        const srcBuf = await readFile(filepath);
        const metadata = await sharp(srcBuf).metadata();
        const imgW = metadata.width;
        const imgH = metadata.height;
        // Without dimensions every pixel computation below would be NaN.
        if (!imgW || !imgH) {
            throw new Error(`Could not read image dimensions for ${filename}`);
        }
        log(` Source: ${imgW}x${imgH}`);
        // Convert percentages to pixels. The schema allows x + width (and
        // y + height) to exceed 100%, so clip the request to the image instead
        // of letting the snapping step slide it onto a different area, and keep
        // every side at least one pixel long.
        const pxX = Math.min(Math.round((x / 100) * imgW), imgW - 1);
        const pxY = Math.min(Math.round((y / 100) * imgH), imgH - 1);
        const pxW = Math.max(1, Math.min(Math.round((width / 100) * imgW), imgW - pxX));
        const pxH = Math.max(1, Math.min(Math.round((height / 100) * imgH), imgH - pxY));
        // Snap to nearest aspect ratio
        const snapped = snapToAspectRatio(pxX, pxY, pxW, pxH, imgW, imgH);
        log(` Requested: ${pxW}x${pxH} at (${pxX},${pxY}) -> Snapped: ${snapped.width}x${snapped.height} at (${snapped.left},${snapped.top}) [${snapped.aspectLabel}]`);
        // Extract the region
        const regionBuf = await sharp(srcBuf)
            .extract({ left: snapped.left, top: snapped.top, width: snapped.width, height: snapped.height })
            .png()
            .toBuffer();
        // Compress for Gemini if needed: wide or heavy crops go out as a JPEG
        // at most 1024px wide, everything else as the lossless PNG crop.
        let sendBuf;
        let sendMime;
        if (snapped.width > 1024 || regionBuf.length > 500_000) {
            sendBuf = await sharp(regionBuf).resize(Math.min(snapped.width, 1024)).jpeg({ quality: 85 }).toBuffer();
            sendMime = "image/jpeg";
        }
        else {
            sendBuf = regionBuf;
            sendMime = "image/png";
        }
        const { provider: regionProvider, modelId: regionModelId } = getProvider(model);
        const { imageBase64, modelUsed } = await regionProvider.edit({
            prompt,
            imageBase64: sendBuf.toString("base64"),
            imageMime: sendMime,
            aspectRatio: snapped.aspectLabel,
            imageSize: image_size,
            modelId: regionModelId,
        });
        // Resize fixed region back to exact pixel dimensions of the snapped crop
        let fixedRegion = await sharp(Buffer.from(imageBase64, "base64"))
            .resize(snapped.width, snapped.height, { fit: "fill" })
            .png()
            .toBuffer();
        // Match brightness/contrast to original region
        fixedRegion = await matchHistogram(fixedRegion, regionBuf);
        // Composite back into original image
        const finalBuf = await sharp(srcBuf)
            .composite([{ input: fixedRegion, left: snapped.left, top: snapped.top }])
            .png()
            .toBuffer();
        // Save result
        const id = randomUUID();
        const outFilename = await saveToDisk(finalBuf, `fixreg_${id.slice(0, 8)}`);
        log(` Composited fixed region -> ${outFilename}`);
        const img = {
            id,
            prompt: `[fix-region ${x}%,${y}% ${width}%x${height}%] ${prompt.slice(0, 50)}`,
            fullPng: finalBuf,
            timestamp: Date.now(),
            filename: outFilename,
            modelUsed,
        };
        imageStore.push(img);
        notifyViewerClients(img);
        const { base64: mcpBase64, mime: mcpMimeType } = await shrinkForMcp(finalBuf);
        const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
        log(`fix_region complete in ${elapsed}s`);
        return {
            content: [
                {
                    type: "text",
                    text: `Region snapped from ${pxW}x${pxH} to ${snapped.width}x${snapped.height} (${snapped.aspectLabel})`,
                },
                { type: "image", data: mcpBase64, mimeType: mcpMimeType },
                { type: "text", text: `Saved as ${outFilename} — full-res at http://localhost:${viewerPort}${noticeFor(modelUsed)}` },
            ],
        };
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        log(`fix_region error: ${msg}`);
        return {
            content: [{ type: "text", text: `fix_region failed: ${msg}` }],
            isError: true,
        };
    }
});
2209
/**
 * MCP tool `interactive_fix`.
 *
 * Opens the viewer's crop page for `filename`, waits until an entry registered
 * in `pendingCrops` is resolved with the user's selection, then repairs the
 * selected region with one or more parallel provider `edit` calls ("shots").
 * Every successful shot is composited over the source, saved, and pushed to
 * the viewer. With several successful shots the tool waits for an entry in
 * `pendingSelections` to be resolved with the index of the preferred one.
 * The browser is told the outcome through `completeResolve`; failures are
 * returned as an `isError` text result rather than thrown.
 */
server.tool("interactive_fix", `Opens an image in a browser-based crop tool where the user can draw a rectangle around the region to fix, add notes/instructions, and submit. The tool waits for the user's selection, then sends the cropped region for repair and composites it back into the original image. Best for precise, user-guided fixes.`, {
    filename: z.string().describe(`Filename of the source image in ${SAVE_DIR}`),
    image_size: z
        .enum(["512", "1K", "2K", "4K"])
        .default("1K")
        .describe("Resolution for the cropped region"),
    model: z.enum(MODEL_KEYS).optional().describe(MODEL_PARAM_DESCRIPTION),
}, async ({ filename, image_size, model }) => {
    // Declared outside the try block so the catch handler can also notify the browser.
    let completeResolve;
    try {
        await ensureViewer();
        log(`interactive_fix: opening crop UI for ${filename}`);
        // Open crop UI in browser
        const cropUrl = `http://localhost:${viewerPort}/crop/${encodeURIComponent(filename)}`;
        log(` Opening crop URL: ${cropUrl}`);
        openExternal(cropUrl);
        // Wait for the user to submit a crop selection
        log(` Waiting for user to select region in browser...`);
        const onComplete = new Promise((r) => { completeResolve = r; });
        const submission = await new Promise((resolve) => {
            pendingCrops.set(filename, { resolve, onComplete, completeResolve });
        });
        // The notes field is optional; normalise it once so a missing value
        // cannot throw in the log line below.
        const userNotes = submission.prompt ?? "";
        log(` User submitted: region=(${submission.x.toFixed(1)}%,${submission.y.toFixed(1)}%,${submission.width.toFixed(1)}%,${submission.height.toFixed(1)}%) prompt="${userNotes.slice(0, 80)}"`);
        // Load source image
        const filepath = join(SAVE_DIR, filename);
        const srcBuf = await readFile(filepath);
        const metadata = await sharp(srcBuf).metadata();
        const imgW = metadata.width;
        const imgH = metadata.height;
        // Without dimensions every pixel computation below would be NaN.
        if (!imgW || !imgH) {
            throw new Error(`Could not read image dimensions for ${filename}`);
        }
        // Convert percentages to pixels, clipping the selection to the image
        // and keeping every side at least one pixel long.
        const pxX = Math.min(Math.max(Math.round((submission.x / 100) * imgW), 0), imgW - 1);
        const pxY = Math.min(Math.max(Math.round((submission.y / 100) * imgH), 0), imgH - 1);
        const pxW = Math.max(1, Math.min(Math.round((submission.width / 100) * imgW), imgW - pxX));
        const pxH = Math.max(1, Math.min(Math.round((submission.height / 100) * imgH), imgH - pxY));
        // Snap to nearest aspect ratio
        const snapped = snapToAspectRatio(pxX, pxY, pxW, pxH, imgW, imgH);
        log(` Snapped: ${snapped.width}x${snapped.height} at (${snapped.left},${snapped.top}) [${snapped.aspectLabel}]`);
        // Extract the region
        const regionBuf = await sharp(srcBuf)
            .extract({ left: snapped.left, top: snapped.top, width: snapped.width, height: snapped.height })
            .png()
            .toBuffer();
        // Compress for Gemini if needed
        let sendBuf;
        let sendMime;
        if (snapped.width > 1024 || regionBuf.length > 500_000) {
            sendBuf = await sharp(regionBuf).resize(Math.min(snapped.width, 1024)).jpeg({ quality: 85 }).toBuffer();
            sendMime = "image/jpeg";
        }
        else {
            sendBuf = regionBuf;
            sendMime = "image/png";
        }
        // Every shot sends the same payload, so encode it once.
        const sendBase64 = sendBuf.toString("base64");
        const prompt = userNotes || "Clean up and fix any garbled, glitched, or distorted text. Preserve the style, colors, and layout exactly.";
        // The shot count comes from the browser: coerce it to a whole number
        // of at least 1 so that at least one call is always made.
        const shots = Math.max(1, Math.floor(Number(submission.shots)) || 1);
        const { provider: fixProvider, modelId: fixModelId } = getProvider(model);
        log(` Firing ${shots} parallel ${fixProvider.name} call(s)...`);
        const geminiResults = await Promise.allSettled(Array.from({ length: shots }, (_, i) => fixProvider.edit({
            prompt,
            imageBase64: sendBase64,
            imageMime: sendMime,
            aspectRatio: snapped.aspectLabel,
            imageSize: image_size,
            modelId: fixModelId,
        }).then(async ({ imageBase64, modelUsed }) => {
            // Resize, histogram match, and composite each result
            let fixedRegion = await sharp(Buffer.from(imageBase64, "base64"))
                .resize(snapped.width, snapped.height, { fit: "fill" })
                .png()
                .toBuffer();
            fixedRegion = await matchHistogram(fixedRegion, regionBuf);
            const compositedBuf = await sharp(srcBuf)
                .composite([{ input: fixedRegion, left: snapped.left, top: snapped.top }])
                .png()
                .toBuffer();
            // Save each shot to disk
            const shotId = randomUUID();
            const shotFilename = await saveToDisk(compositedBuf, `ifix_${shotId.slice(0, 8)}`);
            log(` Shot ${i + 1}/${shots} -> ${shotFilename}`);
            const img = {
                id: shotId,
                prompt: `[interactive-fix shot ${i + 1}] ${prompt.slice(0, 50)}`,
                fullPng: compositedBuf,
                timestamp: Date.now(),
                filename: shotFilename,
                modelUsed,
            };
            imageStore.push(img);
            notifyViewerClients(img);
            return { filename: shotFilename, buffer: compositedBuf, modelUsed };
        })));
        const succeeded = geminiResults
            .filter((r) => r.status === "fulfilled")
            .map((r) => r.value);
        if (succeeded.length === 0) {
            const firstErr = geminiResults.find((r) => r.status === "rejected")?.reason?.message || "Unknown error";
            throw new Error(`All ${shots} shots failed. First error: ${firstErr}`);
        }
        log(` ${succeeded.length}/${shots} shots succeeded`);
        let chosen;
        if (succeeded.length === 1) {
            // Single result — use it directly
            chosen = succeeded[0];
            completeResolve({ ok: true, filename: chosen.filename });
        }
        else {
            // Multiple results — send filenames to browser for user selection
            const filenames = succeeded.map((s) => s.filename);
            completeResolve({ ok: true, filenames });
            // Wait for user to pick their favorite
            log(` Waiting for user to select from ${filenames.length} shots...`);
            const selectedIndex = await new Promise((resolve) => {
                pendingSelections.set(filename, { resolve, filenames });
            });
            // The index comes from the browser; reject anything that does not
            // identify one of the successful shots with a clear message.
            chosen = succeeded[selectedIndex];
            if (!chosen) {
                throw new Error(`Invalid shot selection: ${selectedIndex}`);
            }
            log(` User selected shot ${selectedIndex + 1}: ${chosen.filename}`);
        }
        const chosenFilename = chosen.filename;
        const chosenModel = chosen.modelUsed;
        const { base64: mcpBase64, mime: mcpMimeType } = await shrinkForMcp(chosen.buffer);
        return {
            content: [
                {
                    type: "text",
                    text: `User selected region: ${submission.x.toFixed(1)}%,${submission.y.toFixed(1)}% ${submission.width.toFixed(1)}%x${submission.height.toFixed(1)}% -> snapped to ${snapped.width}x${snapped.height} (${snapped.aspectLabel})\n${shots} shot(s), ${succeeded.length} succeeded. User picked: ${chosenFilename}\nUser notes: ${userNotes || "(none)"}`,
                },
                { type: "image", data: mcpBase64, mimeType: mcpMimeType },
                { type: "text", text: `Saved as ${chosenFilename} — full-res at http://localhost:${viewerPort}${noticeFor(chosenModel)}` },
            ],
        };
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        log(`interactive_fix error: ${msg}`);
        // Notify the browser of failure too (a no-op if it was already notified of success)
        completeResolve?.({ ok: false, error: msg });
        return {
            content: [{ type: "text", text: `interactive_fix failed: ${msg}` }],
            isError: true,
        };
    }
});
2355
/**
 * Parse a hex colour string into its red, green and blue components.
 *
 * Accepts 3-digit (`"fff"`) and 6-digit (`"ffffff"`) forms, with or without a
 * leading `#`, in either letter case. Each digit of the 3-digit form is
 * doubled, so `"#1a2"` is read as `"#11aa22"`.
 *
 * @param {string} hex - The colour to parse.
 * @returns {number[]} `[r, g, b]`, each an integer from 0 to 255.
 * @throws {Error} If `hex` is not a 3- or 6-digit hexadecimal colour.
 */
function parseHexColor(hex) {
    // Validate the digits as well as the length: parseInt would otherwise turn
    // "zz" into NaN and silently read "5g" as 5.
    const match = /^#?([0-9a-f]{3}|[0-9a-f]{6})$/i.exec(hex);
    if (!match) {
        throw new Error(`Invalid hex color: ${hex}`);
    }
    const digits = match[1];
    const pairs = digits.length === 3
        ? [...digits].map((digit) => digit + digit)
        : [digits.slice(0, 2), digits.slice(2, 4), digits.slice(4, 6)];
    return pairs.map((pair) => Number.parseInt(pair, 16));
}
2373
/**
 * MCP tool `remove_background`.
 *
 * Decodes an image from SAVE_DIR to raw pixels with an alpha channel, sets the
 * alpha of every pixel whose colour channels are all within `threshold` of the
 * target colour to 0, and writes the result as a timestamped `_nobg.png` file
 * in SAVE_DIR. The new image is also pushed to the viewer. Failures are
 * returned as an `isError` text result rather than thrown.
 */
server.tool("remove_background", `Remove a background color from an image and make it transparent. Outputs a PNG with alpha channel. The image must already exist in ${SAVE_DIR} (use save_image to import first).`, {
    filename: z.string().describe(`Filename of the source image in ${SAVE_DIR}`),
    color: z
        .string()
        .default("#FFFFFF")
        .describe('Target background color as hex (e.g. "#FFFFFF" for white, "#000000" for black, "#FF0000" for red). Default: white.'),
    threshold: z
        .number()
        .min(0)
        .max(255)
        .default(30)
        .describe("How far from the target color a pixel can be and still count as background (0 = exact match only, 30 = default, higher = more aggressive)"),
}, async ({ filename, color, threshold }) => {
    try {
        const [targetR, targetG, targetB] = parseHexColor(color);
        const srcPath = join(SAVE_DIR, filename);
        const { data, info } = await sharp(srcPath)
            .ensureAlpha()
            .raw()
            .toBuffer({ resolveWithObject: true });
        // Normally 4 (RGBA). NOTE(review): a greyscale source is expected to
        // decode as 2 channels (grey + alpha) — confirm against sharp's raw
        // output. Both layouts are handled; anything else is rejected rather
        // than indexed with the wrong stride.
        const { channels } = info;
        if (channels !== 2 && channels !== 4) {
            throw new Error(`Unsupported channel count: ${channels}`);
        }
        const isGrey = channels === 2;
        const alphaOffset = channels - 1;
        for (let i = 0; i < data.length; i += channels) {
            // A grey pixel has the same value in all three colour channels.
            const r = data[i];
            const g = isGrey ? r : data[i + 1];
            const b = isGrey ? r : data[i + 2];
            if (Math.abs(r - targetR) <= threshold &&
                Math.abs(g - targetG) <= threshold &&
                Math.abs(b - targetB) <= threshold) {
                data[i + alphaOffset] = 0;
            }
        }
        const ts = new Date().toISOString().replace(/[:.]/g, "-");
        const outFilename = `${ts}_nobg.png`;
        const outPath = join(SAVE_DIR, outFilename);
        // Encode once and reuse the buffer for the file, the size report and
        // the viewer instead of writing the file and reading it back.
        const fullPng = await sharp(data, {
            raw: { width: info.width, height: info.height, channels },
        })
            .png()
            .toBuffer();
        await writeFile(outPath, fullPng);
        const sizeKb = (fullPng.length / 1024).toFixed(0);
        log(`remove_background: ${filename} -> ${outFilename} (${sizeKb}KB, color=${color}, threshold=${threshold})`);
        // Register in viewer
        await ensureViewer();
        const id = randomUUID();
        const img = {
            id,
            prompt: `remove_background(${filename}, color=${color}, threshold=${threshold})`,
            fullPng,
            timestamp: Date.now(),
            filename: outFilename,
        };
        imageStore.push(img);
        notifyViewerClients(img);
        return {
            content: [
                {
                    type: "text",
                    text: `Background removed! Saved as ${outFilename} (${sizeKb}KB). View full-res at http://localhost:${viewerPort}`,
                },
            ],
        };
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        log(`remove_background error: ${msg}`);
        return {
            content: [{ type: "text", text: `remove_background failed: ${msg}` }],
            isError: true,
        };
    }
});
2445
/**
 * Promise adapter for the callback-style `potrace.trace`.
 *
 * @param {*} input - Passed through unchanged as the first argument of `potrace.trace`.
 * @param {object} options - Passed through unchanged as the second argument.
 * @returns {Promise<*>} Resolves with the value potrace hands to its callback,
 *   or rejects with the error it reports.
 */
function traceAsync(input, options) {
    const runTrace = (fulfil, fail) => {
        const onTraced = (traceError, markup) => {
            if (traceError) {
                fail(traceError);
                return;
            }
            fulfil(markup);
        };
        potrace.trace(input, options, onTraced);
    };
    return new Promise(runTrace);
}
2455
/**
 * MCP tool `trace_to_svg`.
 *
 * Reads an image from SAVE_DIR (negating its colour channels first when
 * `invert` is set), vectorises it through `traceAsync`, and writes the SVG to
 * a timestamped `_traced.svg` file in SAVE_DIR. A negative `threshold` means
 * no threshold option is passed to the tracer. Failures are returned as an
 * `isError` text result rather than thrown.
 */
server.tool("trace_to_svg", `Convert a raster image to SVG using potrace vectorization. Best for line art, diagrams, logos, and high-contrast images. The image must already exist in ${SAVE_DIR} (use save_image to import first).`, {
    filename: z.string().describe(`Filename of the source image in ${SAVE_DIR}`),
    threshold: z
        .number()
        .min(-1)
        .max(255)
        .default(-1)
        .describe("Brightness threshold for black/white conversion (0-255). Lower = more black. -1 = auto-detect. Default: auto."),
    invert: z
        .boolean()
        .default(false)
        .describe("Invert the image before tracing (useful for light-on-dark images)"),
    turdsize: z
        .number()
        .min(0)
        .default(2)
        .describe("Suppress speckles up to this size (in pixels). Default: 2."),
}, async ({ filename, threshold, invert, turdsize }) => {
    try {
        const sourcePath = join(SAVE_DIR, filename);
        // Inverted input is re-encoded as PNG; otherwise the file bytes are traced as-is.
        const traceInput = invert
            ? await sharp(sourcePath).negate({ alpha: false }).png().toBuffer()
            : await readFile(sourcePath);
        const traceOptions = { turdSize: turdsize, background: "transparent" };
        if (!(threshold < 0)) {
            traceOptions.threshold = threshold;
        }
        const svgMarkup = await traceAsync(traceInput, traceOptions);
        const stamp = new Date().toISOString().replace(/[:.]/g, "-");
        const outFilename = `${stamp}_traced.svg`;
        const destination = join(SAVE_DIR, outFilename);
        await writeFile(destination, svgMarkup);
        const { size } = await stat(destination);
        const sizeKb = (size / 1024).toFixed(1);
        log(`trace_to_svg: ${filename} -> ${outFilename} (${sizeKb}KB)`);
        const summary = {
            type: "text",
            text: `Traced to SVG! Saved as ${outFilename} (${sizeKb}KB) in ${SAVE_DIR}`,
        };
        return { content: [summary] };
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        log(`trace_to_svg error: ${msg}`);
        return {
            content: [{ type: "text", text: `trace_to_svg failed: ${msg}` }],
            isError: true,
        };
    }
});
2513
// --- Startup ---
let viewerStarted = false;
// In-flight (or completed) start-up, shared by every caller of ensureViewer.
let viewerStartPromise = null;
/**
 * Start the viewer on first use and open it in the default browser.
 *
 * All callers share one start-up promise, so a call made while the viewer is
 * still starting waits until `viewerPort` has been assigned instead of
 * returning early. If start-up fails, the state is reset so that a later call
 * tries again.
 *
 * @returns {Promise<void>} Resolves once the viewer has started.
 */
async function ensureViewer() {
    if (!viewerStartPromise) {
        viewerStarted = true;
        viewerStartPromise = (async () => {
            viewerPort = await startViewer();
            log(`Viewer running at http://localhost:${viewerPort}`);
            openExternal(`http://localhost:${viewerPort}`);
        })().catch((err) => {
            // Allow a retry instead of leaving the viewer permanently marked as started.
            viewerStarted = false;
            viewerStartPromise = null;
            throw err;
        });
    }
    return viewerStartPromise;
}
2523
/**
 * Process entry point.
 *
 * Registers a provider for each API key that is set. With `--viewer` on the
 * command line it only starts the viewer and opens it in the browser;
 * otherwise it connects the MCP server to a stdio transport.
 */
async function main() {
    const hasGeminiKey = Boolean(GOOGLE_API_KEY);
    const hasOpenAiKey = Boolean(OPENAI_API_KEY);
    const noKeys = !hasGeminiKey && !hasOpenAiKey;
    if (hasGeminiKey) {
        providers["gemini"] = new GeminiProvider();
        log("Gemini provider available");
    }
    if (hasOpenAiKey) {
        providers["openai"] = new OpenAIProvider();
        log("OpenAI provider available");
    }
    const viewerOnly = process.argv.includes("--viewer");
    if (!viewerOnly) {
        // Normal mode: serve MCP requests over stdio.
        if (noKeys) {
            log("WARNING: Neither GOOGLE_API_KEY nor OPENAI_API_KEY is set. No image providers available.");
        }
        log(`Default model: ${getDefaultModelKey()}`);
        const stdioTransport = new StdioServerTransport();
        await server.connect(stdioTransport);
        log("MCP server running on stdio");
        return;
    }
    // Stand-alone viewer mode: this path reports through console.log.
    if (noKeys) {
        console.log("Note: No API keys set — viewer is read-only (respin disabled). Set GOOGLE_API_KEY or OPENAI_API_KEY to enable generation.");
    }
    viewerPort = await startViewer();
    const viewerUrl = `http://localhost:${viewerPort}`;
    console.log(`pixel-surgeon-mcp viewer running at ${viewerUrl}`);
    openExternal(viewerUrl);
}
// Any error that escapes main() is fatal: report it and exit non-zero.
main().catch((fatal) => {
    console.error("Fatal error:", fatal);
    process.exit(1);
});