restyle-sprites 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,335 @@
1
+ import fs from 'node:fs/promises';
2
+ import OpenAI from 'openai';
3
+ import sharp from 'sharp';
4
export class OpenAIImageClient {
    /** Max generation attempts before accepting a best-effort (possibly checkerboard-tainted) style reference. */
    static STYLE_REFERENCE_MAX_ATTEMPTS = 3;
    /** OpenAI SDK client, or null when OPENAI_API_KEY is absent (disables the OpenAI fallback path). */
    client;
    /** Preferred OpenAI image model; first entry of getOpenAIModelCandidates(). */
    model;
    /** Gemini model id used as the primary image generator. */
    geminiModel;
    /**
     * @param {{ model?: string } | undefined} options Optional override for the OpenAI model id.
     */
    constructor(options) {
        const apiKey = process.env.OPENAI_API_KEY?.trim();
        // Without a key the OpenAI fallback is unavailable; Gemini errors then propagate to callers.
        this.client = apiKey ? new OpenAI({ apiKey }) : null;
        this.model = options?.model ?? 'gpt-image-1.5';
        this.geminiModel = process.env.GEMINI_IMAGE_MODEL?.trim() || 'gemini-3.1-flash-image-preview';
    }
    /**
     * Ordered, de-duplicated list of OpenAI model ids to try (preferred model first).
     * @returns {string[]}
     */
    getOpenAIModelCandidates() {
        const ordered = [this.model, 'gpt-image-1', 'gpt-image-1-mini'];
        return Array.from(new Set(ordered));
    }
    /**
     * Generates a transparent style-reference PNG, retrying (with a sterner prompt
     * from the second attempt on) while the result still shows checkerboard
     * "fake transparency" artifacts. Gemini is tried first; on error the request
     * falls back to OpenAI when a client is configured.
     *
     * @param {string} prompt Base style prompt.
     * @param {string | undefined} inspirationImagePath Optional PNG used as structural reference.
     * @returns {Promise<{ image: Buffer }>} Sanitized PNG bytes.
     * @throws When every attempt produced checkerboard artifacts, or a non-fallback error occurred.
     */
    async generateStyleReference(prompt, inspirationImagePath) {
        const inspirationImage = inspirationImagePath ? await fs.readFile(inspirationImagePath) : null;
        const inspirationB64 = inspirationImage?.toString('base64');
        const mergedPrompt = inspirationB64 ? `${prompt} Use the uploaded image as structural reference.` : prompt;
        const basePrompt = `${mergedPrompt} Transparent background. Return one PNG image.`;
        for (let attempt = 1; attempt <= OpenAIImageClient.STYLE_REFERENCE_MAX_ATTEMPTS; attempt += 1) {
            // Escalate the prompt after the first failed attempt.
            const strictSuffix = attempt === 1
                ? ''
                : ' CRITICAL: Do not paint checkerboard, gray-white transparency tiles, or any background grid.';
            const fullPrompt = `${basePrompt}${strictSuffix}`;
            try {
                const image = await this.generateWithGemini({
                    prompt: fullPrompt,
                    images: inspirationB64 ? [{ mimeType: 'image/png', data: inspirationB64 }] : undefined,
                });
                const accepted = await this.sanitizeAndAcceptStyleReference(image, attempt);
                if (accepted) {
                    return accepted;
                }
            }
            catch (error) {
                if (!this.shouldFallbackToOpenAI(error)) {
                    throw error;
                }
                const image = await this.generateWithOpenAI(fullPrompt, inspirationB64);
                const accepted = await this.sanitizeAndAcceptStyleReference(image, attempt);
                if (accepted) {
                    return accepted;
                }
            }
        }
        throw new Error('Failed to generate style reference without checkerboard artifacts.');
    }
    /**
     * Sanitizes one generated candidate and accepts it when it is free of
     * checkerboard artifacts, or when this was the final allowed attempt
     * (best-effort result).
     *
     * @param {Buffer} image Raw generated PNG bytes.
     * @param {number} attempt 1-based attempt counter.
     * @returns {Promise<{ image: Buffer } | null>} Accepted result, or null to retry.
     */
    async sanitizeAndAcceptStyleReference(image, attempt) {
        const sanitized = await this.sanitizeStyleReferenceBuffer(image);
        const quality = await this.analyzeStyleReferenceQuality(sanitized);
        if (!quality.hasCheckerboard || attempt === OpenAIImageClient.STYLE_REFERENCE_MAX_ATTEMPTS) {
            return { image: sanitized };
        }
        return null;
    }
    /**
     * Restyles an asset read from disk; see restyleAssetBuffer for details.
     * @returns {Promise<Buffer>} Restyled PNG bytes.
     */
    async restyleAsset(params) {
        const sourceBuffer = await fs.readFile(params.sourceAssetPath);
        return this.restyleAssetBuffer({
            sourceAsset: sourceBuffer,
            styleReferencePath: params.styleReferencePath,
            prompt: params.prompt,
            renderSize: params.renderSize,
        });
    }
    /**
     * Restyles an in-memory asset using the style reference at
     * params.styleReferencePath. Gemini first, OpenAI on fallback-eligible
     * errors; the output is always run through checkerboard sanitation.
     *
     * @returns {Promise<Buffer>} Restyled PNG bytes.
     */
    async restyleAssetBuffer(params) {
        const styleBuffer = await fs.readFile(params.styleReferencePath);
        const sanitizedStyleBuffer = await this.sanitizeStyleReferenceBuffer(styleBuffer);
        const sourceB64 = params.sourceAsset.toString('base64');
        const styleB64 = sanitizedStyleBuffer.toString('base64');
        try {
            const generated = await this.generateWithGemini({
                prompt: `${params.prompt} Return one PNG image at ${params.renderSize ?? '1024x1024'}.`,
                images: [
                    { mimeType: 'image/png', data: sourceB64 },
                    { mimeType: 'image/png', data: styleB64 },
                ],
            });
            return this.sanitizeStyleReferenceBuffer(generated);
        }
        catch (error) {
            if (!this.shouldFallbackToOpenAI(error)) {
                throw error;
            }
            const generated = await this.generateWithOpenAIRestyle(params.prompt, sourceB64, styleB64, params.renderSize);
            return this.sanitizeStyleReferenceBuffer(generated);
        }
    }
    /**
     * Generates a style image via the OpenAI Responses API.
     * @param {string} prompt Full prompt text.
     * @param {string | undefined} inspirationB64 Optional base64 PNG used as visual reference.
     * @returns {Promise<Buffer>} Decoded image bytes.
     * @throws When no client is configured, no candidate model returned an image,
     *         or a non-"model not found" error occurred.
     */
    async generateWithOpenAI(prompt, inspirationB64) {
        if (!this.client) {
            throw new Error('OpenAI client is not configured (missing OPENAI_API_KEY).');
        }
        const content = [
            { type: 'input_text', text: prompt },
            { type: 'input_text', text: 'Return one transparent PNG image.' },
        ];
        if (inspirationB64) {
            content.push({
                type: 'input_image',
                image_url: `data:image/png;base64,${inspirationB64}`,
                detail: 'high',
            });
        }
        return this.requestOpenAIImage(content, (model) => `OpenAI did not return an image for style generation (model=${model}).`, 'OpenAI style generation failed.');
    }
    /**
     * Restyles via the OpenAI Responses API, passing source and style images.
     * @returns {Promise<Buffer>} Decoded image bytes.
     * @throws When no client is configured, no candidate model returned an image,
     *         or a non-"model not found" error occurred.
     */
    async generateWithOpenAIRestyle(prompt, sourceB64, styleB64, renderSize) {
        if (!this.client) {
            throw new Error('OpenAI client is not configured (missing OPENAI_API_KEY).');
        }
        const content = [
            { type: 'input_text', text: prompt },
            { type: 'input_text', text: `Return one PNG image at ${renderSize ?? '1024x1024'}.` },
            { type: 'input_image', image_url: `data:image/png;base64,${sourceB64}`, detail: 'high' },
            { type: 'input_image', image_url: `data:image/png;base64,${styleB64}`, detail: 'high' },
        ];
        return this.requestOpenAIImage(content, (model) => `OpenAI did not return image bytes for restyle (model=${model}).`, 'OpenAI restyle failed.');
    }
    /**
     * Sends one user-content payload through the Responses API with the
     * image_generation tool, iterating model candidates until one succeeds.
     * Shared by generateWithOpenAI and generateWithOpenAIRestyle.
     *
     * @param {Array<object>} content Responses-API content parts for the user turn.
     * @param {(model: string) => string} buildMissingImageMessage Error text when a model returns no image.
     * @param {string} genericFailureMessage Fallback error text when no candidate produced a usable error.
     * @returns {Promise<Buffer>} Decoded image bytes.
     */
    async requestOpenAIImage(content, buildMissingImageMessage, genericFailureMessage) {
        let lastError;
        for (const model of this.getOpenAIModelCandidates()) {
            try {
                const response = await this.client.responses.create({
                    model,
                    input: [{ role: 'user', content }],
                    tools: [{ type: 'image_generation' }],
                });
                const imageOutput = response.output.find((item) => item.type === 'image_generation_call');
                const imageBase64 = imageOutput && 'result' in imageOutput ? imageOutput.result : undefined;
                if (!imageBase64) {
                    throw new Error(buildMissingImageMessage(model));
                }
                return Buffer.from(imageBase64, 'base64');
            }
            catch (error) {
                lastError = error;
                // Unknown-model errors mean "try the next candidate"; anything else is fatal.
                if (!this.isModelNotFound(error)) {
                    throw error;
                }
            }
        }
        throw lastError instanceof Error ? lastError : new Error(genericFailureMessage);
    }
    /**
     * Whether a Gemini failure should be retried via OpenAI. Currently any
     * error qualifies, as long as an OpenAI client is configured.
     */
    shouldFallbackToOpenAI(_error) {
        return this.client !== null;
    }
    /**
     * Heuristically detects "model not found" errors from the OpenAI API by
     * message text (case-insensitive).
     */
    isModelNotFound(error) {
        if (!(error instanceof Error)) {
            return false;
        }
        const message = error.message.toLowerCase();
        return message.includes('model_not_found') || (message.includes('requested model') && message.includes('not found'));
    }
    /**
     * Calls the Gemini generateContent REST endpoint with a text prompt and
     * optional inline images, and returns the first inline image in the reply.
     *
     * @param {{ prompt: string, images?: Array<{ mimeType: string, data: string }> }} params
     * @returns {Promise<Buffer>} Decoded image bytes.
     * @throws When GEMINI_API_KEY is missing, the HTTP call fails, or no image part is returned.
     */
    async generateWithGemini(params) {
        const geminiApiKey = process.env.GEMINI_API_KEY;
        if (!geminiApiKey) {
            throw new Error('Gemini request failed because GEMINI_API_KEY is missing.');
        }
        const url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(this.geminiModel)}:generateContent?key=${encodeURIComponent(geminiApiKey)}`;
        const requestParts = [{ text: params.prompt }];
        for (const image of params.images ?? []) {
            requestParts.push({
                inlineData: {
                    mimeType: image.mimeType,
                    data: image.data,
                },
            });
        }
        const response = await fetch(url, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                contents: [{ role: 'user', parts: requestParts }],
                generationConfig: {
                    responseModalities: ['TEXT', 'IMAGE'],
                },
            }),
        });
        if (!response.ok) {
            const body = await response.text();
            throw new Error(`Gemini request failed (${response.status}): ${body}`);
        }
        const payload = (await response.json());
        const responseParts = payload.candidates?.[0]?.content?.parts ?? [];
        // Accept both camelCase (inlineData) and snake_case (inline_data) response shapes.
        const imagePart = responseParts.find((part) => part.inlineData?.data || part.inline_data?.data);
        const encoded = imagePart?.inlineData?.data ?? imagePart?.inline_data?.data;
        if (!encoded) {
            const textPart = responseParts.find((part) => typeof part.text === 'string' && part.text.length > 0)?.text;
            const finishReason = payload.candidates?.[0]?.finishReason;
            throw new Error(`Gemini did not return image bytes (model=${this.geminiModel}, finishReason=${String(finishReason ?? 'unknown')}, text=${textPart ?? 'none'}, promptFeedback=${JSON.stringify(payload.promptFeedback ?? null)}).`);
        }
        return Buffer.from(encoded, 'base64');
    }
    /**
     * Flood-fills inward from the image border over contiguous opaque pixels
     * whose color matches the neutral gray/white checkerboard palette, invoking
     * onNeutral(pixelOffset) for each such pixel. Fully transparent pixels stop
     * the fill. Shared by sanitizeStyleReferenceBuffer (which clears alpha) and
     * analyzeStyleReferenceQuality (which counts pixels).
     *
     * @param {Uint8Array | Buffer} data RGBA pixel data (4 bytes per pixel).
     * @param {number} width Image width in pixels.
     * @param {number} height Image height in pixels.
     * @param {(pixelOffset: number) => void} onNeutral Called with the byte offset of each matched pixel.
     */
    floodNeutralEdgeRegion(data, width, height, onNeutral) {
        const visited = new Uint8Array(width * height);
        const queue = [];
        const push = (x, y) => {
            if (x < 0 || y < 0 || x >= width || y >= height) {
                return;
            }
            const index = y * width + x;
            if (visited[index] === 1) {
                return;
            }
            visited[index] = 1;
            queue.push([x, y]);
        };
        // Seed the fill with every border pixel.
        for (let x = 0; x < width; x += 1) {
            push(x, 0);
            push(x, height - 1);
        }
        for (let y = 0; y < height; y += 1) {
            push(0, y);
            push(width - 1, y);
        }
        while (queue.length > 0) {
            const point = queue.pop();
            if (!point) {
                continue;
            }
            const [x, y] = point;
            const pixelOffset = (y * width + x) * 4;
            if (data[pixelOffset + 3] === 0) {
                continue;
            }
            if (!this.isNeutralCheckerColor(data[pixelOffset], data[pixelOffset + 1], data[pixelOffset + 2])) {
                continue;
            }
            onNeutral(pixelOffset);
            push(x + 1, y);
            push(x - 1, y);
            push(x, y + 1);
            push(x, y - 1);
        }
    }
    /**
     * Clears (sets alpha to 0 on) any border-connected checkerboard region so
     * fake-transparency tiles become real transparency, then re-encodes as PNG.
     *
     * @param {Buffer} buffer Encoded image bytes.
     * @returns {Promise<Buffer>} Sanitized PNG bytes.
     */
    async sanitizeStyleReferenceBuffer(buffer) {
        const raw = await sharp(buffer)
            .ensureAlpha()
            .raw()
            .toBuffer({ resolveWithObject: true });
        const { width, height } = raw.info;
        const data = Buffer.from(raw.data);
        this.floodNeutralEdgeRegion(data, width, height, (pixelOffset) => {
            data[pixelOffset + 3] = 0;
        });
        return sharp(data, {
            raw: {
                width,
                height,
                channels: 4,
            },
        })
            .png()
            .toBuffer();
    }
    /**
     * Measures how much border-connected opaque neutral (checkerboard-colored)
     * area remains after sanitation. Flags the image when that area exceeds
     * max(64 px, 1% of the image).
     *
     * @param {Buffer} buffer Encoded image bytes.
     * @returns {Promise<{ hasCheckerboard: boolean }>}
     */
    async analyzeStyleReferenceQuality(buffer) {
        const raw = await sharp(buffer)
            .ensureAlpha()
            .raw()
            .toBuffer({ resolveWithObject: true });
        const { width, height } = raw.info;
        let neutralEdgeOpaque = 0;
        this.floodNeutralEdgeRegion(raw.data, width, height, () => {
            neutralEdgeOpaque += 1;
        });
        return { hasCheckerboard: neutralEdgeOpaque > Math.max(64, Math.floor(width * height * 0.01)) };
    }
    /**
     * True for low-saturation, reasonably bright colors — the gray/white tones
     * typically used to paint fake transparency checkerboards.
     */
    isNeutralCheckerColor(red, green, blue) {
        const max = Math.max(red, green, blue);
        const min = Math.min(red, green, blue);
        const saturation = max === 0 ? 0 : (max - min) / max;
        return saturation < 0.16 && max > 70;
    }
}
@@ -0,0 +1,33 @@
1
+ import { AssetCategory } from './types.js';
2
/**
 * Optional tuning for {@link PixelArtPostProcessor.process}.
 *
 * NOTE(review): this is a declaration file — field semantics below are
 * inferred from names/signatures; confirm against the .js implementation.
 */
export interface PixelArtPostProcessorOptions {
    /** Upper bound on palette size — presumably feeds quantizeColors; default is DEFAULT_MAX_COLORS in the implementation. */
    maxColors?: number;
    /** Alpha cutoff — presumably used by binarizeAlpha/finalAlphaCleanup; range (0-255?) TODO confirm. */
    alphaThreshold?: number;
    /** Original asset bytes used as a reference image — likely consumed by applySourceMask/resizeWithReferenceGeometry; verify. */
    sourceReference?: Buffer;
    /** Category of the asset being processed; semantics defined by AssetCategory in ./types.js. */
    category?: AssetCategory;
}
8
/**
 * Post-processing pipeline for pixel-art image buffers.
 *
 * NOTE(review): declaration file only — method behavior is not visible here.
 * The summaries below are inferred from names and signatures; confirm each
 * against the matching .js implementation.
 */
export declare class PixelArtPostProcessor {
    /** Default for PixelArtPostProcessorOptions.maxColors — value lives in the implementation. */
    private static readonly DEFAULT_MAX_COLORS;
    /** Default for PixelArtPostProcessorOptions.alphaThreshold — value lives in the implementation. */
    private static readonly DEFAULT_ALPHA_THRESHOLD;
    /**
     * Runs the full post-processing pipeline on raw image bytes and resolves
     * with a processed buffer sized targetWidth x targetHeight.
     */
    process(rawBuffer: Buffer, targetWidth: number, targetHeight: number, options?: PixelArtPostProcessorOptions): Promise<Buffer>;
    /** Extracts up to maxColors colors from the source as strings — presumably hex (see private rgbToHex/toHex). */
    extractPalette(sourceBuffer: Buffer, maxColors?: number): Promise<string[]>;
    /** Removes a legacy background within the given color tolerance — exact heuristic in the implementation. */
    stripLegacyBackground(sourceBuffer: Buffer, tolerance?: number): Promise<Buffer>;
    /** Snaps each pixel's alpha to fully opaque/transparent around threshold — TODO confirm direction of the cutoff. */
    binarizeAlpha(buffer: Buffer, threshold?: number): Promise<Buffer>;
    /** Crops to the bounding box of non-transparent content (see private getAlphaBounds). */
    alphaTightCrop(buffer: Buffer): Promise<Buffer>;
    /** Reduces the image to at most maxColors colors. */
    quantizeColors(buffer: Buffer, maxColors?: number): Promise<Buffer>;
    /** Resizes to width x height — name indicates nearest-neighbor sampling (hard pixel edges). */
    resizeNearest(buffer: Buffer, width: number, height: number): Promise<Buffer>;
    /** Final alpha pass applied after the other steps, using threshold. */
    finalAlphaCleanup(buffer: Buffer, threshold?: number): Promise<Buffer>;
    // Internal helpers — parameter/return types are erased in this declaration;
    // see the .js implementation for their contracts.
    private resizeWithReferenceGeometry;
    private getAlphaBounds;
    private getSourceContentBounds;
    private isFullyOpaque;
    private getColorKeyBounds;
    private clamp;
    private applySourceMask;
    private estimateBackgroundColor;
    private zeroRgbOnTransparent;
    private toRawImage;
    private fromRawImage;
    private createTransparentPixel;
    private rgbToHex;
    private toHex;
}