climage 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +161 -19
- package/dist/cli.js +1168 -151
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +49 -8
- package/dist/index.js +1037 -98
- package/dist/index.js.map +1 -1
- package/package.json +6 -2
package/dist/cli.js
CHANGED
|
@@ -26,6 +26,16 @@ function loadEnv(cwd = process2.cwd()) {
|
|
|
26
26
|
// src/core/output.ts
|
|
27
27
|
import fs2 from "fs/promises";
|
|
28
28
|
import path2 from "path";
|
|
29
|
+
// Maps a lowercase file extension (including the leading dot) to the MIME type
// used when a local image file is inlined as a data URI.
// Frozen so this shared lookup table cannot be modified by accident at runtime.
var IMAGE_MIME_TYPES = Object.freeze({
  ".png": "image/png",
  ".jpg": "image/jpeg",
  ".jpeg": "image/jpeg",
  ".webp": "image/webp",
  ".gif": "image/gif",
  ".avif": "image/avif",
  ".heif": "image/heif",
  ".heic": "image/heic"
});
|
|
29
39
|
function extensionForFormat(format) {
|
|
30
40
|
switch (format) {
|
|
31
41
|
case "jpg":
|
|
@@ -34,6 +44,12 @@ function extensionForFormat(format) {
|
|
|
34
44
|
return "png";
|
|
35
45
|
case "webp":
|
|
36
46
|
return "webp";
|
|
47
|
+
case "mp4":
|
|
48
|
+
return "mp4";
|
|
49
|
+
case "webm":
|
|
50
|
+
return "webm";
|
|
51
|
+
case "gif":
|
|
52
|
+
return "gif";
|
|
37
53
|
}
|
|
38
54
|
}
|
|
39
55
|
function resolveOutDir(outDir) {
|
|
@@ -47,10 +63,56 @@ function makeOutputPath(req, index) {
|
|
|
47
63
|
const filename = `${base}${suffix}.${ext}`;
|
|
48
64
|
return path2.join(req.outDir, filename);
|
|
49
65
|
}
|
|
50
|
-
async function
|
|
66
|
+
/**
 * Write `bytes` to `filePath`, creating any missing parent directories first.
 *
 * @param {string} filePath - Destination path of the file.
 * @param {Uint8Array|Buffer|string} bytes - Content handed to `fs.writeFile`.
 * @returns {Promise<void>}
 */
async function writeMediaFile(filePath, bytes) {
  const parentDir = path2.dirname(filePath);
  await fs2.mkdir(parentDir, { recursive: true });
  await fs2.writeFile(filePath, bytes);
}
|
|
70
|
+
/**
 * Build the JSON-friendly summary of generated media.
 *
 * Items are split by `kind` ("image" / "video"); any other kind is ignored.
 * Raw bytes are replaced by their byte length. The `images` / `videos` keys
 * are only present when the corresponding list is non-empty.
 *
 * @param {Array<object>} items - Generated media entries.
 * @returns {{images?: object[], videos?: object[]}}
 */
function toJsonResult(items) {
  // Summarise every item of one kind, keeping the original field order.
  const summarize = (wantedKind) => {
    const summaries = [];
    for (const item of items) {
      if (item.kind !== wantedKind) continue;
      summaries.push({
        provider: item.provider,
        model: item.model,
        index: item.index,
        filePath: item.filePath,
        url: item.url,
        bytes: item.bytes.byteLength,
        mimeType: item.mimeType
      });
    }
    return summaries;
  };
  const images = summarize("image");
  const videos = summarize("video");
  const summary = {};
  if (images.length) summary.images = images;
  if (videos.length) summary.videos = videos;
  return summary;
}
|
|
94
|
+
/**
 * Turn an image reference into something that can be sent to a provider.
 *
 * - `http(s)://` URLs and `data:` URIs are returned unchanged.
 * - Anything else is treated as a file path (relative paths are resolved
 *   against the current working directory), read from disk and returned as a
 *   base64 `data:` URI.
 *
 * @param {string} pathOrUrl - URL, data URI or local file path.
 * @returns {Promise<string>} The URL / data URI to pass on.
 * @throws {Error} When the file extension is not in IMAGE_MIME_TYPES, or the
 *   file cannot be read.
 */
async function resolveImageInput(pathOrUrl) {
  // URI schemes are case-insensitive, so "HTTPS://..." or "DATA:..." must not
  // fall through to the file-path branch.
  if (/^(?:https?:\/\/|data:)/i.test(pathOrUrl)) {
    return pathOrUrl;
  }
  const resolvedPath = path2.isAbsolute(pathOrUrl) ? pathOrUrl : path2.resolve(process.cwd(), pathOrUrl);
  const ext = path2.extname(resolvedPath).toLowerCase();
  const mimeType = IMAGE_MIME_TYPES[ext];
  if (!mimeType) {
    // A path without any extension would otherwise produce a blank in the message.
    throw new Error(
      `Unsupported image format: ${ext || "(no extension)"}. Supported: ${Object.keys(IMAGE_MIME_TYPES).join(", ")}`
    );
  }
  const fileBuffer = await fs2.readFile(resolvedPath);
  return `data:${mimeType};base64,${fileBuffer.toString("base64")}`;
}
|
|
113
|
+
/**
 * Resolve several image references concurrently.
 *
 * @param {string[]} pathsOrUrls - URLs, data URIs or local file paths.
 * @returns {Promise<string[]>} Resolved values in the same order as the input;
 *   rejects as soon as any single entry fails.
 */
async function resolveImageInputs(pathsOrUrls) {
  const pending = pathsOrUrls.map(resolveImageInput);
  const resolved = await Promise.all(pending);
  return resolved;
}
|
|
54
116
|
|
|
55
117
|
// src/core/strings.ts
|
|
56
118
|
function slugify(input, maxLen = 60) {
|
|
@@ -68,62 +130,253 @@ var XAI_API_BASE = "https://api.x.ai/v1";
|
|
|
68
130
|
/**
 * Look up the xAI API key in `env`.
 * Checks XAI_API_KEY, then XAI_TOKEN, then GROK_API_KEY and returns the first
 * truthy value (or the GROK_API_KEY value when none is truthy).
 *
 * @param {Record<string, string|undefined>} env
 * @returns {string|undefined}
 */
function getXaiApiKey(env) {
  for (const name of ["XAI_API_KEY", "XAI_TOKEN"]) {
    const candidate = env[name];
    if (candidate) return candidate;
  }
  return env.GROK_API_KEY;
}
|
|
133
|
+
// Module-level verbosity switch for the xAI provider's diagnostic output.
var verboseMode = false;
/**
 * Print a diagnostic line to stderr, prefixed with "[xai]".
 * Does nothing unless `verboseMode` is truthy.
 */
function log(...args) {
  if (!verboseMode) return;
  console.error("[xai]", ...args);
}
|
|
71
137
|
/**
 * Download a URL and return its body together with the reported content type.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<{bytes: Uint8Array, mimeType: string|undefined}>}
 *   `mimeType` is the `content-type` response header, or undefined when the
 *   header is missing or empty.
 * @throws {Error} When the response status is not OK.
 */
async function downloadBytes(url) {
  log("Downloading from:", `${url.slice(0, 100)}...`);
  const startedAt = Date.now();
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`xAI download failed (${response.status})`);
  }
  const payload = await response.arrayBuffer();
  const contentType = response.headers.get("content-type") || void 0;
  log(`Downloaded ${payload.byteLength} bytes in ${Date.now() - startedAt}ms, type: ${contentType}`);
  return { bytes: new Uint8Array(payload), mimeType: contentType };
}
|
|
147
|
+
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>}
 */
async function sleep(ms) {
  await new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
150
|
+
/**
 * Generate images from a text prompt through the xAI `images/generations`
 * endpoint and hand the response to processXaiImageResponse.
 *
 * @param {object} req - Request; reads `model`, `prompt`, `n`, `aspectRatio`.
 * @param {string} apiKey - Bearer token for the xAI API.
 * @returns {Promise<object[]>} Whatever processXaiImageResponse returns.
 * @throws {Error} On a non-OK HTTP status or an empty `data` array.
 */
async function generateXaiImages(req, apiKey) {
  const model = req.model ?? "grok-imagine-image";
  log("Starting image generation, model:", model, "n:", req.n);
  const payload = { model, prompt: req.prompt, n: req.n };
  // xAI docs: endpoint supports aspect_ratio
  if (req.aspectRatio) payload.aspect_ratio = req.aspectRatio;
  // Use URL format to download + save.
  payload.response_format = "url";
  const serialized = JSON.stringify(payload);
  log("Request body:", serialized);
  log("Calling xAI images/generations...");
  const requestedAt = Date.now();
  const response = await fetch(`${XAI_API_BASE}/images/generations`, {
    method: "POST",
    headers: {
      authorization: `Bearer ${apiKey}`,
      "content-type": "application/json"
    },
    body: serialized
  });
  log(`API responded in ${Date.now() - requestedAt}ms, status: ${response.status}`);
  if (!response.ok) {
    // Reading the error body is best-effort; fall back to an empty string.
    const errorText = await response.text().catch(() => "");
    log("Error response:", errorText.slice(0, 1e3));
    throw new Error(`xAI generations failed (${response.status}): ${errorText.slice(0, 500)}`);
  }
  const json = await response.json();
  const count = json.data?.length;
  log("Response data count:", count);
  if (!count) throw new Error("xAI returned no images");
  return processXaiImageResponse(json, model);
}
|
|
184
|
+
/**
 * Edit an image through the xAI `images/edits` endpoint, using the first
 * entry of `req.inputImages` as the source image.
 *
 * @param {object} req - Request; reads `model`, `prompt`, `n`, `aspectRatio`, `inputImages`.
 * @param {string} apiKey - Bearer token for the xAI API.
 * @returns {Promise<object[]>} Whatever processXaiImageResponse returns.
 * @throws {Error} When no input image is given, on a non-OK HTTP status, or
 *   on an empty `data` array.
 */
async function editXaiImages(req, apiKey) {
  const model = req.model ?? "grok-imagine-image";
  const sourceImage = req.inputImages?.[0];
  if (!sourceImage) {
    throw new Error("No input image provided for editing");
  }
  log("Starting image editing, model:", model, "n:", req.n);
  const payload = {
    model,
    prompt: req.prompt,
    n: req.n,
    // Object with url field containing data URI or URL
    image: { url: sourceImage },
    response_format: "url"
  };
  if (req.aspectRatio) payload.aspect_ratio = req.aspectRatio;
  // The image is masked in the log so a long data URI is not printed.
  log("Request body:", JSON.stringify({ ...payload, image: { url: "...(data uri)..." } }));
  log("Calling xAI images/edits...");
  const requestedAt = Date.now();
  const response = await fetch(`${XAI_API_BASE}/images/edits`, {
    method: "POST",
    headers: {
      authorization: `Bearer ${apiKey}`,
      "content-type": "application/json"
    },
    body: JSON.stringify(payload)
  });
  log(`API responded in ${Date.now() - requestedAt}ms, status: ${response.status}`);
  if (!response.ok) {
    // Reading the error body is best-effort; fall back to an empty string.
    const errorText = await response.text().catch(() => "");
    log("Error response:", errorText.slice(0, 1e3));
    throw new Error(`xAI edits failed (${response.status}): ${errorText.slice(0, 500)}`);
  }
  const json = await response.json();
  const count = json.data?.length;
  log("Response data count:", count);
  if (!count) throw new Error("xAI returned no images");
  return processXaiImageResponse(json, model);
}
|
|
220
|
+
/**
 * Convert an xAI image response into result entries.
 *
 * Each non-empty entry of `json.data` must carry either `url` (downloaded via
 * downloadBytes) or `b64_json` (decoded locally); `url` wins when both are
 * present. Downloads happen one after another, in response order.
 *
 * @param {{data: Array<{url?: string, b64_json?: string}|null>}} json - Parsed API response.
 * @param {string} model - Model name recorded on every result.
 * @returns {Promise<object[]>} Entries of kind "image" for provider "xai".
 * @throws {Error} When an entry has neither `url` nor `b64_json`.
 */
async function processXaiImageResponse(json, model) {
  const results = [];
  for (const [position, entry] of json.data.entries()) {
    if (!entry) continue;
    log(`Processing image ${position}...`);
    if (entry.url) {
      const downloaded = await downloadBytes(entry.url);
      const item = {
        kind: "image",
        provider: "xai",
        model,
        index: position,
        url: entry.url,
        bytes: downloaded.bytes
      };
      // Only attach mimeType when the download reported one.
      if (downloaded.mimeType !== void 0) item.mimeType = downloaded.mimeType;
      results.push(item);
    } else if (entry.b64_json) {
      log(`Image ${position} is base64 encoded`);
      const decoded = Uint8Array.from(Buffer.from(entry.b64_json, "base64"));
      results.push({ kind: "image", provider: "xai", model, index: position, bytes: decoded });
    } else {
      throw new Error("xAI returned image without url or b64_json");
    }
  }
  log(`Successfully generated ${results.length} image(s)`);
  return results;
}
|
|
250
|
+
/**
 * Generate one video through the xAI API.
 *
 * Flow: POST `videos/generations` to obtain a `request_id`, poll
 * `videos/{request_id}` until the response carries `video.url` (or reports a
 * "failed"/"error" status), then download the video bytes.
 *
 * @param {object} req - Request; reads `model`, `prompt`, `aspectRatio`,
 *   `duration`, `startFrame` and `inputImages` (the start frame wins over the
 *   first input image as the `image_url`).
 * @param {string} apiKey - Bearer token for the xAI API.
 * @returns {Promise<object[]>} A single-element array with the video result.
 * @throws {Error} On HTTP failures, a missing `request_id`, a failed
 *   generation, a polling timeout, or when `video.respect_moderation` is false.
 */
async function generateXaiVideo(req, apiKey) {
  const model = req.model ?? "grok-imagine-video";
  const imageUrl = req.startFrame ?? req.inputImages?.[0];
  log(
    "Starting video generation, model:",
    model,
    "hasImageUrl:",
    !!imageUrl,
    "duration:",
    req.duration
  );
  const createBody = {
    prompt: req.prompt,
    model,
    ...req.aspectRatio ? { aspect_ratio: req.aspectRatio } : {},
    // Add image_url for image-to-video (data URI or URL string)
    ...imageUrl ? { image_url: imageUrl } : {},
    // Add duration (xAI supports 1-15 seconds)
    ...req.duration !== void 0 ? { duration: req.duration } : {}
  };
  // Log only the length of image_url so a long data URI is not printed.
  log(
    "Request body:",
    JSON.stringify({
      ...createBody,
      image_url: createBody.image_url ? `...(${String(createBody.image_url).length} chars)` : void 0
    })
  );
  log("Calling xAI videos/generations...");
  const startTime = Date.now();
  const createRes = await fetch(`${XAI_API_BASE}/videos/generations`, {
    method: "POST",
    headers: {
      authorization: `Bearer ${apiKey}`,
      "content-type": "application/json"
    },
    body: JSON.stringify(createBody)
  });
  log(`API responded in ${Date.now() - startTime}ms, status: ${createRes.status}`);
  if (!createRes.ok) {
    const txt = await createRes.text().catch(() => "");
    log("Error response:", txt.slice(0, 1e3));
    throw new Error(`xAI video generations failed (${createRes.status}): ${txt.slice(0, 500)}`);
  }
  const createJson = await createRes.json();
  const requestId = createJson.request_id;
  log("Got request_id:", requestId);
  if (!requestId) throw new Error("xAI video generation returned no request_id");
  const maxAttempts = 120;
  const intervalMs = 3e3;
  let result;
  log(`Starting poll loop (max ${maxAttempts} attempts, ${intervalMs}ms interval)...`);
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    const res = await fetch(`${XAI_API_BASE}/videos/${encodeURIComponent(requestId)}`, {
      method: "GET",
      headers: {
        authorization: `Bearer ${apiKey}`
      }
    });
    if (!res.ok) {
      const txt = await res.text().catch(() => "");
      log(`Poll attempt ${attempt + 1} failed:`, txt.slice(0, 500));
      throw new Error(`xAI video poll failed (${res.status}): ${txt.slice(0, 500)}`);
    }
    const json = await res.json();
    result = json;
    log(
      `Poll attempt ${attempt + 1}/${maxAttempts}: status=${json.status}, raw:`,
      JSON.stringify(json).slice(0, 300)
    );
    if (json.video?.url) {
      log("Video generation complete!");
      break;
    }
    if (json.status === "failed" || json.status === "error") {
      log("Video generation failed:", JSON.stringify(json));
      throw new Error(`xAI video generation failed: ${JSON.stringify(json)}`);
    }
    // No further poll follows the last attempt, so waiting again would only
    // delay the timeout error.
    if (attempt < maxAttempts - 1) await sleep(intervalMs);
  }
  if (!result?.video?.url) {
    log("Timed out. Last result:", JSON.stringify(result));
    throw new Error(`xAI video generation timed out (request_id=${requestId})`);
  }
  const url = result.video.url;
  log("Video URL:", url);
  if (result.video?.respect_moderation === false) {
    throw new Error("xAI video generation was blocked by moderation");
  }
  const { bytes, mimeType } = await downloadBytes(url);
  log(`Successfully generated video, ${bytes.byteLength} bytes`);
  return [
    {
      kind: "video",
      provider: "xai",
      // Prefer the model name reported by the API over the requested one.
      model: result.model ?? model,
      index: 0,
      url,
      bytes,
      ...mimeType !== void 0 ? { mimeType } : {}
    }
  ];
}
|
|
352
|
+
var xaiCapabilities = {
|
|
353
|
+
maxInputImages: 1,
|
|
354
|
+
supportsVideoInterpolation: false,
|
|
355
|
+
// xAI does not support end frame
|
|
356
|
+
videoDurationRange: [1, 15],
|
|
357
|
+
// 1-15 seconds
|
|
358
|
+
supportsImageEditing: true
|
|
359
|
+
};
|
|
78
360
|
// Provider descriptor for xAI: availability check plus a `generate` entry point
// that dispatches to video generation, image editing or image generation.
var xaiProvider = {
  id: "xai",
  displayName: "xAI",
  supports: ["image", "video"],
  capabilities: xaiCapabilities,
  /** True when an xAI API key can be found in `env`. */
  isAvailable(env) {
    const apiKey = getXaiApiKey(env);
    return Boolean(apiKey);
  },
  /**
   * Run a generation request.
   * `req.kind === "video"` goes to generateXaiVideo; otherwise a non-empty
   * `req.inputImages` selects editXaiImages, and everything else goes to
   * generateXaiImages. Also sets the module-level `verboseMode` from `req.verbose`.
   *
   * @throws {Error} When no API key is available.
   */
  async generate(req, env) {
    const apiKey = getXaiApiKey(env);
    if (!apiKey) {
      throw new Error("Missing xAI API key. Set XAI_API_KEY (or XAI_TOKEN).");
    }
    verboseMode = req.verbose;
    log("Provider initialized, kind:", req.kind);
    if (req.kind === "video") {
      return generateXaiVideo(req, apiKey);
    }
    if (!req.inputImages || !(req.inputImages.length > 0)) {
      return generateXaiImages(req, apiKey);
    }
    log("Input images detected, using edit endpoint");
    return editXaiImages(req, apiKey);
  }
};
|
|
129
382
|
|
|
@@ -132,50 +385,198 @@ import { fal } from "@fal-ai/client";
|
|
|
132
385
|
/**
 * Look up the fal API key in `env`: FAL_API_KEY when truthy, otherwise FAL_KEY.
 *
 * @param {Record<string, string|undefined>} env
 * @returns {string|undefined}
 */
function getFalKey(env) {
  const primary = env.FAL_API_KEY;
  return primary ? primary : env.FAL_KEY;
}
|
|
135
|
-
|
|
388
|
+
/**
 * Print a diagnostic line to stderr, prefixed with "[fal]".
 *
 * @param {boolean} verbose - Nothing is printed unless this is truthy.
 * @param {...*} args - Values forwarded to `console.error`.
 */
function log2(verbose, ...args) {
  if (!verbose) return;
  console.error("[fal]", ...args);
}
|
|
391
|
+
/**
 * Download a URL and return its body together with the reported content type.
 *
 * @param {string} url - Address to fetch.
 * @param {boolean} verbose - Enables diagnostic logging through log2.
 * @returns {Promise<{bytes: Uint8Array, mimeType: string|undefined}>}
 *   `mimeType` is the `content-type` response header, or undefined when the
 *   header is missing or empty.
 * @throws {Error} When the response status is not OK.
 */
async function downloadBytes2(url, verbose) {
  log2(verbose, "Downloading from:", `${url.slice(0, 100)}...`);
  const startedAt = Date.now();
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`fal download failed (${response.status})`);
  }
  const payload = await response.arrayBuffer();
  const contentType = response.headers.get("content-type") || void 0;
  log2(verbose, `Downloaded ${payload.byteLength} bytes in ${Date.now() - startedAt}ms, type: ${contentType}`);
  return { bytes: new Uint8Array(payload), mimeType: contentType };
}
|
|
401
|
+
/**
 * Extract the media entries from a fal result payload.
 *
 * For kind "image" the `images` array is used, falling back to a single
 * `image` object; for any other kind the `videos` / `video` fields are used.
 * A single object is only accepted when it has a truthy `url`.
 *
 * @param {object} result - The `data` part of a fal response.
 * @param {string} kind - "image" selects the image fields, anything else the video fields.
 * @returns {object[]} The entries found, or an empty array.
 */
function pickMany(result, kind) {
  const [listField, singleField] = kind === "image" ? ["images", "image"] : ["videos", "video"];
  const list = result[listField];
  if (Array.isArray(list) && list.length) return list;
  const single = result[singleField];
  return single?.url ? [single] : [];
}
|
|
411
|
+
var DEFAULT_IMAGE_MODEL = "fal-ai/flux/dev";
|
|
412
|
+
var DEFAULT_IMAGE_TO_IMAGE_MODEL = "fal-ai/flux/dev/image-to-image";
|
|
413
|
+
var DEFAULT_VIDEO_MODEL = "fal-ai/ltxv-2/text-to-video/fast";
|
|
414
|
+
var DEFAULT_IMAGE_TO_VIDEO_MODEL = "fal-ai/vidu/q2/image-to-video";
|
|
415
|
+
var DEFAULT_START_END_VIDEO_MODEL = "fal-ai/vidu/start-end-to-video";
|
|
416
|
+
var DEFAULT_REFERENCE_VIDEO_MODEL = "fal-ai/vidu/q2/reference-to-video";
|
|
417
|
+
/**
 * Choose the fal video model for a request.
 *
 * An explicit `req.model` always wins. Otherwise the choice follows the inputs:
 * start + end frame -> start/end model; input images without a start frame ->
 * reference model; a start frame alone -> image-to-video model; no image
 * input -> the default text-to-video model.
 *
 * @param {object} req - Reads `model`, `startFrame`, `endFrame`, `inputImages`.
 * @returns {string} Model identifier.
 */
function selectVideoModel(req) {
  if (req.model) return req.model;
  const hasStartFrame = Boolean(req.startFrame);
  const hasInputImages = Boolean(req.inputImages?.length);
  if (hasStartFrame && req.endFrame) return DEFAULT_START_END_VIDEO_MODEL;
  if (hasInputImages && !hasStartFrame) return DEFAULT_REFERENCE_VIDEO_MODEL;
  if (hasStartFrame || hasInputImages) return DEFAULT_IMAGE_TO_VIDEO_MODEL;
  return DEFAULT_VIDEO_MODEL;
}
|
|
430
|
+
/**
 * Choose the fal image model for a request: an explicit `req.model` wins,
 * otherwise the image-to-image default is used when input images are present
 * and the text-to-image default when they are not.
 *
 * @param {object} req - Reads `model` and `inputImages`.
 * @returns {string} Model identifier.
 */
function selectImageModel(req) {
  const fallback = req.inputImages?.length ? DEFAULT_IMAGE_TO_IMAGE_MODEL : DEFAULT_IMAGE_MODEL;
  return req.model || fallback;
}
|
|
435
|
+
/**
 * Translate a "W:H" aspect ratio into the preset name used for it.
 * Known ratios (1:1, 4:3, 16:9, 3:4, 9:16) are mapped to a preset; any other
 * value is returned trimmed but otherwise unchanged.
 *
 * @param {string|undefined} aspectRatio
 * @returns {string|undefined} undefined when no aspect ratio was given.
 */
function mapAspectRatio(aspectRatio) {
  if (!aspectRatio) return void 0;
  const ratio = aspectRatio.trim();
  switch (ratio) {
    case "1:1":
      return "square";
    case "4:3":
      return "landscape_4_3";
    case "16:9":
      return "landscape_16_9";
    case "3:4":
      return "portrait_4_3";
    case "9:16":
      return "portrait_16_9";
    default:
      return ratio;
  }
}
|
|
445
|
+
/**
 * Build the fal input payload for a video request. The shape depends on which
 * image inputs are present, mirroring the model choice in selectVideoModel:
 *
 * 1. start + end frame  -> `start_image_url` / `end_image_url`
 * 2. input images only  -> `reference_image_urls` (at most 7), `aspect_ratio`, `duration`
 * 3. start frame only   -> `image_url`, `duration`
 * 4. no image input     -> `image_size`, `num_videos`
 *
 * @param {object} req - Reads `prompt`, `startFrame`, `endFrame`,
 *   `inputImages`, `aspectRatio`, `duration`, `n`.
 * @returns {object} Input object for `fal.subscribe`.
 */
function buildVideoInput(req) {
  const input = {
    prompt: req.prompt
  };
  if (req.startFrame && req.endFrame) {
    input.start_image_url = req.startFrame;
    input.end_image_url = req.endFrame;
    return input;
  }
  if (req.inputImages?.length && !req.startFrame) {
    input.reference_image_urls = req.inputImages.slice(0, 7);
    // `aspect_ratio` carries the ratio itself ("16:9"). The preset names that
    // mapAspectRatio produces ("landscape_16_9") belong to `image_size`, so
    // the ratio is passed through here without mapping.
    const ratio = req.aspectRatio?.trim();
    if (ratio) input.aspect_ratio = ratio;
    if (req.duration) input.duration = String(req.duration);
    return input;
  }
  const imageUrl = req.startFrame ?? req.inputImages?.[0];
  if (imageUrl) {
    input.image_url = imageUrl;
    if (req.duration) input.duration = String(req.duration);
    return input;
  }
  // NOTE(review): the text-to-video branch sends `image_size` / `num_videos`;
  // confirm these field names against the schema of the selected model.
  const imageSize = mapAspectRatio(req.aspectRatio);
  if (imageSize) input.image_size = imageSize;
  if (req.n) input.num_videos = req.n;
  return input;
}
|
|
472
|
+
/**
 * Build the fal input payload for an image request.
 *
 * Always carries `prompt`; adds `image_size` when an aspect ratio is given,
 * `num_images` when `req.n` is truthy, and `image_url` plus a fixed
 * `strength` of 0.75 when a first input image is present.
 *
 * @param {object} req - Reads `prompt`, `aspectRatio`, `n`, `inputImages`.
 * @returns {object} Input object for `fal.subscribe`.
 */
function buildImageInput(req) {
  const { prompt, aspectRatio, n, inputImages } = req;
  const input = { prompt };
  const imageSize = mapAspectRatio(aspectRatio);
  if (imageSize) input.image_size = imageSize;
  if (n) input.num_images = n;
  const sourceImage = inputImages?.[0];
  if (sourceImage) {
    input.image_url = sourceImage;
    input.strength = 0.75;
  }
  return input;
}
|
|
485
|
+
var falCapabilities = {
|
|
486
|
+
maxInputImages: 7,
|
|
487
|
+
// Vidu supports up to 7 reference images
|
|
488
|
+
supportsVideoInterpolation: true,
|
|
489
|
+
// Vidu start-end-to-video
|
|
490
|
+
videoDurationRange: [2, 8],
|
|
491
|
+
// Vidu supports 2-8 seconds
|
|
492
|
+
supportsImageEditing: true
|
|
493
|
+
};
|
|
142
494
|
// Provider descriptor for fal.ai: availability check plus a `generate` entry
// point covering both image and video requests.
var falProvider = {
  id: "fal",
  displayName: "fal.ai",
  supports: ["image", "video"],
  capabilities: falCapabilities,
  /** True when a fal API key can be found in `env`. */
  isAvailable(env) {
    return Boolean(getFalKey(env));
  },
  /**
   * Run a generation request through `fal.subscribe` and download the results.
   *
   * @param {object} req - Reads `kind`, `n`, `verbose` and the fields used by
   *   the model-selection and input-building helpers.
   * @param {object} env - Environment used to look up the API key.
   * @returns {Promise<object[]>} One entry per downloaded item, at most
   *   `req.n` entries when `req.n` is set.
   * @throws {Error} When the key is missing, the response holds no media of
   *   the requested kind, or none of the returned items has a URL.
   */
  async generate(req, env) {
    const key = getFalKey(env);
    if (!key) throw new Error("Missing fal API key. Set FAL_KEY (or FAL_API_KEY).");
    const verbose = req.verbose;
    log2(verbose, "Starting generation, kind:", req.kind, "n:", req.n);
    log2(
      verbose,
      "Input images:",
      req.inputImages?.length ?? 0,
      "startFrame:",
      !!req.startFrame,
      "endFrame:",
      !!req.endFrame
    );
    fal.config({ credentials: key });
    const model = req.kind === "video" ? selectVideoModel(req) : selectImageModel(req);
    log2(verbose, "Selected model:", model);
    const input = req.kind === "video" ? buildVideoInput(req) : buildImageInput(req);
    // Copy of the input for logging, with data URIs replaced by their length.
    const inputSummary = { ...input };
    for (const key2 of ["image_url", "start_image_url", "end_image_url"]) {
      if (typeof inputSummary[key2] === "string" && inputSummary[key2].startsWith("data:")) {
        inputSummary[key2] = `data:...${inputSummary[key2].length} chars`;
      }
    }
    if (Array.isArray(inputSummary.reference_image_urls)) {
      inputSummary.reference_image_urls = inputSummary.reference_image_urls.map(
        (url) => url.startsWith("data:") ? `data:...${url.length} chars` : url
      );
    }
    log2(verbose, "Request input:", JSON.stringify(inputSummary));
    log2(verbose, "Calling fal.subscribe...");
    const startTime = Date.now();
    const subscribeOptions = {
      input,
      logs: verbose
    };
    if (verbose) {
      subscribeOptions.onQueueUpdate = (update) => {
        log2(true, "Queue update:", update.status, JSON.stringify(update).slice(0, 200));
      };
    }
    const result = await fal.subscribe(model, subscribeOptions);
    // A missing `data` is treated as an empty payload everywhere below, so the
    // "no results" error is reported instead of a TypeError from slicing
    // `JSON.stringify(undefined)`.
    const data = result?.data ?? {};
    log2(verbose, `fal.subscribe completed in ${Date.now() - startTime}ms`);
    log2(verbose, "Raw result keys:", Object.keys(data));
    log2(verbose, "Result preview:", JSON.stringify(data).slice(0, 500));
    const items = pickMany(data, req.kind);
    log2(verbose, `Found ${items.length} ${req.kind}(s) in response`);
    if (!items?.length) {
      const noun = req.kind === "video" ? "videos" : "images";
      throw new Error(
        `fal returned no ${noun}. Raw response: ${JSON.stringify(data).slice(0, 300)}`
      );
    }
    // `req.n` is optional for the input builders; without it Math.min() would
    // yield NaN and the loop would never run. Keep every item in that case.
    const limit = req.n ? Math.min(items.length, req.n) : items.length;
    const out = [];
    for (let i = 0; i < limit; i++) {
      const m = items[i];
      if (!m?.url) {
        log2(verbose, `Item ${i} has no URL, skipping`);
        continue;
      }
      log2(verbose, `Downloading item ${i}...`);
      const { bytes, mimeType } = await downloadBytes2(m.url, verbose);
      // Prefer the content type reported in the result over the HTTP header.
      const finalMimeType = m.content_type ?? mimeType;
      out.push({
        kind: req.kind,
        provider: "fal",
        model,
        index: i,
        url: m.url,
        bytes,
        ...finalMimeType !== void 0 ? { mimeType: finalMimeType } : {}
      });
    }
    if (!out.length) {
      const noun = req.kind === "video" ? "videos" : "images";
      throw new Error(`fal returned ${noun} but none were downloadable`);
    }
    log2(verbose, `Successfully generated ${out.length} ${req.kind}(s)`);
    return out;
  }
};
|
|
@@ -185,7 +586,7 @@ import { GoogleGenAI } from "@google/genai";
|
|
|
185
586
|
/**
 * Look up the Google API key in `env`.
 * Checks GEMINI_API_KEY, then GOOGLE_API_KEY, then GOOGLE_GENAI_API_KEY and
 * returns the first truthy value (or the last variable's value when none is truthy).
 *
 * @param {Record<string, string|undefined>} env
 * @returns {string|undefined}
 */
function getGeminiApiKey(env) {
  for (const name of ["GEMINI_API_KEY", "GOOGLE_API_KEY"]) {
    const candidate = env[name];
    if (candidate) return candidate;
  }
  return env.GOOGLE_GENAI_API_KEY;
}
|
|
188
|
-
function
|
|
589
|
+
function mimeForImageFormat(format) {
|
|
189
590
|
switch (format) {
|
|
190
591
|
case "jpg":
|
|
191
592
|
return "image/jpeg";
|
|
@@ -196,43 +597,522 @@ function mimeForFormat(format) {
|
|
|
196
597
|
return "image/png";
|
|
197
598
|
}
|
|
198
599
|
}
|
|
600
|
+
// Module-level verbosity switch for the Google provider's diagnostic output.
var verboseMode2 = false;
/**
 * Print a diagnostic line to stderr, prefixed with "[google]".
 * Does nothing unless `verboseMode2` is truthy.
 */
function log3(...args) {
  if (!verboseMode2) return;
  console.error("[google]", ...args);
}
|
|
604
|
+
var MODEL_ALIASES = {
|
|
605
|
+
"nano-banana": "gemini-2.5-flash-image",
|
|
606
|
+
"nano-banana-pro": "gemini-3-pro-image-preview",
|
|
607
|
+
// Veo (video)
|
|
608
|
+
veo2: "veo-2.0-generate-001",
|
|
609
|
+
"veo-2": "veo-2.0-generate-001",
|
|
610
|
+
veo3: "veo-3.0-generate-001",
|
|
611
|
+
"veo-3": "veo-3.0-generate-001",
|
|
612
|
+
"veo-3.1": "veo-3.1-generate-preview",
|
|
613
|
+
veo31: "veo-3.1-generate-preview"
|
|
614
|
+
};
|
|
615
|
+
// Model identifiers known to belong to the Veo 3.1 family.
var VEO_31_MODELS = ["veo-3.1-generate-preview", "veo-3.1-fast-generate-preview"];
/**
 * Tell whether `model` names a Veo 3.1 model: it either contains the
 * "veo-3.1" marker or one of the identifiers listed in VEO_31_MODELS.
 *
 * @param {string} model
 * @returns {boolean}
 */
function isVeo31Model(model) {
  if (model.includes("veo-3.1")) return true;
  return VEO_31_MODELS.some((knownId) => model.includes(knownId));
}
|
|
619
|
+
/**
 * Split a base64 `data:` URI into its MIME type and payload.
 *
 * Optional media-type parameters between the MIME type and the `;base64`
 * marker (for example `;charset=utf-8`) are accepted and ignored, so such
 * URIs are no longer rejected as unparseable.
 *
 * @param {string} dataUri - e.g. "data:image/png;base64,AAAA".
 * @returns {{mimeType: string, data: string}|null} null when the input is not
 *   a base64 data URI with a non-empty payload.
 */
function parseDataUri(dataUri) {
  const match = dataUri.match(/^data:([^;,]+)(?:;[^;,]*)*?;base64,(.+)$/i);
  if (!match) return null;
  // Both groups are mandatory in the pattern, so they are always defined here.
  const [, mimeType, data] = match;
  return { mimeType, data };
}
|
|
624
|
+
/**
 * Convert an image reference into the object shape passed to the Google SDK.
 *
 * A `data:` URI that parseDataUri can parse becomes `{ inlineData }`; every
 * other input (including a `data:` URI that fails to parse) becomes
 * `{ fileUri }` holding the input unchanged.
 *
 * @param {string} imageInput - Data URI or URL.
 * @returns {{inlineData: {data: string, mimeType: string}}|{fileUri: string}}
 */
function imageToGoogleFormat(imageInput) {
  const parsed = imageInput.startsWith("data:") ? parseDataUri(imageInput) : null;
  if (!parsed) {
    return { fileUri: imageInput };
  }
  const { data, mimeType } = parsed;
  return { inlineData: { data, mimeType } };
}
|
|
633
|
+
var GEMINI_IMAGE_MODELS = ["gemini-2.5-flash-image", "gemini-3-pro-image-preview"];
|
|
634
|
+
/**
 * Resolve the image model to use: the default "gemini-2.5-flash-image" when no
 * model is given, the aliased name when `model` is a key of MODEL_ALIASES,
 * otherwise `model` itself.
 *
 * @param {string|undefined} model - Requested model or alias.
 * @returns {string}
 */
function resolveModel(model) {
  if (model) {
    const aliased = MODEL_ALIASES[model];
    return aliased ?? model;
  }
  return "gemini-2.5-flash-image";
}
|
|
638
|
+
/**
 * Tell whether `model` starts with one of the identifiers in GEMINI_IMAGE_MODELS.
 *
 * @param {string} model
 * @returns {boolean}
 */
function isGeminiImageModel(model) {
  for (const prefix of GEMINI_IMAGE_MODELS) {
    if (model.startsWith(prefix)) return true;
  }
  return false;
}
|
|
641
|
+
/**
 * Download a URL and return its body together with the reported content type.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<{bytes: Uint8Array, mimeType: string|undefined}>}
 *   `mimeType` is the `content-type` response header, or undefined when the
 *   header is missing or empty.
 * @throws {Error} When the response status is not OK.
 */
async function downloadBytes3(url) {
  log3("Downloading from:", `${url.slice(0, 100)}...`);
  const startedAt = Date.now();
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Google video download failed (${response.status})`);
  }
  const payload = await response.arrayBuffer();
  const contentType = response.headers.get("content-type") || void 0;
  log3(`Downloaded ${payload.byteLength} bytes in ${Date.now() - startedAt}ms, type: ${contentType}`);
  return { bytes: new Uint8Array(payload), mimeType: contentType };
}
|
|
651
|
+
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>}
 */
async function sleep2(ms) {
  await new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
654
|
+
var googleCapabilities = {
|
|
655
|
+
maxInputImages: 3,
|
|
656
|
+
// Veo 3.1 supports up to 3 reference images
|
|
657
|
+
supportsVideoInterpolation: true,
|
|
658
|
+
// Veo 3.1 supports first + last frame
|
|
659
|
+
videoDurationRange: [4, 8],
|
|
660
|
+
// Veo 3.1 supports 4, 6, 8 seconds
|
|
661
|
+
supportsImageEditing: true
|
|
662
|
+
};
|
|
199
663
|
// Provider descriptor for Google: availability check plus a `generate` entry
// point that dispatches to Veo (video), Gemini native image generation, or Imagen.
var googleProvider = {
  id: "google",
  displayName: "Google (Gemini / Imagen / Veo)",
  supports: ["image", "video"],
  capabilities: googleCapabilities,
  /** True when a Google API key can be found in `env`. */
  isAvailable(env) {
    const apiKey = getGeminiApiKey(env);
    return Boolean(apiKey);
  },
  /**
   * Run a generation request.
   *
   * Video requests resolve the model through MODEL_ALIASES; without an
   * explicit model the default is Veo 3.1 when a start frame, end frame or
   * input images are present, and Veo 2.0 otherwise. Image requests go to
   * Gemini when the resolved model is a Gemini image model, else to Imagen.
   * Also sets the module-level `verboseMode2` from `req.verbose`.
   *
   * @throws {Error} When no API key is available.
   */
  async generate(req, env) {
    const apiKey = getGeminiApiKey(env);
    if (!apiKey) {
      throw new Error("Missing Google API key. Set GEMINI_API_KEY (or GOOGLE_API_KEY).");
    }
    verboseMode2 = req.verbose;
    log3("Provider initialized, kind:", req.kind);
    log3(
      "Input images:",
      req.inputImages?.length ?? 0,
      "startFrame:",
      !!req.startFrame,
      "endFrame:",
      !!req.endFrame
    );
    const ai = new GoogleGenAI({ apiKey });
    if (req.kind !== "video") {
      const imageModel = resolveModel(req.model);
      log3("Resolved model:", imageModel);
      if (!isGeminiImageModel(imageModel)) {
        log3("Using Imagen API");
        return generateWithImagen(ai, imageModel, req);
      }
      log3("Using Gemini native image generation");
      return generateWithGemini(ai, imageModel, req);
    }
    const usesAdvancedFeatures = req.startFrame || req.endFrame || req.inputImages?.length;
    const fallbackModel = usesAdvancedFeatures ? "veo-3.1-generate-preview" : "veo-2.0-generate-001";
    const videoModel = MODEL_ALIASES[req.model ?? ""] ?? req.model ?? fallbackModel;
    log3("Using video model:", videoModel);
    if (usesAdvancedFeatures && !isVeo31Model(videoModel)) {
      // Only a warning: the request is still sent with the chosen model.
      log3(
        "WARNING: Advanced video features (startFrame, endFrame, referenceImages) require Veo 3.1"
      );
    }
    return generateWithVeo(ai, videoModel, req);
  }
};
|
|
707
|
+
/**
 * Generate video(s) with a Veo model and return the downloaded results.
 *
 * Starts a long-running operation through `ai.models.generateVideos`, polls it
 * every 10 seconds (at most 60 times) and then downloads each returned video
 * that exposes a URI.
 *
 * @param {object} ai - Client exposing `models.generateVideos` and `operations.getVideosOperation`.
 * @param {string} model - Resolved video model id.
 * @param {object} req - Normalized request (`prompt`, `n`, `aspectRatio`, `duration`, `inputImages`, `startFrame`, `endFrame`).
 * @returns {Promise<object[]>} One entry per downloaded video.
 * @throws {Error} On polling timeout, an empty response, a `gs://` URI, or when no video was downloadable.
 */
async function generateWithVeo(ai, model, req) {
  log3("Starting Veo video generation, model:", model, "n:", req.n);
  const startTime = Date.now();

  const config = { numberOfVideos: req.n };
  if (req.aspectRatio) {
    config.aspectRatio = req.aspectRatio;
  }
  // Add duration if specified (Veo 3.1 supports 4, 6, 8)
  if (req.duration !== undefined) {
    config.durationSeconds = String(req.duration);
  }

  // Reference images are only attached for Veo 3.1 models, capped at three.
  if (req.inputImages?.length && isVeo31Model(model)) {
    const referenceImages = req.inputImages
      .slice(0, 3)
      .map((img) => ({ image: imageToGoogleFormat(img), referenceType: "asset" }));
    config.referenceImages = referenceImages;
    log3("Added", referenceImages.length, "reference images");
  }

  const generateParams = { model, prompt: req.prompt, config };

  // An explicit start frame wins; otherwise a lone input image doubles as the first frame.
  const soleInputImage = req.inputImages?.length === 1 ? req.inputImages[0] : undefined;
  const firstFrameImage = req.startFrame ?? soleInputImage;
  if (firstFrameImage && isVeo31Model(model)) {
    generateParams.image = imageToGoogleFormat(firstFrameImage);
    log3("Added first frame image");
  }
  if (req.endFrame && isVeo31Model(model)) {
    config.lastFrame = imageToGoogleFormat(req.endFrame);
    log3("Added last frame for interpolation");
  }

  log3("Calling ai.models.generateVideos...");
  let op = await ai.models.generateVideos(generateParams);
  log3("Initial operation state:", op.done ? "done" : "pending", "name:", op.name);

  const maxAttempts = 60;
  const intervalMs = 1e4;
  let attempt = 0;
  while (!op.done && attempt < maxAttempts) {
    attempt += 1;
    log3(`Poll attempt ${attempt}/${maxAttempts}...`);
    await sleep2(intervalMs);
    op = await ai.operations.getVideosOperation({ operation: op });
    log3(`Poll result: done=${op.done}`);
  }
  log3(`Operation completed in ${Date.now() - startTime}ms`);
  if (!op.done) {
    log3("Timed out. Operation state:", JSON.stringify(op).slice(0, 500));
    throw new Error("Google Veo video generation timed out");
  }

  const videos = op.response?.generatedVideos;
  log3("Generated videos count:", videos?.length);
  if (!videos?.length) {
    log3("Full response:", JSON.stringify(op.response).slice(0, 1e3));
    throw new Error("Google Veo returned no videos");
  }

  const out = [];
  const limit = Math.min(videos.length, req.n);
  for (let i = 0; i < limit; i += 1) {
    const v = videos[i];
    log3(`Processing video ${i}:`, JSON.stringify(v).slice(0, 300));
    const uri = v?.video?.uri;
    if (!uri) {
      log3(`Video ${i} has no URI, skipping`);
      continue;
    }
    if (uri.startsWith("gs://")) {
      throw new Error(
        `Google Veo returned a gs:// URI (${uri}). Configure outputGcsUri / Vertex flow to fetch from GCS.`
      );
    }
    const { bytes, mimeType } = await downloadBytes3(uri);
    const item = { kind: "video", provider: "google", model, index: i, url: uri, bytes };
    if (mimeType !== undefined) {
      item.mimeType = mimeType;
    }
    out.push(item);
  }

  if (!out.length) throw new Error("Google Veo returned videos but none were downloadable");
  log3(`Successfully generated ${out.length} video(s)`);
  return out;
}
|
|
794
|
+
/**
 * Generate `req.n` images by calling `ai.models.generateContent` once per image.
 *
 * When input images are present, only the first one is sent, followed by the
 * prompt as a text part. From each response the first part carrying inline
 * data is kept; responses without parts are logged and skipped.
 *
 * @param {object} ai - Client exposing `models.generateContent`.
 * @param {string} model - Resolved image model id.
 * @param {object} req - Normalized request (`prompt`, `n`, `format`, `inputImages`).
 * @returns {Promise<object[]>} One entry per image that was returned.
 * @throws {Error} Re-throws any API/processing error, or fails when no image was produced.
 */
async function generateWithGemini(ai, model, req) {
  const hasInputImage = req.inputImages?.length;
  log3(
    "Starting Gemini image generation, model:",
    model,
    "n:",
    req.n,
    "hasInputImage:",
    !!hasInputImage
  );
  const startTime = Date.now();
  const out = [];

  // Rebuilt for every call: [image part, text part] with an input image, else the bare prompt.
  function buildContents() {
    const firstImage = hasInputImage ? req.inputImages?.[0] : undefined;
    if (!firstImage) {
      return req.prompt;
    }
    return [{ ...imageToGoogleFormat(firstImage) }, { text: req.prompt }];
  }

  for (let i = 0; i < req.n; i += 1) {
    log3(`Generating image ${i + 1}/${req.n}...`);
    const callStart = Date.now();
    try {
      const res = await ai.models.generateContent({
        model,
        contents: buildContents(),
        config: { responseModalities: ["IMAGE"] },
      });
      log3(`API call ${i + 1} took ${Date.now() - callStart}ms`);

      const parts = res.candidates?.[0]?.content?.parts;
      log3(`Response has ${parts?.length ?? 0} parts`);
      if (!parts) {
        log3(
          `No parts in response for image ${i}. Full response:`,
          JSON.stringify(res).slice(0, 500)
        );
        continue;
      }

      // Only the first part with inline data counts as this iteration's image.
      const imagePart = parts.find((part) => part.inlineData?.data);
      if (imagePart) {
        const { data: rawBytes, mimeType: partMimeType } = imagePart.inlineData;
        const bytes =
          typeof rawBytes === "string" ? Uint8Array.from(Buffer.from(rawBytes, "base64")) : rawBytes;
        log3(`Image ${i}: got ${bytes.byteLength} bytes, mimeType: ${partMimeType}`);
        out.push({
          kind: "image",
          provider: "google",
          model,
          index: i,
          bytes,
          mimeType: partMimeType ?? mimeForImageFormat(req.format),
        });
      }
    } catch (err) {
      log3(`Error generating image ${i}:`, err);
      throw err;
    }
  }

  log3(`Total generation time: ${Date.now() - startTime}ms`);
  if (!out.length) throw new Error("Gemini returned no images");
  log3(`Successfully generated ${out.length} image(s)`);
  return out;
}
|
|
860
|
+
/**
 * Generate images with a single `ai.models.generateImages` call.
 *
 * @param {object} ai - Client exposing `models.generateImages`.
 * @param {string} model - Resolved image model id.
 * @param {object} req - Normalized request (`prompt`, `n`, `format`, `aspectRatio`).
 * @returns {Promise<object[]>} One entry per returned image that carried bytes.
 * @throws {Error} When the response has no images, or none of them has bytes.
 */
async function generateWithImagen(ai, model, req) {
  log3("Starting Imagen generation, model:", model, "n:", req.n);
  const startTime = Date.now();

  const config = {
    numberOfImages: req.n,
    outputMimeType: mimeForImageFormat(req.format),
  };
  // Imagen 4 supports aspectRatio
  if (req.aspectRatio) {
    config.aspectRatio = req.aspectRatio;
  }

  log3("Calling ai.models.generateImages...");
  const res = await ai.models.generateImages({ model, prompt: req.prompt, config });
  log3(`API call took ${Date.now() - startTime}ms`);

  const imgs = res.generatedImages;
  log3("Generated images count:", imgs?.length);
  if (!imgs?.length) {
    log3("Full response:", JSON.stringify(res).slice(0, 1e3));
    throw new Error("Google generateImages returned no images");
  }

  const out = [];
  const limit = Math.min(imgs.length, req.n);
  for (let i = 0; i < limit; i += 1) {
    const rawBytes = imgs[i]?.image?.imageBytes;
    if (!rawBytes) {
      log3(`Image ${i} has no bytes, skipping`);
      continue;
    }
    // Base64 strings are decoded; anything else is passed through unchanged.
    const bytes =
      typeof rawBytes === "string" ? Uint8Array.from(Buffer.from(rawBytes, "base64")) : rawBytes;
    log3(`Image ${i}: got ${bytes.byteLength} bytes`);
    out.push({
      kind: "image",
      provider: "google",
      model,
      index: i,
      bytes,
      mimeType: mimeForImageFormat(req.format),
    });
  }

  if (!out.length) throw new Error("Google returned images but no bytes were present");
  log3(`Successfully generated ${out.length} image(s)`);
  return out;
}
|
|
904
|
+
|
|
905
|
+
// src/providers/openai.ts
|
|
906
|
+
// Base URL of the OpenAI REST API; endpoint paths such as `/images/edits` are appended to it.
var OPENAI_API_BASE = "https://api.openai.com/v1";
|
|
907
|
+
/**
 * Look up the OpenAI API key in an environment map.
 *
 * @param {object} env - Environment variables.
 * @returns {string|undefined} `OPENAI_API_KEY` when truthy, otherwise `OPENAI_KEY`.
 */
function getOpenAIApiKey(env) {
  const { OPENAI_API_KEY: primary, OPENAI_KEY: fallback } = env;
  return primary || fallback;
}
|
|
910
|
+
// Module-level verbosity switch for the OpenAI provider; off by default.
var verboseMode3 = false;

/**
 * Write a debug message to stderr, prefixed with "[openai]", when verbose mode is on.
 *
 * @param {...unknown} args - Values forwarded to `console.error`.
 */
function log4(...args) {
  if (!verboseMode3) return;
  console.error("[openai]", ...args);
}
|
|
914
|
+
/**
 * Decode a base64 `data:` URI into a Blob.
 *
 * Accepts optional media-type parameters between the MIME type and the
 * `;base64` marker (e.g. `data:image/png;name=a.png;base64,...`) and payloads
 * that contain line breaks; both were rejected by the previous pattern.
 *
 * @param {string} dataUri - A `data:<mime>[;param=value...];base64,<payload>` string.
 * @returns {Blob} Blob holding the decoded bytes, typed with the URI's MIME type.
 * @throws {Error} "Invalid data URI" when the input is not a base64 data URI.
 */
function dataUriToBlob(dataUri) {
  // Group 1: MIME type. Group 2: payload ([\s\S] so wrapped base64 still matches).
  const match = /^data:([^;,]+)(?:;[^;,]+)*;base64,([\s\S]+)$/.exec(dataUri);
  if (!match) throw new Error("Invalid data URI");
  const [, mimeType, base64] = match;
  // Buffer's base64 decoder skips embedded whitespace.
  const binary = Buffer.from(base64, "base64");
  return new Blob([binary], { type: mimeType });
}
|
|
922
|
+
/**
 * Fetch a URL and return its body as a Blob.
 *
 * @param {string} url - Location of the image.
 * @returns {Promise<Blob>} The response body.
 * @throws {Error} When the response status is not OK.
 */
async function urlToBlob(url) {
  const response = await fetch(url);
  if (response.ok) {
    return response.blob();
  }
  throw new Error(`Failed to fetch image: ${response.status}`);
}
|
|
927
|
+
/**
 * Turn an image input string into a Blob.
 *
 * Strings starting with "data:" are decoded locally; everything else is
 * treated as a URL and fetched.
 *
 * @param {string} input - Data URI or URL.
 * @returns {Promise<Blob>} The image content.
 */
async function imageInputToBlob(input) {
  const convert = input.startsWith("data:") ? dataUriToBlob : urlToBlob;
  return convert(input);
}
|
|
933
|
+
/**
 * Download a URL and return its bytes plus the reported content type.
 *
 * @param {string} url - Location to download.
 * @returns {Promise<{bytes: Uint8Array, mimeType?: string}>} `mimeType` is present only when the response has a `content-type` header.
 * @throws {Error} When the response status is not OK.
 */
async function downloadBytes4(url) {
  log4("Downloading from:", url.slice(0, 100) + "...");
  const startedAt = Date.now();
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`OpenAI image download failed (${response.status})`);
  }
  const buffer = await response.arrayBuffer();
  const contentType = response.headers.get("content-type");
  log4(`Downloaded ${buffer.byteLength} bytes in ${Date.now() - startedAt}ms, type: ${contentType}`);

  const result = { bytes: new Uint8Array(buffer) };
  if (contentType) {
    result.mimeType = contentType;
  }
  return result;
}
|
|
943
|
+
/**
 * Translate an aspect ratio into the pixel size string sent to OpenAI.
 *
 * Only "gpt-image*" models and "dall-e-3" have mappings; any other model, or
 * a ratio without a mapping, yields `undefined` so no size is sent.
 *
 * @param {string|undefined} aspectRatio - Ratio such as "16:9"; surrounding whitespace is ignored.
 * @param {string|undefined} model - OpenAI model id.
 * @returns {string|undefined} A "<width>x<height>" string, or `undefined`.
 */
function mapAspectRatioToSize(aspectRatio, model) {
  if (!aspectRatio) return undefined;
  const ratio = aspectRatio.trim();

  let sizeByRatio;
  if (model?.startsWith("gpt-image")) {
    sizeByRatio = new Map([
      ["1:1", "1024x1024"],
      ["3:2", "1536x1024"],
      ["4:3", "1536x1024"],
      ["16:9", "1536x1024"],
      ["2:3", "1024x1536"],
      ["3:4", "1024x1536"],
      ["9:16", "1024x1536"],
    ]);
  } else if (model === "dall-e-3") {
    sizeByRatio = new Map([
      ["1:1", "1024x1024"],
      ["16:9", "1792x1024"],
      ["4:3", "1792x1024"],
      ["9:16", "1024x1792"],
      ["3:4", "1024x1792"],
    ]);
  } else {
    return undefined;
  }
  // Map#get returns undefined for ratios without a mapping.
  return sizeByRatio.get(ratio);
}
|
|
957
|
+
/**
 * Capability flags for the OpenAI provider.
 * `videoDurationRange` is intentionally absent because OpenAI has no video support here.
 */
var openaiCapabilities = {
  // One image plus an optional mask.
  maxInputImages: 2,
  // OpenAI doesn't support video.
  supportsVideoInterpolation: false,
  supportsImageEditing: true
};
|
|
965
|
+
/**
 * Edit an image through the OpenAI `images/edits` endpoint.
 *
 * The first input image is the image to edit; a second one, if present, is
 * uploaded as the mask. Results delivered as URLs are downloaded, results
 * delivered as base64 are decoded.
 *
 * @param {object} req - Normalized request (`prompt`, `n`, `aspectRatio`, `inputImages`).
 * @param {string} apiKey - OpenAI API key used as bearer token.
 * @param {string} model - OpenAI model id.
 * @returns {Promise<object[]>} One entry per returned image.
 * @throws {Error} When no input image is given, the API call fails, or the response is empty or malformed.
 */
async function generateWithEdit(req, apiKey, model) {
  log4("Using edit endpoint for image editing");
  const startTime = Date.now();

  const formData = new FormData();
  formData.append("model", model);
  formData.append("prompt", req.prompt);
  formData.append("n", String(req.n));
  const size = mapAspectRatioToSize(req.aspectRatio, model);
  if (size) {
    formData.append("size", size);
  }

  const [imageInput, maskInput] = req.inputImages ?? [];
  if (!imageInput) throw new Error("No input image provided for editing");
  formData.append("image", await imageInputToBlob(imageInput), "image.png");
  log4("Added input image to form data");
  if (maskInput) {
    formData.append("mask", await imageInputToBlob(maskInput), "mask.png");
    log4("Added mask image to form data");
  }

  log4("Calling OpenAI images/edits...");
  const res = await fetch(`${OPENAI_API_BASE}/images/edits`, {
    method: "POST",
    // Don't set content-type - FormData sets it with boundary
    headers: { authorization: `Bearer ${apiKey}` },
    body: formData,
  });
  log4(`API responded in ${Date.now() - startTime}ms, status: ${res.status}`);
  if (!res.ok) {
    const txt = await res.text().catch(() => "");
    log4("Error response:", txt.slice(0, 1e3));
    throw new Error(`OpenAI edit failed (${res.status}): ${txt.slice(0, 500)}`);
  }

  const json = await res.json();
  log4("Response data count:", json.data?.length);
  if (!json.data?.length) throw new Error("OpenAI edit returned no images");

  const results = [];
  for (let i = 0; i < json.data.length; i += 1) {
    const img = json.data[i];
    if (!img) continue;
    log4(`Processing image ${i}...`);
    const base = { kind: "image", provider: "openai", model, index: i };
    if (img.url) {
      const dl = await downloadBytes4(img.url);
      const item = { ...base, url: img.url, bytes: dl.bytes };
      if (dl.mimeType) {
        item.mimeType = dl.mimeType;
      }
      results.push(item);
    } else if (img.b64_json) {
      log4(`Image ${i} is base64 encoded, ${img.b64_json.length} chars`);
      results.push({ ...base, bytes: Uint8Array.from(Buffer.from(img.b64_json, "base64")) });
    } else {
      throw new Error("OpenAI returned image without url or b64_json");
    }
  }
  log4(`Successfully edited ${results.length} image(s)`);
  return results;
}
|
|
1032
|
+
/**
 * OpenAI image provider.
 *
 * `generate` routes requests that carry input images to the edit endpoint and
 * everything else to `images/generations`.
 */
var openaiProvider = {
  id: "openai",
  displayName: "OpenAI (GPT Image / DALL-E)",
  supports: ["image"],
  capabilities: openaiCapabilities,

  /** @returns {boolean} True when an OpenAI API key is present in `env`. */
  isAvailable(env) {
    return Boolean(getOpenAIApiKey(env));
  },

  /**
   * Generate (or edit) images for a normalized request.
   *
   * @param {object} req - Normalized request.
   * @param {object} env - Environment variables holding the API key.
   * @returns {Promise<object[]>} One entry per returned image.
   * @throws {Error} On a missing key, a failed API call, or an empty or malformed response.
   */
  async generate(req, env) {
    const apiKey = getOpenAIApiKey(env);
    if (!apiKey) throw new Error("Missing OpenAI API key. Set OPENAI_API_KEY.");
    verboseMode3 = req.verbose;
    log4("Provider initialized, kind:", req.kind);

    const model = req.model ?? "gpt-image-1";
    log4("Using model:", model, "hasInputImages:", !!req.inputImages?.length);
    if (req.inputImages?.length) {
      return generateWithEdit(req, apiKey, model);
    }

    const size = mapAspectRatioToSize(req.aspectRatio, model);
    const body = { model, prompt: req.prompt, n: req.n };
    if (size) {
      body.size = size;
    }
    // gpt-image-1 doesn't support response_format, defaults to b64_json
    // dall-e-2/3 support response_format
    if (!model.startsWith("gpt-image")) {
      body.response_format = "url";
    }
    log4("Request body:", JSON.stringify(body));

    log4("Calling OpenAI images/generations...");
    const startTime = Date.now();
    const res = await fetch(`${OPENAI_API_BASE}/images/generations`, {
      method: "POST",
      headers: {
        authorization: `Bearer ${apiKey}`,
        "content-type": "application/json",
      },
      body: JSON.stringify(body),
    });
    log4(`API responded in ${Date.now() - startTime}ms, status: ${res.status}`);
    if (!res.ok) {
      const txt = await res.text().catch(() => "");
      log4("Error response:", txt.slice(0, 1e3));
      throw new Error(`OpenAI generations failed (${res.status}): ${txt.slice(0, 500)}`);
    }

    const json = await res.json();
    log4("Response data count:", json.data?.length);
    if (!json.data?.length) throw new Error("OpenAI returned no images");

    const results = [];
    for (let i = 0; i < json.data.length; i += 1) {
      const img = json.data[i];
      if (!img) continue;
      log4(`Processing image ${i}...`);
      const base = { kind: "image", provider: "openai", model, index: i };
      if (img.url) {
        const dl = await downloadBytes4(img.url);
        const item = { ...base, url: img.url, bytes: dl.bytes };
        if (dl.mimeType) {
          item.mimeType = dl.mimeType;
        }
        results.push(item);
      } else if (img.b64_json) {
        log4(`Image ${i} is base64 encoded, ${img.b64_json.length} chars`);
        results.push({ ...base, bytes: Uint8Array.from(Buffer.from(img.b64_json, "base64")) });
      } else {
        throw new Error("OpenAI returned image without url or b64_json");
      }
    }
    log4(`Successfully generated ${results.length} image(s)`);
    return results;
  }
};
|
|
233
1110
|
|
|
234
1111
|
// src/core/router.ts
|
|
235
|
-
var providers = [googleProvider, xaiProvider, falProvider];
|
|
1112
|
+
// All registered providers. Order matters: pickProvider's fallback takes the first entry whose isAvailable(env) is true.
var providers = [googleProvider, xaiProvider, falProvider, openaiProvider];
|
|
1113
|
+
/**
 * Write a debug message to stderr, prefixed with "[router]", when `verbose` is truthy.
 *
 * @param {boolean} verbose - Whether to emit the message.
 * @param {...unknown} args - Values forwarded to `console.error`.
 */
function log5(verbose, ...args) {
  if (!verbose) return;
  console.error("[router]", ...args);
}
|
|
236
1116
|
/**
 * List the registered providers.
 *
 * @returns {object[]} A shallow copy, so callers cannot mutate the registry array.
 */
function listProviders() {
  return providers.slice();
}
|
|
@@ -244,44 +1124,119 @@ function pickProvider(id, env) {
|
|
|
244
1124
|
return p2;
|
|
245
1125
|
}
|
|
246
1126
|
const p = providers.find((pp) => pp.isAvailable(env));
|
|
247
|
-
if (!p)
|
|
1127
|
+
if (!p)
|
|
1128
|
+
throw new Error(
|
|
1129
|
+
"No providers available. Set XAI_API_KEY (or other provider keys) in .env or environment."
|
|
1130
|
+
);
|
|
248
1131
|
return p;
|
|
249
1132
|
}
|
|
250
|
-
function
|
|
1133
|
+
/**
 * Pick the default output format for a media kind.
 *
 * @param {string} kind - Media kind.
 * @returns {string} "mp4" for "video", "png" for anything else.
 */
function defaultFormatForKind(kind) {
  if (kind === "video") {
    return "mp4";
  }
  return "png";
}
|
|
1136
|
+
/**
 * Turn raw user options into a fully-populated generation request.
 *
 * `n` is floored and clamped to 1..10. A value that is not a number (e.g. the
 * NaN produced by `--n abc`) is now rejected; previously it slipped through
 * the clamp as NaN and reached the providers. A NaN `duration` is rejected
 * for the same reason.
 *
 * @param {string} prompt - Text prompt.
 * @param {object} opts - Raw options (`n`, `kind`, `format`, `outDir`, `out`, `name`, `provider`, `model`,
 *   `aspectRatio`, `inputImages`, `startFrame`, `endFrame`, `duration`, `verbose`).
 * @param {boolean} verbose - Whether to log progress to stderr.
 * @returns {Promise<object>} The normalized request.
 * @throws {Error} When `n` or `duration` is not a number, or when an image input cannot be resolved.
 */
async function normalizeOptions(prompt, opts, verbose) {
  const nRaw = Number(opts.n ?? 1);
  if (Number.isNaN(nRaw)) {
    throw new Error(`Invalid value for n: ${String(opts.n)} (expected a number from 1 to 10)`);
  }
  const n = Math.max(1, Math.min(10, Math.floor(nRaw)));

  if (typeof opts.duration === "number" && Number.isNaN(opts.duration)) {
    throw new Error("Invalid value for duration: expected a number of seconds");
  }

  const kind = opts.kind ?? "image";
  const format = opts.format ?? defaultFormatForKind(kind);
  const outDir = resolveOutDir(opts.outDir ?? ".");
  const timestamp = timestampLocalCompact();
  // The prompt doubles as the base file name when no explicit name is given.
  const nameBase = slugify(opts.name ?? prompt);

  let inputImages;
  if (opts.inputImages?.length) {
    log5(verbose, `Resolving ${opts.inputImages.length} input image(s)...`);
    inputImages = await resolveImageInputs(opts.inputImages);
    log5(verbose, `Resolved input images`);
  }

  let startFrame;
  let endFrame;
  if (opts.startFrame) {
    log5(verbose, `Resolving start frame: ${opts.startFrame}`);
    startFrame = await resolveImageInput(opts.startFrame);
  }
  if (opts.endFrame) {
    log5(verbose, `Resolving end frame: ${opts.endFrame}`);
    endFrame = await resolveImageInput(opts.endFrame);
  }

  return {
    prompt,
    provider: opts.provider ?? "auto",
    model: opts.model ?? undefined,
    n,
    aspectRatio: opts.aspectRatio ?? undefined,
    kind,
    format,
    outDir,
    out: opts.out ? path3.resolve(process.cwd(), opts.out) : undefined,
    nameBase,
    timestamp,
    verbose: Boolean(opts.verbose),
    inputImages,
    startFrame,
    endFrame,
    duration: opts.duration
  };
}
|
|
271
|
-
|
|
1180
|
+
/**
 * Check a normalized request against a provider's declared capabilities.
 *
 * The duration check is written as "must lie inside [min, max]" so that a NaN
 * duration is rejected too; the previous `d < min || d > max` form let NaN
 * pass because every comparison with NaN is false.
 *
 * @param {object} req - Normalized request (`kind`, `inputImages`, `endFrame`, `duration`).
 * @param {object} provider - Provider with `id` and `capabilities`
 *   (`maxInputImages`, `supportsVideoInterpolation`, `videoDurationRange`, `supportsImageEditing`).
 * @returns {void}
 * @throws {Error} When the request uses a feature or value the provider does not support.
 */
function validateRequestForProvider(req, provider) {
  const caps = provider.capabilities;
  const inputCount = req.inputImages?.length ?? 0;

  if (inputCount > caps.maxInputImages) {
    throw new Error(
      `Provider ${provider.id} supports max ${caps.maxInputImages} input image(s), but ${inputCount} provided`
    );
  }

  if (req.endFrame && !caps.supportsVideoInterpolation) {
    throw new Error(
      `Provider ${provider.id} does not support video interpolation (end frame). Only startFrame is supported for image-to-video.`
    );
  }

  // The range is only enforced for video requests on providers that declare one.
  if (req.duration !== undefined && req.kind === "video" && caps.videoDurationRange) {
    const [min, max] = caps.videoDurationRange;
    const withinRange = req.duration >= min && req.duration <= max;
    if (!withinRange) {
      throw new Error(
        `Provider ${provider.id} supports video duration ${min}-${max}s, but ${req.duration}s requested`
      );
    }
  }

  if (req.kind === "image" && inputCount > 0 && !caps.supportsImageEditing) {
    throw new Error(`Provider ${provider.id} does not support image editing with input images`);
  }
}
|
|
1205
|
+
/**
 * Generate media for a prompt and write every result to disk.
 *
 * Loads the environment, normalizes the options, picks a provider, validates
 * the request against it, runs the generation, then writes each returned item
 * to its output path.
 *
 * @param {string} prompt - Text prompt.
 * @param {object} [opts={}] - Raw generation options.
 * @returns {Promise<object[]>} The provider's items, each extended with `filePath`.
 * @throws {Error} When the provider does not support the requested kind or any step fails.
 */
async function generateMedia(prompt, opts = {}) {
  const { env } = loadEnv(process.cwd());
  const verbose = Boolean(opts.verbose);
  const req = await normalizeOptions(prompt, opts, verbose);

  // Keep the log readable: inline data URIs are replaced by a short length note.
  const abbreviate = (value) =>
    value.startsWith("data:") ? `data:...${value.length} chars` : value;
  const abbreviateOptional = (value) => (value == null ? value : abbreviate(value));
  const reqSummary = {
    ...req,
    prompt: req.prompt.slice(0, 50) + "...",
    inputImages: req.inputImages?.map((img) => abbreviate(img)),
    startFrame: abbreviateOptional(req.startFrame),
    endFrame: abbreviateOptional(req.endFrame),
  };
  log5(verbose, "Request:", JSON.stringify(reqSummary));

  const provider = pickProvider(req.provider, env);
  log5(verbose, "Selected provider:", provider.id, "| supports:", provider.supports);
  if (!provider.supports.includes(req.kind)) {
    throw new Error(`Provider ${provider.id} does not support ${req.kind} generation`);
  }
  validateRequestForProvider(req, provider);

  log5(verbose, "Calling provider.generate()...");
  const startTime = Date.now();
  const partials = await provider.generate(req, env);
  log5(verbose, `Provider returned ${partials.length} items in ${Date.now() - startTime}ms`);

  const items = [];
  for (const [position, partial] of partials.entries()) {
    if (!partial) continue;
    // The output path is derived from the item's position in the provider's result list.
    const filePath = makeOutputPath(req, position);
    log5(verbose, `Writing ${partial.bytes.byteLength} bytes to: ${filePath}`);
    await writeMediaFile(filePath, partial.bytes);
    items.push({ ...partial, filePath });
  }
  log5(verbose, `Done! Generated ${items.length} ${req.kind}(s)`);
  return items;
}
|
|
286
1241
|
|
|
287
1242
|
// src/cli.ts
|
|
@@ -295,24 +1250,38 @@ Usage:
|
|
|
295
1250
|
Options:
|
|
296
1251
|
--provider <auto|${providers2}> Provider (default: auto)
|
|
297
1252
|
--model <id> Model id (provider-specific)
|
|
298
|
-
--n <1..10> Number of
|
|
299
|
-
--
|
|
1253
|
+
--n <1..10> Number of outputs (default: 1)
|
|
1254
|
+
--type <image|video> Output type (default: image)
|
|
1255
|
+
--video Shortcut for: --type video
|
|
1256
|
+
--format <png|jpg|webp|mp4|webm|gif>
|
|
1257
|
+
Output format (default: png for image, mp4 for video)
|
|
300
1258
|
--out <path> Output file path (only when n=1)
|
|
301
1259
|
--outDir <dir> Output directory (default: .)
|
|
302
1260
|
--name <text> Base name (slugified); default: prompt
|
|
303
|
-
--aspect-ratio <w:h> Aspect ratio (
|
|
1261
|
+
--aspect-ratio <w:h> Aspect ratio (provider-specific)
|
|
304
1262
|
--json Print machine-readable JSON
|
|
305
1263
|
--verbose Verbose logging
|
|
306
1264
|
-h, --help Show help
|
|
307
1265
|
|
|
1266
|
+
Input Images:
|
|
1267
|
+
--input <path> Input image for editing or reference (repeatable)
|
|
1268
|
+
--start-frame <path> First frame image (for video generation)
|
|
1269
|
+
--end-frame <path> Last frame image (for video interpolation)
|
|
1270
|
+
--duration <seconds> Video duration in seconds (provider-specific)
|
|
1271
|
+
|
|
308
1272
|
Env:
|
|
309
1273
|
GEMINI_API_KEY (or GOOGLE_API_KEY)
|
|
310
1274
|
XAI_API_KEY (or XAI_TOKEN, GROK_API_KEY)
|
|
311
1275
|
FAL_KEY (or FAL_API_KEY)
|
|
1276
|
+
OPENAI_API_KEY
|
|
312
1277
|
|
|
313
1278
|
Examples:
|
|
314
1279
|
npx climage "make image of kitten"
|
|
315
1280
|
npx climage "A cat in a tree" --provider xai --n 4
|
|
1281
|
+
npx climage "a cinematic shot of a corgi running" --provider fal --type video
|
|
1282
|
+
npx climage "make the cat orange" --provider xai --input photo.jpg
|
|
1283
|
+
npx climage "the cat walks away" --video --provider google --start-frame cat.png
|
|
1284
|
+
npx climage "morphing transition" --video --provider fal --start-frame a.png --end-frame b.png
|
|
316
1285
|
`);
|
|
317
1286
|
process3.exit(code);
|
|
318
1287
|
}
|
|
/**
 * Parse CLI arguments into a prompt, generation options and the `--json` flag.
 *
 * Every argument that is neither an option nor an option's value becomes part
 * of the prompt (joined with spaces). `--input` may be repeated. Numeric
 * options (`--n`, `--duration`) are validated here; previously non-numeric
 * text was silently stored as NaN.
 *
 * @param {string[]} argv - Arguments after the executable and script name.
 * @returns {{prompt: string, opts: object, json: boolean}} Parsed result.
 * @throws {Error} On an unknown option, a missing or non-numeric option value, or an empty prompt.
 */
function parseArgs(argv) {
  const args = [...argv];
  const opts = {};
  const promptParts = [];
  const inputImages = [];
  let json = false;

  const parseNumber = (flag, raw) => {
    const value = Number(raw);
    if (!Number.isFinite(value)) {
      throw new Error(`Invalid value for ${flag}: ${raw} (expected a number)`);
    }
    return value;
  };

  // Options that consume the following argument, mapped to how the value is stored.
  const optionsWithValue = new Map([
    ["--provider", (v) => { opts.provider = v; }],
    ["--model", (v) => { opts.model = v; }],
    ["--n", (v) => { opts.n = parseNumber("--n", v); }],
    ["--type", (v) => { opts.kind = v; }],
    ["--format", (v) => { opts.format = v; }],
    ["--out", (v) => { opts.out = v; }],
    ["--outDir", (v) => { opts.outDir = v; }],
    ["--name", (v) => { opts.name = v; }],
    ["--aspect-ratio", (v) => { opts.aspectRatio = v; }],
    ["--input", (v) => { inputImages.push(v); }],
    ["--start-frame", (v) => { opts.startFrame = v; }],
    ["--end-frame", (v) => { opts.endFrame = v; }],
    ["--duration", (v) => { opts.duration = parseNumber("--duration", v); }],
  ]);

  let i = 0;
  while (i < args.length) {
    const a = args[i];
    if (!a) {
      i += 1;
      continue;
    }
    if (a === "-h" || a === "--help") usage(0);
    if (a === "--json") {
      json = true;
      i += 1;
      continue;
    }
    if (a === "--video") {
      opts.kind = "video";
      i += 1;
      continue;
    }
    if (a === "--verbose") {
      opts.verbose = true;
      i += 1;
      continue;
    }

    const storeValue = optionsWithValue.get(a);
    if (storeValue) {
      const v = args[i + 1];
      // A following token that looks like another option means the value was omitted.
      if (!v || v.startsWith("-")) throw new Error(`Missing value for ${a}`);
      storeValue(v);
      i += 2;
      continue;
    }

    if (a.startsWith("-")) {
      throw new Error(`Unknown option: ${a}`);
    }
    promptParts.push(a);
    i += 1;
  }

  if (inputImages.length) {
    opts.inputImages = inputImages;
  }
  const prompt = promptParts.join(" ").trim();
  if (!prompt) throw new Error("Missing prompt");
  return { prompt, opts, json };
}
|
|
375
1392
|
async function main() {
|
|
376
1393
|
try {
|
|
377
1394
|
const { prompt, opts, json } = parseArgs(process3.argv.slice(2));
|
|
378
|
-
const
|
|
1395
|
+
const items = await generateMedia(prompt, opts);
|
|
379
1396
|
if (json) {
|
|
380
|
-
process3.stdout.write(JSON.stringify(
|
|
1397
|
+
process3.stdout.write(JSON.stringify(toJsonResult(items), null, 2) + "\n");
|
|
381
1398
|
return;
|
|
382
1399
|
}
|
|
383
|
-
for (const
|
|
384
|
-
process3.stdout.write(
|
|
1400
|
+
for (const item of items) {
|
|
1401
|
+
process3.stdout.write(item.filePath + "\n");
|
|
385
1402
|
}
|
|
386
1403
|
} catch (err) {
|
|
387
1404
|
const msg = err instanceof Error ? err.message : String(err);
|