climage 0.2.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +168 -19
- package/dist/cli.js +1146 -170
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +63 -7
- package/dist/index.js +1017 -118
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.js
CHANGED
|
@@ -21,6 +21,16 @@ function loadEnv(cwd = process2.cwd()) {
|
|
|
21
21
|
// src/core/output.ts
|
|
22
22
|
import fs2 from "fs/promises";
|
|
23
23
|
import path2 from "path";
|
|
24
|
+
var IMAGE_MIME_TYPES = {
|
|
25
|
+
".png": "image/png",
|
|
26
|
+
".jpg": "image/jpeg",
|
|
27
|
+
".jpeg": "image/jpeg",
|
|
28
|
+
".webp": "image/webp",
|
|
29
|
+
".gif": "image/gif",
|
|
30
|
+
".avif": "image/avif",
|
|
31
|
+
".heif": "image/heif",
|
|
32
|
+
".heic": "image/heic"
|
|
33
|
+
};
|
|
24
34
|
function extensionForFormat(format) {
|
|
25
35
|
switch (format) {
|
|
26
36
|
case "jpg":
|
|
@@ -29,23 +39,64 @@ function extensionForFormat(format) {
|
|
|
29
39
|
return "png";
|
|
30
40
|
case "webp":
|
|
31
41
|
return "webp";
|
|
42
|
+
case "mp4":
|
|
43
|
+
return "mp4";
|
|
44
|
+
case "webm":
|
|
45
|
+
return "webm";
|
|
46
|
+
case "gif":
|
|
47
|
+
return "gif";
|
|
32
48
|
}
|
|
33
49
|
}
|
|
34
50
|
function resolveOutDir(outDir) {
|
|
35
51
|
return path2.isAbsolute(outDir) ? outDir : path2.resolve(process.cwd(), outDir);
|
|
36
52
|
}
|
|
37
|
-
function
|
|
38
|
-
|
|
53
|
+
function extensionFromMimeType(mimeType) {
|
|
54
|
+
if (!mimeType) return void 0;
|
|
55
|
+
const t = mimeType.toLowerCase().split(";")[0]?.trim();
|
|
56
|
+
if (!t) return void 0;
|
|
57
|
+
if (t === "image/png") return "png";
|
|
58
|
+
if (t === "image/jpeg") return "jpg";
|
|
59
|
+
if (t === "image/webp") return "webp";
|
|
60
|
+
if (t === "image/gif") return "gif";
|
|
61
|
+
if (t === "image/avif") return "avif";
|
|
62
|
+
if (t === "video/mp4") return "mp4";
|
|
63
|
+
if (t === "video/webm") return "webm";
|
|
64
|
+
return void 0;
|
|
65
|
+
}
|
|
66
|
+
function makeOutputPath(req, index, mimeType) {
|
|
39
67
|
if (req.out) return path2.resolve(process.cwd(), req.out);
|
|
68
|
+
const ext = extensionFromMimeType(mimeType) ?? extensionForFormat(req.format);
|
|
40
69
|
const base = `${req.nameBase}-${req.timestamp}`;
|
|
41
70
|
const suffix = req.n > 1 ? `-${String(index + 1).padStart(2, "0")}` : "";
|
|
42
71
|
const filename = `${base}${suffix}.${ext}`;
|
|
43
72
|
return path2.join(req.outDir, filename);
|
|
44
73
|
}
|
|
45
|
-
async function
|
|
74
|
+
async function writeMediaFile(filePath, bytes) {
|
|
46
75
|
await fs2.mkdir(path2.dirname(filePath), { recursive: true });
|
|
47
76
|
await fs2.writeFile(filePath, bytes);
|
|
48
77
|
}
|
|
78
|
+
async function resolveImageInput(pathOrUrl) {
|
|
79
|
+
if (pathOrUrl.startsWith("http://") || pathOrUrl.startsWith("https://")) {
|
|
80
|
+
return pathOrUrl;
|
|
81
|
+
}
|
|
82
|
+
if (pathOrUrl.startsWith("data:")) {
|
|
83
|
+
return pathOrUrl;
|
|
84
|
+
}
|
|
85
|
+
const resolvedPath = path2.isAbsolute(pathOrUrl) ? pathOrUrl : path2.resolve(process.cwd(), pathOrUrl);
|
|
86
|
+
const ext = path2.extname(resolvedPath).toLowerCase();
|
|
87
|
+
const mimeType = IMAGE_MIME_TYPES[ext];
|
|
88
|
+
if (!mimeType) {
|
|
89
|
+
throw new Error(
|
|
90
|
+
`Unsupported image format: ${ext}. Supported: ${Object.keys(IMAGE_MIME_TYPES).join(", ")}`
|
|
91
|
+
);
|
|
92
|
+
}
|
|
93
|
+
const fileBuffer = await fs2.readFile(resolvedPath);
|
|
94
|
+
const base64 = fileBuffer.toString("base64");
|
|
95
|
+
return `data:${mimeType};base64,${base64}`;
|
|
96
|
+
}
|
|
97
|
+
async function resolveImageInputs(pathsOrUrls) {
|
|
98
|
+
return Promise.all(pathsOrUrls.map(resolveImageInput));
|
|
99
|
+
}
|
|
49
100
|
|
|
50
101
|
// src/core/strings.ts
|
|
51
102
|
function slugify(input, maxLen = 60) {
|
|
@@ -63,70 +114,266 @@ var XAI_API_BASE = "https://api.x.ai/v1";
|
|
|
63
114
|
function getXaiApiKey(env) {
|
|
64
115
|
return env.XAI_API_KEY || env.XAI_TOKEN || env.GROK_API_KEY;
|
|
65
116
|
}
|
|
117
|
+
var verboseMode = false;
|
|
118
|
+
function log(...args) {
|
|
119
|
+
if (verboseMode) console.error("[xai]", ...args);
|
|
120
|
+
}
|
|
66
121
|
async function downloadBytes(url) {
|
|
122
|
+
log("Downloading from:", url.slice(0, 100) + "...");
|
|
123
|
+
const start = Date.now();
|
|
67
124
|
const res = await fetch(url);
|
|
68
|
-
if (!res.ok) throw new Error(`xAI
|
|
125
|
+
if (!res.ok) throw new Error(`xAI download failed (${res.status})`);
|
|
69
126
|
const ab = await res.arrayBuffer();
|
|
70
127
|
const ct = res.headers.get("content-type") || void 0;
|
|
128
|
+
log(`Downloaded ${ab.byteLength} bytes in ${Date.now() - start}ms, type: ${ct}`);
|
|
71
129
|
return { bytes: new Uint8Array(ab), mimeType: ct };
|
|
72
130
|
}
|
|
131
|
+
async function sleep(ms) {
|
|
132
|
+
await new Promise((r) => setTimeout(r, ms));
|
|
133
|
+
}
|
|
134
|
+
async function generateXaiImages(req, apiKey) {
|
|
135
|
+
const model = req.model ?? "grok-imagine-image";
|
|
136
|
+
log("Starting image generation, model:", model, "n:", req.n);
|
|
137
|
+
const body = {
|
|
138
|
+
model,
|
|
139
|
+
prompt: req.prompt,
|
|
140
|
+
n: req.n,
|
|
141
|
+
// xAI docs: endpoint supports aspect_ratio
|
|
142
|
+
...req.aspectRatio ? { aspect_ratio: req.aspectRatio } : {},
|
|
143
|
+
// Use URL format to download + save.
|
|
144
|
+
response_format: "url"
|
|
145
|
+
};
|
|
146
|
+
log("Request body:", JSON.stringify(body));
|
|
147
|
+
log("Calling xAI images/generations...");
|
|
148
|
+
const startTime = Date.now();
|
|
149
|
+
const res = await fetch(`${XAI_API_BASE}/images/generations`, {
|
|
150
|
+
method: "POST",
|
|
151
|
+
headers: {
|
|
152
|
+
authorization: `Bearer ${apiKey}`,
|
|
153
|
+
"content-type": "application/json"
|
|
154
|
+
},
|
|
155
|
+
body: JSON.stringify(body)
|
|
156
|
+
});
|
|
157
|
+
log(`API responded in ${Date.now() - startTime}ms, status: ${res.status}`);
|
|
158
|
+
if (!res.ok) {
|
|
159
|
+
const txt = await res.text().catch(() => "");
|
|
160
|
+
log("Error response:", txt.slice(0, 1e3));
|
|
161
|
+
throw new Error(`xAI generations failed (${res.status}): ${txt.slice(0, 500)}`);
|
|
162
|
+
}
|
|
163
|
+
const json = await res.json();
|
|
164
|
+
log("Response data count:", json.data?.length);
|
|
165
|
+
if (!json.data?.length) throw new Error("xAI returned no images");
|
|
166
|
+
return processXaiImageResponse(json, model);
|
|
167
|
+
}
|
|
168
|
+
async function editXaiImages(req, apiKey) {
|
|
169
|
+
const model = req.model ?? "grok-imagine-image";
|
|
170
|
+
const inputImage = req.inputImages?.[0];
|
|
171
|
+
if (!inputImage) throw new Error("No input image provided for editing");
|
|
172
|
+
if ((req.inputImages?.length ?? 0) > 1) {
|
|
173
|
+
throw new Error(
|
|
174
|
+
"xAI image editing supports only 1 input image (image_url). Provide exactly one --input for xAI edits."
|
|
175
|
+
);
|
|
176
|
+
}
|
|
177
|
+
log("Starting image editing, model:", model, "n:", req.n);
|
|
178
|
+
const body = {
|
|
179
|
+
model,
|
|
180
|
+
prompt: req.prompt,
|
|
181
|
+
n: req.n,
|
|
182
|
+
image: { url: inputImage },
|
|
183
|
+
// Object with url field containing data URI or URL
|
|
184
|
+
response_format: "url",
|
|
185
|
+
...req.aspectRatio ? { aspect_ratio: req.aspectRatio } : {}
|
|
186
|
+
};
|
|
187
|
+
log("Request body:", JSON.stringify({ ...body, image: { url: "...(data uri)..." } }));
|
|
188
|
+
log("Calling xAI images/edits...");
|
|
189
|
+
const startTime = Date.now();
|
|
190
|
+
const res = await fetch(`${XAI_API_BASE}/images/edits`, {
|
|
191
|
+
method: "POST",
|
|
192
|
+
headers: {
|
|
193
|
+
authorization: `Bearer ${apiKey}`,
|
|
194
|
+
"content-type": "application/json"
|
|
195
|
+
},
|
|
196
|
+
body: JSON.stringify(body)
|
|
197
|
+
});
|
|
198
|
+
log(`API responded in ${Date.now() - startTime}ms, status: ${res.status}`);
|
|
199
|
+
if (!res.ok) {
|
|
200
|
+
const txt = await res.text().catch(() => "");
|
|
201
|
+
log("Error response:", txt.slice(0, 1e3));
|
|
202
|
+
throw new Error(`xAI edits failed (${res.status}): ${txt.slice(0, 500)}`);
|
|
203
|
+
}
|
|
204
|
+
const json = await res.json();
|
|
205
|
+
log("Response data count:", json.data?.length);
|
|
206
|
+
if (!json.data?.length) throw new Error("xAI returned no images");
|
|
207
|
+
return processXaiImageResponse(json, model);
|
|
208
|
+
}
|
|
209
|
+
async function processXaiImageResponse(json, model) {
|
|
210
|
+
const results = [];
|
|
211
|
+
for (let i = 0; i < json.data.length; i++) {
|
|
212
|
+
const img = json.data[i];
|
|
213
|
+
if (!img) continue;
|
|
214
|
+
log(`Processing image ${i}...`);
|
|
215
|
+
if (img.url) {
|
|
216
|
+
const { bytes, mimeType } = await downloadBytes(img.url);
|
|
217
|
+
results.push({
|
|
218
|
+
kind: "image",
|
|
219
|
+
provider: "xai",
|
|
220
|
+
model,
|
|
221
|
+
index: i,
|
|
222
|
+
url: img.url,
|
|
223
|
+
bytes,
|
|
224
|
+
...mimeType !== void 0 ? { mimeType } : {}
|
|
225
|
+
});
|
|
226
|
+
continue;
|
|
227
|
+
}
|
|
228
|
+
if (img.b64_json) {
|
|
229
|
+
log(`Image ${i} is base64 encoded`);
|
|
230
|
+
const bytes = Uint8Array.from(Buffer.from(img.b64_json, "base64"));
|
|
231
|
+
results.push({ kind: "image", provider: "xai", model, index: i, bytes });
|
|
232
|
+
continue;
|
|
233
|
+
}
|
|
234
|
+
throw new Error("xAI returned image without url or b64_json");
|
|
235
|
+
}
|
|
236
|
+
log(`Successfully generated ${results.length} image(s)`);
|
|
237
|
+
return results;
|
|
238
|
+
}
|
|
239
|
+
async function generateXaiVideo(req, apiKey) {
|
|
240
|
+
const model = req.model ?? "grok-imagine-video";
|
|
241
|
+
const imageUrl = req.startFrame ?? req.inputImages?.[0];
|
|
242
|
+
if ((req.inputImages?.length ?? 0) > 1 && !req.startFrame) {
|
|
243
|
+
throw new Error(
|
|
244
|
+
"xAI video generation supports only 1 input image (image_url). Provide exactly one --input or use --start-frame."
|
|
245
|
+
);
|
|
246
|
+
}
|
|
247
|
+
log(
|
|
248
|
+
"Starting video generation, model:",
|
|
249
|
+
model,
|
|
250
|
+
"hasImageUrl:",
|
|
251
|
+
!!imageUrl,
|
|
252
|
+
"duration:",
|
|
253
|
+
req.duration
|
|
254
|
+
);
|
|
255
|
+
const createBody = {
|
|
256
|
+
prompt: req.prompt,
|
|
257
|
+
model,
|
|
258
|
+
...req.aspectRatio ? { aspect_ratio: req.aspectRatio } : {},
|
|
259
|
+
// Add image_url for image-to-video (data URI or URL string)
|
|
260
|
+
...imageUrl ? { image_url: imageUrl } : {},
|
|
261
|
+
// Add duration (xAI supports 1-15 seconds)
|
|
262
|
+
...req.duration !== void 0 ? { duration: req.duration } : {}
|
|
263
|
+
};
|
|
264
|
+
log(
|
|
265
|
+
"Request body:",
|
|
266
|
+
JSON.stringify({
|
|
267
|
+
...createBody,
|
|
268
|
+
image_url: createBody.image_url ? `...(${String(createBody.image_url).length} chars)` : void 0
|
|
269
|
+
})
|
|
270
|
+
);
|
|
271
|
+
log("Calling xAI videos/generations...");
|
|
272
|
+
const startTime = Date.now();
|
|
273
|
+
const createRes = await fetch(`${XAI_API_BASE}/videos/generations`, {
|
|
274
|
+
method: "POST",
|
|
275
|
+
headers: {
|
|
276
|
+
authorization: `Bearer ${apiKey}`,
|
|
277
|
+
"content-type": "application/json"
|
|
278
|
+
},
|
|
279
|
+
body: JSON.stringify(createBody)
|
|
280
|
+
});
|
|
281
|
+
log(`API responded in ${Date.now() - startTime}ms, status: ${createRes.status}`);
|
|
282
|
+
if (!createRes.ok) {
|
|
283
|
+
const txt = await createRes.text().catch(() => "");
|
|
284
|
+
log("Error response:", txt.slice(0, 1e3));
|
|
285
|
+
throw new Error(`xAI video generations failed (${createRes.status}): ${txt.slice(0, 500)}`);
|
|
286
|
+
}
|
|
287
|
+
const createJson = await createRes.json();
|
|
288
|
+
const requestId = createJson.request_id;
|
|
289
|
+
log("Got request_id:", requestId);
|
|
290
|
+
if (!requestId) throw new Error("xAI video generation returned no request_id");
|
|
291
|
+
const maxAttempts = 120;
|
|
292
|
+
const intervalMs = 3e3;
|
|
293
|
+
let result;
|
|
294
|
+
log(`Starting poll loop (max ${maxAttempts} attempts, ${intervalMs}ms interval)...`);
|
|
295
|
+
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
|
296
|
+
const res = await fetch(`${XAI_API_BASE}/videos/${encodeURIComponent(requestId)}`, {
|
|
297
|
+
method: "GET",
|
|
298
|
+
headers: {
|
|
299
|
+
authorization: `Bearer ${apiKey}`
|
|
300
|
+
}
|
|
301
|
+
});
|
|
302
|
+
if (!res.ok) {
|
|
303
|
+
const txt = await res.text().catch(() => "");
|
|
304
|
+
log(`Poll attempt ${attempt + 1} failed:`, txt.slice(0, 500));
|
|
305
|
+
throw new Error(`xAI video poll failed (${res.status}): ${txt.slice(0, 500)}`);
|
|
306
|
+
}
|
|
307
|
+
const json = await res.json();
|
|
308
|
+
result = json;
|
|
309
|
+
log(
|
|
310
|
+
`Poll attempt ${attempt + 1}/${maxAttempts}: status=${json.status}, raw:`,
|
|
311
|
+
JSON.stringify(json).slice(0, 300)
|
|
312
|
+
);
|
|
313
|
+
if (json.video?.url) {
|
|
314
|
+
log("Video generation complete!");
|
|
315
|
+
break;
|
|
316
|
+
}
|
|
317
|
+
if (json.status === "failed" || json.status === "error") {
|
|
318
|
+
log("Video generation failed:", JSON.stringify(json));
|
|
319
|
+
throw new Error(`xAI video generation failed: ${JSON.stringify(json)}`);
|
|
320
|
+
}
|
|
321
|
+
await sleep(intervalMs);
|
|
322
|
+
}
|
|
323
|
+
if (!result?.video?.url) {
|
|
324
|
+
log("Timed out. Last result:", JSON.stringify(result));
|
|
325
|
+
throw new Error(`xAI video generation timed out (request_id=${requestId})`);
|
|
326
|
+
}
|
|
327
|
+
const url = result.video.url;
|
|
328
|
+
log("Video URL:", url);
|
|
329
|
+
if (result.video?.respect_moderation === false) {
|
|
330
|
+
throw new Error("xAI video generation was blocked by moderation");
|
|
331
|
+
}
|
|
332
|
+
const { bytes, mimeType } = await downloadBytes(url);
|
|
333
|
+
log(`Successfully generated video, ${bytes.byteLength} bytes`);
|
|
334
|
+
return [
|
|
335
|
+
{
|
|
336
|
+
kind: "video",
|
|
337
|
+
provider: "xai",
|
|
338
|
+
model: result.model ?? model,
|
|
339
|
+
index: 0,
|
|
340
|
+
url,
|
|
341
|
+
bytes,
|
|
342
|
+
...mimeType !== void 0 ? { mimeType } : {}
|
|
343
|
+
}
|
|
344
|
+
];
|
|
345
|
+
}
|
|
346
|
+
var xaiCapabilities = {
|
|
347
|
+
// xAI docs show a single image_url for edits and a single image_url for image-to-video.
|
|
348
|
+
maxInputImages: 1,
|
|
349
|
+
// xAI aspect_ratio examples show "4:3"; docs don't publish a strict allowlist.
|
|
350
|
+
supportsCustomAspectRatio: true,
|
|
351
|
+
supportsVideoInterpolation: false,
|
|
352
|
+
// xAI does not support end frame
|
|
353
|
+
videoDurationRange: [1, 15],
|
|
354
|
+
// 1-15 seconds
|
|
355
|
+
supportsImageEditing: true
|
|
356
|
+
};
|
|
73
357
|
var xaiProvider = {
|
|
74
358
|
id: "xai",
|
|
75
|
-
displayName: "xAI
|
|
359
|
+
displayName: "xAI",
|
|
360
|
+
supports: ["image", "video"],
|
|
361
|
+
capabilities: xaiCapabilities,
|
|
76
362
|
isAvailable(env) {
|
|
77
363
|
return Boolean(getXaiApiKey(env));
|
|
78
364
|
},
|
|
79
365
|
async generate(req, env) {
|
|
80
366
|
const apiKey = getXaiApiKey(env);
|
|
81
367
|
if (!apiKey) throw new Error("Missing xAI API key. Set XAI_API_KEY (or XAI_TOKEN).");
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
// Use URL format to download + save.
|
|
90
|
-
response_format: "url"
|
|
91
|
-
};
|
|
92
|
-
const res = await fetch(`${XAI_API_BASE}/images/generations`, {
|
|
93
|
-
method: "POST",
|
|
94
|
-
headers: {
|
|
95
|
-
authorization: `Bearer ${apiKey}`,
|
|
96
|
-
"content-type": "application/json"
|
|
97
|
-
},
|
|
98
|
-
body: JSON.stringify(body)
|
|
99
|
-
});
|
|
100
|
-
if (!res.ok) {
|
|
101
|
-
const txt = await res.text().catch(() => "");
|
|
102
|
-
throw new Error(`xAI generations failed (${res.status}): ${txt.slice(0, 500)}`);
|
|
103
|
-
}
|
|
104
|
-
const json = await res.json();
|
|
105
|
-
if (!json.data?.length) throw new Error("xAI returned no images");
|
|
106
|
-
const results = [];
|
|
107
|
-
for (let i = 0; i < json.data.length; i++) {
|
|
108
|
-
const img = json.data[i];
|
|
109
|
-
if (!img) continue;
|
|
110
|
-
if (img.url) {
|
|
111
|
-
const { bytes, mimeType } = await downloadBytes(img.url);
|
|
112
|
-
results.push({
|
|
113
|
-
provider: "xai",
|
|
114
|
-
model,
|
|
115
|
-
index: i,
|
|
116
|
-
url: img.url,
|
|
117
|
-
bytes,
|
|
118
|
-
...mimeType !== void 0 ? { mimeType } : {}
|
|
119
|
-
});
|
|
120
|
-
continue;
|
|
121
|
-
}
|
|
122
|
-
if (img.b64_json) {
|
|
123
|
-
const bytes = Uint8Array.from(Buffer.from(img.b64_json, "base64"));
|
|
124
|
-
results.push({ provider: "xai", model, index: i, bytes });
|
|
125
|
-
continue;
|
|
126
|
-
}
|
|
127
|
-
throw new Error("xAI returned image without url or b64_json");
|
|
368
|
+
verboseMode = req.verbose;
|
|
369
|
+
log("Provider initialized, kind:", req.kind);
|
|
370
|
+
if (req.kind === "video") return generateXaiVideo(req, apiKey);
|
|
371
|
+
const hasInputImages = req.inputImages && req.inputImages.length > 0;
|
|
372
|
+
if (hasInputImages) {
|
|
373
|
+
log("Input images detected, using edit endpoint");
|
|
374
|
+
return editXaiImages(req, apiKey);
|
|
128
375
|
}
|
|
129
|
-
return
|
|
376
|
+
return generateXaiImages(req, apiKey);
|
|
130
377
|
}
|
|
131
378
|
};
|
|
132
379
|
|
|
@@ -135,58 +382,200 @@ import { fal } from "@fal-ai/client";
|
|
|
135
382
|
function getFalKey(env) {
|
|
136
383
|
return env.FAL_API_KEY || env.FAL_KEY;
|
|
137
384
|
}
|
|
138
|
-
|
|
385
|
+
function log2(verbose, ...args) {
|
|
386
|
+
if (verbose) console.error("[fal]", ...args);
|
|
387
|
+
}
|
|
388
|
+
async function downloadBytes2(url, verbose) {
|
|
389
|
+
log2(verbose, "Downloading from:", url.slice(0, 100) + "...");
|
|
390
|
+
const start = Date.now();
|
|
139
391
|
const res = await fetch(url);
|
|
140
|
-
if (!res.ok) throw new Error(`fal
|
|
392
|
+
if (!res.ok) throw new Error(`fal download failed (${res.status})`);
|
|
141
393
|
const ab = await res.arrayBuffer();
|
|
142
394
|
const ct = res.headers.get("content-type") || void 0;
|
|
395
|
+
log2(verbose, `Downloaded ${ab.byteLength} bytes in ${Date.now() - start}ms, type: ${ct}`);
|
|
143
396
|
return { bytes: new Uint8Array(ab), mimeType: ct };
|
|
144
397
|
}
|
|
398
|
+
function pickMany(result, kind) {
|
|
399
|
+
if (kind === "image") {
|
|
400
|
+
if (Array.isArray(result.images) && result.images.length) return result.images;
|
|
401
|
+
if (result.image?.url) return [result.image];
|
|
402
|
+
return [];
|
|
403
|
+
}
|
|
404
|
+
if (Array.isArray(result.videos) && result.videos.length) return result.videos;
|
|
405
|
+
if (result.video?.url) return [result.video];
|
|
406
|
+
return [];
|
|
407
|
+
}
|
|
408
|
+
var DEFAULT_IMAGE_MODEL = "fal-ai/flux/dev";
|
|
409
|
+
var DEFAULT_IMAGE_TO_IMAGE_MODEL = "fal-ai/flux/dev/image-to-image";
|
|
410
|
+
var DEFAULT_VIDEO_MODEL = "fal-ai/ltxv-2/text-to-video/fast";
|
|
411
|
+
var DEFAULT_IMAGE_TO_VIDEO_MODEL = "fal-ai/vidu/q2/image-to-video";
|
|
412
|
+
var DEFAULT_START_END_VIDEO_MODEL = "fal-ai/vidu/start-end-to-video";
|
|
413
|
+
var DEFAULT_REFERENCE_VIDEO_MODEL = "fal-ai/vidu/q2/reference-to-video";
|
|
414
|
+
function selectVideoModel(req) {
|
|
415
|
+
if (req.model) return req.model;
|
|
416
|
+
if (req.startFrame && req.endFrame) {
|
|
417
|
+
return DEFAULT_START_END_VIDEO_MODEL;
|
|
418
|
+
}
|
|
419
|
+
if (req.inputImages?.length && !req.startFrame) {
|
|
420
|
+
return DEFAULT_REFERENCE_VIDEO_MODEL;
|
|
421
|
+
}
|
|
422
|
+
if (req.startFrame || req.inputImages?.length) {
|
|
423
|
+
return DEFAULT_IMAGE_TO_VIDEO_MODEL;
|
|
424
|
+
}
|
|
425
|
+
return DEFAULT_VIDEO_MODEL;
|
|
426
|
+
}
|
|
427
|
+
function selectImageModel(req) {
|
|
428
|
+
if (req.model) return req.model;
|
|
429
|
+
if (req.inputImages?.length) return DEFAULT_IMAGE_TO_IMAGE_MODEL;
|
|
430
|
+
return DEFAULT_IMAGE_MODEL;
|
|
431
|
+
}
|
|
432
|
+
function mapAspectRatio(aspectRatio) {
|
|
433
|
+
if (!aspectRatio) return void 0;
|
|
434
|
+
const ar = aspectRatio.trim();
|
|
435
|
+
if (ar === "1:1") return "square";
|
|
436
|
+
if (ar === "4:3") return "landscape_4_3";
|
|
437
|
+
if (ar === "16:9") return "landscape_16_9";
|
|
438
|
+
if (ar === "3:4") return "portrait_4_3";
|
|
439
|
+
if (ar === "9:16") return "portrait_16_9";
|
|
440
|
+
return ar;
|
|
441
|
+
}
|
|
442
|
+
function buildVideoInput(req) {
|
|
443
|
+
const input = {
|
|
444
|
+
prompt: req.prompt
|
|
445
|
+
};
|
|
446
|
+
if (req.startFrame && req.endFrame) {
|
|
447
|
+
input.start_image_url = req.startFrame;
|
|
448
|
+
input.end_image_url = req.endFrame;
|
|
449
|
+
return input;
|
|
450
|
+
}
|
|
451
|
+
if (req.inputImages?.length && !req.startFrame) {
|
|
452
|
+
input.reference_image_urls = req.inputImages.slice(0, 7);
|
|
453
|
+
const ar = mapAspectRatio(req.aspectRatio);
|
|
454
|
+
if (ar) input.aspect_ratio = ar;
|
|
455
|
+
if (req.duration) input.duration = String(req.duration);
|
|
456
|
+
return input;
|
|
457
|
+
}
|
|
458
|
+
const imageUrl = req.startFrame ?? req.inputImages?.[0];
|
|
459
|
+
if (imageUrl) {
|
|
460
|
+
input.image_url = imageUrl;
|
|
461
|
+
if (req.duration) input.duration = String(req.duration);
|
|
462
|
+
return input;
|
|
463
|
+
}
|
|
464
|
+
const imageSize = mapAspectRatio(req.aspectRatio);
|
|
465
|
+
if (imageSize) input.image_size = imageSize;
|
|
466
|
+
if (req.n) input.num_videos = req.n;
|
|
467
|
+
return input;
|
|
468
|
+
}
|
|
469
|
+
function buildImageInput(req) {
|
|
470
|
+
const input = {
|
|
471
|
+
prompt: req.prompt
|
|
472
|
+
};
|
|
473
|
+
const imageSize = mapAspectRatio(req.aspectRatio);
|
|
474
|
+
if (imageSize) input.image_size = imageSize;
|
|
475
|
+
if (req.n) input.num_images = req.n;
|
|
476
|
+
if (req.inputImages?.[0]) {
|
|
477
|
+
input.image_url = req.inputImages[0];
|
|
478
|
+
input.strength = 0.75;
|
|
479
|
+
}
|
|
480
|
+
return input;
|
|
481
|
+
}
|
|
482
|
+
var falCapabilities = {
|
|
483
|
+
maxInputImages: 7,
|
|
484
|
+
// Vidu supports up to 7 reference images
|
|
485
|
+
// fal models vary. We map common ratios to enums, but also allow custom pass-through.
|
|
486
|
+
supportsCustomAspectRatio: true,
|
|
487
|
+
supportsVideoInterpolation: true,
|
|
488
|
+
// Vidu start-end-to-video
|
|
489
|
+
videoDurationRange: [2, 8],
|
|
490
|
+
// Vidu supports 2-8 seconds
|
|
491
|
+
supportsImageEditing: true
|
|
492
|
+
};
|
|
145
493
|
var falProvider = {
|
|
146
494
|
id: "fal",
|
|
147
495
|
displayName: "fal.ai",
|
|
496
|
+
supports: ["image", "video"],
|
|
497
|
+
capabilities: falCapabilities,
|
|
148
498
|
isAvailable(env) {
|
|
149
499
|
return Boolean(getFalKey(env));
|
|
150
500
|
},
|
|
151
501
|
async generate(req, env) {
|
|
152
502
|
const key = getFalKey(env);
|
|
153
503
|
if (!key) throw new Error("Missing fal API key. Set FAL_KEY (or FAL_API_KEY).");
|
|
504
|
+
const verbose = req.verbose;
|
|
505
|
+
log2(verbose, "Starting generation, kind:", req.kind, "n:", req.n);
|
|
506
|
+
log2(
|
|
507
|
+
verbose,
|
|
508
|
+
"Input images:",
|
|
509
|
+
req.inputImages?.length ?? 0,
|
|
510
|
+
"startFrame:",
|
|
511
|
+
!!req.startFrame,
|
|
512
|
+
"endFrame:",
|
|
513
|
+
!!req.endFrame
|
|
514
|
+
);
|
|
154
515
|
fal.config({ credentials: key });
|
|
155
|
-
const model = req.
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
else if (ar === "9:16") image_size = "portrait_16_9";
|
|
516
|
+
const model = req.kind === "video" ? selectVideoModel(req) : selectImageModel(req);
|
|
517
|
+
log2(verbose, "Selected model:", model);
|
|
518
|
+
const input = req.kind === "video" ? buildVideoInput(req) : buildImageInput(req);
|
|
519
|
+
const inputSummary = { ...input };
|
|
520
|
+
for (const key2 of ["image_url", "start_image_url", "end_image_url"]) {
|
|
521
|
+
if (typeof inputSummary[key2] === "string" && inputSummary[key2].startsWith("data:")) {
|
|
522
|
+
inputSummary[key2] = `data:...${inputSummary[key2].length} chars`;
|
|
523
|
+
}
|
|
164
524
|
}
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
525
|
+
if (Array.isArray(inputSummary.reference_image_urls)) {
|
|
526
|
+
inputSummary.reference_image_urls = inputSummary.reference_image_urls.map(
|
|
527
|
+
(url) => url.startsWith("data:") ? `data:...${url.length} chars` : url
|
|
528
|
+
);
|
|
529
|
+
}
|
|
530
|
+
log2(verbose, "Request input:", JSON.stringify(inputSummary));
|
|
531
|
+
log2(verbose, "Calling fal.subscribe...");
|
|
532
|
+
const startTime = Date.now();
|
|
533
|
+
const subscribeOptions = {
|
|
534
|
+
input,
|
|
535
|
+
logs: verbose
|
|
170
536
|
};
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
537
|
+
if (verbose) {
|
|
538
|
+
subscribeOptions.onQueueUpdate = (update) => {
|
|
539
|
+
log2(true, "Queue update:", update.status, JSON.stringify(update).slice(0, 200));
|
|
540
|
+
};
|
|
541
|
+
}
|
|
542
|
+
const result = await fal.subscribe(model, subscribeOptions);
|
|
543
|
+
log2(verbose, `fal.subscribe completed in ${Date.now() - startTime}ms`);
|
|
544
|
+
log2(verbose, "Raw result keys:", Object.keys(result?.data ?? {}));
|
|
545
|
+
log2(verbose, "Result preview:", JSON.stringify(result?.data ?? {}).slice(0, 500));
|
|
546
|
+
const items = pickMany(result?.data ?? {}, req.kind);
|
|
547
|
+
log2(verbose, `Found ${items.length} ${req.kind}(s) in response`);
|
|
548
|
+
if (!items?.length) {
|
|
549
|
+
const noun = req.kind === "video" ? "videos" : "images";
|
|
550
|
+
throw new Error(
|
|
551
|
+
`fal returned no ${noun}. Raw response: ${JSON.stringify(result?.data).slice(0, 300)}`
|
|
552
|
+
);
|
|
553
|
+
}
|
|
174
554
|
const out = [];
|
|
175
|
-
for (let i = 0; i < Math.min(
|
|
176
|
-
const
|
|
177
|
-
if (!
|
|
178
|
-
|
|
179
|
-
|
|
555
|
+
for (let i = 0; i < Math.min(items.length, req.n); i++) {
|
|
556
|
+
const m = items[i];
|
|
557
|
+
if (!m?.url) {
|
|
558
|
+
log2(verbose, `Item ${i} has no URL, skipping`);
|
|
559
|
+
continue;
|
|
560
|
+
}
|
|
561
|
+
log2(verbose, `Downloading item ${i}...`);
|
|
562
|
+
const { bytes, mimeType } = await downloadBytes2(m.url, verbose);
|
|
563
|
+
const finalMimeType = m.content_type ?? mimeType;
|
|
180
564
|
out.push({
|
|
565
|
+
kind: req.kind,
|
|
181
566
|
provider: "fal",
|
|
182
567
|
model,
|
|
183
568
|
index: i,
|
|
184
|
-
url:
|
|
569
|
+
url: m.url,
|
|
185
570
|
bytes,
|
|
186
571
|
...finalMimeType !== void 0 ? { mimeType: finalMimeType } : {}
|
|
187
572
|
});
|
|
188
573
|
}
|
|
189
|
-
if (!out.length)
|
|
574
|
+
if (!out.length) {
|
|
575
|
+
const noun = req.kind === "video" ? "videos" : "images";
|
|
576
|
+
throw new Error(`fal returned ${noun} but none were downloadable`);
|
|
577
|
+
}
|
|
578
|
+
log2(verbose, `Successfully generated ${out.length} ${req.kind}(s)`);
|
|
190
579
|
return out;
|
|
191
580
|
}
|
|
192
581
|
};
|
|
@@ -196,7 +585,7 @@ import { GoogleGenAI } from "@google/genai";
|
|
|
196
585
|
function getGeminiApiKey(env) {
|
|
197
586
|
return env.GEMINI_API_KEY || env.GOOGLE_API_KEY || env.GOOGLE_GENAI_API_KEY;
|
|
198
587
|
}
|
|
199
|
-
function
|
|
588
|
+
function mimeForImageFormat(format) {
|
|
200
589
|
switch (format) {
|
|
201
590
|
case "jpg":
|
|
202
591
|
return "image/jpeg";
|
|
@@ -207,62 +596,367 @@ function mimeForFormat(format) {
|
|
|
207
596
|
return "image/png";
|
|
208
597
|
}
|
|
209
598
|
}
|
|
599
|
+
var verboseMode2 = false;
|
|
600
|
+
function log3(...args) {
|
|
601
|
+
if (verboseMode2) console.error("[google]", ...args);
|
|
602
|
+
}
|
|
603
|
+
var MODEL_ALIASES = {
|
|
604
|
+
"nano-banana": "gemini-2.5-flash-image",
|
|
605
|
+
"nano-banana-pro": "gemini-3-pro-image-preview",
|
|
606
|
+
// Veo (video)
|
|
607
|
+
veo2: "veo-2.0-generate-001",
|
|
608
|
+
"veo-2": "veo-2.0-generate-001",
|
|
609
|
+
veo3: "veo-3.0-generate-001",
|
|
610
|
+
"veo-3": "veo-3.0-generate-001",
|
|
611
|
+
"veo-3.1": "veo-3.1-generate-preview",
|
|
612
|
+
veo31: "veo-3.1-generate-preview"
|
|
613
|
+
};
|
|
614
|
+
var VEO_31_MODELS = ["veo-3.1-generate-preview", "veo-3.1-fast-generate-preview"];
|
|
615
|
+
function isVeo31Model(model) {
|
|
616
|
+
return VEO_31_MODELS.some((m) => model.includes(m) || model.includes("veo-3.1"));
|
|
617
|
+
}
|
|
618
|
+
function parseDataUri(dataUri) {
|
|
619
|
+
const match = dataUri.match(/^data:([^;]+);base64,(.+)$/);
|
|
620
|
+
if (!match) return null;
|
|
621
|
+
return { mimeType: match[1] ?? "image/png", data: match[2] ?? "" };
|
|
622
|
+
}
|
|
623
|
+
function imageToGoogleFormat(imageInput) {
|
|
624
|
+
if (imageInput.startsWith("data:")) {
|
|
625
|
+
const parsed = parseDataUri(imageInput);
|
|
626
|
+
if (parsed) {
|
|
627
|
+
return { inlineData: { data: parsed.data, mimeType: parsed.mimeType } };
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
return { fileUri: imageInput };
|
|
631
|
+
}
|
|
632
|
+
var GEMINI_IMAGE_MODELS = ["gemini-2.5-flash-image", "gemini-3-pro-image-preview"];
|
|
633
|
+
function resolveModel(model) {
|
|
634
|
+
if (!model) return "gemini-2.5-flash-image";
|
|
635
|
+
return MODEL_ALIASES[model] ?? model;
|
|
636
|
+
}
|
|
637
|
+
function isGeminiImageModel(model) {
|
|
638
|
+
return GEMINI_IMAGE_MODELS.some((m) => model.startsWith(m));
|
|
639
|
+
}
|
|
640
|
+
async function downloadBytes3(url) {
|
|
641
|
+
log3("Downloading from:", url.slice(0, 100) + "...");
|
|
642
|
+
const start = Date.now();
|
|
643
|
+
const res = await fetch(url);
|
|
644
|
+
if (!res.ok) throw new Error(`Google video download failed (${res.status})`);
|
|
645
|
+
const ab = await res.arrayBuffer();
|
|
646
|
+
const ct = res.headers.get("content-type") || void 0;
|
|
647
|
+
log3(`Downloaded ${ab.byteLength} bytes in ${Date.now() - start}ms, type: ${ct}`);
|
|
648
|
+
return { bytes: new Uint8Array(ab), mimeType: ct };
|
|
649
|
+
}
|
|
650
|
+
async function sleep2(ms) {
|
|
651
|
+
await new Promise((r) => setTimeout(r, ms));
|
|
652
|
+
}
|
|
653
|
+
var googleCapabilities = {
|
|
654
|
+
maxInputImages: 3,
|
|
655
|
+
// Veo 3.1 supports up to 3 reference images
|
|
656
|
+
// Imagen / Veo aspect ratio is expressed as "w:h" (e.g. "16:9").
|
|
657
|
+
// Public docs/examples focus on the common set below.
|
|
658
|
+
supportedAspectRatios: ["1:1", "4:3", "3:4", "16:9", "9:16"],
|
|
659
|
+
supportsVideoInterpolation: true,
|
|
660
|
+
// Veo 3.1 supports first + last frame
|
|
661
|
+
videoDurationRange: [4, 8],
|
|
662
|
+
// Veo 3.1 supports 4, 6, 8 seconds
|
|
663
|
+
supportsImageEditing: true
|
|
664
|
+
};
|
|
210
665
|
// Provider adapter for Google's generative media APIs: Gemini native image
// generation, Imagen, and Veo video generation. Dispatches on req.kind and
// on the resolved model name.
var googleProvider = {
  id: "google",
  displayName: "Google (Gemini / Imagen / Veo)",
  supports: ["image", "video"],
  capabilities: googleCapabilities,
  // Available whenever a Gemini/Google API key is present in the environment.
  isAvailable(env) {
    return Boolean(getGeminiApiKey(env));
  },
  // Generate images or videos for the normalized request `req`.
  // Throws if no API key is configured; otherwise routes to the Veo, Gemini,
  // or Imagen code path and returns their partial result items.
  async generate(req, env) {
    const apiKey = getGeminiApiKey(env);
    if (!apiKey) throw new Error("Missing Google API key. Set GEMINI_API_KEY (or GOOGLE_API_KEY).");
    // Module-level verbose flag feeds log3(); set per-request before any logging.
    verboseMode2 = req.verbose;
    log3("Provider initialized, kind:", req.kind);
    log3(
      "Input images:",
      req.inputImages?.length ?? 0,
      "startFrame:",
      !!req.startFrame,
      "endFrame:",
      !!req.endFrame
    );
    const ai = new GoogleGenAI({ apiKey });
    if (req.kind === "video") {
      // Frames or reference images require Veo 3.1; plain text-to-video
      // defaults to the cheaper/stabler Veo 2 model.
      const hasAdvancedFeatures = req.startFrame || req.endFrame || req.inputImages?.length;
      const defaultModel = hasAdvancedFeatures ? "veo-3.1-generate-preview" : "veo-2.0-generate-001";
      // Alias lookup first, then the explicit model, then the default.
      const model2 = MODEL_ALIASES[req.model ?? ""] ?? req.model ?? defaultModel;
      log3("Using video model:", model2);
      if (hasAdvancedFeatures && !isVeo31Model(model2)) {
        // Warn (don't fail): the request proceeds but advanced inputs are
        // ignored downstream by the isVeo31Model guards in generateWithVeo.
        log3(
          "WARNING: Advanced video features (startFrame, endFrame, referenceImages) require Veo 3.1"
        );
      }
      return generateWithVeo(ai, model2, req);
    }
    const model = resolveModel(req.model);
    log3("Resolved model:", model);
    if (isGeminiImageModel(model)) {
      log3("Using Gemini native image generation");
      return generateWithGemini(ai, model, req);
    }
    log3("Using Imagen API");
    return generateWithImagen(ai, model, req);
  }
};
|
|
709
|
+
// Generate one or more videos with Veo via the long-running-operation API:
// submit, poll until done (up to ~10 minutes), then download each result.
// `ai` is a GoogleGenAI client, `model` the resolved Veo model id, `req`
// the normalized request. Returns partial video items (bytes + metadata).
async function generateWithVeo(ai, model, req) {
  log3("Starting Veo video generation, model:", model, "n:", req.n);
  const startTime = Date.now();
  const config = {
    numberOfVideos: req.n,
    ...req.aspectRatio ? { aspectRatio: req.aspectRatio } : {},
    // Add duration if specified (Veo 3.1 supports 4, 6, 8)
    ...req.duration !== void 0 ? { durationSeconds: String(req.duration) } : {}
  };
  // Reference images (style/subject conditioning) — Veo 3.1 only, max 3.
  if (req.inputImages?.length && isVeo31Model(model)) {
    const referenceImages = req.inputImages.slice(0, 3).map((img) => {
      const imageData = imageToGoogleFormat(img);
      return {
        image: imageData,
        referenceType: "asset"
      };
    });
    config.referenceImages = referenceImages;
    log3("Added", referenceImages.length, "reference images");
  }
  const generateParams = {
    model,
    prompt: req.prompt,
    config
  };
  // A single inputImage doubles as the start frame when no explicit
  // startFrame was given (image-to-video convenience).
  const firstFrameImage = req.startFrame ?? (req.inputImages?.length === 1 ? req.inputImages[0] : void 0);
  if (firstFrameImage && isVeo31Model(model)) {
    const imageData = imageToGoogleFormat(firstFrameImage);
    generateParams.image = imageData;
    log3("Added first frame image");
  }
  if (req.endFrame && isVeo31Model(model)) {
    // End frame goes into config (interpolation between first and last frame).
    const lastFrameData = imageToGoogleFormat(req.endFrame);
    config.lastFrame = lastFrameData;
    log3("Added last frame for interpolation");
  }
  log3("Calling ai.models.generateVideos...");
  let op = await ai.models.generateVideos(generateParams);
  log3("Initial operation state:", op.done ? "done" : "pending", "name:", op.name);
  // Poll every 10s for up to 60 attempts (~10 minutes total).
  const maxAttempts = 60;
  const intervalMs = 1e4;
  for (let attempt = 0; attempt < maxAttempts && !op.done; attempt++) {
    log3(`Poll attempt ${attempt + 1}/${maxAttempts}...`);
    await sleep2(intervalMs);
    op = await ai.operations.getVideosOperation({ operation: op });
    log3(`Poll result: done=${op.done}`);
  }
  log3(`Operation completed in ${Date.now() - startTime}ms`);
  if (!op.done) {
    log3("Timed out. Operation state:", JSON.stringify(op).slice(0, 500));
    throw new Error("Google Veo video generation timed out");
  }
  const videos = op.response?.generatedVideos;
  log3("Generated videos count:", videos?.length);
  if (!videos?.length) {
    log3("Full response:", JSON.stringify(op.response).slice(0, 1e3));
    throw new Error("Google Veo returned no videos");
  }
  const out = [];
  for (let i = 0; i < Math.min(videos.length, req.n); i++) {
    const v = videos[i];
    log3(`Processing video ${i}:`, JSON.stringify(v).slice(0, 300));
    const uri = v?.video?.uri;
    if (!uri) {
      log3(`Video ${i} has no URI, skipping`);
      continue;
    }
    if (uri.startsWith("gs://")) {
      // GCS URIs can't be fetched with plain HTTP; surface a setup hint.
      throw new Error(
        `Google Veo returned a gs:// URI (${uri}). Configure outputGcsUri / Vertex flow to fetch from GCS.`
      );
    }
    const { bytes, mimeType } = await downloadBytes3(uri);
    out.push({
      kind: "video",
      provider: "google",
      model,
      index: i,
      url: uri,
      bytes,
      // Only include mimeType when the download reported one.
      ...mimeType !== void 0 ? { mimeType } : {}
    });
  }
  if (!out.length) throw new Error("Google Veo returned videos but none were downloadable");
  log3(`Successfully generated ${out.length} video(s)`);
  return out;
}
|
|
796
|
+
// Generate images with Gemini's native image output, one generateContent
// call per requested image (the API yields one image per call here).
// Supports an optional single input image for image-conditioned generation.
async function generateWithGemini(ai, model, req) {
  const hasInputImage = req.inputImages?.length;
  log3(
    "Starting Gemini image generation, model:",
    model,
    "n:",
    req.n,
    "hasInputImage:",
    !!hasInputImage
  );
  const startTime = Date.now();
  const out = [];
  // Contents are either [image, text] parts (editing) or the bare prompt.
  const buildContents = () => {
    if (hasInputImage && req.inputImages?.[0]) {
      const imageData = imageToGoogleFormat(req.inputImages[0]);
      return [{ ...imageData }, { text: req.prompt }];
    }
    return req.prompt;
  };
  for (let i = 0; i < req.n; i++) {
    log3(`Generating image ${i + 1}/${req.n}...`);
    const callStart = Date.now();
    try {
      const res = await ai.models.generateContent({
        model,
        contents: buildContents(),
        config: {
          responseModalities: ["IMAGE"]
        }
      });
      log3(`API call ${i + 1} took ${Date.now() - callStart}ms`);
      const parts = res.candidates?.[0]?.content?.parts;
      log3(`Response has ${parts?.length ?? 0} parts`);
      if (!parts) {
        // Missing parts is treated as a soft failure for this index;
        // the empty-result check below reports if nothing succeeded.
        log3(
          `No parts in response for image ${i}. Full response:`,
          JSON.stringify(res).slice(0, 500)
        );
        continue;
      }
      for (const part of parts) {
        if (part.inlineData?.data) {
          const rawBytes = part.inlineData.data;
          // inlineData may be base64 text or already-binary data.
          const bytes = typeof rawBytes === "string" ? Uint8Array.from(Buffer.from(rawBytes, "base64")) : rawBytes;
          log3(`Image ${i}: got ${bytes.byteLength} bytes, mimeType: ${part.inlineData.mimeType}`);
          out.push({
            kind: "image",
            provider: "google",
            model,
            index: i,
            bytes,
            mimeType: part.inlineData.mimeType ?? mimeForImageFormat(req.format)
          });
          // Only the first image part per response is kept.
          break;
        }
      }
    } catch (err) {
      log3(`Error generating image ${i}:`, err);
      throw err;
    }
  }
  log3(`Total generation time: ${Date.now() - startTime}ms`);
  if (!out.length) throw new Error("Gemini returned no images");
  log3(`Successfully generated ${out.length} image(s)`);
  return out;
}
|
|
862
|
+
// Generate images with the Imagen batch API (one call produces up to
// req.n images). Returns partial image items; throws when the API yields
// no usable image bytes.
async function generateWithImagen(ai, model, req) {
  log3("Starting Imagen generation, model:", model, "n:", req.n);
  const startTime = Date.now();
  log3("Calling ai.models.generateImages...");
  const res = await ai.models.generateImages({
    model,
    prompt: req.prompt,
    config: {
      numberOfImages: req.n,
      outputMimeType: mimeForImageFormat(req.format),
      // Imagen 4 supports aspectRatio
      ...req.aspectRatio ? { aspectRatio: req.aspectRatio } : {}
    }
  });
  log3(`API call took ${Date.now() - startTime}ms`);
  const imgs = res.generatedImages;
  log3("Generated images count:", imgs?.length);
  if (!imgs?.length) {
    log3("Full response:", JSON.stringify(res).slice(0, 1e3));
    throw new Error("Google generateImages returned no images");
  }
  const out = [];
  for (let i = 0; i < Math.min(imgs.length, req.n); i++) {
    const img = imgs[i];
    const rawBytes = img?.image?.imageBytes;
    if (!rawBytes) {
      log3(`Image ${i} has no bytes, skipping`);
      continue;
    }
    // imageBytes may arrive base64-encoded or as raw binary.
    const bytes = typeof rawBytes === "string" ? Uint8Array.from(Buffer.from(rawBytes, "base64")) : rawBytes;
    log3(`Image ${i}: got ${bytes.byteLength} bytes`);
    out.push({
      kind: "image",
      provider: "google",
      model,
      index: i,
      bytes,
      // Imagen responses don't carry a mime type; derive it from the
      // requested output format.
      mimeType: mimeForImageFormat(req.format)
    });
  }
  if (!out.length) throw new Error("Google returned images but no bytes were present");
  log3(`Successfully generated ${out.length} image(s)`);
  return out;
}
|
|
250
906
|
|
|
251
907
|
// src/providers/openai.ts
|
|
252
908
|
var OPENAI_API_BASE = "https://api.openai.com/v1";
|
|
253
909
|
// Read the OpenAI API key from the environment, preferring the canonical
// OPENAI_API_KEY and falling back to the OPENAI_KEY alias.
function getOpenAIApiKey(env) {
  const { OPENAI_API_KEY, OPENAI_KEY } = env;
  return OPENAI_API_KEY || OPENAI_KEY;
}
|
|
256
|
-
|
|
912
|
+
// Toggled per-request by the OpenAI provider's generate() entry point.
var verboseMode3 = false;
// Emit a namespaced diagnostic line on stderr when verbose mode is enabled.
function log4(...details) {
  if (!verboseMode3) return;
  console.error("[openai]", ...details);
}
|
|
916
|
+
// Decode a base64 `data:` URI into a Blob carrying the declared mime type.
// Throws for anything that is not a base64 data URI.
function dataUriToBlob(dataUri) {
  const parsed = /^data:([^;]+);base64,(.+)$/.exec(dataUri);
  if (!parsed) throw new Error("Invalid data URI");
  const [, type = "image/png", payload = ""] = parsed;
  const binary = Buffer.from(payload, "base64");
  return new Blob([binary], { type });
}
|
|
924
|
+
// Fetch a remote image over HTTP and return its body as a Blob.
// Throws with the HTTP status on any non-2xx response.
async function urlToBlob(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch image: ${response.status}`);
  }
  return response.blob();
}
|
|
929
|
+
// Turn an image input (data URI or http(s) URL) into a Blob:
// data URIs are decoded locally, everything else is fetched.
async function imageInputToBlob(input) {
  return input.startsWith("data:") ? dataUriToBlob(input) : urlToBlob(input);
}
|
|
935
|
+
// Download a generated image from `url` and return its bytes, plus the
// response's content-type when the server reported one.
async function downloadBytes4(url) {
  log4("Downloading from:", url.slice(0, 100) + "...");
  const startedAt = Date.now();
  const response = await fetch(url);
  if (!response.ok) throw new Error(`OpenAI image download failed (${response.status})`);
  const buffer = await response.arrayBuffer();
  const contentType = response.headers.get("content-type");
  log4(`Downloaded ${buffer.byteLength} bytes in ${Date.now() - startedAt}ms, type: ${contentType}`);
  const bytes = new Uint8Array(buffer);
  if (contentType) {
    return { bytes, mimeType: contentType };
  }
  // Omit mimeType entirely (rather than setting undefined) when unknown.
  return { bytes };
}
|
|
945
|
+
// List the aspect ratios an OpenAI image model accepts; empty array when
// the model is not recognized.
function supportedAspectRatiosForModel(model) {
  // gpt-image-* models accept the widest set of ratios.
  if (model.startsWith("gpt-image")) {
    return ["1:1", "3:2", "4:3", "16:9", "2:3", "3:4", "9:16"];
  }
  switch (model) {
    case "dall-e-3":
      return ["1:1", "4:3", "16:9", "3:4", "9:16"];
    case "dall-e-2":
      return ["1:1"];
    default:
      return [];
  }
}
|
|
263
957
|
function mapAspectRatioToSize(aspectRatio, model) {
|
|
264
958
|
if (!aspectRatio) return void 0;
|
|
265
|
-
const ar = aspectRatio.trim();
|
|
959
|
+
const ar = aspectRatio.trim().replace(/\s+/g, "");
|
|
266
960
|
if (model?.startsWith("gpt-image")) {
|
|
267
961
|
if (ar === "1:1") return "1024x1024";
|
|
268
962
|
if (ar === "3:2" || ar === "4:3" || ar === "16:9") return "1536x1024";
|
|
@@ -271,20 +965,117 @@ function mapAspectRatioToSize(aspectRatio, model) {
|
|
|
271
965
|
if (ar === "1:1") return "1024x1024";
|
|
272
966
|
if (ar === "16:9" || ar === "4:3") return "1792x1024";
|
|
273
967
|
if (ar === "9:16" || ar === "3:4") return "1024x1792";
|
|
968
|
+
} else if (model === "dall-e-2") {
|
|
969
|
+
if (ar === "1:1") return "1024x1024";
|
|
274
970
|
}
|
|
275
971
|
return void 0;
|
|
276
972
|
}
|
|
973
|
+
// Capability matrix the router uses to validate requests before dispatching
// to the OpenAI provider.
var openaiCapabilities = {
  // image + optional mask
  maxInputImages: 2,
  // OpenAI doesn't support video
  // videoDurationRange omitted - no video support
  supportsVideoInterpolation: false,
  supportsImageEditing: true
};
|
|
981
|
+
// Edit an image via OpenAI's /images/edits multipart endpoint. The first
// entry of req.inputImages is the image to edit, the optional second entry
// is a mask. Returns partial image items; throws on HTTP or payload errors.
async function generateWithEdit(req, apiKey, model) {
  log4("Using edit endpoint for image editing");
  const startTime = Date.now();
  const formData = new FormData();
  formData.append("model", model);
  formData.append("prompt", req.prompt);
  formData.append("n", String(req.n));
  const size = mapAspectRatioToSize(req.aspectRatio, model);
  // A requested aspect ratio that maps to no size is unsupported — fail
  // early with the model's supported list instead of letting the API 400.
  if (req.aspectRatio && !size) {
    const supported = supportedAspectRatiosForModel(model);
    throw new Error(
      `OpenAI model ${model} does not support aspect ratio "${req.aspectRatio}". Supported: ${supported.length ? supported.join(", ") : "unknown (model not recognized)"}`
    );
  }
  if (size) formData.append("size", size);
  const imageInput = req.inputImages?.[0];
  if (!imageInput) throw new Error("No input image provided for editing");
  const imageBlob = await imageInputToBlob(imageInput);
  formData.append("image", imageBlob, "image.png");
  log4("Added input image to form data");
  // Second input image, when present, is treated as the edit mask.
  const maskInput = req.inputImages?.[1];
  if (maskInput) {
    const maskBlob = await imageInputToBlob(maskInput);
    formData.append("mask", maskBlob, "mask.png");
    log4("Added mask image to form data");
  }
  log4("Calling OpenAI images/edits...");
  const res = await fetch(`${OPENAI_API_BASE}/images/edits`, {
    method: "POST",
    headers: {
      authorization: `Bearer ${apiKey}`
      // Don't set content-type - FormData sets it with boundary
    },
    body: formData
  });
  log4(`API responded in ${Date.now() - startTime}ms, status: ${res.status}`);
  if (!res.ok) {
    const txt = await res.text().catch(() => "");
    log4("Error response:", txt.slice(0, 1e3));
    throw new Error(`OpenAI edit failed (${res.status}): ${txt.slice(0, 500)}`);
  }
  const json = await res.json();
  log4("Response data count:", json.data?.length);
  if (!json.data?.length) throw new Error("OpenAI edit returned no images");
  const results = [];
  for (let i = 0; i < json.data.length; i++) {
    const img = json.data[i];
    if (!img) continue;
    log4(`Processing image ${i}...`);
    // Responses carry either a hosted URL or inline base64 data.
    if (img.url) {
      const dl = await downloadBytes4(img.url);
      results.push({
        kind: "image",
        provider: "openai",
        model,
        index: i,
        url: img.url,
        bytes: dl.bytes,
        ...dl.mimeType ? { mimeType: dl.mimeType } : {}
      });
      continue;
    }
    if (img.b64_json) {
      log4(`Image ${i} is base64 encoded, ${img.b64_json.length} chars`);
      const bytes = Uint8Array.from(Buffer.from(img.b64_json, "base64"));
      results.push({ kind: "image", provider: "openai", model, index: i, bytes });
      continue;
    }
    throw new Error("OpenAI returned image without url or b64_json");
  }
  log4(`Successfully edited ${results.length} image(s)`);
  return results;
}
|
|
277
1054
|
var openaiProvider = {
|
|
278
1055
|
id: "openai",
|
|
279
1056
|
displayName: "OpenAI (GPT Image / DALL-E)",
|
|
1057
|
+
supports: ["image"],
|
|
1058
|
+
capabilities: openaiCapabilities,
|
|
280
1059
|
isAvailable(env) {
|
|
281
1060
|
return Boolean(getOpenAIApiKey(env));
|
|
282
1061
|
},
|
|
283
1062
|
async generate(req, env) {
|
|
284
1063
|
const apiKey = getOpenAIApiKey(env);
|
|
285
1064
|
if (!apiKey) throw new Error("Missing OpenAI API key. Set OPENAI_API_KEY.");
|
|
1065
|
+
verboseMode3 = req.verbose;
|
|
1066
|
+
log4("Provider initialized, kind:", req.kind);
|
|
286
1067
|
const model = req.model ?? "gpt-image-1";
|
|
1068
|
+
log4("Using model:", model, "hasInputImages:", !!req.inputImages?.length);
|
|
1069
|
+
if (req.inputImages?.length) {
|
|
1070
|
+
return generateWithEdit(req, apiKey, model);
|
|
1071
|
+
}
|
|
287
1072
|
const size = mapAspectRatioToSize(req.aspectRatio, model);
|
|
1073
|
+
if (req.aspectRatio && !size) {
|
|
1074
|
+
const supported = supportedAspectRatiosForModel(model);
|
|
1075
|
+
throw new Error(
|
|
1076
|
+
`OpenAI model ${model} does not support aspect ratio "${req.aspectRatio}". Supported: ${supported.length ? supported.join(", ") : "unknown (model not recognized)"}`
|
|
1077
|
+
);
|
|
1078
|
+
}
|
|
288
1079
|
const body = {
|
|
289
1080
|
model,
|
|
290
1081
|
prompt: req.prompt,
|
|
@@ -294,6 +1085,9 @@ var openaiProvider = {
|
|
|
294
1085
|
// dall-e-2/3 support response_format
|
|
295
1086
|
...!model.startsWith("gpt-image") ? { response_format: "url" } : {}
|
|
296
1087
|
};
|
|
1088
|
+
log4("Request body:", JSON.stringify(body));
|
|
1089
|
+
log4("Calling OpenAI images/generations...");
|
|
1090
|
+
const startTime = Date.now();
|
|
297
1091
|
const res = await fetch(`${OPENAI_API_BASE}/images/generations`, {
|
|
298
1092
|
method: "POST",
|
|
299
1093
|
headers: {
|
|
@@ -302,19 +1096,24 @@ var openaiProvider = {
|
|
|
302
1096
|
},
|
|
303
1097
|
body: JSON.stringify(body)
|
|
304
1098
|
});
|
|
1099
|
+
log4(`API responded in ${Date.now() - startTime}ms, status: ${res.status}`);
|
|
305
1100
|
if (!res.ok) {
|
|
306
1101
|
const txt = await res.text().catch(() => "");
|
|
1102
|
+
log4("Error response:", txt.slice(0, 1e3));
|
|
307
1103
|
throw new Error(`OpenAI generations failed (${res.status}): ${txt.slice(0, 500)}`);
|
|
308
1104
|
}
|
|
309
1105
|
const json = await res.json();
|
|
1106
|
+
log4("Response data count:", json.data?.length);
|
|
310
1107
|
if (!json.data?.length) throw new Error("OpenAI returned no images");
|
|
311
1108
|
const results = [];
|
|
312
1109
|
for (let i = 0; i < json.data.length; i++) {
|
|
313
1110
|
const img = json.data[i];
|
|
314
1111
|
if (!img) continue;
|
|
1112
|
+
log4(`Processing image ${i}...`);
|
|
315
1113
|
if (img.url) {
|
|
316
|
-
const dl = await
|
|
1114
|
+
const dl = await downloadBytes4(img.url);
|
|
317
1115
|
results.push({
|
|
1116
|
+
kind: "image",
|
|
318
1117
|
provider: "openai",
|
|
319
1118
|
model,
|
|
320
1119
|
index: i,
|
|
@@ -325,18 +1124,23 @@ var openaiProvider = {
|
|
|
325
1124
|
continue;
|
|
326
1125
|
}
|
|
327
1126
|
if (img.b64_json) {
|
|
1127
|
+
log4(`Image ${i} is base64 encoded, ${img.b64_json.length} chars`);
|
|
328
1128
|
const bytes = Uint8Array.from(Buffer.from(img.b64_json, "base64"));
|
|
329
|
-
results.push({ provider: "openai", model, index: i, bytes });
|
|
1129
|
+
results.push({ kind: "image", provider: "openai", model, index: i, bytes });
|
|
330
1130
|
continue;
|
|
331
1131
|
}
|
|
332
1132
|
throw new Error("OpenAI returned image without url or b64_json");
|
|
333
1133
|
}
|
|
1134
|
+
log4(`Successfully generated ${results.length} image(s)`);
|
|
334
1135
|
return results;
|
|
335
1136
|
}
|
|
336
1137
|
};
|
|
337
1138
|
|
|
338
1139
|
// src/core/router.ts
|
|
339
1140
|
var providers = [googleProvider, xaiProvider, falProvider, openaiProvider];
|
|
1141
|
+
// Router-scoped diagnostic logging: prints to stderr only when the caller
// passes verbose=true (the router has no module-level verbose flag).
function log5(verbose, ...details) {
  if (!verbose) return;
  console.error("[router]", ...details);
}
|
|
340
1144
|
// Return a defensive shallow copy of the registered providers so callers
// cannot mutate the internal registry.
function listProviders() {
  return providers.slice();
}
|
|
@@ -354,44 +1158,139 @@ function pickProvider(id, env) {
|
|
|
354
1158
|
);
|
|
355
1159
|
return p;
|
|
356
1160
|
}
|
|
357
|
-
function
|
|
1161
|
+
// Default output container per media kind: mp4 for videos, png otherwise.
function defaultFormatForKind(kind) {
  if (kind === "video") {
    return "mp4";
  }
  return "png";
}
|
|
1164
|
+
// Normalize user-facing options into the internal request shape: clamp n to
// 1..10, default kind/format/outDir, derive output naming, and resolve all
// image inputs (paths/URLs/data URIs) up front so providers receive a
// uniform format.
async function normalizeOptions(prompt, opts, verbose) {
  const nRaw = opts.n ?? 1;
  // Clamp the image/video count into [1, 10] and force an integer.
  const n = Math.max(1, Math.min(10, Math.floor(nRaw)));
  const kind = opts.kind ?? "image";
  const format = opts.format ?? defaultFormatForKind(kind);
  const outDir = resolveOutDir(opts.outDir ?? ".");
  const timestamp = timestampLocalCompact();
  // File name stem comes from the explicit name, else a slug of the prompt.
  const nameBase = slugify(opts.name ?? prompt);
  let inputImages;
  if (opts.inputImages?.length) {
    log5(verbose, `Resolving ${opts.inputImages.length} input image(s)...`);
    inputImages = await resolveImageInputs(opts.inputImages);
    log5(verbose, `Resolved input images`);
  }
  let startFrame;
  let endFrame;
  if (opts.startFrame) {
    log5(verbose, `Resolving start frame: ${opts.startFrame}`);
    startFrame = await resolveImageInput(opts.startFrame);
  }
  if (opts.endFrame) {
    log5(verbose, `Resolving end frame: ${opts.endFrame}`);
    endFrame = await resolveImageInput(opts.endFrame);
  }
  return {
    prompt,
    provider: opts.provider ?? "auto",
    model: opts.model ?? void 0,
    n,
    aspectRatio: opts.aspectRatio ?? void 0,
    kind,
    format,
    outDir,
    // Explicit output path wins over the outDir/name/timestamp scheme.
    out: opts.out ? path3.resolve(process.cwd(), opts.out) : void 0,
    nameBase,
    timestamp,
    verbose: Boolean(opts.verbose),
    // New fields
    inputImages,
    startFrame,
    endFrame,
    duration: opts.duration
  };
}
|
|
378
|
-
|
|
1208
|
+
// Validate a normalized request against a provider's capability matrix.
// Throws a descriptive Error on the first violation; returns undefined
// when the request is acceptable.
function validateRequestForProvider(req, provider) {
  const capabilities = provider.capabilities;
  const imageCount = req.inputImages?.length ?? 0;

  // Too many input images for this provider.
  if (imageCount > capabilities.maxInputImages) {
    throw new Error(
      `Provider ${provider.id} supports max ${capabilities.maxInputImages} input image(s), but ${imageCount} provided`
    );
  }

  // Aspect ratio: must look like "w:h", and (unless the provider accepts
  // arbitrary ratios) must appear in its declared allow-list.
  if (req.aspectRatio) {
    const ratio = req.aspectRatio.trim().replace(/\s+/g, "");
    if (!/^\d+:\d+$/.test(ratio)) {
      throw new Error(`Invalid aspect ratio: "${req.aspectRatio}" (expected format: w:h)`);
    }
    const allowList = capabilities.supportedAspectRatios;
    const restricted = capabilities.supportsCustomAspectRatio !== true && Array.isArray(allowList) && allowList.length;
    if (restricted && !allowList.includes(ratio)) {
      throw new Error(
        `Provider ${provider.id} does not support aspect ratio "${ratio}". Supported: ${allowList.join(", ")}`
      );
    }
  }

  // End-frame interpolation is an opt-in capability.
  if (req.endFrame && !capabilities.supportsVideoInterpolation) {
    throw new Error(
      `Provider ${provider.id} does not support video interpolation (end frame). Only startFrame is supported for image-to-video.`
    );
  }

  // Video duration must fall inside the provider's declared range.
  if (req.duration !== void 0 && req.kind === "video" && capabilities.videoDurationRange) {
    const [min, max] = capabilities.videoDurationRange;
    if (req.duration < min || req.duration > max) {
      throw new Error(
        `Provider ${provider.id} supports video duration ${min}-${max}s, but ${req.duration}s requested`
      );
    }
  }

  // Input images on an image request imply editing, which is opt-in.
  if (req.kind === "image" && imageCount > 0 && !capabilities.supportsImageEditing) {
    throw new Error(`Provider ${provider.id} does not support image editing with input images`);
  }
}
|
|
1248
|
+
// End-to-end generation pipeline: load env, normalize options, pick and
// validate a provider, call it, then write each returned item to disk.
// Returns the provider items augmented with their written filePath.
async function generateMedia(prompt, opts = {}) {
  const { env } = loadEnv(process.cwd());
  const verbose = Boolean(opts.verbose);
  const req = await normalizeOptions(prompt, opts, verbose);
  // Log a redacted copy: prompt truncated, data URIs replaced by their
  // length so huge base64 payloads never hit the log.
  const reqSummary = {
    ...req,
    prompt: req.prompt.slice(0, 50) + "...",
    inputImages: req.inputImages?.map(
      (img) => img.startsWith("data:") ? `data:...${img.length} chars` : img
    ),
    startFrame: req.startFrame?.startsWith("data:") ? `data:...${req.startFrame.length} chars` : req.startFrame,
    endFrame: req.endFrame?.startsWith("data:") ? `data:...${req.endFrame.length} chars` : req.endFrame
  };
  log5(verbose, "Request:", JSON.stringify(reqSummary));
  const provider = pickProvider(req.provider, env);
  log5(verbose, "Selected provider:", provider.id, "| supports:", provider.supports);
  // Kind check first (cheap), then the detailed capability validation.
  if (!provider.supports.includes(req.kind)) {
    throw new Error(`Provider ${provider.id} does not support ${req.kind} generation`);
  }
  validateRequestForProvider(req, provider);
  log5(verbose, "Calling provider.generate()...");
  const startTime = Date.now();
  const partials = await provider.generate(req, env);
  log5(verbose, `Provider returned ${partials.length} items in ${Date.now() - startTime}ms`);
  const items = [];
  for (let i = 0; i < partials.length; i++) {
    const p = partials[i];
    if (!p) continue;
    // The item's mime type can override the requested format's extension.
    const filePath = makeOutputPath(req, i, p.mimeType);
    log5(verbose, `Writing ${p.bytes.byteLength} bytes to: ${filePath}`);
    await writeMediaFile(filePath, p.bytes);
    items.push({ ...p, filePath });
  }
  log5(verbose, `Done! Generated ${items.length} ${req.kind}(s)`);
  return items;
}
|
|
1284
|
+
// Convenience wrapper: run the generation pipeline with kind forced to "image".
async function generateImage(prompt, opts = {}) {
  const merged = { ...opts, kind: "image" };
  return generateMedia(prompt, merged);
}
|
|
1287
|
+
// Convenience wrapper: run the generation pipeline with kind forced to "video".
async function generateVideo(prompt, opts = {}) {
  const merged = { ...opts, kind: "video" };
  return generateMedia(prompt, merged);
}
|
|
393
1290
|
export {
|
|
394
1291
|
generateImage,
|
|
1292
|
+
generateMedia,
|
|
1293
|
+
generateVideo,
|
|
395
1294
|
listProviders
|
|
396
1295
|
};
|
|
397
1296
|
//# sourceMappingURL=index.js.map
|