climage 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -21,6 +21,16 @@ function loadEnv(cwd = process2.cwd()) {
21
21
  // src/core/output.ts
22
22
  import fs2 from "fs/promises";
23
23
  import path2 from "path";
24
+ var IMAGE_MIME_TYPES = {
25
+ ".png": "image/png",
26
+ ".jpg": "image/jpeg",
27
+ ".jpeg": "image/jpeg",
28
+ ".webp": "image/webp",
29
+ ".gif": "image/gif",
30
+ ".avif": "image/avif",
31
+ ".heif": "image/heif",
32
+ ".heic": "image/heic"
33
+ };
24
34
  function extensionForFormat(format) {
25
35
  switch (format) {
26
36
  case "jpg":
@@ -29,6 +39,12 @@ function extensionForFormat(format) {
29
39
  return "png";
30
40
  case "webp":
31
41
  return "webp";
42
+ case "mp4":
43
+ return "mp4";
44
+ case "webm":
45
+ return "webm";
46
+ case "gif":
47
+ return "gif";
32
48
  }
33
49
  }
34
50
  function resolveOutDir(outDir) {
@@ -42,10 +58,32 @@ function makeOutputPath(req, index) {
42
58
  const filename = `${base}${suffix}.${ext}`;
43
59
  return path2.join(req.outDir, filename);
44
60
  }
45
- async function writeImageFile(filePath, bytes) {
61
+ async function writeMediaFile(filePath, bytes) {
46
62
  await fs2.mkdir(path2.dirname(filePath), { recursive: true });
47
63
  await fs2.writeFile(filePath, bytes);
48
64
  }
65
+ async function resolveImageInput(pathOrUrl) {
66
+ if (pathOrUrl.startsWith("http://") || pathOrUrl.startsWith("https://")) {
67
+ return pathOrUrl;
68
+ }
69
+ if (pathOrUrl.startsWith("data:")) {
70
+ return pathOrUrl;
71
+ }
72
+ const resolvedPath = path2.isAbsolute(pathOrUrl) ? pathOrUrl : path2.resolve(process.cwd(), pathOrUrl);
73
+ const ext = path2.extname(resolvedPath).toLowerCase();
74
+ const mimeType = IMAGE_MIME_TYPES[ext];
75
+ if (!mimeType) {
76
+ throw new Error(
77
+ `Unsupported image format: ${ext}. Supported: ${Object.keys(IMAGE_MIME_TYPES).join(", ")}`
78
+ );
79
+ }
80
+ const fileBuffer = await fs2.readFile(resolvedPath);
81
+ const base64 = fileBuffer.toString("base64");
82
+ return `data:${mimeType};base64,${base64}`;
83
+ }
84
+ async function resolveImageInputs(pathsOrUrls) {
85
+ return Promise.all(pathsOrUrls.map(resolveImageInput));
86
+ }
49
87
 
50
88
  // src/core/strings.ts
51
89
  function slugify(input, maxLen = 60) {
@@ -63,62 +101,253 @@ var XAI_API_BASE = "https://api.x.ai/v1";
63
101
  function getXaiApiKey(env) {
64
102
  return env.XAI_API_KEY || env.XAI_TOKEN || env.GROK_API_KEY;
65
103
  }
104
+ var verboseMode = false;
105
+ function log(...args) {
106
+ if (verboseMode) console.error("[xai]", ...args);
107
+ }
66
108
  async function downloadBytes(url) {
109
+ log("Downloading from:", url.slice(0, 100) + "...");
110
+ const start = Date.now();
67
111
  const res = await fetch(url);
68
- if (!res.ok) throw new Error(`xAI image download failed (${res.status})`);
112
+ if (!res.ok) throw new Error(`xAI download failed (${res.status})`);
69
113
  const ab = await res.arrayBuffer();
70
114
  const ct = res.headers.get("content-type") || void 0;
115
+ log(`Downloaded ${ab.byteLength} bytes in ${Date.now() - start}ms, type: ${ct}`);
71
116
  return { bytes: new Uint8Array(ab), mimeType: ct };
72
117
  }
118
+ async function sleep(ms) {
119
+ await new Promise((r) => setTimeout(r, ms));
120
+ }
121
+ async function generateXaiImages(req, apiKey) {
122
+ const model = req.model ?? "grok-imagine-image";
123
+ log("Starting image generation, model:", model, "n:", req.n);
124
+ const body = {
125
+ model,
126
+ prompt: req.prompt,
127
+ n: req.n,
128
+ // xAI docs: endpoint supports aspect_ratio
129
+ ...req.aspectRatio ? { aspect_ratio: req.aspectRatio } : {},
130
+ // Use URL format to download + save.
131
+ response_format: "url"
132
+ };
133
+ log("Request body:", JSON.stringify(body));
134
+ log("Calling xAI images/generations...");
135
+ const startTime = Date.now();
136
+ const res = await fetch(`${XAI_API_BASE}/images/generations`, {
137
+ method: "POST",
138
+ headers: {
139
+ authorization: `Bearer ${apiKey}`,
140
+ "content-type": "application/json"
141
+ },
142
+ body: JSON.stringify(body)
143
+ });
144
+ log(`API responded in ${Date.now() - startTime}ms, status: ${res.status}`);
145
+ if (!res.ok) {
146
+ const txt = await res.text().catch(() => "");
147
+ log("Error response:", txt.slice(0, 1e3));
148
+ throw new Error(`xAI generations failed (${res.status}): ${txt.slice(0, 500)}`);
149
+ }
150
+ const json = await res.json();
151
+ log("Response data count:", json.data?.length);
152
+ if (!json.data?.length) throw new Error("xAI returned no images");
153
+ return processXaiImageResponse(json, model);
154
+ }
155
+ async function editXaiImages(req, apiKey) {
156
+ const model = req.model ?? "grok-imagine-image";
157
+ const inputImage = req.inputImages?.[0];
158
+ if (!inputImage) throw new Error("No input image provided for editing");
159
+ log("Starting image editing, model:", model, "n:", req.n);
160
+ const body = {
161
+ model,
162
+ prompt: req.prompt,
163
+ n: req.n,
164
+ image: { url: inputImage },
165
+ // Object with url field containing data URI or URL
166
+ response_format: "url",
167
+ ...req.aspectRatio ? { aspect_ratio: req.aspectRatio } : {}
168
+ };
169
+ log("Request body:", JSON.stringify({ ...body, image: { url: "...(data uri)..." } }));
170
+ log("Calling xAI images/edits...");
171
+ const startTime = Date.now();
172
+ const res = await fetch(`${XAI_API_BASE}/images/edits`, {
173
+ method: "POST",
174
+ headers: {
175
+ authorization: `Bearer ${apiKey}`,
176
+ "content-type": "application/json"
177
+ },
178
+ body: JSON.stringify(body)
179
+ });
180
+ log(`API responded in ${Date.now() - startTime}ms, status: ${res.status}`);
181
+ if (!res.ok) {
182
+ const txt = await res.text().catch(() => "");
183
+ log("Error response:", txt.slice(0, 1e3));
184
+ throw new Error(`xAI edits failed (${res.status}): ${txt.slice(0, 500)}`);
185
+ }
186
+ const json = await res.json();
187
+ log("Response data count:", json.data?.length);
188
+ if (!json.data?.length) throw new Error("xAI returned no images");
189
+ return processXaiImageResponse(json, model);
190
+ }
191
+ async function processXaiImageResponse(json, model) {
192
+ const results = [];
193
+ for (let i = 0; i < json.data.length; i++) {
194
+ const img = json.data[i];
195
+ if (!img) continue;
196
+ log(`Processing image ${i}...`);
197
+ if (img.url) {
198
+ const { bytes, mimeType } = await downloadBytes(img.url);
199
+ results.push({
200
+ kind: "image",
201
+ provider: "xai",
202
+ model,
203
+ index: i,
204
+ url: img.url,
205
+ bytes,
206
+ ...mimeType !== void 0 ? { mimeType } : {}
207
+ });
208
+ continue;
209
+ }
210
+ if (img.b64_json) {
211
+ log(`Image ${i} is base64 encoded`);
212
+ const bytes = Uint8Array.from(Buffer.from(img.b64_json, "base64"));
213
+ results.push({ kind: "image", provider: "xai", model, index: i, bytes });
214
+ continue;
215
+ }
216
+ throw new Error("xAI returned image without url or b64_json");
217
+ }
218
+ log(`Successfully generated ${results.length} image(s)`);
219
+ return results;
220
+ }
221
+ async function generateXaiVideo(req, apiKey) {
222
+ const model = req.model ?? "grok-imagine-video";
223
+ const imageUrl = req.startFrame ?? req.inputImages?.[0];
224
+ log(
225
+ "Starting video generation, model:",
226
+ model,
227
+ "hasImageUrl:",
228
+ !!imageUrl,
229
+ "duration:",
230
+ req.duration
231
+ );
232
+ const createBody = {
233
+ prompt: req.prompt,
234
+ model,
235
+ ...req.aspectRatio ? { aspect_ratio: req.aspectRatio } : {},
236
+ // Add image_url for image-to-video (data URI or URL string)
237
+ ...imageUrl ? { image_url: imageUrl } : {},
238
+ // Add duration (xAI supports 1-15 seconds)
239
+ ...req.duration !== void 0 ? { duration: req.duration } : {}
240
+ };
241
+ log(
242
+ "Request body:",
243
+ JSON.stringify({
244
+ ...createBody,
245
+ image_url: createBody.image_url ? `...(${String(createBody.image_url).length} chars)` : void 0
246
+ })
247
+ );
248
+ log("Calling xAI videos/generations...");
249
+ const startTime = Date.now();
250
+ const createRes = await fetch(`${XAI_API_BASE}/videos/generations`, {
251
+ method: "POST",
252
+ headers: {
253
+ authorization: `Bearer ${apiKey}`,
254
+ "content-type": "application/json"
255
+ },
256
+ body: JSON.stringify(createBody)
257
+ });
258
+ log(`API responded in ${Date.now() - startTime}ms, status: ${createRes.status}`);
259
+ if (!createRes.ok) {
260
+ const txt = await createRes.text().catch(() => "");
261
+ log("Error response:", txt.slice(0, 1e3));
262
+ throw new Error(`xAI video generations failed (${createRes.status}): ${txt.slice(0, 500)}`);
263
+ }
264
+ const createJson = await createRes.json();
265
+ const requestId = createJson.request_id;
266
+ log("Got request_id:", requestId);
267
+ if (!requestId) throw new Error("xAI video generation returned no request_id");
268
+ const maxAttempts = 120;
269
+ const intervalMs = 3e3;
270
+ let result;
271
+ log(`Starting poll loop (max ${maxAttempts} attempts, ${intervalMs}ms interval)...`);
272
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
273
+ const res = await fetch(`${XAI_API_BASE}/videos/${encodeURIComponent(requestId)}`, {
274
+ method: "GET",
275
+ headers: {
276
+ authorization: `Bearer ${apiKey}`
277
+ }
278
+ });
279
+ if (!res.ok) {
280
+ const txt = await res.text().catch(() => "");
281
+ log(`Poll attempt ${attempt + 1} failed:`, txt.slice(0, 500));
282
+ throw new Error(`xAI video poll failed (${res.status}): ${txt.slice(0, 500)}`);
283
+ }
284
+ const json = await res.json();
285
+ result = json;
286
+ log(
287
+ `Poll attempt ${attempt + 1}/${maxAttempts}: status=${json.status}, raw:`,
288
+ JSON.stringify(json).slice(0, 300)
289
+ );
290
+ if (json.video?.url) {
291
+ log("Video generation complete!");
292
+ break;
293
+ }
294
+ if (json.status === "failed" || json.status === "error") {
295
+ log("Video generation failed:", JSON.stringify(json));
296
+ throw new Error(`xAI video generation failed: ${JSON.stringify(json)}`);
297
+ }
298
+ await sleep(intervalMs);
299
+ }
300
+ if (!result?.video?.url) {
301
+ log("Timed out. Last result:", JSON.stringify(result));
302
+ throw new Error(`xAI video generation timed out (request_id=${requestId})`);
303
+ }
304
+ const url = result.video.url;
305
+ log("Video URL:", url);
306
+ if (result.video?.respect_moderation === false) {
307
+ throw new Error("xAI video generation was blocked by moderation");
308
+ }
309
+ const { bytes, mimeType } = await downloadBytes(url);
310
+ log(`Successfully generated video, ${bytes.byteLength} bytes`);
311
+ return [
312
+ {
313
+ kind: "video",
314
+ provider: "xai",
315
+ model: result.model ?? model,
316
+ index: 0,
317
+ url,
318
+ bytes,
319
+ ...mimeType !== void 0 ? { mimeType } : {}
320
+ }
321
+ ];
322
+ }
323
+ var xaiCapabilities = {
324
+ maxInputImages: 1,
325
+ supportsVideoInterpolation: false,
326
+ // xAI does not support end frame
327
+ videoDurationRange: [1, 15],
328
+ // 1-15 seconds
329
+ supportsImageEditing: true
330
+ };
73
331
  var xaiProvider = {
74
332
  id: "xai",
75
- displayName: "xAI (grok-imagine-image)",
333
+ displayName: "xAI",
334
+ supports: ["image", "video"],
335
+ capabilities: xaiCapabilities,
76
336
  isAvailable(env) {
77
337
  return Boolean(getXaiApiKey(env));
78
338
  },
79
339
  async generate(req, env) {
80
340
  const apiKey = getXaiApiKey(env);
81
341
  if (!apiKey) throw new Error("Missing xAI API key. Set XAI_API_KEY (or XAI_TOKEN).");
82
- const model = req.model ?? "grok-imagine-image";
83
- const body = {
84
- model,
85
- prompt: req.prompt,
86
- n: req.n,
87
- // xAI docs: endpoint supports aspect_ratio
88
- ...req.aspectRatio ? { aspect_ratio: req.aspectRatio } : {},
89
- // Use URL format to download + save.
90
- response_format: "url"
91
- };
92
- const res = await fetch(`${XAI_API_BASE}/images/generations`, {
93
- method: "POST",
94
- headers: {
95
- authorization: `Bearer ${apiKey}`,
96
- "content-type": "application/json"
97
- },
98
- body: JSON.stringify(body)
99
- });
100
- if (!res.ok) {
101
- const txt = await res.text().catch(() => "");
102
- throw new Error(`xAI generations failed (${res.status}): ${txt.slice(0, 500)}`);
103
- }
104
- const json = await res.json();
105
- if (!json.data?.length) throw new Error("xAI returned no images");
106
- const results = [];
107
- for (let i = 0; i < json.data.length; i++) {
108
- const img = json.data[i];
109
- if (img.url) {
110
- const { bytes, mimeType } = await downloadBytes(img.url);
111
- results.push({ provider: "xai", model, index: i, url: img.url, bytes, mimeType });
112
- continue;
113
- }
114
- if (img.b64_json) {
115
- const bytes = Uint8Array.from(Buffer.from(img.b64_json, "base64"));
116
- results.push({ provider: "xai", model, index: i, bytes });
117
- continue;
118
- }
119
- throw new Error("xAI returned image without url or b64_json");
342
+ verboseMode = req.verbose;
343
+ log("Provider initialized, kind:", req.kind);
344
+ if (req.kind === "video") return generateXaiVideo(req, apiKey);
345
+ const hasInputImages = req.inputImages && req.inputImages.length > 0;
346
+ if (hasInputImages) {
347
+ log("Input images detected, using edit endpoint");
348
+ return editXaiImages(req, apiKey);
120
349
  }
121
- return results;
350
+ return generateXaiImages(req, apiKey);
122
351
  }
123
352
  };
124
353
 
@@ -127,50 +356,198 @@ import { fal } from "@fal-ai/client";
127
356
  function getFalKey(env) {
128
357
  return env.FAL_API_KEY || env.FAL_KEY;
129
358
  }
130
- async function downloadBytes2(url) {
359
+ function log2(verbose, ...args) {
360
+ if (verbose) console.error("[fal]", ...args);
361
+ }
362
+ async function downloadBytes2(url, verbose) {
363
+ log2(verbose, "Downloading from:", url.slice(0, 100) + "...");
364
+ const start = Date.now();
131
365
  const res = await fetch(url);
132
- if (!res.ok) throw new Error(`fal image download failed (${res.status})`);
366
+ if (!res.ok) throw new Error(`fal download failed (${res.status})`);
133
367
  const ab = await res.arrayBuffer();
134
368
  const ct = res.headers.get("content-type") || void 0;
369
+ log2(verbose, `Downloaded ${ab.byteLength} bytes in ${Date.now() - start}ms, type: ${ct}`);
135
370
  return { bytes: new Uint8Array(ab), mimeType: ct };
136
371
  }
372
+ function pickMany(result, kind) {
373
+ if (kind === "image") {
374
+ if (Array.isArray(result.images) && result.images.length) return result.images;
375
+ if (result.image?.url) return [result.image];
376
+ return [];
377
+ }
378
+ if (Array.isArray(result.videos) && result.videos.length) return result.videos;
379
+ if (result.video?.url) return [result.video];
380
+ return [];
381
+ }
382
+ var DEFAULT_IMAGE_MODEL = "fal-ai/flux/dev";
383
+ var DEFAULT_IMAGE_TO_IMAGE_MODEL = "fal-ai/flux/dev/image-to-image";
384
+ var DEFAULT_VIDEO_MODEL = "fal-ai/ltxv-2/text-to-video/fast";
385
+ var DEFAULT_IMAGE_TO_VIDEO_MODEL = "fal-ai/vidu/q2/image-to-video";
386
+ var DEFAULT_START_END_VIDEO_MODEL = "fal-ai/vidu/start-end-to-video";
387
+ var DEFAULT_REFERENCE_VIDEO_MODEL = "fal-ai/vidu/q2/reference-to-video";
388
+ function selectVideoModel(req) {
389
+ if (req.model) return req.model;
390
+ if (req.startFrame && req.endFrame) {
391
+ return DEFAULT_START_END_VIDEO_MODEL;
392
+ }
393
+ if (req.inputImages?.length && !req.startFrame) {
394
+ return DEFAULT_REFERENCE_VIDEO_MODEL;
395
+ }
396
+ if (req.startFrame || req.inputImages?.length) {
397
+ return DEFAULT_IMAGE_TO_VIDEO_MODEL;
398
+ }
399
+ return DEFAULT_VIDEO_MODEL;
400
+ }
401
+ function selectImageModel(req) {
402
+ if (req.model) return req.model;
403
+ if (req.inputImages?.length) return DEFAULT_IMAGE_TO_IMAGE_MODEL;
404
+ return DEFAULT_IMAGE_MODEL;
405
+ }
406
+ function mapAspectRatio(aspectRatio) {
407
+ if (!aspectRatio) return void 0;
408
+ const ar = aspectRatio.trim();
409
+ if (ar === "1:1") return "square";
410
+ if (ar === "4:3") return "landscape_4_3";
411
+ if (ar === "16:9") return "landscape_16_9";
412
+ if (ar === "3:4") return "portrait_4_3";
413
+ if (ar === "9:16") return "portrait_16_9";
414
+ return ar;
415
+ }
416
+ function buildVideoInput(req) {
417
+ const input = {
418
+ prompt: req.prompt
419
+ };
420
+ if (req.startFrame && req.endFrame) {
421
+ input.start_image_url = req.startFrame;
422
+ input.end_image_url = req.endFrame;
423
+ return input;
424
+ }
425
+ if (req.inputImages?.length && !req.startFrame) {
426
+ input.reference_image_urls = req.inputImages.slice(0, 7);
427
+ const ar = mapAspectRatio(req.aspectRatio);
428
+ if (ar) input.aspect_ratio = ar;
429
+ if (req.duration) input.duration = String(req.duration);
430
+ return input;
431
+ }
432
+ const imageUrl = req.startFrame ?? req.inputImages?.[0];
433
+ if (imageUrl) {
434
+ input.image_url = imageUrl;
435
+ if (req.duration) input.duration = String(req.duration);
436
+ return input;
437
+ }
438
+ const imageSize = mapAspectRatio(req.aspectRatio);
439
+ if (imageSize) input.image_size = imageSize;
440
+ if (req.n) input.num_videos = req.n;
441
+ return input;
442
+ }
443
+ function buildImageInput(req) {
444
+ const input = {
445
+ prompt: req.prompt
446
+ };
447
+ const imageSize = mapAspectRatio(req.aspectRatio);
448
+ if (imageSize) input.image_size = imageSize;
449
+ if (req.n) input.num_images = req.n;
450
+ if (req.inputImages?.[0]) {
451
+ input.image_url = req.inputImages[0];
452
+ input.strength = 0.75;
453
+ }
454
+ return input;
455
+ }
456
+ var falCapabilities = {
457
+ maxInputImages: 7,
458
+ // Vidu supports up to 7 reference images
459
+ supportsVideoInterpolation: true,
460
+ // Vidu start-end-to-video
461
+ videoDurationRange: [2, 8],
462
+ // Vidu supports 2-8 seconds
463
+ supportsImageEditing: true
464
+ };
137
465
  var falProvider = {
138
466
  id: "fal",
139
467
  displayName: "fal.ai",
468
+ supports: ["image", "video"],
469
+ capabilities: falCapabilities,
140
470
  isAvailable(env) {
141
471
  return Boolean(getFalKey(env));
142
472
  },
143
473
  async generate(req, env) {
144
474
  const key = getFalKey(env);
145
475
  if (!key) throw new Error("Missing fal API key. Set FAL_KEY (or FAL_API_KEY).");
476
+ const verbose = req.verbose;
477
+ log2(verbose, "Starting generation, kind:", req.kind, "n:", req.n);
478
+ log2(
479
+ verbose,
480
+ "Input images:",
481
+ req.inputImages?.length ?? 0,
482
+ "startFrame:",
483
+ !!req.startFrame,
484
+ "endFrame:",
485
+ !!req.endFrame
486
+ );
146
487
  fal.config({ credentials: key });
147
- const model = req.model ?? "fal-ai/flux/dev";
148
- let image_size = void 0;
149
- if (req.aspectRatio) {
150
- const ar = req.aspectRatio.trim();
151
- if (ar === "1:1") image_size = "square";
152
- else if (ar === "4:3") image_size = "landscape_4_3";
153
- else if (ar === "16:9") image_size = "landscape_16_9";
154
- else if (ar === "3:4") image_size = "portrait_4_3";
155
- else if (ar === "9:16") image_size = "portrait_16_9";
488
+ const model = req.kind === "video" ? selectVideoModel(req) : selectImageModel(req);
489
+ log2(verbose, "Selected model:", model);
490
+ const input = req.kind === "video" ? buildVideoInput(req) : buildImageInput(req);
491
+ const inputSummary = { ...input };
492
+ for (const key2 of ["image_url", "start_image_url", "end_image_url"]) {
493
+ if (typeof inputSummary[key2] === "string" && inputSummary[key2].startsWith("data:")) {
494
+ inputSummary[key2] = `data:...${inputSummary[key2].length} chars`;
495
+ }
156
496
  }
157
- const input = {
158
- prompt: req.prompt,
159
- ...image_size ? { image_size } : {},
160
- // Some fal models support "num_images"; flux/dev returns images array length.
161
- ...req.n ? { num_images: req.n } : {}
497
+ if (Array.isArray(inputSummary.reference_image_urls)) {
498
+ inputSummary.reference_image_urls = inputSummary.reference_image_urls.map(
499
+ (url) => url.startsWith("data:") ? `data:...${url.length} chars` : url
500
+ );
501
+ }
502
+ log2(verbose, "Request input:", JSON.stringify(inputSummary));
503
+ log2(verbose, "Calling fal.subscribe...");
504
+ const startTime = Date.now();
505
+ const subscribeOptions = {
506
+ input,
507
+ logs: verbose
162
508
  };
163
- const result = await fal.subscribe(model, { input });
164
- const images = result?.data?.images;
165
- if (!images?.length) throw new Error("fal returned no images");
509
+ if (verbose) {
510
+ subscribeOptions.onQueueUpdate = (update) => {
511
+ log2(true, "Queue update:", update.status, JSON.stringify(update).slice(0, 200));
512
+ };
513
+ }
514
+ const result = await fal.subscribe(model, subscribeOptions);
515
+ log2(verbose, `fal.subscribe completed in ${Date.now() - startTime}ms`);
516
+ log2(verbose, "Raw result keys:", Object.keys(result?.data ?? {}));
517
+ log2(verbose, "Result preview:", JSON.stringify(result?.data ?? {}).slice(0, 500));
518
+ const items = pickMany(result?.data ?? {}, req.kind);
519
+ log2(verbose, `Found ${items.length} ${req.kind}(s) in response`);
520
+ if (!items?.length) {
521
+ const noun = req.kind === "video" ? "videos" : "images";
522
+ throw new Error(
523
+ `fal returned no ${noun}. Raw response: ${JSON.stringify(result?.data).slice(0, 300)}`
524
+ );
525
+ }
166
526
  const out = [];
167
- for (let i = 0; i < Math.min(images.length, req.n); i++) {
168
- const img = images[i];
169
- if (!img?.url) continue;
170
- const { bytes, mimeType } = await downloadBytes2(img.url);
171
- out.push({ provider: "fal", model, index: i, url: img.url, bytes, mimeType: img.content_type ?? mimeType });
527
+ for (let i = 0; i < Math.min(items.length, req.n); i++) {
528
+ const m = items[i];
529
+ if (!m?.url) {
530
+ log2(verbose, `Item ${i} has no URL, skipping`);
531
+ continue;
532
+ }
533
+ log2(verbose, `Downloading item ${i}...`);
534
+ const { bytes, mimeType } = await downloadBytes2(m.url, verbose);
535
+ const finalMimeType = m.content_type ?? mimeType;
536
+ out.push({
537
+ kind: req.kind,
538
+ provider: "fal",
539
+ model,
540
+ index: i,
541
+ url: m.url,
542
+ bytes,
543
+ ...finalMimeType !== void 0 ? { mimeType: finalMimeType } : {}
544
+ });
545
+ }
546
+ if (!out.length) {
547
+ const noun = req.kind === "video" ? "videos" : "images";
548
+ throw new Error(`fal returned ${noun} but none were downloadable`);
172
549
  }
173
- if (!out.length) throw new Error("fal returned images but none were downloadable");
550
+ log2(verbose, `Successfully generated ${out.length} ${req.kind}(s)`);
174
551
  return out;
175
552
  }
176
553
  };
@@ -180,7 +557,7 @@ import { GoogleGenAI } from "@google/genai";
180
557
  function getGeminiApiKey(env) {
181
558
  return env.GEMINI_API_KEY || env.GOOGLE_API_KEY || env.GOOGLE_GENAI_API_KEY;
182
559
  }
183
- function mimeForFormat(format) {
560
+ function mimeForImageFormat(format) {
184
561
  switch (format) {
185
562
  case "jpg":
186
563
  return "image/jpeg";
@@ -191,43 +568,522 @@ function mimeForFormat(format) {
191
568
  return "image/png";
192
569
  }
193
570
  }
571
+ var verboseMode2 = false;
572
+ function log3(...args) {
573
+ if (verboseMode2) console.error("[google]", ...args);
574
+ }
575
+ var MODEL_ALIASES = {
576
+ "nano-banana": "gemini-2.5-flash-image",
577
+ "nano-banana-pro": "gemini-3-pro-image-preview",
578
+ // Veo (video)
579
+ veo2: "veo-2.0-generate-001",
580
+ "veo-2": "veo-2.0-generate-001",
581
+ veo3: "veo-3.0-generate-001",
582
+ "veo-3": "veo-3.0-generate-001",
583
+ "veo-3.1": "veo-3.1-generate-preview",
584
+ veo31: "veo-3.1-generate-preview"
585
+ };
586
+ var VEO_31_MODELS = ["veo-3.1-generate-preview", "veo-3.1-fast-generate-preview"];
587
+ function isVeo31Model(model) {
588
+ return VEO_31_MODELS.some((m) => model.includes(m) || model.includes("veo-3.1"));
589
+ }
590
+ function parseDataUri(dataUri) {
591
+ const match = dataUri.match(/^data:([^;]+);base64,(.+)$/);
592
+ if (!match) return null;
593
+ return { mimeType: match[1] ?? "image/png", data: match[2] ?? "" };
594
+ }
595
+ function imageToGoogleFormat(imageInput) {
596
+ if (imageInput.startsWith("data:")) {
597
+ const parsed = parseDataUri(imageInput);
598
+ if (parsed) {
599
+ return { inlineData: { data: parsed.data, mimeType: parsed.mimeType } };
600
+ }
601
+ }
602
+ return { fileUri: imageInput };
603
+ }
604
+ var GEMINI_IMAGE_MODELS = ["gemini-2.5-flash-image", "gemini-3-pro-image-preview"];
605
+ function resolveModel(model) {
606
+ if (!model) return "gemini-2.5-flash-image";
607
+ return MODEL_ALIASES[model] ?? model;
608
+ }
609
+ function isGeminiImageModel(model) {
610
+ return GEMINI_IMAGE_MODELS.some((m) => model.startsWith(m));
611
+ }
612
+ async function downloadBytes3(url) {
613
+ log3("Downloading from:", url.slice(0, 100) + "...");
614
+ const start = Date.now();
615
+ const res = await fetch(url);
616
+ if (!res.ok) throw new Error(`Google video download failed (${res.status})`);
617
+ const ab = await res.arrayBuffer();
618
+ const ct = res.headers.get("content-type") || void 0;
619
+ log3(`Downloaded ${ab.byteLength} bytes in ${Date.now() - start}ms, type: ${ct}`);
620
+ return { bytes: new Uint8Array(ab), mimeType: ct };
621
+ }
622
+ async function sleep2(ms) {
623
+ await new Promise((r) => setTimeout(r, ms));
624
+ }
625
+ var googleCapabilities = {
626
+ maxInputImages: 3,
627
+ // Veo 3.1 supports up to 3 reference images
628
+ supportsVideoInterpolation: true,
629
+ // Veo 3.1 supports first + last frame
630
+ videoDurationRange: [4, 8],
631
+ // Veo 3.1 supports 4, 6, 8 seconds
632
+ supportsImageEditing: true
633
+ };
194
634
  var googleProvider = {
195
635
  id: "google",
196
- displayName: "Google (Gemini / Imagen)",
636
+ displayName: "Google (Gemini / Imagen / Veo)",
637
+ supports: ["image", "video"],
638
+ capabilities: googleCapabilities,
197
639
  isAvailable(env) {
198
640
  return Boolean(getGeminiApiKey(env));
199
641
  },
200
642
  async generate(req, env) {
201
643
  const apiKey = getGeminiApiKey(env);
202
644
  if (!apiKey) throw new Error("Missing Google API key. Set GEMINI_API_KEY (or GOOGLE_API_KEY).");
645
+ verboseMode2 = req.verbose;
646
+ log3("Provider initialized, kind:", req.kind);
647
+ log3(
648
+ "Input images:",
649
+ req.inputImages?.length ?? 0,
650
+ "startFrame:",
651
+ !!req.startFrame,
652
+ "endFrame:",
653
+ !!req.endFrame
654
+ );
203
655
  const ai = new GoogleGenAI({ apiKey });
204
- const model = req.model ?? "imagen-4.0-generate-001";
205
- const res = await ai.models.generateImages({
656
+ if (req.kind === "video") {
657
+ const hasAdvancedFeatures = req.startFrame || req.endFrame || req.inputImages?.length;
658
+ const defaultModel = hasAdvancedFeatures ? "veo-3.1-generate-preview" : "veo-2.0-generate-001";
659
+ const model2 = MODEL_ALIASES[req.model ?? ""] ?? req.model ?? defaultModel;
660
+ log3("Using video model:", model2);
661
+ if (hasAdvancedFeatures && !isVeo31Model(model2)) {
662
+ log3(
663
+ "WARNING: Advanced video features (startFrame, endFrame, referenceImages) require Veo 3.1"
664
+ );
665
+ }
666
+ return generateWithVeo(ai, model2, req);
667
+ }
668
+ const model = resolveModel(req.model);
669
+ log3("Resolved model:", model);
670
+ if (isGeminiImageModel(model)) {
671
+ log3("Using Gemini native image generation");
672
+ return generateWithGemini(ai, model, req);
673
+ }
674
+ log3("Using Imagen API");
675
+ return generateWithImagen(ai, model, req);
676
+ }
677
+ };
678
+ async function generateWithVeo(ai, model, req) {
679
+ log3("Starting Veo video generation, model:", model, "n:", req.n);
680
+ const startTime = Date.now();
681
+ const config = {
682
+ numberOfVideos: req.n,
683
+ ...req.aspectRatio ? { aspectRatio: req.aspectRatio } : {},
684
+ // Add duration if specified (Veo 3.1 supports 4, 6, 8)
685
+ ...req.duration !== void 0 ? { durationSeconds: String(req.duration) } : {}
686
+ };
687
+ if (req.inputImages?.length && isVeo31Model(model)) {
688
+ const referenceImages = req.inputImages.slice(0, 3).map((img) => {
689
+ const imageData = imageToGoogleFormat(img);
690
+ return {
691
+ image: imageData,
692
+ referenceType: "asset"
693
+ };
694
+ });
695
+ config.referenceImages = referenceImages;
696
+ log3("Added", referenceImages.length, "reference images");
697
+ }
698
+ const generateParams = {
699
+ model,
700
+ prompt: req.prompt,
701
+ config
702
+ };
703
+ const firstFrameImage = req.startFrame ?? (req.inputImages?.length === 1 ? req.inputImages[0] : void 0);
704
+ if (firstFrameImage && isVeo31Model(model)) {
705
+ const imageData = imageToGoogleFormat(firstFrameImage);
706
+ generateParams.image = imageData;
707
+ log3("Added first frame image");
708
+ }
709
+ if (req.endFrame && isVeo31Model(model)) {
710
+ const lastFrameData = imageToGoogleFormat(req.endFrame);
711
+ config.lastFrame = lastFrameData;
712
+ log3("Added last frame for interpolation");
713
+ }
714
+ log3("Calling ai.models.generateVideos...");
715
+ let op = await ai.models.generateVideos(generateParams);
716
+ log3("Initial operation state:", op.done ? "done" : "pending", "name:", op.name);
717
+ const maxAttempts = 60;
718
+ const intervalMs = 1e4;
719
+ for (let attempt = 0; attempt < maxAttempts && !op.done; attempt++) {
720
+ log3(`Poll attempt ${attempt + 1}/${maxAttempts}...`);
721
+ await sleep2(intervalMs);
722
+ op = await ai.operations.getVideosOperation({ operation: op });
723
+ log3(`Poll result: done=${op.done}`);
724
+ }
725
+ log3(`Operation completed in ${Date.now() - startTime}ms`);
726
+ if (!op.done) {
727
+ log3("Timed out. Operation state:", JSON.stringify(op).slice(0, 500));
728
+ throw new Error("Google Veo video generation timed out");
729
+ }
730
+ const videos = op.response?.generatedVideos;
731
+ log3("Generated videos count:", videos?.length);
732
+ if (!videos?.length) {
733
+ log3("Full response:", JSON.stringify(op.response).slice(0, 1e3));
734
+ throw new Error("Google Veo returned no videos");
735
+ }
736
+ const out = [];
737
+ for (let i = 0; i < Math.min(videos.length, req.n); i++) {
738
+ const v = videos[i];
739
+ log3(`Processing video ${i}:`, JSON.stringify(v).slice(0, 300));
740
+ const uri = v?.video?.uri;
741
+ if (!uri) {
742
+ log3(`Video ${i} has no URI, skipping`);
743
+ continue;
744
+ }
745
+ if (uri.startsWith("gs://")) {
746
+ throw new Error(
747
+ `Google Veo returned a gs:// URI (${uri}). Configure outputGcsUri / Vertex flow to fetch from GCS.`
748
+ );
749
+ }
750
+ const { bytes, mimeType } = await downloadBytes3(uri);
751
+ out.push({
752
+ kind: "video",
753
+ provider: "google",
206
754
  model,
207
- prompt: req.prompt,
208
- config: {
209
- numberOfImages: req.n,
210
- outputMimeType: mimeForFormat(req.format)
211
- // Note: aspect ratio / size varies by model. Add later.
755
+ index: i,
756
+ url: uri,
757
+ bytes,
758
+ ...mimeType !== void 0 ? { mimeType } : {}
759
+ });
760
+ }
761
+ if (!out.length) throw new Error("Google Veo returned videos but none were downloadable");
762
+ log3(`Successfully generated ${out.length} video(s)`);
763
+ return out;
764
+ }
765
/**
 * Generate images with a Gemini model, issuing one generateContent call per
 * requested image.
 *
 * @param ai    Client object exposing `models.generateContent`.
 * @param model Model id forwarded to the API and copied onto every result.
 * @param req   Request; reads `prompt`, `n`, `format` and optional `inputImages`.
 * @returns Array of `{ kind: "image", provider: "google", model, index, bytes, mimeType }`.
 * @throws Re-throws any API error after logging it, and throws
 *   "Gemini returned no images" when no call yielded inline image data.
 */
async function generateWithGemini(ai, model, req) {
  const inputImageCount = req.inputImages?.length;
  log3(
    "Starting Gemini image generation, model:",
    model,
    "n:",
    req.n,
    "hasInputImage:",
    Boolean(inputImageCount)
  );
  const startedAt = Date.now();
  const results = [];

  // Only the first input image is sent; it is placed ahead of the text prompt.
  function makeContents() {
    const firstImage = req.inputImages?.[0];
    if (!inputImageCount || !firstImage) return req.prompt;
    return [{ ...imageToGoogleFormat(firstImage) }, { text: req.prompt }];
  }

  for (let index = 0; index < req.n; index += 1) {
    log3(`Generating image ${index + 1}/${req.n}...`);
    const callStartedAt = Date.now();
    try {
      const response = await ai.models.generateContent({
        model,
        contents: makeContents(),
        config: { responseModalities: ["IMAGE"] }
      });
      log3(`API call ${index + 1} took ${Date.now() - callStartedAt}ms`);

      const parts = response.candidates?.[0]?.content?.parts;
      log3(`Response has ${parts?.length ?? 0} parts`);
      if (!parts) {
        log3(
          `No parts in response for image ${index}. Full response:`,
          JSON.stringify(response).slice(0, 500)
        );
        continue;
      }

      // Keep only the first part that actually carries inline data.
      for (const part of parts) {
        const payload = part.inlineData?.data;
        if (!payload) continue;
        // String payloads are base64; anything else is passed through as-is.
        const bytes =
          typeof payload === "string" ? Uint8Array.from(Buffer.from(payload, "base64")) : payload;
        log3(`Image ${index}: got ${bytes.byteLength} bytes, mimeType: ${part.inlineData.mimeType}`);
        results.push({
          kind: "image",
          provider: "google",
          model,
          index,
          bytes,
          mimeType: part.inlineData.mimeType ?? mimeForImageFormat(req.format)
        });
        break;
      }
    } catch (err) {
      log3(`Error generating image ${index}:`, err);
      throw err;
    }
  }

  log3(`Total generation time: ${Date.now() - startedAt}ms`);
  if (results.length === 0) throw new Error("Gemini returned no images");
  log3(`Successfully generated ${results.length} image(s)`);
  return results;
}
831
/**
 * Generate images with an Imagen model through a single generateImages call.
 *
 * @param ai    Client object exposing `models.generateImages`.
 * @param model Model id forwarded to the API and copied onto every result.
 * @param req   Request; reads `prompt`, `n`, `format` and optional `aspectRatio`.
 * @returns Array of `{ kind: "image", provider: "google", model, index, bytes, mimeType }`;
 *   entries without bytes are skipped, so indices may have gaps.
 * @throws When the response has no images, or none of them carries bytes.
 */
async function generateWithImagen(ai, model, req) {
  log3("Starting Imagen generation, model:", model, "n:", req.n);
  const startedAt = Date.now();
  log3("Calling ai.models.generateImages...");

  const config = {
    numberOfImages: req.n,
    outputMimeType: mimeForImageFormat(req.format)
  };
  // Imagen 4 supports aspectRatio
  if (req.aspectRatio) config.aspectRatio = req.aspectRatio;

  const response = await ai.models.generateImages({ model, prompt: req.prompt, config });
  log3(`API call took ${Date.now() - startedAt}ms`);

  const generated = response.generatedImages;
  log3("Generated images count:", generated?.length);
  if (!generated?.length) {
    log3("Full response:", JSON.stringify(response).slice(0, 1e3));
    throw new Error("Google generateImages returned no images");
  }

  const results = [];
  // Never take more images than were requested, even if the API returns extras.
  const limit = Math.min(generated.length, req.n);
  for (let index = 0; index < limit; index += 1) {
    const payload = generated[index]?.image?.imageBytes;
    if (!payload) {
      log3(`Image ${index} has no bytes, skipping`);
      continue;
    }
    // String payloads are base64; anything else is passed through as-is.
    const bytes =
      typeof payload === "string" ? Uint8Array.from(Buffer.from(payload, "base64")) : payload;
    log3(`Image ${index}: got ${bytes.byteLength} bytes`);
    results.push({
      kind: "image",
      provider: "google",
      model,
      index,
      bytes,
      mimeType: mimeForImageFormat(req.format)
    });
  }

  if (results.length === 0) throw new Error("Google returned images but no bytes were present");
  log3(`Successfully generated ${results.length} image(s)`);
  return results;
}
875
+
876
+ // src/providers/openai.ts
877
// Root URL that the OpenAI request paths in this provider are appended to.
var OPENAI_API_BASE = "https://api.openai.com/v1";

/**
 * Look up the OpenAI API key in an environment map.
 *
 * @param env Object of environment variables.
 * @returns `env.OPENAI_API_KEY` when truthy, otherwise `env.OPENAI_KEY`
 *   (which may itself be undefined).
 */
function getOpenAIApiKey(env) {
  const primaryKey = env.OPENAI_API_KEY;
  if (primaryKey) return primaryKey;
  return env.OPENAI_KEY;
}
881
// Module-level verbosity switch for the OpenAI provider; starts disabled.
var verboseMode3 = false;

/**
 * Emit a diagnostic line on stderr, prefixed with "[openai]", but only while
 * `verboseMode3` is truthy.
 *
 * @param args Values forwarded to `console.error` after the prefix.
 */
function log4(...args) {
  if (!verboseMode3) return;
  console.error("[openai]", ...args);
}
885
/**
 * Convert a base64 `data:` URI into a Blob.
 *
 * Accepts everything the previous pattern accepted, plus:
 *  - extra media-type parameters before `;base64` (e.g. `;charset=utf-8`),
 *    as allowed by RFC 2397;
 *  - base64 payloads that contain line breaks (Buffer ignores the whitespace);
 *  - an omitted media type, which falls back to "image/png".
 *
 * @param dataUri String of the form `data:[<mediatype>][;param]*;base64,<payload>`.
 * @returns Blob holding the decoded bytes; its type is the media type without
 *   parameters.
 * @throws Error("Invalid data URI") when the string is not a base64 data URI
 *   or has an empty payload.
 */
function dataUriToBlob(dataUri) {
  // Group 1: media type (may be empty). Group 2: optional ";name=value"
  // parameters. Group 3: payload; [\s\S] lets it span line breaks.
  const match = /^data:([^;]*)((?:;[^;,]+)*?);base64,([\s\S]+)$/.exec(dataUri);
  if (!match) throw new Error("Invalid data URI");
  const mimeType = match[1] || "image/png";
  const binary = Buffer.from(match[3], "base64");
  return new Blob([binary], { type: mimeType });
}
893
/**
 * Fetch a URL and return the response body as a Blob.
 *
 * @param url Address handed to `fetch`.
 * @returns Promise for the response Blob.
 * @throws Error carrying the HTTP status when the response is not ok.
 */
async function urlToBlob(url) {
  const response = await fetch(url);
  if (response.ok) return response.blob();
  throw new Error(`Failed to fetch image: ${response.status}`);
}
898
/**
 * Turn an image reference into a Blob.
 *
 * @param input Either a `data:` URI (decoded locally via dataUriToBlob) or
 *   anything else, which is handed to urlToBlob.
 * @returns Promise for the resulting Blob.
 */
async function imageInputToBlob(input) {
  const isDataUri = input.startsWith("data:");
  return isDataUri ? dataUriToBlob(input) : urlToBlob(input);
}
904
/**
 * Download a URL and return its raw bytes.
 *
 * @param url Address handed to `fetch`.
 * @returns `{ bytes }` as a Uint8Array, plus `mimeType` when the response
 *   carries a content-type header.
 * @throws Error carrying the HTTP status when the response is not ok.
 */
async function downloadBytes4(url) {
  log4("Downloading from:", `${url.slice(0, 100)}...`);
  const startedAt = Date.now();

  const response = await fetch(url);
  if (!response.ok) throw new Error(`OpenAI image download failed (${response.status})`);

  const buffer = await response.arrayBuffer();
  const contentType = response.headers.get("content-type");
  log4(`Downloaded ${buffer.byteLength} bytes in ${Date.now() - startedAt}ms, type: ${contentType}`);

  const result = { bytes: new Uint8Array(buffer) };
  // mimeType is only present when the server reported one.
  if (contentType) result.mimeType = contentType;
  return result;
}
914
/**
 * Translate an aspect-ratio string into a pixel-size string for the given model.
 *
 * Models whose id starts with "gpt-image" and the exact id "dall-e-3" each have
 * their own table; any other model, or any unlisted ratio, yields undefined.
 *
 * @param aspectRatio Ratio such as "16:9"; surrounding whitespace is ignored.
 * @param model       Model id (may be undefined).
 * @returns A "WIDTHxHEIGHT" string, or undefined when no mapping applies.
 */
function mapAspectRatioToSize(aspectRatio, model) {
  if (!aspectRatio) return undefined;
  const ratio = aspectRatio.trim();

  if (model?.startsWith("gpt-image")) {
    switch (ratio) {
      case "1:1":
        return "1024x1024";
      case "3:2":
      case "4:3":
      case "16:9":
        return "1536x1024";
      case "2:3":
      case "3:4":
      case "9:16":
        return "1024x1536";
      default:
        return undefined;
    }
  }

  if (model === "dall-e-3") {
    switch (ratio) {
      case "1:1":
        return "1024x1024";
      case "16:9":
      case "4:3":
        return "1792x1024";
      case "9:16":
      case "3:4":
        return "1024x1792";
      default:
        return undefined;
    }
  }

  return undefined;
}
928
/**
 * Capability flags for the OpenAI provider, read by the router's request
 * validation.
 *
 * `videoDurationRange` is deliberately left out: there is no video support.
 */
var openaiCapabilities = {
  // Two slots: the image to edit plus an optional mask.
  maxInputImages: 2,
  // OpenAI doesn't support video, so interpolation is off.
  supportsVideoInterpolation: false,
  supportsImageEditing: true
};
936
/**
 * Edit an image through the OpenAI `images/edits` endpoint.
 *
 * The first entry of `req.inputImages` is uploaded as the image and the
 * second, when present, as the mask.
 *
 * @param req    Request; reads `prompt`, `n`, `aspectRatio`, `inputImages`.
 * @param apiKey Bearer token for the Authorization header.
 * @param model  Model id sent in the form and copied onto every result.
 * @returns Array of `{ kind: "image", provider: "openai", model, index, bytes }`,
 *   with `url` (and `mimeType` when known) added for URL-based results.
 * @throws When no input image is given, the HTTP call fails, the response has
 *   no data, or an entry has neither `url` nor `b64_json`.
 */
async function generateWithEdit(req, apiKey, model) {
  log4("Using edit endpoint for image editing");
  const startedAt = Date.now();

  const form = new FormData();
  form.append("model", model);
  form.append("prompt", req.prompt);
  form.append("n", String(req.n));
  const size = mapAspectRatioToSize(req.aspectRatio, model);
  if (size) form.append("size", size);

  const sourceImage = req.inputImages?.[0];
  if (!sourceImage) throw new Error("No input image provided for editing");
  form.append("image", await imageInputToBlob(sourceImage), "image.png");
  log4("Added input image to form data");

  const maskImage = req.inputImages?.[1];
  if (maskImage) {
    form.append("mask", await imageInputToBlob(maskImage), "mask.png");
    log4("Added mask image to form data");
  }

  log4("Calling OpenAI images/edits...");
  const response = await fetch(`${OPENAI_API_BASE}/images/edits`, {
    method: "POST",
    // Don't set content-type - FormData sets it with boundary
    headers: { authorization: `Bearer ${apiKey}` },
    body: form
  });
  log4(`API responded in ${Date.now() - startedAt}ms, status: ${response.status}`);
  if (!response.ok) {
    // A failure to read the error body must not mask the HTTP failure itself.
    const errorText = await response.text().catch(() => "");
    log4("Error response:", errorText.slice(0, 1e3));
    throw new Error(`OpenAI edit failed (${response.status}): ${errorText.slice(0, 500)}`);
  }

  const json = await response.json();
  log4("Response data count:", json.data?.length);
  if (!json.data?.length) throw new Error("OpenAI edit returned no images");

  const edited = [];
  for (let index = 0; index < json.data.length; index += 1) {
    const entry = json.data[index];
    if (!entry) continue;
    log4(`Processing image ${index}...`);
    if (entry.url) {
      // URL results are downloaded so every item carries bytes.
      const download = await downloadBytes4(entry.url);
      const item = {
        kind: "image",
        provider: "openai",
        model,
        index,
        url: entry.url,
        bytes: download.bytes
      };
      if (download.mimeType) item.mimeType = download.mimeType;
      edited.push(item);
    } else if (entry.b64_json) {
      log4(`Image ${index} is base64 encoded, ${entry.b64_json.length} chars`);
      const bytes = Uint8Array.from(Buffer.from(entry.b64_json, "base64"));
      edited.push({ kind: "image", provider: "openai", model, index, bytes });
    } else {
      throw new Error("OpenAI returned image without url or b64_json");
    }
  }

  log4(`Successfully edited ${edited.length} image(s)`);
  return edited;
}
1003
/**
 * OpenAI image provider.
 *
 * `generate` hands off to the edit endpoint whenever input images are present;
 * otherwise it posts a JSON body to `images/generations`.
 */
var openaiProvider = {
  id: "openai",
  displayName: "OpenAI (GPT Image / DALL-E)",
  supports: ["image"],
  capabilities: openaiCapabilities,

  /** Report whether an OpenAI API key can be found in `env`. */
  isAvailable(env) {
    return !!getOpenAIApiKey(env);
  },

  /**
   * Produce images for `req`.
   *
   * @param req Request; reads `verbose`, `kind`, `model`, `inputImages`,
   *   `aspectRatio`, `prompt`, `n`.
   * @param env Environment map that holds the API key.
   * @returns Array of `{ kind: "image", provider: "openai", model, index, bytes }`,
   *   with `url` (and `mimeType` when known) added for URL-based results.
   * @throws When the key is missing, the HTTP call fails, the response has no
   *   data, or an entry has neither `url` nor `b64_json`.
   */
  async generate(req, env) {
    const apiKey = getOpenAIApiKey(env);
    if (!apiKey) throw new Error("Missing OpenAI API key. Set OPENAI_API_KEY.");
    verboseMode3 = req.verbose;
    log4("Provider initialized, kind:", req.kind);

    const model = req.model ?? "gpt-image-1";
    const hasInputImages = Boolean(req.inputImages?.length);
    log4("Using model:", model, "hasInputImages:", hasInputImages);
    if (hasInputImages) {
      return generateWithEdit(req, apiKey, model);
    }

    const body = { model, prompt: req.prompt, n: req.n };
    const size = mapAspectRatioToSize(req.aspectRatio, model);
    if (size) body.size = size;
    // gpt-image-1 doesn't support response_format, defaults to b64_json
    // dall-e-2/3 support response_format
    if (!model.startsWith("gpt-image")) body.response_format = "url";

    const payload = JSON.stringify(body);
    log4("Request body:", payload);
    log4("Calling OpenAI images/generations...");
    const startedAt = Date.now();
    const response = await fetch(`${OPENAI_API_BASE}/images/generations`, {
      method: "POST",
      headers: {
        authorization: `Bearer ${apiKey}`,
        "content-type": "application/json"
      },
      body: payload
    });
    log4(`API responded in ${Date.now() - startedAt}ms, status: ${response.status}`);
    if (!response.ok) {
      // A failure to read the error body must not mask the HTTP failure itself.
      const errorText = await response.text().catch(() => "");
      log4("Error response:", errorText.slice(0, 1e3));
      throw new Error(`OpenAI generations failed (${response.status}): ${errorText.slice(0, 500)}`);
    }

    const json = await response.json();
    log4("Response data count:", json.data?.length);
    if (!json.data?.length) throw new Error("OpenAI returned no images");

    const images = [];
    for (let index = 0; index < json.data.length; index += 1) {
      const entry = json.data[index];
      if (!entry) continue;
      log4(`Processing image ${index}...`);
      if (entry.url) {
        // URL results are downloaded so every item carries bytes.
        const download = await downloadBytes4(entry.url);
        const item = {
          kind: "image",
          provider: "openai",
          model,
          index,
          url: entry.url,
          bytes: download.bytes
        };
        if (download.mimeType) item.mimeType = download.mimeType;
        images.push(item);
      } else if (entry.b64_json) {
        log4(`Image ${index} is base64 encoded, ${entry.b64_json.length} chars`);
        const bytes = Uint8Array.from(Buffer.from(entry.b64_json, "base64"));
        images.push({ kind: "image", provider: "openai", model, index, bytes });
      } else {
        throw new Error("OpenAI returned image without url or b64_json");
      }
    }

    log4(`Successfully generated ${images.length} image(s)`);
    return images;
  }
};
228
1081
 
229
1082
  // src/core/router.ts
230
- var providers = [googleProvider, xaiProvider, falProvider];
1083
// Registry of every built-in provider. Order matters: pickProvider falls back
// to providers.find(...isAvailable...), so earlier entries win that lookup.
+ var providers = [googleProvider, xaiProvider, falProvider, openaiProvider];
1084
/**
 * Emit a router diagnostic on stderr, prefixed with "[router]".
 *
 * @param verbose Nothing is printed unless this is truthy.
 * @param args    Values forwarded to `console.error` after the prefix.
 */
function log5(verbose, ...args) {
  if (!verbose) return;
  console.error("[router]", ...args);
}
231
1087
  function listProviders() {
232
1088
  return [...providers];
233
1089
  }
@@ -239,47 +1095,130 @@ function pickProvider(id, env) {
239
1095
  return p2;
240
1096
  }
241
1097
  const p = providers.find((pp) => pp.isAvailable(env));
242
- if (!p) throw new Error("No providers available. Set XAI_API_KEY (or other provider keys) in .env or environment.");
1098
+ if (!p)
1099
+ throw new Error(
1100
+ "No providers available. Set XAI_API_KEY (or other provider keys) in .env or environment."
1101
+ );
243
1102
  return p;
244
1103
  }
245
- function normalizeOptions(prompt, opts) {
1104
/**
 * Pick the output format used when the caller did not specify one.
 *
 * @param kind Media kind.
 * @returns "mp4" for "video"; "png" for every other value.
 */
function defaultFormatForKind(kind) {
  if (kind === "video") return "mp4";
  return "png";
}
1107
/**
 * Turn user-supplied options into a fully-populated request object.
 *
 * @param prompt  Prompt text; also the source of `nameBase` when `opts.name` is absent.
 * @param opts    Raw options (`n`, `kind`, `format`, `outDir`, `out`, `name`,
 *   `provider`, `model`, `aspectRatio`, `verbose`, `inputImages`,
 *   `startFrame`, `endFrame`, `duration`).
 * @param verbose Enables router logging while inputs are resolved.
 * @returns Promise for the normalized request. `n` is floored and clamped to
 *   1..10; a value that is not a number falls back to 1.
 */
async function normalizeOptions(prompt, opts, verbose) {
  // Math.min/Math.max propagate NaN, so a NaN count would otherwise reach
  // the providers as `n: NaN`. Catch it before clamping.
  const requestedCount = Math.floor(opts.n ?? 1);
  const n = Number.isNaN(requestedCount) ? 1 : Math.max(1, Math.min(10, requestedCount));
  const kind = opts.kind ?? "image";
  const format = opts.format ?? defaultFormatForKind(kind);
  const outDir = resolveOutDir(opts.outDir ?? ".");
  const timestamp = timestampLocalCompact();
  const nameBase = slugify(opts.name ?? prompt);

  // Input images and frames are resolved up front so the request handed to a
  // provider already holds the resolved values.
  let inputImages;
  if (opts.inputImages?.length) {
    log5(verbose, `Resolving ${opts.inputImages.length} input image(s)...`);
    inputImages = await resolveImageInputs(opts.inputImages);
    log5(verbose, `Resolved input images`);
  }
  let startFrame;
  let endFrame;
  if (opts.startFrame) {
    log5(verbose, `Resolving start frame: ${opts.startFrame}`);
    startFrame = await resolveImageInput(opts.startFrame);
  }
  if (opts.endFrame) {
    log5(verbose, `Resolving end frame: ${opts.endFrame}`);
    endFrame = await resolveImageInput(opts.endFrame);
  }

  return {
    prompt,
    provider: opts.provider ?? "auto",
    model: opts.model ?? void 0,
    n,
    aspectRatio: opts.aspectRatio ?? void 0,
    kind,
    format,
    outDir,
    out: opts.out ? path3.resolve(process.cwd(), opts.out) : void 0,
    nameBase,
    timestamp,
    verbose: Boolean(opts.verbose),
    inputImages,
    startFrame,
    endFrame,
    duration: opts.duration
  };
}
266
- async function generateImage(prompt, opts = {}) {
1151
/**
 * Reject a request that the chosen provider cannot fulfil.
 *
 * Checks, in order: input-image count, end-frame (interpolation) support,
 * video duration range, and image-editing support.
 *
 * @param req      Normalized request; reads `inputImages`, `endFrame`,
 *   `duration`, `kind`.
 * @param provider Provider; reads `id` and `capabilities`.
 * @throws Error describing the first capability the request violates.
 */
function validateRequestForProvider(req, provider) {
  const caps = provider.capabilities;
  const inputCount = req.inputImages?.length ?? 0;

  if (inputCount > caps.maxInputImages) {
    throw new Error(
      `Provider ${provider.id} supports max ${caps.maxInputImages} input image(s), but ${inputCount} provided`
    );
  }

  const wantsInterpolation = Boolean(req.endFrame);
  if (wantsInterpolation && !caps.supportsVideoInterpolation) {
    throw new Error(
      `Provider ${provider.id} does not support video interpolation (end frame). Only startFrame is supported for image-to-video.`
    );
  }

  // Duration is only checked for video requests against providers that declare a range.
  const checkDuration = req.duration !== undefined && req.kind === "video" && caps.videoDurationRange;
  if (checkDuration) {
    const [min, max] = caps.videoDurationRange;
    const tooShort = req.duration < min;
    const tooLong = req.duration > max;
    if (tooShort || tooLong) {
      throw new Error(
        `Provider ${provider.id} supports video duration ${min}-${max}s, but ${req.duration}s requested`
      );
    }
  }

  const isImageEdit = req.kind === "image" && inputCount > 0;
  if (isImageEdit && !caps.supportsImageEditing) {
    throw new Error(`Provider ${provider.id} does not support image editing with input images`);
  }
}
1176
/**
 * Generate media for a prompt and write each result to disk.
 *
 * Loads the environment, normalizes the options, picks and validates a
 * provider, calls it, then writes every returned item to its output path.
 *
 * @param prompt Prompt text.
 * @param opts   Raw options; see normalizeOptions.
 * @returns Promise for the provider results, each extended with `filePath`.
 * @throws When the provider does not support `req.kind`, plus anything thrown
 *   by option normalization, validation, the provider, or the file write.
 */
async function generateMedia(prompt, opts = {}) {
  const { env } = loadEnv(process.cwd());
  const verbose = Boolean(opts.verbose);
  const req = await normalizeOptions(prompt, opts, verbose);

  // Inline data: URIs can be huge, so the log gets a short placeholder instead.
  const redact = (value) => (value.startsWith("data:") ? `data:...${value.length} chars` : value);
  const redactOptional = (value) => (value == null ? value : redact(value));
  const reqSummary = {
    ...req,
    prompt: req.prompt.slice(0, 50) + "...",
    inputImages: req.inputImages?.map((img) => redact(img)),
    startFrame: redactOptional(req.startFrame),
    endFrame: redactOptional(req.endFrame)
  };
  log5(verbose, "Request:", JSON.stringify(reqSummary));

  const provider = pickProvider(req.provider, env);
  log5(verbose, "Selected provider:", provider.id, "| supports:", provider.supports);
  const kindSupported = provider.supports.includes(req.kind);
  if (!kindSupported) {
    throw new Error(`Provider ${provider.id} does not support ${req.kind} generation`);
  }
  validateRequestForProvider(req, provider);

  log5(verbose, "Calling provider.generate()...");
  const startedAt = Date.now();
  const partials = await provider.generate(req, env);
  log5(verbose, `Provider returned ${partials.length} items in ${Date.now() - startedAt}ms`);

  const items = [];
  // The output path is derived from the position in `partials`, so skipped
  // entries still consume an index.
  for (const [position, partial] of partials.entries()) {
    if (!partial) continue;
    const filePath = makeOutputPath(req, position);
    log5(verbose, `Writing ${partial.bytes.byteLength} bytes to: ${filePath}`);
    await writeMediaFile(filePath, partial.bytes);
    items.push({ ...partial, filePath });
  }

  log5(verbose, `Done! Generated ${items.length} ${req.kind}(s)`);
  return items;
}
1212
/**
 * Generate image(s) for a prompt.
 *
 * Thin wrapper over generateMedia that forces `kind: "image"`, overriding any
 * `kind` present in `opts`.
 *
 * @param prompt Prompt text.
 * @param opts   Options forwarded to generateMedia.
 * @returns Promise for whatever generateMedia resolves to.
 */
async function generateImage(prompt, opts = {}) {
  const imageOptions = { ...opts, kind: "image" };
  return generateMedia(prompt, imageOptions);
}
1215
/**
 * Generate video(s) for a prompt.
 *
 * Thin wrapper over generateMedia that forces `kind: "video"`, overriding any
 * `kind` present in `opts`.
 *
 * @param prompt Prompt text.
 * @param opts   Options forwarded to generateMedia.
 * @returns Promise for whatever generateMedia resolves to.
 */
async function generateVideo(prompt, opts = {}) {
  const videoOptions = { ...opts, kind: "video" };
  return generateMedia(prompt, videoOptions);
}
281
1218
  export {
282
1219
  generateImage,
1220
+ generateMedia,
1221
+ generateVideo,
283
1222
  listProviders
284
1223
  };
285
1224
  //# sourceMappingURL=index.js.map