climage 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -124,11 +124,18 @@ FAL_KEY=... npx climage "A cat in a tree" --provider fal
124
124
  - `--outDir <dir>` (default: current directory)
125
125
  - `--name <text>` base name override
126
126
  - `--aspect-ratio <w:h>` (e.g. `16:9`, `4:3`, `1:1`)
127
+
128
+ ### Aspect ratio support (by provider)
129
+
130
+ - **Google (Imagen/Veo):** `1:1`, `4:3`, `3:4`, `16:9`, `9:16`
131
+ - **OpenAI (`gpt-image-*` / `dall-e-*`):** limited set (depends on model). Custom ratios are **not** supported.
132
+ - **xAI:** any `w:h` value is passed through as `aspect_ratio: "w:h"` (the xAI docs show `4:3` as an example and do not publish a strict allowlist).
133
+ - **fal.ai:** provider/model-specific; common ratios are supported and arbitrary `w:h` is passed through for models that accept it.
127
134
  - `--json`
128
135
 
129
136
  ### Input Images
130
137
 
131
- - `--input <path>` Input image for editing or reference (can be used multiple times)
138
+ - `--input <path>` Input image for editing or reference (can be used multiple times; provider-specific limits apply)
132
139
  - `--start-frame <path>` First frame image for video generation
133
140
  - `--end-frame <path>` Last frame image for video interpolation
134
141
  - `--duration <seconds>` Video duration in seconds
@@ -138,7 +145,7 @@ FAL_KEY=... npx climage "A cat in a tree" --provider fal
138
145
  Edit existing images by providing an input image:
139
146
 
140
147
  ```bash
141
- # Edit with xAI
148
+ # Edit with xAI (exactly one input image supported)
142
149
  npx climage "make the cat orange" --provider xai --input photo.jpg
143
150
 
144
151
  # Edit with Google Gemini
package/dist/cli.js CHANGED
@@ -55,9 +55,22 @@ function extensionForFormat(format) {
55
55
  function resolveOutDir(outDir) {
56
56
  return path2.isAbsolute(outDir) ? outDir : path2.resolve(process.cwd(), outDir);
57
57
  }
58
- function makeOutputPath(req, index) {
59
- const ext = extensionForFormat(req.format);
58
+ function extensionFromMimeType(mimeType) {
59
+ if (!mimeType) return void 0;
60
+ const t = mimeType.toLowerCase().split(";")[0]?.trim();
61
+ if (!t) return void 0;
62
+ if (t === "image/png") return "png";
63
+ if (t === "image/jpeg") return "jpg";
64
+ if (t === "image/webp") return "webp";
65
+ if (t === "image/gif") return "gif";
66
+ if (t === "image/avif") return "avif";
67
+ if (t === "video/mp4") return "mp4";
68
+ if (t === "video/webm") return "webm";
69
+ return void 0;
70
+ }
71
+ function makeOutputPath(req, index, mimeType) {
60
72
  if (req.out) return path2.resolve(process.cwd(), req.out);
73
+ const ext = extensionFromMimeType(mimeType) ?? extensionForFormat(req.format);
61
74
  const base = `${req.nameBase}-${req.timestamp}`;
62
75
  const suffix = req.n > 1 ? `-${String(index + 1).padStart(2, "0")}` : "";
63
76
  const filename = `${base}${suffix}.${ext}`;
@@ -185,6 +198,11 @@ async function editXaiImages(req, apiKey) {
185
198
  const model = req.model ?? "grok-imagine-image";
186
199
  const inputImage = req.inputImages?.[0];
187
200
  if (!inputImage) throw new Error("No input image provided for editing");
201
+ if ((req.inputImages?.length ?? 0) > 1) {
202
+ throw new Error(
203
+ "xAI image editing supports only 1 input image (image_url). Provide exactly one --input for xAI edits."
204
+ );
205
+ }
188
206
  log("Starting image editing, model:", model, "n:", req.n);
189
207
  const body = {
190
208
  model,
@@ -250,6 +268,11 @@ async function processXaiImageResponse(json, model) {
250
268
  async function generateXaiVideo(req, apiKey) {
251
269
  const model = req.model ?? "grok-imagine-video";
252
270
  const imageUrl = req.startFrame ?? req.inputImages?.[0];
271
+ if ((req.inputImages?.length ?? 0) > 1 && !req.startFrame) {
272
+ throw new Error(
273
+ "xAI video generation supports only 1 input image (image_url). Provide exactly one --input or use --start-frame."
274
+ );
275
+ }
253
276
  log(
254
277
  "Starting video generation, model:",
255
278
  model,
@@ -350,7 +373,10 @@ async function generateXaiVideo(req, apiKey) {
350
373
  ];
351
374
  }
352
375
  var xaiCapabilities = {
376
+ // xAI docs show a single image_url for edits and a single image_url for image-to-video.
353
377
  maxInputImages: 1,
378
+ // xAI aspect_ratio examples show "4:3"; docs don't publish a strict allowlist.
379
+ supportsCustomAspectRatio: true,
354
380
  supportsVideoInterpolation: false,
355
381
  // xAI does not support end frame
356
382
  videoDurationRange: [1, 15],
@@ -485,6 +511,8 @@ function buildImageInput(req) {
485
511
  var falCapabilities = {
486
512
  maxInputImages: 7,
487
513
  // Vidu supports up to 7 reference images
514
+ // fal models vary. We map common ratios to enums, but also allow custom pass-through.
515
+ supportsCustomAspectRatio: true,
488
516
  supportsVideoInterpolation: true,
489
517
  // Vidu start-end-to-video
490
518
  videoDurationRange: [2, 8],
@@ -654,6 +682,9 @@ async function sleep2(ms) {
654
682
  var googleCapabilities = {
655
683
  maxInputImages: 3,
656
684
  // Veo 3.1 supports up to 3 reference images
685
+ // Imagen / Veo aspect ratio is expressed as "w:h" (e.g. "16:9").
686
+ // Public docs/examples focus on the common set below.
687
+ supportedAspectRatios: ["1:1", "4:3", "3:4", "16:9", "9:16"],
657
688
  supportsVideoInterpolation: true,
658
689
  // Veo 3.1 supports first + last frame
659
690
  videoDurationRange: [4, 8],
@@ -940,9 +971,21 @@ async function downloadBytes4(url) {
940
971
  log4(`Downloaded ${ab.byteLength} bytes in ${Date.now() - start}ms, type: ${ct}`);
941
972
  return ct ? { bytes: new Uint8Array(ab), mimeType: ct } : { bytes: new Uint8Array(ab) };
942
973
  }
974
+ function supportedAspectRatiosForModel(model) {
975
+ if (model.startsWith("gpt-image")) {
976
+ return ["1:1", "3:2", "4:3", "16:9", "2:3", "3:4", "9:16"];
977
+ }
978
+ if (model === "dall-e-3") {
979
+ return ["1:1", "4:3", "16:9", "3:4", "9:16"];
980
+ }
981
+ if (model === "dall-e-2") {
982
+ return ["1:1"];
983
+ }
984
+ return [];
985
+ }
943
986
  function mapAspectRatioToSize(aspectRatio, model) {
944
987
  if (!aspectRatio) return void 0;
945
- const ar = aspectRatio.trim();
988
+ const ar = aspectRatio.trim().replace(/\s+/g, "");
946
989
  if (model?.startsWith("gpt-image")) {
947
990
  if (ar === "1:1") return "1024x1024";
948
991
  if (ar === "3:2" || ar === "4:3" || ar === "16:9") return "1536x1024";
@@ -951,6 +994,8 @@ function mapAspectRatioToSize(aspectRatio, model) {
951
994
  if (ar === "1:1") return "1024x1024";
952
995
  if (ar === "16:9" || ar === "4:3") return "1792x1024";
953
996
  if (ar === "9:16" || ar === "3:4") return "1024x1792";
997
+ } else if (model === "dall-e-2") {
998
+ if (ar === "1:1") return "1024x1024";
954
999
  }
955
1000
  return void 0;
956
1001
  }
@@ -970,6 +1015,12 @@ async function generateWithEdit(req, apiKey, model) {
970
1015
  formData.append("prompt", req.prompt);
971
1016
  formData.append("n", String(req.n));
972
1017
  const size = mapAspectRatioToSize(req.aspectRatio, model);
1018
+ if (req.aspectRatio && !size) {
1019
+ const supported = supportedAspectRatiosForModel(model);
1020
+ throw new Error(
1021
+ `OpenAI model ${model} does not support aspect ratio "${req.aspectRatio}". Supported: ${supported.length ? supported.join(", ") : "unknown (model not recognized)"}`
1022
+ );
1023
+ }
973
1024
  if (size) formData.append("size", size);
974
1025
  const imageInput = req.inputImages?.[0];
975
1026
  if (!imageInput) throw new Error("No input image provided for editing");
@@ -1048,6 +1099,12 @@ var openaiProvider = {
1048
1099
  return generateWithEdit(req, apiKey, model);
1049
1100
  }
1050
1101
  const size = mapAspectRatioToSize(req.aspectRatio, model);
1102
+ if (req.aspectRatio && !size) {
1103
+ const supported = supportedAspectRatiosForModel(model);
1104
+ throw new Error(
1105
+ `OpenAI model ${model} does not support aspect ratio "${req.aspectRatio}". Supported: ${supported.length ? supported.join(", ") : "unknown (model not recognized)"}`
1106
+ );
1107
+ }
1051
1108
  const body = {
1052
1109
  model,
1053
1110
  prompt: req.prompt,
@@ -1185,6 +1242,21 @@ function validateRequestForProvider(req, provider) {
1185
1242
  `Provider ${provider.id} supports max ${caps.maxInputImages} input image(s), but ${inputCount} provided`
1186
1243
  );
1187
1244
  }
1245
+ if (req.aspectRatio) {
1246
+ const normalized = req.aspectRatio.trim().replace(/\s+/g, "");
1247
+ const looksLikeRatio = /^\d+:\d+$/.test(normalized);
1248
+ if (!looksLikeRatio) {
1249
+ throw new Error(`Invalid aspect ratio: "${req.aspectRatio}" (expected format: w:h)`);
1250
+ }
1251
+ if (caps.supportsCustomAspectRatio !== true && Array.isArray(caps.supportedAspectRatios) && caps.supportedAspectRatios.length) {
1252
+ const ok = caps.supportedAspectRatios.includes(normalized);
1253
+ if (!ok) {
1254
+ throw new Error(
1255
+ `Provider ${provider.id} does not support aspect ratio "${normalized}". Supported: ${caps.supportedAspectRatios.join(", ")}`
1256
+ );
1257
+ }
1258
+ }
1259
+ }
1188
1260
  if (req.endFrame && !caps.supportsVideoInterpolation) {
1189
1261
  throw new Error(
1190
1262
  `Provider ${provider.id} does not support video interpolation (end frame). Only startFrame is supported for image-to-video.`
@@ -1230,7 +1302,7 @@ async function generateMedia(prompt, opts = {}) {
1230
1302
  for (let i = 0; i < partials.length; i++) {
1231
1303
  const p = partials[i];
1232
1304
  if (!p) continue;
1233
- const filePath = makeOutputPath(req, i);
1305
+ const filePath = makeOutputPath(req, i, p.mimeType);
1234
1306
  log5(verbose, `Writing ${p.bytes.byteLength} bytes to: ${filePath}`);
1235
1307
  await writeMediaFile(filePath, p.bytes);
1236
1308
  items.push({ ...p, filePath });