climage 0.3.0 → 0.4.1

package/README.md CHANGED
@@ -124,11 +124,18 @@ FAL_KEY=... npx climage "A cat in a tree" --provider fal
  - `--outDir <dir>` (default: current directory)
  - `--name <text>` base name override
  - `--aspect-ratio <w:h>` (e.g. `16:9`, `4:3`, `1:1`)
+
+ ### Aspect ratio support (by provider)
+
+ - **Google (Imagen/Veo):** `1:1`, `4:3`, `3:4`, `16:9`, `9:16`
+ - **OpenAI (gpt-image-*/dall-e-*):** limited set (depends on model). Custom ratios are **not** supported.
+ - **xAI:** accepts `aspect_ratio: "w:h"` (docs show `4:3`).
+ - **fal.ai:** provider/model-specific; common ratios are supported and arbitrary `w:h` is passed through for models that accept it.
  - `--json`

  ### Input Images

- - `--input <path>` Input image for editing or reference (can be used multiple times)
+ - `--input <path>` Input image for editing or reference (can be used multiple times; provider-specific limits apply)
  - `--start-frame <path>` First frame image for video generation
  - `--end-frame <path>` Last frame image for video interpolation
  - `--duration <seconds>` Video duration in seconds
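The per-provider rules in the new README section map onto capability fields in `dist/cli.js`, diffed below. A condensed sketch of that encoding (field names taken from the capability objects later in this diff):

```js
// Condensed from the capability objects in dist/cli.js below: a provider
// either declares an allowlist of ratios or accepts custom "w:h" values.
const aspectRatioRules = {
  google: { supportedAspectRatios: ["1:1", "4:3", "3:4", "16:9", "9:16"] },
  xai: { supportsCustomAspectRatio: true }, // docs show "4:3"; no strict allowlist
  fal: { supportsCustomAspectRatio: true }, // model-specific pass-through
  // OpenAI is handled separately: ratios map to fixed size strings per
  // model, and unmappable ratios now throw (see mapAspectRatioToSize below).
};
```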
@@ -138,7 +145,7 @@ FAL_KEY=... npx climage "A cat in a tree" --provider fal
  Edit existing images by providing an input image:

  ```bash
- # Edit with xAI
+ # Edit with xAI (exactly one input image supported)
  npx climage "make the cat orange" --provider xai --input photo.jpg

  # Edit with Google Gemini
package/dist/cli.js CHANGED
@@ -55,9 +55,22 @@ function extensionForFormat(format) {
  function resolveOutDir(outDir) {
  return path2.isAbsolute(outDir) ? outDir : path2.resolve(process.cwd(), outDir);
  }
- function makeOutputPath(req, index) {
- const ext = extensionForFormat(req.format);
+ function extensionFromMimeType(mimeType) {
+ if (!mimeType) return void 0;
+ const t = mimeType.toLowerCase().split(";")[0]?.trim();
+ if (!t) return void 0;
+ if (t === "image/png") return "png";
+ if (t === "image/jpeg") return "jpg";
+ if (t === "image/webp") return "webp";
+ if (t === "image/gif") return "gif";
+ if (t === "image/avif") return "avif";
+ if (t === "video/mp4") return "mp4";
+ if (t === "video/webm") return "webm";
+ return void 0;
+ }
+ function makeOutputPath(req, index, mimeType) {
  if (req.out) return path2.resolve(process.cwd(), req.out);
+ const ext = extensionFromMimeType(mimeType) ?? extensionForFormat(req.format);
  const base = `${req.nameBase}-${req.timestamp}`;
  const suffix = req.n > 1 ? `-${String(index + 1).padStart(2, "0")}` : "";
  const filename = `${base}${suffix}.${ext}`;
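The new `extensionFromMimeType` helper changes how output extensions are chosen: the `Content-Type` the provider actually returned now wins, and the extension implied by the requested format is only a fallback. A minimal runnable sketch of that precedence (abridged MIME table):

```js
// Abridged sketch of the 0.4.1 precedence in makeOutputPath: the
// provider-reported MIME type decides the extension when present.
function extensionFromMimeType(mimeType) {
  const t = mimeType?.toLowerCase().split(";")[0]?.trim();
  return { "image/png": "png", "image/jpeg": "jpg", "image/webp": "webp" }[t];
}

function pickExtension(mimeType, formatExt) {
  return extensionFromMimeType(mimeType) ?? formatExt;
}

console.log(pickExtension("image/webp; charset=binary", "png")); // "webp"
console.log(pickExtension(undefined, "png"));                    // "png"
```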
@@ -185,6 +198,11 @@ async function editXaiImages(req, apiKey) {
  const model = req.model ?? "grok-imagine-image";
  const inputImage = req.inputImages?.[0];
  if (!inputImage) throw new Error("No input image provided for editing");
+ if ((req.inputImages?.length ?? 0) > 1) {
+ throw new Error(
+ "xAI image editing supports only 1 input image (image_url). Provide exactly one --input for xAI edits."
+ );
+ }
  log("Starting image editing, model:", model, "n:", req.n);
  const body = {
  model,
@@ -250,6 +268,11 @@ async function processXaiImageResponse(json, model) {
  async function generateXaiVideo(req, apiKey) {
  const model = req.model ?? "grok-imagine-video";
  const imageUrl = req.startFrame ?? req.inputImages?.[0];
+ if ((req.inputImages?.length ?? 0) > 1 && !req.startFrame) {
+ throw new Error(
+ "xAI video generation supports only 1 input image (image_url). Provide exactly one --input or use --start-frame."
+ );
+ }
  log(
  "Starting video generation, model:",
  model,
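The guard means multiple `--input` values are rejected for xAI video unless `--start-frame` disambiguates which frame to use. A sketch of the resolution order (`resolveXaiImageUrl` is a name invented here for illustration; `cli.js` inlines this logic):

```js
// Sketch of the 0.4.1 resolution order for xAI image-to-video.
// (resolveXaiImageUrl is hypothetical; cli.js inlines this logic.)
function resolveXaiImageUrl(req) {
  if ((req.inputImages?.length ?? 0) > 1 && !req.startFrame) {
    throw new Error("xAI video generation supports only 1 input image (image_url).");
  }
  return req.startFrame ?? req.inputImages?.[0];
}

console.log(resolveXaiImageUrl({ inputImages: ["cat.jpg"] }));                             // "cat.jpg"
console.log(resolveXaiImageUrl({ startFrame: "a.jpg", inputImages: ["b.jpg", "c.jpg"] })); // "a.jpg"
// resolveXaiImageUrl({ inputImages: ["b.jpg", "c.jpg"] });                                // throws
```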
@@ -350,7 +373,10 @@ async function generateXaiVideo(req, apiKey) {
  ];
  }
  var xaiCapabilities = {
+ // xAI docs show a single image_url for edits and a single image_url for image-to-video.
  maxInputImages: 1,
+ // xAI aspect_ratio examples show "4:3"; docs don't publish a strict allowlist.
+ supportsCustomAspectRatio: true,
  supportsVideoInterpolation: false,
  // xAI does not support end frame
  videoDurationRange: [1, 15],
@@ -485,6 +511,8 @@ function buildImageInput(req) {
  var falCapabilities = {
  maxInputImages: 7,
  // Vidu supports up to 7 reference images
+ // fal models vary. We map common ratios to enums, but also allow custom pass-through.
+ supportsCustomAspectRatio: true,
  supportsVideoInterpolation: true,
  // Vidu start-end-to-video
  videoDurationRange: [2, 8],
@@ -654,6 +682,9 @@ async function sleep2(ms) {
  var googleCapabilities = {
  maxInputImages: 3,
  // Veo 3.1 supports up to 3 reference images
+ // Imagen / Veo aspect ratio is expressed as "w:h" (e.g. "16:9").
+ // Public docs/examples focus on the common set below.
+ supportedAspectRatios: ["1:1", "4:3", "3:4", "16:9", "9:16"],
  supportsVideoInterpolation: true,
  // Veo 3.1 supports first + last frame
  videoDurationRange: [4, 8],
@@ -818,7 +849,10 @@ async function generateWithGemini(ai, model, req) {
  model,
  contents: buildContents(),
  config: {
- responseModalities: ["IMAGE"]
+ responseModalities: ["IMAGE"],
+ // Gemini native image generation (Nano Banana) supports aspect ratio via imageConfig.
+ // Note: when editing from an input image, the model may still bias toward the input image's aspect.
+ ...req.aspectRatio ? { imageConfig: { aspectRatio: req.aspectRatio } } : {}
  }
  });
  log3(`API call ${i + 1} took ${Date.now() - callStart}ms`);
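The conditional spread keeps the request config unchanged in shape when no `--aspect-ratio` is given; `imageConfig` appears only when the flag is set. The pattern, isolated:

```js
// The spread pattern above, isolated: imageConfig is included only when
// an aspect ratio was requested, so the default config shape is unchanged.
function buildGeminiConfig(aspectRatio) {
  return {
    responseModalities: ["IMAGE"],
    ...(aspectRatio ? { imageConfig: { aspectRatio } } : {}),
  };
}

console.log(buildGeminiConfig("16:9"));    // { responseModalities: ["IMAGE"], imageConfig: { aspectRatio: "16:9" } }
console.log(buildGeminiConfig(undefined)); // { responseModalities: ["IMAGE"] }
```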
@@ -940,9 +974,21 @@ async function downloadBytes4(url) {
  log4(`Downloaded ${ab.byteLength} bytes in ${Date.now() - start}ms, type: ${ct}`);
  return ct ? { bytes: new Uint8Array(ab), mimeType: ct } : { bytes: new Uint8Array(ab) };
  }
+ function supportedAspectRatiosForModel(model) {
+ if (model.startsWith("gpt-image")) {
+ return ["1:1", "3:2", "4:3", "16:9", "2:3", "3:4", "9:16"];
+ }
+ if (model === "dall-e-3") {
+ return ["1:1", "4:3", "16:9", "3:4", "9:16"];
+ }
+ if (model === "dall-e-2") {
+ return ["1:1"];
+ }
+ return [];
+ }
  function mapAspectRatioToSize(aspectRatio, model) {
  if (!aspectRatio) return void 0;
- const ar = aspectRatio.trim();
+ const ar = aspectRatio.trim().replace(/\s+/g, "");
  if (model?.startsWith("gpt-image")) {
  if (ar === "1:1") return "1024x1024";
  if (ar === "3:2" || ar === "4:3" || ar === "16:9") return "1536x1024";
@@ -951,6 +997,8 @@ function mapAspectRatioToSize(aspectRatio, model) {
  if (ar === "1:1") return "1024x1024";
  if (ar === "16:9" || ar === "4:3") return "1792x1024";
  if (ar === "9:16" || ar === "3:4") return "1024x1792";
+ } else if (model === "dall-e-2") {
+ if (ar === "1:1") return "1024x1024";
  }
  return void 0;
  }
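Worked examples of the mapping, including the new whitespace normalization on the `ar` line and the new `dall-e-2` branch (expected values read off the code above; these calls assume the function as diffed):

```js
// Usage of mapAspectRatioToSize as diffed above. Whitespace inside the
// ratio is now stripped before matching, so padded input no longer
// silently falls through to undefined.
mapAspectRatioToSize("16:9", "gpt-image-1"); // "1536x1024"
mapAspectRatioToSize(" 4 : 3 ", "dall-e-3"); // "1792x1024" (normalized to "4:3" first)
mapAspectRatioToSize("1:1", "dall-e-2");     // "1024x1024" (new in 0.4.1)
mapAspectRatioToSize("21:9", "dall-e-2");    // undefined -> the new checks below throw
```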
@@ -970,6 +1018,12 @@ async function generateWithEdit(req, apiKey, model) {
  formData.append("prompt", req.prompt);
  formData.append("n", String(req.n));
  const size = mapAspectRatioToSize(req.aspectRatio, model);
+ if (req.aspectRatio && !size) {
+ const supported = supportedAspectRatiosForModel(model);
+ throw new Error(
+ `OpenAI model ${model} does not support aspect ratio "${req.aspectRatio}". Supported: ${supported.length ? supported.join(", ") : "unknown (model not recognized)"}`
+ );
+ }
  if (size) formData.append("size", size);
  const imageInput = req.inputImages?.[0];
  if (!imageInput) throw new Error("No input image provided for editing");
@@ -1048,6 +1102,12 @@ var openaiProvider = {
  return generateWithEdit(req, apiKey, model);
  }
  const size = mapAspectRatioToSize(req.aspectRatio, model);
+ if (req.aspectRatio && !size) {
+ const supported = supportedAspectRatiosForModel(model);
+ throw new Error(
+ `OpenAI model ${model} does not support aspect ratio "${req.aspectRatio}". Supported: ${supported.length ? supported.join(", ") : "unknown (model not recognized)"}`
+ );
+ }
  const body = {
  model,
  prompt: req.prompt,
@@ -1185,6 +1245,21 @@ function validateRequestForProvider(req, provider) {
  `Provider ${provider.id} supports max ${caps.maxInputImages} input image(s), but ${inputCount} provided`
  );
  }
+ if (req.aspectRatio) {
+ const normalized = req.aspectRatio.trim().replace(/\s+/g, "");
+ const looksLikeRatio = /^\d+:\d+$/.test(normalized);
+ if (!looksLikeRatio) {
+ throw new Error(`Invalid aspect ratio: "${req.aspectRatio}" (expected format: w:h)`);
+ }
+ if (caps.supportsCustomAspectRatio !== true && Array.isArray(caps.supportedAspectRatios) && caps.supportedAspectRatios.length) {
+ const ok = caps.supportedAspectRatios.includes(normalized);
+ if (!ok) {
+ throw new Error(
+ `Provider ${provider.id} does not support aspect ratio "${normalized}". Supported: ${caps.supportedAspectRatios.join(", ")}`
+ );
+ }
+ }
+ }
  if (req.endFrame && !caps.supportsVideoInterpolation) {
  throw new Error(
  `Provider ${provider.id} does not support video interpolation (end frame). Only startFrame is supported for image-to-video.`
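Taken together, the shared validation added here does two things: it rejects anything that isn't syntactically `w:h`, and it enforces an allowlist only for providers that declare one and don't accept custom ratios. A condensed, self-contained sketch (`checkAspectRatio` is a name invented for illustration):

```js
// Condensed from the validateRequestForProvider hunk above.
function checkAspectRatio(aspectRatio, caps, providerId) {
  const normalized = aspectRatio.trim().replace(/\s+/g, "");
  if (!/^\d+:\d+$/.test(normalized)) {
    throw new Error(`Invalid aspect ratio: "${aspectRatio}" (expected format: w:h)`);
  }
  const hasAllowlist =
    caps.supportsCustomAspectRatio !== true &&
    Array.isArray(caps.supportedAspectRatios) &&
    caps.supportedAspectRatios.length > 0;
  if (hasAllowlist && !caps.supportedAspectRatios.includes(normalized)) {
    throw new Error(`Provider ${providerId} does not support aspect ratio "${normalized}".`);
  }
  return normalized;
}

checkAspectRatio("16 : 9", { supportedAspectRatios: ["1:1", "16:9"] }, "google"); // "16:9"
checkAspectRatio("5:4", { supportsCustomAspectRatio: true }, "xai");              // "5:4"
// checkAspectRatio("5:4", { supportedAspectRatios: ["1:1"] }, "google");         // throws
```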
@@ -1230,7 +1305,7 @@ async function generateMedia(prompt, opts = {}) {
  for (let i = 0; i < partials.length; i++) {
  const p = partials[i];
  if (!p) continue;
- const filePath = makeOutputPath(req, i);
+ const filePath = makeOutputPath(req, i, p.mimeType);
  log5(verbose, `Writing ${p.bytes.byteLength} bytes to: ${filePath}`);
  await writeMediaFile(filePath, p.bytes);
  items.push({ ...p, filePath });