climage 0.3.0 → 0.4.1

package/README.md CHANGED
@@ -124,11 +124,18 @@ FAL_KEY=... npx climage "A cat in a tree" --provider fal
  - `--outDir <dir>` (default: current directory)
  - `--name <text>` base name override
  - `--aspect-ratio <w:h>` (e.g. `16:9`, `4:3`, `1:1`)
+
+ ### Aspect ratio support (by provider)
+
+ - **Google (Imagen/Veo):** `1:1`, `4:3`, `3:4`, `16:9`, `9:16`
+ - **OpenAI (gpt-image-*/dall-e-*):** limited set (depends on model). Custom ratios are **not** supported.
+ - **xAI:** accepts `aspect_ratio: "w:h"` (docs show `4:3`).
+ - **fal.ai:** provider/model-specific; common ratios are supported and arbitrary `w:h` is passed through for models that accept it.
  - `--json`

  ### Input Images

- - `--input <path>` Input image for editing or reference (can be used multiple times)
+ - `--input <path>` Input image for editing or reference (can be used multiple times; provider-specific limits apply)
  - `--start-frame <path>` First frame image for video generation
  - `--end-frame <path>` Last frame image for video interpolation
  - `--duration <seconds>` Video duration in seconds
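The per-provider rules in the new README section map onto capability fields in `dist/cli.js`, diffed below. A condensed sketch of that encoding (field names taken from the capability objects later in this diff):

```js
// Condensed from the capability objects in dist/cli.js below: a provider
// either declares an allowlist of ratios or accepts custom "w:h" values.
const aspectRatioRules = {
  google: { supportedAspectRatios: ["1:1", "4:3", "3:4", "16:9", "9:16"] },
  xai: { supportsCustomAspectRatio: true }, // docs show "4:3"; no strict allowlist
  fal: { supportsCustomAspectRatio: true }, // model-specific pass-through
  // OpenAI is handled separately: ratios map to fixed size strings per
  // model, and unmappable ratios now throw (see mapAspectRatioToSize below).
};
```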
@@ -138,7 +145,7 @@ FAL_KEY=... npx climage "A cat in a tree" --provider fal
  Edit existing images by providing an input image:

  ```bash
- # Edit with xAI
+ # Edit with xAI (exactly one input image supported)
  npx climage "make the cat orange" --provider xai --input photo.jpg

  # Edit with Google Gemini
package/dist/cli.js CHANGED
@@ -55,9 +55,22 @@ function extensionForFormat(format) {
  function resolveOutDir(outDir) {
  return path2.isAbsolute(outDir) ? outDir : path2.resolve(process.cwd(), outDir);
  }
- function makeOutputPath(req, index) {
- const ext = extensionForFormat(req.format);
+ function extensionFromMimeType(mimeType) {
+ if (!mimeType) return void 0;
+ const t = mimeType.toLowerCase().split(";")[0]?.trim();
+ if (!t) return void 0;
+ if (t === "image/png") return "png";
+ if (t === "image/jpeg") return "jpg";
+ if (t === "image/webp") return "webp";
+ if (t === "image/gif") return "gif";
+ if (t === "image/avif") return "avif";
+ if (t === "video/mp4") return "mp4";
+ if (t === "video/webm") return "webm";
+ return void 0;
+ }
+ function makeOutputPath(req, index, mimeType) {
  if (req.out) return path2.resolve(process.cwd(), req.out);
+ const ext = extensionFromMimeType(mimeType) ?? extensionForFormat(req.format);
  const base = `${req.nameBase}-${req.timestamp}`;
  const suffix = req.n > 1 ? `-${String(index + 1).padStart(2, "0")}` : "";
  const filename = `${base}${suffix}.${ext}`;
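The new `extensionFromMimeType` helper changes how output extensions are chosen: the `Content-Type` the provider actually returned now wins, and the extension implied by the requested format is only a fallback. A minimal runnable sketch of that precedence (abridged MIME table):

```js
// Abridged sketch of the 0.4.1 precedence in makeOutputPath: the
// provider-reported MIME type decides the extension when present.
function extensionFromMimeType(mimeType) {
  const t = mimeType?.toLowerCase().split(";")[0]?.trim();
  return { "image/png": "png", "image/jpeg": "jpg", "image/webp": "webp" }[t];
}

function pickExtension(mimeType, formatExt) {
  return extensionFromMimeType(mimeType) ?? formatExt;
}

console.log(pickExtension("image/webp; charset=binary", "png")); // "webp"
console.log(pickExtension(undefined, "png"));                    // "png"
```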
@@ -185,6 +198,11 @@ async function editXaiImages(req, apiKey) {
  const model = req.model ?? "grok-imagine-image";
  const inputImage = req.inputImages?.[0];
  if (!inputImage) throw new Error("No input image provided for editing");
+ if ((req.inputImages?.length ?? 0) > 1) {
+ throw new Error(
+ "xAI image editing supports only 1 input image (image_url). Provide exactly one --input for xAI edits."
+ );
+ }
  log("Starting image editing, model:", model, "n:", req.n);
  const body = {
  model,
@@ -250,6 +268,11 @@ async function processXaiImageResponse(json, model) {
  async function generateXaiVideo(req, apiKey) {
  const model = req.model ?? "grok-imagine-video";
  const imageUrl = req.startFrame ?? req.inputImages?.[0];
+ if ((req.inputImages?.length ?? 0) > 1 && !req.startFrame) {
+ throw new Error(
+ "xAI video generation supports only 1 input image (image_url). Provide exactly one --input or use --start-frame."
+ );
+ }
  log(
  "Starting video generation, model:",
  model,
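The guard means multiple `--input` values are rejected for xAI video unless `--start-frame` disambiguates which frame to use. A sketch of the resolution order (`resolveXaiImageUrl` is a name invented here for illustration; `cli.js` inlines this logic):

```js
// Sketch of the 0.4.1 resolution order for xAI image-to-video.
// (resolveXaiImageUrl is hypothetical; cli.js inlines this logic.)
function resolveXaiImageUrl(req) {
  if ((req.inputImages?.length ?? 0) > 1 && !req.startFrame) {
    throw new Error("xAI video generation supports only 1 input image (image_url).");
  }
  return req.startFrame ?? req.inputImages?.[0];
}

console.log(resolveXaiImageUrl({ inputImages: ["cat.jpg"] }));                             // "cat.jpg"
console.log(resolveXaiImageUrl({ startFrame: "a.jpg", inputImages: ["b.jpg", "c.jpg"] })); // "a.jpg"
// resolveXaiImageUrl({ inputImages: ["b.jpg", "c.jpg"] });                                // throws
```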
@@ -350,7 +373,10 @@ async function generateXaiVideo(req, apiKey) {
  ];
  }
  var xaiCapabilities = {
+ // xAI docs show a single image_url for edits and a single image_url for image-to-video.
  maxInputImages: 1,
+ // xAI aspect_ratio examples show "4:3"; docs don't publish a strict allowlist.
+ supportsCustomAspectRatio: true,
  supportsVideoInterpolation: false,
  // xAI does not support end frame
  videoDurationRange: [1, 15],
@@ -485,6 +511,8 @@ function buildImageInput(req) {
  var falCapabilities = {
  maxInputImages: 7,
  // Vidu supports up to 7 reference images
+ // fal models vary. We map common ratios to enums, but also allow custom pass-through.
+ supportsCustomAspectRatio: true,
  supportsVideoInterpolation: true,
  // Vidu start-end-to-video
  videoDurationRange: [2, 8],
@@ -654,6 +682,9 @@ async function sleep2(ms) {
  var googleCapabilities = {
  maxInputImages: 3,
  // Veo 3.1 supports up to 3 reference images
+ // Imagen / Veo aspect ratio is expressed as "w:h" (e.g. "16:9").
+ // Public docs/examples focus on the common set below.
+ supportedAspectRatios: ["1:1", "4:3", "3:4", "16:9", "9:16"],
  supportsVideoInterpolation: true,
  // Veo 3.1 supports first + last frame
  videoDurationRange: [4, 8],
@@ -818,7 +849,10 @@ async function generateWithGemini(ai, model, req) {
  model,
  contents: buildContents(),
  config: {
- responseModalities: ["IMAGE"]
+ responseModalities: ["IMAGE"],
+ // Gemini native image generation (Nano Banana) supports aspect ratio via imageConfig.
+ // Note: when editing from an input image, the model may still bias toward the input image's aspect.
+ ...req.aspectRatio ? { imageConfig: { aspectRatio: req.aspectRatio } } : {}
  }
  });
  log3(`API call ${i + 1} took ${Date.now() - callStart}ms`);
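The conditional spread keeps the request config unchanged in shape when no `--aspect-ratio` is given; `imageConfig` appears only when the flag is set. The pattern, isolated:

```js
// The spread pattern above, isolated: imageConfig is included only when
// an aspect ratio was requested, so the default config shape is unchanged.
function buildGeminiConfig(aspectRatio) {
  return {
    responseModalities: ["IMAGE"],
    ...(aspectRatio ? { imageConfig: { aspectRatio } } : {}),
  };
}

console.log(buildGeminiConfig("16:9"));    // { responseModalities: ["IMAGE"], imageConfig: { aspectRatio: "16:9" } }
console.log(buildGeminiConfig(undefined)); // { responseModalities: ["IMAGE"] }
```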
@@ -940,9 +974,21 @@ async function downloadBytes4(url) {
  log4(`Downloaded ${ab.byteLength} bytes in ${Date.now() - start}ms, type: ${ct}`);
  return ct ? { bytes: new Uint8Array(ab), mimeType: ct } : { bytes: new Uint8Array(ab) };
  }
+ function supportedAspectRatiosForModel(model) {
+ if (model.startsWith("gpt-image")) {
+ return ["1:1", "3:2", "4:3", "16:9", "2:3", "3:4", "9:16"];
+ }
+ if (model === "dall-e-3") {
+ return ["1:1", "4:3", "16:9", "3:4", "9:16"];
+ }
+ if (model === "dall-e-2") {
+ return ["1:1"];
+ }
+ return [];
+ }
  function mapAspectRatioToSize(aspectRatio, model) {
  if (!aspectRatio) return void 0;
- const ar = aspectRatio.trim();
+ const ar = aspectRatio.trim().replace(/\s+/g, "");
  if (model?.startsWith("gpt-image")) {
  if (ar === "1:1") return "1024x1024";
  if (ar === "3:2" || ar === "4:3" || ar === "16:9") return "1536x1024";
@@ -951,6 +997,8 @@ function mapAspectRatioToSize(aspectRatio, model) {
  if (ar === "1:1") return "1024x1024";
  if (ar === "16:9" || ar === "4:3") return "1792x1024";
  if (ar === "9:16" || ar === "3:4") return "1024x1792";
+ } else if (model === "dall-e-2") {
+ if (ar === "1:1") return "1024x1024";
  }
  return void 0;
  }
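Worked examples of the mapping, including the new whitespace normalization on the `ar` line and the new `dall-e-2` branch (expected values read off the code above; these calls assume the function as diffed):

```js
// Usage of mapAspectRatioToSize as diffed above. Whitespace inside the
// ratio is now stripped before matching, so padded input no longer
// silently falls through to undefined.
mapAspectRatioToSize("16:9", "gpt-image-1"); // "1536x1024"
mapAspectRatioToSize(" 4 : 3 ", "dall-e-3"); // "1792x1024" (normalized to "4:3" first)
mapAspectRatioToSize("1:1", "dall-e-2");     // "1024x1024" (new in 0.4.1)
mapAspectRatioToSize("21:9", "dall-e-2");    // undefined -> the new checks below throw
```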
@@ -970,6 +1018,12 @@ async function generateWithEdit(req, apiKey, model) {
  formData.append("prompt", req.prompt);
  formData.append("n", String(req.n));
  const size = mapAspectRatioToSize(req.aspectRatio, model);
+ if (req.aspectRatio && !size) {
+ const supported = supportedAspectRatiosForModel(model);
+ throw new Error(
+ `OpenAI model ${model} does not support aspect ratio "${req.aspectRatio}". Supported: ${supported.length ? supported.join(", ") : "unknown (model not recognized)"}`
+ );
+ }
  if (size) formData.append("size", size);
  const imageInput = req.inputImages?.[0];
  if (!imageInput) throw new Error("No input image provided for editing");
@@ -1048,6 +1102,12 @@ var openaiProvider = {
  return generateWithEdit(req, apiKey, model);
  }
  const size = mapAspectRatioToSize(req.aspectRatio, model);
+ if (req.aspectRatio && !size) {
+ const supported = supportedAspectRatiosForModel(model);
+ throw new Error(
+ `OpenAI model ${model} does not support aspect ratio "${req.aspectRatio}". Supported: ${supported.length ? supported.join(", ") : "unknown (model not recognized)"}`
+ );
+ }
  const body = {
  model,
  prompt: req.prompt,
@@ -1185,6 +1245,21 @@ function validateRequestForProvider(req, provider) {
  `Provider ${provider.id} supports max ${caps.maxInputImages} input image(s), but ${inputCount} provided`
  );
  }
+ if (req.aspectRatio) {
+ const normalized = req.aspectRatio.trim().replace(/\s+/g, "");
+ const looksLikeRatio = /^\d+:\d+$/.test(normalized);
+ if (!looksLikeRatio) {
+ throw new Error(`Invalid aspect ratio: "${req.aspectRatio}" (expected format: w:h)`);
+ }
+ if (caps.supportsCustomAspectRatio !== true && Array.isArray(caps.supportedAspectRatios) && caps.supportedAspectRatios.length) {
+ const ok = caps.supportedAspectRatios.includes(normalized);
+ if (!ok) {
+ throw new Error(
+ `Provider ${provider.id} does not support aspect ratio "${normalized}". Supported: ${caps.supportedAspectRatios.join(", ")}`
+ );
+ }
+ }
+ }
  if (req.endFrame && !caps.supportsVideoInterpolation) {
  throw new Error(
  `Provider ${provider.id} does not support video interpolation (end frame). Only startFrame is supported for image-to-video.`
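Taken together, the shared validation added here does two things: it rejects anything that isn't syntactically `w:h`, and it enforces an allowlist only for providers that declare one and don't accept custom ratios. A condensed, self-contained sketch (`checkAspectRatio` is a name invented for illustration):

```js
// Condensed from the validateRequestForProvider hunk above.
function checkAspectRatio(aspectRatio, caps, providerId) {
  const normalized = aspectRatio.trim().replace(/\s+/g, "");
  if (!/^\d+:\d+$/.test(normalized)) {
    throw new Error(`Invalid aspect ratio: "${aspectRatio}" (expected format: w:h)`);
  }
  const hasAllowlist =
    caps.supportsCustomAspectRatio !== true &&
    Array.isArray(caps.supportedAspectRatios) &&
    caps.supportedAspectRatios.length > 0;
  if (hasAllowlist && !caps.supportedAspectRatios.includes(normalized)) {
    throw new Error(`Provider ${providerId} does not support aspect ratio "${normalized}".`);
  }
  return normalized;
}

checkAspectRatio("16 : 9", { supportedAspectRatios: ["1:1", "16:9"] }, "google"); // "16:9"
checkAspectRatio("5:4", { supportsCustomAspectRatio: true }, "xai");              // "5:4"
// checkAspectRatio("5:4", { supportedAspectRatios: ["1:1"] }, "google");         // throws
```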
@@ -1230,7 +1305,7 @@ async function generateMedia(prompt, opts = {}) {
  for (let i = 0; i < partials.length; i++) {
  const p = partials[i];
  if (!p) continue;
- const filePath = makeOutputPath(req, i);
+ const filePath = makeOutputPath(req, i, p.mimeType);
  log5(verbose, `Writing ${p.bytes.byteLength} bytes to: ${filePath}`);
  await writeMediaFile(filePath, p.bytes);
  items.push({ ...p, filePath });