climage 0.4.2 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -100,11 +100,12 @@ Set one of:
100
100
 
101
101
  **Models:**
102
102
 
103
- | Model | Description |
104
- | --------------------- | -------------------------------------- |
105
- | `fal-ai/flux/dev` | **Default.** Flux dev (fast & popular) |
106
- | `fal-ai/flux/pro` | Flux pro (higher quality) |
107
- | `fal-ai/flux-realism` | Photorealistic style |
103
+ | Model | Description |
104
+ | ------------------------------------------ | -------------------------------------- |
105
+ | `fal-ai/flux/dev` | **Default.** Flux dev (fast & popular) |
106
+ | `fal-ai/flux/pro` | Flux pro (higher quality) |
107
+ | `fal-ai/flux-realism` | Photorealistic style |
108
+ | `fal-ai/kling-video/v3/pro/image-to-video` | Kling v3 Pro image-to-video |
108
109
 
109
110
  Example:
110
111
 
@@ -166,6 +167,9 @@ npx climage "the scene comes to life" --video --provider google --start-frame sc
166
167
  # Image-to-video with fal.ai
167
168
  npx climage "dramatic camera zoom" --video --provider fal --start-frame photo.jpg
168
169
 
170
+ # Image-to-video with fal.ai Kling v3 Pro
171
+ npx climage "dramatic camera zoom" --video --provider fal --model fal-ai/kling-video/v3/pro/image-to-video --start-frame photo.jpg
172
+
169
173
  # Image-to-video with xAI
170
174
  npx climage "animate this scene" --video --provider xai --start-frame cat.png
171
175
  ```
@@ -204,7 +208,9 @@ npx climage "character in motion" --video --provider fal --input ref1.png --inpu
204
208
  | Image-to-Video | Yes | Yes | Yes | No |
205
209
  | Video Interpolation | Yes | No | Yes | No |
206
210
  | Max Input Images | 3 | 1 | 7 | 2 |
207
- | Video Duration (seconds) | 4-8 | 1-15 | 2-8 | N/A |
211
+ | Video Duration (seconds) | 4-8 | 1-15 | 2-15\* | N/A |
212
+
213
+ \* Model-specific on fal.ai (e.g. Vidu: 2-8, Kling v3 Pro: 3-15).
208
214
 
209
215
  ## Library API
210
216
 
package/dist/cli.js CHANGED
@@ -437,8 +437,15 @@ var DEFAULT_IMAGE_MODEL = "fal-ai/flux/dev";
437
437
  var DEFAULT_IMAGE_TO_IMAGE_MODEL = "fal-ai/flux/dev/image-to-image";
438
438
  var DEFAULT_VIDEO_MODEL = "fal-ai/ltxv-2/text-to-video/fast";
439
439
  var DEFAULT_IMAGE_TO_VIDEO_MODEL = "fal-ai/vidu/q2/image-to-video";
440
+ var KLING_V3_PRO_IMAGE_TO_VIDEO_MODEL = "fal-ai/kling-video/v3/pro/image-to-video";
440
441
  var DEFAULT_START_END_VIDEO_MODEL = "fal-ai/vidu/start-end-to-video";
441
442
  var DEFAULT_REFERENCE_VIDEO_MODEL = "fal-ai/vidu/q2/reference-to-video";
443
/**
 * Reports whether `model` names a fal.ai Kling v3 video model.
 *
 * Matches the pinned Kling v3 Pro image-to-video ID as well as any other
 * model ID under the `fal-ai/kling-video/v3/` prefix.
 *
 * @param {unknown} model - Candidate fal.ai model ID.
 * @returns {boolean} `true` for Kling v3 model IDs; `false` otherwise,
 *   including when `model` is missing or not a string.
 */
function isKlingV3Model(model) {
  // Guard first: calling `startsWith` on a missing model would throw a TypeError.
  if (typeof model !== "string") return false;
  return model === KLING_V3_PRO_IMAGE_TO_VIDEO_MODEL || model.startsWith("fal-ai/kling-video/v3/");
}
446
/**
 * Reports whether `model` names a Vidu model, identified by a `/vidu/`
 * segment anywhere in the model ID.
 *
 * @param {unknown} model - Candidate fal.ai model ID.
 * @returns {boolean} `true` when the ID contains `/vidu/`; `false` otherwise,
 *   including when `model` is missing or not a string.
 */
function isViduModel(model) {
  // Guard first: calling `includes` on a missing model would throw a TypeError.
  if (typeof model !== "string") return false;
  return model.includes("/vidu/");
}
442
449
  function selectVideoModel(req) {
443
450
  if (req.model) return req.model;
444
451
  if (req.startFrame && req.endFrame) {
@@ -457,9 +464,12 @@ function selectImageModel(req) {
457
464
  if (req.inputImages?.length) return DEFAULT_IMAGE_TO_IMAGE_MODEL;
458
465
  return DEFAULT_IMAGE_MODEL;
459
466
  }
460
- function mapAspectRatio(aspectRatio) {
467
+ function mapAspectRatio(aspectRatio, model) {
461
468
  if (!aspectRatio) return void 0;
462
469
  const ar = aspectRatio.trim();
470
+ if (model && isKlingV3Model(model)) {
471
+ return ar;
472
+ }
463
473
  if (ar === "1:1") return "square";
464
474
  if (ar === "4:3") return "landscape_4_3";
465
475
  if (ar === "16:9") return "landscape_16_9";
@@ -467,29 +477,50 @@ function mapAspectRatio(aspectRatio) {
467
477
  if (ar === "9:16") return "portrait_16_9";
468
478
  return ar;
469
479
  }
470
- function buildVideoInput(req) {
480
+ function buildVideoInput(req, model) {
471
481
  const input = {
472
482
  prompt: req.prompt
473
483
  };
474
484
  if (req.startFrame && req.endFrame) {
475
485
  input.start_image_url = req.startFrame;
476
486
  input.end_image_url = req.endFrame;
487
+ const ar = mapAspectRatio(req.aspectRatio, model);
488
+ if (ar) input.aspect_ratio = ar;
489
+ if (req.duration) input.duration = String(req.duration);
477
490
  return input;
478
491
  }
479
492
  if (req.inputImages?.length && !req.startFrame) {
493
+ if (isKlingV3Model(model)) {
494
+ input.start_image_url = req.inputImages[0];
495
+ const ar2 = mapAspectRatio(req.aspectRatio, model);
496
+ if (ar2) input.aspect_ratio = ar2;
497
+ if (req.duration) input.duration = String(req.duration);
498
+ return input;
499
+ }
480
500
  input.reference_image_urls = req.inputImages.slice(0, 7);
481
- const ar = mapAspectRatio(req.aspectRatio);
501
+ const ar = mapAspectRatio(req.aspectRatio, model);
482
502
  if (ar) input.aspect_ratio = ar;
483
503
  if (req.duration) input.duration = String(req.duration);
484
504
  return input;
485
505
  }
486
506
  const imageUrl = req.startFrame ?? req.inputImages?.[0];
487
507
  if (imageUrl) {
488
- input.image_url = imageUrl;
508
+ if (isKlingV3Model(model)) {
509
+ input.start_image_url = imageUrl;
510
+ const ar = mapAspectRatio(req.aspectRatio, model);
511
+ if (ar) input.aspect_ratio = ar;
512
+ } else {
513
+ input.image_url = imageUrl;
514
+ }
489
515
  if (req.duration) input.duration = String(req.duration);
490
516
  return input;
491
517
  }
492
- const imageSize = mapAspectRatio(req.aspectRatio);
518
+ if (isKlingV3Model(model)) {
519
+ throw new Error(
520
+ `Model ${model} requires --start-frame (or --input) because it is image-to-video only`
521
+ );
522
+ }
523
+ const imageSize = mapAspectRatio(req.aspectRatio, model);
493
524
  if (imageSize) input.image_size = imageSize;
494
525
  if (req.n) input.num_videos = req.n;
495
526
  return input;
@@ -514,8 +545,8 @@ var falCapabilities = {
514
545
  supportsCustomAspectRatio: true,
515
546
  supportsVideoInterpolation: true,
516
547
  // Vidu start-end-to-video
517
- videoDurationRange: [2, 8],
518
- // Vidu supports 2-8 seconds
548
+ videoDurationRange: [2, 15],
549
+ // Most models are 2-8; Kling v3 supports up to 15
519
550
  supportsImageEditing: true
520
551
  };
521
552
  var falProvider = {
@@ -543,7 +574,19 @@ var falProvider = {
543
574
  fal.config({ credentials: key });
544
575
  const model = req.kind === "video" ? selectVideoModel(req) : selectImageModel(req);
545
576
  log2(verbose, "Selected model:", model);
546
- const input = req.kind === "video" ? buildVideoInput(req) : buildImageInput(req);
577
+ if (req.kind === "video" && req.duration !== void 0) {
578
+ if (isKlingV3Model(model) && (req.duration < 3 || req.duration > 15)) {
579
+ throw new Error(
580
+ `Model ${model} supports video duration 3-15s, but ${req.duration}s requested`
581
+ );
582
+ }
583
+ if (isViduModel(model) && (req.duration < 2 || req.duration > 8)) {
584
+ throw new Error(
585
+ `Model ${model} supports video duration 2-8s, but ${req.duration}s requested`
586
+ );
587
+ }
588
+ }
589
+ const input = req.kind === "video" ? buildVideoInput(req, model) : buildImageInput(req);
547
590
  const inputSummary = { ...input };
548
591
  for (const key2 of ["image_url", "start_image_url", "end_image_url"]) {
549
592
  if (typeof inputSummary[key2] === "string" && inputSummary[key2].startsWith("data:")) {
@@ -610,6 +653,9 @@ var falProvider = {
610
653
 
611
654
  // src/providers/google.ts
612
655
  import { GoogleGenAI } from "@google/genai";
656
+ import { mkdtemp, readFile, rm } from "fs/promises";
657
+ import { tmpdir } from "os";
658
+ import { join } from "path";
613
659
  function getGeminiApiKey(env) {
614
660
  return env.GEMINI_API_KEY || env.GOOGLE_API_KEY || env.GOOGLE_GENAI_API_KEY;
615
661
  }
@@ -657,6 +703,21 @@ function imageToGoogleFormat(imageInput) {
657
703
  }
658
704
  return { fileUri: imageInput };
659
705
  }
706
/**
 * Converts an image reference into the image object attached to Veo requests.
 *
 * - `data:` URIs are decoded with `parseDataUri` and returned as
 *   `{ imageBytes, mimeType }`.
 * - `gs://` URIs are returned as `{ gcsUri }`.
 *
 * @param {string} imageInput - A `data:` URI or a `gs://` URI.
 * @returns {{ imageBytes: string, mimeType: string } | { gcsUri: string }}
 * @throws {Error} When a `data:` URI yields no data, or when the input uses
 *   any other scheme (the message quotes the first 24 characters only).
 */
function imageToVeoFormat(imageInput) {
  const isDataUri = imageInput.startsWith("data:");
  const isGcsUri = !isDataUri && imageInput.startsWith("gs://");

  // Reject unsupported schemes up front; only a short prefix is echoed back.
  if (!isDataUri && !isGcsUri) {
    throw new Error(
      `Veo image inputs must be data: URIs or gs:// URIs (got ${imageInput.slice(0, 24)}...)`
    );
  }

  if (isGcsUri) {
    return { gcsUri: imageInput };
  }

  const decoded = parseDataUri(imageInput);
  if (!decoded?.data) {
    throw new Error("Failed to parse data URI for Veo image input");
  }
  const { data, mimeType } = decoded;
  return { imageBytes: data, mimeType };
}
660
721
  var GEMINI_IMAGE_MODELS = ["gemini-2.5-flash-image", "gemini-3-pro-image-preview"];
661
722
  function resolveModel(model) {
662
723
  if (!model) return "gemini-2.5-flash-image";
@@ -675,6 +736,31 @@ async function downloadBytes3(url) {
675
736
  log3(`Downloaded ${ab.byteLength} bytes in ${Date.now() - start}ms, type: ${ct}`);
676
737
  return { bytes: new Uint8Array(ab), mimeType: ct };
677
738
  }
739
/**
 * Retrieves the bytes of one generated video, trying three sources in order:
 *
 * 1. Inline base64 `video.videoBytes`, decoded locally.
 * 2. `video.uri`, fetched with `downloadBytes3` unless it is a `gs://` URI.
 *    A failure here is logged and falls through to step 3.
 * 3. `ai.files.download` into a fresh temporary directory, which is removed
 *    afterwards whether or not the download succeeded.
 *
 * @param {object} ai - Client exposing `files.download({ file, downloadPath })`.
 * @param {object} generatedVideo - Generated-video entry; its `video` field is read.
 * @returns {Promise<{ bytes: Uint8Array, mimeType: string | undefined }>}
 *   Step 3 defaults `mimeType` to "video/mp4"; step 1 passes `video.mimeType`
 *   through unchanged.
 */
async function downloadGeneratedVideo(ai, generatedVideo) {
  const payload = generatedVideo?.video;

  // Inline bytes need no download at all.
  if (payload?.videoBytes) {
    const decoded = Buffer.from(payload.videoBytes, "base64");
    return { bytes: new Uint8Array(decoded), mimeType: payload.mimeType };
  }

  const hasDirectUri = Boolean(payload?.uri) && !payload.uri.startsWith("gs://");
  if (hasDirectUri) {
    try {
      return await downloadBytes3(payload.uri);
    } catch (err) {
      // Best effort only: the SDK download below is the fallback.
      log3("Direct video download failed, falling back to ai.files.download:", String(err));
    }
  }

  const scratchDir = await mkdtemp(join(tmpdir(), "climage-veo-"));
  try {
    const target = join(scratchDir, "video.mp4");
    await ai.files.download({ file: generatedVideo, downloadPath: target });
    const contents = await readFile(target);
    return { bytes: new Uint8Array(contents), mimeType: payload?.mimeType ?? "video/mp4" };
  } finally {
    await rm(scratchDir, { recursive: true, force: true });
  }
}
678
764
  async function sleep2(ms) {
679
765
  await new Promise((r) => setTimeout(r, ms));
680
766
  }
@@ -740,15 +826,15 @@ async function generateWithVeo(ai, model, req) {
740
826
  const config = {
741
827
  numberOfVideos: req.n,
742
828
  ...req.aspectRatio ? { aspectRatio: req.aspectRatio } : {},
743
- // Add duration if specified (Veo 3.1 supports 4, 6, 8)
744
- ...req.duration !== void 0 ? { durationSeconds: String(req.duration) } : {}
829
+ // Add duration if specified (Veo supports 4-8 seconds depending on model)
830
+ ...req.duration !== void 0 ? { durationSeconds: req.duration } : {}
745
831
  };
746
832
  if (req.inputImages?.length && isVeo31Model(model)) {
747
833
  const referenceImages = req.inputImages.slice(0, 3).map((img) => {
748
- const imageData = imageToGoogleFormat(img);
834
+ const imageData = imageToVeoFormat(img);
749
835
  return {
750
836
  image: imageData,
751
- referenceType: "asset"
837
+ referenceType: "ASSET"
752
838
  };
753
839
  });
754
840
  config.referenceImages = referenceImages;
@@ -761,12 +847,12 @@ async function generateWithVeo(ai, model, req) {
761
847
  };
762
848
  const firstFrameImage = req.startFrame ?? (req.inputImages?.length === 1 ? req.inputImages[0] : void 0);
763
849
  if (firstFrameImage && isVeo31Model(model)) {
764
- const imageData = imageToGoogleFormat(firstFrameImage);
850
+ const imageData = imageToVeoFormat(firstFrameImage);
765
851
  generateParams.image = imageData;
766
852
  log3("Added first frame image");
767
853
  }
768
854
  if (req.endFrame && isVeo31Model(model)) {
769
- const lastFrameData = imageToGoogleFormat(req.endFrame);
855
+ const lastFrameData = imageToVeoFormat(req.endFrame);
770
856
  config.lastFrame = lastFrameData;
771
857
  log3("Added last frame for interpolation");
772
858
  }
@@ -796,26 +882,22 @@ async function generateWithVeo(ai, model, req) {
796
882
  for (let i = 0; i < Math.min(videos.length, req.n); i++) {
797
883
  const v = videos[i];
798
884
  log3(`Processing video ${i}:`, JSON.stringify(v).slice(0, 300));
799
- const uri = v?.video?.uri;
800
- if (!uri) {
801
- log3(`Video ${i} has no URI, skipping`);
885
+ if (!v?.video) {
886
+ log3(`Video ${i} has no video payload, skipping`);
802
887
  continue;
803
888
  }
804
- if (uri.startsWith("gs://")) {
805
- throw new Error(
806
- `Google Veo returned a gs:// URI (${uri}). Configure outputGcsUri / Vertex flow to fetch from GCS.`
807
- );
808
- }
809
- const { bytes, mimeType } = await downloadBytes3(uri);
810
- out.push({
889
+ const uri = v?.video?.uri;
890
+ const { bytes, mimeType } = await downloadGeneratedVideo(ai, v);
891
+ const item = {
811
892
  kind: "video",
812
893
  provider: "google",
813
894
  model,
814
895
  index: i,
815
- url: uri,
816
896
  bytes,
817
897
  ...mimeType !== void 0 ? { mimeType } : {}
818
- });
898
+ };
899
+ if (uri) item.url = uri;
900
+ out.push(item);
819
901
  }
820
902
  if (!out.length) throw new Error("Google Veo returned videos but none were downloadable");
821
903
  log3(`Successfully generated ${out.length} video(s)`);