climage 0.4.2 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -408,8 +408,15 @@ var DEFAULT_IMAGE_MODEL = "fal-ai/flux/dev";
408
408
  var DEFAULT_IMAGE_TO_IMAGE_MODEL = "fal-ai/flux/dev/image-to-image";
409
409
  var DEFAULT_VIDEO_MODEL = "fal-ai/ltxv-2/text-to-video/fast";
410
410
  var DEFAULT_IMAGE_TO_VIDEO_MODEL = "fal-ai/vidu/q2/image-to-video";
411
+ var KLING_V3_PRO_IMAGE_TO_VIDEO_MODEL = "fal-ai/kling-video/v3/pro/image-to-video";
411
412
  var DEFAULT_START_END_VIDEO_MODEL = "fal-ai/vidu/start-end-to-video";
412
413
  var DEFAULT_REFERENCE_VIDEO_MODEL = "fal-ai/vidu/q2/reference-to-video";
414
/**
 * Reports whether `model` identifies a Kling v3 video model.
 *
 * A model matches when it equals KLING_V3_PRO_IMAGE_TO_VIDEO_MODEL or when its
 * id starts with the "fal-ai/kling-video/v3/" prefix.
 *
 * @param {unknown} model - Model id to test. Non-string values (for example an
 *   omitted argument) are reported as "not Kling v3" rather than raising a
 *   TypeError from the string method call.
 * @returns {boolean} True when `model` names a Kling v3 model.
 */
function isKlingV3Model(model) {
  // Guard first: callers such as mapAspectRatio treat `model` as optional.
  if (typeof model !== "string") return false;
  return model === KLING_V3_PRO_IMAGE_TO_VIDEO_MODEL || model.startsWith("fal-ai/kling-video/v3/");
}
417
/**
 * Reports whether `model` identifies a Vidu model, i.e. its id contains the
 * "/vidu/" path segment.
 *
 * @param {unknown} model - Model id to test. Non-string values (for example an
 *   omitted argument) are reported as "not Vidu" rather than raising a
 *   TypeError from the string method call.
 * @returns {boolean} True when `model` is a string containing "/vidu/".
 */
function isViduModel(model) {
  return typeof model === "string" && model.includes("/vidu/");
}
413
420
  function selectVideoModel(req) {
414
421
  if (req.model) return req.model;
415
422
  if (req.startFrame && req.endFrame) {
@@ -428,9 +435,12 @@ function selectImageModel(req) {
428
435
  if (req.inputImages?.length) return DEFAULT_IMAGE_TO_IMAGE_MODEL;
429
436
  return DEFAULT_IMAGE_MODEL;
430
437
  }
431
- function mapAspectRatio(aspectRatio) {
438
+ function mapAspectRatio(aspectRatio, model) {
432
439
  if (!aspectRatio) return void 0;
433
440
  const ar = aspectRatio.trim();
441
+ if (model && isKlingV3Model(model)) {
442
+ return ar;
443
+ }
434
444
  if (ar === "1:1") return "square";
435
445
  if (ar === "4:3") return "landscape_4_3";
436
446
  if (ar === "16:9") return "landscape_16_9";
@@ -438,29 +448,50 @@ function mapAspectRatio(aspectRatio) {
438
448
  if (ar === "9:16") return "portrait_16_9";
439
449
  return ar;
440
450
  }
441
- function buildVideoInput(req) {
451
+ function buildVideoInput(req, model) {
442
452
  const input = {
443
453
  prompt: req.prompt
444
454
  };
445
455
  if (req.startFrame && req.endFrame) {
446
456
  input.start_image_url = req.startFrame;
447
457
  input.end_image_url = req.endFrame;
458
+ const ar = mapAspectRatio(req.aspectRatio, model);
459
+ if (ar) input.aspect_ratio = ar;
460
+ if (req.duration) input.duration = String(req.duration);
448
461
  return input;
449
462
  }
450
463
  if (req.inputImages?.length && !req.startFrame) {
464
+ if (isKlingV3Model(model)) {
465
+ input.start_image_url = req.inputImages[0];
466
+ const ar2 = mapAspectRatio(req.aspectRatio, model);
467
+ if (ar2) input.aspect_ratio = ar2;
468
+ if (req.duration) input.duration = String(req.duration);
469
+ return input;
470
+ }
451
471
  input.reference_image_urls = req.inputImages.slice(0, 7);
452
- const ar = mapAspectRatio(req.aspectRatio);
472
+ const ar = mapAspectRatio(req.aspectRatio, model);
453
473
  if (ar) input.aspect_ratio = ar;
454
474
  if (req.duration) input.duration = String(req.duration);
455
475
  return input;
456
476
  }
457
477
  const imageUrl = req.startFrame ?? req.inputImages?.[0];
458
478
  if (imageUrl) {
459
- input.image_url = imageUrl;
479
+ if (isKlingV3Model(model)) {
480
+ input.start_image_url = imageUrl;
481
+ const ar = mapAspectRatio(req.aspectRatio, model);
482
+ if (ar) input.aspect_ratio = ar;
483
+ } else {
484
+ input.image_url = imageUrl;
485
+ }
460
486
  if (req.duration) input.duration = String(req.duration);
461
487
  return input;
462
488
  }
463
- const imageSize = mapAspectRatio(req.aspectRatio);
489
+ if (isKlingV3Model(model)) {
490
+ throw new Error(
491
+ `Model ${model} requires --start-frame (or --input) because it is image-to-video only`
492
+ );
493
+ }
494
+ const imageSize = mapAspectRatio(req.aspectRatio, model);
464
495
  if (imageSize) input.image_size = imageSize;
465
496
  if (req.n) input.num_videos = req.n;
466
497
  return input;
@@ -485,8 +516,8 @@ var falCapabilities = {
485
516
  supportsCustomAspectRatio: true,
486
517
  supportsVideoInterpolation: true,
487
518
  // Vidu start-end-to-video
488
- videoDurationRange: [2, 8],
489
- // Vidu supports 2-8 seconds
519
+ videoDurationRange: [2, 15],
520
+ // Most models are 2-8; Kling v3 supports up to 15
490
521
  supportsImageEditing: true
491
522
  };
492
523
  var falProvider = {
@@ -514,7 +545,19 @@ var falProvider = {
514
545
  fal.config({ credentials: key });
515
546
  const model = req.kind === "video" ? selectVideoModel(req) : selectImageModel(req);
516
547
  log2(verbose, "Selected model:", model);
517
- const input = req.kind === "video" ? buildVideoInput(req) : buildImageInput(req);
548
+ if (req.kind === "video" && req.duration !== void 0) {
549
+ if (isKlingV3Model(model) && (req.duration < 3 || req.duration > 15)) {
550
+ throw new Error(
551
+ `Model ${model} supports video duration 3-15s, but ${req.duration}s requested`
552
+ );
553
+ }
554
+ if (isViduModel(model) && (req.duration < 2 || req.duration > 8)) {
555
+ throw new Error(
556
+ `Model ${model} supports video duration 2-8s, but ${req.duration}s requested`
557
+ );
558
+ }
559
+ }
560
+ const input = req.kind === "video" ? buildVideoInput(req, model) : buildImageInput(req);
518
561
  const inputSummary = { ...input };
519
562
  for (const key2 of ["image_url", "start_image_url", "end_image_url"]) {
520
563
  if (typeof inputSummary[key2] === "string" && inputSummary[key2].startsWith("data:")) {
@@ -581,6 +624,9 @@ var falProvider = {
581
624
 
582
625
  // src/providers/google.ts
583
626
  import { GoogleGenAI } from "@google/genai";
627
+ import { mkdtemp, readFile, rm } from "fs/promises";
628
+ import { tmpdir } from "os";
629
+ import { join } from "path";
584
630
  function getGeminiApiKey(env) {
585
631
  return env.GEMINI_API_KEY || env.GOOGLE_API_KEY || env.GOOGLE_GENAI_API_KEY;
586
632
  }
@@ -628,6 +674,21 @@ function imageToGoogleFormat(imageInput) {
628
674
  }
629
675
  return { fileUri: imageInput };
630
676
  }
677
/**
 * Converts an image input string into the image object used for Veo requests.
 *
 * - "data:" URIs are parsed with parseDataUri and returned as
 *   `{ imageBytes, mimeType }`.
 * - "gs://" URIs are returned as `{ gcsUri }`.
 *
 * @param {string} imageInput - A data: URI or a gs:// URI.
 * @returns {{imageBytes: *, mimeType: *} | {gcsUri: string}} Image object.
 * @throws {Error} When a data: URI yields no data after parsing, or when the
 *   input uses neither supported scheme.
 */
function imageToVeoFormat(imageInput) {
  const isDataUri = imageInput.startsWith("data:");
  if (!isDataUri) {
    if (imageInput.startsWith("gs://")) {
      return { gcsUri: imageInput };
    }
    // Only a short prefix of the rejected input is echoed back in the message.
    throw new Error(
      `Veo image inputs must be data: URIs or gs:// URIs (got ${imageInput.slice(0, 24)}...)`
    );
  }

  const parsed = parseDataUri(imageInput);
  if (!parsed?.data) {
    throw new Error("Failed to parse data URI for Veo image input");
  }
  const { data, mimeType } = parsed;
  return { imageBytes: data, mimeType };
}
631
692
  var GEMINI_IMAGE_MODELS = ["gemini-2.5-flash-image", "gemini-3-pro-image-preview"];
632
693
  function resolveModel(model) {
633
694
  if (!model) return "gemini-2.5-flash-image";
@@ -646,6 +707,31 @@ async function downloadBytes3(url) {
646
707
  log3(`Downloaded ${ab.byteLength} bytes in ${Date.now() - start}ms, type: ${ct}`);
647
708
  return { bytes: new Uint8Array(ab), mimeType: ct };
648
709
  }
710
/**
 * Obtains the bytes of one generated video, trying three sources in order:
 *
 * 1. Inline base64 `videoBytes` on the video payload.
 * 2. A direct fetch of `video.uri` via downloadBytes3, skipped for gs:// URIs.
 *    A failure here is logged and the next source is tried.
 * 3. `ai.files.download` into a fresh temporary directory, which is removed
 *    afterwards whether or not the download succeeded.
 *
 * @param {object} ai - Client object exposing `files.download`.
 * @param {object} generatedVideo - Generated-video entry; its `video` property
 *   is read for `videoBytes`, `uri` and `mimeType`.
 * @returns {Promise<{bytes: Uint8Array, mimeType: *}>} Video bytes and MIME
 *   type. The MIME type defaults to "video/mp4" only on the
 *   `ai.files.download` path.
 */
async function downloadGeneratedVideo(ai, generatedVideo) {
  const video = generatedVideo?.video;

  const inlineBase64 = video?.videoBytes;
  if (inlineBase64) {
    const decoded = Buffer.from(inlineBase64, "base64");
    return { bytes: new Uint8Array(decoded), mimeType: video.mimeType };
  }

  const remoteUri = video?.uri;
  const canFetchDirectly = Boolean(remoteUri) && !remoteUri.startsWith("gs://");
  if (canFetchDirectly) {
    try {
      return await downloadBytes3(remoteUri);
    } catch (err) {
      // Best effort only: fall through to the SDK download below.
      log3("Direct video download failed, falling back to ai.files.download:", String(err));
    }
  }

  const scratchDir = await mkdtemp(join(tmpdir(), "climage-veo-"));
  const downloadPath = join(scratchDir, "video.mp4");
  try {
    await ai.files.download({ file: generatedVideo, downloadPath });
    const fileContents = await readFile(downloadPath);
    const mimeType = video?.mimeType ?? "video/mp4";
    return { bytes: new Uint8Array(fileContents), mimeType };
  } finally {
    await rm(scratchDir, { recursive: true, force: true });
  }
}
649
735
  async function sleep2(ms) {
650
736
  await new Promise((r) => setTimeout(r, ms));
651
737
  }
@@ -711,15 +797,15 @@ async function generateWithVeo(ai, model, req) {
711
797
  const config = {
712
798
  numberOfVideos: req.n,
713
799
  ...req.aspectRatio ? { aspectRatio: req.aspectRatio } : {},
714
- // Add duration if specified (Veo 3.1 supports 4, 6, 8)
715
- ...req.duration !== void 0 ? { durationSeconds: String(req.duration) } : {}
800
+ // Add duration if specified (Veo supports 4-8 seconds depending on model)
801
+ ...req.duration !== void 0 ? { durationSeconds: req.duration } : {}
716
802
  };
717
803
  if (req.inputImages?.length && isVeo31Model(model)) {
718
804
  const referenceImages = req.inputImages.slice(0, 3).map((img) => {
719
- const imageData = imageToGoogleFormat(img);
805
+ const imageData = imageToVeoFormat(img);
720
806
  return {
721
807
  image: imageData,
722
- referenceType: "asset"
808
+ referenceType: "ASSET"
723
809
  };
724
810
  });
725
811
  config.referenceImages = referenceImages;
@@ -732,12 +818,12 @@ async function generateWithVeo(ai, model, req) {
732
818
  };
733
819
  const firstFrameImage = req.startFrame ?? (req.inputImages?.length === 1 ? req.inputImages[0] : void 0);
734
820
  if (firstFrameImage && isVeo31Model(model)) {
735
- const imageData = imageToGoogleFormat(firstFrameImage);
821
+ const imageData = imageToVeoFormat(firstFrameImage);
736
822
  generateParams.image = imageData;
737
823
  log3("Added first frame image");
738
824
  }
739
825
  if (req.endFrame && isVeo31Model(model)) {
740
- const lastFrameData = imageToGoogleFormat(req.endFrame);
826
+ const lastFrameData = imageToVeoFormat(req.endFrame);
741
827
  config.lastFrame = lastFrameData;
742
828
  log3("Added last frame for interpolation");
743
829
  }
@@ -767,26 +853,22 @@ async function generateWithVeo(ai, model, req) {
767
853
  for (let i = 0; i < Math.min(videos.length, req.n); i++) {
768
854
  const v = videos[i];
769
855
  log3(`Processing video ${i}:`, JSON.stringify(v).slice(0, 300));
770
- const uri = v?.video?.uri;
771
- if (!uri) {
772
- log3(`Video ${i} has no URI, skipping`);
856
+ if (!v?.video) {
857
+ log3(`Video ${i} has no video payload, skipping`);
773
858
  continue;
774
859
  }
775
- if (uri.startsWith("gs://")) {
776
- throw new Error(
777
- `Google Veo returned a gs:// URI (${uri}). Configure outputGcsUri / Vertex flow to fetch from GCS.`
778
- );
779
- }
780
- const { bytes, mimeType } = await downloadBytes3(uri);
781
- out.push({
860
+ const uri = v?.video?.uri;
861
+ const { bytes, mimeType } = await downloadGeneratedVideo(ai, v);
862
+ const item = {
782
863
  kind: "video",
783
864
  provider: "google",
784
865
  model,
785
866
  index: i,
786
- url: uri,
787
867
  bytes,
788
868
  ...mimeType !== void 0 ? { mimeType } : {}
789
- });
869
+ };
870
+ if (uri) item.url = uri;
871
+ out.push(item);
790
872
  }
791
873
  if (!out.length) throw new Error("Google Veo returned videos but none were downloadable");
792
874
  log3(`Successfully generated ${out.length} video(s)`);