climage 0.4.2 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -6
- package/dist/cli.js +108 -26
- package/dist/cli.js.map +1 -1
- package/dist/index.js +108 -26
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -100,11 +100,12 @@ Set one of:
|
|
|
100
100
|
|
|
101
101
|
**Models:**
|
|
102
102
|
|
|
103
|
-
| Model
|
|
104
|
-
|
|
|
105
|
-
| `fal-ai/flux/dev`
|
|
106
|
-
| `fal-ai/flux/pro`
|
|
107
|
-
| `fal-ai/flux-realism`
|
|
103
|
+
| Model | Description |
|
|
104
|
+
| ------------------------------------------ | -------------------------------------- |
|
|
105
|
+
| `fal-ai/flux/dev` | **Default.** Flux dev (fast & popular) |
|
|
106
|
+
| `fal-ai/flux/pro` | Flux pro (higher quality) |
|
|
107
|
+
| `fal-ai/flux-realism` | Photorealistic style |
|
|
108
|
+
| `fal-ai/kling-video/v3/pro/image-to-video` | Kling v3 Pro image-to-video |
|
|
108
109
|
|
|
109
110
|
Example:
|
|
110
111
|
|
|
@@ -166,6 +167,9 @@ npx climage "the scene comes to life" --video --provider google --start-frame sc
|
|
|
166
167
|
# Image-to-video with fal.ai
|
|
167
168
|
npx climage "dramatic camera zoom" --video --provider fal --start-frame photo.jpg
|
|
168
169
|
|
|
170
|
+
# Image-to-video with fal.ai Kling v3 Pro
|
|
171
|
+
npx climage "dramatic camera zoom" --video --provider fal --model fal-ai/kling-video/v3/pro/image-to-video --start-frame photo.jpg
|
|
172
|
+
|
|
169
173
|
# Image-to-video with xAI
|
|
170
174
|
npx climage "animate this scene" --video --provider xai --start-frame cat.png
|
|
171
175
|
```
|
|
@@ -204,7 +208,9 @@ npx climage "character in motion" --video --provider fal --input ref1.png --inpu
|
|
|
204
208
|
| Image-to-Video | Yes | Yes | Yes | No |
|
|
205
209
|
| Video Interpolation | Yes | No | Yes | No |
|
|
206
210
|
| Max Input Images | 3 | 1 | 7 | 2 |
|
|
207
|
-
| Video Duration (seconds) | 4-8 | 1-15 | 2-
|
|
211
|
+
| Video Duration (seconds) | 4-8 | 1-15 | 2-15\* | N/A |
|
|
212
|
+
|
|
213
|
+
\* Model-specific on fal.ai (e.g. Vidu: 2-8, Kling v3 Pro: 3-15).
|
|
208
214
|
|
|
209
215
|
## Library API
|
|
210
216
|
|
package/dist/cli.js
CHANGED
|
@@ -437,8 +437,15 @@ var DEFAULT_IMAGE_MODEL = "fal-ai/flux/dev";
|
|
|
437
437
|
var DEFAULT_IMAGE_TO_IMAGE_MODEL = "fal-ai/flux/dev/image-to-image";
|
|
438
438
|
var DEFAULT_VIDEO_MODEL = "fal-ai/ltxv-2/text-to-video/fast";
|
|
439
439
|
var DEFAULT_IMAGE_TO_VIDEO_MODEL = "fal-ai/vidu/q2/image-to-video";
|
|
440
|
+
var KLING_V3_PRO_IMAGE_TO_VIDEO_MODEL = "fal-ai/kling-video/v3/pro/image-to-video";
|
|
440
441
|
var DEFAULT_START_END_VIDEO_MODEL = "fal-ai/vidu/start-end-to-video";
|
|
441
442
|
var DEFAULT_REFERENCE_VIDEO_MODEL = "fal-ai/vidu/q2/reference-to-video";
|
|
443
|
+
/**
 * Reports whether a fal.ai model id belongs to the Kling v3 video family.
 *
 * Matching is done on the "fal-ai/kling-video/v3/" id prefix. The canonical
 * Kling v3 Pro image-to-video id ("fal-ai/kling-video/v3/pro/image-to-video")
 * carries that prefix, so a separate equality check is unnecessary.
 *
 * @param {unknown} model - Model id to classify. Non-string values (for
 *   example an omitted optional argument) are classified as "not Kling v3"
 *   instead of raising a TypeError.
 * @returns {boolean} True when `model` is a string starting with the prefix.
 */
function isKlingV3Model(model) {
  return typeof model === "string" && model.startsWith("fal-ai/kling-video/v3/");
}
|
|
446
|
+
/**
 * Reports whether a fal.ai model id refers to a Vidu model.
 *
 * A model counts as Vidu when its id contains the "/vidu/" path segment.
 *
 * @param {unknown} model - Model id to classify. Non-string values are
 *   classified as "not Vidu" instead of raising a TypeError.
 * @returns {boolean} True when `model` is a string containing "/vidu/".
 */
function isViduModel(model) {
  return typeof model === "string" && model.includes("/vidu/");
}
|
|
442
449
|
function selectVideoModel(req) {
|
|
443
450
|
if (req.model) return req.model;
|
|
444
451
|
if (req.startFrame && req.endFrame) {
|
|
@@ -457,9 +464,12 @@ function selectImageModel(req) {
|
|
|
457
464
|
if (req.inputImages?.length) return DEFAULT_IMAGE_TO_IMAGE_MODEL;
|
|
458
465
|
return DEFAULT_IMAGE_MODEL;
|
|
459
466
|
}
|
|
460
|
-
function mapAspectRatio(aspectRatio) {
|
|
467
|
+
function mapAspectRatio(aspectRatio, model) {
|
|
461
468
|
if (!aspectRatio) return void 0;
|
|
462
469
|
const ar = aspectRatio.trim();
|
|
470
|
+
if (model && isKlingV3Model(model)) {
|
|
471
|
+
return ar;
|
|
472
|
+
}
|
|
463
473
|
if (ar === "1:1") return "square";
|
|
464
474
|
if (ar === "4:3") return "landscape_4_3";
|
|
465
475
|
if (ar === "16:9") return "landscape_16_9";
|
|
@@ -467,29 +477,50 @@ function mapAspectRatio(aspectRatio) {
|
|
|
467
477
|
if (ar === "9:16") return "portrait_16_9";
|
|
468
478
|
return ar;
|
|
469
479
|
}
|
|
470
|
-
function buildVideoInput(req) {
|
|
480
|
+
function buildVideoInput(req, model) {
|
|
471
481
|
const input = {
|
|
472
482
|
prompt: req.prompt
|
|
473
483
|
};
|
|
474
484
|
if (req.startFrame && req.endFrame) {
|
|
475
485
|
input.start_image_url = req.startFrame;
|
|
476
486
|
input.end_image_url = req.endFrame;
|
|
487
|
+
const ar = mapAspectRatio(req.aspectRatio, model);
|
|
488
|
+
if (ar) input.aspect_ratio = ar;
|
|
489
|
+
if (req.duration) input.duration = String(req.duration);
|
|
477
490
|
return input;
|
|
478
491
|
}
|
|
479
492
|
if (req.inputImages?.length && !req.startFrame) {
|
|
493
|
+
if (isKlingV3Model(model)) {
|
|
494
|
+
input.start_image_url = req.inputImages[0];
|
|
495
|
+
const ar2 = mapAspectRatio(req.aspectRatio, model);
|
|
496
|
+
if (ar2) input.aspect_ratio = ar2;
|
|
497
|
+
if (req.duration) input.duration = String(req.duration);
|
|
498
|
+
return input;
|
|
499
|
+
}
|
|
480
500
|
input.reference_image_urls = req.inputImages.slice(0, 7);
|
|
481
|
-
const ar = mapAspectRatio(req.aspectRatio);
|
|
501
|
+
const ar = mapAspectRatio(req.aspectRatio, model);
|
|
482
502
|
if (ar) input.aspect_ratio = ar;
|
|
483
503
|
if (req.duration) input.duration = String(req.duration);
|
|
484
504
|
return input;
|
|
485
505
|
}
|
|
486
506
|
const imageUrl = req.startFrame ?? req.inputImages?.[0];
|
|
487
507
|
if (imageUrl) {
|
|
488
|
-
|
|
508
|
+
if (isKlingV3Model(model)) {
|
|
509
|
+
input.start_image_url = imageUrl;
|
|
510
|
+
const ar = mapAspectRatio(req.aspectRatio, model);
|
|
511
|
+
if (ar) input.aspect_ratio = ar;
|
|
512
|
+
} else {
|
|
513
|
+
input.image_url = imageUrl;
|
|
514
|
+
}
|
|
489
515
|
if (req.duration) input.duration = String(req.duration);
|
|
490
516
|
return input;
|
|
491
517
|
}
|
|
492
|
-
|
|
518
|
+
if (isKlingV3Model(model)) {
|
|
519
|
+
throw new Error(
|
|
520
|
+
`Model ${model} requires --start-frame (or --input) because it is image-to-video only`
|
|
521
|
+
);
|
|
522
|
+
}
|
|
523
|
+
const imageSize = mapAspectRatio(req.aspectRatio, model);
|
|
493
524
|
if (imageSize) input.image_size = imageSize;
|
|
494
525
|
if (req.n) input.num_videos = req.n;
|
|
495
526
|
return input;
|
|
@@ -514,8 +545,8 @@ var falCapabilities = {
|
|
|
514
545
|
supportsCustomAspectRatio: true,
|
|
515
546
|
supportsVideoInterpolation: true,
|
|
516
547
|
// Vidu start-end-to-video
|
|
517
|
-
videoDurationRange: [2,
|
|
518
|
-
//
|
|
548
|
+
videoDurationRange: [2, 15],
|
|
549
|
+
// Most models are 2-8; Kling v3 supports up to 15
|
|
519
550
|
supportsImageEditing: true
|
|
520
551
|
};
|
|
521
552
|
var falProvider = {
|
|
@@ -543,7 +574,19 @@ var falProvider = {
|
|
|
543
574
|
fal.config({ credentials: key });
|
|
544
575
|
const model = req.kind === "video" ? selectVideoModel(req) : selectImageModel(req);
|
|
545
576
|
log2(verbose, "Selected model:", model);
|
|
546
|
-
|
|
577
|
+
if (req.kind === "video" && req.duration !== void 0) {
|
|
578
|
+
if (isKlingV3Model(model) && (req.duration < 3 || req.duration > 15)) {
|
|
579
|
+
throw new Error(
|
|
580
|
+
`Model ${model} supports video duration 3-15s, but ${req.duration}s requested`
|
|
581
|
+
);
|
|
582
|
+
}
|
|
583
|
+
if (isViduModel(model) && (req.duration < 2 || req.duration > 8)) {
|
|
584
|
+
throw new Error(
|
|
585
|
+
`Model ${model} supports video duration 2-8s, but ${req.duration}s requested`
|
|
586
|
+
);
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
const input = req.kind === "video" ? buildVideoInput(req, model) : buildImageInput(req);
|
|
547
590
|
const inputSummary = { ...input };
|
|
548
591
|
for (const key2 of ["image_url", "start_image_url", "end_image_url"]) {
|
|
549
592
|
if (typeof inputSummary[key2] === "string" && inputSummary[key2].startsWith("data:")) {
|
|
@@ -610,6 +653,9 @@ var falProvider = {
|
|
|
610
653
|
|
|
611
654
|
// src/providers/google.ts
|
|
612
655
|
import { GoogleGenAI } from "@google/genai";
|
|
656
|
+
import { mkdtemp, readFile, rm } from "fs/promises";
|
|
657
|
+
import { tmpdir } from "os";
|
|
658
|
+
import { join } from "path";
|
|
613
659
|
/**
 * Picks the Gemini API key out of an environment-variable map.
 *
 * Lookup order is GEMINI_API_KEY, then GOOGLE_API_KEY, then
 * GOOGLE_GENAI_API_KEY. Because the chain uses `||`, an empty string is
 * treated the same as an unset variable.
 *
 * @param {object} env - Object holding the variables (presumably shaped like
 *   `process.env`; confirm against the caller).
 * @returns {string | undefined} The first truthy key, or the value of
 *   GOOGLE_GENAI_API_KEY when none of the three is truthy.
 */
function getGeminiApiKey(env) {
  return env.GEMINI_API_KEY || env.GOOGLE_API_KEY || env.GOOGLE_GENAI_API_KEY;
}
|
|
@@ -657,6 +703,21 @@ function imageToGoogleFormat(imageInput) {
|
|
|
657
703
|
}
|
|
658
704
|
return { fileUri: imageInput };
|
|
659
705
|
}
|
|
706
|
+
/**
 * Converts an image reference into the image payload object used for Veo
 * requests.
 *
 * - `data:` URIs are decoded with `parseDataUri` and returned as
 *   `{ imageBytes, mimeType }`.
 * - `gs://` URIs are passed through as `{ gcsUri }`.
 * - Any other string is rejected, since only those two forms are accepted.
 *
 * @param {string} imageInput - A `data:` URI or a `gs://` URI.
 * @returns {{ imageBytes: *, mimeType: * } | { gcsUri: string }} Payload for
 *   the Veo request.
 * @throws {TypeError} When `imageInput` is not a string.
 * @throws {Error} When a `data:` URI cannot be parsed, or when the input uses
 *   an unsupported scheme.
 */
function imageToVeoFormat(imageInput) {
  // Fail with an explicit message instead of "startsWith is not a function".
  if (typeof imageInput !== "string") {
    throw new TypeError(`Veo image input must be a string (got ${typeof imageInput})`);
  }
  if (imageInput.startsWith("data:")) {
    const parsed = parseDataUri(imageInput);
    if (!parsed?.data) {
      throw new Error("Failed to parse data URI for Veo image input");
    }
    return { imageBytes: parsed.data, mimeType: parsed.mimeType };
  }
  if (imageInput.startsWith("gs://")) {
    return { gcsUri: imageInput };
  }
  // Only a short prefix of the rejected input is echoed back in the message.
  throw new Error(
    `Veo image inputs must be data: URIs or gs:// URIs (got ${imageInput.slice(0, 24)}...)`
  );
}
|
|
660
721
|
var GEMINI_IMAGE_MODELS = ["gemini-2.5-flash-image", "gemini-3-pro-image-preview"];
|
|
661
722
|
function resolveModel(model) {
|
|
662
723
|
if (!model) return "gemini-2.5-flash-image";
|
|
@@ -675,6 +736,31 @@ async function downloadBytes3(url) {
|
|
|
675
736
|
log3(`Downloaded ${ab.byteLength} bytes in ${Date.now() - start}ms, type: ${ct}`);
|
|
676
737
|
return { bytes: new Uint8Array(ab), mimeType: ct };
|
|
677
738
|
}
|
|
739
|
+
/**
 * Retrieves the bytes of one generated video, trying the cheapest source
 * first:
 *
 * 1. Inline `video.videoBytes` (base64) is decoded directly.
 * 2. A `video.uri` that is not a `gs://` URI is fetched with `downloadBytes3`;
 *    if that fails the error is logged and the next step is tried.
 * 3. Otherwise `ai.files.download` writes the file into a fresh temporary
 *    directory, which is read back and then removed.
 *
 * @param {object} ai - Client exposing `files.download({ file, downloadPath })`.
 * @param {object} generatedVideo - Generated-video entry; its optional `video`
 *   property may carry `videoBytes`, `uri` and `mimeType`.
 * @returns {Promise<{ bytes: Uint8Array, mimeType: (string|undefined) }>}
 *   The video bytes and MIME type. The SDK fallback defaults the type to
 *   "video/mp4"; the inline path passes `video.mimeType` through unchanged.
 * @throws Whatever `mkdtemp`, `ai.files.download` or `readFile` reject with.
 */
async function downloadGeneratedVideo(ai, generatedVideo) {
  const video = generatedVideo?.video;
  if (video?.videoBytes) {
    return {
      bytes: new Uint8Array(Buffer.from(video.videoBytes, "base64")),
      mimeType: video.mimeType
    };
  }
  if (video?.uri && !video.uri.startsWith("gs://")) {
    try {
      return await downloadBytes3(video.uri);
    } catch (err) {
      // Deliberate best-effort: fall through to the SDK download below.
      log3("Direct video download failed, falling back to ai.files.download:", String(err));
    }
  }
  const tempDir = await mkdtemp(join(tmpdir(), "climage-veo-"));
  const downloadPath = join(tempDir, "video.mp4");
  try {
    await ai.files.download({ file: generatedVideo, downloadPath });
    const buf = await readFile(downloadPath);
    return { bytes: new Uint8Array(buf), mimeType: video?.mimeType ?? "video/mp4" };
  } finally {
    // A rejection thrown from `finally` would replace the successful result
    // (or mask the real download error), so cleanup is best-effort and logged.
    try {
      await rm(tempDir, { recursive: true, force: true });
    } catch (cleanupErr) {
      log3("Failed to remove temporary video directory:", String(cleanupErr));
    }
  }
}
|
|
678
764
|
/**
 * Pauses the calling async flow for roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
async function sleep2(ms) {
  await new Promise((r) => setTimeout(r, ms));
}
|
|
@@ -740,15 +826,15 @@ async function generateWithVeo(ai, model, req) {
|
|
|
740
826
|
const config = {
|
|
741
827
|
numberOfVideos: req.n,
|
|
742
828
|
...req.aspectRatio ? { aspectRatio: req.aspectRatio } : {},
|
|
743
|
-
// Add duration if specified (Veo
|
|
744
|
-
...req.duration !== void 0 ? { durationSeconds:
|
|
829
|
+
// Add duration if specified (Veo supports 4-8 seconds depending on model)
|
|
830
|
+
...req.duration !== void 0 ? { durationSeconds: req.duration } : {}
|
|
745
831
|
};
|
|
746
832
|
if (req.inputImages?.length && isVeo31Model(model)) {
|
|
747
833
|
const referenceImages = req.inputImages.slice(0, 3).map((img) => {
|
|
748
|
-
const imageData =
|
|
834
|
+
const imageData = imageToVeoFormat(img);
|
|
749
835
|
return {
|
|
750
836
|
image: imageData,
|
|
751
|
-
referenceType: "
|
|
837
|
+
referenceType: "ASSET"
|
|
752
838
|
};
|
|
753
839
|
});
|
|
754
840
|
config.referenceImages = referenceImages;
|
|
@@ -761,12 +847,12 @@ async function generateWithVeo(ai, model, req) {
|
|
|
761
847
|
};
|
|
762
848
|
const firstFrameImage = req.startFrame ?? (req.inputImages?.length === 1 ? req.inputImages[0] : void 0);
|
|
763
849
|
if (firstFrameImage && isVeo31Model(model)) {
|
|
764
|
-
const imageData =
|
|
850
|
+
const imageData = imageToVeoFormat(firstFrameImage);
|
|
765
851
|
generateParams.image = imageData;
|
|
766
852
|
log3("Added first frame image");
|
|
767
853
|
}
|
|
768
854
|
if (req.endFrame && isVeo31Model(model)) {
|
|
769
|
-
const lastFrameData =
|
|
855
|
+
const lastFrameData = imageToVeoFormat(req.endFrame);
|
|
770
856
|
config.lastFrame = lastFrameData;
|
|
771
857
|
log3("Added last frame for interpolation");
|
|
772
858
|
}
|
|
@@ -796,26 +882,22 @@ async function generateWithVeo(ai, model, req) {
|
|
|
796
882
|
for (let i = 0; i < Math.min(videos.length, req.n); i++) {
|
|
797
883
|
const v = videos[i];
|
|
798
884
|
log3(`Processing video ${i}:`, JSON.stringify(v).slice(0, 300));
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
log3(`Video ${i} has no URI, skipping`);
|
|
885
|
+
if (!v?.video) {
|
|
886
|
+
log3(`Video ${i} has no video payload, skipping`);
|
|
802
887
|
continue;
|
|
803
888
|
}
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
);
|
|
808
|
-
}
|
|
809
|
-
const { bytes, mimeType } = await downloadBytes3(uri);
|
|
810
|
-
out.push({
|
|
889
|
+
const uri = v?.video?.uri;
|
|
890
|
+
const { bytes, mimeType } = await downloadGeneratedVideo(ai, v);
|
|
891
|
+
const item = {
|
|
811
892
|
kind: "video",
|
|
812
893
|
provider: "google",
|
|
813
894
|
model,
|
|
814
895
|
index: i,
|
|
815
|
-
url: uri,
|
|
816
896
|
bytes,
|
|
817
897
|
...mimeType !== void 0 ? { mimeType } : {}
|
|
818
|
-
}
|
|
898
|
+
};
|
|
899
|
+
if (uri) item.url = uri;
|
|
900
|
+
out.push(item);
|
|
819
901
|
}
|
|
820
902
|
if (!out.length) throw new Error("Google Veo returned videos but none were downloadable");
|
|
821
903
|
log3(`Successfully generated ${out.length} video(s)`);
|