vargai 0.4.0-alpha35 → 0.4.0-alpha37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,155 @@
1
+ /**
2
+ * Grok Imagine Video Test Example
3
+ *
4
+ * Run with: bun run examples/grok-imagine-test.tsx
5
+ *
6
+ * Tests all three Grok Imagine Video endpoints:
7
+ * 1. Text-to-Video
8
+ * 2. Image-to-Video
9
+ * 3. Edit Video
10
+ */
11
+
12
+ import { fal } from "@fal-ai/client";
13
+ import { falProvider } from "../src/providers/fal";
14
+
15
// Resolve the fal credentials from the environment: FAL_API_KEY is consulted
// first and FAL_KEY is used only when FAL_API_KEY is null/undefined.
const apiKey = process.env.FAL_API_KEY ?? process.env.FAL_KEY;
if (apiKey === undefined || apiKey === "") {
  // No usable key: report the problem and stop with a non-zero exit status.
  console.error("Error: FAL_API_KEY or FAL_KEY environment variable required");
  process.exit(1);
}
// Hand the key to the shared fal client.
fal.config({ credentials: apiKey });
22
+
23
/**
 * Calls `falProvider.grokTextToVideo` with a fixed prompt (6 second duration,
 * 16:9 aspect ratio, 720p resolution) and logs the returned video URL and
 * duration.
 *
 * @returns The URL of the generated video, or `undefined` when the response
 *          carries no video URL.
 */
async function testTextToVideo() {
  console.log("\n=== Testing Grok Text-to-Video ===\n");

  const { data } = await falProvider.grokTextToVideo({
    prompt:
      "A majestic eagle soaring through clouds at sunset, cinematic lighting, slow motion",
    duration: 6,
    aspectRatio: "16:9",
    resolution: "720p",
  });

  // The response payload is treated as loosely typed; every field is optional.
  const video = (data as { video?: { url?: string; duration?: number } })
    ?.video;

  console.log("Text-to-Video Result:");
  console.log(" Video URL:", video?.url);
  console.log(" Duration:", video?.duration);

  return video?.url;
}
41
+
42
/**
 * Calls `falProvider.grokImageToVideo` with a fixed prompt and the given
 * source image (6 second duration, "auto" aspect ratio, 720p resolution),
 * then logs the returned video URL and duration.
 *
 * @param imageUrl URL of the image passed to the endpoint as `imageUrl`.
 * @returns The URL of the generated video, or `undefined` when the response
 *          carries no video URL.
 */
async function testImageToVideo(imageUrl: string) {
  console.log("\n=== Testing Grok Image-to-Video ===\n");

  const { data } = await falProvider.grokImageToVideo({
    prompt:
      "The subject slowly turns their head and smiles, gentle wind blowing their hair",
    imageUrl,
    duration: 6,
    aspectRatio: "auto",
    resolution: "720p",
  });

  // The response payload is treated as loosely typed; every field is optional.
  const video = (data as { video?: { url?: string; duration?: number } })
    ?.video;

  console.log("Image-to-Video Result:");
  console.log(" Video URL:", video?.url);
  console.log(" Duration:", video?.duration);

  return video?.url;
}
61
+
62
/**
 * Calls `falProvider.grokEditVideo` with a fixed editing prompt and the given
 * source video ("auto" resolution), then logs the returned video URL and
 * duration.
 *
 * @param videoUrl URL of the video passed to the endpoint as `videoUrl`.
 * @returns The URL of the edited video, or `undefined` when the response
 *          carries no video URL.
 */
async function testEditVideo(videoUrl: string) {
  console.log("\n=== Testing Grok Edit Video ===\n");

  const { data } = await falProvider.grokEditVideo({
    prompt: "Add a vintage film grain effect and warm color grading",
    videoUrl,
    resolution: "auto",
  });

  // The response payload is treated as loosely typed; every field is optional.
  const video = (data as { video?: { url?: string; duration?: number } })
    ?.video;

  console.log("Edit Video Result:");
  console.log(" Video URL:", video?.url);
  console.log(" Duration:", video?.duration);

  return video?.url;
}
78
+
79
+ // Main execution
80
/**
 * Command-line entry point for the Grok Imagine Video example.
 *
 * The first CLI argument selects the mode (defaults to "t2v" when absent or
 * empty); the second, when present, overrides the sample media URL used by
 * the "i2v" and "edit" modes.
 *
 * - A recognised mode runs the matching test(s) and then prints a success line.
 * - An unrecognised mode prints the usage text and returns WITHOUT printing
 *   the success line, because no test was executed.
 * - Any error thrown by a test is logged and the process exits with status 1.
 */
async function main() {
  const args = process.argv.slice(2);
  const mode = args[0] || "t2v";

  // Sample image shared by the "i2v" fallback and the "all" run.
  const sampleImageUrl =
    "https://v3b.fal.media/files/b/0a8b90e0/BFLE9VDlZqsryU-UA3BoD_image_004.png";

  console.log("Grok Imagine Video Test");
  console.log("=======================");
  console.log(`Mode: ${mode}`);

  try {
    switch (mode) {
      case "t2v":
      case "text-to-video": {
        await testTextToVideo();
        break;
      }

      case "i2v":
      case "image-to-video": {
        const imageUrl = args[1] || sampleImageUrl;
        await testImageToVideo(imageUrl);
        break;
      }

      case "edit":
      case "edit-video": {
        const videoUrl =
          args[1] ||
          "https://v3b.fal.media/files/b/0a8b9112/V5Z_NIPE3ppMDWivNo6_q_video_019.mp4";
        await testEditVideo(videoUrl);
        break;
      }

      case "all": {
        // Run all tests in sequence
        const t2vUrl = await testTextToVideo();

        // Use a sample image for i2v test
        await testImageToVideo(sampleImageUrl);

        // Use the t2v result for edit test if available
        if (t2vUrl) {
          await testEditVideo(t2vUrl);
        } else {
          // Make the skipped step visible instead of silently omitting it.
          console.warn(
            "\nSkipping Edit Video test: text-to-video returned no video URL",
          );
        }
        break;
      }

      default:
        console.log(`
Usage: bun run examples/grok-imagine-test.tsx [mode] [url]

Modes:
t2v, text-to-video Generate video from text prompt
i2v, image-to-video Generate video from image (provide image URL)
edit, edit-video Edit existing video (provide video URL)
all Run all tests

Examples:
bun run examples/grok-imagine-test.tsx t2v
bun run examples/grok-imagine-test.tsx i2v https://example.com/image.png
bun run examples/grok-imagine-test.tsx edit https://example.com/video.mp4
bun run examples/grok-imagine-test.tsx all
`);
        // Unknown mode: only the usage text was shown and no test ran, so
        // return before the success message below.
        return;
    }

    console.log("\nTest completed successfully!");
  } catch (error) {
    console.error("\nTest failed:", error);
    process.exit(1);
  }
}
154
+
155
+ main();
package/package.json CHANGED
@@ -68,7 +68,7 @@
68
68
  "sharp": "^0.34.5",
69
69
  "zod": "^4.2.1"
70
70
  },
71
- "version": "0.4.0-alpha35",
71
+ "version": "0.4.0-alpha37",
72
72
  "exports": {
73
73
  ".": "./src/index.ts",
74
74
  "./ai": "./src/ai-sdk/index.ts",
@@ -44,7 +44,7 @@ export class LocalBackend implements FFmpegBackend {
44
44
  if (typeof input === "string") {
45
45
  args.push("-i", input);
46
46
  } else if ("raw" in input) {
47
- args.push(...input.raw.split(" "));
47
+ args.push(...input.raw);
48
48
  } else {
49
49
  if (input.options) args.push(...input.options);
50
50
  args.push("-i", input.path);
@@ -22,8 +22,8 @@ export type FFmpegInput =
22
22
  options?: string[];
23
23
  }
24
24
  | {
25
- /** Raw ffmpeg args that don't use -i (e.g. "-f lavfi -i color=black") */
26
- raw: string;
25
+ /** Raw ffmpeg args that don't use -i (e.g. ["-f", "lavfi", "-i", "color=black"]) */
26
+ raw: string[];
27
27
  };
28
28
 
29
29
  /**
@@ -134,7 +134,10 @@ export class RendiBackend implements FFmpegBackend {
134
134
 
135
135
  const replaceWithPlaceholders = (str: string): string => {
136
136
  let result = str;
137
- for (const [url, ph] of pathToPlaceholder) {
137
+ const sortedEntries = [...pathToPlaceholder.entries()].sort(
138
+ (a, b) => b[0].length - a[0].length,
139
+ );
140
+ for (const [url, ph] of sortedEntries) {
138
141
  if (result.includes(url)) {
139
142
  result = result.replaceAll(url, ph);
140
143
  }
@@ -43,6 +43,21 @@ const VIDEO_MODELS: Record<string, { t2v: string; i2v: string }> = {
43
43
  t2v: "fal-ai/minimax-video/text-to-video",
44
44
  i2v: "fal-ai/minimax-video/image-to-video",
45
45
  },
46
+ // LTX-2 19B Distilled - video with native audio generation
47
+ "ltx-2-19b-distilled": {
48
+ t2v: "fal-ai/ltx-2-19b/distilled/text-to-video",
49
+ i2v: "fal-ai/ltx-2-19b/distilled/image-to-video",
50
+ },
51
+ // Grok Imagine Video - xAI's video generation with audio
52
+ "grok-imagine": {
53
+ t2v: "xai/grok-imagine-video/text-to-video",
54
+ i2v: "xai/grok-imagine-video/image-to-video",
55
+ },
56
+ };
57
+
58
+ // Video edit models - video-to-video editing
59
+ const VIDEO_EDIT_MODELS: Record<string, string> = {
60
+ "grok-imagine-edit": "xai/grok-imagine-video/edit-video",
46
61
  };
47
62
 
48
63
  // Motion control models - video-to-video with motion transfer
@@ -67,6 +82,8 @@ const IMAGE_MODELS: Record<string, string> = {
67
82
  "nano-banana-pro": "fal-ai/nano-banana-pro",
68
83
  "nano-banana-pro/edit": "fal-ai/nano-banana-pro/edit",
69
84
  "seedream-v4.5/edit": "fal-ai/bytedance/seedream/v4.5/edit",
85
+ // Qwen Image Edit 2511 Multiple Angles - camera angle adjustment
86
+ "qwen-angles": "fal-ai/qwen-image-edit-2511-multiple-angles",
70
87
  };
71
88
 
72
89
  // Models that use image_size instead of aspect_ratio
@@ -77,6 +94,23 @@ const IMAGE_SIZE_MODELS = new Set([
77
94
  "seedream-v4.5/edit",
78
95
  ]);
79
96
 
97
+ // Qwen Angles model - image-to-image with camera angle adjustment
98
+ const QWEN_ANGLES_MODEL = "qwen-angles";
99
+
100
+ // Map aspect ratio to image_size for Qwen Angles (base dimension 1024)
101
+ const ASPECT_RATIO_TO_QWEN_SIZE: Record<
102
+ string,
103
+ { width: number; height: number }
104
+ > = {
105
+ "1:1": { width: 1024, height: 1024 },
106
+ "4:3": { width: 1024, height: 768 },
107
+ "3:4": { width: 768, height: 1024 },
108
+ "16:9": { width: 1024, height: 576 },
109
+ "9:16": { width: 576, height: 1024 },
110
+ "3:2": { width: 1024, height: 683 },
111
+ "2:3": { width: 683, height: 1024 },
112
+ };
113
+
80
114
  // Map aspect ratio strings to image_size enum values
81
115
  const ASPECT_RATIO_TO_IMAGE_SIZE: Record<string, string> = {
82
116
  "1:1": "square",
@@ -181,13 +215,18 @@ class FalVideoModel implements VideoModelV3 {
181
215
 
182
216
  const isLipsync = LIPSYNC_MODELS[this.modelId] !== undefined;
183
217
  const isMotionControl = MOTION_CONTROL_MODELS[this.modelId] !== undefined;
218
+ const isVideoEdit = VIDEO_EDIT_MODELS[this.modelId] !== undefined;
184
219
  const isKlingV26 = this.modelId === "kling-v2.6";
220
+ const isLtx2 = this.modelId === "ltx-2-19b-distilled";
221
+ const isGrokImagine = this.modelId === "grok-imagine";
185
222
 
186
223
  const endpoint = isLipsync
187
224
  ? this.resolveLipsyncEndpoint()
188
225
  : isMotionControl
189
226
  ? this.resolveMotionControlEndpoint()
190
- : this.resolveEndpoint(hasImageInput ?? false);
227
+ : isVideoEdit
228
+ ? this.resolveVideoEditEndpoint()
229
+ : this.resolveEndpoint(hasImageInput ?? false);
191
230
 
192
231
  const input: Record<string, unknown> = {
193
232
  ...(providerOptions?.fal ?? {}),
@@ -237,13 +276,49 @@ class FalVideoModel implements VideoModelV3 {
237
276
  if (input.keep_original_sound === undefined) {
238
277
  input.keep_original_sound = true;
239
278
  }
279
+ } else if (isVideoEdit) {
280
+ // Video edit: video input + prompt for editing instruction
281
+ input.prompt = prompt;
282
+
283
+ const videoFile = files?.find((f) =>
284
+ getMediaType(f)?.startsWith("video/"),
285
+ );
286
+
287
+ if (videoFile) {
288
+ input.video_url = await fileToUrl(videoFile);
289
+ }
290
+
291
+ // Grok Imagine Edit supports resolution: "auto", "480p", "720p"
292
+ if (!input.resolution) {
293
+ input.resolution = "auto";
294
+ }
240
295
  } else {
241
296
  // Standard video generation
242
297
  input.prompt = prompt;
243
298
 
244
- // Duration must be string "5" or "10" for Kling v2.6
245
- if (isKlingV26) {
299
+ // LTX-2 uses num_frames instead of duration, and has different defaults
300
+ if (isLtx2) {
301
+ // LTX-2: convert duration to num_frames (25fps default)
302
+ // Always set num_frames from duration unless explicitly provided via providerOptions
303
+ if (input.num_frames === undefined) {
304
+ const fps = (input.fps as number) ?? 25;
305
+ const durationSec = duration ?? 5; // default 5 seconds
306
+ input.num_frames = Math.round(durationSec * fps);
307
+ }
308
+ // LTX-2 uses video_size instead of aspect_ratio
309
+ if (input.video_size === undefined) {
310
+ input.video_size = "auto";
311
+ }
312
+ } else if (isKlingV26) {
313
+ // Duration must be string "5" or "10" for Kling v2.6
246
314
  input.duration = String(duration ?? 5);
315
+ } else if (isGrokImagine) {
316
+ // Grok Imagine: duration 1-15 seconds (default 6)
317
+ input.duration = duration ?? 6;
318
+ // Grok Imagine supports resolution: "480p", "720p" (default "720p")
319
+ if (!input.resolution) {
320
+ input.resolution = "720p";
321
+ }
247
322
  } else {
248
323
  input.duration = duration ?? 5;
249
324
  }
@@ -255,23 +330,36 @@ class FalVideoModel implements VideoModelV3 {
255
330
  if (imageFiles.length > 0) {
256
331
  // First image is start image
257
332
  input.image_url = await fileToUrl(imageFiles[0]!);
258
- // Second image (if provided) is end image for Kling v2.6
259
- if (isKlingV26 && imageFiles.length > 1) {
333
+ // Second image (if provided) is end image for Kling v2.6 and LTX-2
334
+ if ((isKlingV26 || isLtx2) && imageFiles.length > 1) {
260
335
  input.end_image_url = await fileToUrl(imageFiles[1]!);
261
336
  }
262
337
  }
263
- } else {
338
+ } else if (!isLtx2) {
339
+ // LTX-2 uses video_size, not aspect_ratio
264
340
  input.aspect_ratio = aspectRatio ?? "16:9";
265
341
  }
266
342
 
267
- // Kling v2.6 supports native audio generation
268
- if (isKlingV26) {
343
+ // Kling v2.6 and LTX-2 support native audio generation
344
+ if (isKlingV26 || isLtx2) {
269
345
  // Default to generating audio unless explicitly disabled
270
346
  if (input.generate_audio === undefined) {
271
347
  input.generate_audio = true;
272
348
  }
273
349
  }
274
350
 
351
+ // LTX-2 specific defaults
352
+ if (isLtx2) {
353
+ // Enable multiscale for better coherence (default: true)
354
+ if (input.use_multiscale === undefined) {
355
+ input.use_multiscale = true;
356
+ }
357
+ // Enable prompt expansion for better results (default: true)
358
+ if (input.enable_prompt_expansion === undefined) {
359
+ input.enable_prompt_expansion = true;
360
+ }
361
+ }
362
+
275
363
  const audioFile = files?.find((f) =>
276
364
  getMediaType(f)?.startsWith("audio/"),
277
365
  );
@@ -280,12 +368,17 @@ class FalVideoModel implements VideoModelV3 {
280
368
  }
281
369
  }
282
370
 
371
+ // LTX-2 supports seed, other models don't
283
372
  if (options.seed !== undefined) {
284
- warnings.push({
285
- type: "unsupported",
286
- feature: "seed",
287
- details: "Seed is not supported by this model",
288
- });
373
+ if (isLtx2) {
374
+ input.seed = options.seed;
375
+ } else {
376
+ warnings.push({
377
+ type: "unsupported",
378
+ feature: "seed",
379
+ details: "Seed is not supported by this model",
380
+ });
381
+ }
289
382
  }
290
383
 
291
384
  if (options.resolution !== undefined) {
@@ -296,12 +389,17 @@ class FalVideoModel implements VideoModelV3 {
296
389
  });
297
390
  }
298
391
 
392
+ // LTX-2 supports fps configuration
299
393
  if (options.fps !== undefined) {
300
- warnings.push({
301
- type: "unsupported",
302
- feature: "fps",
303
- details: "FPS is not configurable for this model",
304
- });
394
+ if (isLtx2) {
395
+ input.fps = options.fps;
396
+ } else {
397
+ warnings.push({
398
+ type: "unsupported",
399
+ feature: "fps",
400
+ details: "FPS is not configurable for this model",
401
+ });
402
+ }
305
403
  }
306
404
 
307
405
  const result = await fal.subscribe(endpoint, {
@@ -358,6 +456,14 @@ class FalVideoModel implements VideoModelV3 {
358
456
 
359
457
  return MOTION_CONTROL_MODELS[this.modelId] ?? this.modelId;
360
458
  }
459
+
460
+ private resolveVideoEditEndpoint(): string {
461
+ if (this.modelId.startsWith("raw:")) {
462
+ return this.modelId.slice(4);
463
+ }
464
+
465
+ return VIDEO_EDIT_MODELS[this.modelId] ?? this.modelId;
466
+ }
361
467
  }
362
468
 
363
469
  class FalImageModel implements ImageModelV3 {
@@ -383,14 +489,28 @@ class FalImageModel implements ImageModelV3 {
383
489
  } = options;
384
490
  const warnings: SharedV3Warning[] = [];
385
491
 
492
+ const isQwenAngles = this.modelId === QWEN_ANGLES_MODEL;
493
+
386
494
  const input: Record<string, unknown> = {
387
- prompt,
388
495
  num_images: n ?? 1,
389
- // Use high acceleration for faster queue processing on supported models (flux-schnell)
390
- acceleration: "high",
391
496
  ...(providerOptions?.fal ?? {}),
392
497
  };
393
498
 
499
+ // Qwen Angles uses additional_prompt instead of prompt
500
+ if (isQwenAngles) {
501
+ if (prompt) {
502
+ input.additional_prompt = prompt;
503
+ }
504
+ // Qwen Angles supports "regular" or "none" acceleration, not "high"
505
+ if (!input.acceleration) {
506
+ input.acceleration = "regular";
507
+ }
508
+ } else {
509
+ input.prompt = prompt;
510
+ // Use high acceleration for faster queue processing on supported models (flux-schnell)
511
+ input.acceleration = "high";
512
+ }
513
+
394
514
  const usesImageSize = IMAGE_SIZE_MODELS.has(this.modelId);
395
515
 
396
516
  if (size) {
@@ -404,7 +524,21 @@ class FalImageModel implements ImageModelV3 {
404
524
  }
405
525
 
406
526
  if (aspectRatio) {
407
- if (usesImageSize) {
527
+ if (isQwenAngles) {
528
+ // Convert aspect ratio to image_size dimensions for Qwen Angles
529
+ if (!input.image_size) {
530
+ const qwenSize = ASPECT_RATIO_TO_QWEN_SIZE[aspectRatio];
531
+ if (qwenSize) {
532
+ input.image_size = qwenSize;
533
+ } else {
534
+ warnings.push({
535
+ type: "unsupported",
536
+ feature: "aspectRatio",
537
+ details: `Aspect ratio "${aspectRatio}" not supported for qwen-angles, use one of: ${Object.keys(ASPECT_RATIO_TO_QWEN_SIZE).join(", ")}`,
538
+ });
539
+ }
540
+ }
541
+ } else if (usesImageSize) {
408
542
  // Convert aspect ratio to image_size enum for models that require it
409
543
  // Only set if size wasn't already provided
410
544
  if (!input.image_size) {
@@ -433,11 +567,16 @@ class FalImageModel implements ImageModelV3 {
433
567
  input.image_urls = await Promise.all(files.map((f) => fileToUrl(f)));
434
568
  }
435
569
 
570
+ // Qwen Angles requires image_urls
571
+ if (isQwenAngles && !input.image_urls) {
572
+ throw new Error("qwen-angles requires at least one image file");
573
+ }
574
+
436
575
  const hasImageUrls =
437
576
  hasFiles ||
438
577
  !!(providerOptions?.fal as Record<string, unknown>)?.image_urls;
439
578
  if (hasImageUrls) {
440
- if (!files) {
579
+ if (!files && !isQwenAngles) {
441
580
  throw new Error("No files provided");
442
581
  }
443
582
  }