vargai 0.4.0-alpha35 → 0.4.0-alpha36

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -68,7 +68,7 @@
68
68
  "sharp": "^0.34.5",
69
69
  "zod": "^4.2.1"
70
70
  },
71
- "version": "0.4.0-alpha35",
71
+ "version": "0.4.0-alpha36",
72
72
  "exports": {
73
73
  ".": "./src/index.ts",
74
74
  "./ai": "./src/ai-sdk/index.ts",
@@ -44,7 +44,7 @@ export class LocalBackend implements FFmpegBackend {
44
44
  if (typeof input === "string") {
45
45
  args.push("-i", input);
46
46
  } else if ("raw" in input) {
47
- args.push(...input.raw.split(" "));
47
+ args.push(...input.raw);
48
48
  } else {
49
49
  if (input.options) args.push(...input.options);
50
50
  args.push("-i", input.path);
@@ -22,8 +22,8 @@ export type FFmpegInput =
22
22
  options?: string[];
23
23
  }
24
24
  | {
25
- /** Raw ffmpeg args that don't use -i (e.g. "-f lavfi -i color=black") */
26
- raw: string;
25
+ /** Raw ffmpeg args that don't use -i (e.g. ["-f", "lavfi", "-i", "color=black"]) */
26
+ raw: string[];
27
27
  };
28
28
 
29
29
  /**
@@ -134,7 +134,10 @@ export class RendiBackend implements FFmpegBackend {
134
134
 
135
135
  const replaceWithPlaceholders = (str: string): string => {
136
136
  let result = str;
137
- for (const [url, ph] of pathToPlaceholder) {
137
+ const sortedEntries = [...pathToPlaceholder.entries()].sort(
138
+ (a, b) => b[0].length - a[0].length,
139
+ );
140
+ for (const [url, ph] of sortedEntries) {
138
141
  if (result.includes(url)) {
139
142
  result = result.replaceAll(url, ph);
140
143
  }
@@ -43,6 +43,11 @@ const VIDEO_MODELS: Record<string, { t2v: string; i2v: string }> = {
43
43
  t2v: "fal-ai/minimax-video/text-to-video",
44
44
  i2v: "fal-ai/minimax-video/image-to-video",
45
45
  },
46
+ // LTX-2 19B Distilled - video with native audio generation
47
+ "ltx-2-19b-distilled": {
48
+ t2v: "fal-ai/ltx-2-19b/distilled/text-to-video",
49
+ i2v: "fal-ai/ltx-2-19b/distilled/image-to-video",
50
+ },
46
51
  };
47
52
 
48
53
  // Motion control models - video-to-video with motion transfer
@@ -182,6 +187,7 @@ class FalVideoModel implements VideoModelV3 {
182
187
  const isLipsync = LIPSYNC_MODELS[this.modelId] !== undefined;
183
188
  const isMotionControl = MOTION_CONTROL_MODELS[this.modelId] !== undefined;
184
189
  const isKlingV26 = this.modelId === "kling-v2.6";
190
+ const isLtx2 = this.modelId === "ltx-2-19b-distilled";
185
191
 
186
192
  const endpoint = isLipsync
187
193
  ? this.resolveLipsyncEndpoint()
@@ -241,8 +247,21 @@ class FalVideoModel implements VideoModelV3 {
241
247
  // Standard video generation
242
248
  input.prompt = prompt;
243
249
 
244
- // Duration must be string "5" or "10" for Kling v2.6
245
- if (isKlingV26) {
250
+ // LTX-2 uses num_frames instead of duration, and has different defaults
251
+ if (isLtx2) {
252
+ // LTX-2: convert duration to num_frames (25fps default)
253
+ // Always set num_frames from duration unless explicitly provided via providerOptions
254
+ if (input.num_frames === undefined) {
255
+ const fps = (input.fps as number) ?? 25;
256
+ const durationSec = duration ?? 5; // default 5 seconds
257
+ input.num_frames = Math.round(durationSec * fps);
258
+ }
259
+ // LTX-2 uses video_size instead of aspect_ratio
260
+ if (input.video_size === undefined) {
261
+ input.video_size = "auto";
262
+ }
263
+ } else if (isKlingV26) {
264
+ // Duration must be string "5" or "10" for Kling v2.6
246
265
  input.duration = String(duration ?? 5);
247
266
  } else {
248
267
  input.duration = duration ?? 5;
@@ -255,23 +274,36 @@ class FalVideoModel implements VideoModelV3 {
255
274
  if (imageFiles.length > 0) {
256
275
  // First image is start image
257
276
  input.image_url = await fileToUrl(imageFiles[0]!);
258
- // Second image (if provided) is end image for Kling v2.6
259
- if (isKlingV26 && imageFiles.length > 1) {
277
+ // Second image (if provided) is end image for Kling v2.6 and LTX-2
278
+ if ((isKlingV26 || isLtx2) && imageFiles.length > 1) {
260
279
  input.end_image_url = await fileToUrl(imageFiles[1]!);
261
280
  }
262
281
  }
263
- } else {
282
+ } else if (!isLtx2) {
283
+ // LTX-2 uses video_size, not aspect_ratio
264
284
  input.aspect_ratio = aspectRatio ?? "16:9";
265
285
  }
266
286
 
267
- // Kling v2.6 supports native audio generation
268
- if (isKlingV26) {
287
+ // Kling v2.6 and LTX-2 support native audio generation
288
+ if (isKlingV26 || isLtx2) {
269
289
  // Default to generating audio unless explicitly disabled
270
290
  if (input.generate_audio === undefined) {
271
291
  input.generate_audio = true;
272
292
  }
273
293
  }
274
294
 
295
+ // LTX-2 specific defaults
296
+ if (isLtx2) {
297
+ // Enable multiscale for better coherence (default: true)
298
+ if (input.use_multiscale === undefined) {
299
+ input.use_multiscale = true;
300
+ }
301
+ // Enable prompt expansion for better results (default: true)
302
+ if (input.enable_prompt_expansion === undefined) {
303
+ input.enable_prompt_expansion = true;
304
+ }
305
+ }
306
+
275
307
  const audioFile = files?.find((f) =>
276
308
  getMediaType(f)?.startsWith("audio/"),
277
309
  );
@@ -280,12 +312,17 @@ class FalVideoModel implements VideoModelV3 {
280
312
  }
281
313
  }
282
314
 
315
+ // LTX-2 supports seed, other models don't
283
316
  if (options.seed !== undefined) {
284
- warnings.push({
285
- type: "unsupported",
286
- feature: "seed",
287
- details: "Seed is not supported by this model",
288
- });
317
+ if (isLtx2) {
318
+ input.seed = options.seed;
319
+ } else {
320
+ warnings.push({
321
+ type: "unsupported",
322
+ feature: "seed",
323
+ details: "Seed is not supported by this model",
324
+ });
325
+ }
289
326
  }
290
327
 
291
328
  if (options.resolution !== undefined) {
@@ -296,12 +333,17 @@ class FalVideoModel implements VideoModelV3 {
296
333
  });
297
334
  }
298
335
 
336
+ // LTX-2 supports fps configuration
299
337
  if (options.fps !== undefined) {
300
- warnings.push({
301
- type: "unsupported",
302
- feature: "fps",
303
- details: "FPS is not configurable for this model",
304
- });
338
+ if (isLtx2) {
339
+ input.fps = options.fps;
340
+ } else {
341
+ warnings.push({
342
+ type: "unsupported",
343
+ feature: "fps",
344
+ details: "FPS is not configurable for this model",
345
+ });
346
+ }
305
347
  }
306
348
 
307
349
  const result = await fal.subscribe(endpoint, {
@@ -135,6 +135,19 @@ function extractNestedFromPrompt(prompt: unknown): StoryboardElement[] {
135
135
  for (const img of p.images) {
136
136
  if (img && typeof img === "object" && "type" in img) {
137
137
  nested.push(extractElementInfo(img as VargElement));
138
+ } else if (typeof img === "string") {
139
+ const isUrl = img.startsWith("http://") || img.startsWith("https://");
140
+ const isLocalFile =
141
+ img.startsWith("/") || img.startsWith("./") || img.includes(".");
142
+ if (isUrl || isLocalFile) {
143
+ nested.push({
144
+ type: "input",
145
+ src: img,
146
+ details: {
147
+ inputType: isUrl ? "url" : "file",
148
+ },
149
+ });
150
+ }
138
151
  }
139
152
  }
140
153
  }
@@ -378,12 +391,22 @@ const TYPE_COLORS: Record<string, string> = {
378
391
  split: "#818cf8",
379
392
  slider: "#2dd4bf",
380
393
  swipe: "#fb923c",
394
+ input: "#9ca3af",
381
395
  };
382
396
 
383
397
  function escapeHtml(str: string): string {
384
398
  return str.replace(/</g, "&lt;").replace(/>/g, "&gt;");
385
399
  }
386
400
 
401
+ function escapeAttr(str: string): string {
402
+ return str
403
+ .replace(/&/g, "&amp;")
404
+ .replace(/"/g, "&quot;")
405
+ .replace(/'/g, "&#39;")
406
+ .replace(/</g, "&lt;")
407
+ .replace(/>/g, "&gt;");
408
+ }
409
+
387
410
  function generateHtml(storyboard: Storyboard, sourceFile: string): string {
388
411
  const escapedSourceFile = escapeHtml(sourceFile);
389
412
 
@@ -398,9 +421,6 @@ function generateHtml(storyboard: Storyboard, sourceFile: string): string {
398
421
  const childPrefix =
399
422
  depth === 0 ? "" : parentPrefix + (isLast ? " " : "│ ");
400
423
 
401
- const promptOrText = el.prompt || el.text || el.src || "";
402
- const shortPrompt = promptOrText ? escapeHtml(promptOrText) : "";
403
-
404
424
  const children =
405
425
  (el.details.children as StoryboardElement[] | undefined) || [];
406
426
  const childrenHtml = children
@@ -414,6 +434,29 @@ function generateHtml(storyboard: Storyboard, sourceFile: string): string {
414
434
  )
415
435
  .join("");
416
436
 
437
+ const isInputWithUrl =
438
+ el.type === "input" &&
439
+ el.src &&
440
+ (el.src.startsWith("http://") || el.src.startsWith("https://"));
441
+
442
+ if (isInputWithUrl) {
443
+ const shortUrl =
444
+ el.src!.length > 50 ? `${el.src!.slice(0, 50)}...` : el.src!;
445
+ const escapedSrc = escapeAttr(el.src!);
446
+ return `
447
+ <div class="tree-node" style="--depth: ${depth}">
448
+ <span class="tree-prefix">${parentPrefix}${connector}</span>
449
+ <span class="type-tag" style="background: ${color}">${el.type}</span>
450
+ <span class="input-preview-wrapper">
451
+ <a href="${escapedSrc}" target="_blank" rel="noopener noreferrer" class="tree-prompt input-url">${escapeHtml(shortUrl)}</a>
452
+ <span class="input-preview-tooltip"><img src="${escapedSrc}" alt="preview" /></span>
453
+ </span>
454
+ </div>${childrenHtml}`;
455
+ }
456
+
457
+ const promptOrText = el.prompt || el.text || el.src || "";
458
+ const shortPrompt = promptOrText ? escapeHtml(promptOrText) : "";
459
+
417
460
  return `
418
461
  <div class="tree-node" style="--depth: ${depth}">
419
462
  <span class="tree-prefix">${parentPrefix}${connector}</span>
@@ -476,10 +519,34 @@ function generateHtml(storyboard: Storyboard, sourceFile: string): string {
476
519
  const isLast = i === children.length - 1;
477
520
  const connector = isLast ? "└─" : "├─";
478
521
  const color = TYPE_COLORS[child.type] || "#666";
479
- const childPrompt = child.prompt || child.text || "";
480
522
  const grandChildren =
481
523
  (child.details.children as StoryboardElement[]) || [];
482
524
 
525
+ const isInputWithUrl =
526
+ child.type === "input" &&
527
+ child.src &&
528
+ (child.src.startsWith("http://") || child.src.startsWith("https://"));
529
+
530
+ if (isInputWithUrl) {
531
+ const shortUrl =
532
+ child.src!.length > 60
533
+ ? `${child.src!.slice(0, 60)}...`
534
+ : child.src!;
535
+ const escapedSrc = escapeAttr(child.src!);
536
+ return `
537
+ <div class="timeline-nested">
538
+ <span class="nested-connector">${connector}</span>
539
+ <span class="nested-type" style="background: ${color}">${child.type}</span>
540
+ <span class="input-preview-wrapper">
541
+ <a href="${escapedSrc}" target="_blank" rel="noopener noreferrer" class="nested-prompt input-url">${escapeHtml(shortUrl)}</a>
542
+ <span class="input-preview-tooltip"><img src="${escapedSrc}" alt="preview" /></span>
543
+ </span>
544
+ </div>
545
+ ${grandChildren.length > 0 ? renderNestedTree(grandChildren, depth + 1) : ""}`;
546
+ }
547
+
548
+ const childPrompt = child.prompt || child.text || child.src || "";
549
+
483
550
  return `
484
551
  <div class="timeline-nested">
485
552
  <span class="nested-connector">${connector}</span>
@@ -1201,6 +1268,47 @@ function generateHtml(storyboard: Storyboard, sourceFile: string): string {
1201
1268
  width: 100%;
1202
1269
  margin-top: 0.25rem;
1203
1270
  }
1271
+
1272
+ .input-preview-wrapper {
1273
+ position: relative;
1274
+ display: inline-block;
1275
+ }
1276
+
1277
+ .input-url {
1278
+ color: var(--accent-sky);
1279
+ text-decoration: none;
1280
+ word-break: break-all;
1281
+ }
1282
+
1283
+ .input-url:hover {
1284
+ text-decoration: underline;
1285
+ }
1286
+
1287
+ .input-preview-tooltip {
1288
+ display: none;
1289
+ position: absolute;
1290
+ left: 0;
1291
+ top: 100%;
1292
+ margin-top: 8px;
1293
+ z-index: 1000;
1294
+ background: var(--bg-card);
1295
+ border: 1px solid var(--border-soft);
1296
+ border-radius: var(--radius-squishy);
1297
+ box-shadow: var(--shadow-soft);
1298
+ padding: 8px;
1299
+ max-width: 300px;
1300
+ }
1301
+
1302
+ .input-preview-tooltip img {
1303
+ max-width: 100%;
1304
+ max-height: 200px;
1305
+ border-radius: 8px;
1306
+ display: block;
1307
+ }
1308
+
1309
+ .input-preview-wrapper:hover .input-preview-tooltip {
1310
+ display: block;
1311
+ }
1204
1312
  </style>
1205
1313
  </head>
1206
1314
  <body>
@@ -0,0 +1,19 @@
1
+ import { fal } from "../../ai-sdk/providers/fal";
2
+ import { Clip, Image, Render, Video } from "..";
3
+
4
+ export default (
5
+ <Render width={1080} height={1920}>
6
+ <Clip duration={3}>
7
+ <Image src="media/cyberpunk-street.png" />
8
+ </Clip>
9
+ <Clip duration={3}>
10
+ <Video
11
+ prompt={{
12
+ text: "camera pans across the scene",
13
+ images: [Image({ src: "media/fal-coffee-shop.png" })],
14
+ }}
15
+ model={fal.videoModel("kling-v2.5")}
16
+ />
17
+ </Clip>
18
+ </Render>
19
+ );
@@ -0,0 +1,25 @@
1
+ import { fal } from "../../ai-sdk/providers/fal";
2
+ import { Clip, Render, Video } from "..";
3
+
4
+ export default (
5
+ <Render width={1248} height={704}>
6
+ <Clip>
7
+ <Video
8
+ prompt={{
9
+ text: "Camera slowly dollies in toward her face, city lights flicker",
10
+ images: [
11
+ "https://storage.googleapis.com/falserverless/example_inputs/ltxv-2-i2v-input.jpg",
12
+ ],
13
+ }}
14
+ model={fal.videoModel("ltx-2-19b-distilled")}
15
+ keepAudio
16
+ providerOptions={{
17
+ fal: {
18
+ generate_audio: true,
19
+ camera_lora: "dolly_in",
20
+ },
21
+ }}
22
+ />
23
+ </Clip>
24
+ </Render>
25
+ );
@@ -1,3 +1,4 @@
1
+ export type { CacheStorage } from "../ai-sdk/cache";
1
2
  export type { SizeValue } from "../ai-sdk/providers/editly/types";
2
3
  export { assets } from "./assets";
3
4
  export {
@@ -1,5 +1,5 @@
1
1
  import type { generateImage } from "ai";
2
- import type { fileCache } from "../../ai-sdk/file-cache";
2
+ import type { CacheStorage } from "../../ai-sdk/cache";
3
3
  import type { generateVideo } from "../../ai-sdk/generate-video";
4
4
  import type { DefaultModels } from "../types";
5
5
  import type { ProgressTracker } from "./progress";
@@ -8,7 +8,7 @@ export interface RenderContext {
8
8
  width: number;
9
9
  height: number;
10
10
  fps: number;
11
- cache?: ReturnType<typeof fileCache>;
11
+ cache?: CacheStorage;
12
12
  generateImage: typeof generateImage;
13
13
  generateVideo: typeof generateVideo;
14
14
  tempFiles: string[];
@@ -85,6 +85,7 @@ export async function renderImage(
85
85
  model,
86
86
  prompt: resolvedPrompt,
87
87
  aspectRatio: props.aspectRatio,
88
+ providerOptions: props.providerOptions,
88
89
  n: 1,
89
90
  cacheKey,
90
91
  } as Parameters<typeof generateImage>[0]);
@@ -1,5 +1,5 @@
1
1
  import { generateImage, wrapImageModel } from "ai";
2
- import { withCache } from "../../ai-sdk/cache";
2
+ import { type CacheStorage, withCache } from "../../ai-sdk/cache";
3
3
  import { fileCache } from "../../ai-sdk/file-cache";
4
4
  import { generateVideo } from "../../ai-sdk/generate-video";
5
5
  import {
@@ -48,6 +48,16 @@ interface RenderedOverlay {
48
48
  isVideo: boolean;
49
49
  }
50
50
 
51
+ function resolveCacheStorage(
52
+ cache: string | CacheStorage | undefined,
53
+ ): CacheStorage | undefined {
54
+ if (!cache) return undefined;
55
+ if (typeof cache === "string") {
56
+ return fileCache({ dir: cache });
57
+ }
58
+ return cache;
59
+ }
60
+
51
61
  export async function renderRoot(
52
62
  element: VargElement<"render">,
53
63
  options: RenderOptions,
@@ -63,12 +73,14 @@ export async function renderRoot(
63
73
  placeholderCount.total++;
64
74
  };
65
75
 
66
- const cachedGenerateImage = options.cache
67
- ? withCache(generateImage, { storage: fileCache({ dir: options.cache }) })
76
+ const cacheStorage = resolveCacheStorage(options.cache);
77
+
78
+ const cachedGenerateImage = cacheStorage
79
+ ? withCache(generateImage, { storage: cacheStorage })
68
80
  : generateImage;
69
81
 
70
- const cachedGenerateVideo = options.cache
71
- ? withCache(generateVideo, { storage: fileCache({ dir: options.cache }) })
82
+ const cachedGenerateVideo = cacheStorage
83
+ ? withCache(generateVideo, { storage: cacheStorage })
72
84
  : generateVideo;
73
85
 
74
86
  const wrapGenerateImage: typeof generateImage = async (opts) => {
@@ -114,7 +126,7 @@ export async function renderRoot(
114
126
  width: props.width ?? 1920,
115
127
  height: props.height ?? 1080,
116
128
  fps: props.fps ?? 30,
117
- cache: options.cache ? fileCache({ dir: options.cache }) : undefined,
129
+ cache: cacheStorage,
118
130
  generateImage: wrapGenerateImage,
119
131
  generateVideo: wrapGenerateVideo,
120
132
  tempFiles: [],
@@ -147,6 +147,7 @@ export async function renderVideo(
147
147
  prompt: resolvedPrompt,
148
148
  duration: props.duration ?? 5,
149
149
  aspectRatio: props.aspectRatio,
150
+ providerOptions: props.providerOptions,
150
151
  cacheKey,
151
152
  } as Parameters<typeof generateVideo>[0]);
152
153
 
@@ -1,5 +1,10 @@
1
- import type { ImageModelV3, SpeechModelV3 } from "@ai-sdk/provider";
1
+ import type {
2
+ ImageModelV3,
3
+ SharedV3ProviderOptions,
4
+ SpeechModelV3,
5
+ } from "@ai-sdk/provider";
2
6
  import type { FFmpegBackend } from "@/ai-sdk/providers/editly/backends";
7
+ import type { CacheStorage } from "../ai-sdk/cache";
3
8
  import type { MusicModelV3 } from "../ai-sdk/music-model";
4
9
  import type {
5
10
  CropPosition,
@@ -101,6 +106,8 @@ export interface ImageProps extends BaseProps, PositionProps {
101
106
  position?: Position;
102
107
  size?: { width: string; height: string };
103
108
  removeBackground?: boolean;
109
+ /** Provider-specific options (e.g., fal: { acceleration: "high" }) */
110
+ providerOptions?: SharedV3ProviderOptions;
104
111
  }
105
112
 
106
113
  export type VideoPrompt =
@@ -122,6 +129,8 @@ export type VideoProps = BaseProps &
122
129
  resize?: ResizeMode;
123
130
  cropPosition?: CropPosition;
124
131
  aspectRatio?: `${number}:${number}`;
132
+ /** Provider-specific options (e.g., fal: { generate_audio: true }) */
133
+ providerOptions?: SharedV3ProviderOptions;
125
134
  };
126
135
 
127
136
  export interface SpeechProps extends BaseProps, VolumeProps {
@@ -256,7 +265,7 @@ export interface DefaultModels {
256
265
 
257
266
  export interface RenderOptions {
258
267
  output?: string;
259
- cache?: string;
268
+ cache?: string | CacheStorage;
260
269
  quiet?: boolean;
261
270
  verbose?: boolean;
262
271
  mode?: RenderMode;
@@ -1,5 +1,5 @@
1
1
  import { generateImage } from "ai";
2
- import { withCache } from "../ai-sdk/cache";
2
+ import { type CacheStorage, withCache } from "../ai-sdk/cache";
3
3
  import { fileCache } from "../ai-sdk/file-cache";
4
4
  import { generateVideo } from "../ai-sdk/generate-video";
5
5
  import type { RenderContext } from "../react/renderers/context";
@@ -27,21 +27,26 @@ const sessions = new Map<string, StepSession>();
27
27
  export function createStepSession(
28
28
  code: string,
29
29
  rootElement: VargElement,
30
- cacheDir?: string,
30
+ cache?: string | CacheStorage,
31
31
  ): StepSession {
32
32
  const props = rootElement.props as RenderProps;
33
- const cache = cacheDir ? fileCache({ dir: cacheDir }) : undefined;
33
+ const cacheStorage =
34
+ cache === undefined
35
+ ? undefined
36
+ : typeof cache === "string"
37
+ ? fileCache({ dir: cache })
38
+ : cache;
34
39
 
35
40
  const ctx: RenderContext = {
36
41
  width: props.width ?? 1920,
37
42
  height: props.height ?? 1080,
38
43
  fps: props.fps ?? 30,
39
- cache,
40
- generateImage: cache
41
- ? withCache(generateImage, { storage: cache })
44
+ cache: cacheStorage,
45
+ generateImage: cacheStorage
46
+ ? withCache(generateImage, { storage: cacheStorage })
42
47
  : generateImage,
43
- generateVideo: cache
44
- ? withCache(generateVideo, { storage: cache })
48
+ generateVideo: cacheStorage
49
+ ? withCache(generateVideo, { storage: cacheStorage })
45
50
  : generateVideo,
46
51
  tempFiles: [],
47
52
  progress: createProgressTracker(false),
@@ -223,7 +228,7 @@ export async function finalizeRender(
223
228
 
224
229
  await render(session.rootElement, {
225
230
  output: outputPath,
226
- cache: session.ctx.cache ? ".cache/ai" : undefined,
231
+ cache: session.ctx.cache,
227
232
  quiet: true,
228
233
  });
229
234